@opentermsarchive/engine 0.26.1 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ota-track.js +3 -3
- package/bin/ota-validate.js +2 -2
- package/bin/ota.js +1 -1
- package/config/default.json +1 -1
- package/package.json +3 -4
- package/scripts/dataset/export/index.js +4 -4
- package/scripts/dataset/export/index.test.js +11 -17
- package/scripts/declarations/lint/index.mocha.js +1 -1
- package/scripts/declarations/utils/index.js +12 -12
- package/scripts/declarations/validate/definitions.js +1 -1
- package/scripts/declarations/validate/index.mocha.js +30 -34
- package/scripts/declarations/validate/service.history.schema.js +11 -11
- package/scripts/declarations/validate/service.schema.js +13 -13
- package/scripts/history/migrate-services.js +4 -4
- package/scripts/history/update-to-full-hash.js +2 -2
- package/scripts/import/index.js +14 -14
- package/scripts/rewrite/rewrite-snapshots.js +3 -3
- package/scripts/rewrite/rewrite-versions.js +14 -14
- package/scripts/utils/renamer/README.md +3 -3
- package/scripts/utils/renamer/index.js +13 -13
- package/src/archivist/errors.js +1 -1
- package/src/archivist/extract/exports.js +3 -0
- package/src/archivist/{filter → extract}/index.js +23 -27
- package/src/archivist/extract/index.test.js +516 -0
- package/src/archivist/index.js +101 -140
- package/src/archivist/index.test.js +178 -166
- package/src/archivist/recorder/index.js +11 -55
- package/src/archivist/recorder/index.test.js +310 -356
- package/src/archivist/recorder/record.js +18 -7
- package/src/archivist/recorder/repositories/git/dataMapper.js +41 -31
- package/src/archivist/recorder/repositories/git/index.js +11 -15
- package/src/archivist/recorder/repositories/git/index.test.js +1058 -463
- package/src/archivist/recorder/repositories/interface.js +8 -6
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +21 -14
- package/src/archivist/recorder/repositories/mongo/index.js +8 -8
- package/src/archivist/recorder/repositories/mongo/index.test.js +898 -479
- package/src/archivist/recorder/snapshot.js +5 -0
- package/src/archivist/recorder/snapshot.test.js +65 -0
- package/src/archivist/recorder/version.js +14 -0
- package/src/archivist/recorder/version.test.js +65 -0
- package/src/archivist/services/index.js +60 -51
- package/src/archivist/services/index.test.js +63 -83
- package/src/archivist/services/service.js +26 -22
- package/src/archivist/services/service.test.js +46 -68
- package/src/archivist/services/{pageDeclaration.js → sourceDocument.js} +11 -9
- package/src/archivist/services/{pageDeclaration.test.js → sourceDocument.test.js} +21 -21
- package/src/archivist/services/terms.js +26 -0
- package/src/archivist/services/{documentDeclaration.test.js → terms.test.js} +15 -15
- package/src/exports.js +2 -2
- package/src/index.js +16 -13
- package/src/logger/index.js +35 -36
- package/src/notifier/index.js +8 -8
- package/src/tracker/index.js +6 -6
- package/src/archivist/filter/exports.js +0 -3
- package/src/archivist/filter/index.test.js +0 -564
- package/src/archivist/recorder/record.test.js +0 -91
- package/src/archivist/services/documentDeclaration.js +0 -26
- /package/scripts/utils/renamer/rules/{documentTypes.json → termsTypes.json} +0 -0
- /package/scripts/utils/renamer/rules/{documentTypesByService.json → termsTypesByService.json} +0 -0
package/scripts/import/index.js
CHANGED
|
@@ -18,7 +18,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
|
18
18
|
const ROOT_PATH = path.resolve(__dirname, '../../');
|
|
19
19
|
const MAX_PARALLEL = 10;
|
|
20
20
|
const MAX_RETRY = 5;
|
|
21
|
-
const PDF_MIME_TYPE = '
|
|
21
|
+
const PDF_MIME_TYPE = mime.getType('pdf');
|
|
22
22
|
const COUNTERS = {
|
|
23
23
|
imported: 0,
|
|
24
24
|
skippedNoChanges: 0,
|
|
@@ -87,10 +87,10 @@ function queueErrorHandler(error, { commit }) {
|
|
|
87
87
|
|
|
88
88
|
const serviceId = path.dirname(relativeFilePath);
|
|
89
89
|
const extension = path.extname(relativeFilePath);
|
|
90
|
-
const
|
|
90
|
+
const termsType = path.basename(relativeFilePath, extension);
|
|
91
91
|
|
|
92
92
|
commitsNotImported.push(commit.hash);
|
|
93
|
-
logger.error({ message: `${error.stack}\nCommit details: ${JSON.stringify(commit, null, 2)}`, serviceId, type:
|
|
93
|
+
logger.error({ message: `${error.stack}\nCommit details: ${JSON.stringify(commit, null, 2)}`, serviceId, type: termsType, sha: commit.hash });
|
|
94
94
|
COUNTERS.errors++;
|
|
95
95
|
}
|
|
96
96
|
|
|
@@ -117,9 +117,9 @@ function queueDrainHandler(totalToTreat) {
|
|
|
117
117
|
};
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
async function getCommitContent({ sha, serviceId,
|
|
120
|
+
async function getCommitContent({ sha, serviceId, termsType, extension }) {
|
|
121
121
|
const start = performance.now();
|
|
122
|
-
const url = `https://raw.githubusercontent.com/${config.get('import.githubRepository')}/${sha}/${encodeURI(serviceId)}/${encodeURI(
|
|
122
|
+
const url = `https://raw.githubusercontent.com/${config.get('import.githubRepository')}/${sha}/${encodeURI(serviceId)}/${encodeURI(termsType)}.${extension}`;
|
|
123
123
|
const response = await nodeFetch(url);
|
|
124
124
|
const end = performance.now();
|
|
125
125
|
|
|
@@ -141,7 +141,7 @@ async function getCommitContent({ sha, serviceId, documentType, extension }) {
|
|
|
141
141
|
throw new TooManyRequestsError(`Cannot get commit content on Github ${url}. 429: Too Many Requests`);
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
-
logger.info({ message: `Fetched in ${Number(end - start).toFixed(2)} ms`, serviceId, type:
|
|
144
|
+
logger.info({ message: `Fetched in ${Number(end - start).toFixed(2)} ms`, serviceId, type: termsType, sha });
|
|
145
145
|
|
|
146
146
|
return content;
|
|
147
147
|
}
|
|
@@ -151,12 +151,12 @@ async function handleCommit(commit, index, total) {
|
|
|
151
151
|
|
|
152
152
|
let serviceId = path.dirname(relativeFilePath);
|
|
153
153
|
const extension = path.extname(relativeFilePath);
|
|
154
|
-
let
|
|
154
|
+
let termsType = path.basename(relativeFilePath, extension);
|
|
155
155
|
|
|
156
156
|
logger.info({
|
|
157
157
|
message: 'Start to handle commit',
|
|
158
158
|
serviceId,
|
|
159
|
-
type:
|
|
159
|
+
type: termsType,
|
|
160
160
|
sha: commit.hash,
|
|
161
161
|
current: index + 1,
|
|
162
162
|
total,
|
|
@@ -168,7 +168,7 @@ async function handleCommit(commit, index, total) {
|
|
|
168
168
|
logger.info({
|
|
169
169
|
message: 'Skipped commit as an entry already exists for this commit',
|
|
170
170
|
serviceId,
|
|
171
|
-
type:
|
|
171
|
+
type: termsType,
|
|
172
172
|
sha: commit.hash,
|
|
173
173
|
});
|
|
174
174
|
COUNTERS.skippedNoChanges++;
|
|
@@ -176,9 +176,9 @@ async function handleCommit(commit, index, total) {
|
|
|
176
176
|
return;
|
|
177
177
|
}
|
|
178
178
|
|
|
179
|
-
let content = await getCommitContent({ sha: commit.hash, serviceId,
|
|
179
|
+
let content = await getCommitContent({ sha: commit.hash, serviceId, termsType, extension: extension.replace('.', '') });
|
|
180
180
|
|
|
181
|
-
({ serviceId,
|
|
181
|
+
({ serviceId, termsType } = renamer.applyRules(serviceId, termsType));
|
|
182
182
|
|
|
183
183
|
const mimeType = mime.getType(extension);
|
|
184
184
|
|
|
@@ -198,7 +198,7 @@ async function handleCommit(commit, index, total) {
|
|
|
198
198
|
|
|
199
199
|
await snapshotsCollection.insertOne({
|
|
200
200
|
serviceId,
|
|
201
|
-
|
|
201
|
+
termsType,
|
|
202
202
|
content,
|
|
203
203
|
mimeType,
|
|
204
204
|
fetchDate: commit.date,
|
|
@@ -207,10 +207,10 @@ async function handleCommit(commit, index, total) {
|
|
|
207
207
|
});
|
|
208
208
|
const end = performance.now();
|
|
209
209
|
|
|
210
|
-
logger.info({ message: `Recorded in ${Number(end - start).toFixed(2)} ms`, serviceId, type:
|
|
210
|
+
logger.info({ message: `Recorded in ${Number(end - start).toFixed(2)} ms`, serviceId, type: termsType });
|
|
211
211
|
COUNTERS.imported++;
|
|
212
212
|
} catch (error) {
|
|
213
|
-
logger.error({ message: `Not saved: ${commit.date} ${error.stack}`, serviceId, type:
|
|
213
|
+
logger.error({ message: `Not saved: ${commit.date} ${error.stack}`, serviceId, type: termsType });
|
|
214
214
|
commitsNotImported.push(commit.hash);
|
|
215
215
|
COUNTERS.errors++;
|
|
216
216
|
}
|
|
@@ -76,13 +76,13 @@ let recorder;
|
|
|
76
76
|
const { content, mimeType } = await loadFile(SNAPSHOTS_SOURCE_PATH, relativeFilePath);
|
|
77
77
|
|
|
78
78
|
let serviceId = path.dirname(relativeFilePath);
|
|
79
|
-
let
|
|
79
|
+
let termsType = path.basename(relativeFilePath, path.extname(relativeFilePath));
|
|
80
80
|
|
|
81
|
-
({ serviceId,
|
|
81
|
+
({ serviceId, termsType } = renamer.applyRules(serviceId, termsType));
|
|
82
82
|
|
|
83
83
|
const { id: snapshotId } = await recorder.recordSnapshot({
|
|
84
84
|
serviceId,
|
|
85
|
-
|
|
85
|
+
termsType,
|
|
86
86
|
content,
|
|
87
87
|
mimeType,
|
|
88
88
|
fetchDate: commit.date,
|
|
@@ -4,7 +4,7 @@ import { fileURLToPath } from 'url';
|
|
|
4
4
|
import config from 'config';
|
|
5
5
|
|
|
6
6
|
import { InaccessibleContentError } from '../../src/archivist/errors.js';
|
|
7
|
-
import
|
|
7
|
+
import extract from '../../src/archivist/extract/index.js';
|
|
8
8
|
import Recorder from '../../src/archivist/recorder/index.js';
|
|
9
9
|
import Git from '../../src/archivist/recorder/repositories/git/git.js';
|
|
10
10
|
import GitRepository from '../../src/archivist/recorder/repositories/git/index.js';
|
|
@@ -86,41 +86,41 @@ let recorder;
|
|
|
86
86
|
const { content, mimeType } = await loadFile(SNAPSHOTS_SOURCE_PATH, relativeFilePath);
|
|
87
87
|
|
|
88
88
|
let serviceId = path.dirname(relativeFilePath);
|
|
89
|
-
let
|
|
89
|
+
let termsType = path.basename(relativeFilePath, path.extname(relativeFilePath));
|
|
90
90
|
|
|
91
|
-
({ serviceId,
|
|
91
|
+
({ serviceId, termsType } = renamer.applyRules(serviceId, termsType));
|
|
92
92
|
|
|
93
93
|
if (!servicesDeclarations[serviceId]) {
|
|
94
94
|
console.log(`⌙ Skip unknown service "${serviceId}"`);
|
|
95
95
|
continue;
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
const
|
|
99
|
-
|
|
98
|
+
const terms = servicesDeclarations[serviceId].getTerms(
|
|
99
|
+
termsType,
|
|
100
100
|
commit.date,
|
|
101
101
|
);
|
|
102
102
|
|
|
103
|
-
if (!
|
|
104
|
-
console.log(`⌙ Skip unknown terms type "${
|
|
103
|
+
if (!terms) {
|
|
104
|
+
console.log(`⌙ Skip unknown terms type "${termsType}" for service "${serviceId}"`);
|
|
105
105
|
continue;
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
if (
|
|
109
|
-
console.log(`⌙ Use declaration valid until ${
|
|
108
|
+
if (terms.validUntil) {
|
|
109
|
+
console.log(`⌙ Use declaration valid until ${terms.validUntil}`);
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
try {
|
|
113
|
-
const
|
|
113
|
+
const versionContent = await extract({
|
|
114
114
|
content,
|
|
115
115
|
mimeType,
|
|
116
|
-
|
|
116
|
+
terms,
|
|
117
117
|
});
|
|
118
118
|
|
|
119
119
|
const { id: versionId } = await recorder.recordVersion({
|
|
120
120
|
serviceId,
|
|
121
|
-
|
|
122
|
-
content:
|
|
123
|
-
mimeType: MARKDOWN_MIME_TYPE, // The result of the `
|
|
121
|
+
termsType,
|
|
122
|
+
content: versionContent,
|
|
123
|
+
mimeType: MARKDOWN_MIME_TYPE, // The result of the `extract` function is always in markdown format
|
|
124
124
|
fetchDate: commit.date,
|
|
125
125
|
snapshotId: commit.hash,
|
|
126
126
|
});
|
|
@@ -8,7 +8,7 @@ You can use it in your other scripts like this:
|
|
|
8
8
|
|
|
9
9
|
```
|
|
10
10
|
await renamer.loadRules();
|
|
11
|
-
const { serviceId: renamedServiceId,
|
|
11
|
+
const { serviceId: renamedServiceId, termsType: renamedDocumentType } = renamer.applyRules(serviceId, termsType);
|
|
12
12
|
```
|
|
13
13
|
|
|
14
14
|
## Adding renaming rules
|
|
@@ -26,7 +26,7 @@ To rename a service, add a rule in `./rules/services.json`, for example, to rena
|
|
|
26
26
|
|
|
27
27
|
### Terms type
|
|
28
28
|
|
|
29
|
-
To rename a terms type, add a rule in `./rules/
|
|
29
|
+
To rename a terms type, add a rule in `./rules/termsTypes.json`, for example, to rename "Program Policies" to "Acceptable Use Policy", add the following line in the file:
|
|
30
30
|
|
|
31
31
|
```json
|
|
32
32
|
{
|
|
@@ -37,7 +37,7 @@ To rename a terms type, add a rule in `./rules/documentTypes.json`, for example,
|
|
|
37
37
|
|
|
38
38
|
### Terms type for a specific service
|
|
39
39
|
|
|
40
|
-
To rename a terms type only for a specific service, add a rule in `./rules/
|
|
40
|
+
To rename a terms type only for a specific service, add a rule in `./rules/termsTypesByService.json`, for example, to rename "Program Policies" to "Acceptable Use Policy" only for Skype, add the following line in the file:
|
|
41
41
|
|
|
42
42
|
```json
|
|
43
43
|
{
|
|
@@ -10,12 +10,12 @@ let renamingRules;
|
|
|
10
10
|
export async function loadRules() {
|
|
11
11
|
renamingRules = {
|
|
12
12
|
serviceNames: JSON.parse(await fs.readFile(path.resolve(__dirname, './rules/serviceNames.json'))),
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
termsTypes: JSON.parse(await fs.readFile(path.resolve(__dirname, './rules/termsTypes.json'))),
|
|
14
|
+
termsTypesByService: JSON.parse(await fs.readFile(path.resolve(__dirname, './rules/termsTypesByService.json'))),
|
|
15
15
|
};
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
export function applyRules(serviceId,
|
|
18
|
+
export function applyRules(serviceId, termsType) {
|
|
19
19
|
const renamedServiceId = renamingRules.serviceNames[serviceId];
|
|
20
20
|
|
|
21
21
|
if (renamedServiceId) {
|
|
@@ -23,23 +23,23 @@ export function applyRules(serviceId, documentType) {
|
|
|
23
23
|
serviceId = renamedServiceId;
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
const
|
|
26
|
+
const renamedTermsType = renamingRules.termsTypes[termsType];
|
|
27
27
|
|
|
28
|
-
if (
|
|
29
|
-
console.log(`⌙ Rename terms type "${
|
|
30
|
-
|
|
28
|
+
if (renamedTermsType) {
|
|
29
|
+
console.log(`⌙ Rename terms type "${termsType}" to "${renamedTermsType}" of "${serviceId}" service`);
|
|
30
|
+
termsType = renamedTermsType;
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
const
|
|
34
|
-
&& renamingRules.
|
|
33
|
+
const renamedServiceTermsType = renamingRules.termsTypesByService[serviceId]
|
|
34
|
+
&& renamingRules.termsTypesByService[serviceId][termsType];
|
|
35
35
|
|
|
36
|
-
if (
|
|
37
|
-
console.log(`⌙ Specific rename terms type "${
|
|
38
|
-
|
|
36
|
+
if (renamedServiceTermsType) {
|
|
37
|
+
console.log(`⌙ Specific rename terms type "${termsType}" to "${renamedServiceTermsType}" of "${serviceId}" service`);
|
|
38
|
+
termsType = renamedServiceTermsType;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
return {
|
|
42
42
|
serviceId,
|
|
43
|
-
|
|
43
|
+
termsType,
|
|
44
44
|
};
|
|
45
45
|
}
|
package/src/archivist/errors.js
CHANGED
|
@@ -3,7 +3,7 @@ export class InaccessibleContentError extends Error {
|
|
|
3
3
|
if (Array.isArray(message)) {
|
|
4
4
|
message = `\n - ${message.join('\n - ')}`;
|
|
5
5
|
}
|
|
6
|
-
super(`The
|
|
6
|
+
super(`The documents cannot be accessed or their contents can not be selected:${message}`);
|
|
7
7
|
this.name = 'InaccessibleContentError';
|
|
8
8
|
}
|
|
9
9
|
}
|
|
@@ -5,6 +5,7 @@ import mardownPdf from '@accordproject/markdown-pdf';
|
|
|
5
5
|
import TurndownService from '@opentermsarchive/turndown';
|
|
6
6
|
import turndownPluginGithubFlavouredMarkdown from 'joplin-turndown-plugin-gfm';
|
|
7
7
|
import jsdom from 'jsdom';
|
|
8
|
+
import mime from 'mime';
|
|
8
9
|
|
|
9
10
|
import { InaccessibleContentError } from '../errors.js';
|
|
10
11
|
|
|
@@ -21,32 +22,27 @@ const { CiceroMarkTransformer } = ciceroMark;
|
|
|
21
22
|
const ciceroMarkTransformer = new CiceroMarkTransformer();
|
|
22
23
|
|
|
23
24
|
/**
|
|
24
|
-
*
|
|
25
|
+
* Extract content from source document and convert it to Markdown
|
|
25
26
|
*
|
|
26
|
-
* @param {
|
|
27
|
-
* @
|
|
28
|
-
* @param {string} params.mimeType - MIME type of the given content
|
|
29
|
-
* @param {string} params.pageDeclaration - see {@link ./src/archivist/services/pageDeclaration.js}
|
|
30
|
-
* @returns {Promise<string>} Promise which is fulfilled once the content is filtered and converted in Markdown. The promise will resolve into a string containing the filtered content in Markdown format
|
|
27
|
+
* @param {string} sourceDocument - Source document from which to extract content, see {@link ./src/archivist/services/sourceDocument.js}
|
|
28
|
+
* @returns {Promise<string>} Promise which is fulfilled once the content is extracted and converted in Markdown. The promise will resolve into a string containing the extracted content in Markdown format
|
|
31
29
|
*/
|
|
32
|
-
export default async function
|
|
33
|
-
if (mimeType == '
|
|
34
|
-
return
|
|
30
|
+
export default async function extract(sourceDocument) {
|
|
31
|
+
if (sourceDocument.mimeType == mime.getType('pdf')) {
|
|
32
|
+
return extractFromPDF(sourceDocument);
|
|
35
33
|
}
|
|
36
34
|
|
|
37
|
-
return
|
|
38
|
-
content,
|
|
39
|
-
pageDeclaration,
|
|
40
|
-
});
|
|
35
|
+
return extractFromHTML(sourceDocument);
|
|
41
36
|
}
|
|
42
37
|
|
|
43
|
-
export async function
|
|
38
|
+
export async function extractFromHTML(sourceDocument) {
|
|
44
39
|
const {
|
|
45
40
|
location,
|
|
46
41
|
contentSelectors = [],
|
|
47
|
-
|
|
42
|
+
insignificantContentSelectors = [],
|
|
48
43
|
filters: serviceSpecificFilters = [],
|
|
49
|
-
|
|
44
|
+
content,
|
|
45
|
+
} = sourceDocument;
|
|
50
46
|
|
|
51
47
|
const jsdomInstance = new JSDOM(content, {
|
|
52
48
|
url: location,
|
|
@@ -61,7 +57,7 @@ export async function filterHTML({ content, pageDeclaration }) {
|
|
|
61
57
|
await filterFunction(webPageDOM, {
|
|
62
58
|
fetch: location,
|
|
63
59
|
select: contentSelectors,
|
|
64
|
-
remove:
|
|
60
|
+
remove: insignificantContentSelectors,
|
|
65
61
|
filter: serviceSpecificFilters.map(filter => filter.name),
|
|
66
62
|
});
|
|
67
63
|
/* eslint-enable no-await-in-loop */
|
|
@@ -70,7 +66,7 @@ export async function filterHTML({ content, pageDeclaration }) {
|
|
|
70
66
|
}
|
|
71
67
|
}
|
|
72
68
|
|
|
73
|
-
remove(webPageDOM,
|
|
69
|
+
remove(webPageDOM, insignificantContentSelectors); // remove function works in place
|
|
74
70
|
|
|
75
71
|
const domFragment = select(webPageDOM, contentSelectors);
|
|
76
72
|
|
|
@@ -101,7 +97,7 @@ export async function filterHTML({ content, pageDeclaration }) {
|
|
|
101
97
|
return markdownContent;
|
|
102
98
|
}
|
|
103
99
|
|
|
104
|
-
export async function
|
|
100
|
+
export async function extractFromPDF({ content: pdfBuffer }) {
|
|
105
101
|
try {
|
|
106
102
|
const ciceroMarkdown = await PdfTransformer.toCiceroMark(pdfBuffer);
|
|
107
103
|
|
|
@@ -115,12 +111,12 @@ export async function filterPDF({ content: pdfBuffer }) {
|
|
|
115
111
|
}
|
|
116
112
|
}
|
|
117
113
|
|
|
118
|
-
function selectRange(
|
|
114
|
+
function selectRange(webPageDOM, rangeSelector) {
|
|
119
115
|
const { startBefore, startAfter, endBefore, endAfter } = rangeSelector;
|
|
120
116
|
|
|
121
|
-
const selection =
|
|
122
|
-
const startNode =
|
|
123
|
-
const endNode =
|
|
117
|
+
const selection = webPageDOM.createRange();
|
|
118
|
+
const startNode = webPageDOM.querySelector(startBefore || startAfter);
|
|
119
|
+
const endNode = webPageDOM.querySelector(endBefore || endAfter);
|
|
124
120
|
|
|
125
121
|
if (!startNode) {
|
|
126
122
|
throw new InaccessibleContentError(`The "start" selector has no match in document in: ${JSON.stringify(rangeSelector)}`);
|
|
@@ -136,18 +132,18 @@ function selectRange(document, rangeSelector) {
|
|
|
136
132
|
return selection;
|
|
137
133
|
}
|
|
138
134
|
|
|
139
|
-
export function convertRelativeURLsToAbsolute(
|
|
140
|
-
Array.from(
|
|
135
|
+
export function convertRelativeURLsToAbsolute(webPageDOM, baseURL) {
|
|
136
|
+
Array.from(webPageDOM.querySelectorAll(LINKS_TO_CONVERT_SELECTOR)).forEach(link => {
|
|
141
137
|
link.href = url.resolve(baseURL, link.href);
|
|
142
138
|
});
|
|
143
139
|
}
|
|
144
140
|
|
|
145
141
|
// Works in place
|
|
146
|
-
function remove(webPageDOM,
|
|
142
|
+
function remove(webPageDOM, insignificantContentSelectors) {
|
|
147
143
|
const rangeSelections = [];
|
|
148
144
|
const nodes = [];
|
|
149
145
|
|
|
150
|
-
[].concat(
|
|
146
|
+
[].concat(insignificantContentSelectors).forEach(selector => {
|
|
151
147
|
if (typeof selector === 'object') {
|
|
152
148
|
rangeSelections.push(selectRange(webPageDOM, selector));
|
|
153
149
|
} else {
|