@opentermsarchive/engine 0.26.1 → 0.27.1
This diff compares the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registries.
- package/README.md +1 -3
- package/bin/ota-track.js +3 -3
- package/bin/ota-validate.js +2 -2
- package/bin/ota.js +1 -1
- package/config/default.json +1 -1
- package/package.json +3 -4
- package/scripts/dataset/export/index.js +4 -4
- package/scripts/dataset/export/index.test.js +11 -17
- package/scripts/declarations/lint/index.mocha.js +1 -1
- package/scripts/declarations/utils/index.js +12 -12
- package/scripts/declarations/validate/definitions.js +1 -1
- package/scripts/declarations/validate/index.mocha.js +30 -34
- package/scripts/declarations/validate/service.history.schema.js +11 -11
- package/scripts/declarations/validate/service.schema.js +13 -13
- package/scripts/history/migrate-services.js +4 -4
- package/scripts/history/update-to-full-hash.js +2 -2
- package/scripts/import/index.js +14 -14
- package/scripts/rewrite/rewrite-snapshots.js +3 -3
- package/scripts/rewrite/rewrite-versions.js +14 -14
- package/scripts/utils/renamer/README.md +3 -3
- package/scripts/utils/renamer/index.js +13 -13
- package/src/archivist/errors.js +1 -1
- package/src/archivist/extract/exports.js +3 -0
- package/src/archivist/{filter → extract}/index.js +23 -27
- package/src/archivist/extract/index.test.js +516 -0
- package/src/archivist/index.js +101 -140
- package/src/archivist/index.test.js +178 -166
- package/src/archivist/recorder/index.js +11 -55
- package/src/archivist/recorder/index.test.js +310 -356
- package/src/archivist/recorder/record.js +18 -7
- package/src/archivist/recorder/repositories/git/dataMapper.js +41 -31
- package/src/archivist/recorder/repositories/git/index.js +11 -15
- package/src/archivist/recorder/repositories/git/index.test.js +1058 -463
- package/src/archivist/recorder/repositories/interface.js +8 -6
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +21 -14
- package/src/archivist/recorder/repositories/mongo/index.js +8 -8
- package/src/archivist/recorder/repositories/mongo/index.test.js +898 -479
- package/src/archivist/recorder/snapshot.js +5 -0
- package/src/archivist/recorder/snapshot.test.js +65 -0
- package/src/archivist/recorder/version.js +14 -0
- package/src/archivist/recorder/version.test.js +65 -0
- package/src/archivist/services/index.js +60 -51
- package/src/archivist/services/index.test.js +63 -83
- package/src/archivist/services/service.js +26 -22
- package/src/archivist/services/service.test.js +46 -68
- package/src/archivist/services/{pageDeclaration.js → sourceDocument.js} +11 -9
- package/src/archivist/services/{pageDeclaration.test.js → sourceDocument.test.js} +21 -21
- package/src/archivist/services/terms.js +26 -0
- package/src/archivist/services/{documentDeclaration.test.js → terms.test.js} +15 -15
- package/src/exports.js +2 -2
- package/src/index.js +16 -13
- package/src/logger/index.js +35 -36
- package/src/notifier/index.js +8 -8
- package/src/tracker/index.js +6 -6
- package/src/archivist/filter/exports.js +0 -3
- package/src/archivist/filter/index.test.js +0 -564
- package/src/archivist/recorder/record.test.js +0 -91
- package/src/archivist/services/documentDeclaration.js +0 -26
- /package/scripts/utils/renamer/rules/{documentTypes.json → termsTypes.json} +0 -0
- /package/scripts/utils/renamer/rules/{documentTypesByService.json → termsTypesByService.json} +0 -0
package/README.md
CHANGED
@@ -1,8 +1,6 @@
-_The document you are reading now is targeted at developers wanting to use or contribute to the engine of [Open Terms Archive](https://opentermsarchive.org). For a high-level overview of Open Terms Archive’s wider goals and processes, please read its [public homepage](https://opentermsarchive.org)._
-
 # Open Terms Archive Engine
 
-This codebase is a Node.js module enabling downloading, archiving and publishing versions of documents obtained online. It can be used independently from the Open Terms Archive ecosystem.
+This codebase is a Node.js module enabling downloading, archiving and publishing versions of documents obtained online. It can be used independently from the Open Terms Archive ecosystem. For a high-level overview of Open Terms Archive’s wider goals and processes, please read its [public homepage](https://opentermsarchive.org).
 
 For documentation, visit [docs.opentermsarchive.org](https://docs.opentermsarchive.org/)
 
package/bin/ota-track.js
CHANGED
@@ -14,8 +14,8 @@ program
   .name('ota track')
   .description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
   .option('-s, --services [serviceId...]', 'service IDs of services to track')
-  .option('-t, --
-  .option('-
-  .option('--schedule', '
+  .option('-t, --types [termsType...]', 'terms types to track')
+  .option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
+  .option('--schedule', 'track automatically at a regular interval');
 
 track(program.parse(process.argv).opts());
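Taken together, the renamed `--types` option and the new `--extract-only` flag let a tracking run be scoped by terms type and limited to re-extraction. As a rough sketch of what `ota track --services ServiceA --types "Privacy Policy" --extract-only` would yield (key names follow commander's usual camel-casing; the values are illustrative, not taken from this diff):

```js
// Hypothetical shape of program.parse(process.argv).opts() for the command above.
const options = {
  services: ['ServiceA'],     // from -s, --services [serviceId...]
  types: ['Privacy Policy'],  // from -t, --types [termsType...]
  extractOnly: true,          // from -e, --extract-only
};
```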
package/bin/ota-validate.js
CHANGED
@@ -20,9 +20,9 @@ process.on('unhandledRejection', reason => {
 
 program
   .name('ota validate')
-  .description('Run a series of tests to check the validity of
+  .description('Run a series of tests to check the validity of terms declarations')
   .option('-s, --services [serviceId...]', 'service IDs of services to validate')
-  .option('-t, --
+  .option('-t, --types [termsType...]', 'terms types to validate')
   .option('-m, --modified', 'target only services modified in the current git branch')
   .option('-o, --schema-only', 'much faster check of declarations, but does not check that the documents are actually accessible');
 
package/bin/ota.js
CHANGED
@@ -11,7 +11,7 @@ program
   .description(description)
   .version(version)
   .command('track', 'Track the current terms of services according to provided declarations')
-  .command('validate', 'Run a series of tests to check the validity of
+  .command('validate', 'Run a series of tests to check the validity of terms declarations')
   .command('lint', 'Check format and stylistic errors in declarations and auto fix them')
   .command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
   .parse(process.argv);
package/config/default.json
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@opentermsarchive/engine",
-  "version": "0.26.1",
+  "version": "0.27.1",
   "description": "Tracks and makes visible changes to the terms of online services",
   "homepage": "https://github.com/OpenTermsArchive/engine#readme",
   "bugs": {
@@ -16,8 +16,8 @@
   "exports": {
     ".": "./src/exports.js",
     "./fetch": "./src/archivist/fetcher/exports.js",
-    "./
-    "./
+    "./extract": "./src/archivist/extract/exports.js",
+    "./sourceDocument": "./src/archivist/services/sourceDocument.js"
   },
   "bin": {
     "ota": "./bin/ota.js"
@@ -38,7 +38,6 @@
     "declarations:validate:schema": "npm run declarations:validate -- --schema-only",
     "lint": "eslint src test scripts bin",
     "lint:fix": "npm run lint -- --fix",
-    "refilter": "npm start -- --refilter-only",
     "start": "node --max-http-header-size=32768 bin/ota.js track",
     "start:scheduler": "npm start -- --schedule",
     "test": "cross-env NODE_ENV=test mocha --recursive \"./src/**/*.test.js\" \"./scripts/**/*.test.js\" --exit",
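For consumers of the module, the renamed subpath exports translate into new import specifiers. A minimal sketch, assuming `extract/exports.js` and `sourceDocument.js` each expose a default export (the old subpath names are truncated in this diff and therefore not shown):

```js
// Importing through the subpaths declared in the updated "exports" map.
import extract from '@opentermsarchive/engine/extract';
import SourceDocument from '@opentermsarchive/engine/sourceDocument';
```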
package/scripts/dataset/export/index.js
CHANGED
@@ -31,7 +31,7 @@ export default async function generate({ archivePath, releaseDate }) {
 
   for await (const version of versionsRepository.iterate()) {
     const { content, fetchDate } = version;
-    const { serviceId,
+    const { serviceId, termsType } = renamer.applyRules(version.serviceId, version.termsType);
 
     if (firstVersionDate > fetchDate) {
       firstVersionDate = fetchDate;
@@ -43,7 +43,7 @@ export default async function generate({ archivePath, releaseDate }) {
 
     services.add(serviceId);
 
-    const versionPath = generateVersionPath({ serviceId,
+    const versionPath = generateVersionPath({ serviceId, termsType, fetchDate });
 
     logger.info({ message: versionPath, counter: index, hash: version.id });
 
@@ -97,10 +97,10 @@ async function initializeArchive(targetPath) {
   return { basename, stream, done };
 }
 
-function generateVersionPath({ serviceId,
+function generateVersionPath({ serviceId, termsType, fetchDate }) {
   const fsCompliantDate = fetchDate.toISOString()
     .replace(/\.\d{3}/, '') // remove milliseconds
     .replace(/:|\./g, '-'); // replace `:` and `.` by `-` to be compliant with the file system
 
-  return `${serviceId}/${
+  return `${serviceId}/${termsType}/${fsCompliantDate}.md`;
 }
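With `termsType` now part of the generated path, each exported version is filed under a per-terms-type folder inside its service folder. For illustrative values:

```js
// Illustrative call to the updated generateVersionPath (internal helper shown above).
generateVersionPath({
  serviceId: 'ServiceA',
  termsType: 'Terms of Service',
  fetchDate: new Date('2021-01-01T11:27:00.000Z'),
});
// => 'ServiceA/Terms of Service/2021-01-01T11-27-00Z.md'
```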
package/scripts/dataset/export/index.test.js
CHANGED
@@ -8,8 +8,8 @@ import dircompare from 'dir-compare';
 import mime from 'mime';
 import StreamZip from 'node-stream-zip';
 
-import Record from '../../../src/archivist/recorder/record.js';
 import GitRepository from '../../../src/archivist/recorder/repositories/git/index.js';
+import Version from '../../../src/archivist/recorder/version.js';
 
 import generateArchive from './index.js';
 
@@ -20,8 +20,8 @@ const { expect } = chai;
 const FIRST_SERVICE_PROVIDER_ID = 'ServiceA';
 const SECOND_SERVICE_PROVIDER_ID = 'ServiceB';
 
-const
-const
+const FIRST_TERMS_TYPE = 'Terms of Service';
+const SECOND_TERMS_TYPE = 'Privacy Policy';
 
 const FIRST_FETCH_DATE = '2021-01-01T11:27:00.000Z';
 const SECOND_FETCH_DATE = '2021-01-11T11:32:47.000Z';
@@ -31,8 +31,6 @@ const FOURTH_FETCH_DATE = '2022-01-01T12:12:24.000Z';
 const FIRST_CONTENT = 'First Content';
 const SECOND_CONTENT = 'Second Content';
 
-const MIME_TYPE = 'text/markdown';
-
 const SNAPSHOT_ID = '721ce4a63ad399ecbdb548a66d6d327e7bc97876';
 
 const RELEASE_DATE = '2022-01-01T18:21:00.000Z';
@@ -56,38 +54,34 @@ describe('Export', () => {
 
     await repository.initialize();
 
-    await repository.save(new
+    await repository.save(new Version({
       serviceId: FIRST_SERVICE_PROVIDER_ID,
-
+      termsType: FIRST_TERMS_TYPE,
       content: FIRST_CONTENT,
-      mimeType: MIME_TYPE,
       fetchDate: FIRST_FETCH_DATE,
       snapshotId: SNAPSHOT_ID,
     }));
 
-    await repository.save(new
+    await repository.save(new Version({
       serviceId: FIRST_SERVICE_PROVIDER_ID,
-
+      termsType: FIRST_TERMS_TYPE,
       content: SECOND_CONTENT,
-      mimeType: MIME_TYPE,
       fetchDate: SECOND_FETCH_DATE,
       snapshotId: SNAPSHOT_ID,
     }));
 
-    await repository.save(new
+    await repository.save(new Version({
       serviceId: SECOND_SERVICE_PROVIDER_ID,
-
+      termsType: FIRST_TERMS_TYPE,
       content: FIRST_CONTENT,
-      mimeType: MIME_TYPE,
       fetchDate: THIRD_FETCH_DATE,
       snapshotId: SNAPSHOT_ID,
     }));
 
-    await repository.save(new
+    await repository.save(new Version({
       serviceId: SECOND_SERVICE_PROVIDER_ID,
-
+      termsType: SECOND_TERMS_TYPE,
       content: FIRST_CONTENT,
-      mimeType: MIME_TYPE,
       fetchDate: FOURTH_FETCH_DATE,
       snapshotId: SNAPSHOT_ID,
     }));
package/scripts/declarations/lint/index.mocha.js
CHANGED
@@ -31,7 +31,7 @@ export default async options => {
   if (options.modified) {
     const declarationUtils = new DeclarationUtils(instancePath);
 
-    ({ services: servicesToValidate } = await declarationUtils.
+    ({ services: servicesToValidate } = await declarationUtils.getModifiedServiceTermsTypes());
   }
 
   const lintFile = lintAndFixFile(options.fix);
package/scripts/declarations/utils/index.js
CHANGED
@@ -24,27 +24,27 @@ export default class DeclarationUtils {
 
     const modifiedFilePaths = modifiedFilePathsAsString ? modifiedFilePathsAsString.split('\n') : [];
 
-    return { modifiedFilePaths,
+    return { modifiedFilePaths, modifiedServicesIds: Array.from(new Set(modifiedFilePaths.map(DeclarationUtils.filePathToServiceId))) };
   }
 
   async getModifiedServices() {
-    const {
+    const { modifiedServicesIds } = await this.getModifiedData();
 
-    return
+    return modifiedServicesIds;
   }
 
-  async
-    const { modifiedFilePaths,
-    const
+  async getModifiedServiceTermsTypes() {
+    const { modifiedFilePaths, modifiedServicesIds } = await this.getModifiedData();
+    const servicesTermsTypes = {};
 
     await Promise.all(modifiedFilePaths.map(async modifiedFilePath => {
       const serviceId = DeclarationUtils.filePathToServiceId(modifiedFilePath);
 
       if (!modifiedFilePath.endsWith('.json')) {
         // Here we should compare AST of both files to detect on which function
-        // change has been made, and then find which
+        // change has been made, and then find which terms type depends on this
         // function.
-        // As this is a complicated process, we will just send back all
+        // As this is a complicated process, we will just send back all terms types
         const declaration = await this.getJSONFile(`declarations/${serviceId}.json`, this.defaultBranch);
 
         return Object.keys(declaration.documents);
@@ -60,7 +60,7 @@ export default class DeclarationUtils {
         return;
       }
 
-      const
+      const modifiedTermsTypes = diff.reduce((acc, { path }) => {
        if (modifiedFilePath.includes('.history')) {
          acc.add(path[0]);
        } else if (path[0] == 'documents') {
@@ -70,12 +70,12 @@ export default class DeclarationUtils {
        return acc;
      }, new Set());
 
-
+      servicesTermsTypes[serviceId] = Array.from(new Set([ ...servicesTermsTypes[serviceId] || [], ...modifiedTermsTypes ]));
    }));
 
    return {
-      services:
-
+      services: modifiedServicesIds,
+      servicesTermsTypes,
    };
  }
}
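The renamed `getModifiedServiceTermsTypes()` therefore resolves the modified declaration files into service IDs plus, per service, the terms types affected. A sketch of the returned shape, with made-up service IDs and terms types:

```js
// Illustrative return value of declarationUtils.getModifiedServiceTermsTypes().
const result = {
  services: ['ServiceA', 'ServiceB'],
  servicesTermsTypes: {
    ServiceA: ['Terms of Service'],
    ServiceB: ['Privacy Policy', 'Terms of Service'],
  },
};
```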
package/scripts/declarations/validate/definitions.js
CHANGED
@@ -25,7 +25,7 @@ const definitions = {
     ],
   },
   contentSelectors: { $ref: '#/definitions/selectors' },
-
+  insignificantContentSelectors: { $ref: '#/definitions/selectors' },
   filters: {
     type: 'array',
     items: {
package/scripts/declarations/validate/index.mocha.js
CHANGED
@@ -6,8 +6,8 @@ import { expect } from 'chai';
 import config from 'config';
 import jsonSourceMap from 'json-source-map';
 
+import extract from '../../../src/archivist/extract/index.js';
 import fetch, { launchHeadlessBrowser, stopHeadlessBrowser } from '../../../src/archivist/fetcher/index.js';
-import filter from '../../../src/archivist/filter/index.js';
 import * as services from '../../../src/archivist/services/index.js';
 import DeclarationUtils from '../utils/index.js';
 
@@ -25,8 +25,8 @@ const instancePath = path.resolve(declarationsPath, '../');
 export default async options => {
   const schemaOnly = options.schemaOnly || false;
   let servicesToValidate = options.services || [];
-  const
-  let
+  const termsTypes = options.types || [];
+  let servicesTermsTypes = {};
 
   const serviceDeclarations = await services.loadWithHistory(servicesToValidate);
 
@@ -37,7 +37,7 @@ export default async options => {
   if (options.modified) {
     const declarationUtils = new DeclarationUtils(instancePath);
 
-    ({ services: servicesToValidate,
+    ({ services: servicesToValidate, servicesTermsTypes } = await declarationUtils.getModifiedServiceTermsTypes());
   }
 
   describe('Service declarations validation', async function () {
@@ -76,61 +76,57 @@ export default async options => {
      }
 
      if (!schemaOnly && service) {
-        service.
-          .filter(
-            if (
-              return
+        service.getTermsTypes()
+          .filter(termsType => {
+            if (servicesTermsTypes[serviceId] && servicesTermsTypes[serviceId].length > 0) {
+              return servicesTermsTypes[serviceId].includes(termsType);
            }
 
-            if (
-              return
+            if (termsTypes.length > 0) {
+              return termsTypes.includes(termsType);
            }
 
            return true;
          })
          .forEach(type => {
            describe(type, () => {
-              const
+              const terms = service.getTerms(type);
 
-
-              let content;
+              terms.sourceDocuments.forEach(sourceDocument => {
                let filteredContent;
-              let mimeType;
 
-              context(
+                context(sourceDocument.location, () => {
                  before(async function () {
-                  if (!
+                    if (!terms) {
                      console.log('      (Tests skipped as declaration has been archived)');
                      this.skip();
                    }
                  });
 
                  it('fetchable URL', async () => {
-                    const { location, executeClientScripts } =
-
+                    const { location, executeClientScripts } = sourceDocument;
+
+                    ({ content: sourceDocument.content, mimeType: sourceDocument.mimeType } = await fetch({
                      url: location,
                      executeClientScripts,
-                      cssSelectors:
+                      cssSelectors: sourceDocument.cssSelectors,
                      config: config.get('fetcher'),
-                    });
-
-                    content = document.content;
-                    mimeType = document.mimeType;
+                    }));
                  });
 
-                  it('selector matches an element in the
-                    if (!content) {
+                  it('selector matches an element in the source document', async function checkSelector() {
+                    if (!sourceDocument.content) {
                      console.log('      [Tests skipped as URL is not fetchable]');
                      this.skip();
                    }
 
-                    filteredContent = await
+                    filteredContent = await extract(sourceDocument);
 
                    expect(filteredContent).to.not.be.empty;
                  });
 
                  it(`filtered content has at least ${MIN_DOC_LENGTH} characters`, async function checkContentLength() {
-                    if (!content) {
+                    if (!sourceDocument.content) {
                      console.log('      [Tests skipped as URL is not fetchable]');
                      this.skip();
                    }
@@ -146,7 +142,7 @@ export default async options => {
                  it('content is consistent when fetched and filtered twice in a row', async function checkContentConsistency() {
                    this.slow(SLOW_DOCUMENT_THRESHOLD * 2);
 
-                    if (!content) {
+                    if (!sourceDocument.content) {
                      console.log('      [Tests skipped as URL is not fetchable]');
                      this.skip();
                    }
@@ -156,13 +152,13 @@ export default async options => {
                      this.skip();
                    }
 
-
-                      url:
-                      executeClientScripts:
-                      cssSelectors:
+                    ({ content: sourceDocument.content, mimeType: sourceDocument.mimeType } = await fetch({
+                      url: sourceDocument.location,
+                      executeClientScripts: sourceDocument.executeClientScripts,
+                      cssSelectors: sourceDocument.cssSelectors,
                      config: config.get('fetcher'),
-                    });
-                    const secondFilteredContent = await
+                    }));
+                    const secondFilteredContent = await extract(sourceDocument);
 
                    expect(secondFilteredContent).to.equal(filteredContent);
                  });
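Condensed from the hunk above: before running the network checks, the suite now narrows the terms types per service using either the `--modified` analysis (`servicesTermsTypes`) or the `--types` option (`termsTypes`), falling back to every declared type otherwise. This is a restatement for readability, not a literal excerpt:

```js
// Which terms types get validated for a given service (condensed sketch).
const typesToValidate = service.getTermsTypes().filter(termsType => {
  if (servicesTermsTypes[serviceId] && servicesTermsTypes[serviceId].length > 0) {
    return servicesTermsTypes[serviceId].includes(termsType);
  }

  if (termsTypes.length > 0) {
    return termsTypes.includes(termsType);
  }

  return true;
});
```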
package/scripts/declarations/validate/service.history.schema.js
CHANGED
@@ -1,10 +1,10 @@
-import
+import TERMS_TYPES from '@opentermsarchive/terms-types';
 
 import definitions from './definitions.js';
 
-const AVAILABLE_TYPES_NAME = Object.keys(
+const AVAILABLE_TYPES_NAME = Object.keys(TERMS_TYPES);
 
-const
+const termsProperties = () => {
   const result = {};
 
   AVAILABLE_TYPES_NAME.forEach(type => {
@@ -12,8 +12,8 @@ const documentsProperties = () => {
       type: 'array',
       items: {
         oneOf: [
-          { $ref: '#/definitions/
-          { $ref: '#/definitions/
+          { $ref: '#/definitions/singleSourceDocumentTermsHistory' },
+          { $ref: '#/definitions/multipleSourceDocumentsTermsHistory' },
           { $ref: '#/definitions/pdfDocumentHistory' },
         ],
       },
@@ -27,7 +27,7 @@ const schema = {
   type: 'object',
   additionalProperties: false,
   title: 'Service declaration history',
-  properties:
+  properties: termsProperties(),
   propertyNames: { enum: AVAILABLE_TYPES_NAME },
   definitions: {
     ...definitions,
@@ -40,7 +40,7 @@ const schema = {
        validUntil: { $ref: '#/definitions/validUntil' },
      },
    },
-
+    singleSourceDocumentTermsHistory: {
      type: 'object',
      additionalProperties: false,
      required: [ 'fetch', 'select', 'validUntil' ],
@@ -48,12 +48,12 @@ const schema = {
        fetch: { $ref: '#/definitions/location' },
        select: { $ref: '#/definitions/contentSelectors' },
        filter: { $ref: '#/definitions/filters' },
-        remove: { $ref: '#/definitions/
+        remove: { $ref: '#/definitions/insignificantContentSelectors' },
        executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
        validUntil: { $ref: '#/definitions/validUntil' },
      },
    },
-
+    multipleSourceDocumentsTermsHistory: {
      type: 'object',
      additionalProperties: false,
      required: ['combine'],
@@ -68,14 +68,14 @@ const schema = {
            fetch: { $ref: '#/definitions/location' },
            select: { $ref: '#/definitions/contentSelectors' },
            filter: { $ref: '#/definitions/filters' },
-            remove: { $ref: '#/definitions/
+            remove: { $ref: '#/definitions/insignificantContentSelectors' },
            executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
          },
        },
      },
      select: { $ref: '#/definitions/contentSelectors' },
      filter: { $ref: '#/definitions/filters' },
-      remove: { $ref: '#/definitions/
+      remove: { $ref: '#/definitions/insignificantContentSelectors' },
      executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
      validUntil: { $ref: '#/definitions/validUntil' },
    },
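A terms history entry matching the renamed `singleSourceDocumentTermsHistory` definition would therefore look roughly like the sketch below (service URL and dates are made up; the enclosing file layout is assumed):

```js
// Hypothetical history declaration: one terms type mapping to an array of past
// single-source-document entries, each with the required fetch/select/validUntil.
const history = {
  'Terms of Service': [
    {
      fetch: 'https://www.servicea.example/terms',
      select: 'main',
      validUntil: '2023-01-01T00:00:00.000Z',
    },
  ],
};
```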
package/scripts/declarations/validate/service.schema.js
CHANGED
@@ -1,17 +1,17 @@
-import
+import TERMS_TYPES from '@opentermsarchive/terms-types';
 
 import definitions from './definitions.js';
 
-const AVAILABLE_TYPES_NAME = Object.keys(
+const AVAILABLE_TYPES_NAME = Object.keys(TERMS_TYPES);
 
-const
+const termsProperties = () => {
   const result = {};
 
   AVAILABLE_TYPES_NAME.forEach(type => {
     result[type] = {
       oneOf: [
-        { $ref: '#/definitions/
-        { $ref: '#/definitions/
+        { $ref: '#/definitions/singleSourceDocumentTerms' },
+        { $ref: '#/definitions/multipleSourceDocumentsTerms' },
         { $ref: '#/definitions/pdfDocument' },
       ],
     };
@@ -33,7 +33,7 @@ const schema = {
    },
    documents: {
      type: 'object',
-      properties:
+      properties: termsProperties(),
      propertyNames: { enum: AVAILABLE_TYPES_NAME },
    },
    importedFrom: {
@@ -52,7 +52,7 @@ const schema = {
      required: ['fetch'],
      properties: { fetch: { $ref: '#/definitions/pdfLocation' } },
    },
-
+    sourceDocument: {
      type: 'object',
      additionalProperties: false,
      required: ['fetch'],
@@ -60,28 +60,28 @@ const schema = {
        fetch: { $ref: '#/definitions/location' },
        select: { $ref: '#/definitions/contentSelectors' },
        filter: { $ref: '#/definitions/filters' },
-        remove: { $ref: '#/definitions/
+        remove: { $ref: '#/definitions/insignificantContentSelectors' },
        executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
      },
    },
-
+    singleSourceDocumentTerms: {
      allOf: [
-        { $ref: '#/definitions/
+        { $ref: '#/definitions/sourceDocument' },
        { required: [ 'fetch', 'select' ] },
      ],
    },
-
+    multipleSourceDocumentsTerms: {
      type: 'object',
      additionalProperties: false,
      required: ['combine'],
      properties: {
        combine: {
          type: 'array',
-          items: { $ref: '#/definitions/
+          items: { $ref: '#/definitions/sourceDocument' },
        },
        select: { $ref: '#/definitions/contentSelectors' },
        filter: { $ref: '#/definitions/filters' },
-        remove: { $ref: '#/definitions/
+        remove: { $ref: '#/definitions/insignificantContentSelectors' },
        executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
      },
    },
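Against the renamed current-terms definitions, a minimal declaration for a single source document could look like the following sketch. `fetch` and `select` are the keys required by `singleSourceDocumentTerms`, and `remove` now resolves to `insignificantContentSelectors`; the top-level `name` property and all values are assumptions, not taken from this diff:

```js
// Hypothetical service declaration matching `singleSourceDocumentTerms`.
const declaration = {
  name: 'ServiceA',
  documents: {
    'Privacy Policy': {
      fetch: 'https://www.servicea.example/privacy',
      select: 'main',
      remove: '.cookie-banner',
    },
  },
};
```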
package/scripts/history/migrate-services.js
CHANGED
@@ -145,10 +145,10 @@ async function rewriteSnapshots(repository, records, idsMapping, logger) {
     idsMapping[record.id] = recordId; // Saves the mapping between the old ID and the new one.
 
     if (recordId) {
-      logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: record.serviceId, type: record.
+      logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
       counters.migrated++;
     } else {
-      logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.
+      logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
       counters.skipped++;
     }
   }
@@ -169,10 +169,10 @@ async function rewriteVersions(repository, records, idsMapping, logger) {
     const { id: recordId } = await repository.save(record); // eslint-disable-line no-await-in-loop
 
     if (recordId) {
-      logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.
+      logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
       counters.migrated++;
     } else {
-      logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.
+      logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
       counters.skipped++;
     }
   }
package/scripts/history/update-to-full-hash.js
CHANGED
@@ -47,9 +47,9 @@ const ROOT_PATH = path.resolve(__dirname, '../../');
     const { id: recordId } = await versionsTargetRepository.save(record);
 
     if (!recordId) {
-      logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.
+      logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.termsType, id: record.id, current, total });
     } else {
-      logger.info({ message: `Update short sha ${record.snapshotId} to ${fullSnapshotId}`, serviceId: record.serviceId, type: record.
+      logger.info({ message: `Update short sha ${record.snapshotId} to ${fullSnapshotId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current, total });
     }
 
     current++;