@opentermsarchive/engine 0.26.1 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +1 -3
  2. package/bin/ota-track.js +3 -3
  3. package/bin/ota-validate.js +2 -2
  4. package/bin/ota.js +1 -1
  5. package/config/default.json +1 -1
  6. package/package.json +3 -4
  7. package/scripts/dataset/export/index.js +4 -4
  8. package/scripts/dataset/export/index.test.js +11 -17
  9. package/scripts/declarations/lint/index.mocha.js +1 -1
  10. package/scripts/declarations/utils/index.js +12 -12
  11. package/scripts/declarations/validate/definitions.js +1 -1
  12. package/scripts/declarations/validate/index.mocha.js +30 -34
  13. package/scripts/declarations/validate/service.history.schema.js +11 -11
  14. package/scripts/declarations/validate/service.schema.js +13 -13
  15. package/scripts/history/migrate-services.js +4 -4
  16. package/scripts/history/update-to-full-hash.js +2 -2
  17. package/scripts/import/index.js +14 -14
  18. package/scripts/rewrite/rewrite-snapshots.js +3 -3
  19. package/scripts/rewrite/rewrite-versions.js +14 -14
  20. package/scripts/utils/renamer/README.md +3 -3
  21. package/scripts/utils/renamer/index.js +13 -13
  22. package/src/archivist/errors.js +1 -1
  23. package/src/archivist/extract/exports.js +3 -0
  24. package/src/archivist/{filter → extract}/index.js +23 -27
  25. package/src/archivist/extract/index.test.js +516 -0
  26. package/src/archivist/index.js +101 -140
  27. package/src/archivist/index.test.js +178 -166
  28. package/src/archivist/recorder/index.js +11 -55
  29. package/src/archivist/recorder/index.test.js +310 -356
  30. package/src/archivist/recorder/record.js +18 -7
  31. package/src/archivist/recorder/repositories/git/dataMapper.js +41 -31
  32. package/src/archivist/recorder/repositories/git/index.js +11 -15
  33. package/src/archivist/recorder/repositories/git/index.test.js +1058 -463
  34. package/src/archivist/recorder/repositories/interface.js +8 -6
  35. package/src/archivist/recorder/repositories/mongo/dataMapper.js +21 -14
  36. package/src/archivist/recorder/repositories/mongo/index.js +8 -8
  37. package/src/archivist/recorder/repositories/mongo/index.test.js +898 -479
  38. package/src/archivist/recorder/snapshot.js +5 -0
  39. package/src/archivist/recorder/snapshot.test.js +65 -0
  40. package/src/archivist/recorder/version.js +14 -0
  41. package/src/archivist/recorder/version.test.js +65 -0
  42. package/src/archivist/services/index.js +60 -51
  43. package/src/archivist/services/index.test.js +63 -83
  44. package/src/archivist/services/service.js +26 -22
  45. package/src/archivist/services/service.test.js +46 -68
  46. package/src/archivist/services/{pageDeclaration.js → sourceDocument.js} +11 -9
  47. package/src/archivist/services/{pageDeclaration.test.js → sourceDocument.test.js} +21 -21
  48. package/src/archivist/services/terms.js +26 -0
  49. package/src/archivist/services/{documentDeclaration.test.js → terms.test.js} +15 -15
  50. package/src/exports.js +2 -2
  51. package/src/index.js +16 -13
  52. package/src/logger/index.js +35 -36
  53. package/src/notifier/index.js +8 -8
  54. package/src/tracker/index.js +6 -6
  55. package/src/archivist/filter/exports.js +0 -3
  56. package/src/archivist/filter/index.test.js +0 -564
  57. package/src/archivist/recorder/record.test.js +0 -91
  58. package/src/archivist/services/documentDeclaration.js +0 -26
  59. package/scripts/utils/renamer/rules/{documentTypes.json → termsTypes.json} +0 -0
  60. package/scripts/utils/renamer/rules/{documentTypesByService.json → termsTypesByService.json} +0 -0
package/README.md CHANGED
@@ -1,8 +1,6 @@
1
- _The document you are reading now is targeted at developers wanting to use or contribute to the engine of [Open Terms Archive](https://opentermsarchive.org). For a high-level overview of Open Terms Archive’s wider goals and processes, please read its [public homepage](https://opentermsarchive.org)._
2
-
3
1
  # Open Terms Archive Engine
4
2
 
5
- This codebase is a Node.js module enabling downloading, archiving and publishing versions of documents obtained online. It can be used independently from the Open Terms Archive ecosystem.
3
+ This codebase is a Node.js module enabling downloading, archiving and publishing versions of documents obtained online. It can be used independently from the Open Terms Archive ecosystem. For a high-level overview of Open Terms Archive’s wider goals and processes, please read its [public homepage](https://opentermsarchive.org).
6
4
 
7
5
  For documentation, visit [docs.opentermsarchive.org](https://docs.opentermsarchive.org/)
8
6
 
package/bin/ota-track.js CHANGED
@@ -14,8 +14,8 @@ program
14
14
  .name('ota track')
15
15
  .description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
16
16
  .option('-s, --services [serviceId...]', 'service IDs of services to track')
17
- .option('-t, --terms-types [termsType...]', 'terms types to track')
18
- .option('-r, --refilter-only', 'refilter existing snapshots with latest declarations and engine, without recording new snapshots')
19
- .option('--schedule', 'schedule automatic document tracking');
17
+ .option('-t, --types [termsType...]', 'terms types to track')
18
+ .option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
19
+ .option('--schedule', 'track automatically at a regular interval');
20
20
 
21
21
  track(program.parse(process.argv).opts());
package/bin/ota-validate.js CHANGED
@@ -20,9 +20,9 @@ process.on('unhandledRejection', reason => {
20
20
 
21
21
  program
22
22
  .name('ota validate')
23
- .description('Run a series of tests to check the validity of document declarations')
23
+ .description('Run a series of tests to check the validity of terms declarations')
24
24
  .option('-s, --services [serviceId...]', 'service IDs of services to validate')
25
- .option('-t, --terms-types [termsType...]', 'terms types to validate')
25
+ .option('-t, --types [termsType...]', 'terms types to validate')
26
26
  .option('-m, --modified', 'target only services modified in the current git branch')
27
27
  .option('-o, --schema-only', 'much faster check of declarations, but does not check that the documents are actually accessible');
28
28
 
package/bin/ota.js CHANGED
@@ -11,7 +11,7 @@ program
11
11
  .description(description)
12
12
  .version(version)
13
13
  .command('track', 'Track the current terms of services according to provided declarations')
14
- .command('validate', 'Run a series of tests to check the validity of document declarations')
14
+ .command('validate', 'Run a series of tests to check the validity of terms declarations')
15
15
  .command('lint', 'Check format and stylistic errors in declarations and auto fix them')
16
16
  .command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
17
17
  .parse(process.argv);
package/config/default.json CHANGED
@@ -60,7 +60,7 @@
60
60
  "label": {
61
61
  "name": "bot-report",
62
62
  "color": "FEF2C0",
63
- "description": "Automatically created by Open Terms Archive when a document cannot be fetched"
63
+ "description": "Automatically created when terms cannot be tracked"
64
64
  }
65
65
  }
66
66
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "0.26.1",
3
+ "version": "0.27.1",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://github.com/OpenTermsArchive/engine#readme",
6
6
  "bugs": {
@@ -16,8 +16,8 @@
16
16
  "exports": {
17
17
  ".": "./src/exports.js",
18
18
  "./fetch": "./src/archivist/fetcher/exports.js",
19
- "./filter": "./src/archivist/filter/exports.js",
20
- "./page-declaration": "./src/archivist/services/pageDeclaration.js"
19
+ "./extract": "./src/archivist/extract/exports.js",
20
+ "./sourceDocument": "./src/archivist/services/sourceDocument.js"
21
21
  },
22
22
  "bin": {
23
23
  "ota": "./bin/ota.js"
@@ -38,7 +38,6 @@
38
38
  "declarations:validate:schema": "npm run declarations:validate -- --schema-only",
39
39
  "lint": "eslint src test scripts bin",
40
40
  "lint:fix": "npm run lint -- --fix",
41
- "refilter": "npm start -- --refilter-only",
42
41
  "start": "node --max-http-header-size=32768 bin/ota.js track",
43
42
  "start:scheduler": "npm start -- --schedule",
44
43
  "test": "cross-env NODE_ENV=test mocha --recursive \"./src/**/*.test.js\" \"./scripts/**/*.test.js\" --exit",
package/scripts/dataset/export/index.js CHANGED
@@ -31,7 +31,7 @@ export default async function generate({ archivePath, releaseDate }) {
31
31
 
32
32
  for await (const version of versionsRepository.iterate()) {
33
33
  const { content, fetchDate } = version;
34
- const { serviceId, documentType } = renamer.applyRules(version.serviceId, version.documentType);
34
+ const { serviceId, termsType } = renamer.applyRules(version.serviceId, version.termsType);
35
35
 
36
36
  if (firstVersionDate > fetchDate) {
37
37
  firstVersionDate = fetchDate;
@@ -43,7 +43,7 @@ export default async function generate({ archivePath, releaseDate }) {
43
43
 
44
44
  services.add(serviceId);
45
45
 
46
- const versionPath = generateVersionPath({ serviceId, documentType, fetchDate });
46
+ const versionPath = generateVersionPath({ serviceId, termsType, fetchDate });
47
47
 
48
48
  logger.info({ message: versionPath, counter: index, hash: version.id });
49
49
 
@@ -97,10 +97,10 @@ async function initializeArchive(targetPath) {
97
97
  return { basename, stream, done };
98
98
  }
99
99
 
100
- function generateVersionPath({ serviceId, documentType, fetchDate }) {
100
+ function generateVersionPath({ serviceId, termsType, fetchDate }) {
101
101
  const fsCompliantDate = fetchDate.toISOString()
102
102
  .replace(/\.\d{3}/, '') // remove milliseconds
103
103
  .replace(/:|\./g, '-'); // replace `:` and `.` by `-` to be compliant with the file system
104
104
 
105
- return `${serviceId}/${documentType}/${fsCompliantDate}.md`;
105
+ return `${serviceId}/${termsType}/${fsCompliantDate}.md`;
106
106
  }
package/scripts/dataset/export/index.test.js CHANGED
@@ -8,8 +8,8 @@ import dircompare from 'dir-compare';
8
8
  import mime from 'mime';
9
9
  import StreamZip from 'node-stream-zip';
10
10
 
11
- import Record from '../../../src/archivist/recorder/record.js';
12
11
  import GitRepository from '../../../src/archivist/recorder/repositories/git/index.js';
12
+ import Version from '../../../src/archivist/recorder/version.js';
13
13
 
14
14
  import generateArchive from './index.js';
15
15
 
@@ -20,8 +20,8 @@ const { expect } = chai;
20
20
  const FIRST_SERVICE_PROVIDER_ID = 'ServiceA';
21
21
  const SECOND_SERVICE_PROVIDER_ID = 'ServiceB';
22
22
 
23
- const FIRST_DOCUMENT_TYPE = 'Terms of Service';
24
- const SECOND_DOCUMENT_TYPE = 'Privacy Policy';
23
+ const FIRST_TERMS_TYPE = 'Terms of Service';
24
+ const SECOND_TERMS_TYPE = 'Privacy Policy';
25
25
 
26
26
  const FIRST_FETCH_DATE = '2021-01-01T11:27:00.000Z';
27
27
  const SECOND_FETCH_DATE = '2021-01-11T11:32:47.000Z';
@@ -31,8 +31,6 @@ const FOURTH_FETCH_DATE = '2022-01-01T12:12:24.000Z';
31
31
  const FIRST_CONTENT = 'First Content';
32
32
  const SECOND_CONTENT = 'Second Content';
33
33
 
34
- const MIME_TYPE = 'text/markdown';
35
-
36
34
  const SNAPSHOT_ID = '721ce4a63ad399ecbdb548a66d6d327e7bc97876';
37
35
 
38
36
  const RELEASE_DATE = '2022-01-01T18:21:00.000Z';
@@ -56,38 +54,34 @@ describe('Export', () => {
56
54
 
57
55
  await repository.initialize();
58
56
 
59
- await repository.save(new Record({
57
+ await repository.save(new Version({
60
58
  serviceId: FIRST_SERVICE_PROVIDER_ID,
61
- documentType: FIRST_DOCUMENT_TYPE,
59
+ termsType: FIRST_TERMS_TYPE,
62
60
  content: FIRST_CONTENT,
63
- mimeType: MIME_TYPE,
64
61
  fetchDate: FIRST_FETCH_DATE,
65
62
  snapshotId: SNAPSHOT_ID,
66
63
  }));
67
64
 
68
- await repository.save(new Record({
65
+ await repository.save(new Version({
69
66
  serviceId: FIRST_SERVICE_PROVIDER_ID,
70
- documentType: FIRST_DOCUMENT_TYPE,
67
+ termsType: FIRST_TERMS_TYPE,
71
68
  content: SECOND_CONTENT,
72
- mimeType: MIME_TYPE,
73
69
  fetchDate: SECOND_FETCH_DATE,
74
70
  snapshotId: SNAPSHOT_ID,
75
71
  }));
76
72
 
77
- await repository.save(new Record({
73
+ await repository.save(new Version({
78
74
  serviceId: SECOND_SERVICE_PROVIDER_ID,
79
- documentType: FIRST_DOCUMENT_TYPE,
75
+ termsType: FIRST_TERMS_TYPE,
80
76
  content: FIRST_CONTENT,
81
- mimeType: MIME_TYPE,
82
77
  fetchDate: THIRD_FETCH_DATE,
83
78
  snapshotId: SNAPSHOT_ID,
84
79
  }));
85
80
 
86
- await repository.save(new Record({
81
+ await repository.save(new Version({
87
82
  serviceId: SECOND_SERVICE_PROVIDER_ID,
88
- documentType: SECOND_DOCUMENT_TYPE,
83
+ termsType: SECOND_TERMS_TYPE,
89
84
  content: FIRST_CONTENT,
90
- mimeType: MIME_TYPE,
91
85
  fetchDate: FOURTH_FETCH_DATE,
92
86
  snapshotId: SNAPSHOT_ID,
93
87
  }));
package/scripts/declarations/lint/index.mocha.js CHANGED
@@ -31,7 +31,7 @@ export default async options => {
31
31
  if (options.modified) {
32
32
  const declarationUtils = new DeclarationUtils(instancePath);
33
33
 
34
- ({ services: servicesToValidate } = await declarationUtils.getModifiedServiceDocumentTypes());
34
+ ({ services: servicesToValidate } = await declarationUtils.getModifiedServiceTermsTypes());
35
35
  }
36
36
 
37
37
  const lintFile = lintAndFixFile(options.fix);
package/scripts/declarations/utils/index.js CHANGED
@@ -24,27 +24,27 @@ export default class DeclarationUtils {
24
24
 
25
25
  const modifiedFilePaths = modifiedFilePathsAsString ? modifiedFilePathsAsString.split('\n') : [];
26
26
 
27
- return { modifiedFilePaths, modifiedServiceIds: Array.from(new Set(modifiedFilePaths.map(DeclarationUtils.filePathToServiceId))) };
27
+ return { modifiedFilePaths, modifiedServicesIds: Array.from(new Set(modifiedFilePaths.map(DeclarationUtils.filePathToServiceId))) };
28
28
  }
29
29
 
30
30
  async getModifiedServices() {
31
- const { modifiedServiceIds } = await this.getModifiedData();
31
+ const { modifiedServicesIds } = await this.getModifiedData();
32
32
 
33
- return modifiedServiceIds;
33
+ return modifiedServicesIds;
34
34
  }
35
35
 
36
- async getModifiedServiceDocumentTypes() {
37
- const { modifiedFilePaths, modifiedServiceIds } = await this.getModifiedData();
38
- const servicesDocumentTypes = {};
36
+ async getModifiedServiceTermsTypes() {
37
+ const { modifiedFilePaths, modifiedServicesIds } = await this.getModifiedData();
38
+ const servicesTermsTypes = {};
39
39
 
40
40
  await Promise.all(modifiedFilePaths.map(async modifiedFilePath => {
41
41
  const serviceId = DeclarationUtils.filePathToServiceId(modifiedFilePath);
42
42
 
43
43
  if (!modifiedFilePath.endsWith('.json')) {
44
44
  // Here we should compare AST of both files to detect on which function
45
- // change has been made, and then find which document type depends on this
45
+ // change has been made, and then find which terms type depends on this
46
46
  // function.
47
- // As this is a complicated process, we will just send back all document types
47
+ // As this is a complicated process, we will just send back all terms types
48
48
  const declaration = await this.getJSONFile(`declarations/${serviceId}.json`, this.defaultBranch);
49
49
 
50
50
  return Object.keys(declaration.documents);
@@ -60,7 +60,7 @@ export default class DeclarationUtils {
60
60
  return;
61
61
  }
62
62
 
63
- const modifiedDocumentTypes = diff.reduce((acc, { path }) => {
63
+ const modifiedTermsTypes = diff.reduce((acc, { path }) => {
64
64
  if (modifiedFilePath.includes('.history')) {
65
65
  acc.add(path[0]);
66
66
  } else if (path[0] == 'documents') {
@@ -70,12 +70,12 @@ export default class DeclarationUtils {
70
70
  return acc;
71
71
  }, new Set());
72
72
 
73
- servicesDocumentTypes[serviceId] = Array.from(new Set([ ...servicesDocumentTypes[serviceId] || [], ...modifiedDocumentTypes ]));
73
+ servicesTermsTypes[serviceId] = Array.from(new Set([ ...servicesTermsTypes[serviceId] || [], ...modifiedTermsTypes ]));
74
74
  }));
75
75
 
76
76
  return {
77
- services: modifiedServiceIds,
78
- servicesDocumentTypes,
77
+ services: modifiedServicesIds,
78
+ servicesTermsTypes,
79
79
  };
80
80
  }
81
81
  }
package/scripts/declarations/validate/definitions.js CHANGED
@@ -25,7 +25,7 @@ const definitions = {
25
25
  ],
26
26
  },
27
27
  contentSelectors: { $ref: '#/definitions/selectors' },
28
- noiseSelectors: { $ref: '#/definitions/selectors' },
28
+ insignificantContentSelectors: { $ref: '#/definitions/selectors' },
29
29
  filters: {
30
30
  type: 'array',
31
31
  items: {
package/scripts/declarations/validate/index.mocha.js CHANGED
@@ -6,8 +6,8 @@ import { expect } from 'chai';
6
6
  import config from 'config';
7
7
  import jsonSourceMap from 'json-source-map';
8
8
 
9
+ import extract from '../../../src/archivist/extract/index.js';
9
10
  import fetch, { launchHeadlessBrowser, stopHeadlessBrowser } from '../../../src/archivist/fetcher/index.js';
10
- import filter from '../../../src/archivist/filter/index.js';
11
11
  import * as services from '../../../src/archivist/services/index.js';
12
12
  import DeclarationUtils from '../utils/index.js';
13
13
 
@@ -25,8 +25,8 @@ const instancePath = path.resolve(declarationsPath, '../');
25
25
  export default async options => {
26
26
  const schemaOnly = options.schemaOnly || false;
27
27
  let servicesToValidate = options.services || [];
28
- const documentTypes = options.termsTypes || [];
29
- let servicesDocumentTypes = {};
28
+ const termsTypes = options.types || [];
29
+ let servicesTermsTypes = {};
30
30
 
31
31
  const serviceDeclarations = await services.loadWithHistory(servicesToValidate);
32
32
 
@@ -37,7 +37,7 @@ export default async options => {
37
37
  if (options.modified) {
38
38
  const declarationUtils = new DeclarationUtils(instancePath);
39
39
 
40
- ({ services: servicesToValidate, servicesDocumentTypes } = await declarationUtils.getModifiedServiceDocumentTypes());
40
+ ({ services: servicesToValidate, servicesTermsTypes } = await declarationUtils.getModifiedServiceTermsTypes());
41
41
  }
42
42
 
43
43
  describe('Service declarations validation', async function () {
@@ -76,61 +76,57 @@ export default async options => {
76
76
  }
77
77
 
78
78
  if (!schemaOnly && service) {
79
- service.getDocumentTypes()
80
- .filter(documentType => {
81
- if (servicesDocumentTypes[serviceId] && servicesDocumentTypes[serviceId].length > 0) {
82
- return servicesDocumentTypes[serviceId].includes(documentType);
79
+ service.getTermsTypes()
80
+ .filter(termsType => {
81
+ if (servicesTermsTypes[serviceId] && servicesTermsTypes[serviceId].length > 0) {
82
+ return servicesTermsTypes[serviceId].includes(termsType);
83
83
  }
84
84
 
85
- if (documentTypes.length > 0) {
86
- return documentTypes.includes(documentType);
85
+ if (termsTypes.length > 0) {
86
+ return termsTypes.includes(termsType);
87
87
  }
88
88
 
89
89
  return true;
90
90
  })
91
91
  .forEach(type => {
92
92
  describe(type, () => {
93
- const documentDeclaration = service.getDocumentDeclaration(type);
93
+ const terms = service.getTerms(type);
94
94
 
95
- documentDeclaration.pages.forEach(page => {
96
- let content;
95
+ terms.sourceDocuments.forEach(sourceDocument => {
97
96
  let filteredContent;
98
- let mimeType;
99
97
 
100
- context(page.location, () => {
98
+ context(sourceDocument.location, () => {
101
99
  before(async function () {
102
- if (!documentDeclaration) {
100
+ if (!terms) {
103
101
  console.log(' (Tests skipped as declaration has been archived)');
104
102
  this.skip();
105
103
  }
106
104
  });
107
105
 
108
106
  it('fetchable URL', async () => {
109
- const { location, executeClientScripts } = page;
110
- const document = await fetch({
107
+ const { location, executeClientScripts } = sourceDocument;
108
+
109
+ ({ content: sourceDocument.content, mimeType: sourceDocument.mimeType } = await fetch({
111
110
  url: location,
112
111
  executeClientScripts,
113
- cssSelectors: page.cssSelectors,
112
+ cssSelectors: sourceDocument.cssSelectors,
114
113
  config: config.get('fetcher'),
115
- });
116
-
117
- content = document.content;
118
- mimeType = document.mimeType;
114
+ }));
119
115
  });
120
116
 
121
- it('selector matches an element in the web page', async function checkSelector() {
122
- if (!content) {
117
+ it('selector matches an element in the source document', async function checkSelector() {
118
+ if (!sourceDocument.content) {
123
119
  console.log(' [Tests skipped as URL is not fetchable]');
124
120
  this.skip();
125
121
  }
126
122
 
127
- filteredContent = await filter({ content, pageDeclaration: page, mimeType });
123
+ filteredContent = await extract(sourceDocument);
128
124
 
129
125
  expect(filteredContent).to.not.be.empty;
130
126
  });
131
127
 
132
128
  it(`filtered content has at least ${MIN_DOC_LENGTH} characters`, async function checkContentLength() {
133
- if (!content) {
129
+ if (!sourceDocument.content) {
134
130
  console.log(' [Tests skipped as URL is not fetchable]');
135
131
  this.skip();
136
132
  }
@@ -146,7 +142,7 @@ export default async options => {
146
142
  it('content is consistent when fetched and filtered twice in a row', async function checkContentConsistency() {
147
143
  this.slow(SLOW_DOCUMENT_THRESHOLD * 2);
148
144
 
149
- if (!content) {
145
+ if (!sourceDocument.content) {
150
146
  console.log(' [Tests skipped as URL is not fetchable]');
151
147
  this.skip();
152
148
  }
@@ -156,13 +152,13 @@ export default async options => {
156
152
  this.skip();
157
153
  }
158
154
 
159
- const document = await fetch({
160
- url: page.location,
161
- executeClientScripts: page.executeClientScripts,
162
- cssSelectors: page.cssSelectors,
155
+ ({ content: sourceDocument.content, mimeType: sourceDocument.mimeType } = await fetch({
156
+ url: sourceDocument.location,
157
+ executeClientScripts: sourceDocument.executeClientScripts,
158
+ cssSelectors: sourceDocument.cssSelectors,
163
159
  config: config.get('fetcher'),
164
- });
165
- const secondFilteredContent = await filter({ content: document.content, pageDeclaration: page, mimeType: document.mimeType });
160
+ }));
161
+ const secondFilteredContent = await extract(sourceDocument);
166
162
 
167
163
  expect(secondFilteredContent).to.equal(filteredContent);
168
164
  });
@@ -1,10 +1,10 @@
1
- import { DOCUMENT_TYPES } from '../../../src/archivist/services/index.js';
1
+ import TERMS_TYPES from '@opentermsarchive/terms-types';
2
2
 
3
3
  import definitions from './definitions.js';
4
4
 
5
- const AVAILABLE_TYPES_NAME = Object.keys(DOCUMENT_TYPES);
5
+ const AVAILABLE_TYPES_NAME = Object.keys(TERMS_TYPES);
6
6
 
7
- const documentsProperties = () => {
7
+ const termsProperties = () => {
8
8
  const result = {};
9
9
 
10
10
  AVAILABLE_TYPES_NAME.forEach(type => {
@@ -12,8 +12,8 @@ const documentsProperties = () => {
12
12
  type: 'array',
13
13
  items: {
14
14
  oneOf: [
15
- { $ref: '#/definitions/singlePageDocumentHistory' },
16
- { $ref: '#/definitions/multiPageDocumentHistory' },
15
+ { $ref: '#/definitions/singleSourceDocumentTermsHistory' },
16
+ { $ref: '#/definitions/multipleSourceDocumentsTermsHistory' },
17
17
  { $ref: '#/definitions/pdfDocumentHistory' },
18
18
  ],
19
19
  },
@@ -27,7 +27,7 @@ const schema = {
27
27
  type: 'object',
28
28
  additionalProperties: false,
29
29
  title: 'Service declaration history',
30
- properties: documentsProperties(),
30
+ properties: termsProperties(),
31
31
  propertyNames: { enum: AVAILABLE_TYPES_NAME },
32
32
  definitions: {
33
33
  ...definitions,
@@ -40,7 +40,7 @@ const schema = {
40
40
  validUntil: { $ref: '#/definitions/validUntil' },
41
41
  },
42
42
  },
43
- singlePageDocumentHistory: {
43
+ singleSourceDocumentTermsHistory: {
44
44
  type: 'object',
45
45
  additionalProperties: false,
46
46
  required: [ 'fetch', 'select', 'validUntil' ],
@@ -48,12 +48,12 @@ const schema = {
48
48
  fetch: { $ref: '#/definitions/location' },
49
49
  select: { $ref: '#/definitions/contentSelectors' },
50
50
  filter: { $ref: '#/definitions/filters' },
51
- remove: { $ref: '#/definitions/noiseSelectors' },
51
+ remove: { $ref: '#/definitions/insignificantContentSelectors' },
52
52
  executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
53
53
  validUntil: { $ref: '#/definitions/validUntil' },
54
54
  },
55
55
  },
56
- multiPageDocumentHistory: {
56
+ multipleSourceDocumentsTermsHistory: {
57
57
  type: 'object',
58
58
  additionalProperties: false,
59
59
  required: ['combine'],
@@ -68,14 +68,14 @@ const schema = {
68
68
  fetch: { $ref: '#/definitions/location' },
69
69
  select: { $ref: '#/definitions/contentSelectors' },
70
70
  filter: { $ref: '#/definitions/filters' },
71
- remove: { $ref: '#/definitions/noiseSelectors' },
71
+ remove: { $ref: '#/definitions/insignificantContentSelectors' },
72
72
  executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
73
73
  },
74
74
  },
75
75
  },
76
76
  select: { $ref: '#/definitions/contentSelectors' },
77
77
  filter: { $ref: '#/definitions/filters' },
78
- remove: { $ref: '#/definitions/noiseSelectors' },
78
+ remove: { $ref: '#/definitions/insignificantContentSelectors' },
79
79
  executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
80
80
  validUntil: { $ref: '#/definitions/validUntil' },
81
81
  },
@@ -1,17 +1,17 @@
1
- import { DOCUMENT_TYPES } from '../../../src/archivist/services/index.js';
1
+ import TERMS_TYPES from '@opentermsarchive/terms-types';
2
2
 
3
3
  import definitions from './definitions.js';
4
4
 
5
- const AVAILABLE_TYPES_NAME = Object.keys(DOCUMENT_TYPES);
5
+ const AVAILABLE_TYPES_NAME = Object.keys(TERMS_TYPES);
6
6
 
7
- const documentsProperties = () => {
7
+ const termsProperties = () => {
8
8
  const result = {};
9
9
 
10
10
  AVAILABLE_TYPES_NAME.forEach(type => {
11
11
  result[type] = {
12
12
  oneOf: [
13
- { $ref: '#/definitions/singlePageDocument' },
14
- { $ref: '#/definitions/multiPageDocument' },
13
+ { $ref: '#/definitions/singleSourceDocumentTerms' },
14
+ { $ref: '#/definitions/multipleSourceDocumentsTerms' },
15
15
  { $ref: '#/definitions/pdfDocument' },
16
16
  ],
17
17
  };
@@ -33,7 +33,7 @@ const schema = {
33
33
  },
34
34
  documents: {
35
35
  type: 'object',
36
- properties: documentsProperties(),
36
+ properties: termsProperties(),
37
37
  propertyNames: { enum: AVAILABLE_TYPES_NAME },
38
38
  },
39
39
  importedFrom: {
@@ -52,7 +52,7 @@ const schema = {
52
52
  required: ['fetch'],
53
53
  properties: { fetch: { $ref: '#/definitions/pdfLocation' } },
54
54
  },
55
- page: {
55
+ sourceDocument: {
56
56
  type: 'object',
57
57
  additionalProperties: false,
58
58
  required: ['fetch'],
@@ -60,28 +60,28 @@ const schema = {
60
60
  fetch: { $ref: '#/definitions/location' },
61
61
  select: { $ref: '#/definitions/contentSelectors' },
62
62
  filter: { $ref: '#/definitions/filters' },
63
- remove: { $ref: '#/definitions/noiseSelectors' },
63
+ remove: { $ref: '#/definitions/insignificantContentSelectors' },
64
64
  executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
65
65
  },
66
66
  },
67
- singlePageDocument: {
67
+ singleSourceDocumentTerms: {
68
68
  allOf: [
69
- { $ref: '#/definitions/page' },
69
+ { $ref: '#/definitions/sourceDocument' },
70
70
  { required: [ 'fetch', 'select' ] },
71
71
  ],
72
72
  },
73
- multiPageDocument: {
73
+ multipleSourceDocumentsTerms: {
74
74
  type: 'object',
75
75
  additionalProperties: false,
76
76
  required: ['combine'],
77
77
  properties: {
78
78
  combine: {
79
79
  type: 'array',
80
- items: { $ref: '#/definitions/page' },
80
+ items: { $ref: '#/definitions/sourceDocument' },
81
81
  },
82
82
  select: { $ref: '#/definitions/contentSelectors' },
83
83
  filter: { $ref: '#/definitions/filters' },
84
- remove: { $ref: '#/definitions/noiseSelectors' },
84
+ remove: { $ref: '#/definitions/insignificantContentSelectors' },
85
85
  executeClientScripts: { $ref: '#/definitions/executeClientScripts' },
86
86
  },
87
87
  },
package/scripts/history/migrate-services.js CHANGED
@@ -145,10 +145,10 @@ async function rewriteSnapshots(repository, records, idsMapping, logger) {
145
145
  idsMapping[record.id] = recordId; // Saves the mapping between the old ID and the new one.
146
146
 
147
147
  if (recordId) {
148
- logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length });
148
+ logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
149
149
  counters.migrated++;
150
150
  } else {
151
- logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length });
151
+ logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
152
152
  counters.skipped++;
153
153
  }
154
154
  }
@@ -169,10 +169,10 @@ async function rewriteVersions(repository, records, idsMapping, logger) {
169
169
  const { id: recordId } = await repository.save(record); // eslint-disable-line no-await-in-loop
170
170
 
171
171
  if (recordId) {
172
- logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length });
172
+ logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
173
173
  counters.migrated++;
174
174
  } else {
175
- logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length });
175
+ logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.termsType, id: record.id, current: i++, total: records.length });
176
176
  counters.skipped++;
177
177
  }
178
178
  }
package/scripts/history/update-to-full-hash.js CHANGED
@@ -47,9 +47,9 @@ const ROOT_PATH = path.resolve(__dirname, '../../');
47
47
  const { id: recordId } = await versionsTargetRepository.save(record);
48
48
 
49
49
  if (!recordId) {
50
- logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.documentType, id: record.id, current, total });
50
+ logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.termsType, id: record.id, current, total });
51
51
  } else {
52
- logger.info({ message: `Update short sha ${record.snapshotId} to ${fullSnapshotId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current, total });
52
+ logger.info({ message: `Update short sha ${record.snapshotId} to ${fullSnapshotId}`, serviceId: record.serviceId, type: record.termsType, id: record.id, current, total });
53
53
  }
54
54
 
55
55
  current++;