@opentermsarchive/engine 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/.eslintrc.yaml +116 -0
- package/.github/workflows/deploy.yml +50 -0
- package/.github/workflows/release.yml +64 -0
- package/.github/workflows/test.yml +77 -0
- package/CHANGELOG.md +14 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/CONTRIBUTING.md +143 -0
- package/LICENSE +153 -0
- package/MIGRATING.md +42 -0
- package/README.fr.md +110 -0
- package/README.md +438 -0
- package/Vagrantfile +38 -0
- package/ansible.cfg +13 -0
- package/bin/.env.js +1 -0
- package/bin/lint-declarations.js +31 -0
- package/bin/track.js +26 -0
- package/bin/validate-declarations.js +68 -0
- package/config/ci.json +5 -0
- package/config/contrib.json +35 -0
- package/config/dating.json +37 -0
- package/config/default.json +71 -0
- package/config/france.json +40 -0
- package/config/p2b-compliance.json +40 -0
- package/config/pga.json +40 -0
- package/config/production.json +27 -0
- package/config/test.json +49 -0
- package/config/vagrant.json +24 -0
- package/decision-records/0001-service-name-and-id.md +73 -0
- package/decision-records/0002-service-history.md +212 -0
- package/decision-records/0003-snapshots-database.md +123 -0
- package/ops/README.md +280 -0
- package/ops/app.yml +5 -0
- package/ops/infra.yml +6 -0
- package/ops/inventories/dev.yml +7 -0
- package/ops/inventories/production.yml +27 -0
- package/ops/roles/infra/defaults/main.yml +2 -0
- package/ops/roles/infra/files/.gitconfig +3 -0
- package/ops/roles/infra/files/mongod.conf +18 -0
- package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
- package/ops/roles/infra/tasks/main.yml +78 -0
- package/ops/roles/infra/tasks/mongo.yml +40 -0
- package/ops/roles/infra/templates/ssh_config.j2 +5 -0
- package/ops/roles/ota/defaults/main.yml +14 -0
- package/ops/roles/ota/files/.env +21 -0
- package/ops/roles/ota/tasks/database.yml +65 -0
- package/ops/roles/ota/tasks/main.yml +110 -0
- package/ops/site.yml +6 -0
- package/package.json +101 -0
- package/pm2.config.cjs +20 -0
- package/scripts/dataset/README.md +37 -0
- package/scripts/dataset/assets/LICENSE +540 -0
- package/scripts/dataset/assets/README.template.js +65 -0
- package/scripts/dataset/export/index.js +106 -0
- package/scripts/dataset/export/index.test.js +155 -0
- package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
- package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
- package/scripts/dataset/index.js +40 -0
- package/scripts/dataset/logger/index.js +17 -0
- package/scripts/dataset/main.js +25 -0
- package/scripts/dataset/publish/index.js +39 -0
- package/scripts/declarations/lint/index.js +36 -0
- package/scripts/declarations/utils/index.js +81 -0
- package/scripts/declarations/validate/definitions.js +63 -0
- package/scripts/declarations/validate/index.mocha.js +262 -0
- package/scripts/declarations/validate/service.history.schema.js +86 -0
- package/scripts/declarations/validate/service.schema.js +91 -0
- package/scripts/history/logger/index.js +39 -0
- package/scripts/history/migrate-services.js +212 -0
- package/scripts/history/update-to-full-hash.js +61 -0
- package/scripts/history/utils/index.js +23 -0
- package/scripts/import/README.md +59 -0
- package/scripts/import/config/import.json +12 -0
- package/scripts/import/index.js +224 -0
- package/scripts/import/loadCommits.js +66 -0
- package/scripts/import/logger/index.js +43 -0
- package/scripts/rewrite/README.md +131 -0
- package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
- package/scripts/rewrite/config/rewrite-versions.json +32 -0
- package/scripts/rewrite/initializer/files/license +428 -0
- package/scripts/rewrite/initializer/files/readme.md +8 -0
- package/scripts/rewrite/initializer/index.js +44 -0
- package/scripts/rewrite/rewrite-snapshots.js +108 -0
- package/scripts/rewrite/rewrite-versions.js +160 -0
- package/scripts/rewrite/utils.js +33 -0
- package/scripts/utils/renamer/README.md +49 -0
- package/scripts/utils/renamer/index.js +45 -0
- package/scripts/utils/renamer/rules/documentTypes.json +25 -0
- package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
- package/scripts/utils/renamer/rules/serviceNames.json +92 -0
- package/src/archivist/errors.js +9 -0
- package/src/archivist/fetcher/errors.js +6 -0
- package/src/archivist/fetcher/exports.js +18 -0
- package/src/archivist/fetcher/fullDomFetcher.js +84 -0
- package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
- package/src/archivist/fetcher/index.js +35 -0
- package/src/archivist/fetcher/index.test.js +239 -0
- package/src/archivist/filter/exports.js +3 -0
- package/src/archivist/filter/index.js +178 -0
- package/src/archivist/filter/index.test.js +561 -0
- package/src/archivist/index.js +276 -0
- package/src/archivist/index.test.js +600 -0
- package/src/archivist/recorder/index.js +77 -0
- package/src/archivist/recorder/index.test.js +463 -0
- package/src/archivist/recorder/record.js +35 -0
- package/src/archivist/recorder/record.test.js +91 -0
- package/src/archivist/recorder/repositories/factory.js +23 -0
- package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
- package/src/archivist/recorder/repositories/git/git.js +122 -0
- package/src/archivist/recorder/repositories/git/git.test.js +86 -0
- package/src/archivist/recorder/repositories/git/index.js +182 -0
- package/src/archivist/recorder/repositories/git/index.test.js +714 -0
- package/src/archivist/recorder/repositories/interface.js +108 -0
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
- package/src/archivist/recorder/repositories/mongo/index.js +121 -0
- package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
- package/src/archivist/services/documentDeclaration.js +26 -0
- package/src/archivist/services/documentDeclaration.test.js +85 -0
- package/src/archivist/services/documentTypes.json +386 -0
- package/src/archivist/services/index.js +255 -0
- package/src/archivist/services/index.test.js +327 -0
- package/src/archivist/services/pageDeclaration.js +51 -0
- package/src/archivist/services/pageDeclaration.test.js +224 -0
- package/src/archivist/services/service.js +60 -0
- package/src/archivist/services/service.test.js +164 -0
- package/src/exports.js +3 -0
- package/src/index.js +59 -0
- package/src/logger/README.md +1 -0
- package/src/logger/index.js +131 -0
- package/src/main.js +18 -0
- package/src/notifier/README.md +1 -0
- package/src/notifier/index.js +150 -0
- package/src/tracker/README.md +1 -0
- package/src/tracker/index.js +215 -0
- package/test/fixtures/service_A.js +22 -0
- package/test/fixtures/service_A_terms.md +10 -0
- package/test/fixtures/service_A_terms_snapshot.html +14 -0
- package/test/fixtures/service_B.js +22 -0
- package/test/fixtures/service_with_declaration_history.js +65 -0
- package/test/fixtures/service_with_filters_history.js +155 -0
- package/test/fixtures/service_with_history.js +188 -0
- package/test/fixtures/service_with_multipage_document.js +100 -0
- package/test/fixtures/service_without_history.js +31 -0
- package/test/fixtures/services.js +19 -0
- package/test/fixtures/terms.pdf +0 -0
- package/test/fixtures/termsFromPDF.md +25 -0
- package/test/fixtures/termsModified.pdf +0 -0
- package/test/services/service_A.json +9 -0
- package/test/services/service_B.json +9 -0
- package/test/services/service_with_declaration_history.filters.js +7 -0
- package/test/services/service_with_declaration_history.history.json +17 -0
- package/test/services/service_with_declaration_history.json +13 -0
- package/test/services/service_with_filters_history.filters.history.js +29 -0
- package/test/services/service_with_filters_history.filters.js +7 -0
- package/test/services/service_with_filters_history.json +13 -0
- package/test/services/service_with_history.filters.history.js +29 -0
- package/test/services/service_with_history.filters.js +7 -0
- package/test/services/service_with_history.history.json +26 -0
- package/test/services/service_with_history.json +17 -0
- package/test/services/service_with_multipage_document.filters.js +7 -0
- package/test/services/service_with_multipage_document.history.json +37 -0
- package/test/services/service_with_multipage_document.json +28 -0
- package/test/services/service_without_history.filters.js +7 -0
- package/test/services/service_without_history.json +13 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import fsApi from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
4
|
+
|
|
5
|
+
import config from 'config';
|
|
6
|
+
|
|
7
|
+
import DocumentDeclaration from './documentDeclaration.js';
|
|
8
|
+
import PageDeclaration from './pageDeclaration.js';
|
|
9
|
+
import Service from './service.js';
|
|
10
|
+
|
|
11
|
+
// Promise-based flavour of the file-system API, used for every asynchronous file access in this module.
const { promises: fs } = fsApi;

// ES modules have no built-in `__dirname`, so it is rebuilt from this module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Directory holding the service declarations: the configured `services.declarationsPath`,
// resolved from three directories above this module.
const declarationsPath = path.resolve(__dirname, '../../..', config.get('services.declarationsPath'));

// Location of the document types definition, a JSON file sitting next to this module.
const documentTypesFilePath = path.resolve(__dirname, './documentTypes.json');

// Parsed document types, read synchronously when the module is first imported.
export const DOCUMENT_TYPES = JSON.parse(fsApi.readFileSync(documentTypesFilePath));
|
|
16
|
+
|
|
17
|
+
/**
 * Load the current declaration of every declared service, or only of the requested ones.
 *
 * @param {string[]} [servicesIdsToLoad=[]] - Ids of the services to load. Each entry is inserted
 *   as-is into a regular expression, so it may also be a pattern. An empty array loads everything.
 * @returns {Promise<Object>} Object mapping each loaded service id to its `Service` instance,
 *   after `loadServiceDocument` has been run for each of its declared document types.
 */
export async function load(servicesIdsToLoad = []) {
  let servicesIds = await getDeclaredServicesIds();

  if (servicesIdsToLoad.length) {
    // The alternatives are grouped so that `^` and `$` anchor every one of them. Without the group,
    // `^a|b$` only anchors the first alternative at the start and the last one at the end, which
    // lets through any id that merely starts with the first or ends with the last requested id.
    const requestedIdsPattern = new RegExp(`^(?:${servicesIdsToLoad.join('|')})$`);

    servicesIds = servicesIds.filter(serviceId => requestedIdsPattern.test(serviceId));
  }

  const services = {};

  await Promise.all(servicesIds.map(async serviceId => {
    const { name, documents: documentsDeclaration } = await loadServiceDeclaration(serviceId);

    const service = new Service({ id: serviceId, name });

    // Document types of one service are independent from each other, so they are loaded concurrently.
    await Promise.all(Object.keys(documentsDeclaration).map(documentType => loadServiceDocument(service, documentType, documentsDeclaration[documentType])));

    services[serviceId] = service;
  }));

  return services;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Read and parse the JSON declaration file of a service.
 *
 * @param {string} serviceId - Id of the service; its declaration is read from `<serviceId>.json` in the declarations directory.
 * @returns {Promise<*>} The parsed content of the declaration file.
 * @throws {Error} When the file content is not valid JSON. Read errors are propagated untouched.
 */
async function loadServiceDeclaration(serviceId) {
  const declarationFile = path.join(declarationsPath, `${serviceId}.json`);
  const fileContent = await fs.readFile(declarationFile);
  let parsedDeclaration;

  try {
    parsedDeclaration = JSON.parse(fileContent);
  } catch (parsingError) {
    // The parser error is replaced by a message naming the faulty service and file.
    throw new Error(`The "${serviceId}" service declaration is malformed and cannot be parsed in ${declarationFile}`);
  }

  return parsedDeclaration;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Resolve filter names into the values exported under those names by the filters module of a service.
 *
 * @param {string} serviceId - Id of the service; the module imported is `<serviceId>.filters.js` in the declarations directory.
 * @param {string[]} [filterNames] - Names of the exports to pick, in the order they should be returned.
 * @returns {Promise<Array|undefined>} The exports matching `filterNames` (a name the module does not
 *   export yields `undefined` at that position), or `undefined` when `filterNames` is falsy.
 */
async function loadServiceFilters(serviceId, filterNames) {
  if (filterNames) {
    const filtersModuleUrl = pathToFileURL(path.join(declarationsPath, `${serviceId}.filters.js`));
    const filtersModule = await import(filtersModuleUrl);

    return filterNames.map(name => filtersModule[name]);
  }

  // Nothing was requested, so the filters module is not even imported.
  return undefined;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build the document declaration of one document type and register it on the service.
 *
 * Without a `combine` entry the document is made of a single page built from the declaration
 * itself. With `combine`, one page is built per entry, each falling back on the document-level
 * value for whatever it does not define.
 *
 * @param {Object} service - Service receiving the declaration; its `id` and `addDocumentDeclaration` are used.
 * @param {string} documentType - Type of the document being declared.
 * @param {Object} documentTypeDeclaration - Raw declaration; reads `fetch`, `select`, `remove`, `filter`, `executeClientScripts` and `combine`.
 * @returns {Promise<void>}
 */
async function loadServiceDocument(service, documentType, documentTypeDeclaration) {
  const {
    filter: filterNames,
    fetch: location,
    executeClientScripts,
    select: contentSelectors,
    remove: noiseSelectors,
    combine,
  } = documentTypeDeclaration;

  const pages = [];
  const filters = await loadServiceFilters(service.id, filterNames);

  if (combine) {
    for (const pageDeclaration of combine) {
      const pageFilters = await loadServiceFilters(service.id, pageDeclaration.filter); // eslint-disable-line no-await-in-loop
      const pageExecuteClientScripts = pageDeclaration.executeClientScripts;
      // An explicit `false` on the page must win over the document-level value, hence no `||` here.
      const pageSetsExecuteClientScripts = pageExecuteClientScripts !== undefined && pageExecuteClientScripts !== null;

      pages.push(new PageDeclaration({
        location: pageDeclaration.fetch || location,
        executeClientScripts: pageSetsExecuteClientScripts ? pageExecuteClientScripts : executeClientScripts,
        contentSelectors: pageDeclaration.select || contentSelectors,
        noiseSelectors: pageDeclaration.remove || noiseSelectors,
        filters: pageFilters || filters,
      }));
    }
  } else {
    pages.push(new PageDeclaration({ location, executeClientScripts, contentSelectors, noiseSelectors, filters }));
  }

  const documentDeclaration = new DocumentDeclaration({ service, type: documentType, pages });

  service.addDocumentDeclaration(documentDeclaration);
}
|
|
88
|
+
|
|
89
|
+
/**
 * List the ids of the services declared in the declarations directory.
 *
 * A service id is the base name of a `.json` file of that directory. Files whose name
 * contains `.history.json` are ignored.
 *
 * @returns {Promise<string[]>} The declared service ids, in the order the directory listing returned the files.
 */
async function getDeclaredServicesIds() {
  const fileNames = await fs.readdir(declarationsPath);

  // Strict equality: both operands are strings, so no coercion is wanted here.
  const servicesFileNames = fileNames.filter(fileName => path.extname(fileName) === '.json' && !fileName.includes('.history.json'));

  return servicesFileNames.map(serviceFileName => path.basename(serviceFileName, '.json'));
}
|
|
96
|
+
|
|
97
|
+
/**
 * Load services like `load` does, then register on each of them one document declaration per
 * date returned by `extractHistoryDates` for each document type.
 *
 * Every registered declaration carries the date as `validUntil`. For each date, the declaration
 * and the filters used are the first history entries whose `validUntil` is on or after that
 * date, falling back on the entry that has no `validUntil`.
 *
 * @param {string[]} [servicesIds=[]] - Ids of the services to load, passed to `load`.
 * @returns {Promise<Object>} Object mapping each loaded service id to its `Service` instance. All
 *   declarations are registered by the time the promise resolves.
 */
export async function loadWithHistory(servicesIds = []) {
  const services = await load(servicesIds);

  for (const serviceId of Object.keys(services)) {
    const { declarations, filters } = await loadServiceHistoryFiles(serviceId); // eslint-disable-line no-await-in-loop

    for (const documentType of Object.keys(declarations)) {
      const documentTypeDeclarationEntries = declarations[documentType];
      const filterNames = [...new Set(documentTypeDeclarationEntries.flatMap(declaration => declaration.filter))].filter(Boolean);
      const allHistoryDates = extractHistoryDates({ documentTypeDeclarationEntries, filters, filterNames });

      const latestValidDocumentDeclaration = documentTypeDeclarationEntries.find(entry => !entry.validUntil);

      // A plain loop rather than `forEach` with an async callback: `forEach` discards the returned
      // promises, so declarations using `combine` could be registered after this function had
      // returned, and a rejection in there would have gone unhandled.
      for (const date of allHistoryDates) {
        const declarationForThisDate = documentTypeDeclarationEntries.find(entry => new Date(date) <= new Date(entry.validUntil)) || latestValidDocumentDeclaration;
        const { filter: declarationForThisDateFilterNames, combine } = declarationForThisDate;

        const pages = [];
        let actualFilters;

        if (declarationForThisDateFilterNames) {
          actualFilters = declarationForThisDateFilterNames.map(filterName => {
            const currentlyValidFilters = filters[filterName].find(entry => !entry.validUntil);
            const validFilterForThisDate = filters[filterName].find(entry => new Date(date) <= new Date(entry.validUntil))
              || currentlyValidFilters;

            return validFilterForThisDate.filter;
          });
        }

        if (!combine) {
          pages.push(new PageDeclaration({
            location: declarationForThisDate.fetch,
            executeClientScripts: declarationForThisDate.executeClientScripts,
            contentSelectors: declarationForThisDate.select,
            noiseSelectors: declarationForThisDate.remove,
            filters: actualFilters,
          }));
        } else {
          for (const pageDeclaration of combine) {
            const { filter: pageFilterNames, fetch: pageLocation, executeClientScripts: pageExecuteClientScripts, select: pageContentSelectors, remove: pageNoiseSelectors } = pageDeclaration;

            // NOTE(review): page-level filters come from `loadServiceFilters`, not from the dated
            // `filters` history used for document-level filters above; confirm this is intended.
            const pageFilters = await loadServiceFilters(serviceId, pageFilterNames); // eslint-disable-line no-await-in-loop

            pages.push(new PageDeclaration({
              location: pageLocation || declarationForThisDate.fetch,
              // An explicit `false` on the page must win over the document-level value, hence no `||` here.
              executeClientScripts: (pageExecuteClientScripts === undefined || pageExecuteClientScripts === null ? declarationForThisDate.executeClientScripts : pageExecuteClientScripts),
              contentSelectors: pageContentSelectors || declarationForThisDate.select,
              noiseSelectors: pageNoiseSelectors || declarationForThisDate.remove,
              filters: pageFilters || actualFilters,
            }));
          }
        }

        services[serviceId].addDocumentDeclaration(new DocumentDeclaration({
          service: services[serviceId],
          type: documentType,
          pages,
          validUntil: date,
        }));
      }
    }
  }

  return services;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Gather every distinct `validUntil` value found in the history of a document type.
 *
 * @param {Object} params
 * @param {Object} params.filters - Filters history: for each filter name, an array of entries that may carry `validUntil`.
 * @param {string[]} params.filterNames - Names of the filters whose history must be taken into account.
 * @param {Object[]} params.documentTypeDeclarationEntries - Declaration history entries that may carry `validUntil`.
 * @returns {Array} The distinct `validUntil` values in chronological order. `undefined`, produced
 *   by entries without `validUntil`, comes last when present.
 */
function extractHistoryDates({ filters, filterNames, documentTypeDeclarationEntries }) {
  const filtersDates = Object.keys(filters)
    .filter(filterName => filterNames.includes(filterName))
    .flatMap(filterName => filters[filterName].map(entry => entry.validUntil));
  const declarationsDates = documentTypeDeclarationEntries.map(entry => entry.validUntil);

  const chronologicalDates = [ ...filtersDates, ...declarationsDates ];

  // `Array.prototype.sort` always moves `undefined` elements to the end without calling the comparator.
  chronologicalDates.sort((earlier, later) => new Date(earlier) - new Date(later));

  // A `Set` keeps the first occurrence of each value, so the chronological order survives deduplication.
  return [...new Set(chronologicalDates)];
}
|
|
180
|
+
|
|
181
|
+
/**
 * Sort, in place, every array of history entries by ascending `validUntil` date.
 *
 * @param {Object} [history={}] - Object whose values are arrays of entries carrying `validUntil`. The arrays are mutated.
 * @returns {void}
 */
function sortHistory(history = {}) {
  const byValidUntil = (first, second) => new Date(first.validUntil) - new Date(second.validUntil);

  for (const entries of Object.values(history)) {
    entries.sort(byValidUntil);
  }
}
|
|
186
|
+
|
|
187
|
+
/**
 * Gather the declarations history and the filters history of a service.
 *
 * The entries of the current files (`<serviceId>.json` and `<serviceId>.filters.js`) are appended
 * to those of the optional history files (`<serviceId>.history.json` and
 * `<serviceId>.filters.history.js`), then each list is sorted with `sortHistory`.
 *
 * @param {string} serviceId - Id of the service whose files are read from the declarations directory.
 * @returns {Promise<{declarations: Object, filters: Object}>} `declarations` maps each document type
 *   to its list of declaration entries; `filters` maps each filter name to its list of entries,
 *   the current filter being wrapped as `{ filter }`.
 * @throws {Error} When the declaration file or the history file is not valid JSON.
 */
async function loadServiceHistoryFiles(serviceId) {
  const serviceFileName = path.join(declarationsPath, `${serviceId}.json`);
  const rawServiceDeclaration = await fs.readFile(serviceFileName);
  let serviceDeclaration;

  try {
    serviceDeclaration = JSON.parse(rawServiceDeclaration);
  } catch (e) {
    // The message is built from the service id and the file path. It used to be built from the
    // file content, which made `path.basename` throw a `TypeError` instead of this error.
    throw new Error(`The "${serviceId}" service declaration is malformed and cannot be parsed in ${serviceFileName}`);
  }

  const serviceHistoryFileName = path.join(declarationsPath, `${serviceId}.history.json`);
  const serviceFiltersFileName = path.join(declarationsPath, `${serviceId}.filters.js`);
  const serviceFiltersHistoryFileName = path.join(declarationsPath, `${serviceId}.filters.history.js`);

  let serviceHistory = {};
  const serviceFiltersHistory = {};

  if (await fileExists(serviceHistoryFileName)) {
    try {
      serviceHistory = JSON.parse(await fs.readFile(serviceHistoryFileName));
    } catch (e) {
      throw new Error(`The "${path.basename(serviceHistoryFileName, '.json')}" service declaration is malformed and cannot be parsed in ${serviceHistoryFileName}`);
    }
  }

  // The current declaration of each document type is appended to its history entries.
  Object.keys(serviceDeclaration.documents).forEach(documentType => {
    serviceHistory[documentType] = serviceHistory[documentType] || [];
    serviceHistory[documentType].push(serviceDeclaration.documents[documentType]);
  });

  sortHistory(serviceHistory);

  if (await fileExists(serviceFiltersHistoryFileName)) {
    const serviceFiltersHistoryModule = await import(pathToFileURL(serviceFiltersHistoryFileName));

    Object.keys(serviceFiltersHistoryModule).forEach(filterName => {
      // Work on a copy: imported modules are cached, so pushing to and sorting the exported array
      // itself would pile up one more "current" entry every time this service is loaded again.
      serviceFiltersHistory[filterName] = [...serviceFiltersHistoryModule[filterName]];
    });
  }

  if (await fileExists(serviceFiltersFileName)) {
    const serviceFilters = await import(pathToFileURL(serviceFiltersFileName));

    // The current filters are appended without `validUntil`.
    Object.keys(serviceFilters).forEach(filterName => {
      serviceFiltersHistory[filterName] = serviceFiltersHistory[filterName] || [];
      serviceFiltersHistory[filterName].push({ filter: serviceFilters[filterName] });
    });
  }

  sortHistory(serviceFiltersHistory);

  return {
    declarations: serviceHistory,
    filters: serviceFiltersHistory,
  };
}
|
|
244
|
+
|
|
245
|
+
/**
 * Tell whether a file can be accessed at the given path.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it fails with `ENOENT`.
 * @throws {Error} Any other access error, so that it is not mistaken for a missing file.
 */
async function fileExists(filePath) {
  try {
    await fs.access(filePath);

    return true;
  } catch (error) {
    if (error.code === 'ENOENT') {
      return false;
    }

    // Anything else used to be swallowed, making the function resolve to `undefined`.
    throw error;
  }
}
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import chai from 'chai';
|
|
2
|
+
import chaiExclude from 'chai-exclude';
|
|
3
|
+
|
|
4
|
+
import expectedServices from '../../../test/fixtures/services.js';
|
|
5
|
+
|
|
6
|
+
import * as services from './index.js';
|
|
7
|
+
|
|
8
|
+
chai.use(chaiExclude);
|
|
9
|
+
const { expect } = chai;
|
|
10
|
+
|
|
11
|
+
describe('Services', () => {
|
|
12
|
+
describe('#load', () => {
|
|
13
|
+
let result;
|
|
14
|
+
|
|
15
|
+
async function validateServiceWithoutHistory(serviceId, expected) {
|
|
16
|
+
/* eslint-disable no-loop-func */
|
|
17
|
+
for (const documentType of expected.getDocumentTypes()) {
|
|
18
|
+
context(`${documentType}`, () => {
|
|
19
|
+
let actualDocumentDeclaration;
|
|
20
|
+
let actualFilters;
|
|
21
|
+
let actualContentSelectors;
|
|
22
|
+
let actualNoiseSelectors;
|
|
23
|
+
let actualExecuteClientScripts;
|
|
24
|
+
|
|
25
|
+
const expectedDocumentDeclaration = expected.getDocumentDeclaration(documentType);
|
|
26
|
+
|
|
27
|
+
const { pages } = expectedDocumentDeclaration;
|
|
28
|
+
|
|
29
|
+
pages.forEach((page, index) => {
|
|
30
|
+
const {
|
|
31
|
+
filters: expectedFilters,
|
|
32
|
+
contentSelectors: expectedContentSelectors,
|
|
33
|
+
noiseSelectors: expectedNoiseSelectors,
|
|
34
|
+
executeClientScripts: expectedExecuteClientScripts,
|
|
35
|
+
} = page;
|
|
36
|
+
|
|
37
|
+
context(`Page: ${page.id}`, () => {
|
|
38
|
+
before(() => {
|
|
39
|
+
actualDocumentDeclaration = result[serviceId].getDocumentDeclaration(documentType);
|
|
40
|
+
const { pages: actualPages } = actualDocumentDeclaration;
|
|
41
|
+
|
|
42
|
+
({
|
|
43
|
+
filters: actualFilters,
|
|
44
|
+
contentSelectors: actualContentSelectors,
|
|
45
|
+
noiseSelectors: actualNoiseSelectors,
|
|
46
|
+
executeClientScripts: actualExecuteClientScripts,
|
|
47
|
+
} = actualPages[index]);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it('has the proper service name', () => {
|
|
51
|
+
expect(actualDocumentDeclaration.service.name).to.eql(expectedDocumentDeclaration.service.name);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it('has the proper document type', () => {
|
|
55
|
+
expect(actualDocumentDeclaration.type).to.eql(expectedDocumentDeclaration.type);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it('has no validity date', () => {
|
|
59
|
+
expect(actualDocumentDeclaration.validUntil).to.be.undefined;
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it('has the proper content selectors', async () => {
|
|
63
|
+
expect(actualContentSelectors).to.equal(expectedContentSelectors);
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it('has the proper noise selectors', async () => {
|
|
67
|
+
expect(actualNoiseSelectors).to.equal(expectedNoiseSelectors);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it('has the proper executeClientScripts option', async () => {
|
|
71
|
+
expect(actualExecuteClientScripts).to.equal(expectedExecuteClientScripts);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
if (expectedFilters) {
|
|
75
|
+
it('has the proper number of filter functions', async () => {
|
|
76
|
+
expect(actualFilters.length).to.equal(expectedFilters.length);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
for (let indexFilter = 0; indexFilter < expectedFilters.length; indexFilter++) {
|
|
80
|
+
it(`has the proper "${expectedFilters[indexFilter].name}" filter function`, async () => {
|
|
81
|
+
expect(await actualFilters[indexFilter]()).equal(await expectedFilters[indexFilter]()); // eslint-disable-line no-await-in-loop
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
} else {
|
|
85
|
+
it('has no filters', () => {
|
|
86
|
+
expect(actualFilters).to.be.undefined;
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
});
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
/* eslint-enable no-loop-func */
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
before(async () => {
|
|
97
|
+
result = await services.load();
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
describe('Service A', async () => {
|
|
101
|
+
await validateServiceWithoutHistory('service_A', expectedServices.service_A);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
describe('Service B', async () => {
|
|
105
|
+
await validateServiceWithoutHistory('service_B', expectedServices.service_B);
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
context('when a service has no history', async () => {
|
|
109
|
+
describe('Service without history', async () => {
|
|
110
|
+
await validateServiceWithoutHistory('service_without_history', expectedServices.service_without_history);
|
|
111
|
+
});
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
context('when a service has only history for declarations', async () => {
|
|
115
|
+
describe('Service with declaration history', async () => {
|
|
116
|
+
await validateServiceWithoutHistory('service_with_declaration_history', expectedServices.service_with_declaration_history);
|
|
117
|
+
});
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
context('when a service has only history for filters', async () => {
|
|
121
|
+
describe('Service with filters history', async () => {
|
|
122
|
+
await validateServiceWithoutHistory('service_with_filters_history', expectedServices.service_with_filters_history);
|
|
123
|
+
});
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
context('when a service has histories both for filters and for declarations', async () => {
|
|
127
|
+
describe('Service with history', async () => {
|
|
128
|
+
await validateServiceWithoutHistory('service_with_history', expectedServices.service_with_history);
|
|
129
|
+
});
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
context('when a service has a multipage document', async () => {
|
|
133
|
+
describe('Service with a multipage document', async () => {
|
|
134
|
+
await validateServiceWithoutHistory('service_with_multipage_document', expectedServices.service_with_multipage_document);
|
|
135
|
+
});
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
context('when specifying services to load', async () => {
|
|
139
|
+
before(async () => {
|
|
140
|
+
result = await services.load([ 'service_A', 'service_B' ]);
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
it('loads only the given services', async () => {
|
|
144
|
+
expect(result).to.have.all.keys('service_A', 'service_B');
|
|
145
|
+
});
|
|
146
|
+
});
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
describe('#loadWithHistory', () => {
|
|
150
|
+
let result;
|
|
151
|
+
|
|
152
|
+
async function validateServiceWithHistory(serviceId, expected) {
|
|
153
|
+
/* eslint-disable no-loop-func */
|
|
154
|
+
for (const documentType of expected.getDocumentTypes()) {
|
|
155
|
+
context(`${documentType}`, () => {
|
|
156
|
+
const { history: expectedHistory } = expected.documents[documentType];
|
|
157
|
+
const expectedHistoryDates = expectedHistory && [ ...expectedHistory.map(entry => entry.validUntil), null ]; // add `null` entry to simulate the still current valid declaration
|
|
158
|
+
|
|
159
|
+
let actualDocumentDeclaration;
|
|
160
|
+
let actualFilters;
|
|
161
|
+
const expectedDocumentDeclaration = expected.getDocumentDeclaration(documentType);
|
|
162
|
+
|
|
163
|
+
const { pages } = expectedDocumentDeclaration;
|
|
164
|
+
|
|
165
|
+
before(() => {
|
|
166
|
+
actualDocumentDeclaration = result[serviceId].getDocumentDeclaration(documentType);
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
it('has the proper service name', () => {
|
|
170
|
+
expect(actualDocumentDeclaration.service.name).to.eql(expectedDocumentDeclaration.service.name);
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
it('has the proper document type', () => {
|
|
174
|
+
expect(actualDocumentDeclaration.type).to.eql(expectedDocumentDeclaration.type);
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
pages.forEach((page, index) => {
|
|
178
|
+
const { filters: expectedFilters } = page;
|
|
179
|
+
|
|
180
|
+
context(`${page.id} page`, () => {
|
|
181
|
+
before(() => {
|
|
182
|
+
const { pages: actualPages } = actualDocumentDeclaration;
|
|
183
|
+
|
|
184
|
+
({ filters: actualFilters } = actualPages[index]);
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
if (expectedHistoryDates) {
|
|
188
|
+
for (const date of expectedHistoryDates) {
|
|
189
|
+
context(`${date || 'Current'}`, () => {
|
|
190
|
+
let actualFiltersForThisDate;
|
|
191
|
+
let contentSelectorsForThisDate;
|
|
192
|
+
let noiseSelectorsForThisDate;
|
|
193
|
+
let actualExecuteClientScriptsForThisDate;
|
|
194
|
+
|
|
195
|
+
const { pages: pagesForThisDate } = expected.getDocumentDeclaration(documentType, date);
|
|
196
|
+
const {
|
|
197
|
+
filters: expectedFiltersForThisDate,
|
|
198
|
+
contentSelectors: expectedContentSelectors,
|
|
199
|
+
noiseSelectors: expectedNoiseSelectors,
|
|
200
|
+
expectedExecuteClientScripts: expectedExecuteClientScriptsForThisDate,
|
|
201
|
+
} = pagesForThisDate[index];
|
|
202
|
+
|
|
203
|
+
before(() => {
|
|
204
|
+
const { pages: actualPagesForThisDate } = result[serviceId].getDocumentDeclaration(documentType, date);
|
|
205
|
+
|
|
206
|
+
({
|
|
207
|
+
filters: actualFiltersForThisDate,
|
|
208
|
+
contentSelectors: contentSelectorsForThisDate,
|
|
209
|
+
noiseSelectors: noiseSelectorsForThisDate,
|
|
210
|
+
expectedExecuteClientScripts: actualExecuteClientScriptsForThisDate,
|
|
211
|
+
} = actualPagesForThisDate[index]);
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
it('has the proper content selectors', async () => {
|
|
215
|
+
expect(contentSelectorsForThisDate).to.equal(expectedContentSelectors);
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
it('has the proper noise selectors', async () => {
|
|
219
|
+
expect(noiseSelectorsForThisDate).to.equal(expectedNoiseSelectors);
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
it('has the proper executeClientScripts option', async () => {
|
|
223
|
+
expect(actualExecuteClientScriptsForThisDate).to.equal(expectedExecuteClientScriptsForThisDate);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
if (expectedFiltersForThisDate) {
|
|
227
|
+
it('has the proper number of filter functions', async () => {
|
|
228
|
+
expect(actualFiltersForThisDate.length).to.equal(expectedFiltersForThisDate.length);
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
for (let indexFilter = 0; indexFilter < expectedFiltersForThisDate.length; indexFilter++) {
|
|
232
|
+
it(`has the proper "${expectedFiltersForThisDate[indexFilter].name}" filter function`, async () => {
|
|
233
|
+
expect(await actualFiltersForThisDate[indexFilter]()).equal(await expectedFiltersForThisDate[indexFilter]()); // eslint-disable-line no-await-in-loop
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
} else {
|
|
237
|
+
it('has no filters', () => {
|
|
238
|
+
expect(actualFiltersForThisDate).to.be.undefined;
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
} else {
|
|
244
|
+
it('has no history', async () => {
|
|
245
|
+
expect(actualDocumentDeclaration.validUntil).to.be.undefined;
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
if (expectedFilters) {
|
|
249
|
+
it('has the proper number of filter functions', async () => {
|
|
250
|
+
expect(actualFilters.length).to.equal(expectedFilters.length);
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
for (
|
|
254
|
+
let indexFilter = 0;
|
|
255
|
+
indexFilter < expectedFilters.length;
|
|
256
|
+
indexFilter++
|
|
257
|
+
) {
|
|
258
|
+
it(`has the proper "${expectedFilters[indexFilter].name}" filter function`, async () => {
|
|
259
|
+
expect(await actualFilters[indexFilter]()).equal(await expectedFilters[indexFilter]()); // eslint-disable-line no-await-in-loop
|
|
260
|
+
});
|
|
261
|
+
}
|
|
262
|
+
} else {
|
|
263
|
+
it('has no filters', () => {
|
|
264
|
+
expect(actualFilters).to.be.undefined;
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
});
|
|
269
|
+
});
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
/* eslint-enable no-loop-func */
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
// Load every declared service (with history) once for the suites below.
// `result` is declared outside this span and is read by validateServiceWithHistory.
before(async () => {
  result = await services.loadWithHistory();
});

// NOTE: Mocha builds suites synchronously and does not wait on a promise
// returned by a `describe`/`context` callback, so these callbacks are plain
// functions. validateServiceWithHistory is expected to register its hooks and
// tests synchronously when called — TODO confirm against its definition.
describe('Service A', () => {
  validateServiceWithHistory('service_A', expectedServices.service_A);
});

describe('Service B', () => {
  validateServiceWithHistory('service_B', expectedServices.service_B);
});

context('when a service has no history', () => {
  describe('Service without history', () => {
    validateServiceWithHistory('service_without_history', expectedServices.service_without_history);
  });
});

context('when a service has only declarations history', () => {
  describe('Service with declaration history', () => {
    validateServiceWithHistory('service_with_declaration_history', expectedServices.service_with_declaration_history);
  });
});

context('when a service has only filters history', () => {
  describe('Service with filters history', () => {
    validateServiceWithHistory('service_with_filters_history', expectedServices.service_with_filters_history);
  });
});

context('when a service has both filters and declarations histories', () => {
  describe('Service with history', () => {
    validateServiceWithHistory('service_with_history', expectedServices.service_with_history);
  });
});

context('when a service has a multipage document', () => {
  describe('Service with a multipage document', () => {
    validateServiceWithHistory('service_with_multipage_document', expectedServices.service_with_multipage_document);
  });
});

context('when specifying services to load', () => {
  // Overwrites the shared `result` with a load restricted to two services.
  before(async () => {
    result = await services.loadWithHistory([ 'service_A', 'service_B' ]);
  });

  it('loads only the given services', () => {
    expect(result).to.have.all.keys('service_A', 'service_B');
  });
});
|
|
326
|
+
});
|
|
327
|
+
});
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Declaration of a single page: where it lives and which selectors and
 * filters are associated with it.
 */
export default class PageDeclaration {
  /**
   * @param {object} params
   * @param {string} params.location - Absolute URL of the page. It is parsed with `new URL`, so a relative or malformed URL throws a TypeError.
   * @param {*} [params.executeClientScripts] - Stored as-is and returned by `toPersistence` (presumably a boolean flag — confirm against callers).
   * @param {string|object|Array<string|object>} [params.contentSelectors] - A CSS selector string, a range selector object (`startBefore`, `endBefore`, `startAfter`, `endAfter`), or an array of those.
   * @param {string|object|Array<string|object>} [params.noiseSelectors] - Same accepted shapes as `contentSelectors`.
   * @param {Function[]} [params.filters] - Filter functions; this class only reads their `name`.
   */
  constructor({ location, executeClientScripts, contentSelectors, noiseSelectors, filters }) {
    this.location = location;
    this.executeClientScripts = executeClientScripts;
    this.contentSelectors = contentSelectors;
    this.noiseSelectors = noiseSelectors;
    this.filters = filters;

    // The id is built from the non-empty URL path segments joined by dashes,
    // e.g. "https://example.com/legal/terms/" gives "legal-terms"; a root path gives "".
    this.id = new URL(location).pathname.split('/').filter(Boolean).join('-');
  }

  /**
   * Flat list of every CSS selector string referenced by the content and
   * noise selectors, content selectors first. Falsy entries are dropped.
   *
   * @returns {string[]}
   */
  get cssSelectors() {
    const { contentSelectors, noiseSelectors } = this;

    const result = [
      ...PageDeclaration.extractCssSelectorsFromProperty(contentSelectors),
      ...PageDeclaration.extractCssSelectorsFromProperty(noiseSelectors),
    ];

    return result.filter(selector => selector);
  }

  /**
   * Extracts the CSS selectors from a selectors property, which may be a
   * single selector or an array of selectors.
   *
   * @param {string|object|Array<string|object>|null|undefined} property
   * @returns {Array} Extracted selectors; a nullish selector is returned as-is inside the array.
   */
  static extractCssSelectorsFromProperty(property) {
    if (Array.isArray(property)) {
      return property.flatMap(selector => PageDeclaration.extractCssSelectorsFromSelector(selector));
    }

    return PageDeclaration.extractCssSelectorsFromSelector(property);
  }

  /**
   * Extracts the CSS selectors from one selector.
   *
   * A range selector object yields its defined bounds in the order
   * `startBefore`, `endBefore`, `startAfter`, `endAfter`; any other value is
   * wrapped in a one-element array.
   *
   * @param {string|object|null|undefined} selector
   * @returns {Array}
   */
  static extractCssSelectorsFromSelector(selector) {
    // `typeof null` is 'object': without the explicit null check, a null
    // selector would be destructured below and throw a TypeError.
    if (selector !== null && typeof selector === 'object') {
      const { startBefore, endBefore, startAfter, endAfter } = selector;

      return [ startBefore, endBefore, startAfter, endAfter ].filter(rangeSelector => rangeSelector);
    }

    return [selector];
  }

  /**
   * Builds the plain-object form of this declaration, where filters are
   * represented by their function names.
   *
   * @returns {{fetch: string, select: *, remove: *, filter: (string[]|undefined), executeClientScripts: *}}
   */
  toPersistence() {
    return {
      fetch: this.location,
      select: this.contentSelectors,
      remove: this.noiseSelectors,
      filter: this.filters ? this.filters.map(filter => filter.name) : undefined,
      executeClientScripts: this.executeClientScripts,
    };
  }
}
|