@opentermsarchive/engine 7.1.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,8 @@ import { pathToFileURL } from 'url';
4
4
 
5
5
  import config from 'config';
6
6
 
7
+ import * as exposedFilters from '../extract/exposedFilters.js';
8
+
7
9
  import Service from './service.js';
8
10
  import SourceDocument from './sourceDocument.js';
9
11
  import Terms from './terms.js';
@@ -11,244 +13,304 @@ import Terms from './terms.js';
11
13
  export const DECLARATIONS_PATH = './declarations';
12
14
  const declarationsPath = path.resolve(process.cwd(), config.get('@opentermsarchive/engine.collectionPath'), DECLARATIONS_PATH);
13
15
 
14
- export async function load(servicesIdsToLoad = []) {
15
- let servicesIds = await getDeclaredServicesIds();
16
-
17
- if (servicesIdsToLoad.length) {
18
- servicesIds = servicesIds.filter(serviceId => serviceId.match(new RegExp(`^${servicesIdsToLoad.join('|')}$`, 'g')));
19
- }
16
+ const JSON_EXT = '.json';
17
+ const JS_EXT = '.js';
18
+ const HISTORY_SUFFIX = '.history';
19
+ const FILTERS_SUFFIX = '.filters';
20
20
 
21
+ export async function load(servicesIdsToLoad = []) {
22
+ const allServicesIds = await getDeclaredServicesIds();
23
+ const servicesIds = servicesIdsToLoad.length ? allServicesIds.filter(serviceId => servicesIdsToLoad.includes(serviceId)) : allServicesIds;
21
24
  const services = {};
22
25
 
23
26
  await Promise.all(servicesIds.map(async serviceId => {
24
- const { name, terms } = await loadServiceDeclaration(serviceId);
27
+ services[serviceId] = await createServiceFromDeclaration(serviceId);
28
+ }));
25
29
 
26
- const service = new Service({ id: serviceId, name });
30
+ return services;
31
+ }
32
+
33
+ export async function createServiceFromDeclaration(serviceId) {
34
+ const { name, terms: termsDeclarations } = await loadServiceDeclaration(serviceId);
35
+ const service = new Service({ id: serviceId, name });
27
36
 
28
- await Promise.all(Object.keys(terms).map(termsType => loadServiceDocument(service, termsType, terms[termsType])));
37
+ await Promise.all(Object.entries(termsDeclarations).map(async ([ termsType, termsDeclaration ]) => {
38
+ const sourceDocuments = await createSourceDocuments(service.id, termsDeclaration);
29
39
 
30
- services[serviceId] = service;
40
+ service.addTerms(new Terms({ service, type: termsType, sourceDocuments }));
31
41
  }));
32
42
 
33
- return services;
43
+ return service;
34
44
  }
35
45
 
36
- async function loadServiceDeclaration(serviceId) {
37
- const jsonDeclarationFilePath = path.join(declarationsPath, `${serviceId}.json`);
38
- const rawServiceDeclaration = await fs.readFile(jsonDeclarationFilePath);
46
+ export async function loadServiceDeclaration(serviceId) {
47
+ const filePath = path.join(declarationsPath, `${serviceId}${JSON_EXT}`);
48
+ const rawServiceDeclaration = await fs.readFile(filePath);
39
49
 
40
50
  try {
41
51
  return JSON.parse(rawServiceDeclaration);
42
52
  } catch (error) {
43
- throw new Error(`The "${serviceId}" service declaration is malformed and cannot be parsed in ${jsonDeclarationFilePath}`);
53
+ throw new Error(`The "${serviceId}" service declaration is malformed and cannot be parsed in ${filePath}`);
54
+ }
55
+ }
56
+
57
+ export async function createSourceDocuments(serviceId, termsDeclaration) {
58
+ const serviceFilters = await loadServiceFilters(serviceId);
59
+
60
+ if (!termsDeclaration.combine) {
61
+ return [new SourceDocument({
62
+ location: termsDeclaration.fetch,
63
+ executeClientScripts: termsDeclaration.executeClientScripts,
64
+ contentSelectors: termsDeclaration.select,
65
+ insignificantContentSelectors: termsDeclaration.remove,
66
+ filters: await getServiceFilters(serviceFilters, termsDeclaration.filter),
67
+ })];
68
+ }
69
+
70
+ return Promise.all(termsDeclaration.combine.map(async sourceDocumentDeclaration =>
71
+ new SourceDocument({
72
+ location: sourceDocumentDeclaration.fetch ?? termsDeclaration.fetch,
73
+ executeClientScripts: sourceDocumentDeclaration.executeClientScripts ?? termsDeclaration.executeClientScripts,
74
+ contentSelectors: sourceDocumentDeclaration.select ?? termsDeclaration.select,
75
+ insignificantContentSelectors: sourceDocumentDeclaration.remove ?? termsDeclaration.remove,
76
+ filters: await getServiceFilters(serviceFilters, sourceDocumentDeclaration.filter ?? termsDeclaration.filter),
77
+ })));
78
+ }
79
+
80
+ export async function loadServiceFilters(serviceId) {
81
+ const serviceFiltersPath = path.join(declarationsPath, `${serviceId}${FILTERS_SUFFIX}${JS_EXT}`);
82
+
83
+ if (await fileExists(serviceFiltersPath)) {
84
+ return import(pathToFileURL(serviceFiltersPath));
85
+ }
86
+
87
+ return {};
88
+ }
89
+
90
+ function parseFilterItem(filterItem) {
91
+ if (typeof filterItem === 'string') {
92
+ return { filterName: filterItem, filterParams: undefined };
93
+ }
94
+
95
+ if (typeof filterItem === 'object' && filterItem !== null) {
96
+ const [ filterNameEntry, filterParamsEntry ] = Object.entries(filterItem)[0];
97
+
98
+ return { filterName: filterNameEntry, filterParams: filterParamsEntry };
44
99
  }
100
+
101
+ return { filterName: undefined, filterParams: undefined };
45
102
  }
46
103
 
47
- async function loadServiceFilters(serviceId, filterNames) {
48
- if (!filterNames) {
104
+ function createWrappedFilter(baseFunction, filterName, filterParams) {
105
+ if (!baseFunction) {
49
106
  return;
50
107
  }
51
108
 
52
- const filterFilePath = `${serviceId}.filters.js`;
53
- const serviceFilters = await import(pathToFileURL(path.join(declarationsPath, filterFilePath)));
109
+ if (filterParams) {
110
+ const wrappedFilter = (webPageDOM, context) => baseFunction(webPageDOM, filterParams, context);
111
+
112
+ Object.defineProperty(wrappedFilter, 'name', { value: filterName });
113
+
114
+ return wrappedFilter;
115
+ }
54
116
 
55
- return filterNames.map(filterName => serviceFilters[filterName]);
117
+ return baseFunction;
56
118
  }
57
119
 
58
- async function loadServiceDocument(service, termsType, termsTypeDeclaration) {
59
- const { filter: filterNames, fetch: location, executeClientScripts, select: contentSelectors, remove: insignificantContentSelectors, combine } = termsTypeDeclaration;
60
-
61
- const sourceDocuments = [];
62
-
63
- const filters = await loadServiceFilters(service.id, filterNames);
64
-
65
- if (!combine) {
66
- sourceDocuments.push(new SourceDocument({ location, executeClientScripts, contentSelectors, insignificantContentSelectors, filters }));
67
- } else {
68
- for (const sourceDocument of combine) {
69
- const {
70
- filter: sourceDocumentFilterNames,
71
- fetch: sourceDocumentLocation,
72
- executeClientScripts: sourceDocumentExecuteClientScripts,
73
- select: sourceDocumentContentSelectors,
74
- remove: sourceDocumentInsignificantContentSelectors,
75
- } = sourceDocument;
76
-
77
- const sourceDocumentFilters = await loadServiceFilters(service.id, sourceDocumentFilterNames);
78
-
79
- sourceDocuments.push(new SourceDocument({
80
- location: sourceDocumentLocation || location,
81
- executeClientScripts: (sourceDocumentExecuteClientScripts === undefined || sourceDocumentExecuteClientScripts === null ? executeClientScripts : sourceDocumentExecuteClientScripts),
82
- contentSelectors: sourceDocumentContentSelectors || contentSelectors,
83
- insignificantContentSelectors: sourceDocumentInsignificantContentSelectors || insignificantContentSelectors,
84
- filters: sourceDocumentFilters || filters,
85
- }));
86
- }
120
+ export function getServiceFilters(serviceFilters, declaredFilters) {
121
+ if (!declaredFilters) {
122
+ return;
87
123
  }
88
124
 
89
- service.addTerms(new Terms({ service, type: termsType, sourceDocuments }));
125
+ const filters = declaredFilters.reduce((filters, filterItem) => {
126
+ const { filterName, filterParams } = parseFilterItem(filterItem);
127
+
128
+ if (!filterName) {
129
+ return filters;
130
+ }
131
+
132
+ const baseFunction = exposedFilters[filterName] || serviceFilters[filterName];
133
+ const filterFunction = createWrappedFilter(baseFunction, filterName, filterParams);
134
+
135
+ if (filterFunction) {
136
+ filters.push(filterFunction);
137
+ }
138
+
139
+ return filters;
140
+ }, []);
141
+
142
+ return filters.length ? filters : undefined;
90
143
  }
91
144
 
92
- async function getDeclaredServicesIds() {
145
+ export async function getDeclaredServicesIds() {
93
146
  const fileNames = await fs.readdir(declarationsPath);
94
147
 
95
- const servicesFileNames = fileNames.filter(fileName => path.extname(fileName) == '.json' && !fileName.includes('.history.json'));
96
-
97
- return servicesFileNames.map(serviceFileName => path.basename(serviceFileName, '.json'));
148
+ return fileNames
149
+ .filter(fileName => fileName.endsWith(JSON_EXT) && !fileName.includes(`${HISTORY_SUFFIX}${JSON_EXT}`))
150
+ .map(fileName => path.basename(fileName, JSON_EXT));
98
151
  }
99
152
 
100
153
  export async function loadWithHistory(servicesIds = []) {
101
154
  const services = await load(servicesIds);
102
155
 
103
- for (const serviceId of Object.keys(services)) {
104
- const { declarations, filters } = await loadServiceHistoryFiles(serviceId);
105
-
106
- for (const termsType of Object.keys(declarations)) {
107
- const termsTypeDeclarationEntries = declarations[termsType];
108
- const filterNames = [...new Set(termsTypeDeclarationEntries.flatMap(declaration => declaration.filter))].filter(Boolean);
109
- const allHistoryDates = extractHistoryDates({ termsTypeDeclarationEntries, filters, filterNames });
110
-
111
- const latestValidTerms = termsTypeDeclarationEntries.find(entry => !entry.validUntil);
112
-
113
- allHistoryDates.forEach(async date => {
114
- const declarationForThisDate = termsTypeDeclarationEntries.find(entry => new Date(date) <= new Date(entry.validUntil)) || latestValidTerms;
115
- const { filter: declarationForThisDateFilterNames, combine } = declarationForThisDate;
116
-
117
- const sourceDocuments = [];
118
- let actualFilters;
119
-
120
- if (declarationForThisDateFilterNames) {
121
- actualFilters = declarationForThisDateFilterNames.map(filterName => {
122
- const currentlyValidFilters = filters[filterName].find(entry => !entry.validUntil);
123
- const validFilterForThisDate = filters[filterName].find(entry => new Date(date) <= new Date(entry.validUntil))
124
- || currentlyValidFilters;
125
-
126
- return validFilterForThisDate.filter;
127
- });
128
- }
129
-
130
- if (!combine) {
131
- sourceDocuments.push(new SourceDocument({
132
- location: declarationForThisDate.fetch,
133
- executeClientScripts: declarationForThisDate.executeClientScripts,
134
- contentSelectors: declarationForThisDate.select,
135
- insignificantContentSelectors: declarationForThisDate.remove,
136
- filters: actualFilters,
137
- }));
138
- } else {
139
- for (const sourceDocument of combine) {
140
- const {
141
- filter: sourceDocumentFilterNames,
142
- fetch: sourceDocumentLocation,
143
- executeClientScripts: sourceDocumentExecuteClientScripts,
144
- select: sourceDocumentContentSelectors,
145
- remove: sourceDocumentInsignificantContentSelectors,
146
- } = sourceDocument;
147
-
148
- const sourceDocumentFilters = await loadServiceFilters(serviceId, sourceDocumentFilterNames);
149
-
150
- sourceDocuments.push(new SourceDocument({
151
- location: sourceDocumentLocation || declarationForThisDate.fetch,
152
- executeClientScripts: (sourceDocumentExecuteClientScripts === undefined || sourceDocumentExecuteClientScripts === null ? declarationForThisDate.executeClientScripts : sourceDocumentExecuteClientScripts),
153
- contentSelectors: sourceDocumentContentSelectors || declarationForThisDate.select,
154
- insignificantContentSelectors: sourceDocumentInsignificantContentSelectors || declarationForThisDate.remove,
155
- filters: sourceDocumentFilters || actualFilters,
156
- }));
157
- }
158
- }
159
-
160
- services[serviceId].addTerms(new Terms({
161
- service: services[serviceId],
162
- type: termsType,
163
- sourceDocuments,
164
- validUntil: date,
165
- }));
166
- });
167
- }
168
- }
156
+ await Promise.all(Object.keys(services).map(serviceId => addHistoryToService(services[serviceId])));
169
157
 
170
158
  return services;
171
159
  }
172
160
 
173
- function extractHistoryDates({ filters, filterNames, termsTypeDeclarationEntries }) {
174
- const allHistoryDates = [];
161
+ async function addHistoryToService(service) {
162
+ const { declarations, filters } = await loadServiceHistoryFiles(service.id);
163
+
164
+ await Promise.all(Object.entries(declarations).map(([ termsType, declarationEntries ]) => addTermsHistory(service, service.id, termsType, declarationEntries, filters)));
165
+ }
166
+
167
+ async function addTermsHistory(service, serviceId, termsType, declarationEntries, filters) {
168
+ const declaredFilters = [...new Set(declarationEntries.flatMap(declarationEntrie => declarationEntrie.filter))].filter(Boolean);
169
+ const historyDates = extractHistoryDates({ termsTypeDeclarationEntries: declarationEntries, filters, declaredFilters });
170
+ const latestValidTerms = declarationEntries.find(entry => !entry.validUntil);
171
+
172
+ await Promise.all(historyDates.map(date => createTermsForDate(service, serviceId, termsType, date, declarationEntries, filters, latestValidTerms)));
173
+ }
174
+
175
+ async function createTermsForDate(service, serviceId, termsType, date, declarationEntries, filters, latestValidTerms) {
176
+ const declaration = declarationEntries.find(entry => new Date(date) <= new Date(entry.validUntil)) || latestValidTerms;
177
+ const actualFilters = resolveFiltersForDate(date, declaration.filter, filters);
178
+
179
+ const sourceDocuments = await createHistorySourceDocuments(serviceId, declaration, actualFilters);
180
+
181
+ service.addTerms(new Terms({
182
+ service,
183
+ type: termsType,
184
+ sourceDocuments,
185
+ validUntil: date,
186
+ }));
187
+ }
188
+
189
+ function resolveFiltersForDate(date, declaredFilters, filters) {
190
+ return declaredFilters?.map(filterItem => {
191
+ const { filterName, filterParams } = parseFilterItem(filterItem);
175
192
 
176
- Object.keys(filters).forEach(filterName => {
177
- if (filterNames.includes(filterName)) {
178
- filters[filterName].forEach(({ validUntil }) => allHistoryDates.push(validUntil));
193
+ if (!filterName) {
194
+ return;
179
195
  }
196
+
197
+ const filterHistory = filters[filterName];
198
+ const historicalFilter = filterHistory?.find(entry => new Date(date) <= new Date(entry.validUntil));
199
+ const currentFilter = filterHistory?.find(entry => !entry.validUntil);
200
+ const filter = (historicalFilter || currentFilter)?.filter;
201
+
202
+ return createWrappedFilter(filter, filterName, filterParams);
180
203
  });
204
+ }
181
205
 
182
- termsTypeDeclarationEntries.forEach(({ validUntil }) => allHistoryDates.push(validUntil));
206
+ async function createHistorySourceDocuments(serviceId, termsDeclaration, actualFilters) {
207
+ const serviceFilters = await loadServiceFilters(serviceId);
208
+
209
+ if (!termsDeclaration.combine) {
210
+ return [new SourceDocument({
211
+ location: termsDeclaration.fetch,
212
+ executeClientScripts: termsDeclaration.executeClientScripts,
213
+ contentSelectors: termsDeclaration.select,
214
+ insignificantContentSelectors: termsDeclaration.remove,
215
+ filters: actualFilters,
216
+ })];
217
+ }
183
218
 
184
- const sortedDates = allHistoryDates.sort((a, b) => new Date(a) - new Date(b));
185
- const uniqSortedDates = [...new Set(sortedDates)];
219
+ return Promise.all(termsDeclaration.combine.map(async sourceDocument => {
220
+ const filters = await getServiceFilters(serviceFilters, sourceDocument.filter) || actualFilters;
186
221
 
187
- return uniqSortedDates;
222
+ return new SourceDocument({
223
+ location: sourceDocument.fetch || termsDeclaration.fetch,
224
+ executeClientScripts: sourceDocument.executeClientScripts ?? termsDeclaration.executeClientScripts,
225
+ contentSelectors: sourceDocument.select || termsDeclaration.select,
226
+ insignificantContentSelectors: sourceDocument.remove || termsDeclaration.remove,
227
+ filters,
228
+ });
229
+ }));
230
+ }
231
+
232
+ function extractHistoryDates({ filters, declaredFilters, termsTypeDeclarationEntries }) {
233
+ const filterDates = Object.entries(filters)
234
+ .filter(([filterName]) => declaredFilters.some(filterItem => {
235
+ const { filterName: itemName } = parseFilterItem(filterItem);
236
+
237
+ return itemName === filterName;
238
+ }))
239
+ .flatMap(([ , filterEntries ]) => filterEntries.map(({ validUntil }) => validUntil))
240
+ .filter(Boolean);
241
+
242
+ const declarationDates = termsTypeDeclarationEntries
243
+ .map(({ validUntil }) => validUntil)
244
+ .filter(Boolean);
245
+
246
+ return [...new Set([ ...filterDates, ...declarationDates ])].sort((a, b) => new Date(a) - new Date(b));
188
247
  }
189
248
 
190
249
  function sortHistory(history = {}) {
191
- Object.keys(history).forEach(entry => {
192
- history[entry].sort((a, b) => new Date(a.validUntil) - new Date(b.validUntil));
193
- });
250
+ Object.values(history).forEach(entries => entries.sort((a, b) => new Date(a.validUntil) - new Date(b.validUntil)));
194
251
  }
195
252
 
196
253
  async function loadServiceHistoryFiles(serviceId) {
197
- const serviceFileName = path.join(declarationsPath, `${serviceId}.json`);
198
- const jsonDeclarationFilePath = await fs.readFile(serviceFileName);
199
- let serviceDeclaration;
254
+ const serviceDeclaration = await loadServiceDeclaration(serviceId);
255
+ const filePaths = getHistoryFilePaths(serviceId);
200
256
 
201
- try {
202
- serviceDeclaration = JSON.parse(jsonDeclarationFilePath);
203
- } catch (e) {
204
- throw new Error(`The "${path.basename(jsonDeclarationFilePath, '.json')}" service declaration is malformed and cannot be parsed in ${jsonDeclarationFilePath}`);
205
- }
257
+ const [ serviceHistory, serviceFiltersHistory ] = await Promise.all([
258
+ loadServiceHistory(filePaths.history),
259
+ loadServiceFiltersHistory(filePaths.filtersHistory, filePaths.filters),
260
+ ]);
261
+
262
+ Object.entries(serviceDeclaration.terms).forEach(([ termsType, declaration ]) => {
263
+ serviceHistory[termsType] = serviceHistory[termsType] || [];
264
+ serviceHistory[termsType].push(declaration);
265
+ });
206
266
 
207
- const serviceHistoryFileName = path.join(declarationsPath, `${serviceId}.history.json`);
208
- const serviceFiltersFileName = path.join(declarationsPath, `${serviceId}.filters.js`);
209
- const serviceFiltersHistoryFileName = path.join(declarationsPath, `${serviceId}.filters.history.js`);
267
+ sortHistory(serviceHistory);
268
+ sortHistory(serviceFiltersHistory);
269
+
270
+ return { declarations: serviceHistory, filters: serviceFiltersHistory };
271
+ }
210
272
 
211
- let serviceHistory = {};
212
- const serviceFiltersHistory = {};
213
- let serviceFiltersHistoryModule;
273
+ function getHistoryFilePaths(serviceId) {
274
+ const basePath = path.join(declarationsPath, serviceId);
214
275
 
215
- if (await fileExists(serviceHistoryFileName)) {
216
- try {
217
- serviceHistory = JSON.parse(await fs.readFile(serviceHistoryFileName));
218
- } catch (e) {
219
- throw new Error(`The "${path.basename(serviceHistoryFileName, '.json')}" service declaration is malformed and cannot be parsed in ${serviceHistoryFileName}`);
220
- }
276
+ return {
277
+ history: `${basePath}${HISTORY_SUFFIX}${JSON_EXT}`,
278
+ filters: `${basePath}${FILTERS_SUFFIX}${JS_EXT}`,
279
+ filtersHistory: `${basePath}${FILTERS_SUFFIX}${HISTORY_SUFFIX}${JS_EXT}`,
280
+ };
281
+ }
282
+
283
+ async function loadServiceHistory(historyFilePath) {
284
+ if (!(await fileExists(historyFilePath))) return {};
285
+
286
+ try {
287
+ return JSON.parse(await fs.readFile(historyFilePath));
288
+ } catch (error) {
289
+ const fileName = path.basename(historyFilePath, JSON_EXT);
290
+
291
+ throw new Error(`The "${fileName}" service declaration is malformed and cannot be parsed in ${historyFilePath}`);
221
292
  }
293
+ }
222
294
 
223
- Object.keys(serviceDeclaration.terms).forEach(termsType => {
224
- serviceHistory[termsType] = serviceHistory[termsType] || [];
225
- serviceHistory[termsType].push(serviceDeclaration.terms[termsType]);
226
- });
295
+ async function loadServiceFiltersHistory(filtersHistoryPath, filtersPath) {
296
+ const filtersHistory = {};
227
297
 
228
- sortHistory(serviceHistory);
298
+ if (await fileExists(filtersHistoryPath)) {
299
+ const historyModule = await import(pathToFileURL(filtersHistoryPath));
229
300
 
230
- if (await fileExists(serviceFiltersHistoryFileName)) {
231
- serviceFiltersHistoryModule = await import(pathToFileURL(serviceFiltersHistoryFileName));
232
- Object.keys(serviceFiltersHistoryModule).forEach(filterName => {
233
- serviceFiltersHistory[filterName] = serviceFiltersHistoryModule[filterName];
234
- });
301
+ Object.assign(filtersHistory, historyModule);
235
302
  }
236
303
 
237
- if (await fileExists(serviceFiltersFileName)) {
238
- const serviceFilters = await import(pathToFileURL(serviceFiltersFileName));
304
+ if (await fileExists(filtersPath)) {
305
+ const filtersModule = await import(pathToFileURL(filtersPath));
239
306
 
240
- Object.keys(serviceFilters).forEach(filterName => {
241
- serviceFiltersHistory[filterName] = serviceFiltersHistory[filterName] || [];
242
- serviceFiltersHistory[filterName].push({ filter: serviceFilters[filterName] });
307
+ Object.entries(filtersModule).forEach(([ filterName, filter ]) => {
308
+ filtersHistory[filterName] = filtersHistory[filterName] || [];
309
+ filtersHistory[filterName].push({ filter });
243
310
  });
244
311
  }
245
312
 
246
- sortHistory(serviceFiltersHistory);
247
-
248
- return {
249
- declarations: serviceHistory || {},
250
- filters: serviceFiltersHistory || {},
251
- };
313
+ return filtersHistory;
252
314
  }
253
315
 
254
316
  async function fileExists(filePath) {
@@ -260,5 +322,7 @@ async function fileExists(filePath) {
260
322
  if (error.code === 'ENOENT') {
261
323
  return false;
262
324
  }
325
+
326
+ throw error;
263
327
  }
264
328
  }