@opentermsarchive/engine 9.2.3 → 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ const { version: PACKAGE_VERSION } = require('../../package.json');
20
20
  // - too many requests on the same endpoint yield 403
21
21
  // - sometimes when creating a commit no SHA are returned for unknown reasons
22
22
  const MAX_PARALLEL_TRACKING = 1;
23
- const MAX_PARALLEL_EXTRACTING = 10;
23
+ const MAX_PARALLEL_TECHNICAL_UPGRADES = 10;
24
24
 
25
25
  export const EVENTS = [
26
26
  'snapshotRecorded',
@@ -128,14 +128,32 @@ export default class Archivist extends events.EventEmitter {
128
128
  });
129
129
  }
130
130
 
131
- async track({ services: servicesIds = this.servicesIds, types: termsTypes = [], extractOnly = false } = {}) {
131
+ async track({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
132
+ await this.processTerms({
133
+ servicesIds,
134
+ termsTypes,
135
+ technicalUpgradeOnly: false,
136
+ concurrency: MAX_PARALLEL_TRACKING,
137
+ });
138
+ }
139
+
140
+ async applyTechnicalUpgrades({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
141
+ await this.processTerms({
142
+ servicesIds,
143
+ termsTypes,
144
+ technicalUpgradeOnly: true,
145
+ concurrency: MAX_PARALLEL_TECHNICAL_UPGRADES,
146
+ });
147
+ }
148
+
149
+ async processTerms({ servicesIds, termsTypes, technicalUpgradeOnly, concurrency }) {
132
150
  const numberOfTerms = Service.getNumberOfTerms(this.services, servicesIds, termsTypes);
133
151
 
134
- this.emit('trackingStarted', servicesIds.length, numberOfTerms, extractOnly);
152
+ this.emit('trackingStarted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
135
153
 
136
154
  await Promise.all([ launchHeadlessBrowser(), this.recorder.initialize() ]);
137
155
 
138
- this.trackingQueue.concurrency = extractOnly ? MAX_PARALLEL_EXTRACTING : MAX_PARALLEL_TRACKING;
156
+ this.trackingQueue.concurrency = concurrency;
139
157
 
140
158
  servicesIds.forEach(serviceId => {
141
159
  this.services[serviceId].getTermsTypes().forEach(termsType => {
@@ -143,7 +161,7 @@ export default class Archivist extends events.EventEmitter {
143
161
  return;
144
162
  }
145
163
 
146
- this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), extractOnly });
164
+ this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), technicalUpgradeOnly });
147
165
  });
148
166
  });
149
167
 
@@ -153,12 +171,14 @@ export default class Archivist extends events.EventEmitter {
153
171
 
154
172
  await Promise.all([ stopHeadlessBrowser(), this.recorder.finalize() ]);
155
173
 
156
- this.emit('trackingCompleted', servicesIds.length, numberOfTerms, extractOnly);
174
+ this.emit('trackingCompleted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
157
175
  }
158
176
 
159
- async trackTermsChanges({ terms, extractOnly = false }) {
160
- if (!extractOnly) {
177
+ async trackTermsChanges({ terms, technicalUpgradeOnly = false }) {
178
+ if (!technicalUpgradeOnly) {
161
179
  await this.fetchAndRecordSnapshots(terms);
180
+ } else {
181
+ await this.fetchAndRecordNewSourceDocuments(terms); // In technical upgrade mode, fetch and record snapshots only for new source documents that don't have existing snapshots yet (e.g., when a declaration is updated to add a new source document)
162
182
  }
163
183
 
164
184
  const contents = await this.extractContentsFromSnapshots(terms);
@@ -167,7 +187,7 @@ export default class Archivist extends events.EventEmitter {
167
187
  return;
168
188
  }
169
189
 
170
- await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), extractOnly);
190
+ await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), technicalUpgradeOnly);
171
191
  }
172
192
 
173
193
  async fetchAndRecordSnapshots(terms) {
@@ -190,6 +210,50 @@ export default class Archivist extends events.EventEmitter {
190
210
  }
191
211
  }
192
212
 
213
+ async fetchAndRecordNewSourceDocuments(terms) {
214
+ if (!terms.hasMultipleSourceDocuments) { // If the terms has only one source document, there is nothing to do
215
+ return;
216
+ }
217
+
218
+ const existingVersion = await this.recorder.versionsRepository.findLatest(terms.service.id, terms.type);
219
+
220
+ if (!existingVersion) { // If the terms does not have a version recorded, skip this step as the next version will be tagged as "First record…" anyway
221
+ return;
222
+ }
223
+
224
+ const missingSourceDocuments = [];
225
+
226
+ for (const sourceDocument of terms.sourceDocuments) {
227
+ const snapshot = await this.recorder.getLatestSnapshot(terms, sourceDocument.id);
228
+
229
+ if (!snapshot) {
230
+ missingSourceDocuments.push(sourceDocument);
231
+ }
232
+ }
233
+
234
+ if (!missingSourceDocuments.length) {
235
+ return;
236
+ }
237
+
238
+ terms.fetchDate = new Date();
239
+ const fetchDocumentErrors = [];
240
+
241
+ for (const sourceDocument of missingSourceDocuments) {
242
+ const error = await this.fetchSourceDocument(sourceDocument);
243
+
244
+ if (error) {
245
+ fetchDocumentErrors.push(error);
246
+ } else {
247
+ await this.recordSnapshot(terms, sourceDocument);
248
+ sourceDocument.clearContent(); // Reduce memory usage by clearing no longer needed large content strings
249
+ }
250
+ }
251
+
252
+ if (fetchDocumentErrors.length) {
253
+ throw new InaccessibleContentError(fetchDocumentErrors);
254
+ }
255
+ }
256
+
193
257
  async fetchSourceDocument(sourceDocument) {
194
258
  const { location: url, executeClientScripts, cssSelectors } = sourceDocument;
195
259
 
@@ -249,14 +313,14 @@ export default class Archivist extends events.EventEmitter {
249
313
  return contents;
250
314
  }
251
315
 
252
- async recordVersion(terms, content, extractOnly) {
316
+ async recordVersion(terms, content, technicalUpgradeOnly) {
253
317
  const record = new Version({
254
318
  content,
255
319
  snapshotIds: terms.sourceDocuments.map(sourceDocuments => sourceDocuments.snapshotId),
256
320
  serviceId: terms.service.id,
257
321
  termsType: terms.type,
258
322
  fetchDate: terms.fetchDate,
259
- isExtractOnly: extractOnly,
323
+ isTechnicalUpgrade: technicalUpgradeOnly,
260
324
  metadata: { 'x-engine-version': PACKAGE_VERSION },
261
325
  });
262
326