@opentermsarchive/engine 9.2.3 → 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import Version from '../../version.js';
7
7
 
8
8
  export const COMMIT_MESSAGE_PREFIXES = {
9
9
  startTracking: 'First record of',
10
- extractOnly: 'Apply technical or declaration upgrade on',
10
+ technicalUpgrade: 'Apply technical or declaration upgrade on',
11
11
  update: 'Record new changes of',
12
12
  deprecated_startTracking: 'Start tracking',
13
13
  deprecated_refilter: 'Refilter',
@@ -22,9 +22,9 @@ const MULTIPLE_SOURCE_DOCUMENTS_PREFIX = 'This version was recorded after extrac
22
22
  export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${Object.values(COMMIT_MESSAGE_PREFIXES).join('|')})`);
23
23
 
24
24
  export function toPersistence(record, snapshotIdentiferTemplate) {
25
- const { serviceId, termsType, documentId, isExtractOnly, snapshotIds = [], mimeType, isFirstRecord, metadata } = record;
25
+ const { serviceId, termsType, documentId, isTechnicalUpgrade, snapshotIds = [], mimeType, isFirstRecord, metadata } = record;
26
26
 
27
- let prefix = isExtractOnly ? COMMIT_MESSAGE_PREFIXES.extractOnly : COMMIT_MESSAGE_PREFIXES.update;
27
+ let prefix = isTechnicalUpgrade ? COMMIT_MESSAGE_PREFIXES.technicalUpgrade : COMMIT_MESSAGE_PREFIXES.update;
28
28
 
29
29
  prefix = isFirstRecord ? COMMIT_MESSAGE_PREFIXES.startTracking : prefix;
30
30
 
@@ -75,7 +75,7 @@ export function toDomain(commit) {
75
75
  const mimeTypeValue = mime.getType(relativeFilePath);
76
76
 
77
77
  if (mimeTypeValue == mime.getType('markdown')) {
78
- attributes.isExtractOnly = message.startsWith(COMMIT_MESSAGE_PREFIXES.extractOnly) || message.startsWith(COMMIT_MESSAGE_PREFIXES.deprecated_refilter);
78
+ attributes.isTechnicalUpgrade = message.startsWith(COMMIT_MESSAGE_PREFIXES.technicalUpgrade) || message.startsWith(COMMIT_MESSAGE_PREFIXES.deprecated_refilter);
79
79
  attributes.snapshotIds = snapshotIdsMatch;
80
80
 
81
81
  return new Version(attributes);
@@ -208,7 +208,7 @@ describe('GitRepository', () => {
208
208
  });
209
209
  });
210
210
 
211
- context('when it is an extracted only version', () => {
211
+ context('when it is an technical upgrade version', () => {
212
212
  const EXTRACTED_ONLY_CONTENT = `${CONTENT} extracted only`;
213
213
 
214
214
  before(async () => {
@@ -217,7 +217,7 @@ describe('GitRepository', () => {
217
217
  termsType: TERMS_TYPE,
218
218
  content: CONTENT,
219
219
  fetchDate: FETCH_DATE_EARLIER,
220
- })); // An extracted only version cannot be the first record
220
+ })); // A technical upgrade version cannot be the first record
221
221
 
222
222
  numberOfRecordsBefore = (await git.log()).length;
223
223
 
@@ -226,7 +226,7 @@ describe('GitRepository', () => {
226
226
  termsType: TERMS_TYPE,
227
227
  content: EXTRACTED_ONLY_CONTENT,
228
228
  fetchDate: FETCH_DATE,
229
- isExtractOnly: true,
229
+ isTechnicalUpgrade: true,
230
230
  snapshotIds: [SNAPSHOT_ID],
231
231
  })));
232
232
 
@@ -245,8 +245,8 @@ describe('GitRepository', () => {
245
245
  expect(commit.hash).to.include(id);
246
246
  });
247
247
 
248
- it('stores information that it is an extracted only version', () => {
249
- expect(commit.message).to.include(COMMIT_MESSAGE_PREFIXES.extractOnly);
248
+ it('stores information that it is an technical upgrade version', () => {
249
+ expect(commit.message).to.include(COMMIT_MESSAGE_PREFIXES.technicalUpgrade);
250
250
  });
251
251
  });
252
252
 
@@ -518,7 +518,7 @@ describe('GitRepository', () => {
518
518
  serviceId: SERVICE_PROVIDER_ID,
519
519
  termsType: TERMS_TYPE,
520
520
  content: `${CONTENT} - updated 2`,
521
- isExtractOnly: true,
521
+ isTechnicalUpgrade: true,
522
522
  fetchDate: FETCH_DATE_EARLIER,
523
523
  snapshotIds: [SNAPSHOT_ID],
524
524
  }));
@@ -569,7 +569,7 @@ describe('GitRepository', () => {
569
569
  serviceId: SERVICE_PROVIDER_ID,
570
570
  termsType: TERMS_TYPE,
571
571
  content: `${CONTENT} - updated 2`,
572
- isExtractOnly: true,
572
+ isTechnicalUpgrade: true,
573
573
  fetchDate: FETCH_DATE_EARLIER,
574
574
  snapshotIds: [SNAPSHOT_ID],
575
575
  }));
@@ -678,7 +678,7 @@ describe('GitRepository', () => {
678
678
  serviceId: SERVICE_PROVIDER_ID,
679
679
  termsType: TERMS_TYPE,
680
680
  content: `${CONTENT} - updated 2`,
681
- isExtractOnly: true,
681
+ isTechnicalUpgrade: true,
682
682
  fetchDate: FETCH_DATE_EARLIER,
683
683
  snapshotIds: [SNAPSHOT_ID],
684
684
  mimeType: HTML_MIME_TYPE,
@@ -1079,7 +1079,7 @@ describe('GitRepository', () => {
1079
1079
  serviceId: SERVICE_PROVIDER_ID,
1080
1080
  termsType: TERMS_TYPE,
1081
1081
  content: `${CONTENT} - updated 2`,
1082
- isExtractOnly: true,
1082
+ isTechnicalUpgrade: true,
1083
1083
  fetchDate: FETCH_DATE_EARLIER,
1084
1084
  mimeType: HTML_MIME_TYPE,
1085
1085
  }));
@@ -1130,7 +1130,7 @@ describe('GitRepository', () => {
1130
1130
  serviceId: SERVICE_PROVIDER_ID,
1131
1131
  termsType: TERMS_TYPE,
1132
1132
  content: `${CONTENT} - updated 2`,
1133
- isExtractOnly: true,
1133
+ isTechnicalUpgrade: true,
1134
1134
  fetchDate: FETCH_DATE_EARLIER,
1135
1135
  mimeType: HTML_MIME_TYPE,
1136
1136
  }));
@@ -1269,7 +1269,7 @@ describe('GitRepository', () => {
1269
1269
  serviceId: SERVICE_PROVIDER_ID,
1270
1270
  termsType: TERMS_TYPE,
1271
1271
  content: `${CONTENT} - updated 2`,
1272
- isExtractOnly: true,
1272
+ isTechnicalUpgrade: true,
1273
1273
  fetchDate: FETCH_DATE_EARLIER,
1274
1274
  mimeType: HTML_MIME_TYPE,
1275
1275
  }));
@@ -1398,24 +1398,24 @@ describe('GitRepository', () => {
1398
1398
  after(() => subject.removeAll());
1399
1399
 
1400
1400
  describe('Records attributes', () => {
1401
- describe('#isExtractOnly', () => {
1401
+ describe('#isTechnicalUpgrade', () => {
1402
1402
  context('records with deprecated message', () => {
1403
1403
  it('returns the proper value', async () => {
1404
- expect((await subject.findById(commits.deprecatedRefilter.id)).isExtractOnly).to.be.true;
1404
+ expect((await subject.findById(commits.deprecatedRefilter.id)).isTechnicalUpgrade).to.be.true;
1405
1405
  });
1406
1406
 
1407
1407
  it('returns the proper value', async () => {
1408
- expect((await subject.findById(commits.deprecatedFirstRecord.id)).isExtractOnly).to.be.false;
1408
+ expect((await subject.findById(commits.deprecatedFirstRecord.id)).isTechnicalUpgrade).to.be.false;
1409
1409
  });
1410
1410
  });
1411
1411
 
1412
1412
  context('record with current message', () => {
1413
1413
  it('returns the proper value', async () => {
1414
- expect((await subject.findById(commits.currentExtractOnly.id)).isExtractOnly).to.be.true;
1414
+ expect((await subject.findById(commits.currentExtractOnly.id)).isTechnicalUpgrade).to.be.true;
1415
1415
  });
1416
1416
 
1417
1417
  it('returns the proper value', async () => {
1418
- expect((await subject.findById(commits.currentFirstRecord.id)).isExtractOnly).to.be.false;
1418
+ expect((await subject.findById(commits.currentFirstRecord.id)).isTechnicalUpgrade).to.be.false;
1419
1419
  });
1420
1420
  });
1421
1421
  });
@@ -17,7 +17,7 @@ export function toPersistence(record) {
17
17
  }
18
18
 
19
19
  export function toDomain(mongoDocument) {
20
- const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isExtractOnly, isRefilter, isFirstRecord, snapshotIds, metadata } = mongoDocument;
20
+ const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isTechnicalUpgrade, isExtractOnly, isRefilter, isFirstRecord, snapshotIds, metadata } = mongoDocument;
21
21
 
22
22
  const attributes = {
23
23
  id: _id.toString(),
@@ -27,7 +27,7 @@ export function toDomain(mongoDocument) {
27
27
  mimeType,
28
28
  fetchDate: new Date(fetchDate),
29
29
  isFirstRecord: Boolean(isFirstRecord),
30
- isExtractOnly: Boolean(isExtractOnly) || Boolean(isRefilter),
30
+ isTechnicalUpgrade: Boolean(isTechnicalUpgrade) || Boolean(isExtractOnly) || Boolean(isRefilter),
31
31
  snapshotIds: snapshotIds?.map(snapshotId => snapshotId.toString()) || [],
32
32
  metadata,
33
33
  };
@@ -16,6 +16,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
16
16
 
17
17
  const { connectionURI } = config.get('@opentermsarchive/engine.recorder.snapshots.storage.mongo');
18
18
  const client = new MongoClient(connectionURI);
19
+ const isWindows = process.platform === 'win32';
19
20
 
20
21
  const SERVICE_PROVIDER_ID = 'test_service';
21
22
  const TERMS_TYPE = 'Terms of Service';
@@ -41,6 +42,16 @@ const METADATA = {
41
42
  let collection;
42
43
 
43
44
  describe('MongoRepository', () => {
45
+ before(function () {
46
+ if (isWindows) {
47
+ console.log('MongoDB tests are unstable on Windows due to race condition in connection cleanup.');
48
+ console.log('Lacking a production use case for Mongo on Windows, we skip tests. Please reach out if you have a use case.');
49
+ // On Windows, when multiple repositories connect to the same MongoDB server and are closed in parallel or even sequentially, unhandled "Operation interrupted because client was closed" errors occur after all tests pass.
50
+ // The issue does not occur on Linux or macOS, so it appears to be a platform-specific difference in how the MongoDB driver handles connection pool cleanup during client.close().
51
+ this.skip();
52
+ }
53
+ });
54
+
44
55
  let subject;
45
56
 
46
57
  context('Version', () => {
@@ -220,7 +231,7 @@ describe('MongoRepository', () => {
220
231
  });
221
232
  });
222
233
 
223
- context('when it is an extracted only version', () => {
234
+ context('when it is an technical upgrade version', () => {
224
235
  const EXTRACTED_ONLY_CONTENT = `${CONTENT} extracted only`;
225
236
 
226
237
  before(async () => {
@@ -230,7 +241,7 @@ describe('MongoRepository', () => {
230
241
  content: CONTENT,
231
242
  fetchDate: FETCH_DATE_EARLIER,
232
243
  snapshotIds: [SNAPSHOT_ID],
233
- })); // An extracted only version cannot be the first record
244
+ })); // A technical upgrade version cannot be the first record
234
245
 
235
246
  numberOfRecordsBefore = await collection.countDocuments({
236
247
  serviceId: SERVICE_PROVIDER_ID,
@@ -243,7 +254,7 @@ describe('MongoRepository', () => {
243
254
  content: EXTRACTED_ONLY_CONTENT,
244
255
  fetchDate: FETCH_DATE,
245
256
  snapshotIds: [SNAPSHOT_ID],
246
- isExtractOnly: true,
257
+ isTechnicalUpgrade: true,
247
258
  })));
248
259
 
249
260
  numberOfRecordsAfter = await collection.countDocuments({
@@ -267,8 +278,8 @@ describe('MongoRepository', () => {
267
278
  expect(mongoDocument._id.toString()).to.equal(record.id);
268
279
  });
269
280
 
270
- it('stores information that it is an extracted only version', () => {
271
- expect(mongoDocument.isExtractOnly).to.be.true;
281
+ it('stores information that it is an technical upgrade version', () => {
282
+ expect(mongoDocument.isTechnicalUpgrade).to.be.true;
272
283
  });
273
284
  });
274
285
 
@@ -596,7 +607,7 @@ describe('MongoRepository', () => {
596
607
  serviceId: SERVICE_PROVIDER_ID,
597
608
  termsType: TERMS_TYPE,
598
609
  content: `${CONTENT} - updated 2`,
599
- isExtractOnly: true,
610
+ isTechnicalUpgrade: true,
600
611
  fetchDate: FETCH_DATE_EARLIER,
601
612
  snapshotIds: [SNAPSHOT_ID],
602
613
  }));
@@ -645,7 +656,7 @@ describe('MongoRepository', () => {
645
656
  serviceId: SERVICE_PROVIDER_ID,
646
657
  termsType: TERMS_TYPE,
647
658
  content: `${CONTENT} - updated 2`,
648
- isExtractOnly: true,
659
+ isTechnicalUpgrade: true,
649
660
  fetchDate: FETCH_DATE_EARLIER,
650
661
  snapshotIds: [SNAPSHOT_ID],
651
662
  }));
@@ -810,7 +821,7 @@ describe('MongoRepository', () => {
810
821
  serviceId: SERVICE_PROVIDER_ID,
811
822
  termsType: TERMS_TYPE,
812
823
  content: `${CONTENT} - updated 2`,
813
- isExtractOnly: true,
824
+ isTechnicalUpgrade: true,
814
825
  fetchDate: FETCH_DATE_EARLIER,
815
826
  snapshotIds: [SNAPSHOT_ID],
816
827
  }));
@@ -1164,7 +1175,7 @@ describe('MongoRepository', () => {
1164
1175
  serviceId: SERVICE_PROVIDER_ID,
1165
1176
  termsType: TERMS_TYPE,
1166
1177
  content: `${CONTENT} - updated 2`,
1167
- isExtractOnly: true,
1178
+ isTechnicalUpgrade: true,
1168
1179
  fetchDate: FETCH_DATE_EARLIER,
1169
1180
  mimeType: HTML_MIME_TYPE,
1170
1181
  }));
@@ -1213,7 +1224,7 @@ describe('MongoRepository', () => {
1213
1224
  serviceId: SERVICE_PROVIDER_ID,
1214
1225
  termsType: TERMS_TYPE,
1215
1226
  content: `${CONTENT} - updated 2`,
1216
- isExtractOnly: true,
1227
+ isTechnicalUpgrade: true,
1217
1228
  fetchDate: FETCH_DATE_EARLIER,
1218
1229
  mimeType: HTML_MIME_TYPE,
1219
1230
  }));
@@ -1421,7 +1432,7 @@ describe('MongoRepository', () => {
1421
1432
  serviceId: SERVICE_PROVIDER_ID,
1422
1433
  termsType: TERMS_TYPE,
1423
1434
  content: `${CONTENT} - updated 2`,
1424
- isExtractOnly: true,
1435
+ isTechnicalUpgrade: true,
1425
1436
  fetchDate: FETCH_DATE_EARLIER,
1426
1437
  mimeType: HTML_MIME_TYPE,
1427
1438
  }));
@@ -19,7 +19,7 @@ app.use(errorsMiddleware);
19
19
 
20
20
  const port = config.get('@opentermsarchive/engine.collection-api.port');
21
21
 
22
- app.listen(port);
22
+ app.listen(port, '127.0.0.1');
23
23
 
24
24
  if (process.env.NODE_ENV !== 'test') {
25
25
  logger.info(`Start Open Terms Archive API on http://localhost:${port}${BASE_PATH}`);
package/src/index.js CHANGED
@@ -13,7 +13,7 @@ import Reporter from './reporter/index.js';
13
13
  const require = createRequire(import.meta.url);
14
14
  const { version: PACKAGE_VERSION } = require('../package.json');
15
15
 
16
- export default async function track({ services, types, extractOnly, schedule }) {
16
+ async function initialize(services) {
17
17
  const archivist = new Archivist({
18
18
  recorderConfig: config.get('@opentermsarchive/engine.recorder'),
19
19
  fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
@@ -40,13 +40,17 @@ export default async function track({ services, types, extractOnly, schedule })
40
40
  });
41
41
  }
42
42
 
43
- // The result of the extraction step that generates the version from the snapshots may depend on changes to the engine or its dependencies.
44
- // The process thus starts by only performing the extraction process so that any version following such changes can be labelled (to avoid sending notifications, for example)
45
- await archivist.track({ services, types, extractOnly: true });
43
+ return { archivist, services };
44
+ }
46
45
 
47
- if (extractOnly) {
48
- return;
49
- }
46
+ export default async function track({ services, types, schedule }) {
47
+ const { archivist, services: filteredServices } = await initialize(services);
48
+
49
+ // Technical upgrade pass: apply changes from engine, dependency, or declaration upgrades.
50
+ // This regenerates versions from existing snapshots with updated extraction logic.
51
+ // For terms with combined source documents, if a new document was added to the declaration, it will be fetched and combined with existing snapshots to regenerate the complete version.
52
+ // All versions from this pass are labeled as technical upgrades to avoid false notifications about content changes.
53
+ await archivist.applyTechnicalUpgrades({ services: filteredServices, types });
50
54
 
51
55
  if (process.env.OTA_ENGINE_SENDINBLUE_API_KEY) {
52
56
  try {
@@ -72,7 +76,7 @@ export default async function track({ services, types, extractOnly, schedule })
72
76
  }
73
77
 
74
78
  if (!schedule) {
75
- await archivist.track({ services, types });
79
+ await archivist.track({ services: filteredServices, types });
76
80
 
77
81
  return;
78
82
  }
@@ -86,6 +90,12 @@ export default async function track({ services, types, extractOnly, schedule })
86
90
  new Cron( // eslint-disable-line no-new
87
91
  trackingSchedule,
88
92
  { protect: job => logger.warn(`Tracking scheduled at ${new Date().toISOString()} were blocked by an unfinished tracking started at ${job.currentRun().toISOString()}`) },
89
- () => archivist.track({ services, types }),
93
+ () => archivist.track({ services: filteredServices, types }),
90
94
  );
91
95
  }
96
+
97
+ export async function applyTechnicalUpgrades({ services, types }) {
98
+ const { archivist, services: filteredServices } = await initialize(services);
99
+
100
+ await archivist.applyTechnicalUpgrades({ services: filteredServices, types });
101
+ }
@@ -195,9 +195,9 @@ logger.onVersionNotChanged = ({ serviceId, termsType }) => {
195
195
  logger.info({ message: 'No changes after filtering, did not record version', serviceId, termsType });
196
196
  };
197
197
 
198
- logger.onTrackingStarted = (numberOfServices, numberOfTerms, extractOnly) => {
199
- if (extractOnly) {
200
- logger.info(`Examining ${numberOfTerms} terms from ${numberOfServices} services for extraction…`);
198
+ logger.onTrackingStarted = (numberOfServices, numberOfTerms, technicalUpgradeOnly) => {
199
+ if (technicalUpgradeOnly) {
200
+ logger.info(`Applying technical upgrades to ${numberOfTerms} terms from ${numberOfServices} services…`);
201
201
  } else {
202
202
  logger.info(`Tracking changes of ${numberOfTerms} terms from ${numberOfServices} services…`);
203
203
  }
@@ -206,11 +206,11 @@ logger.onTrackingStarted = (numberOfServices, numberOfTerms, extractOnly) => {
206
206
  trackingStartTime = Date.now();
207
207
  };
208
208
 
209
- logger.onTrackingCompleted = (numberOfServices, numberOfTerms, extractOnly) => {
209
+ logger.onTrackingCompleted = (numberOfServices, numberOfTerms, technicalUpgradeOnly) => {
210
210
  const duration = formatDuration(Date.now() - trackingStartTime);
211
211
 
212
- if (extractOnly) {
213
- logger.info(`Examined ${numberOfTerms} terms from ${numberOfServices} services for extraction in ${duration}`);
212
+ if (technicalUpgradeOnly) {
213
+ logger.info(`Applied technical upgrades to ${numberOfTerms} terms from ${numberOfServices} services in ${duration}`);
214
214
  logger.info(`Recorded ${recordedVersionsCount} new versions\n`);
215
215
  } else {
216
216
  logger.info(`Tracked changes of ${numberOfTerms} terms from ${numberOfServices} services in ${duration}`);
@@ -1,5 +1,5 @@
1
- import HttpProxyAgent from 'http-proxy-agent';
2
- import HttpsProxyAgent from 'https-proxy-agent';
1
+ import { HttpProxyAgent } from 'http-proxy-agent';
2
+ import { HttpsProxyAgent } from 'https-proxy-agent';
3
3
  import nodeFetch from 'node-fetch';
4
4
 
5
5
  import logger from '../../logger/index.js';