@opentermsarchive/engine 9.2.3 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ota-apply-technical-upgrades.js +19 -0
- package/bin/ota-track.js +0 -1
- package/bin/ota.js +1 -0
- package/package.json +1 -1
- package/src/archivist/index.js +75 -11
- package/src/archivist/index.test.js +345 -96
- package/src/archivist/recorder/index.js +2 -5
- package/src/archivist/recorder/index.test.js +18 -9
- package/src/archivist/recorder/repositories/git/dataMapper.js +4 -4
- package/src/archivist/recorder/repositories/git/index.test.js +16 -16
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +2 -2
- package/src/archivist/recorder/repositories/mongo/index.test.js +22 -11
- package/src/index.js +19 -9
- package/src/logger/index.js +6 -6
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#! /usr/bin/env node
|
|
2
|
+
import './env.js';
|
|
3
|
+
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
6
|
+
|
|
7
|
+
import { program } from 'commander';
|
|
8
|
+
|
|
9
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
|
|
11
|
+
const { applyTechnicalUpgrades } = await import(pathToFileURL(path.resolve(__dirname, '../src/index.js'))); // load asynchronously to ensure env.js is loaded before
|
|
12
|
+
|
|
13
|
+
program
|
|
14
|
+
.name('ota apply-technical-upgrades')
|
|
15
|
+
.description('Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies, and by retrieving any missing snapshots for newly added source documents')
|
|
16
|
+
.option('-s, --services [serviceId...]', 'service IDs to apply technical upgrades to')
|
|
17
|
+
.option('-t, --types [termsType...]', 'terms types to apply technical upgrades to');
|
|
18
|
+
|
|
19
|
+
applyTechnicalUpgrades(program.parse(process.argv).opts());
|
package/bin/ota-track.js
CHANGED
|
@@ -15,7 +15,6 @@ program
|
|
|
15
15
|
.description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
|
|
16
16
|
.option('-s, --services [serviceId...]', 'service IDs of services to track')
|
|
17
17
|
.option('-t, --types [termsType...]', 'terms types to track')
|
|
18
|
-
.option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
|
|
19
18
|
.option('--schedule', 'track automatically at a regular interval');
|
|
20
19
|
|
|
21
20
|
track(program.parse(process.argv).opts());
|
package/bin/ota.js
CHANGED
|
@@ -11,6 +11,7 @@ program
|
|
|
11
11
|
.description(description)
|
|
12
12
|
.version(version)
|
|
13
13
|
.command('track', 'Track the current terms of services according to provided declarations')
|
|
14
|
+
.command('apply-technical-upgrades', 'Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies')
|
|
14
15
|
.command('validate', 'Run a series of tests to check the validity of terms declarations')
|
|
15
16
|
.command('lint', 'Check format and stylistic errors in declarations and auto fix them')
|
|
16
17
|
.command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
|
package/package.json
CHANGED
package/src/archivist/index.js
CHANGED
|
@@ -20,7 +20,7 @@ const { version: PACKAGE_VERSION } = require('../../package.json');
|
|
|
20
20
|
// - too many requests on the same endpoint yield 403
|
|
21
21
|
// - sometimes when creating a commit no SHA are returned for unknown reasons
|
|
22
22
|
const MAX_PARALLEL_TRACKING = 1;
|
|
23
|
-
const
|
|
23
|
+
const MAX_PARALLEL_TECHNICAL_UPGRADES = 10;
|
|
24
24
|
|
|
25
25
|
export const EVENTS = [
|
|
26
26
|
'snapshotRecorded',
|
|
@@ -128,14 +128,32 @@ export default class Archivist extends events.EventEmitter {
|
|
|
128
128
|
});
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
-
async track({ services: servicesIds = this.servicesIds, types: termsTypes = []
|
|
131
|
+
async track({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
|
|
132
|
+
await this.processTerms({
|
|
133
|
+
servicesIds,
|
|
134
|
+
termsTypes,
|
|
135
|
+
technicalUpgradeOnly: false,
|
|
136
|
+
concurrency: MAX_PARALLEL_TRACKING,
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
async applyTechnicalUpgrades({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
|
|
141
|
+
await this.processTerms({
|
|
142
|
+
servicesIds,
|
|
143
|
+
termsTypes,
|
|
144
|
+
technicalUpgradeOnly: true,
|
|
145
|
+
concurrency: MAX_PARALLEL_TECHNICAL_UPGRADES,
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
async processTerms({ servicesIds, termsTypes, technicalUpgradeOnly, concurrency }) {
|
|
132
150
|
const numberOfTerms = Service.getNumberOfTerms(this.services, servicesIds, termsTypes);
|
|
133
151
|
|
|
134
|
-
this.emit('trackingStarted', servicesIds.length, numberOfTerms,
|
|
152
|
+
this.emit('trackingStarted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
|
|
135
153
|
|
|
136
154
|
await Promise.all([ launchHeadlessBrowser(), this.recorder.initialize() ]);
|
|
137
155
|
|
|
138
|
-
this.trackingQueue.concurrency =
|
|
156
|
+
this.trackingQueue.concurrency = concurrency;
|
|
139
157
|
|
|
140
158
|
servicesIds.forEach(serviceId => {
|
|
141
159
|
this.services[serviceId].getTermsTypes().forEach(termsType => {
|
|
@@ -143,7 +161,7 @@ export default class Archivist extends events.EventEmitter {
|
|
|
143
161
|
return;
|
|
144
162
|
}
|
|
145
163
|
|
|
146
|
-
this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }),
|
|
164
|
+
this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), technicalUpgradeOnly });
|
|
147
165
|
});
|
|
148
166
|
});
|
|
149
167
|
|
|
@@ -153,12 +171,14 @@ export default class Archivist extends events.EventEmitter {
|
|
|
153
171
|
|
|
154
172
|
await Promise.all([ stopHeadlessBrowser(), this.recorder.finalize() ]);
|
|
155
173
|
|
|
156
|
-
this.emit('trackingCompleted', servicesIds.length, numberOfTerms,
|
|
174
|
+
this.emit('trackingCompleted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
|
|
157
175
|
}
|
|
158
176
|
|
|
159
|
-
async trackTermsChanges({ terms,
|
|
160
|
-
if (!
|
|
177
|
+
async trackTermsChanges({ terms, technicalUpgradeOnly = false }) {
|
|
178
|
+
if (!technicalUpgradeOnly) {
|
|
161
179
|
await this.fetchAndRecordSnapshots(terms);
|
|
180
|
+
} else {
|
|
181
|
+
await this.fetchAndRecordNewSourceDocuments(terms); // In technical upgrade mode, fetch and record snapshots only for new source documents that don't have existing snapshots yet (e.g., when a declaration is updated to add a new source document)
|
|
162
182
|
}
|
|
163
183
|
|
|
164
184
|
const contents = await this.extractContentsFromSnapshots(terms);
|
|
@@ -167,7 +187,7 @@ export default class Archivist extends events.EventEmitter {
|
|
|
167
187
|
return;
|
|
168
188
|
}
|
|
169
189
|
|
|
170
|
-
await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR),
|
|
190
|
+
await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), technicalUpgradeOnly);
|
|
171
191
|
}
|
|
172
192
|
|
|
173
193
|
async fetchAndRecordSnapshots(terms) {
|
|
@@ -190,6 +210,50 @@ export default class Archivist extends events.EventEmitter {
|
|
|
190
210
|
}
|
|
191
211
|
}
|
|
192
212
|
|
|
213
|
+
async fetchAndRecordNewSourceDocuments(terms) {
|
|
214
|
+
if (!terms.hasMultipleSourceDocuments) { // If the terms has only one source document, there is nothing to do
|
|
215
|
+
return;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
const existingVersion = await this.recorder.versionsRepository.findLatest(terms.service.id, terms.type);
|
|
219
|
+
|
|
220
|
+
if (!existingVersion) { // If the terms does not have a version recorded, skip this step as the next version will be tagged as "First record…" anyway
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const missingSourceDocuments = [];
|
|
225
|
+
|
|
226
|
+
for (const sourceDocument of terms.sourceDocuments) {
|
|
227
|
+
const snapshot = await this.recorder.getLatestSnapshot(terms, sourceDocument.id);
|
|
228
|
+
|
|
229
|
+
if (!snapshot) {
|
|
230
|
+
missingSourceDocuments.push(sourceDocument);
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
if (!missingSourceDocuments.length) {
|
|
235
|
+
return;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
terms.fetchDate = new Date();
|
|
239
|
+
const fetchDocumentErrors = [];
|
|
240
|
+
|
|
241
|
+
for (const sourceDocument of missingSourceDocuments) {
|
|
242
|
+
const error = await this.fetchSourceDocument(sourceDocument);
|
|
243
|
+
|
|
244
|
+
if (error) {
|
|
245
|
+
fetchDocumentErrors.push(error);
|
|
246
|
+
} else {
|
|
247
|
+
await this.recordSnapshot(terms, sourceDocument);
|
|
248
|
+
sourceDocument.clearContent(); // Reduce memory usage by clearing no longer needed large content strings
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
if (fetchDocumentErrors.length) {
|
|
253
|
+
throw new InaccessibleContentError(fetchDocumentErrors);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
193
257
|
async fetchSourceDocument(sourceDocument) {
|
|
194
258
|
const { location: url, executeClientScripts, cssSelectors } = sourceDocument;
|
|
195
259
|
|
|
@@ -249,14 +313,14 @@ export default class Archivist extends events.EventEmitter {
|
|
|
249
313
|
return contents;
|
|
250
314
|
}
|
|
251
315
|
|
|
252
|
-
async recordVersion(terms, content,
|
|
316
|
+
async recordVersion(terms, content, technicalUpgradeOnly) {
|
|
253
317
|
const record = new Version({
|
|
254
318
|
content,
|
|
255
319
|
snapshotIds: terms.sourceDocuments.map(sourceDocuments => sourceDocuments.snapshotId),
|
|
256
320
|
serviceId: terms.service.id,
|
|
257
321
|
termsType: terms.type,
|
|
258
322
|
fetchDate: terms.fetchDate,
|
|
259
|
-
|
|
323
|
+
isTechnicalUpgrade: technicalUpgradeOnly,
|
|
260
324
|
metadata: { 'x-engine-version': PACKAGE_VERSION },
|
|
261
325
|
});
|
|
262
326
|
|
|
@@ -11,6 +11,7 @@ import sinonChai from 'sinon-chai';
|
|
|
11
11
|
import { InaccessibleContentError } from './errors.js';
|
|
12
12
|
import { FetchDocumentError } from './fetcher/index.js';
|
|
13
13
|
import Git from './recorder/repositories/git/git.js';
|
|
14
|
+
import SourceDocument from './services/sourceDocument.js';
|
|
14
15
|
|
|
15
16
|
import Archivist, { EVENTS } from './index.js';
|
|
16
17
|
|
|
@@ -52,6 +53,31 @@ describe('Archivist', function () {
|
|
|
52
53
|
|
|
53
54
|
const services = [ 'service·A', 'Service B!' ];
|
|
54
55
|
|
|
56
|
+
function setupNockForServices({ serviceA = true, serviceB = true } = {}) {
|
|
57
|
+
nock.cleanAll();
|
|
58
|
+
if (serviceA) {
|
|
59
|
+
nock('https://www.servicea.example')
|
|
60
|
+
.get('/tos')
|
|
61
|
+
.reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
|
|
62
|
+
}
|
|
63
|
+
if (serviceB) {
|
|
64
|
+
nock('https://www.serviceb.example')
|
|
65
|
+
.get('/privacy')
|
|
66
|
+
.reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function createAndInitializeArchivist() {
|
|
71
|
+
const archivist = new Archivist({
|
|
72
|
+
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
73
|
+
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
await archivist.initialize();
|
|
77
|
+
|
|
78
|
+
return archivist;
|
|
79
|
+
}
|
|
80
|
+
|
|
55
81
|
before(async () => {
|
|
56
82
|
gitVersion = new Git({
|
|
57
83
|
path: VERSIONS_PATH,
|
|
@@ -70,13 +96,8 @@ describe('Archivist', function () {
|
|
|
70
96
|
|
|
71
97
|
describe('#track', () => {
|
|
72
98
|
before(async () => {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
app = new Archivist({
|
|
76
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
77
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
78
|
-
});
|
|
79
|
-
await app.initialize();
|
|
99
|
+
setupNockForServices();
|
|
100
|
+
app = await createAndInitializeArchivist();
|
|
80
101
|
});
|
|
81
102
|
|
|
82
103
|
context('when everything works fine', () => {
|
|
@@ -112,8 +133,7 @@ describe('Archivist', function () {
|
|
|
112
133
|
context('when there is an operational error with service A', () => {
|
|
113
134
|
before(async () => {
|
|
114
135
|
// as there is no more HTTP request mocks for service A, it should throw an `ENOTFOUND` error which is considered as an expected error in our workflow
|
|
115
|
-
|
|
116
|
-
nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
136
|
+
setupNockForServices({ serviceA: false, serviceB: true });
|
|
117
137
|
await app.track({ services });
|
|
118
138
|
});
|
|
119
139
|
|
|
@@ -139,107 +159,353 @@ describe('Archivist', function () {
|
|
|
139
159
|
expect(resultingTerms).to.equal(serviceBVersionExpectedContent);
|
|
140
160
|
});
|
|
141
161
|
});
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
describe('#applyTechnicalUpgrades', () => {
|
|
165
|
+
context('when a service’s filter declaration changes', () => {
|
|
166
|
+
context('when everything works fine', () => {
|
|
167
|
+
let originalSnapshotId;
|
|
168
|
+
let firstVersionId;
|
|
169
|
+
let reExtractedVersionId;
|
|
170
|
+
let reExtractedVersionMessageBody;
|
|
171
|
+
let serviceBCommits;
|
|
172
|
+
|
|
173
|
+
before(async () => {
|
|
174
|
+
setupNockForServices();
|
|
175
|
+
app = await createAndInitializeArchivist();
|
|
176
|
+
await app.track({ services });
|
|
177
|
+
|
|
178
|
+
({ id: originalSnapshotId } = await app.recorder.snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
179
|
+
({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
180
|
+
|
|
181
|
+
serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
|
|
182
|
+
|
|
183
|
+
app.services[SERVICE_A_ID].getTerms({ type: SERVICE_A_TYPE }).sourceDocuments[0].contentSelectors = 'h1';
|
|
184
|
+
|
|
185
|
+
await app.applyTechnicalUpgrades({ services: [ 'service·A', 'Service B!' ] });
|
|
186
|
+
|
|
187
|
+
const [reExtractedVersionCommit] = await gitVersion.log({ file: SERVICE_A_EXPECTED_VERSION_FILE_PATH });
|
|
188
|
+
|
|
189
|
+
reExtractedVersionId = reExtractedVersionCommit.hash;
|
|
190
|
+
reExtractedVersionMessageBody = reExtractedVersionCommit.body;
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
after(resetGitRepositories);
|
|
194
|
+
|
|
195
|
+
it('updates the version of the changed service', async () => {
|
|
196
|
+
const serviceAContent = await fs.readFile(path.resolve(__dirname, SERVICE_A_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
|
|
197
|
+
|
|
198
|
+
expect(serviceAContent).to.equal('Terms of service with UTF-8 \'çhãràčtęrs"\n========================================');
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
it('generates a new version id', () => {
|
|
202
|
+
expect(reExtractedVersionId).to.not.equal(firstVersionId);
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
it('mentions the snapshot id in the changelog', () => {
|
|
206
|
+
expect(reExtractedVersionMessageBody).to.include(originalSnapshotId);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it('does not change other services', async () => {
|
|
210
|
+
const serviceBVersion = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
|
|
211
|
+
|
|
212
|
+
expect(serviceBVersion).to.equal(serviceBVersionExpectedContent);
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
it('does not generate a new id for other services', async () => {
|
|
216
|
+
const serviceBCommitsAfterExtraction = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
|
|
217
|
+
|
|
218
|
+
expect(serviceBCommitsAfterExtraction.map(commit => commit.hash)).to.deep.equal(serviceBCommits.map(commit => commit.hash));
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
context('when there is an operational error with service A', () => {
|
|
223
|
+
let inaccessibleContentSpy;
|
|
224
|
+
let versionNotChangedSpy;
|
|
225
|
+
let versionB;
|
|
226
|
+
|
|
227
|
+
before(async () => {
|
|
228
|
+
setupNockForServices();
|
|
229
|
+
app = await createAndInitializeArchivist();
|
|
230
|
+
await app.track({ services });
|
|
231
|
+
app.services[SERVICE_A_ID].getTerms({ type: SERVICE_A_TYPE }).sourceDocuments[0].contentSelectors = 'inexistant-selector';
|
|
232
|
+
inaccessibleContentSpy = sinon.spy();
|
|
233
|
+
versionNotChangedSpy = sinon.spy();
|
|
234
|
+
app.on('inaccessibleContent', inaccessibleContentSpy);
|
|
235
|
+
app.on('versionNotChanged', record => {
|
|
236
|
+
if (record.serviceId == 'Service B!') {
|
|
237
|
+
versionB = record;
|
|
238
|
+
}
|
|
239
|
+
versionNotChangedSpy(record);
|
|
240
|
+
});
|
|
241
|
+
await app.applyTechnicalUpgrades({ services });
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
after(resetGitRepositories);
|
|
245
|
+
|
|
246
|
+
it('emits an inaccessibleContent event', () => {
|
|
247
|
+
expect(inaccessibleContentSpy).to.have.been.called;
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
it('still extracts the terms of other services', () => {
|
|
251
|
+
expect(versionNotChangedSpy).to.have.been.calledWith(versionB);
|
|
252
|
+
});
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
describe('with combined source documents', () => {
|
|
256
|
+
const MULTI_SOURCE_DOCS = {
|
|
257
|
+
SERVICE_ID: 'service_with_multiple_source_documents_terms',
|
|
258
|
+
TERMS_TYPE: 'Community Guidelines',
|
|
259
|
+
BASE_URL: 'https://www.service-with-multiple-source-documents-terms.example',
|
|
260
|
+
CONTENT: {
|
|
261
|
+
COMMUNITY_STANDARDS: '<html><body id="main"><h1>Community Standards</h1><p>Community Standards content</p></body></html>',
|
|
262
|
+
HATE_SPEECH: '<html><body><p>Hate speech content</p><div id="footer">Footer</div></body></html>',
|
|
263
|
+
VIOLENCE_INCITEMENT: '<html><body><p>Violence incitement content</p><button class="share">Share</button><button class="print">Print</button></body></html>',
|
|
264
|
+
NEW_POLICY: '<html><body><p>New additional policy</p></body></html>',
|
|
265
|
+
},
|
|
266
|
+
PATHS: {
|
|
267
|
+
COMMUNITY_STANDARDS: '/community-standards',
|
|
268
|
+
HATE_SPEECH: '/community-standards/hate-speech/',
|
|
269
|
+
VIOLENCE_INCITEMENT: '/community-standards/violence-incitement/',
|
|
270
|
+
NEW_POLICY: '/community-standards/new-policy/',
|
|
271
|
+
},
|
|
272
|
+
EXPECTED_TEXTS: {
|
|
273
|
+
COMMUNITY_STANDARDS: 'Community Standards',
|
|
274
|
+
HATE_SPEECH: 'Hate speech content',
|
|
275
|
+
VIOLENCE_INCITEMENT: 'Violence incitement content',
|
|
276
|
+
NEW_POLICY: 'New additional policy',
|
|
277
|
+
},
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
const { SERVICE_ID, TERMS_TYPE } = MULTI_SOURCE_DOCS;
|
|
281
|
+
|
|
282
|
+
function setupNockForMultiSourceDocs(pathKeys) {
|
|
283
|
+
pathKeys.forEach(pathKey => {
|
|
284
|
+
nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
285
|
+
.persist()
|
|
286
|
+
.get(MULTI_SOURCE_DOCS.PATHS[pathKey])
|
|
287
|
+
.reply(200, MULTI_SOURCE_DOCS.CONTENT[pathKey], { 'Content-Type': 'text/html' });
|
|
288
|
+
});
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
function disableClientScriptsForTerms(terms) {
|
|
292
|
+
terms.sourceDocuments.forEach(doc => {
|
|
293
|
+
doc.executeClientScripts = false;
|
|
294
|
+
});
|
|
295
|
+
}
|
|
142
296
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
let originalSnapshotId;
|
|
147
|
-
let firstVersionId;
|
|
148
|
-
let reExtractedVersionId;
|
|
149
|
-
let reExtractedVersionMessageBody;
|
|
150
|
-
let serviceBCommits;
|
|
297
|
+
context('when a source document is added to existing combined terms', () => {
|
|
298
|
+
let initialVersion;
|
|
299
|
+
let upgradeVersion;
|
|
151
300
|
|
|
152
301
|
before(async () => {
|
|
153
|
-
|
|
154
|
-
nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
155
|
-
app = new Archivist({
|
|
156
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
157
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
158
|
-
});
|
|
302
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT', 'NEW_POLICY' ]);
|
|
159
303
|
|
|
160
|
-
await
|
|
161
|
-
await app.track({ services });
|
|
304
|
+
app = await createAndInitializeArchivist();
|
|
162
305
|
|
|
163
|
-
|
|
164
|
-
({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
306
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
165
307
|
|
|
166
|
-
|
|
308
|
+
disableClientScriptsForTerms(terms);
|
|
167
309
|
|
|
168
|
-
|
|
310
|
+
// First, track the terms normally to create initial version
|
|
311
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
312
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
169
313
|
|
|
170
|
-
|
|
314
|
+
// Modify the declaration to add a new source document
|
|
315
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
171
316
|
|
|
172
|
-
|
|
317
|
+
terms.sourceDocuments.push(new SourceDocument({
|
|
318
|
+
id: 'new-policy',
|
|
319
|
+
location: `${MULTI_SOURCE_DOCS.BASE_URL}${MULTI_SOURCE_DOCS.PATHS.NEW_POLICY}`,
|
|
320
|
+
contentSelectors: 'body',
|
|
321
|
+
executeClientScripts: false,
|
|
322
|
+
filters: [],
|
|
323
|
+
}));
|
|
173
324
|
|
|
174
|
-
|
|
175
|
-
|
|
325
|
+
// Apply technical upgrades
|
|
326
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
327
|
+
upgradeVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
176
328
|
});
|
|
177
329
|
|
|
178
|
-
after(
|
|
330
|
+
after(async () => {
|
|
331
|
+
await resetGitRepositories();
|
|
332
|
+
nock.cleanAll();
|
|
333
|
+
});
|
|
179
334
|
|
|
180
|
-
it('
|
|
181
|
-
|
|
335
|
+
it('creates a new version', () => {
|
|
336
|
+
expect(upgradeVersion.id).to.not.equal(initialVersion.id);
|
|
337
|
+
});
|
|
182
338
|
|
|
183
|
-
|
|
339
|
+
it('marks the new version as technical upgrade', () => {
|
|
340
|
+
expect(upgradeVersion.isTechnicalUpgrade).to.be.true;
|
|
184
341
|
});
|
|
185
342
|
|
|
186
|
-
it('
|
|
187
|
-
|
|
343
|
+
it('fetches and includes the new source document in the version', async () => {
|
|
344
|
+
const versionContent = await upgradeVersion.content;
|
|
345
|
+
|
|
346
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.NEW_POLICY);
|
|
188
347
|
});
|
|
189
348
|
|
|
190
|
-
it('
|
|
191
|
-
|
|
349
|
+
it('includes all source documents in version', async () => {
|
|
350
|
+
const versionContent = await upgradeVersion.content;
|
|
351
|
+
|
|
352
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.COMMUNITY_STANDARDS);
|
|
353
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.HATE_SPEECH);
|
|
354
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.VIOLENCE_INCITEMENT);
|
|
355
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.NEW_POLICY);
|
|
192
356
|
});
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
context('when a source document location is modified in combined terms', () => {
|
|
360
|
+
let initialVersion;
|
|
361
|
+
let latestVersion;
|
|
362
|
+
let newLocationScope;
|
|
363
|
+
|
|
364
|
+
before(async () => {
|
|
365
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT' ]);
|
|
366
|
+
|
|
367
|
+
app = await createAndInitializeArchivist();
|
|
368
|
+
|
|
369
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
370
|
+
|
|
371
|
+
disableClientScriptsForTerms(terms);
|
|
372
|
+
|
|
373
|
+
// First, track the terms normally
|
|
374
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
375
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
376
|
+
|
|
377
|
+
// Mock new location (but it won't be fetched during technical upgrade)
|
|
378
|
+
newLocationScope = nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
379
|
+
.persist()
|
|
380
|
+
.get('/community-standards/hate-speech-updated/')
|
|
381
|
+
.reply(200, '<html><body><p>Updated hate speech policy</p></body></html>', { 'Content-Type': 'text/html' });
|
|
193
382
|
|
|
194
|
-
|
|
195
|
-
|
|
383
|
+
// Modify the declaration to change location
|
|
384
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
196
385
|
|
|
197
|
-
|
|
386
|
+
terms.sourceDocuments[1].location = `${MULTI_SOURCE_DOCS.BASE_URL}/community-standards/hate-speech-updated/`;
|
|
387
|
+
|
|
388
|
+
// Apply technical upgrades
|
|
389
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
390
|
+
latestVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
after(async () => {
|
|
394
|
+
await resetGitRepositories();
|
|
395
|
+
nock.cleanAll();
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
it('does not create a new version', () => {
|
|
399
|
+
expect(latestVersion.id).to.equal(initialVersion.id);
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
it('does not fetch from new location', () => {
|
|
403
|
+
expect(newLocationScope.isDone()).to.be.false;
|
|
198
404
|
});
|
|
199
405
|
|
|
200
|
-
it('does not
|
|
201
|
-
const
|
|
406
|
+
it('does not include content from the new location', async () => {
|
|
407
|
+
const versionContent = await latestVersion.content;
|
|
202
408
|
|
|
203
|
-
expect(
|
|
409
|
+
expect(versionContent).to.not.include('Updated hate speech policy');
|
|
204
410
|
});
|
|
205
411
|
});
|
|
206
412
|
|
|
207
|
-
context('when
|
|
208
|
-
let
|
|
209
|
-
let
|
|
210
|
-
let
|
|
413
|
+
context('when a source document selector is modified in combined terms', () => {
|
|
414
|
+
let initialVersion;
|
|
415
|
+
let latestVersion;
|
|
416
|
+
let initialVersionContent;
|
|
417
|
+
let upgradeVersionContent;
|
|
211
418
|
|
|
212
419
|
before(async () => {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
app =
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
420
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT' ]);
|
|
421
|
+
|
|
422
|
+
app = await createAndInitializeArchivist();
|
|
423
|
+
|
|
424
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
425
|
+
|
|
426
|
+
disableClientScriptsForTerms(terms);
|
|
427
|
+
|
|
428
|
+
// First, track the terms normally
|
|
429
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
430
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
431
|
+
initialVersionContent = await initialVersion.content;
|
|
432
|
+
|
|
433
|
+
// Modify the declaration to change selector
|
|
434
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
219
435
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
app.
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
436
|
+
// Change from 'body' to 'h1' for the first source document
|
|
437
|
+
terms.sourceDocuments[0].contentSelectors = 'h1';
|
|
438
|
+
|
|
439
|
+
// Apply technical upgrades
|
|
440
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
441
|
+
latestVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
442
|
+
upgradeVersionContent = await latestVersion.content;
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
after(async () => {
|
|
446
|
+
await resetGitRepositories();
|
|
447
|
+
nock.cleanAll();
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
it('creates a new version', () => {
|
|
451
|
+
expect(latestVersion.id).to.not.equal(initialVersion.id);
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
it('marks the new version as technical upgrade', () => {
|
|
455
|
+
expect(latestVersion.isTechnicalUpgrade).to.be.true;
|
|
456
|
+
});
|
|
457
|
+
|
|
458
|
+
it('extracts content with the new selector from existing snapshot', () => {
|
|
459
|
+
// With new selector 'h1', should only extract the heading
|
|
460
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.COMMUNITY_STANDARDS);
|
|
461
|
+
// The rest should be from other source documents
|
|
462
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.HATE_SPEECH);
|
|
463
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.VIOLENCE_INCITEMENT);
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
it('regenerates version with updated extraction logic', () => {
|
|
467
|
+
expect(upgradeVersionContent).to.not.equal(initialVersionContent);
|
|
468
|
+
});
|
|
469
|
+
});
|
|
470
|
+
|
|
471
|
+
context('when adding source document but no version exists yet', () => {
|
|
472
|
+
let newSourceScope;
|
|
473
|
+
|
|
474
|
+
before(async () => {
|
|
475
|
+
newSourceScope = nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
476
|
+
.get(MULTI_SOURCE_DOCS.PATHS.NEW_POLICY)
|
|
477
|
+
.reply(200, MULTI_SOURCE_DOCS.CONTENT.NEW_POLICY, { 'Content-Type': 'text/html' });
|
|
478
|
+
|
|
479
|
+
app = await createAndInitializeArchivist();
|
|
480
|
+
|
|
481
|
+
// Modify declaration before any tracking
|
|
482
|
+
const terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
483
|
+
|
|
484
|
+
terms.sourceDocuments.push({
|
|
485
|
+
id: 'new-policy',
|
|
486
|
+
location: `${MULTI_SOURCE_DOCS.BASE_URL}${MULTI_SOURCE_DOCS.PATHS.NEW_POLICY}`,
|
|
487
|
+
contentSelectors: 'body',
|
|
488
|
+
executeClientScripts: false,
|
|
489
|
+
filters: [],
|
|
231
490
|
});
|
|
232
|
-
|
|
491
|
+
|
|
492
|
+
// Apply technical upgrades (should skip because no version exists)
|
|
493
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
233
494
|
});
|
|
234
495
|
|
|
235
|
-
after(
|
|
496
|
+
after(async () => {
|
|
497
|
+
await resetGitRepositories();
|
|
498
|
+
nock.cleanAll();
|
|
499
|
+
});
|
|
236
500
|
|
|
237
|
-
it('
|
|
238
|
-
|
|
501
|
+
it('does not create a version when none existed before', async () => {
|
|
502
|
+
const version = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
503
|
+
|
|
504
|
+
expect(version).to.be.null;
|
|
239
505
|
});
|
|
240
506
|
|
|
241
|
-
it('
|
|
242
|
-
expect(
|
|
507
|
+
it('does not fetch the new source document', () => {
|
|
508
|
+
expect(newSourceScope.isDone()).to.be.false;
|
|
243
509
|
});
|
|
244
510
|
});
|
|
245
511
|
});
|
|
@@ -256,11 +522,7 @@ describe('Archivist', function () {
|
|
|
256
522
|
const retryableError = new FetchDocumentError(FetchDocumentError.LIKELY_TRANSIENT_ERRORS[0]);
|
|
257
523
|
|
|
258
524
|
before(async () => {
|
|
259
|
-
app =
|
|
260
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
261
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
262
|
-
});
|
|
263
|
-
await app.initialize();
|
|
525
|
+
app = await createAndInitializeArchivist();
|
|
264
526
|
});
|
|
265
527
|
|
|
266
528
|
beforeEach(() => {
|
|
@@ -345,11 +607,7 @@ describe('Archivist', function () {
|
|
|
345
607
|
|
|
346
608
|
describe('#attach', () => {
|
|
347
609
|
before(async () => {
|
|
348
|
-
app =
|
|
349
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
350
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
351
|
-
});
|
|
352
|
-
await app.initialize();
|
|
610
|
+
app = await createAndInitializeArchivist();
|
|
353
611
|
|
|
354
612
|
EVENTS.forEach(event => {
|
|
355
613
|
const handlerName = `on${event[0].toUpperCase()}${event.substring(1)}`;
|
|
@@ -378,14 +636,9 @@ describe('Archivist', function () {
|
|
|
378
636
|
let plugin;
|
|
379
637
|
|
|
380
638
|
before(async () => {
|
|
381
|
-
|
|
382
|
-
nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
|
|
639
|
+
setupNockForServices({ serviceA: true, serviceB: false });
|
|
383
640
|
|
|
384
|
-
app =
|
|
385
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
386
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
387
|
-
});
|
|
388
|
-
await app.initialize();
|
|
641
|
+
app = await createAndInitializeArchivist();
|
|
389
642
|
|
|
390
643
|
plugin = { onFirstVersionRecorded: () => { throw new Error('Plugin error'); } };
|
|
391
644
|
|
|
@@ -432,11 +685,7 @@ describe('Archivist', function () {
|
|
|
432
685
|
}
|
|
433
686
|
|
|
434
687
|
before(async () => {
|
|
435
|
-
app =
|
|
436
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
437
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
438
|
-
});
|
|
439
|
-
await app.initialize();
|
|
688
|
+
app = await createAndInitializeArchivist();
|
|
440
689
|
|
|
441
690
|
EVENTS.forEach(event => {
|
|
442
691
|
const handlerName = `on${event[0].toUpperCase()}${event.substr(1)}`;
|
|
@@ -12,11 +12,8 @@ export default class Recorder {
|
|
|
12
12
|
return Promise.all([ this.versionsRepository.initialize(), this.snapshotsRepository.initialize() ]);
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
// Parallel closing can cause "Operation interrupted because client was closed" errors, especially on Windows.
|
|
18
|
-
await this.versionsRepository.finalize();
|
|
19
|
-
await this.snapshotsRepository.finalize();
|
|
15
|
+
finalize() {
|
|
16
|
+
return Promise.all([ this.versionsRepository.finalize(), this.snapshotsRepository.finalize() ]);
|
|
20
17
|
}
|
|
21
18
|
|
|
22
19
|
getLatestSnapshot(terms, sourceDocumentId) {
|
|
@@ -6,6 +6,8 @@ import Version from './version.js';
|
|
|
6
6
|
|
|
7
7
|
import Recorder from './index.js';
|
|
8
8
|
|
|
9
|
+
const isWindows = process.platform === 'win32';
|
|
10
|
+
|
|
9
11
|
const MIME_TYPE = 'text/html';
|
|
10
12
|
const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
|
|
11
13
|
const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
|
|
@@ -18,7 +20,14 @@ describe('Recorder', () => {
|
|
|
18
20
|
describe(repositoryType, () => {
|
|
19
21
|
let recorder;
|
|
20
22
|
|
|
21
|
-
before(async ()
|
|
23
|
+
before(async function () {
|
|
24
|
+
if (repositoryType == 'mongo' && isWindows) {
|
|
25
|
+
console.log('MongoDB tests are unstable on Windows due to race condition in connection cleanup.');
|
|
26
|
+
console.log('Lacking a production use case for Mongo on Windows, we skip tests. Please reach out if you have a use case.');
|
|
27
|
+
// On Windows, when multiple repositories connect to the same MongoDB server and are closed in parallel or even sequentially, unhandled "Operation interrupted because client was closed" errors occur after all tests pass.
|
|
28
|
+
// The issue does not occur on Linux or macOS, so it appears to be a platform-specific difference in how the MongoDB driver handles connection pool cleanup during client.close().
|
|
29
|
+
this.skip();
|
|
30
|
+
}
|
|
22
31
|
const options = config.util.cloneDeep(config.get('@opentermsarchive/engine.recorder'));
|
|
23
32
|
|
|
24
33
|
options.versions.storage.type = repositoryType;
|
|
@@ -28,7 +37,7 @@ describe('Recorder', () => {
|
|
|
28
37
|
await recorder.initialize();
|
|
29
38
|
});
|
|
30
39
|
|
|
31
|
-
after(() => recorder
|
|
40
|
+
after(() => recorder?.finalize());
|
|
32
41
|
|
|
33
42
|
context('Snapshot', () => {
|
|
34
43
|
describe('#record', () => {
|
|
@@ -258,8 +267,8 @@ describe('Recorder', () => {
|
|
|
258
267
|
expect(await record.content).to.equal(UPDATED_CONTENT);
|
|
259
268
|
});
|
|
260
269
|
|
|
261
|
-
it('records in the version that it is not
|
|
262
|
-
expect(record.
|
|
270
|
+
it('records in the version that it is not a technical upgrade version', () => {
|
|
271
|
+
expect(record.isTechnicalUpgrade).to.equal(false);
|
|
263
272
|
});
|
|
264
273
|
|
|
265
274
|
it('returns the record id', () => {
|
|
@@ -315,7 +324,7 @@ describe('Recorder', () => {
|
|
|
315
324
|
content: CONTENT,
|
|
316
325
|
snapshotIds: [SNAPSHOT_ID],
|
|
317
326
|
fetchDate: FETCH_DATE,
|
|
318
|
-
|
|
327
|
+
isTechnicalUpgrade: true,
|
|
319
328
|
})));
|
|
320
329
|
|
|
321
330
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|
|
@@ -354,7 +363,7 @@ describe('Recorder', () => {
|
|
|
354
363
|
content: UPDATED_CONTENT,
|
|
355
364
|
snapshotIds: [SNAPSHOT_ID],
|
|
356
365
|
fetchDate: FETCH_DATE_LATER,
|
|
357
|
-
|
|
366
|
+
isTechnicalUpgrade: true,
|
|
358
367
|
})));
|
|
359
368
|
|
|
360
369
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|
|
@@ -366,8 +375,8 @@ describe('Recorder', () => {
|
|
|
366
375
|
expect(await record.content).to.equal(UPDATED_CONTENT);
|
|
367
376
|
});
|
|
368
377
|
|
|
369
|
-
it('records in the version that it is an
|
|
370
|
-
expect(record.
|
|
378
|
+
it('records in the version that it is an technical upgrade version', () => {
|
|
379
|
+
expect(record.isTechnicalUpgrade).to.equal(true);
|
|
371
380
|
});
|
|
372
381
|
|
|
373
382
|
it('returns the record id', () => {
|
|
@@ -395,7 +404,7 @@ describe('Recorder', () => {
|
|
|
395
404
|
content: CONTENT,
|
|
396
405
|
snapshotIds: [SNAPSHOT_ID],
|
|
397
406
|
fetchDate: FETCH_DATE_LATER,
|
|
398
|
-
|
|
407
|
+
isTechnicalUpgrade: true,
|
|
399
408
|
})));
|
|
400
409
|
|
|
401
410
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|
|
@@ -7,7 +7,7 @@ import Version from '../../version.js';
|
|
|
7
7
|
|
|
8
8
|
export const COMMIT_MESSAGE_PREFIXES = {
|
|
9
9
|
startTracking: 'First record of',
|
|
10
|
-
|
|
10
|
+
technicalUpgrade: 'Apply technical or declaration upgrade on',
|
|
11
11
|
update: 'Record new changes of',
|
|
12
12
|
deprecated_startTracking: 'Start tracking',
|
|
13
13
|
deprecated_refilter: 'Refilter',
|
|
@@ -22,9 +22,9 @@ const MULTIPLE_SOURCE_DOCUMENTS_PREFIX = 'This version was recorded after extrac
|
|
|
22
22
|
export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${Object.values(COMMIT_MESSAGE_PREFIXES).join('|')})`);
|
|
23
23
|
|
|
24
24
|
export function toPersistence(record, snapshotIdentiferTemplate) {
|
|
25
|
-
const { serviceId, termsType, documentId,
|
|
25
|
+
const { serviceId, termsType, documentId, isTechnicalUpgrade, snapshotIds = [], mimeType, isFirstRecord, metadata } = record;
|
|
26
26
|
|
|
27
|
-
let prefix =
|
|
27
|
+
let prefix = isTechnicalUpgrade ? COMMIT_MESSAGE_PREFIXES.technicalUpgrade : COMMIT_MESSAGE_PREFIXES.update;
|
|
28
28
|
|
|
29
29
|
prefix = isFirstRecord ? COMMIT_MESSAGE_PREFIXES.startTracking : prefix;
|
|
30
30
|
|
|
@@ -75,7 +75,7 @@ export function toDomain(commit) {
|
|
|
75
75
|
const mimeTypeValue = mime.getType(relativeFilePath);
|
|
76
76
|
|
|
77
77
|
if (mimeTypeValue == mime.getType('markdown')) {
|
|
78
|
-
attributes.
|
|
78
|
+
attributes.isTechnicalUpgrade = message.startsWith(COMMIT_MESSAGE_PREFIXES.technicalUpgrade) || message.startsWith(COMMIT_MESSAGE_PREFIXES.deprecated_refilter);
|
|
79
79
|
attributes.snapshotIds = snapshotIdsMatch;
|
|
80
80
|
|
|
81
81
|
return new Version(attributes);
|
|
@@ -208,7 +208,7 @@ describe('GitRepository', () => {
|
|
|
208
208
|
});
|
|
209
209
|
});
|
|
210
210
|
|
|
211
|
-
context('when it is an
|
|
211
|
+
context('when it is an technical upgrade version', () => {
|
|
212
212
|
const EXTRACTED_ONLY_CONTENT = `${CONTENT} extracted only`;
|
|
213
213
|
|
|
214
214
|
before(async () => {
|
|
@@ -217,7 +217,7 @@ describe('GitRepository', () => {
|
|
|
217
217
|
termsType: TERMS_TYPE,
|
|
218
218
|
content: CONTENT,
|
|
219
219
|
fetchDate: FETCH_DATE_EARLIER,
|
|
220
|
-
})); // An
|
|
220
|
+
})); // A technical upgrade version cannot be the first record
|
|
221
221
|
|
|
222
222
|
numberOfRecordsBefore = (await git.log()).length;
|
|
223
223
|
|
|
@@ -226,7 +226,7 @@ describe('GitRepository', () => {
|
|
|
226
226
|
termsType: TERMS_TYPE,
|
|
227
227
|
content: EXTRACTED_ONLY_CONTENT,
|
|
228
228
|
fetchDate: FETCH_DATE,
|
|
229
|
-
|
|
229
|
+
isTechnicalUpgrade: true,
|
|
230
230
|
snapshotIds: [SNAPSHOT_ID],
|
|
231
231
|
})));
|
|
232
232
|
|
|
@@ -245,8 +245,8 @@ describe('GitRepository', () => {
|
|
|
245
245
|
expect(commit.hash).to.include(id);
|
|
246
246
|
});
|
|
247
247
|
|
|
248
|
-
it('stores information that it is an
|
|
249
|
-
expect(commit.message).to.include(COMMIT_MESSAGE_PREFIXES.
|
|
248
|
+
it('stores information that it is an technical upgrade version', () => {
|
|
249
|
+
expect(commit.message).to.include(COMMIT_MESSAGE_PREFIXES.technicalUpgrade);
|
|
250
250
|
});
|
|
251
251
|
});
|
|
252
252
|
|
|
@@ -518,7 +518,7 @@ describe('GitRepository', () => {
|
|
|
518
518
|
serviceId: SERVICE_PROVIDER_ID,
|
|
519
519
|
termsType: TERMS_TYPE,
|
|
520
520
|
content: `${CONTENT} - updated 2`,
|
|
521
|
-
|
|
521
|
+
isTechnicalUpgrade: true,
|
|
522
522
|
fetchDate: FETCH_DATE_EARLIER,
|
|
523
523
|
snapshotIds: [SNAPSHOT_ID],
|
|
524
524
|
}));
|
|
@@ -569,7 +569,7 @@ describe('GitRepository', () => {
|
|
|
569
569
|
serviceId: SERVICE_PROVIDER_ID,
|
|
570
570
|
termsType: TERMS_TYPE,
|
|
571
571
|
content: `${CONTENT} - updated 2`,
|
|
572
|
-
|
|
572
|
+
isTechnicalUpgrade: true,
|
|
573
573
|
fetchDate: FETCH_DATE_EARLIER,
|
|
574
574
|
snapshotIds: [SNAPSHOT_ID],
|
|
575
575
|
}));
|
|
@@ -678,7 +678,7 @@ describe('GitRepository', () => {
|
|
|
678
678
|
serviceId: SERVICE_PROVIDER_ID,
|
|
679
679
|
termsType: TERMS_TYPE,
|
|
680
680
|
content: `${CONTENT} - updated 2`,
|
|
681
|
-
|
|
681
|
+
isTechnicalUpgrade: true,
|
|
682
682
|
fetchDate: FETCH_DATE_EARLIER,
|
|
683
683
|
snapshotIds: [SNAPSHOT_ID],
|
|
684
684
|
mimeType: HTML_MIME_TYPE,
|
|
@@ -1079,7 +1079,7 @@ describe('GitRepository', () => {
|
|
|
1079
1079
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1080
1080
|
termsType: TERMS_TYPE,
|
|
1081
1081
|
content: `${CONTENT} - updated 2`,
|
|
1082
|
-
|
|
1082
|
+
isTechnicalUpgrade: true,
|
|
1083
1083
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1084
1084
|
mimeType: HTML_MIME_TYPE,
|
|
1085
1085
|
}));
|
|
@@ -1130,7 +1130,7 @@ describe('GitRepository', () => {
|
|
|
1130
1130
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1131
1131
|
termsType: TERMS_TYPE,
|
|
1132
1132
|
content: `${CONTENT} - updated 2`,
|
|
1133
|
-
|
|
1133
|
+
isTechnicalUpgrade: true,
|
|
1134
1134
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1135
1135
|
mimeType: HTML_MIME_TYPE,
|
|
1136
1136
|
}));
|
|
@@ -1269,7 +1269,7 @@ describe('GitRepository', () => {
|
|
|
1269
1269
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1270
1270
|
termsType: TERMS_TYPE,
|
|
1271
1271
|
content: `${CONTENT} - updated 2`,
|
|
1272
|
-
|
|
1272
|
+
isTechnicalUpgrade: true,
|
|
1273
1273
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1274
1274
|
mimeType: HTML_MIME_TYPE,
|
|
1275
1275
|
}));
|
|
@@ -1398,24 +1398,24 @@ describe('GitRepository', () => {
|
|
|
1398
1398
|
after(() => subject.removeAll());
|
|
1399
1399
|
|
|
1400
1400
|
describe('Records attributes', () => {
|
|
1401
|
-
describe('#
|
|
1401
|
+
describe('#isTechnicalUpgrade', () => {
|
|
1402
1402
|
context('records with deprecated message', () => {
|
|
1403
1403
|
it('returns the proper value', async () => {
|
|
1404
|
-
expect((await subject.findById(commits.deprecatedRefilter.id)).
|
|
1404
|
+
expect((await subject.findById(commits.deprecatedRefilter.id)).isTechnicalUpgrade).to.be.true;
|
|
1405
1405
|
});
|
|
1406
1406
|
|
|
1407
1407
|
it('returns the proper value', async () => {
|
|
1408
|
-
expect((await subject.findById(commits.deprecatedFirstRecord.id)).
|
|
1408
|
+
expect((await subject.findById(commits.deprecatedFirstRecord.id)).isTechnicalUpgrade).to.be.false;
|
|
1409
1409
|
});
|
|
1410
1410
|
});
|
|
1411
1411
|
|
|
1412
1412
|
context('record with current message', () => {
|
|
1413
1413
|
it('returns the proper value', async () => {
|
|
1414
|
-
expect((await subject.findById(commits.currentExtractOnly.id)).
|
|
1414
|
+
expect((await subject.findById(commits.currentExtractOnly.id)).isTechnicalUpgrade).to.be.true;
|
|
1415
1415
|
});
|
|
1416
1416
|
|
|
1417
1417
|
it('returns the proper value', async () => {
|
|
1418
|
-
expect((await subject.findById(commits.currentFirstRecord.id)).
|
|
1418
|
+
expect((await subject.findById(commits.currentFirstRecord.id)).isTechnicalUpgrade).to.be.false;
|
|
1419
1419
|
});
|
|
1420
1420
|
});
|
|
1421
1421
|
});
|
|
@@ -17,7 +17,7 @@ export function toPersistence(record) {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
export function toDomain(mongoDocument) {
|
|
20
|
-
const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isExtractOnly, isRefilter, isFirstRecord, snapshotIds, metadata } = mongoDocument;
|
|
20
|
+
const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isTechnicalUpgrade, isExtractOnly, isRefilter, isFirstRecord, snapshotIds, metadata } = mongoDocument;
|
|
21
21
|
|
|
22
22
|
const attributes = {
|
|
23
23
|
id: _id.toString(),
|
|
@@ -27,7 +27,7 @@ export function toDomain(mongoDocument) {
|
|
|
27
27
|
mimeType,
|
|
28
28
|
fetchDate: new Date(fetchDate),
|
|
29
29
|
isFirstRecord: Boolean(isFirstRecord),
|
|
30
|
-
|
|
30
|
+
isTechnicalUpgrade: Boolean(isTechnicalUpgrade) || Boolean(isExtractOnly) || Boolean(isRefilter),
|
|
31
31
|
snapshotIds: snapshotIds?.map(snapshotId => snapshotId.toString()) || [],
|
|
32
32
|
metadata,
|
|
33
33
|
};
|
|
@@ -16,6 +16,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
|
16
16
|
|
|
17
17
|
const { connectionURI } = config.get('@opentermsarchive/engine.recorder.snapshots.storage.mongo');
|
|
18
18
|
const client = new MongoClient(connectionURI);
|
|
19
|
+
const isWindows = process.platform === 'win32';
|
|
19
20
|
|
|
20
21
|
const SERVICE_PROVIDER_ID = 'test_service';
|
|
21
22
|
const TERMS_TYPE = 'Terms of Service';
|
|
@@ -41,6 +42,16 @@ const METADATA = {
|
|
|
41
42
|
let collection;
|
|
42
43
|
|
|
43
44
|
describe('MongoRepository', () => {
|
|
45
|
+
before(function () {
|
|
46
|
+
if (isWindows) {
|
|
47
|
+
console.log('MongoDB tests are unstable on Windows due to race condition in connection cleanup.');
|
|
48
|
+
console.log('Lacking a production use case for Mongo on Windows, we skip tests. Please reach out if you have a use case.');
|
|
49
|
+
// On Windows, when multiple repositories connect to the same MongoDB server and are closed in parallel or even sequentially, unhandled "Operation interrupted because client was closed" errors occur after all tests pass.
|
|
50
|
+
// The issue does not occur on Linux or macOS, so it appears to be a platform-specific difference in how the MongoDB driver handles connection pool cleanup during client.close().
|
|
51
|
+
this.skip();
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
|
|
44
55
|
let subject;
|
|
45
56
|
|
|
46
57
|
context('Version', () => {
|
|
@@ -220,7 +231,7 @@ describe('MongoRepository', () => {
|
|
|
220
231
|
});
|
|
221
232
|
});
|
|
222
233
|
|
|
223
|
-
context('when it is an
|
|
234
|
+
context('when it is an technical upgrade version', () => {
|
|
224
235
|
const EXTRACTED_ONLY_CONTENT = `${CONTENT} extracted only`;
|
|
225
236
|
|
|
226
237
|
before(async () => {
|
|
@@ -230,7 +241,7 @@ describe('MongoRepository', () => {
|
|
|
230
241
|
content: CONTENT,
|
|
231
242
|
fetchDate: FETCH_DATE_EARLIER,
|
|
232
243
|
snapshotIds: [SNAPSHOT_ID],
|
|
233
|
-
})); // An
|
|
244
|
+
})); // A technical upgrade version cannot be the first record
|
|
234
245
|
|
|
235
246
|
numberOfRecordsBefore = await collection.countDocuments({
|
|
236
247
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -243,7 +254,7 @@ describe('MongoRepository', () => {
|
|
|
243
254
|
content: EXTRACTED_ONLY_CONTENT,
|
|
244
255
|
fetchDate: FETCH_DATE,
|
|
245
256
|
snapshotIds: [SNAPSHOT_ID],
|
|
246
|
-
|
|
257
|
+
isTechnicalUpgrade: true,
|
|
247
258
|
})));
|
|
248
259
|
|
|
249
260
|
numberOfRecordsAfter = await collection.countDocuments({
|
|
@@ -267,8 +278,8 @@ describe('MongoRepository', () => {
|
|
|
267
278
|
expect(mongoDocument._id.toString()).to.equal(record.id);
|
|
268
279
|
});
|
|
269
280
|
|
|
270
|
-
it('stores information that it is an
|
|
271
|
-
expect(mongoDocument.
|
|
281
|
+
it('stores information that it is an technical upgrade version', () => {
|
|
282
|
+
expect(mongoDocument.isTechnicalUpgrade).to.be.true;
|
|
272
283
|
});
|
|
273
284
|
});
|
|
274
285
|
|
|
@@ -596,7 +607,7 @@ describe('MongoRepository', () => {
|
|
|
596
607
|
serviceId: SERVICE_PROVIDER_ID,
|
|
597
608
|
termsType: TERMS_TYPE,
|
|
598
609
|
content: `${CONTENT} - updated 2`,
|
|
599
|
-
|
|
610
|
+
isTechnicalUpgrade: true,
|
|
600
611
|
fetchDate: FETCH_DATE_EARLIER,
|
|
601
612
|
snapshotIds: [SNAPSHOT_ID],
|
|
602
613
|
}));
|
|
@@ -645,7 +656,7 @@ describe('MongoRepository', () => {
|
|
|
645
656
|
serviceId: SERVICE_PROVIDER_ID,
|
|
646
657
|
termsType: TERMS_TYPE,
|
|
647
658
|
content: `${CONTENT} - updated 2`,
|
|
648
|
-
|
|
659
|
+
isTechnicalUpgrade: true,
|
|
649
660
|
fetchDate: FETCH_DATE_EARLIER,
|
|
650
661
|
snapshotIds: [SNAPSHOT_ID],
|
|
651
662
|
}));
|
|
@@ -810,7 +821,7 @@ describe('MongoRepository', () => {
|
|
|
810
821
|
serviceId: SERVICE_PROVIDER_ID,
|
|
811
822
|
termsType: TERMS_TYPE,
|
|
812
823
|
content: `${CONTENT} - updated 2`,
|
|
813
|
-
|
|
824
|
+
isTechnicalUpgrade: true,
|
|
814
825
|
fetchDate: FETCH_DATE_EARLIER,
|
|
815
826
|
snapshotIds: [SNAPSHOT_ID],
|
|
816
827
|
}));
|
|
@@ -1164,7 +1175,7 @@ describe('MongoRepository', () => {
|
|
|
1164
1175
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1165
1176
|
termsType: TERMS_TYPE,
|
|
1166
1177
|
content: `${CONTENT} - updated 2`,
|
|
1167
|
-
|
|
1178
|
+
isTechnicalUpgrade: true,
|
|
1168
1179
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1169
1180
|
mimeType: HTML_MIME_TYPE,
|
|
1170
1181
|
}));
|
|
@@ -1213,7 +1224,7 @@ describe('MongoRepository', () => {
|
|
|
1213
1224
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1214
1225
|
termsType: TERMS_TYPE,
|
|
1215
1226
|
content: `${CONTENT} - updated 2`,
|
|
1216
|
-
|
|
1227
|
+
isTechnicalUpgrade: true,
|
|
1217
1228
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1218
1229
|
mimeType: HTML_MIME_TYPE,
|
|
1219
1230
|
}));
|
|
@@ -1421,7 +1432,7 @@ describe('MongoRepository', () => {
|
|
|
1421
1432
|
serviceId: SERVICE_PROVIDER_ID,
|
|
1422
1433
|
termsType: TERMS_TYPE,
|
|
1423
1434
|
content: `${CONTENT} - updated 2`,
|
|
1424
|
-
|
|
1435
|
+
isTechnicalUpgrade: true,
|
|
1425
1436
|
fetchDate: FETCH_DATE_EARLIER,
|
|
1426
1437
|
mimeType: HTML_MIME_TYPE,
|
|
1427
1438
|
}));
|
package/src/index.js
CHANGED
|
@@ -13,7 +13,7 @@ import Reporter from './reporter/index.js';
|
|
|
13
13
|
const require = createRequire(import.meta.url);
|
|
14
14
|
const { version: PACKAGE_VERSION } = require('../package.json');
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
async function initialize(services) {
|
|
17
17
|
const archivist = new Archivist({
|
|
18
18
|
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
19
19
|
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
@@ -40,13 +40,17 @@ export default async function track({ services, types, extractOnly, schedule })
|
|
|
40
40
|
});
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
await archivist.track({ services, types, extractOnly: true });
|
|
43
|
+
return { archivist, services };
|
|
44
|
+
}
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
46
|
+
export default async function track({ services, types, schedule }) {
|
|
47
|
+
const { archivist, services: filteredServices } = await initialize(services);
|
|
48
|
+
|
|
49
|
+
// Technical upgrade pass: apply changes from engine, dependency, or declaration upgrades.
|
|
50
|
+
// This regenerates versions from existing snapshots with updated extraction logic.
|
|
51
|
+
// For terms with combined source documents, if a new document was added to the declaration, it will be fetched and combined with existing snapshots to regenerate the complete version.
|
|
52
|
+
// All versions from this pass are labeled as technical upgrades to avoid false notifications about content changes.
|
|
53
|
+
await archivist.applyTechnicalUpgrades({ services: filteredServices, types });
|
|
50
54
|
|
|
51
55
|
if (process.env.OTA_ENGINE_SENDINBLUE_API_KEY) {
|
|
52
56
|
try {
|
|
@@ -72,7 +76,7 @@ export default async function track({ services, types, extractOnly, schedule })
|
|
|
72
76
|
}
|
|
73
77
|
|
|
74
78
|
if (!schedule) {
|
|
75
|
-
await archivist.track({ services, types });
|
|
79
|
+
await archivist.track({ services: filteredServices, types });
|
|
76
80
|
|
|
77
81
|
return;
|
|
78
82
|
}
|
|
@@ -86,6 +90,12 @@ export default async function track({ services, types, extractOnly, schedule })
|
|
|
86
90
|
new Cron( // eslint-disable-line no-new
|
|
87
91
|
trackingSchedule,
|
|
88
92
|
{ protect: job => logger.warn(`Tracking scheduled at ${new Date().toISOString()} were blocked by an unfinished tracking started at ${job.currentRun().toISOString()}`) },
|
|
89
|
-
() => archivist.track({ services, types }),
|
|
93
|
+
() => archivist.track({ services: filteredServices, types }),
|
|
90
94
|
);
|
|
91
95
|
}
|
|
96
|
+
|
|
97
|
+
export async function applyTechnicalUpgrades({ services, types }) {
|
|
98
|
+
const { archivist, services: filteredServices } = await initialize(services);
|
|
99
|
+
|
|
100
|
+
await archivist.applyTechnicalUpgrades({ services: filteredServices, types });
|
|
101
|
+
}
|
package/src/logger/index.js
CHANGED
|
@@ -195,9 +195,9 @@ logger.onVersionNotChanged = ({ serviceId, termsType }) => {
|
|
|
195
195
|
logger.info({ message: 'No changes after filtering, did not record version', serviceId, termsType });
|
|
196
196
|
};
|
|
197
197
|
|
|
198
|
-
logger.onTrackingStarted = (numberOfServices, numberOfTerms,
|
|
199
|
-
if (
|
|
200
|
-
logger.info(`
|
|
198
|
+
logger.onTrackingStarted = (numberOfServices, numberOfTerms, technicalUpgradeOnly) => {
|
|
199
|
+
if (technicalUpgradeOnly) {
|
|
200
|
+
logger.info(`Applying technical upgrades to ${numberOfTerms} terms from ${numberOfServices} services…`);
|
|
201
201
|
} else {
|
|
202
202
|
logger.info(`Tracking changes of ${numberOfTerms} terms from ${numberOfServices} services…`);
|
|
203
203
|
}
|
|
@@ -206,11 +206,11 @@ logger.onTrackingStarted = (numberOfServices, numberOfTerms, extractOnly) => {
|
|
|
206
206
|
trackingStartTime = Date.now();
|
|
207
207
|
};
|
|
208
208
|
|
|
209
|
-
logger.onTrackingCompleted = (numberOfServices, numberOfTerms,
|
|
209
|
+
logger.onTrackingCompleted = (numberOfServices, numberOfTerms, technicalUpgradeOnly) => {
|
|
210
210
|
const duration = formatDuration(Date.now() - trackingStartTime);
|
|
211
211
|
|
|
212
|
-
if (
|
|
213
|
-
logger.info(`
|
|
212
|
+
if (technicalUpgradeOnly) {
|
|
213
|
+
logger.info(`Applied technical upgrades to ${numberOfTerms} terms from ${numberOfServices} services in ${duration}`);
|
|
214
214
|
logger.info(`Recorded ${recordedVersionsCount} new versions\n`);
|
|
215
215
|
} else {
|
|
216
216
|
logger.info(`Tracked changes of ${numberOfTerms} terms from ${numberOfServices} services in ${duration}`);
|