@opentermsarchive/engine 9.2.3 → 10.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ota-apply-technical-upgrades.js +19 -0
- package/bin/ota-dataset.js +2 -2
- package/bin/ota-track.js +0 -1
- package/bin/ota.js +1 -0
- package/config/default.json +1 -1
- package/config/development.json +60 -0
- package/package.json +1 -1
- package/scripts/dataset/assets/README.template.js +1 -1
- package/scripts/dataset/export/test/fixtures/dataset/README.md +1 -1
- package/scripts/dataset/index.js +8 -3
- package/scripts/dataset/logger/index.js +25 -3
- package/scripts/dataset/publish/datagouv/dataset.js +234 -0
- package/scripts/dataset/publish/datagouv/index.js +82 -0
- package/scripts/dataset/publish/github/index.js +11 -2
- package/scripts/dataset/publish/gitlab/index.js +3 -1
- package/scripts/dataset/publish/index.js +39 -5
- package/src/archivist/index.js +75 -11
- package/src/archivist/index.test.js +345 -96
- package/src/archivist/recorder/index.js +2 -5
- package/src/archivist/recorder/index.test.js +18 -9
- package/src/archivist/recorder/repositories/git/dataMapper.js +4 -4
- package/src/archivist/recorder/repositories/git/index.test.js +16 -16
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +2 -2
- package/src/archivist/recorder/repositories/mongo/index.test.js +22 -11
- package/src/collection-api/server.js +1 -1
- package/src/index.js +19 -9
- package/src/logger/index.js +6 -6
- package/src/reporter/gitlab/index.js +2 -2
|
@@ -11,6 +11,7 @@ import sinonChai from 'sinon-chai';
|
|
|
11
11
|
import { InaccessibleContentError } from './errors.js';
|
|
12
12
|
import { FetchDocumentError } from './fetcher/index.js';
|
|
13
13
|
import Git from './recorder/repositories/git/git.js';
|
|
14
|
+
import SourceDocument from './services/sourceDocument.js';
|
|
14
15
|
|
|
15
16
|
import Archivist, { EVENTS } from './index.js';
|
|
16
17
|
|
|
@@ -52,6 +53,31 @@ describe('Archivist', function () {
|
|
|
52
53
|
|
|
53
54
|
const services = [ 'service·A', 'Service B!' ];
|
|
54
55
|
|
|
56
|
+
function setupNockForServices({ serviceA = true, serviceB = true } = {}) {
|
|
57
|
+
nock.cleanAll();
|
|
58
|
+
if (serviceA) {
|
|
59
|
+
nock('https://www.servicea.example')
|
|
60
|
+
.get('/tos')
|
|
61
|
+
.reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
|
|
62
|
+
}
|
|
63
|
+
if (serviceB) {
|
|
64
|
+
nock('https://www.serviceb.example')
|
|
65
|
+
.get('/privacy')
|
|
66
|
+
.reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function createAndInitializeArchivist() {
|
|
71
|
+
const archivist = new Archivist({
|
|
72
|
+
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
73
|
+
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
await archivist.initialize();
|
|
77
|
+
|
|
78
|
+
return archivist;
|
|
79
|
+
}
|
|
80
|
+
|
|
55
81
|
before(async () => {
|
|
56
82
|
gitVersion = new Git({
|
|
57
83
|
path: VERSIONS_PATH,
|
|
@@ -70,13 +96,8 @@ describe('Archivist', function () {
|
|
|
70
96
|
|
|
71
97
|
describe('#track', () => {
|
|
72
98
|
before(async () => {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
app = new Archivist({
|
|
76
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
77
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
78
|
-
});
|
|
79
|
-
await app.initialize();
|
|
99
|
+
setupNockForServices();
|
|
100
|
+
app = await createAndInitializeArchivist();
|
|
80
101
|
});
|
|
81
102
|
|
|
82
103
|
context('when everything works fine', () => {
|
|
@@ -112,8 +133,7 @@ describe('Archivist', function () {
|
|
|
112
133
|
context('when there is an operational error with service A', () => {
|
|
113
134
|
before(async () => {
|
|
114
135
|
// as there is no more HTTP request mocks for service A, it should throw an `ENOTFOUND` error which is considered as an expected error in our workflow
|
|
115
|
-
|
|
116
|
-
nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
136
|
+
setupNockForServices({ serviceA: false, serviceB: true });
|
|
117
137
|
await app.track({ services });
|
|
118
138
|
});
|
|
119
139
|
|
|
@@ -139,107 +159,353 @@ describe('Archivist', function () {
|
|
|
139
159
|
expect(resultingTerms).to.equal(serviceBVersionExpectedContent);
|
|
140
160
|
});
|
|
141
161
|
});
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
describe('#applyTechnicalUpgrades', () => {
|
|
165
|
+
context('when a service’s filter declaration changes', () => {
|
|
166
|
+
context('when everything works fine', () => {
|
|
167
|
+
let originalSnapshotId;
|
|
168
|
+
let firstVersionId;
|
|
169
|
+
let reExtractedVersionId;
|
|
170
|
+
let reExtractedVersionMessageBody;
|
|
171
|
+
let serviceBCommits;
|
|
172
|
+
|
|
173
|
+
before(async () => {
|
|
174
|
+
setupNockForServices();
|
|
175
|
+
app = await createAndInitializeArchivist();
|
|
176
|
+
await app.track({ services });
|
|
177
|
+
|
|
178
|
+
({ id: originalSnapshotId } = await app.recorder.snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
179
|
+
({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
180
|
+
|
|
181
|
+
serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
|
|
182
|
+
|
|
183
|
+
app.services[SERVICE_A_ID].getTerms({ type: SERVICE_A_TYPE }).sourceDocuments[0].contentSelectors = 'h1';
|
|
184
|
+
|
|
185
|
+
await app.applyTechnicalUpgrades({ services: [ 'service·A', 'Service B!' ] });
|
|
186
|
+
|
|
187
|
+
const [reExtractedVersionCommit] = await gitVersion.log({ file: SERVICE_A_EXPECTED_VERSION_FILE_PATH });
|
|
188
|
+
|
|
189
|
+
reExtractedVersionId = reExtractedVersionCommit.hash;
|
|
190
|
+
reExtractedVersionMessageBody = reExtractedVersionCommit.body;
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
after(resetGitRepositories);
|
|
194
|
+
|
|
195
|
+
it('updates the version of the changed service', async () => {
|
|
196
|
+
const serviceAContent = await fs.readFile(path.resolve(__dirname, SERVICE_A_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
|
|
197
|
+
|
|
198
|
+
expect(serviceAContent).to.equal('Terms of service with UTF-8 \'çhãràčtęrs"\n========================================');
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
it('generates a new version id', () => {
|
|
202
|
+
expect(reExtractedVersionId).to.not.equal(firstVersionId);
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
it('mentions the snapshot id in the changelog', () => {
|
|
206
|
+
expect(reExtractedVersionMessageBody).to.include(originalSnapshotId);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it('does not change other services', async () => {
|
|
210
|
+
const serviceBVersion = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
|
|
211
|
+
|
|
212
|
+
expect(serviceBVersion).to.equal(serviceBVersionExpectedContent);
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
it('does not generate a new id for other services', async () => {
|
|
216
|
+
const serviceBCommitsAfterExtraction = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
|
|
217
|
+
|
|
218
|
+
expect(serviceBCommitsAfterExtraction.map(commit => commit.hash)).to.deep.equal(serviceBCommits.map(commit => commit.hash));
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
context('when there is an operational error with service A', () => {
|
|
223
|
+
let inaccessibleContentSpy;
|
|
224
|
+
let versionNotChangedSpy;
|
|
225
|
+
let versionB;
|
|
226
|
+
|
|
227
|
+
before(async () => {
|
|
228
|
+
setupNockForServices();
|
|
229
|
+
app = await createAndInitializeArchivist();
|
|
230
|
+
await app.track({ services });
|
|
231
|
+
app.services[SERVICE_A_ID].getTerms({ type: SERVICE_A_TYPE }).sourceDocuments[0].contentSelectors = 'inexistant-selector';
|
|
232
|
+
inaccessibleContentSpy = sinon.spy();
|
|
233
|
+
versionNotChangedSpy = sinon.spy();
|
|
234
|
+
app.on('inaccessibleContent', inaccessibleContentSpy);
|
|
235
|
+
app.on('versionNotChanged', record => {
|
|
236
|
+
if (record.serviceId == 'Service B!') {
|
|
237
|
+
versionB = record;
|
|
238
|
+
}
|
|
239
|
+
versionNotChangedSpy(record);
|
|
240
|
+
});
|
|
241
|
+
await app.applyTechnicalUpgrades({ services });
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
after(resetGitRepositories);
|
|
245
|
+
|
|
246
|
+
it('emits an inaccessibleContent event', () => {
|
|
247
|
+
expect(inaccessibleContentSpy).to.have.been.called;
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
it('still extracts the terms of other services', () => {
|
|
251
|
+
expect(versionNotChangedSpy).to.have.been.calledWith(versionB);
|
|
252
|
+
});
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
describe('with combined source documents', () => {
|
|
256
|
+
const MULTI_SOURCE_DOCS = {
|
|
257
|
+
SERVICE_ID: 'service_with_multiple_source_documents_terms',
|
|
258
|
+
TERMS_TYPE: 'Community Guidelines',
|
|
259
|
+
BASE_URL: 'https://www.service-with-multiple-source-documents-terms.example',
|
|
260
|
+
CONTENT: {
|
|
261
|
+
COMMUNITY_STANDARDS: '<html><body id="main"><h1>Community Standards</h1><p>Community Standards content</p></body></html>',
|
|
262
|
+
HATE_SPEECH: '<html><body><p>Hate speech content</p><div id="footer">Footer</div></body></html>',
|
|
263
|
+
VIOLENCE_INCITEMENT: '<html><body><p>Violence incitement content</p><button class="share">Share</button><button class="print">Print</button></body></html>',
|
|
264
|
+
NEW_POLICY: '<html><body><p>New additional policy</p></body></html>',
|
|
265
|
+
},
|
|
266
|
+
PATHS: {
|
|
267
|
+
COMMUNITY_STANDARDS: '/community-standards',
|
|
268
|
+
HATE_SPEECH: '/community-standards/hate-speech/',
|
|
269
|
+
VIOLENCE_INCITEMENT: '/community-standards/violence-incitement/',
|
|
270
|
+
NEW_POLICY: '/community-standards/new-policy/',
|
|
271
|
+
},
|
|
272
|
+
EXPECTED_TEXTS: {
|
|
273
|
+
COMMUNITY_STANDARDS: 'Community Standards',
|
|
274
|
+
HATE_SPEECH: 'Hate speech content',
|
|
275
|
+
VIOLENCE_INCITEMENT: 'Violence incitement content',
|
|
276
|
+
NEW_POLICY: 'New additional policy',
|
|
277
|
+
},
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
const { SERVICE_ID, TERMS_TYPE } = MULTI_SOURCE_DOCS;
|
|
281
|
+
|
|
282
|
+
function setupNockForMultiSourceDocs(pathKeys) {
|
|
283
|
+
pathKeys.forEach(pathKey => {
|
|
284
|
+
nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
285
|
+
.persist()
|
|
286
|
+
.get(MULTI_SOURCE_DOCS.PATHS[pathKey])
|
|
287
|
+
.reply(200, MULTI_SOURCE_DOCS.CONTENT[pathKey], { 'Content-Type': 'text/html' });
|
|
288
|
+
});
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
function disableClientScriptsForTerms(terms) {
|
|
292
|
+
terms.sourceDocuments.forEach(doc => {
|
|
293
|
+
doc.executeClientScripts = false;
|
|
294
|
+
});
|
|
295
|
+
}
|
|
142
296
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
let originalSnapshotId;
|
|
147
|
-
let firstVersionId;
|
|
148
|
-
let reExtractedVersionId;
|
|
149
|
-
let reExtractedVersionMessageBody;
|
|
150
|
-
let serviceBCommits;
|
|
297
|
+
context('when a source document is added to existing combined terms', () => {
|
|
298
|
+
let initialVersion;
|
|
299
|
+
let upgradeVersion;
|
|
151
300
|
|
|
152
301
|
before(async () => {
|
|
153
|
-
|
|
154
|
-
nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
|
|
155
|
-
app = new Archivist({
|
|
156
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
157
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
158
|
-
});
|
|
302
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT', 'NEW_POLICY' ]);
|
|
159
303
|
|
|
160
|
-
await
|
|
161
|
-
await app.track({ services });
|
|
304
|
+
app = await createAndInitializeArchivist();
|
|
162
305
|
|
|
163
|
-
|
|
164
|
-
({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
|
|
306
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
165
307
|
|
|
166
|
-
|
|
308
|
+
disableClientScriptsForTerms(terms);
|
|
167
309
|
|
|
168
|
-
|
|
310
|
+
// First, track the terms normally to create initial version
|
|
311
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
312
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
169
313
|
|
|
170
|
-
|
|
314
|
+
// Modify the declaration to add a new source document
|
|
315
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
171
316
|
|
|
172
|
-
|
|
317
|
+
terms.sourceDocuments.push(new SourceDocument({
|
|
318
|
+
id: 'new-policy',
|
|
319
|
+
location: `${MULTI_SOURCE_DOCS.BASE_URL}${MULTI_SOURCE_DOCS.PATHS.NEW_POLICY}`,
|
|
320
|
+
contentSelectors: 'body',
|
|
321
|
+
executeClientScripts: false,
|
|
322
|
+
filters: [],
|
|
323
|
+
}));
|
|
173
324
|
|
|
174
|
-
|
|
175
|
-
|
|
325
|
+
// Apply technical upgrades
|
|
326
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
327
|
+
upgradeVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
176
328
|
});
|
|
177
329
|
|
|
178
|
-
after(
|
|
330
|
+
after(async () => {
|
|
331
|
+
await resetGitRepositories();
|
|
332
|
+
nock.cleanAll();
|
|
333
|
+
});
|
|
179
334
|
|
|
180
|
-
it('
|
|
181
|
-
|
|
335
|
+
it('creates a new version', () => {
|
|
336
|
+
expect(upgradeVersion.id).to.not.equal(initialVersion.id);
|
|
337
|
+
});
|
|
182
338
|
|
|
183
|
-
|
|
339
|
+
it('marks the new version as technical upgrade', () => {
|
|
340
|
+
expect(upgradeVersion.isTechnicalUpgrade).to.be.true;
|
|
184
341
|
});
|
|
185
342
|
|
|
186
|
-
it('
|
|
187
|
-
|
|
343
|
+
it('fetches and includes the new source document in the version', async () => {
|
|
344
|
+
const versionContent = await upgradeVersion.content;
|
|
345
|
+
|
|
346
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.NEW_POLICY);
|
|
188
347
|
});
|
|
189
348
|
|
|
190
|
-
it('
|
|
191
|
-
|
|
349
|
+
it('includes all source documents in version', async () => {
|
|
350
|
+
const versionContent = await upgradeVersion.content;
|
|
351
|
+
|
|
352
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.COMMUNITY_STANDARDS);
|
|
353
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.HATE_SPEECH);
|
|
354
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.VIOLENCE_INCITEMENT);
|
|
355
|
+
expect(versionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.NEW_POLICY);
|
|
192
356
|
});
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
context('when a source document location is modified in combined terms', () => {
|
|
360
|
+
let initialVersion;
|
|
361
|
+
let latestVersion;
|
|
362
|
+
let newLocationScope;
|
|
363
|
+
|
|
364
|
+
before(async () => {
|
|
365
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT' ]);
|
|
366
|
+
|
|
367
|
+
app = await createAndInitializeArchivist();
|
|
368
|
+
|
|
369
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
370
|
+
|
|
371
|
+
disableClientScriptsForTerms(terms);
|
|
372
|
+
|
|
373
|
+
// First, track the terms normally
|
|
374
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
375
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
376
|
+
|
|
377
|
+
// Mock new location (but it won't be fetched during technical upgrade)
|
|
378
|
+
newLocationScope = nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
379
|
+
.persist()
|
|
380
|
+
.get('/community-standards/hate-speech-updated/')
|
|
381
|
+
.reply(200, '<html><body><p>Updated hate speech policy</p></body></html>', { 'Content-Type': 'text/html' });
|
|
193
382
|
|
|
194
|
-
|
|
195
|
-
|
|
383
|
+
// Modify the declaration to change location
|
|
384
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
196
385
|
|
|
197
|
-
|
|
386
|
+
terms.sourceDocuments[1].location = `${MULTI_SOURCE_DOCS.BASE_URL}/community-standards/hate-speech-updated/`;
|
|
387
|
+
|
|
388
|
+
// Apply technical upgrades
|
|
389
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
390
|
+
latestVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
after(async () => {
|
|
394
|
+
await resetGitRepositories();
|
|
395
|
+
nock.cleanAll();
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
it('does not create a new version', () => {
|
|
399
|
+
expect(latestVersion.id).to.equal(initialVersion.id);
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
it('does not fetch from new location', () => {
|
|
403
|
+
expect(newLocationScope.isDone()).to.be.false;
|
|
198
404
|
});
|
|
199
405
|
|
|
200
|
-
it('does not
|
|
201
|
-
const
|
|
406
|
+
it('does not include content from the new location', async () => {
|
|
407
|
+
const versionContent = await latestVersion.content;
|
|
202
408
|
|
|
203
|
-
expect(
|
|
409
|
+
expect(versionContent).to.not.include('Updated hate speech policy');
|
|
204
410
|
});
|
|
205
411
|
});
|
|
206
412
|
|
|
207
|
-
context('when
|
|
208
|
-
let
|
|
209
|
-
let
|
|
210
|
-
let
|
|
413
|
+
context('when a source document selector is modified in combined terms', () => {
|
|
414
|
+
let initialVersion;
|
|
415
|
+
let latestVersion;
|
|
416
|
+
let initialVersionContent;
|
|
417
|
+
let upgradeVersionContent;
|
|
211
418
|
|
|
212
419
|
before(async () => {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
app =
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
420
|
+
setupNockForMultiSourceDocs([ 'COMMUNITY_STANDARDS', 'HATE_SPEECH', 'VIOLENCE_INCITEMENT' ]);
|
|
421
|
+
|
|
422
|
+
app = await createAndInitializeArchivist();
|
|
423
|
+
|
|
424
|
+
let terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
425
|
+
|
|
426
|
+
disableClientScriptsForTerms(terms);
|
|
427
|
+
|
|
428
|
+
// First, track the terms normally
|
|
429
|
+
await app.track({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
430
|
+
initialVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
431
|
+
initialVersionContent = await initialVersion.content;
|
|
432
|
+
|
|
433
|
+
// Modify the declaration to change selector
|
|
434
|
+
terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
219
435
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
app.
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
436
|
+
// Change from 'body' to 'h1' for the first source document
|
|
437
|
+
terms.sourceDocuments[0].contentSelectors = 'h1';
|
|
438
|
+
|
|
439
|
+
// Apply technical upgrades
|
|
440
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
441
|
+
latestVersion = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
442
|
+
upgradeVersionContent = await latestVersion.content;
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
after(async () => {
|
|
446
|
+
await resetGitRepositories();
|
|
447
|
+
nock.cleanAll();
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
it('creates a new version', () => {
|
|
451
|
+
expect(latestVersion.id).to.not.equal(initialVersion.id);
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
it('marks the new version as technical upgrade', () => {
|
|
455
|
+
expect(latestVersion.isTechnicalUpgrade).to.be.true;
|
|
456
|
+
});
|
|
457
|
+
|
|
458
|
+
it('extracts content with the new selector from existing snapshot', () => {
|
|
459
|
+
// With new selector 'h1', should only extract the heading
|
|
460
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.COMMUNITY_STANDARDS);
|
|
461
|
+
// The rest should be from other source documents
|
|
462
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.HATE_SPEECH);
|
|
463
|
+
expect(upgradeVersionContent).to.include(MULTI_SOURCE_DOCS.EXPECTED_TEXTS.VIOLENCE_INCITEMENT);
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
it('regenerates version with updated extraction logic', () => {
|
|
467
|
+
expect(upgradeVersionContent).to.not.equal(initialVersionContent);
|
|
468
|
+
});
|
|
469
|
+
});
|
|
470
|
+
|
|
471
|
+
context('when adding source document but no version exists yet', () => {
|
|
472
|
+
let newSourceScope;
|
|
473
|
+
|
|
474
|
+
before(async () => {
|
|
475
|
+
newSourceScope = nock(MULTI_SOURCE_DOCS.BASE_URL)
|
|
476
|
+
.get(MULTI_SOURCE_DOCS.PATHS.NEW_POLICY)
|
|
477
|
+
.reply(200, MULTI_SOURCE_DOCS.CONTENT.NEW_POLICY, { 'Content-Type': 'text/html' });
|
|
478
|
+
|
|
479
|
+
app = await createAndInitializeArchivist();
|
|
480
|
+
|
|
481
|
+
// Modify declaration before any tracking
|
|
482
|
+
const terms = app.services[SERVICE_ID].getTerms({ type: TERMS_TYPE });
|
|
483
|
+
|
|
484
|
+
terms.sourceDocuments.push({
|
|
485
|
+
id: 'new-policy',
|
|
486
|
+
location: `${MULTI_SOURCE_DOCS.BASE_URL}${MULTI_SOURCE_DOCS.PATHS.NEW_POLICY}`,
|
|
487
|
+
contentSelectors: 'body',
|
|
488
|
+
executeClientScripts: false,
|
|
489
|
+
filters: [],
|
|
231
490
|
});
|
|
232
|
-
|
|
491
|
+
|
|
492
|
+
// Apply technical upgrades (should skip because no version exists)
|
|
493
|
+
await app.applyTechnicalUpgrades({ services: [SERVICE_ID], types: [TERMS_TYPE] });
|
|
233
494
|
});
|
|
234
495
|
|
|
235
|
-
after(
|
|
496
|
+
after(async () => {
|
|
497
|
+
await resetGitRepositories();
|
|
498
|
+
nock.cleanAll();
|
|
499
|
+
});
|
|
236
500
|
|
|
237
|
-
it('
|
|
238
|
-
|
|
501
|
+
it('does not create a version when none existed before', async () => {
|
|
502
|
+
const version = await app.recorder.versionsRepository.findLatest(SERVICE_ID, TERMS_TYPE);
|
|
503
|
+
|
|
504
|
+
expect(version).to.be.null;
|
|
239
505
|
});
|
|
240
506
|
|
|
241
|
-
it('
|
|
242
|
-
expect(
|
|
507
|
+
it('does not fetch the new source document', () => {
|
|
508
|
+
expect(newSourceScope.isDone()).to.be.false;
|
|
243
509
|
});
|
|
244
510
|
});
|
|
245
511
|
});
|
|
@@ -256,11 +522,7 @@ describe('Archivist', function () {
|
|
|
256
522
|
const retryableError = new FetchDocumentError(FetchDocumentError.LIKELY_TRANSIENT_ERRORS[0]);
|
|
257
523
|
|
|
258
524
|
before(async () => {
|
|
259
|
-
app =
|
|
260
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
261
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
262
|
-
});
|
|
263
|
-
await app.initialize();
|
|
525
|
+
app = await createAndInitializeArchivist();
|
|
264
526
|
});
|
|
265
527
|
|
|
266
528
|
beforeEach(() => {
|
|
@@ -345,11 +607,7 @@ describe('Archivist', function () {
|
|
|
345
607
|
|
|
346
608
|
describe('#attach', () => {
|
|
347
609
|
before(async () => {
|
|
348
|
-
app =
|
|
349
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
350
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
351
|
-
});
|
|
352
|
-
await app.initialize();
|
|
610
|
+
app = await createAndInitializeArchivist();
|
|
353
611
|
|
|
354
612
|
EVENTS.forEach(event => {
|
|
355
613
|
const handlerName = `on${event[0].toUpperCase()}${event.substring(1)}`;
|
|
@@ -378,14 +636,9 @@ describe('Archivist', function () {
|
|
|
378
636
|
let plugin;
|
|
379
637
|
|
|
380
638
|
before(async () => {
|
|
381
|
-
|
|
382
|
-
nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
|
|
639
|
+
setupNockForServices({ serviceA: true, serviceB: false });
|
|
383
640
|
|
|
384
|
-
app =
|
|
385
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
386
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
387
|
-
});
|
|
388
|
-
await app.initialize();
|
|
641
|
+
app = await createAndInitializeArchivist();
|
|
389
642
|
|
|
390
643
|
plugin = { onFirstVersionRecorded: () => { throw new Error('Plugin error'); } };
|
|
391
644
|
|
|
@@ -432,11 +685,7 @@ describe('Archivist', function () {
|
|
|
432
685
|
}
|
|
433
686
|
|
|
434
687
|
before(async () => {
|
|
435
|
-
app =
|
|
436
|
-
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
|
|
437
|
-
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
|
|
438
|
-
});
|
|
439
|
-
await app.initialize();
|
|
688
|
+
app = await createAndInitializeArchivist();
|
|
440
689
|
|
|
441
690
|
EVENTS.forEach(event => {
|
|
442
691
|
const handlerName = `on${event[0].toUpperCase()}${event.substr(1)}`;
|
|
@@ -12,11 +12,8 @@ export default class Recorder {
|
|
|
12
12
|
return Promise.all([ this.versionsRepository.initialize(), this.snapshotsRepository.initialize() ]);
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
// Parallel closing can cause "Operation interrupted because client was closed" errors, especially on Windows.
|
|
18
|
-
await this.versionsRepository.finalize();
|
|
19
|
-
await this.snapshotsRepository.finalize();
|
|
15
|
+
finalize() {
|
|
16
|
+
return Promise.all([ this.versionsRepository.finalize(), this.snapshotsRepository.finalize() ]);
|
|
20
17
|
}
|
|
21
18
|
|
|
22
19
|
getLatestSnapshot(terms, sourceDocumentId) {
|
|
@@ -6,6 +6,8 @@ import Version from './version.js';
|
|
|
6
6
|
|
|
7
7
|
import Recorder from './index.js';
|
|
8
8
|
|
|
9
|
+
const isWindows = process.platform === 'win32';
|
|
10
|
+
|
|
9
11
|
const MIME_TYPE = 'text/html';
|
|
10
12
|
const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
|
|
11
13
|
const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
|
|
@@ -18,7 +20,14 @@ describe('Recorder', () => {
|
|
|
18
20
|
describe(repositoryType, () => {
|
|
19
21
|
let recorder;
|
|
20
22
|
|
|
21
|
-
before(async ()
|
|
23
|
+
before(async function () {
|
|
24
|
+
if (repositoryType == 'mongo' && isWindows) {
|
|
25
|
+
console.log('MongoDB tests are unstable on Windows due to race condition in connection cleanup.');
|
|
26
|
+
console.log('Lacking a production use case for Mongo on Windows, we skip tests. Please reach out if you have a use case.');
|
|
27
|
+
// On Windows, when multiple repositories connect to the same MongoDB server and are closed in parallel or even sequentially, unhandled "Operation interrupted because client was closed" errors occur after all tests pass.
|
|
28
|
+
// The issue does not occur on Linux or macOS, so it appears to be a platform-specific difference in how the MongoDB driver handles connection pool cleanup during client.close().
|
|
29
|
+
this.skip();
|
|
30
|
+
}
|
|
22
31
|
const options = config.util.cloneDeep(config.get('@opentermsarchive/engine.recorder'));
|
|
23
32
|
|
|
24
33
|
options.versions.storage.type = repositoryType;
|
|
@@ -28,7 +37,7 @@ describe('Recorder', () => {
|
|
|
28
37
|
await recorder.initialize();
|
|
29
38
|
});
|
|
30
39
|
|
|
31
|
-
after(() => recorder
|
|
40
|
+
after(() => recorder?.finalize());
|
|
32
41
|
|
|
33
42
|
context('Snapshot', () => {
|
|
34
43
|
describe('#record', () => {
|
|
@@ -258,8 +267,8 @@ describe('Recorder', () => {
|
|
|
258
267
|
expect(await record.content).to.equal(UPDATED_CONTENT);
|
|
259
268
|
});
|
|
260
269
|
|
|
261
|
-
it('records in the version that it is not
|
|
262
|
-
expect(record.
|
|
270
|
+
it('records in the version that it is not a technical upgrade version', () => {
|
|
271
|
+
expect(record.isTechnicalUpgrade).to.equal(false);
|
|
263
272
|
});
|
|
264
273
|
|
|
265
274
|
it('returns the record id', () => {
|
|
@@ -315,7 +324,7 @@ describe('Recorder', () => {
|
|
|
315
324
|
content: CONTENT,
|
|
316
325
|
snapshotIds: [SNAPSHOT_ID],
|
|
317
326
|
fetchDate: FETCH_DATE,
|
|
318
|
-
|
|
327
|
+
isTechnicalUpgrade: true,
|
|
319
328
|
})));
|
|
320
329
|
|
|
321
330
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|
|
@@ -354,7 +363,7 @@ describe('Recorder', () => {
|
|
|
354
363
|
content: UPDATED_CONTENT,
|
|
355
364
|
snapshotIds: [SNAPSHOT_ID],
|
|
356
365
|
fetchDate: FETCH_DATE_LATER,
|
|
357
|
-
|
|
366
|
+
isTechnicalUpgrade: true,
|
|
358
367
|
})));
|
|
359
368
|
|
|
360
369
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|
|
@@ -366,8 +375,8 @@ describe('Recorder', () => {
|
|
|
366
375
|
expect(await record.content).to.equal(UPDATED_CONTENT);
|
|
367
376
|
});
|
|
368
377
|
|
|
369
|
-
it('records in the version that it is an
|
|
370
|
-
expect(record.
|
|
378
|
+
it('records in the version that it is an technical upgrade version', () => {
|
|
379
|
+
expect(record.isTechnicalUpgrade).to.equal(true);
|
|
371
380
|
});
|
|
372
381
|
|
|
373
382
|
it('returns the record id', () => {
|
|
@@ -395,7 +404,7 @@ describe('Recorder', () => {
|
|
|
395
404
|
content: CONTENT,
|
|
396
405
|
snapshotIds: [SNAPSHOT_ID],
|
|
397
406
|
fetchDate: FETCH_DATE_LATER,
|
|
398
|
-
|
|
407
|
+
isTechnicalUpgrade: true,
|
|
399
408
|
})));
|
|
400
409
|
|
|
401
410
|
record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
|