@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,463 @@
1
+ import chai from 'chai';
2
+ import config from 'config';
3
+
4
+ import Recorder from './index.js';
5
+
6
+ const { expect } = chai;
7
+
8
+ const MIME_TYPE = 'text/html';
9
+ const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
10
+ const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
11
+
12
+ describe('Recorder', () => {
13
+ const SERVICE_ID = 'test_service';
14
+ const TYPE = 'Terms of Service';
15
+
16
+ for (const repositoryType of [ 'git', 'mongo' ]) {
17
+ describe(repositoryType, () => {
18
+ let recorder;
19
+
20
+ before(async () => {
21
+ const options = config.util.cloneDeep(config.recorder);
22
+
23
+ options.versions.storage.type = repositoryType;
24
+ options.snapshots.storage.type = repositoryType;
25
+
26
+ recorder = new Recorder(options);
27
+ await recorder.initialize();
28
+ });
29
+
30
+ after(async () => recorder.finalize());
31
+
32
describe('#recordSnapshot', () => {
  const CONTENT = '<html><h1>ToS fixture data with UTF-8 çhãràčtęrs</h1></html>';

  let id;
  let isFirstRecord;
  let record;

  // Params of a snapshot of the test document; only the content and the fetch date vary between scenarios.
  const snapshotParams = (content, fetchDate) => ({
    serviceId: SERVICE_ID,
    documentType: TYPE,
    content,
    mimeType: MIME_TYPE,
    fetchDate,
  });

  // Teardown shared by every scenario: calls `removeAll` on the snapshots repository.
  const removeAllSnapshots = () => recorder.snapshotsRepository.removeAll();

  // Reads back the latest snapshot stored for the test document.
  const findLatestSnapshot = () => recorder.snapshotsRepository.findLatest(SERVICE_ID, TYPE);

  context('when a required param is missing', () => {
    after(removeAllSnapshots);

    const validParams = {
      serviceId: SERVICE_ID,
      documentType: TYPE,
      content: CONTENT,
      fetchDate: FETCH_DATE,
      mimeType: MIME_TYPE,
    };

    // Fragment of the error message expected when the matching param is left out.
    const paramsNameToExpectedTextInError = {
      serviceId: 'service ID',
      documentType: 'document type',
      fetchDate: 'fetch date',
      content: 'content',
      mimeType: 'mime type',
    };

    // One generated scenario per param, each time omitting only that param.
    for (const testedRequiredParam of Object.keys(validParams)) {
      context(`when "${testedRequiredParam}" is missing`, () => {
        it('throws an error', async () => {
          const remainingEntries = Object.entries(validParams).filter(([paramName]) => paramName !== testedRequiredParam);
          const validParamsExceptTheOneTested = Object.fromEntries(remainingEntries);

          try {
            await recorder.recordSnapshot(validParamsExceptTheOneTested);
          } catch (e) {
            expect(e).to.be.an('error');
            expect(e.message).to.contain(paramsNameToExpectedTextInError[testedRequiredParam]);

            return;
          }

          expect.fail('No error was thrown');
        });
      });
    }
  });

  context('when it is the first record', () => {
    before(async () => {
      ({ id, isFirstRecord } = await recorder.recordSnapshot(snapshotParams(CONTENT, FETCH_DATE)));

      record = await findLatestSnapshot();
    });

    after(removeAllSnapshots);

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(CONTENT);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.true;
    });
  });

  context('when it is not the first record', () => {
    const UPDATED_CONTENT = '<html><h1>ToS fixture data with UTF-8 çhãràčtęrs</h1><h2>Updated!</h2></html>';

    before(async () => {
      // A first record must exist so that the second one is not the first.
      await recorder.recordSnapshot(snapshotParams(CONTENT, FETCH_DATE));

      ({ id, isFirstRecord } = await recorder.recordSnapshot(snapshotParams(UPDATED_CONTENT, FETCH_DATE_LATER)));

      record = await findLatestSnapshot();
    });

    after(removeAllSnapshots);

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(UPDATED_CONTENT);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.false;
    });
  });

  context('when the content has not changed', () => {
    before(async () => {
      await recorder.recordSnapshot(snapshotParams(CONTENT, FETCH_DATE));

      // Same content, later fetch date: the returned id is what the scenario inspects.
      ({ id, isFirstRecord } = await recorder.recordSnapshot(snapshotParams(CONTENT, FETCH_DATE_LATER)));

      record = await findLatestSnapshot();
    });

    after(removeAllSnapshots);

    it('does not record the document', async () => {
      expect(id).to.not.be.ok;
    });
  });
});
171
+
172
describe('#recordVersion', () => {
  const CONTENT = '# ToS fixture data with UTF-8 çhãràčtęrs';
  const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad';

  let id;
  let isFirstRecord;
  let record;

  // Params of a version of the test document; only the content and the fetch date vary between scenarios.
  const versionParams = (content, fetchDate) => ({
    serviceId: SERVICE_ID,
    documentType: TYPE,
    content,
    snapshotIds: [SNAPSHOT_ID],
    fetchDate,
  });

  // Teardown shared by every scenario: calls `removeAll` on the versions repository.
  const removeAllVersions = () => recorder.versionsRepository.removeAll();

  // Reads back the latest version stored for the test document.
  const findLatestVersion = () => recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);

  context('when a required param is missing', () => {
    after(removeAllVersions);

    const validParams = versionParams(CONTENT, FETCH_DATE);

    // Fragment of the error message expected when the matching param is left out.
    const paramsNameToExpectedTextInError = {
      serviceId: 'service ID',
      documentType: 'document type',
      snapshotIds: 'snapshot ID',
      fetchDate: 'fetch date',
      content: 'content',
    };

    // One generated scenario per param, each time omitting only that param.
    for (const testedRequiredParam of Object.keys(validParams)) {
      context(`when "${testedRequiredParam}" is missing`, () => {
        it('throws an error', async () => {
          const remainingEntries = Object.entries(validParams).filter(([paramName]) => paramName !== testedRequiredParam);
          const validParamsExceptTheOneTested = Object.fromEntries(remainingEntries);

          try {
            await recorder.recordVersion(validParamsExceptTheOneTested);
          } catch (e) {
            expect(e).to.be.an('error');
            expect(e.message).to.contain(paramsNameToExpectedTextInError[testedRequiredParam]);

            return;
          }

          expect.fail('No error was thrown');
        });
      });
    }
  });

  context('when it is the first record', () => {
    before(async () => {
      ({ id, isFirstRecord } = await recorder.recordVersion(versionParams(CONTENT, FETCH_DATE)));

      record = await findLatestVersion();
    });

    after(removeAllVersions);

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(CONTENT);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.true;
    });
  });

  context('when it is not the first record', () => {
    const UPDATED_CONTENT = '<html><h1>ToS fixture data with UTF-8 çhãràčtęrs</h1><h2>Updated!</h2></html>';

    before(async () => {
      // A first record must exist so that the second one is not the first.
      await recorder.recordVersion(versionParams(CONTENT, FETCH_DATE));

      ({ id, isFirstRecord } = await recorder.recordVersion(versionParams(UPDATED_CONTENT, FETCH_DATE_LATER)));

      record = await findLatestVersion();
    });

    after(removeAllVersions);

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(UPDATED_CONTENT);
    });

    it('records in the document that it is not a refilter', async () => {
      expect(record.isRefilter).to.equal(false);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.false;
    });
  });

  context('when the content has not changed', () => {
    before(async () => {
      await recorder.recordVersion(versionParams(CONTENT, FETCH_DATE));

      // Same content, later fetch date: the returned id is what the scenario inspects.
      ({ id, isFirstRecord } = await recorder.recordVersion(versionParams(CONTENT, FETCH_DATE_LATER)));

      record = await findLatestVersion();
    });

    after(removeAllVersions);

    it('does not record the document', async () => {
      expect(id).to.not.be.ok;
    });
  });
});
316
+
317
describe('#recordRefilter', () => {
  const CONTENT = '# ToS fixture data with UTF-8 çhãràčtęrs';
  const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad';

  let id;
  let isFirstRecord;
  let record;

  context('when a required param is missing', () => {
    after(async () => recorder.versionsRepository.removeAll());

    const validParams = {
      serviceId: SERVICE_ID,
      documentType: TYPE,
      content: CONTENT,
      snapshotIds: [SNAPSHOT_ID],
      fetchDate: FETCH_DATE,
    };

    // Fragment of the error message expected when the matching param is left out.
    const paramsNameToExpectedTextInError = {
      serviceId: 'service ID',
      documentType: 'document type',
      snapshotIds: 'snapshot ID',
      fetchDate: 'fetch date',
      content: 'content',
    };

    // One generated scenario per param, each time omitting only that param.
    Object.keys(validParams).forEach(testedRequiredParam => {
      context(`when "${testedRequiredParam}" is missing`, () => {
        it('throws an error', async () => {
          try {
            const validParamsExceptTheOneTested = Object.fromEntries(Object.entries(validParams).filter(([paramName]) => paramName !== testedRequiredParam));

            await recorder.recordRefilter(validParamsExceptTheOneTested);
          } catch (e) {
            expect(e).to.be.an('error');
            expect(e.message).to.contain(paramsNameToExpectedTextInError[testedRequiredParam]);

            return;
          }
          expect.fail('No error was thrown');
        });
      });
    });
  });

  context('when it is the first record', () => {
    before(async () => {
      ({ id, isFirstRecord } = await recorder.recordRefilter({
        serviceId: SERVICE_ID,
        documentType: TYPE,
        content: CONTENT,
        snapshotIds: [SNAPSHOT_ID],
        fetchDate: FETCH_DATE,
      }));

      record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
    });

    // Registered once: the teardown hook was previously duplicated on the same line.
    after(async () => recorder.versionsRepository.removeAll());

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(CONTENT);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.true;
    });
  });

  context('when it is not the first record', () => {
    const UPDATED_CONTENT = '<html><h1>ToS fixture data with UTF-8 çhãràčtęrs</h1><h2>Updated!</h2></html>';

    before(async () => {
      // A first record must exist so that the second one is not the first.
      await recorder.recordRefilter({
        serviceId: SERVICE_ID,
        documentType: TYPE,
        content: CONTENT,
        snapshotIds: [SNAPSHOT_ID],
        fetchDate: FETCH_DATE,
      });

      ({ id, isFirstRecord } = await recorder.recordRefilter({
        serviceId: SERVICE_ID,
        documentType: TYPE,
        content: UPDATED_CONTENT,
        snapshotIds: [SNAPSHOT_ID],
        fetchDate: FETCH_DATE_LATER,
      }));

      record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
    });

    after(async () => recorder.versionsRepository.removeAll());

    it('records the document with the proper content', async () => {
      expect(await record.content).to.equal(UPDATED_CONTENT);
    });

    it('records in the document that it is a refilter', async () => {
      expect(record.isRefilter).to.equal(true);
    });

    it('returns the record id', async () => {
      expect(record.id).to.include(id);
    });

    it('returns a boolean to know if it is the first record', async () => {
      expect(isFirstRecord).to.be.false;
    });
  });

  context('when the content has not changed', () => {
    before(async () => {
      await recorder.recordRefilter({
        serviceId: SERVICE_ID,
        documentType: TYPE,
        content: CONTENT,
        snapshotIds: [SNAPSHOT_ID],
        fetchDate: FETCH_DATE,
      });

      // Same content, later fetch date: the returned id is what the scenario inspects.
      ({ id, isFirstRecord } = await recorder.recordRefilter({
        serviceId: SERVICE_ID,
        documentType: TYPE,
        content: CONTENT,
        snapshotIds: [SNAPSHOT_ID],
        fetchDate: FETCH_DATE_LATER,
      }));

      record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE);
    });

    after(async () => recorder.versionsRepository.removeAll());

    it('does not record the document', async () => {
      expect(id).to.not.be.ok;
    });
  });
});
461
+ });
462
+ }
463
+ });
@@ -0,0 +1,35 @@
1
/**
 * A recorded document, with metadata kept as plain properties and the content
 * kept in a private field behind a guarded accessor.
 */
export default class Record {
  #content;

  /** Names of the params that must be present and non-null at construction time. */
  static REQUIRED_PARAMS = Object.freeze([ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]);

  /**
   * @param {object} params - Record attributes; must contain every entry of `Record.REQUIRED_PARAMS`.
   *   An optional `content` entry is stored in the private field rather than as an own property.
   * @throws {Error} When a required param is missing, `null` or `undefined`.
   */
  constructor(params) {
    Record.validate(params);

    // `Object.assign` performs regular property writes, so a `content` entry goes
    // through the `content` setter below and ends up in the private field.
    Object.assign(this, params);
  }

  /**
   * @returns {*} The content previously set on this record.
   * @throws {Error} When no content has been set yet.
   */
  get content() {
    if (this.#content === undefined) {
      throw new Error('Record content not defined, set the content or use Repository#loadRecordContent');
    }

    return this.#content;
  }

  set content(content) {
    this.#content = content;
  }

  /**
   * Checks that every required param is an own, non-nullish property of the given object.
   *
   * @param {object} [givenParams] - Params to check; a missing or `null` value is treated
   *   as an empty object so that the error names the first required param.
   * @throws {Error} `"<param>" is required` for the first missing param.
   */
  static validate(givenParams) {
    const params = givenParams ?? {};

    for (const param of Record.REQUIRED_PARAMS) {
      if (!Object.prototype.hasOwnProperty.call(params, param) || params[param] == null) {
        throw new Error(`"${param}" is required`);
      }
    }
  }
}
@@ -0,0 +1,91 @@
1
+ import chai from 'chai';
2
+ import config from 'config';
3
+
4
+ import Record from './record.js';
5
+ import RepositoryFactory from './repositories/factory.js';
6
+
7
+ const { expect } = chai;
8
+
9
describe('Record', () => {
  let repository;
  let subject;
  const REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ];
  const recordParams = {
    serviceId: 'ServiceA',
    documentType: 'Terms of Service',
    mimeType: 'text/html',
    fetchDate: new Date('2000-01-01T12:00:00.000Z'),
  };

  describe('Validation', () => {
    describe('Required parameters', () => {
      // One generated group of scenarios per required param.
      REQUIRED_PARAMS.forEach(requiredParam => {
        describe(`"${requiredParam}"`, () => {
          context('when it is missing', () => {
            it('throws an error', () => {
              const params = Object.fromEntries(Object.entries(recordParams).filter(([param]) => param !== requiredParam));

              // The constructor is synchronous, so the failure can be asserted directly.
              expect(() => new Record(params)).to.throw(Error, `"${requiredParam}" is required`);
            });
          });

          context('when it is null', () => {
            it('throws an error', () => {
              expect(() => new Record({ ...recordParams, [requiredParam]: null })).to.throw(Error, `"${requiredParam}" is required`);
            });
          });
        });
      });
    });
  });

  describe('Content access', () => {
    before(async () => {
      repository = await RepositoryFactory.create(config.get('recorder.versions.storage')).initialize();
      await repository.save(new Record({
        ...recordParams,
        content: 'content',
      }));
      // The subject is the record as read back from the repository, not the instance that was saved.
      ([subject] = await repository.findAll());
    });

    after(async () => {
      await repository.removeAll();
      await repository.finalize();
    });

    context('when it is neither defined nor loaded', () => {
      it('throws an error explaining how to recover', () => {
        // Reading the getter is enough to trigger the error; nothing is printed.
        expect(() => subject.content).to.throw(Error, 'set the content or use Repository#loadRecordContent');
      });
    });
  });
});
@@ -0,0 +1,23 @@
1
+ import path from 'path';
2
+ import { fileURLToPath } from 'url';
3
+
4
+ import GitRepository from './git/index.js';
5
+ import MongoRepository from './mongo/index.js';
6
+
7
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
+
9
/** Builds the storage repository matching a storage configuration. */
export default class RepositoryFactory {
  /**
   * @param {object} params - Storage configuration; `params.type` selects the repository and
   *   the same-named entry (`params.git` or `params.mongo`) carries its options.
   * @returns {GitRepository|MongoRepository} A new repository instance.
   * @throws {Error} When `params.type` is neither `'git'` nor `'mongo'`.
   */
  static create(params) {
    const { type } = params;

    if (type === 'git') {
      // The configured path is resolved against the directory four levels above this module.
      const absolutePath = path.resolve(__dirname, '../../../../', params.git.path);

      return new GitRepository({ ...params.git, path: absolutePath });
    }

    if (type === 'mongo') {
      return new MongoRepository(params.mongo);
    }

    throw new Error(`Unknown storage repository configuration for type '${type}'`);
  }
}
@@ -0,0 +1,83 @@
1
+ import path from 'path';
2
+
3
+ import mime from 'mime';
4
+
5
+ import Record from '../../record.js';
6
+
7
+ mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown`
8
+
9
// Words opening the subject line of a commit message, one per kind of recording.
export const COMMIT_MESSAGE_PREFIX = {
  startTracking: 'Start tracking',
  refilter: 'Refilter',
  update: 'Update',
};

// Placed between the document type and the page ID in file names.
export const DOCUMENT_TYPE_AND_PAGE_ID_SEPARATOR = ' #';

// Placeholder replaced by an actual snapshot ID in the snapshot identifier template.
export const SNAPSHOT_ID_MARKER = '%SNAPSHOT_ID';

const SINGLE_SNAPSHOT_PREFIX = 'This version was recorded after filtering snapshot';
const MULTIPLE_SNAPSHOT_PREFIX = 'This version was recorded after filtering and assembling the following snapshots from %NUMBER pages:';

// Matches any of the prefixes above at the very start of a string.
export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${Object.values(COMMIT_MESSAGE_PREFIX).join('|')})`);
21
+
22
/**
 * Converts a record into what is needed to commit it: a commit message, the content and a file path.
 *
 * @param {object} record - Record to convert; its `content` is read after the file path is computed.
 * @param {string} snapshotIdentiferTemplate - Text in which `SNAPSHOT_ID_MARKER` is replaced by each snapshot ID.
 * @returns {{ message: string, content: *, filePath: string }}
 */
export function toPersistence(record, snapshotIdentiferTemplate) {
  const { serviceId, documentType, pageId, isRefilter, snapshotIds = [], mimeType, isFirstRecord } = record;

  // A first record always "starts tracking", whether or not it is also a refilter.
  let prefix = COMMIT_MESSAGE_PREFIX.update;

  if (isFirstRecord) {
    prefix = COMMIT_MESSAGE_PREFIX.startTracking;
  } else if (isRefilter) {
    prefix = COMMIT_MESSAGE_PREFIX.refilter;
  }

  const subject = `${prefix} ${serviceId} ${documentType}`;
  const pageIdMessage = pageId ? `Page ID ${pageId}\n\n` : '';

  const toSnapshotReference = snapshotId => snapshotIdentiferTemplate.replace(SNAPSHOT_ID_MARKER, snapshotId);
  let snapshotIdsMessage = '';

  if (snapshotIds.length == 1) {
    snapshotIdsMessage = `${SINGLE_SNAPSHOT_PREFIX} ${toSnapshotReference(snapshotIds[0])}`;
  } else if (snapshotIds.length > 1) {
    const heading = MULTIPLE_SNAPSHOT_PREFIX.replace('%NUMBER', snapshotIds.length);
    const references = snapshotIds.map(snapshotId => `- ${toSnapshotReference(snapshotId)}`);

    snapshotIdsMessage = `${heading}\n${references.join('\n')}`;
  }

  const filePath = generateFilePath(serviceId, documentType, pageId, mimeType);

  // The three parts are always separated by a blank line, even when one of them is empty.
  return {
    message: [ subject, pageIdMessage, snapshotIdsMessage ].join('\n\n'),
    content: record.content,
    filePath,
  };
}
47
+
48
/**
 * Converts a commit into a Record, deriving the metadata from the commit fields
 * and from the path of the single file it modified.
 *
 * @param {object} commit - Object exposing `hash`, `date`, `message`, `body` and `diff.files`.
 * @returns {Record} Record without content.
 * @throws {Error} When the commit modified more than one file.
 */
export function toDomain(commit) {
  const { hash, date, message, body, diff } = commit;

  const recordedFiles = diff.files.map(entry => entry.file);

  if (recordedFiles.length > 1) {
    throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${recordedFiles.join(', ')}`);
  }

  const relativeFilePath = recordedFiles[0];

  // Any standalone run of 5 to 40 lowercase hexadecimal characters in the body is taken as a snapshot ID.
  const snapshotIds = body.match(/\b[0-9a-f]{5,40}\b/g) || [];

  // The file name, extension excluded, is "<document type>" optionally followed by the separator and a page ID.
  const fileNameWithoutExtension = path.basename(relativeFilePath, path.extname(relativeFilePath));
  const [ documentType, pageId ] = fileNameWithoutExtension.split(DOCUMENT_TYPE_AND_PAGE_ID_SEPARATOR);

  return new Record({
    id: hash,
    serviceId: path.dirname(relativeFilePath),
    documentType,
    pageId,
    mimeType: mime.getType(relativeFilePath),
    fetchDate: new Date(date),
    isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking),
    isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter),
    snapshotIds,
  });
}
74
+
75
/**
 * Builds "<documentType>[<separator><pageId>].<extension>"; the page ID part is
 * left out when `pageId` is falsy.
 */
function generateFileName(documentType, pageId, extension) {
  const pageIdSuffix = pageId ? `${DOCUMENT_TYPE_AND_PAGE_ID_SEPARATOR}${pageId}` : '';

  return `${documentType}${pageIdSuffix}.${extension}`;
}
78
+
79
/**
 * Builds the "<serviceId>/<file name>" path of a document, always using `/` as separator.
 *
 * @returns {string} The path; its extension is `*` when no extension is known for `mimeType`.
 */
export function generateFilePath(serviceId, documentType, pageId, mimeType) {
  // If mime type is undefined, an asterisk is set as an extension. Used to match all files for the given service ID, document type and page ID when mime type is unknown.
  const knownExtension = mime.getExtension(mimeType);
  const fileName = generateFileName(documentType, pageId, knownExtension || '*');

  // Do not use `path.join` as even for Windows, the path should be with `/` and not `\`. See https://github.com/ambanum/OpenTermsArchive/runs/8110230474?check_suite_focus=true#step:7:125
  return `${serviceId}/${fileName}`;
}