@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,721 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
+ import chai from 'chai';
6
+ import config from 'config';
7
+ import { MongoClient } from 'mongodb';
8
+
9
+ import Record from '../../record.js';
10
+
11
+ import MongoRepository from './index.js';
12
+
13
// Chai assertion entry point used throughout this suite.
const expect = chai.expect;
// ES modules do not define __dirname, so it is derived from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Raw driver client built from the `recorder.snapshots.storage.mongo` connection URI.
const connectionURI = config.get('recorder.snapshots.storage.mongo').connectionURI;
const client = new MongoClient(connectionURI);

// Attributes shared by the records created in the tests below.
const SERVICE_PROVIDER_ID = 'test_service';
const DOCUMENT_TYPE = 'Terms of Service';
const PAGE_ID = 'community-standards-hate-speech';
const CONTENT = 'ToS fixture data with UTF-8 çhãràčtęrs';
const MIME_TYPE = 'text/html';

// Three distinct fetch dates: a reference date, one day later and six hours earlier.
const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z');
const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad';

// Binary fixtures, read synchronously when this module is loaded.
const readFixture = relativePath => fs.readFileSync(path.resolve(__dirname, relativePath));
const PDF_CONTENT = readFixture('../../../../../test/fixtures/terms.pdf');
const UPDATED_PDF_CONTENT = readFixture('../../../../../test/fixtures/termsModified.pdf');
const PDF_MIME_TYPE = 'application/pdf';

// Driver collection handle; assigned in the suite-level `before` hook.
let collection;
33
+
34
+ describe('MongoRepository', () => {
35
+ let subject;
36
+
37
// Initializes the repository under test and opens a separate raw driver
// connection that the tests use to inspect what was written to the collection.
before(async () => {
  const mongoConfig = config.get('recorder.snapshots.storage.mongo');

  subject = new MongoRepository(mongoConfig);
  await subject.initialize();
  await client.connect();

  collection = client.db(mongoConfig.database).collection(mongoConfig.collection);
});

// Closes the raw driver connection opened above so it does not outlive the suite.
after(async () => {
  await client.close();
});
45
+
46
+ describe('#save', () => {
47
+ let record;
48
+ let mongoDocument;
49
+ let numberOfRecordsBefore;
50
+ let numberOfRecordsAfter;
51
+
52
// Saves one record into an empty collection and inspects both the returned
// record and the document stored in MongoDB.
context('when it is the first record', () => {
  // Matches every stored record of the document under test.
  const documentFilter = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  before(async () => {
    // `countDocuments` replaces the deprecated `find().count()` cursor method.
    numberOfRecordsBefore = await collection.countDocuments(documentFilter);

    record = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      pageId: PAGE_ID,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
    }));

    numberOfRecordsAfter = await collection.countDocuments(documentFilter);

    mongoDocument = await collection.findOne(documentFilter);
  });

  after(async () => subject.removeAll());

  it('saves the record', () => {
    expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
  });

  it('returns the record id', () => {
    expect(mongoDocument._id.toString()).to.equal(record.id);
  });

  it('returns a boolean to know if it is the first record', () => {
    expect(record.isFirstRecord).to.be.true;
  });

  it('stores the service ID', () => {
    expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
  });

  it('stores the document type', () => {
    expect(mongoDocument.documentType).to.include(DOCUMENT_TYPE);
  });

  it('stores information that it is the first record for this specific document', () => {
    expect(mongoDocument.isFirstRecord).to.be.true;
  });

  it('stores the proper content', () => {
    expect(mongoDocument.content).to.equal(CONTENT);
  });

  context('when provided', () => {
    it('stores the fetch date', () => {
      expect(new Date(mongoDocument.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
    });

    it('stores the MIME type', () => {
      expect(mongoDocument.mimeType).to.equal(MIME_TYPE);
    });

    it('stores the snapshot ID', () => {
      // Stored IDs are stringified before being compared with the string fixture.
      expect(mongoDocument.snapshotIds.map(snapshotId => snapshotId.toString())).to.deep.equal([SNAPSHOT_ID]);
    });

    it('stores the page ID', () => {
      expect(mongoDocument.pageId).to.equal(PAGE_ID);
    });
  });
});
128
+
129
// Saves a second record with different content after an initial one and checks
// that it is stored and not flagged as the first record.
context('when it is not the first record', () => {
  const UPDATED_CONTENT = `${CONTENT} updated`;
  // Matches every stored record of the document under test.
  const documentFilter = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  before(async () => {
    // Seed an initial record; its return value is not needed by the assertions.
    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
    }));

    // `countDocuments` replaces the deprecated `find().count()` cursor method.
    numberOfRecordsBefore = await collection.countDocuments(documentFilter);

    record = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: UPDATED_CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
    }));

    numberOfRecordsAfter = await collection.countDocuments(documentFilter);

    // Fetch the single document with the highest `created_at` value.
    [mongoDocument] = await collection.find(documentFilter).limit(1).sort({ created_at: -1 }).toArray();
  });

  after(async () => subject.removeAll());

  it('saves the record', () => {
    expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
  });

  it('returns the record id', () => {
    expect(mongoDocument._id.toString()).to.equal(record.id);
  });

  it('returns a boolean to know if it is the first record', () => {
    expect(record.isFirstRecord).to.be.false;
  });
});
181
+
182
// Saves the same content twice (with a later fetch date the second time) and
// checks that the second save stores nothing and returns no id.
context('when the content has not changed', () => {
  // Matches every stored record of the document under test.
  const documentFilter = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  before(async () => {
    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
    }));

    // `countDocuments` replaces the deprecated `find().count()` cursor method.
    numberOfRecordsBefore = await collection.countDocuments(documentFilter);

    record = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE_LATER,
    }));

    numberOfRecordsAfter = await collection.countDocuments(documentFilter);
  });

  after(async () => subject.removeAll());

  it('does not save the record', () => {
    expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore);
  });

  it('returns no id', () => {
    expect(record.id).to.be.undefined;
  });
});
221
+
222
// Saves a record flagged `isRefilter` after an initial record and checks that
// it is stored with the flag set.
context('when it is a refilter', () => {
  const REFILTERED_CONTENT = `${CONTENT} refiltered`;
  // Matches every stored record of the document under test.
  const documentFilter = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  before(async () => {
    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE_EARLIER,
    })); // A refilter cannot be the first record

    // `countDocuments` replaces the deprecated `find().count()` cursor method.
    numberOfRecordsBefore = await collection.countDocuments(documentFilter);

    record = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: REFILTERED_CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
      isRefilter: true,
    }));

    numberOfRecordsAfter = await collection.countDocuments(documentFilter);

    // Fetch the single document with the highest `created_at` value.
    [mongoDocument] = await collection.find(documentFilter).limit(1).sort({ created_at: -1 }).toArray();
  });

  after(async () => subject.removeAll());

  it('saves the record', () => {
    expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
  });

  it('returns the record id', () => {
    expect(mongoDocument._id.toString()).to.equal(record.id);
  });

  it('stores information that it is a refilter of this specific document', () => {
    expect(mongoDocument.isRefilter).to.be.true;
  });
});
274
+
275
// Saves a record whose content is a PDF buffer and checks that the binary
// content and MIME type are stored.
context('with PDF document', () => {
  // The same filter is used for the "before" and "after" counts so that the
  // two numbers are comparable.
  const documentFilter = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  before(async () => {
    // `countDocuments` replaces the deprecated `find().count()` cursor method.
    numberOfRecordsBefore = await collection.countDocuments(documentFilter);

    record = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: PDF_CONTENT,
      mimeType: PDF_MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
    }));

    numberOfRecordsAfter = await collection.countDocuments(documentFilter);

    mongoDocument = await collection.findOne(documentFilter);
  });

  after(async () => subject.removeAll());

  it('saves the record', () => {
    expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
  });

  it('returns the record id', () => {
    expect(mongoDocument._id.toString()).to.equal(record.id);
  });

  it('stores the proper content', () => {
    // `Buffer.compare` returns 0 when both byte sequences are identical.
    const isSameContent = Buffer.compare(mongoDocument.content.buffer, PDF_CONTENT) === 0;

    expect(isSameContent).to.be.true;
  });

  it('stores the MIME type', () => {
    expect(mongoDocument.mimeType).to.equal(PDF_MIME_TYPE);
  });
});
324
+
325
// A record saved without `snapshotIds` must be stored without that field,
// while its other identifying attributes are still persisted.
context('when there is no snapshots IDs specified', () => {
  before(async () => {
    const recordWithoutSnapshots = new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      pageId: PAGE_ID,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
    });

    record = await subject.save(recordWithoutSnapshots);

    const storedDocumentQuery = {
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
    };

    mongoDocument = await collection.findOne(storedDocumentQuery);
  });

  after(async () => subject.removeAll());

  it('does not store snapshots IDs', () => {
    const { snapshotIds } = mongoDocument;

    expect(snapshotIds).to.be.undefined;
  });

  it('stores the service ID', () => {
    const { serviceId } = mongoDocument;

    expect(serviceId).to.include(SERVICE_PROVIDER_ID);
  });

  it('stores the document type', () => {
    const { documentType } = mongoDocument;

    expect(documentType).to.include(DOCUMENT_TYPE);
  });

  it('stores the page ID', () => {
    const { pageId } = mongoDocument;

    expect(pageId).to.include(PAGE_ID);
  });
});
360
+
361
// A record saved with several snapshot IDs must store each of them, along
// with its other identifying attributes.
context('when there are many snapshots IDs specified', () => {
  const SNAPSHOT_ID_1 = '61af86dc5ff5caa74ae926ad';
  const SNAPSHOT_ID_2 = '630cdfa67d2e3cc51f6e284c';

  before(async () => {
    const recordWithManySnapshots = new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      pageId: PAGE_ID,
      content: CONTENT,
      mimeType: MIME_TYPE,
      fetchDate: FETCH_DATE,
      snapshotIds: [ SNAPSHOT_ID_1, SNAPSHOT_ID_2 ],
    });

    record = await subject.save(recordWithManySnapshots);

    const storedDocumentQuery = {
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
    };

    mongoDocument = await collection.findOne(storedDocumentQuery);
  });

  after(async () => subject.removeAll());

  it('stores snapshots IDs', () => {
    // Stored IDs are stringified before being compared with the string fixtures.
    const storedIds = mongoDocument.snapshotIds.map(storedId => storedId.toString());

    for (const expectedId of [ SNAPSHOT_ID_1, SNAPSHOT_ID_2 ]) {
      expect(storedIds).to.include(expectedId);
    }
  });

  it('stores the service ID', () => {
    const { serviceId } = mongoDocument;

    expect(serviceId).to.include(SERVICE_PROVIDER_ID);
  });

  it('stores the document type', () => {
    const { documentType } = mongoDocument;

    expect(documentType).to.include(DOCUMENT_TYPE);
  });

  it('stores the page ID', () => {
    const { pageId } = mongoDocument;

    expect(pageId).to.include(PAGE_ID);
  });
});
403
+ });
404
+
405
// Saves one record, reads it back through `findById` and checks every
// attribute of the returned record; also covers the unknown-id case.
describe('#findById', () => {
  let record;
  let id;

  before(async () => {
    const savedRecord = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      pageId: PAGE_ID,
      content: CONTENT,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    id = savedRecord.id;
    record = await subject.findById(id);
  });

  after(async () => subject.removeAll());

  it('returns the record id', () => {
    expect(record.id).to.include(id);
  });

  it('returns a boolean to know if it is the first record', () => {
    expect(record.isFirstRecord).to.be.true;
  });

  it('returns the service ID', () => {
    expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID);
  });

  it('returns the document type', () => {
    expect(record.documentType).to.equal(DOCUMENT_TYPE);
  });

  it('returns the content', async () => {
    expect(record.content).to.equal(CONTENT);
  });

  it('returns the fetch date', () => {
    const returnedTimestamp = new Date(record.fetchDate).getTime();

    expect(returnedTimestamp).to.equal(FETCH_DATE.getTime());
  });

  it('returns the MIME type', () => {
    expect(record.mimeType).to.equal(MIME_TYPE);
  });

  it('returns the snapshot ID', () => {
    expect(record.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
  });

  it('returns the page ID', () => {
    expect(record.pageId).to.equal(PAGE_ID);
  });

  context('when requested record does not exist', () => {
    it('returns null', async () => {
      const missingRecord = await subject.findById('inexistantID');

      expect(missingRecord).to.equal(null);
    });
  });
});
467
+
468
// Saves three records with different contents and fetch dates, then checks
// what `findAll` returns: how many, of which type and in which order.
describe('#findAll', () => {
  let records;

  before(async () => {
    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: `${CONTENT} - updated`,
      fetchDate: FETCH_DATE_LATER,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    // Saved last but carrying the earliest fetch date, so that insertion order
    // and fetch-date order differ.
    await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: `${CONTENT} - updated 2`,
      isRefilter: true,
      fetchDate: FETCH_DATE_EARLIER,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    records = await subject.findAll();
  });

  after(async () => subject.removeAll());

  it('returns all records', () => {
    expect(records.length).to.equal(3);
  });

  it('returns Record objects', () => {
    for (const record of records) {
      expect(record).to.be.an.instanceof(Record);
    }
  });

  it('returns records in ascending order', () => {
    expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
  });
});
526
+
527
// Saves three records one after the other and checks the number reported by
// `count`.
describe('#count', () => {
  let count;

  before(async () => {
    const recordsToSave = [
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: `${CONTENT} - updated`,
        fetchDate: FETCH_DATE_LATER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: `${CONTENT} - updated 2`,
        isRefilter: true,
        fetchDate: FETCH_DATE_EARLIER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
    ];

    // Saves are awaited one at a time, in the order listed above.
    for (const recordToSave of recordsToSave) {
      await subject.save(recordToSave);
    }

    count = await subject.count();
  });

  after(async () => subject.removeAll());

  it('returns the proper count', async () => {
    expect(count).to.equal(3);
  });
});
566
+
567
// Checks `findLatest` for a service/document pair: after two successive saves
// (HTML and PDF variants), and when nothing has been saved.
describe('#findLatest', () => {
  context('when there are records for the given service', () => {
    let lastSnapshotId;
    let latestRecord;

    context('with HTML document', () => {
      const UPDATED_CONTENT = `${CONTENT} (with additional content to trigger a record)`;

      before(async () => {
        await subject.save(new Record({
          serviceId: SERVICE_PROVIDER_ID,
          documentType: DOCUMENT_TYPE,
          content: CONTENT,
          fetchDate: FETCH_DATE,
          mimeType: MIME_TYPE,
        }));

        // Keep the id of the second save: it is the one `findLatest` is expected to return.
        ({ id: lastSnapshotId } = await subject.save(new Record({
          serviceId: SERVICE_PROVIDER_ID,
          documentType: DOCUMENT_TYPE,
          content: UPDATED_CONTENT,
          mimeType: MIME_TYPE,
          fetchDate: FETCH_DATE_LATER,
        })));

        latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);
      });

      after(async () => subject.removeAll());

      it('returns the latest record id', () => {
        expect(latestRecord.id).to.include(lastSnapshotId);
      });

      it('returns the latest record content', async () => {
        expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT);
      });

      it('returns the latest record mime type', () => {
        expect(latestRecord.mimeType).to.equal(MIME_TYPE);
      });
    });

    context('with PDF document', () => {
      before(async () => {
        await subject.save(new Record({
          serviceId: SERVICE_PROVIDER_ID,
          documentType: DOCUMENT_TYPE,
          content: PDF_CONTENT,
          mimeType: PDF_MIME_TYPE,
          fetchDate: FETCH_DATE,
        }));

        // Keep the id of the second save: it is the one `findLatest` is expected to return.
        ({ id: lastSnapshotId } = await subject.save(new Record({
          serviceId: SERVICE_PROVIDER_ID,
          documentType: DOCUMENT_TYPE,
          content: UPDATED_PDF_CONTENT,
          mimeType: PDF_MIME_TYPE,
          fetchDate: FETCH_DATE_LATER,
        })));

        latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);
      });

      after(async () => subject.removeAll());

      it('returns the latest record id', () => {
        expect(latestRecord.id).to.include(lastSnapshotId);
      });

      it('returns the latest record content', () => {
        // `Buffer.compare` returns 0 when both byte sequences are identical.
        const isSameContent = Buffer.compare(latestRecord.content, UPDATED_PDF_CONTENT) === 0;

        expect(isSameContent).to.be.true;
      });

      it('returns the latest record mime type', () => {
        expect(latestRecord.mimeType).to.equal(PDF_MIME_TYPE);
      });
    });
  });

  context('when there are no records for the given service', () => {
    let latestRecord;

    before(async () => {
      latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);
    });

    it('returns null', () => {
      expect(latestRecord).to.equal(null);
    });
  });
});
664
+
665
// Saves three records, walks the repository with `iterate` and checks which
// ids were visited and in which fetch-date order.
describe('#iterate', () => {
  const expectedIds = [];
  const ids = [];
  const fetchDates = [];

  before(async () => {
    const recordsToSave = [
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: `${CONTENT} - updated`,
        fetchDate: FETCH_DATE_LATER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
      new Record({
        serviceId: SERVICE_PROVIDER_ID,
        documentType: DOCUMENT_TYPE,
        content: `${CONTENT} - updated 2`,
        isRefilter: true,
        fetchDate: FETCH_DATE_EARLIER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      }),
    ];

    // Saves are awaited one at a time; each returned id is remembered.
    for (const recordToSave of recordsToSave) {
      const savedRecord = await subject.save(recordToSave);

      expectedIds.push(savedRecord.id);
    }

    // Collect what the iterator yields, in the order it yields it.
    for await (const iteratedRecord of subject.iterate()) {
      ids.push(iteratedRecord.id);
      fetchDates.push(iteratedRecord.fetchDate);
    }
  });

  after(async () => subject.removeAll());

  it('iterates through all records', async () => {
    expect(ids).to.have.members(expectedIds);
  });

  it('iterates in ascending order', async () => {
    const expectedOrder = [ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ];

    expect(fetchDates).to.deep.equal(expectedOrder);
  });
});
721
+ });