@opentermsarchive/engine 0.26.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +1 -469
  2. package/bin/ota-track.js +3 -3
  3. package/bin/ota-validate.js +2 -2
  4. package/bin/ota.js +1 -1
  5. package/config/default.json +1 -1
  6. package/config/test.json +2 -2
  7. package/package.json +6 -7
  8. package/scripts/dataset/export/index.js +4 -4
  9. package/scripts/dataset/export/index.test.js +11 -17
  10. package/scripts/dataset/export/test/fixtures/dataset/README.md +1 -1
  11. package/scripts/declarations/lint/index.mocha.js +1 -1
  12. package/scripts/declarations/utils/index.js +12 -12
  13. package/scripts/declarations/validate/definitions.js +1 -1
  14. package/scripts/declarations/validate/index.mocha.js +30 -34
  15. package/scripts/declarations/validate/service.history.schema.js +11 -11
  16. package/scripts/declarations/validate/service.schema.js +13 -13
  17. package/scripts/history/migrate-services.js +4 -4
  18. package/scripts/history/update-to-full-hash.js +2 -2
  19. package/scripts/import/index.js +14 -14
  20. package/scripts/rewrite/config/rewrite-snapshots.json +1 -1
  21. package/scripts/rewrite/config/rewrite-versions.json +1 -1
  22. package/scripts/rewrite/rewrite-snapshots.js +3 -3
  23. package/scripts/rewrite/rewrite-versions.js +14 -14
  24. package/scripts/utils/renamer/README.md +3 -3
  25. package/scripts/utils/renamer/index.js +13 -13
  26. package/src/archivist/errors.js +1 -1
  27. package/src/archivist/extract/exports.js +3 -0
  28. package/src/archivist/{filter → extract}/index.js +23 -27
  29. package/src/archivist/extract/index.test.js +516 -0
  30. package/src/archivist/index.js +101 -140
  31. package/src/archivist/index.test.js +178 -166
  32. package/src/archivist/recorder/index.js +11 -55
  33. package/src/archivist/recorder/index.test.js +310 -356
  34. package/src/archivist/recorder/record.js +18 -7
  35. package/src/archivist/recorder/repositories/git/dataMapper.js +41 -31
  36. package/src/archivist/recorder/repositories/git/index.js +11 -15
  37. package/src/archivist/recorder/repositories/git/index.test.js +1058 -463
  38. package/src/archivist/recorder/repositories/interface.js +8 -6
  39. package/src/archivist/recorder/repositories/mongo/dataMapper.js +21 -14
  40. package/src/archivist/recorder/repositories/mongo/index.js +8 -8
  41. package/src/archivist/recorder/repositories/mongo/index.test.js +898 -479
  42. package/src/archivist/recorder/snapshot.js +5 -0
  43. package/src/archivist/recorder/snapshot.test.js +65 -0
  44. package/src/archivist/recorder/version.js +14 -0
  45. package/src/archivist/recorder/version.test.js +65 -0
  46. package/src/archivist/services/index.js +60 -51
  47. package/src/archivist/services/index.test.js +63 -83
  48. package/src/archivist/services/service.js +26 -22
  49. package/src/archivist/services/service.test.js +46 -68
  50. package/src/archivist/services/{pageDeclaration.js → sourceDocument.js} +11 -9
  51. package/src/archivist/services/{pageDeclaration.test.js → sourceDocument.test.js} +21 -21
  52. package/src/archivist/services/terms.js +26 -0
  53. package/src/archivist/services/{documentDeclaration.test.js → terms.test.js} +15 -15
  54. package/src/exports.js +2 -2
  55. package/src/index.js +16 -13
  56. package/src/logger/index.js +35 -36
  57. package/src/notifier/index.js +8 -8
  58. package/src/tracker/index.js +6 -6
  59. package/src/archivist/filter/exports.js +0 -3
  60. package/src/archivist/filter/index.test.js +0 -564
  61. package/src/archivist/recorder/record.test.js +0 -91
  62. package/src/archivist/services/documentDeclaration.js +0 -26
  63. /package/scripts/utils/renamer/rules/{documentTypes.json → termsTypes.json} +0 -0
  64. /package/scripts/utils/renamer/rules/{documentTypesByService.json → termsTypesByService.json} +0 -0
@@ -4,9 +4,11 @@ import { fileURLToPath } from 'url';
4
4
 
5
5
  import chai from 'chai';
6
6
  import config from 'config';
7
+ import mime from 'mime';
7
8
  import { MongoClient } from 'mongodb';
8
9
 
9
- import Record from '../../record.js';
10
+ import Snapshot from '../../snapshot.js';
11
+ import Version from '../../version.js';
10
12
 
11
13
  import MongoRepository from './index.js';
12
14
 
@@ -17,705 +19,1122 @@ const { connectionURI } = config.get('recorder.snapshots.storage.mongo');
17
19
  const client = new MongoClient(connectionURI);
18
20
 
19
21
  const SERVICE_PROVIDER_ID = 'test_service';
20
- const DOCUMENT_TYPE = 'Terms of Service';
21
- const PAGE_ID = 'community-standards-hate-speech';
22
+ const TERMS_TYPE = 'Terms of Service';
23
+ const DOCUMENT_ID = 'community-standards-hate-speech';
22
24
  const CONTENT = 'ToS fixture data with UTF-8 çhãràčtęrs';
23
- const MIME_TYPE = 'text/html';
25
+
24
26
  const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
25
27
  const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
26
28
  const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z');
29
+
27
30
  const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad';
31
+ const HTML_MIME_TYPE = mime.getType('html');
32
+
28
33
  const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/terms.pdf'));
29
34
  const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/termsModified.pdf'));
30
- const PDF_MIME_TYPE = 'application/pdf';
35
+ const PDF_MIME_TYPE = mime.getType('pdf');
31
36
 
32
37
  let collection;
33
38
 
34
39
  describe('MongoRepository', () => {
35
40
  let subject;
36
41
 
37
- before(async () => {
38
- subject = new MongoRepository(config.get('recorder.snapshots.storage.mongo'));
39
- await subject.initialize();
40
- await client.connect();
41
- const db = client.db(config.get('recorder.snapshots.storage.mongo.database'));
42
+ context('Version', () => {
43
+ before(async () => {
44
+ subject = new MongoRepository(config.get('recorder.versions.storage.mongo'));
45
+ await subject.initialize();
46
+ await client.connect();
47
+ const db = client.db(config.get('recorder.versions.storage.mongo.database'));
42
48
 
43
- collection = db.collection(config.get('recorder.snapshots.storage.mongo.collection'));
44
- });
49
+ collection = db.collection(config.get('recorder.versions.storage.mongo.collection'));
50
+ });
45
51
 
46
- describe('#save', () => {
47
- let record;
48
- let mongoDocument;
49
- let numberOfRecordsBefore;
50
- let numberOfRecordsAfter;
52
+ describe('#save', () => {
53
+ let record;
54
+ let mongoDocument;
55
+ let numberOfRecordsBefore;
56
+ let numberOfRecordsAfter;
51
57
 
52
- context('when it is the first record', () => {
53
- before(async () => {
54
- numberOfRecordsBefore = await collection.find({
55
- serviceId: SERVICE_PROVIDER_ID,
56
- documentType: DOCUMENT_TYPE,
57
- }).count();
58
+ context('when it is the first record', () => {
59
+ before(async () => {
60
+ numberOfRecordsBefore = await collection.find({
61
+ serviceId: SERVICE_PROVIDER_ID,
62
+ termsType: TERMS_TYPE,
63
+ }).count();
58
64
 
59
- (record = await subject.save(new Record({
60
- serviceId: SERVICE_PROVIDER_ID,
61
- documentType: DOCUMENT_TYPE,
62
- pageId: PAGE_ID,
63
- content: CONTENT,
64
- mimeType: MIME_TYPE,
65
- fetchDate: FETCH_DATE,
66
- snapshotIds: [SNAPSHOT_ID],
67
- })));
65
+ (record = await subject.save(new Version({
66
+ serviceId: SERVICE_PROVIDER_ID,
67
+ termsType: TERMS_TYPE,
68
+ content: CONTENT,
69
+ fetchDate: FETCH_DATE,
70
+ snapshotIds: [SNAPSHOT_ID],
71
+ })));
68
72
 
69
- numberOfRecordsAfter = await collection.find({
70
- serviceId: SERVICE_PROVIDER_ID,
71
- documentType: DOCUMENT_TYPE,
72
- }).count();
73
+ numberOfRecordsAfter = await collection.find({
74
+ serviceId: SERVICE_PROVIDER_ID,
75
+ termsType: TERMS_TYPE,
76
+ }).count();
73
77
 
74
- (mongoDocument = await collection.findOne({
75
- serviceId: SERVICE_PROVIDER_ID,
76
- documentType: DOCUMENT_TYPE,
77
- }));
78
- });
78
+ (mongoDocument = await collection.findOne({
79
+ serviceId: SERVICE_PROVIDER_ID,
80
+ termsType: TERMS_TYPE,
81
+ }));
82
+ });
79
83
 
80
- after(async () => subject.removeAll());
84
+ after(async () => subject.removeAll());
81
85
 
82
- it('saves the record', () => {
83
- expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
84
- });
86
+ it('saves the record', () => {
87
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
88
+ });
85
89
 
86
- it('returns the record id', () => {
87
- expect(mongoDocument._id.toString()).to.equal(record.id);
88
- });
90
+ it('returns the record id', () => {
91
+ expect(mongoDocument._id.toString()).to.equal(record.id);
92
+ });
89
93
 
90
- it('returns a boolean to know if it is the first record', () => {
91
- expect(record.isFirstRecord).to.be.true;
92
- });
94
+ it('states that it is the first record', () => {
95
+ expect(record.isFirstRecord).to.be.true;
96
+ });
93
97
 
94
- it('stores the service ID', () => {
95
- expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
96
- });
98
+ it('stores the service ID', () => {
99
+ expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
100
+ });
97
101
 
98
- it('stores the terms type', () => {
99
- expect(mongoDocument.documentType).to.include(DOCUMENT_TYPE);
100
- });
102
+ it('stores the terms type', () => {
103
+ expect(mongoDocument.termsType).to.include(TERMS_TYPE);
104
+ });
101
105
 
102
- it('stores information that it is the first record for this specific document', () => {
103
- expect(mongoDocument.isFirstRecord).to.be.true;
104
- });
106
+ it('stores information that it is the first record for these specific terms', () => {
107
+ expect(mongoDocument.isFirstRecord).to.be.true;
108
+ });
105
109
 
106
- it('stores the proper content', () => {
107
- expect(mongoDocument.content).to.equal(CONTENT);
108
- });
110
+ it('stores the proper content', () => {
111
+ expect(mongoDocument.content).to.equal(CONTENT);
112
+ });
109
113
 
110
- context('when provided', () => {
111
114
  it('stores the fetch date', () => {
112
115
  expect(new Date(mongoDocument.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
113
116
  });
114
117
 
115
- it('stores the MIME type', () => {
116
- expect(mongoDocument.mimeType).to.equal(MIME_TYPE);
117
- });
118
-
119
118
  it('stores the snapshot ID', () => {
120
119
  expect(mongoDocument.snapshotIds.map(snapshotId => snapshotId.toString())).to.deep.equal([SNAPSHOT_ID]);
121
120
  });
121
+ });
122
+
123
+ context('when it is not the first record', () => {
124
+ const UPDATED_CONTENT = `${CONTENT} updated`;
122
125
 
123
- it('stores the page ID', () => {
124
- expect(mongoDocument.pageId).to.equal(PAGE_ID);
126
+ before(async () => {
127
+ (record = await subject.save(new Version({
128
+ serviceId: SERVICE_PROVIDER_ID,
129
+ termsType: TERMS_TYPE,
130
+ content: CONTENT,
131
+ fetchDate: FETCH_DATE,
132
+ snapshotIds: [SNAPSHOT_ID],
133
+ })));
134
+
135
+ numberOfRecordsBefore = await collection.find({
136
+ serviceId: SERVICE_PROVIDER_ID,
137
+ termsType: TERMS_TYPE,
138
+ }).count();
139
+
140
+ (record = await subject.save(new Version({
141
+ serviceId: SERVICE_PROVIDER_ID,
142
+ termsType: TERMS_TYPE,
143
+ content: UPDATED_CONTENT,
144
+ fetchDate: FETCH_DATE,
145
+ snapshotIds: [SNAPSHOT_ID],
146
+ })));
147
+
148
+ numberOfRecordsAfter = await collection.find({
149
+ serviceId: SERVICE_PROVIDER_ID,
150
+ termsType: TERMS_TYPE,
151
+ }).count();
152
+
153
+ ([mongoDocument] = await collection.find({
154
+ serviceId: SERVICE_PROVIDER_ID,
155
+ termsType: TERMS_TYPE,
156
+ }).limit(1).sort({ created_at: -1 }).toArray());
157
+ });
158
+
159
+ after(async () => subject.removeAll());
160
+
161
+ it('saves the record', () => {
162
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
163
+ });
164
+
165
+ it('returns the record id', () => {
166
+ expect(mongoDocument._id.toString()).to.equal(record.id);
167
+ });
168
+
169
+ it('states that it is not the first record', () => {
170
+ expect(record.isFirstRecord).to.be.false;
125
171
  });
126
172
  });
127
- });
128
173
 
129
- context('when it is not the first record', () => {
130
- const UPDATED_CONTENT = `${CONTENT} updated`;
174
+ context('when the content has not changed', () => {
175
+ before(async () => {
176
+ await subject.save(new Version({
177
+ serviceId: SERVICE_PROVIDER_ID,
178
+ termsType: TERMS_TYPE,
179
+ content: CONTENT,
180
+ fetchDate: FETCH_DATE,
181
+ snapshotIds: [SNAPSHOT_ID],
182
+ }));
131
183
 
132
- before(async () => {
133
- (record = await subject.save(new Record({
134
- serviceId: SERVICE_PROVIDER_ID,
135
- documentType: DOCUMENT_TYPE,
136
- content: CONTENT,
137
- mimeType: MIME_TYPE,
138
- fetchDate: FETCH_DATE,
139
- snapshotIds: [SNAPSHOT_ID],
140
- })));
184
+ numberOfRecordsBefore = await collection.find({
185
+ serviceId: SERVICE_PROVIDER_ID,
186
+ termsType: TERMS_TYPE,
187
+ }).count();
141
188
 
142
- numberOfRecordsBefore = await collection.find({
143
- serviceId: SERVICE_PROVIDER_ID,
144
- documentType: DOCUMENT_TYPE,
145
- }).count();
189
+ (record = await subject.save(new Version({
190
+ serviceId: SERVICE_PROVIDER_ID,
191
+ termsType: TERMS_TYPE,
192
+ content: CONTENT,
193
+ fetchDate: FETCH_DATE_LATER,
194
+ snapshotIds: [SNAPSHOT_ID],
195
+ })));
146
196
 
147
- (record = await subject.save(new Record({
148
- serviceId: SERVICE_PROVIDER_ID,
149
- documentType: DOCUMENT_TYPE,
150
- content: UPDATED_CONTENT,
151
- mimeType: MIME_TYPE,
152
- fetchDate: FETCH_DATE,
153
- snapshotIds: [SNAPSHOT_ID],
154
- })));
197
+ numberOfRecordsAfter = await collection.find({
198
+ serviceId: SERVICE_PROVIDER_ID,
199
+ termsType: TERMS_TYPE,
200
+ }).count();
201
+ });
155
202
 
156
- numberOfRecordsAfter = await collection.find({
157
- serviceId: SERVICE_PROVIDER_ID,
158
- documentType: DOCUMENT_TYPE,
159
- }).count();
203
+ after(async () => subject.removeAll());
160
204
 
161
- ([mongoDocument] = await collection.find({
162
- serviceId: SERVICE_PROVIDER_ID,
163
- documentType: DOCUMENT_TYPE,
164
- }).limit(1).sort({ created_at: -1 }).toArray());
205
+ it('does not save the record', () => {
206
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore);
207
+ });
208
+
209
+ it('returns no id', () => {
210
+ expect(record.id).to.equal(undefined);
211
+ });
165
212
  });
166
213
 
167
- after(async () => subject.removeAll());
214
+ context('when it is an extracted only version', () => {
215
+ const EXTRACTED_ONLY_CONTENT = `${CONTENT} extracted only`;
168
216
 
169
- it('saves the record', () => {
170
- expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
171
- });
217
+ before(async () => {
218
+ await subject.save(new Version({
219
+ serviceId: SERVICE_PROVIDER_ID,
220
+ termsType: TERMS_TYPE,
221
+ content: CONTENT,
222
+ fetchDate: FETCH_DATE_EARLIER,
223
+ snapshotIds: [SNAPSHOT_ID],
224
+ })); // An extracted only version cannot be the first record
172
225
 
173
- it('returns the record id', () => {
174
- expect(mongoDocument._id.toString()).to.equal(record.id);
175
- });
226
+ numberOfRecordsBefore = await collection.find({
227
+ serviceId: SERVICE_PROVIDER_ID,
228
+ termsType: TERMS_TYPE,
229
+ }).count();
176
230
 
177
- it('returns a boolean to know if it is the first record', () => {
178
- expect(record.isFirstRecord).to.be.false;
179
- });
180
- });
231
+ (record = await subject.save(new Version({
232
+ serviceId: SERVICE_PROVIDER_ID,
233
+ termsType: TERMS_TYPE,
234
+ content: EXTRACTED_ONLY_CONTENT,
235
+ fetchDate: FETCH_DATE,
236
+ snapshotIds: [SNAPSHOT_ID],
237
+ isExtractOnly: true,
238
+ })));
181
239
 
182
- context('when the content has not changed', () => {
183
- before(async () => {
184
- await subject.save(new Record({
185
- serviceId: SERVICE_PROVIDER_ID,
186
- documentType: DOCUMENT_TYPE,
187
- content: CONTENT,
188
- mimeType: MIME_TYPE,
189
- fetchDate: FETCH_DATE,
190
- }));
240
+ numberOfRecordsAfter = await collection.find({
241
+ serviceId: SERVICE_PROVIDER_ID,
242
+ termsType: TERMS_TYPE,
243
+ }).count();
191
244
 
192
- numberOfRecordsBefore = await collection.find({
193
- serviceId: SERVICE_PROVIDER_ID,
194
- documentType: DOCUMENT_TYPE,
195
- }).count();
245
+ ([mongoDocument] = await collection.find({
246
+ serviceId: SERVICE_PROVIDER_ID,
247
+ termsType: TERMS_TYPE,
248
+ }).limit(1).sort({ created_at: -1 }).toArray());
249
+ });
196
250
 
197
- (record = await subject.save(new Record({
198
- serviceId: SERVICE_PROVIDER_ID,
199
- documentType: DOCUMENT_TYPE,
200
- content: CONTENT,
201
- mimeType: MIME_TYPE,
202
- fetchDate: FETCH_DATE_LATER,
203
- })));
251
+ after(async () => subject.removeAll());
204
252
 
205
- numberOfRecordsAfter = await collection.find({
206
- serviceId: SERVICE_PROVIDER_ID,
207
- documentType: DOCUMENT_TYPE,
208
- }).count();
253
+ it('saves the record', () => {
254
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
255
+ });
256
+
257
+ it('returns the record id', () => {
258
+ expect(mongoDocument._id.toString()).to.equal(record.id);
259
+ });
260
+
261
+ it('stores information that it is an extracted only version', () => {
262
+ expect(mongoDocument.isExtractOnly).to.be.true;
263
+ });
209
264
  });
210
265
 
211
- after(async () => subject.removeAll());
266
+ context('when one snapshot ID is specified', () => {
267
+ before(async () => {
268
+ (record = await subject.save(new Version({
269
+ serviceId: SERVICE_PROVIDER_ID,
270
+ termsType: TERMS_TYPE,
271
+ content: CONTENT,
272
+ fetchDate: FETCH_DATE,
273
+ snapshotIds: [SNAPSHOT_ID],
274
+ })));
275
+
276
+ (mongoDocument = await collection.findOne({
277
+ serviceId: SERVICE_PROVIDER_ID,
278
+ termsType: TERMS_TYPE,
279
+ }));
280
+ });
281
+
282
+ after(async () => subject.removeAll());
283
+
284
+ it('stores snapshot ID', () => {
285
+ const snapshotIds = mongoDocument.snapshotIds.map(id => id.toString());
286
+
287
+ expect(snapshotIds).to.include(SNAPSHOT_ID);
288
+ });
212
289
 
213
- it('does not save the record', () => {
214
- expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore);
290
+ it('stores the service ID', () => {
291
+ expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
292
+ });
293
+
294
+ it('stores the terms type', () => {
295
+ expect(mongoDocument.termsType).to.include(TERMS_TYPE);
296
+ });
215
297
  });
216
298
 
217
- it('returns no id', () => {
218
- expect(record.id).to.equal(undefined);
299
+ context('when there are many snapshots IDs specified', () => {
300
+ const SNAPSHOT_ID_1 = '61af86dc5ff5caa74ae926ad';
301
+ const SNAPSHOT_ID_2 = '630cdfa67d2e3cc51f6e284c';
302
+
303
+ before(async () => {
304
+ (record = await subject.save(new Version({
305
+ serviceId: SERVICE_PROVIDER_ID,
306
+ termsType: TERMS_TYPE,
307
+ content: CONTENT,
308
+ fetchDate: FETCH_DATE,
309
+ snapshotIds: [ SNAPSHOT_ID_1, SNAPSHOT_ID_2 ],
310
+ })));
311
+
312
+ (mongoDocument = await collection.findOne({
313
+ serviceId: SERVICE_PROVIDER_ID,
314
+ termsType: TERMS_TYPE,
315
+ }));
316
+ });
317
+
318
+ after(async () => subject.removeAll());
319
+
320
+ it('stores snapshots IDs', () => {
321
+ const snapshotIds = mongoDocument.snapshotIds.map(id => id.toString());
322
+
323
+ expect(snapshotIds).to.include(SNAPSHOT_ID_1);
324
+ expect(snapshotIds).to.include(SNAPSHOT_ID_2);
325
+ });
326
+
327
+ it('stores the service ID', () => {
328
+ expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
329
+ });
330
+
331
+ it('stores the terms type', () => {
332
+ expect(mongoDocument.termsType).to.include(TERMS_TYPE);
333
+ });
219
334
  });
220
335
  });
221
336
 
222
- context('when it is a refilter', () => {
223
- const REFILTERED_CONTENT = `${CONTENT} refiltered`;
337
+ describe('#findById', () => {
338
+ let record;
339
+ let id;
224
340
 
225
341
  before(async () => {
226
- await subject.save(new Record({
342
+ ({ id } = await subject.save(new Version({
227
343
  serviceId: SERVICE_PROVIDER_ID,
228
- documentType: DOCUMENT_TYPE,
344
+ termsType: TERMS_TYPE,
229
345
  content: CONTENT,
230
- mimeType: MIME_TYPE,
231
- fetchDate: FETCH_DATE_EARLIER,
232
- })); // A refilter cannot be the first record
233
-
234
- numberOfRecordsBefore = await collection.find({
235
- serviceId: SERVICE_PROVIDER_ID,
236
- documentType: DOCUMENT_TYPE,
237
- }).count();
238
-
239
- (record = await subject.save(new Record({
240
- serviceId: SERVICE_PROVIDER_ID,
241
- documentType: DOCUMENT_TYPE,
242
- content: REFILTERED_CONTENT,
243
- mimeType: MIME_TYPE,
244
346
  fetchDate: FETCH_DATE,
245
347
  snapshotIds: [SNAPSHOT_ID],
246
- isRefilter: true,
247
348
  })));
248
349
 
249
- numberOfRecordsAfter = await collection.find({
250
- serviceId: SERVICE_PROVIDER_ID,
251
- documentType: DOCUMENT_TYPE,
252
- }).count();
253
-
254
- ([mongoDocument] = await collection.find({
255
- serviceId: SERVICE_PROVIDER_ID,
256
- documentType: DOCUMENT_TYPE,
257
- }).limit(1).sort({ created_at: -1 }).toArray());
350
+ (record = await subject.findById(id));
258
351
  });
259
352
 
260
353
  after(async () => subject.removeAll());
261
354
 
262
- it('saves the record', () => {
263
- expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
355
+ it('returns a Version object', () => {
356
+ expect(record).to.be.an.instanceof(Version);
264
357
  });
265
358
 
266
359
  it('returns the record id', () => {
267
- expect(mongoDocument._id.toString()).to.equal(record.id);
360
+ expect(record.id).to.include(id);
361
+ });
362
+
363
+ it('states that it is the first record', () => {
364
+ expect(record.isFirstRecord).to.be.true;
365
+ });
366
+
367
+ it('returns the service ID', () => {
368
+ expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID);
369
+ });
370
+
371
+ it('returns the terms type', () => {
372
+ expect(record.termsType).to.equal(TERMS_TYPE);
373
+ });
374
+
375
+ it('returns the content', async () => {
376
+ expect(record.content).to.equal(CONTENT);
377
+ });
378
+
379
+ it('returns the fetch date', () => {
380
+ expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
268
381
  });
269
382
 
270
- it('stores information that it is a refilter of this specific document', () => {
271
- expect(mongoDocument.isRefilter).to.be.true;
383
+ it('returns the snapshot ID', () => {
384
+ expect(record.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
385
+ });
386
+
387
+ context('when requested record does not exist', () => {
388
+ it('returns null', async () => {
389
+ expect(await subject.findById('inexistantID')).to.equal(null);
390
+ });
272
391
  });
273
392
  });
274
393
 
275
- context('with PDF document', () => {
276
- before(async () => {
277
- numberOfRecordsBefore = await collection.find({
278
- serviceId: SERVICE_PROVIDER_ID,
279
- documentType: DOCUMENT_TYPE,
280
- content: PDF_CONTENT,
281
- mimeType: PDF_MIME_TYPE,
282
- }).count();
394
+ describe('#findAll', () => {
395
+ let records;
396
+ const expectedIds = [];
283
397
 
284
- (record = await subject.save(new Record({
398
+ before(async () => {
399
+ const { id: id1 } = await subject.save(new Version({
285
400
  serviceId: SERVICE_PROVIDER_ID,
286
- documentType: DOCUMENT_TYPE,
287
- content: PDF_CONTENT,
288
- mimeType: PDF_MIME_TYPE,
401
+ termsType: TERMS_TYPE,
402
+ content: CONTENT,
289
403
  fetchDate: FETCH_DATE,
290
404
  snapshotIds: [SNAPSHOT_ID],
291
- })));
405
+ }));
406
+
407
+ expectedIds.push(id1);
292
408
 
293
- numberOfRecordsAfter = await collection.find({
409
+ const { id: id2 } = await subject.save(new Version({
294
410
  serviceId: SERVICE_PROVIDER_ID,
295
- documentType: DOCUMENT_TYPE,
296
- }).count();
411
+ termsType: TERMS_TYPE,
412
+ content: `${CONTENT} - updated`,
413
+ fetchDate: FETCH_DATE_LATER,
414
+ snapshotIds: [SNAPSHOT_ID],
415
+ }));
297
416
 
298
- (mongoDocument = await collection.findOne({
417
+ expectedIds.push(id2);
418
+
419
+ const { id: id3 } = await subject.save(new Version({
299
420
  serviceId: SERVICE_PROVIDER_ID,
300
- documentType: DOCUMENT_TYPE,
421
+ termsType: TERMS_TYPE,
422
+ content: `${CONTENT} - updated 2`,
423
+ isExtractOnly: true,
424
+ fetchDate: FETCH_DATE_EARLIER,
425
+ snapshotIds: [SNAPSHOT_ID],
301
426
  }));
302
- });
303
427
 
304
- after(async () => subject.removeAll());
428
+ expectedIds.push(id3);
305
429
 
306
- it('saves the record', () => {
307
- expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
430
+ (records = await subject.findAll());
308
431
  });
309
432
 
310
- it('returns the record id', () => {
311
- expect(mongoDocument._id.toString()).to.equal(record.id);
312
- });
433
+ after(async () => subject.removeAll());
313
434
 
314
- it('stores the proper content', async () => {
315
- const isSameContent = Buffer.compare(mongoDocument.content.buffer, PDF_CONTENT) == 0;
435
+ it('returns all records', () => {
436
+ expect(records.length).to.equal(3);
437
+ });
316
438
 
317
- expect(isSameContent).to.be.true;
439
+ it('returns Version objects', () => {
440
+ for (const record of records) {
441
+ expect(record).to.be.an.instanceof(Version);
442
+ }
318
443
  });
319
444
 
320
- it('stores the MIME type', () => {
321
- expect(mongoDocument.mimeType).to.equal(PDF_MIME_TYPE);
445
+ it('returns records in ascending order', async () => {
446
+ expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
322
447
  });
323
448
  });
324
449
 
325
- context('when there is no snapshots IDs specified', () => {
450
+ describe('#count', () => {
451
+ let count;
452
+
326
453
  before(async () => {
327
- (record = await subject.save(new Record({
454
+ await subject.save(new Version({
328
455
  serviceId: SERVICE_PROVIDER_ID,
329
- documentType: DOCUMENT_TYPE,
330
- pageId: PAGE_ID,
456
+ termsType: TERMS_TYPE,
331
457
  content: CONTENT,
332
- mimeType: MIME_TYPE,
333
458
  fetchDate: FETCH_DATE,
334
- })));
335
-
336
- (mongoDocument = await collection.findOne({
459
+ snapshotIds: [SNAPSHOT_ID],
460
+ }));
461
+ await subject.save(new Version({
337
462
  serviceId: SERVICE_PROVIDER_ID,
338
- documentType: DOCUMENT_TYPE,
463
+ termsType: TERMS_TYPE,
464
+ content: `${CONTENT} - updated`,
465
+ fetchDate: FETCH_DATE_LATER,
466
+ snapshotIds: [SNAPSHOT_ID],
339
467
  }));
468
+ await subject.save(new Version({
469
+ serviceId: SERVICE_PROVIDER_ID,
470
+ termsType: TERMS_TYPE,
471
+ content: `${CONTENT} - updated 2`,
472
+ isExtractOnly: true,
473
+ fetchDate: FETCH_DATE_EARLIER,
474
+ snapshotIds: [SNAPSHOT_ID],
475
+ }));
476
+
477
+ (count = await subject.count());
340
478
  });
341
479
 
342
480
  after(async () => subject.removeAll());
343
481
 
344
- it('does not store snapshots IDs', () => {
345
- expect(mongoDocument.snapshotIds).to.be.undefined;
482
+ it('returns the proper count', async () => {
483
+ expect(count).to.equal(3);
346
484
  });
485
+ });
347
486
 
348
- it('stores the service ID', () => {
349
- expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
487
+ describe('#findLatest', () => {
488
+ context('when there are records for the given service', () => {
489
+ let lastSnapshotId;
490
+ let latestRecord;
491
+
492
+ context('with HTML document', () => {
493
+ const UPDATED_CONTENT = `${CONTENT} (with additional content to trigger a record)`;
494
+
495
+ before(async () => {
496
+ await subject.save(new Version({
497
+ serviceId: SERVICE_PROVIDER_ID,
498
+ termsType: TERMS_TYPE,
499
+ content: CONTENT,
500
+ fetchDate: FETCH_DATE,
501
+ snapshotIds: [SNAPSHOT_ID],
502
+ }));
503
+
504
+ ({ id: lastSnapshotId } = await subject.save(new Version({
505
+ serviceId: SERVICE_PROVIDER_ID,
506
+ termsType: TERMS_TYPE,
507
+ content: UPDATED_CONTENT,
508
+ fetchDate: FETCH_DATE_LATER,
509
+ snapshotIds: [SNAPSHOT_ID],
510
+ })));
511
+
512
+ latestRecord = await subject.findLatest(
513
+ SERVICE_PROVIDER_ID,
514
+ TERMS_TYPE,
515
+ );
516
+ });
517
+
518
+ after(async () => subject.removeAll());
519
+
520
+ it('returns a Version object', () => {
521
+ expect(latestRecord).to.be.an.instanceof(Version);
522
+ });
523
+
524
+ it('returns the latest record id', () => {
525
+ expect(latestRecord.id).to.include(lastSnapshotId);
526
+ });
527
+
528
+ it('returns the latest record content', async () => {
529
+ expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT);
530
+ });
531
+ });
350
532
  });
351
533
 
352
- it('stores the terms type', () => {
353
- expect(mongoDocument.documentType).to.include(DOCUMENT_TYPE);
354
- });
534
+ context('when there are no records for the given service', () => {
535
+ let latestRecord;
536
+
537
+ before(async () => {
538
+ latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
539
+ });
355
540
 
356
- it('stores the page ID', () => {
357
- expect(mongoDocument.pageId).to.include(PAGE_ID);
541
+ it('returns null', async () => {
542
+ expect(latestRecord).to.equal(null);
543
+ });
358
544
  });
359
545
  });
360
546
 
361
- context('when there are many snapshots IDs specified', () => {
362
- const SNAPSHOT_ID_1 = '61af86dc5ff5caa74ae926ad';
363
- const SNAPSHOT_ID_2 = '630cdfa67d2e3cc51f6e284c';
547
+ describe('#iterate', () => {
548
+ const expectedIds = [];
549
+ const ids = [];
550
+ const fetchDates = [];
364
551
 
365
552
  before(async () => {
366
- (record = await subject.save(new Record({
553
+ const { id: id1 } = await subject.save(new Version({
367
554
  serviceId: SERVICE_PROVIDER_ID,
368
- documentType: DOCUMENT_TYPE,
369
- pageId: PAGE_ID,
555
+ termsType: TERMS_TYPE,
370
556
  content: CONTENT,
371
- mimeType: MIME_TYPE,
372
557
  fetchDate: FETCH_DATE,
373
- snapshotIds: [ SNAPSHOT_ID_1, SNAPSHOT_ID_2 ],
374
- })));
558
+ snapshotIds: [SNAPSHOT_ID],
559
+ }));
560
+
561
+ expectedIds.push(id1);
375
562
 
376
- (mongoDocument = await collection.findOne({
563
+ const { id: id2 } = await subject.save(new Version({
377
564
  serviceId: SERVICE_PROVIDER_ID,
378
- documentType: DOCUMENT_TYPE,
565
+ termsType: TERMS_TYPE,
566
+ content: `${CONTENT} - updated`,
567
+ fetchDate: FETCH_DATE_LATER,
568
+ snapshotIds: [SNAPSHOT_ID],
379
569
  }));
380
- });
381
570
 
382
- after(async () => subject.removeAll());
571
+ expectedIds.push(id2);
572
+
573
+ const { id: id3 } = await subject.save(new Version({
574
+ serviceId: SERVICE_PROVIDER_ID,
575
+ termsType: TERMS_TYPE,
576
+ content: `${CONTENT} - updated 2`,
577
+ isExtractOnly: true,
578
+ fetchDate: FETCH_DATE_EARLIER,
579
+ snapshotIds: [SNAPSHOT_ID],
580
+ }));
383
581
 
384
- it('stores snapshots IDs', () => {
385
- const snapshotIds = mongoDocument.snapshotIds.map(id => id.toString());
582
+ expectedIds.push(id3);
386
583
 
387
- expect(snapshotIds).to.include(SNAPSHOT_ID_1);
388
- expect(snapshotIds).to.include(SNAPSHOT_ID_2);
584
+ for await (const record of subject.iterate()) {
585
+ ids.push(record.id);
586
+ fetchDates.push(record.fetchDate);
587
+ }
389
588
  });
390
589
 
391
- it('stores the service ID', () => {
392
- expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
393
- });
590
+ after(async () => subject.removeAll());
394
591
 
395
- it('stores the terms type', () => {
396
- expect(mongoDocument.documentType).to.include(DOCUMENT_TYPE);
592
+ it('iterates through all records', async () => {
593
+ expect(ids).to.have.members(expectedIds);
397
594
  });
398
595
 
399
- it('stores the page ID', () => {
400
- expect(mongoDocument.pageId).to.include(PAGE_ID);
596
+ it('iterates in ascending order', async () => {
597
+ expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
401
598
  });
402
599
  });
403
600
  });
404
601
 
405
- describe('#findById', () => {
406
- let record;
407
- let id;
408
-
602
+ context('Snapshot', () => {
409
603
  before(async () => {
410
- ({ id } = await subject.save(new Record({
411
- serviceId: SERVICE_PROVIDER_ID,
412
- documentType: DOCUMENT_TYPE,
413
- pageId: PAGE_ID,
414
- content: CONTENT,
415
- fetchDate: FETCH_DATE,
416
- snapshotIds: [SNAPSHOT_ID],
417
- mimeType: MIME_TYPE,
418
- })));
419
-
420
- (record = await subject.findById(id));
604
+ subject = new MongoRepository(config.get('recorder.snapshots.storage.mongo'));
605
+ await subject.initialize();
606
+ await client.connect();
607
+ const db = client.db(config.get('recorder.snapshots.storage.mongo.database'));
608
+
609
+ collection = db.collection(config.get('recorder.snapshots.storage.mongo.collection'));
421
610
  });
422
611
 
423
- after(async () => subject.removeAll());
612
+ describe('#save', () => {
613
+ let record;
614
+ let mongoDocument;
615
+ let numberOfRecordsBefore;
616
+ let numberOfRecordsAfter;
424
617
 
425
- it('returns the record id', () => {
426
- expect(record.id).to.include(id);
427
- });
618
+ context('when it is the first record', () => {
619
+ before(async () => {
620
+ numberOfRecordsBefore = await collection.find({
621
+ serviceId: SERVICE_PROVIDER_ID,
622
+ termsType: TERMS_TYPE,
623
+ }).count();
428
624
 
429
- it('returns a boolean to know if it is the first record', () => {
430
- expect(record.isFirstRecord).to.be.true;
431
- });
625
+ (record = await subject.save(new Snapshot({
626
+ serviceId: SERVICE_PROVIDER_ID,
627
+ termsType: TERMS_TYPE,
628
+ documentId: DOCUMENT_ID,
629
+ content: CONTENT,
630
+ mimeType: HTML_MIME_TYPE,
631
+ fetchDate: FETCH_DATE,
632
+ })));
432
633
 
433
- it('returns the service ID', () => {
434
- expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID);
435
- });
634
+ numberOfRecordsAfter = await collection.find({
635
+ serviceId: SERVICE_PROVIDER_ID,
636
+ termsType: TERMS_TYPE,
637
+ }).count();
436
638
 
437
- it('returns the terms type', () => {
438
- expect(record.documentType).to.equal(DOCUMENT_TYPE);
439
- });
639
+ (mongoDocument = await collection.findOne({
640
+ serviceId: SERVICE_PROVIDER_ID,
641
+ termsType: TERMS_TYPE,
642
+ }));
643
+ });
440
644
 
441
- it('returns the content', async () => {
442
- expect(record.content).to.equal(CONTENT);
443
- });
645
+ after(async () => subject.removeAll());
444
646
 
445
- it('returns the fetch date', () => {
446
- expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
447
- });
647
+ it('saves the record', () => {
648
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
649
+ });
448
650
 
449
- it('returns the MIME type', () => {
450
- expect(record.mimeType).to.equal(MIME_TYPE);
451
- });
651
+ it('returns the record id', () => {
652
+ expect(mongoDocument._id.toString()).to.equal(record.id);
653
+ });
452
654
 
453
- it('returns the snapshot ID', () => {
454
- expect(record.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
455
- });
655
+ it('states that it is the first record', () => {
656
+ expect(record.isFirstRecord).to.be.true;
657
+ });
456
658
 
457
- it('returns the page ID', () => {
458
- expect(record.pageId).to.equal(PAGE_ID);
459
- });
659
+ it('stores the service ID', () => {
660
+ expect(mongoDocument.serviceId).to.include(SERVICE_PROVIDER_ID);
661
+ });
460
662
 
461
- context('when requested record does not exist', () => {
462
- it('returns null', async () => {
463
- expect(await subject.findById('inexistantID')).to.equal(null);
464
- });
465
- });
466
- });
663
+ it('stores the terms type', () => {
664
+ expect(mongoDocument.termsType).to.include(TERMS_TYPE);
665
+ });
467
666
 
468
- describe('#findAll', () => {
469
- let records;
470
- const expectedIds = [];
667
+ it('stores information that it is the first record for these specific terms', () => {
668
+ expect(mongoDocument.isFirstRecord).to.be.true;
669
+ });
471
670
 
472
- before(async () => {
473
- const { id: id1 } = await subject.save(new Record({
474
- serviceId: SERVICE_PROVIDER_ID,
475
- documentType: DOCUMENT_TYPE,
476
- content: CONTENT,
477
- fetchDate: FETCH_DATE,
478
- snapshotIds: [SNAPSHOT_ID],
479
- mimeType: MIME_TYPE,
480
- }));
481
-
482
- expectedIds.push(id1);
483
-
484
- const { id: id2 } = await subject.save(new Record({
485
- serviceId: SERVICE_PROVIDER_ID,
486
- documentType: DOCUMENT_TYPE,
487
- content: `${CONTENT} - updated`,
488
- fetchDate: FETCH_DATE_LATER,
489
- snapshotIds: [SNAPSHOT_ID],
490
- mimeType: MIME_TYPE,
491
- }));
492
-
493
- expectedIds.push(id2);
494
-
495
- const { id: id3 } = await subject.save(new Record({
496
- serviceId: SERVICE_PROVIDER_ID,
497
- documentType: DOCUMENT_TYPE,
498
- content: `${CONTENT} - updated 2`,
499
- isRefilter: true,
500
- fetchDate: FETCH_DATE_EARLIER,
501
- snapshotIds: [SNAPSHOT_ID],
502
- mimeType: MIME_TYPE,
503
- }));
504
-
505
- expectedIds.push(id3);
506
-
507
- (records = await subject.findAll());
508
- });
671
+ it('stores the proper content', () => {
672
+ expect(mongoDocument.content).to.equal(CONTENT);
673
+ });
509
674
 
510
- after(async () => subject.removeAll());
675
+ it('stores the fetch date', () => {
676
+ expect(new Date(mongoDocument.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
677
+ });
511
678
 
512
- it('returns all records', () => {
513
- expect(records.length).to.equal(3);
514
- });
679
+ it('stores the MIME type', () => {
680
+ expect(mongoDocument.mimeType).to.equal(HTML_MIME_TYPE);
681
+ });
515
682
 
516
- it('returns Record objects', () => {
517
- for (const record of records) {
518
- expect(record).to.be.an.instanceof(Record);
519
- }
520
- });
683
+ it('stores the document ID', () => {
684
+ expect(mongoDocument.documentId).to.equal(DOCUMENT_ID);
685
+ });
686
+ });
521
687
 
522
- it('returns records in ascending order', async () => {
523
- expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
524
- });
525
- });
688
+ context('when it is not the first record', () => {
689
+ const UPDATED_CONTENT = `${CONTENT} updated`;
526
690
 
527
- describe('#count', () => {
528
- let count;
691
+ before(async () => {
692
+ (record = await subject.save(new Snapshot({
693
+ serviceId: SERVICE_PROVIDER_ID,
694
+ termsType: TERMS_TYPE,
695
+ content: CONTENT,
696
+ mimeType: HTML_MIME_TYPE,
697
+ fetchDate: FETCH_DATE,
698
+ })));
529
699
 
530
- before(async () => {
531
- await subject.save(new Record({
532
- serviceId: SERVICE_PROVIDER_ID,
533
- documentType: DOCUMENT_TYPE,
534
- content: CONTENT,
535
- fetchDate: FETCH_DATE,
536
- snapshotIds: [SNAPSHOT_ID],
537
- mimeType: MIME_TYPE,
538
- }));
539
- await subject.save(new Record({
540
- serviceId: SERVICE_PROVIDER_ID,
541
- documentType: DOCUMENT_TYPE,
542
- content: `${CONTENT} - updated`,
543
- fetchDate: FETCH_DATE_LATER,
544
- snapshotIds: [SNAPSHOT_ID],
545
- mimeType: MIME_TYPE,
546
- }));
547
- await subject.save(new Record({
548
- serviceId: SERVICE_PROVIDER_ID,
549
- documentType: DOCUMENT_TYPE,
550
- content: `${CONTENT} - updated 2`,
551
- isRefilter: true,
552
- fetchDate: FETCH_DATE_EARLIER,
553
- snapshotIds: [SNAPSHOT_ID],
554
- mimeType: MIME_TYPE,
555
- }));
556
-
557
- (count = await subject.count());
558
- });
700
+ numberOfRecordsBefore = await collection.find({
701
+ serviceId: SERVICE_PROVIDER_ID,
702
+ termsType: TERMS_TYPE,
703
+ }).count();
559
704
 
560
- after(async () => subject.removeAll());
705
+ (record = await subject.save(new Snapshot({
706
+ serviceId: SERVICE_PROVIDER_ID,
707
+ termsType: TERMS_TYPE,
708
+ content: UPDATED_CONTENT,
709
+ mimeType: HTML_MIME_TYPE,
710
+ fetchDate: FETCH_DATE,
711
+ })));
561
712
 
562
- it('returns the proper count', async () => {
563
- expect(count).to.equal(3);
564
- });
565
- });
713
+ numberOfRecordsAfter = await collection.find({
714
+ serviceId: SERVICE_PROVIDER_ID,
715
+ termsType: TERMS_TYPE,
716
+ }).count();
566
717
 
567
- describe('#findLatest', () => {
568
- context('when there are records for the given service', () => {
569
- let lastSnapshotId;
570
- let latestRecord;
718
+ ([mongoDocument] = await collection.find({
719
+ serviceId: SERVICE_PROVIDER_ID,
720
+ termsType: TERMS_TYPE,
721
+ }).limit(1).sort({ created_at: -1 }).toArray());
722
+ });
723
+
724
+ after(async () => subject.removeAll());
725
+
726
+ it('saves the record', () => {
727
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
728
+ });
729
+
730
+ it('returns the record id', () => {
731
+ expect(mongoDocument._id.toString()).to.equal(record.id);
732
+ });
571
733
 
572
- context('with HTML document', () => {
573
- const UPDATED_CONTENT = `${CONTENT} (with additional content to trigger a record)`;
734
+ it('states that it is not the first record', () => {
735
+ expect(record.isFirstRecord).to.be.false;
736
+ });
737
+ });
574
738
 
739
+ context('when the content has not changed', () => {
575
740
  before(async () => {
576
- await subject.save(new Record({
741
+ await subject.save(new Snapshot({
577
742
  serviceId: SERVICE_PROVIDER_ID,
578
- documentType: DOCUMENT_TYPE,
743
+ termsType: TERMS_TYPE,
579
744
  content: CONTENT,
745
+ mimeType: HTML_MIME_TYPE,
580
746
  fetchDate: FETCH_DATE,
581
- mimeType: MIME_TYPE,
582
747
  }));
583
748
 
584
- ({ id: lastSnapshotId } = await subject.save(new Record({
749
+ numberOfRecordsBefore = await collection.find({
585
750
  serviceId: SERVICE_PROVIDER_ID,
586
- documentType: DOCUMENT_TYPE,
587
- content: UPDATED_CONTENT,
588
- mimeType: MIME_TYPE,
751
+ termsType: TERMS_TYPE,
752
+ }).count();
753
+
754
+ (record = await subject.save(new Snapshot({
755
+ serviceId: SERVICE_PROVIDER_ID,
756
+ termsType: TERMS_TYPE,
757
+ content: CONTENT,
758
+ mimeType: HTML_MIME_TYPE,
589
759
  fetchDate: FETCH_DATE_LATER,
590
760
  })));
591
761
 
592
- latestRecord = await subject.findLatest(
593
- SERVICE_PROVIDER_ID,
594
- DOCUMENT_TYPE,
595
- );
762
+ numberOfRecordsAfter = await collection.find({
763
+ serviceId: SERVICE_PROVIDER_ID,
764
+ termsType: TERMS_TYPE,
765
+ }).count();
596
766
  });
597
767
 
598
768
  after(async () => subject.removeAll());
599
769
 
600
- it('returns the latest record id', () => {
601
- expect(latestRecord.id).to.include(lastSnapshotId);
602
- });
603
-
604
- it('returns the latest record content', async () => {
605
- expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT);
770
+ it('does not save the record', () => {
771
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore);
606
772
  });
607
773
 
608
- it('returns the latest record mime type', () => {
609
- expect(latestRecord.mimeType).to.equal(MIME_TYPE);
774
+ it('returns no id', () => {
775
+ expect(record.id).to.equal(undefined);
610
776
  });
611
777
  });
612
778
 
613
779
  context('with PDF document', () => {
614
780
  before(async () => {
615
- await subject.save(new Record({
781
+ numberOfRecordsBefore = await collection.find({
616
782
  serviceId: SERVICE_PROVIDER_ID,
617
- documentType: DOCUMENT_TYPE,
783
+ termsType: TERMS_TYPE,
618
784
  content: PDF_CONTENT,
619
785
  mimeType: PDF_MIME_TYPE,
620
- fetchDate: FETCH_DATE,
621
- }));
786
+ }).count();
622
787
 
623
- ({ id: lastSnapshotId } = await subject.save(new Record({
788
+ (record = await subject.save(new Snapshot({
624
789
  serviceId: SERVICE_PROVIDER_ID,
625
- documentType: DOCUMENT_TYPE,
626
- content: UPDATED_PDF_CONTENT,
790
+ termsType: TERMS_TYPE,
791
+ content: PDF_CONTENT,
627
792
  mimeType: PDF_MIME_TYPE,
628
- fetchDate: FETCH_DATE_LATER,
793
+ fetchDate: FETCH_DATE,
629
794
  })));
630
795
 
631
- latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);
796
+ numberOfRecordsAfter = await collection.find({
797
+ serviceId: SERVICE_PROVIDER_ID,
798
+ termsType: TERMS_TYPE,
799
+ }).count();
800
+
801
+ (mongoDocument = await collection.findOne({
802
+ serviceId: SERVICE_PROVIDER_ID,
803
+ termsType: TERMS_TYPE,
804
+ }));
632
805
  });
633
806
 
634
807
  after(async () => subject.removeAll());
635
808
 
636
- it('returns the latest record id', () => {
637
- expect(latestRecord.id).to.include(lastSnapshotId);
809
+ it('saves the record', () => {
810
+ expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1);
811
+ });
812
+
813
+ it('returns the record id', () => {
814
+ expect(mongoDocument._id.toString()).to.equal(record.id);
638
815
  });
639
816
 
640
- it('returns the latest record content', async () => {
641
- const isSameContent = Buffer.compare(latestRecord.content, UPDATED_PDF_CONTENT) == 0;
817
+ it('stores the proper content', async () => {
818
+ const isSameContent = Buffer.compare(mongoDocument.content.buffer, PDF_CONTENT) == 0;
642
819
 
643
820
  expect(isSameContent).to.be.true;
644
821
  });
645
822
 
646
- it('returns the latest record mime type', () => {
647
- expect(latestRecord.mimeType).to.equal(PDF_MIME_TYPE);
823
+ it('stores the MIME type', () => {
824
+ expect(mongoDocument.mimeType).to.equal(PDF_MIME_TYPE);
648
825
  });
649
826
  });
650
827
  });
651
828
 
652
- context('when there are no records for the given service', () => {
653
- let latestRecord;
829
+ describe('#findById', () => {
830
+ let record;
831
+ let id;
654
832
 
655
833
  before(async () => {
656
- latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);
834
+ ({ id } = await subject.save(new Snapshot({
835
+ serviceId: SERVICE_PROVIDER_ID,
836
+ termsType: TERMS_TYPE,
837
+ documentId: DOCUMENT_ID,
838
+ content: CONTENT,
839
+ fetchDate: FETCH_DATE,
840
+ mimeType: HTML_MIME_TYPE,
841
+ })));
842
+
843
+ (record = await subject.findById(id));
844
+ });
845
+
846
+ after(async () => subject.removeAll());
847
+
848
+ it('returns a Snapshot object', () => {
849
+ expect(record).to.be.an.instanceof(Snapshot);
850
+ });
851
+
852
+ it('returns the record id', () => {
853
+ expect(record.id).to.include(id);
854
+ });
855
+
856
+ it('states that it is the first record', () => {
857
+ expect(record.isFirstRecord).to.be.true;
858
+ });
859
+
860
+ it('returns the service ID', () => {
861
+ expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID);
862
+ });
863
+
864
+ it('returns the terms type', () => {
865
+ expect(record.termsType).to.equal(TERMS_TYPE);
866
+ });
867
+
868
+ it('returns the content', async () => {
869
+ expect(record.content).to.equal(CONTENT);
870
+ });
871
+
872
+ it('returns the fetch date', () => {
873
+ expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime());
874
+ });
875
+
876
+ it('returns the MIME type', () => {
877
+ expect(record.mimeType).to.equal(HTML_MIME_TYPE);
878
+ });
879
+
880
+ it('returns the document ID', () => {
881
+ expect(record.documentId).to.equal(DOCUMENT_ID);
657
882
  });
658
883
 
659
- it('returns null', async () => {
660
- expect(latestRecord).to.equal(null);
884
+ context('when requested record does not exist', () => {
885
+ it('returns null', async () => {
886
+ expect(await subject.findById('inexistantID')).to.equal(null);
887
+ });
661
888
  });
662
889
  });
663
- });
664
890
 
665
- describe('#iterate', () => {
666
- const expectedIds = [];
667
- const ids = [];
668
- const fetchDates = [];
891
+ describe('#findAll', () => {
892
+ let records;
893
+ const expectedIds = [];
669
894
 
670
- before(async () => {
671
- const { id: id1 } = await subject.save(new Record({
672
- serviceId: SERVICE_PROVIDER_ID,
673
- documentType: DOCUMENT_TYPE,
674
- content: CONTENT,
675
- fetchDate: FETCH_DATE,
676
- snapshotIds: [SNAPSHOT_ID],
677
- mimeType: MIME_TYPE,
678
- }));
679
-
680
- expectedIds.push(id1);
681
-
682
- const { id: id2 } = await subject.save(new Record({
683
- serviceId: SERVICE_PROVIDER_ID,
684
- documentType: DOCUMENT_TYPE,
685
- content: `${CONTENT} - updated`,
686
- fetchDate: FETCH_DATE_LATER,
687
- snapshotIds: [SNAPSHOT_ID],
688
- mimeType: MIME_TYPE,
689
- }));
690
-
691
- expectedIds.push(id2);
692
-
693
- const { id: id3 } = await subject.save(new Record({
694
- serviceId: SERVICE_PROVIDER_ID,
695
- documentType: DOCUMENT_TYPE,
696
- content: `${CONTENT} - updated 2`,
697
- isRefilter: true,
698
- fetchDate: FETCH_DATE_EARLIER,
699
- snapshotIds: [SNAPSHOT_ID],
700
- mimeType: MIME_TYPE,
701
- }));
702
-
703
- expectedIds.push(id3);
704
-
705
- for await (const record of subject.iterate()) {
706
- ids.push(record.id);
707
- fetchDates.push(record.fetchDate);
708
- }
895
+ before(async () => {
896
+ const { id: id1 } = await subject.save(new Snapshot({
897
+ serviceId: SERVICE_PROVIDER_ID,
898
+ termsType: TERMS_TYPE,
899
+ content: CONTENT,
900
+ fetchDate: FETCH_DATE,
901
+ mimeType: HTML_MIME_TYPE,
902
+ }));
903
+
904
+ expectedIds.push(id1);
905
+
906
+ const { id: id2 } = await subject.save(new Snapshot({
907
+ serviceId: SERVICE_PROVIDER_ID,
908
+ termsType: TERMS_TYPE,
909
+ content: `${CONTENT} - updated`,
910
+ fetchDate: FETCH_DATE_LATER,
911
+ mimeType: HTML_MIME_TYPE,
912
+ }));
913
+
914
+ expectedIds.push(id2);
915
+
916
+ const { id: id3 } = await subject.save(new Snapshot({
917
+ serviceId: SERVICE_PROVIDER_ID,
918
+ termsType: TERMS_TYPE,
919
+ content: `${CONTENT} - updated 2`,
920
+ isExtractOnly: true,
921
+ fetchDate: FETCH_DATE_EARLIER,
922
+ mimeType: HTML_MIME_TYPE,
923
+ }));
924
+
925
+ expectedIds.push(id3);
926
+
927
+ (records = await subject.findAll());
928
+ });
929
+
930
+ after(async () => subject.removeAll());
931
+
932
+ it('returns all records', () => {
933
+ expect(records.length).to.equal(3);
934
+ });
935
+
936
+ it('returns Snapshot objects', () => {
937
+ for (const record of records) {
938
+ expect(record).to.be.an.instanceof(Snapshot);
939
+ }
940
+ });
941
+
942
+ it('returns records in ascending order', async () => {
943
+ expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
944
+ });
945
+ });
946
+
947
+ describe('#count', () => {
948
+ let count;
949
+
950
+ before(async () => {
951
+ await subject.save(new Snapshot({
952
+ serviceId: SERVICE_PROVIDER_ID,
953
+ termsType: TERMS_TYPE,
954
+ content: CONTENT,
955
+ fetchDate: FETCH_DATE,
956
+ mimeType: HTML_MIME_TYPE,
957
+ }));
958
+ await subject.save(new Snapshot({
959
+ serviceId: SERVICE_PROVIDER_ID,
960
+ termsType: TERMS_TYPE,
961
+ content: `${CONTENT} - updated`,
962
+ fetchDate: FETCH_DATE_LATER,
963
+ mimeType: HTML_MIME_TYPE,
964
+ }));
965
+ await subject.save(new Snapshot({
966
+ serviceId: SERVICE_PROVIDER_ID,
967
+ termsType: TERMS_TYPE,
968
+ content: `${CONTENT} - updated 2`,
969
+ isExtractOnly: true,
970
+ fetchDate: FETCH_DATE_EARLIER,
971
+ mimeType: HTML_MIME_TYPE,
972
+ }));
973
+
974
+ (count = await subject.count());
975
+ });
976
+
977
+ after(async () => subject.removeAll());
978
+
979
+ it('returns the proper count', async () => {
980
+ expect(count).to.equal(3);
981
+ });
709
982
  });
710
983
 
711
- after(async () => subject.removeAll());
984
+ describe('#findLatest', () => {
985
+ context('when there are records for the given service', () => {
986
+ let lastSnapshotId;
987
+ let latestRecord;
988
+
989
+ context('with HTML document', () => {
990
+ const UPDATED_CONTENT = `${CONTENT} (with additional content to trigger a record)`;
991
+
992
+ before(async () => {
993
+ await subject.save(new Snapshot({
994
+ serviceId: SERVICE_PROVIDER_ID,
995
+ termsType: TERMS_TYPE,
996
+ content: CONTENT,
997
+ fetchDate: FETCH_DATE,
998
+ mimeType: HTML_MIME_TYPE,
999
+ }));
1000
+
1001
+ ({ id: lastSnapshotId } = await subject.save(new Snapshot({
1002
+ serviceId: SERVICE_PROVIDER_ID,
1003
+ termsType: TERMS_TYPE,
1004
+ content: UPDATED_CONTENT,
1005
+ mimeType: HTML_MIME_TYPE,
1006
+ fetchDate: FETCH_DATE_LATER,
1007
+ })));
1008
+
1009
+ latestRecord = await subject.findLatest(
1010
+ SERVICE_PROVIDER_ID,
1011
+ TERMS_TYPE,
1012
+ );
1013
+ });
1014
+
1015
+ after(async () => subject.removeAll());
1016
+
1017
+ it('returns a Snapshot object', () => {
1018
+ expect(latestRecord).to.be.an.instanceof(Snapshot);
1019
+ });
1020
+
1021
+ it('returns the latest record id', () => {
1022
+ expect(latestRecord.id).to.include(lastSnapshotId);
1023
+ });
1024
+
1025
+ it('returns the latest record content', async () => {
1026
+ expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT);
1027
+ });
1028
+
1029
+ it('returns the latest record mime type', () => {
1030
+ expect(latestRecord.mimeType).to.equal(HTML_MIME_TYPE);
1031
+ });
1032
+ });
1033
+
1034
+ context('with PDF document', () => {
1035
+ before(async () => {
1036
+ await subject.save(new Snapshot({
1037
+ serviceId: SERVICE_PROVIDER_ID,
1038
+ termsType: TERMS_TYPE,
1039
+ content: PDF_CONTENT,
1040
+ mimeType: PDF_MIME_TYPE,
1041
+ fetchDate: FETCH_DATE,
1042
+ }));
1043
+
1044
+ ({ id: lastSnapshotId } = await subject.save(new Snapshot({
1045
+ serviceId: SERVICE_PROVIDER_ID,
1046
+ termsType: TERMS_TYPE,
1047
+ content: UPDATED_PDF_CONTENT,
1048
+ mimeType: PDF_MIME_TYPE,
1049
+ fetchDate: FETCH_DATE_LATER,
1050
+ })));
1051
+
1052
+ latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
1053
+ });
1054
+
1055
+ after(async () => subject.removeAll());
1056
+
1057
+ it('returns the latest record id', () => {
1058
+ expect(latestRecord.id).to.include(lastSnapshotId);
1059
+ });
1060
+
1061
+ it('returns the latest record content', async () => {
1062
+ const isSameContent = Buffer.compare(latestRecord.content, UPDATED_PDF_CONTENT) == 0;
1063
+
1064
+ expect(isSameContent).to.be.true;
1065
+ });
1066
+
1067
+ it('returns the latest record mime type', () => {
1068
+ expect(latestRecord.mimeType).to.equal(PDF_MIME_TYPE);
1069
+ });
1070
+ });
1071
+ });
1072
+
1073
+ context('when there are no records for the given service', () => {
1074
+ let latestRecord;
1075
+
1076
+ before(async () => {
1077
+ latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
1078
+ });
712
1079
 
713
- it('iterates through all records', async () => {
714
- expect(ids).to.have.members(expectedIds);
1080
+ it('returns null', async () => {
1081
+ expect(latestRecord).to.equal(null);
1082
+ });
1083
+ });
715
1084
  });
716
1085
 
717
- it('iterates in ascending order', async () => {
718
- expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
1086
+ describe('#iterate', () => {
1087
+ const expectedIds = [];
1088
+ const ids = [];
1089
+ const fetchDates = [];
1090
+
1091
+ before(async () => {
1092
+ const { id: id1 } = await subject.save(new Snapshot({
1093
+ serviceId: SERVICE_PROVIDER_ID,
1094
+ termsType: TERMS_TYPE,
1095
+ content: CONTENT,
1096
+ fetchDate: FETCH_DATE,
1097
+ mimeType: HTML_MIME_TYPE,
1098
+ }));
1099
+
1100
+ expectedIds.push(id1);
1101
+
1102
+ const { id: id2 } = await subject.save(new Snapshot({
1103
+ serviceId: SERVICE_PROVIDER_ID,
1104
+ termsType: TERMS_TYPE,
1105
+ content: `${CONTENT} - updated`,
1106
+ fetchDate: FETCH_DATE_LATER,
1107
+ mimeType: HTML_MIME_TYPE,
1108
+ }));
1109
+
1110
+ expectedIds.push(id2);
1111
+
1112
+ const { id: id3 } = await subject.save(new Snapshot({
1113
+ serviceId: SERVICE_PROVIDER_ID,
1114
+ termsType: TERMS_TYPE,
1115
+ content: `${CONTENT} - updated 2`,
1116
+ isExtractOnly: true,
1117
+ fetchDate: FETCH_DATE_EARLIER,
1118
+ mimeType: HTML_MIME_TYPE,
1119
+ }));
1120
+
1121
+ expectedIds.push(id3);
1122
+
1123
+ for await (const record of subject.iterate()) {
1124
+ ids.push(record.id);
1125
+ fetchDates.push(record.fetchDate);
1126
+ }
1127
+ });
1128
+
1129
+ after(async () => subject.removeAll());
1130
+
1131
+ it('iterates through all records', async () => {
1132
+ expect(ids).to.have.members(expectedIds);
1133
+ });
1134
+
1135
+ it('iterates in ascending order', async () => {
1136
+ expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
1137
+ });
719
1138
  });
720
1139
  });
721
1140
  });