@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,600 @@
1
+ import fsApi from 'fs';
2
+ import path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
+ import chai from 'chai';
6
+ import config from 'config';
7
+ import nock from 'nock';
8
+ import sinon from 'sinon';
9
+ import sinonChai from 'sinon-chai';
10
+
11
+ import Git from './recorder/repositories/git/git.js';
12
+
13
+ import Archivist, { AVAILABLE_EVENTS } from './index.js';
14
+
15
+ const fs = fsApi.promises;
16
+
17
+ chai.use(sinonChai);
18
+ const { expect } = chai;
19
+
20
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
21
+
22
+ const ROOT_PATH = path.resolve(__dirname, '../../');
23
+ const SNAPSHOTS_PATH = path.resolve(ROOT_PATH, config.get('recorder.snapshots.storage.git.path'));
24
+ const VERSIONS_PATH = path.resolve(ROOT_PATH, config.get('recorder.versions.storage.git.path'));
25
+
26
+ const MIME_TYPE = 'text/html';
27
+ const FETCH_DATE = new Date('2000-01-02T12:00:00.000Z');
28
+ let gitVersion;
29
+ let app;
30
+
31
/**
 * Calls `removeAll()` on the snapshots and versions repositories of the
 * Archivist instance currently held in `app`.
 *
 * @returns {Promise<Array>} Resolves once both `removeAll()` calls have settled successfully.
 */
async function resetGitRepositories() {
  const { snapshotsRepository, versionsRepository } = app.recorder;
  const pendingRemovals = [ snapshotsRepository, versionsRepository ].map(repository => repository.removeAll());

  return Promise.all(pendingRemovals);
}
34
+
35
+ describe('Archivist', function () {
36
+ this.timeout(10000);
37
+
38
+ const SERVICE_A_ID = 'service_A';
39
+ const SERVICE_A_TYPE = 'Terms of Service';
40
+ const SERVICE_A_EXPECTED_SNAPSHOT_FILE_PATH = `${SNAPSHOTS_PATH}/${SERVICE_A_ID}/${SERVICE_A_TYPE}.html`;
41
+ const SERVICE_A_EXPECTED_VERSION_FILE_PATH = `${VERSIONS_PATH}/${SERVICE_A_ID}/${SERVICE_A_TYPE}.md`;
42
+ let serviceASnapshotExpectedContent;
43
+ let serviceAVersionExpectedContent;
44
+
45
+ const SERVICE_B_ID = 'service_B';
46
+ const SERVICE_B_TYPE = 'Privacy Policy';
47
+ const SERVICE_B_EXPECTED_SNAPSHOT_FILE_PATH = `${SNAPSHOTS_PATH}/${SERVICE_B_ID}/${SERVICE_B_TYPE}.pdf`;
48
+ const SERVICE_B_EXPECTED_VERSION_FILE_PATH = `${VERSIONS_PATH}/${SERVICE_B_ID}/${SERVICE_B_TYPE}.md`;
49
+ let serviceBSnapshotExpectedContent;
50
+ let serviceBVersionExpectedContent;
51
+
52
+ const serviceIds = [ 'service_A', 'service_B' ];
53
+
54
+ before(async () => {
55
+ gitVersion = new Git({
56
+ path: VERSIONS_PATH,
57
+ author: {
58
+ name: config.get('recorder.versions.storage.git.author.name'),
59
+ email: config.get('recorder.versions.storage.git.author.email'),
60
+ },
61
+ });
62
+ await gitVersion.initialize();
63
+
64
+ serviceASnapshotExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/service_A_terms_snapshot.html'), { encoding: 'utf8' });
65
+ serviceAVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/service_A_terms.md'), { encoding: 'utf8' });
66
+ serviceBSnapshotExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/terms.pdf'));
67
+ serviceBVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/termsFromPDF.md'), { encoding: 'utf8' });
68
+ });
69
+
70
+ describe('#trackChanges', () => {
71
+ before(async () => {
72
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
73
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
74
+ app = new Archivist({ recorderConfig: config.get('recorder') });
75
+ await app.initialize();
76
+ });
77
+
78
+ context('when everything works fine', () => {
79
+ before(async () => app.trackChanges(serviceIds));
80
+
81
+ after(resetGitRepositories);
82
+
83
+ it('records snapshot for service A', async () => {
84
+ const resultingSnapshotTerms = await fs.readFile(path.resolve(__dirname, SERVICE_A_EXPECTED_SNAPSHOT_FILE_PATH), { encoding: 'utf8' });
85
+
86
+ expect(resultingSnapshotTerms).to.equal(serviceASnapshotExpectedContent);
87
+ });
88
+
89
+ it('records version for service A', async () => {
90
+ const resultingTerms = await fs.readFile(path.resolve(__dirname, SERVICE_A_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
91
+
92
+ expect(resultingTerms).to.equal(serviceAVersionExpectedContent);
93
+ });
94
+
95
+ it('records snapshot for service B', async () => {
96
+ const resultingSnapshotTerms = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_SNAPSHOT_FILE_PATH));
97
+
98
+ expect(resultingSnapshotTerms.equals(serviceBSnapshotExpectedContent)).to.be.true;
99
+ });
100
+
101
+ it('records version for service B', async () => {
102
+ const resultingTerms = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
103
+
104
+ expect(resultingTerms).to.equal(serviceBVersionExpectedContent);
105
+ });
106
+ });
107
+
108
+ context('when there is an expected error', () => {
109
+ before(async () => {
110
+ // As there are no more HTTP request mocks for service A, it should throw an `ENOTFOUND` error, which is considered an expected error in our workflow
111
+ nock.cleanAll();
112
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
113
+ await app.trackChanges(serviceIds);
114
+ });
115
+
116
+ after(resetGitRepositories);
117
+
118
+ it('records no snapshot for service A', async () => {
119
+ expect(fsApi.existsSync(path.resolve(__dirname, SERVICE_A_EXPECTED_SNAPSHOT_FILE_PATH))).to.be.false;
120
+ });
121
+
122
+ it('records no version for service A', async () => {
123
+ expect(fsApi.existsSync(path.resolve(__dirname, SERVICE_A_EXPECTED_VERSION_FILE_PATH))).to.be.false;
124
+ });
125
+
126
+ it('still records snapshot for service B', async () => {
127
+ const resultingSnapshotTerms = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_SNAPSHOT_FILE_PATH));
128
+
129
+ expect(resultingSnapshotTerms.equals(serviceBSnapshotExpectedContent)).to.be.true;
130
+ });
131
+
132
+ it('still records version for service B', async () => {
133
+ const resultingTerms = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
134
+
135
+ expect(resultingTerms).to.equal(serviceBVersionExpectedContent);
136
+ });
137
+ });
138
+ });
139
+
140
+ describe('#refilterAndRecord', () => {
141
+ context('when a service’s filter declaration changes', () => {
142
+ context('when everything works fine', () => {
143
+ let originalSnapshotId;
144
+ let firstVersionId;
145
+ let refilterVersionId;
146
+ let refilterVersionMessageBody;
147
+ let serviceBCommits;
148
+
149
+ before(async () => {
150
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
151
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
152
+ app = new Archivist({ recorderConfig: config.get('recorder') });
153
+
154
+ await app.initialize();
155
+ await app.trackChanges(serviceIds);
156
+
157
+ ({ id: originalSnapshotId } = await app.recorder.snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
158
+ ({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE));
159
+
160
+ serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
161
+
162
+ app.serviceDeclarations[SERVICE_A_ID].getDocumentDeclaration(SERVICE_A_TYPE).pages[0].contentSelectors = 'h1';
163
+
164
+ await app.refilterAndRecord([ 'service_A', 'service_B' ]);
165
+
166
+ const [refilterVersionCommit] = await gitVersion.log({ file: SERVICE_A_EXPECTED_VERSION_FILE_PATH });
167
+
168
+ refilterVersionId = refilterVersionCommit.hash;
169
+ refilterVersionMessageBody = refilterVersionCommit.body;
170
+ });
171
+
172
+ after(resetGitRepositories);
173
+
174
+ it('refilters the changed service', async () => {
175
+ const serviceAContent = await fs.readFile(path.resolve(__dirname, SERVICE_A_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
176
+
177
+ expect(serviceAContent).to.equal('Terms of service with UTF-8 \'çhãràčtęrs"\n========================================');
178
+ });
179
+
180
+ it('generates a new version id', async () => {
181
+ expect(refilterVersionId).to.not.equal(firstVersionId);
182
+ });
183
+
184
+ it('mentions the snapshot id in the changelog', async () => {
185
+ expect(refilterVersionMessageBody).to.include(originalSnapshotId);
186
+ });
187
+
188
+ it('does not change other services', async () => {
189
+ const serviceBVersion = await fs.readFile(path.resolve(__dirname, SERVICE_B_EXPECTED_VERSION_FILE_PATH), { encoding: 'utf8' });
190
+
191
+ expect(serviceBVersion).to.equal(serviceBVersionExpectedContent);
192
+ });
193
+
194
+ it('does not generate a new id for other services', async () => {
195
+ const serviceBCommitsAfterRefiltering = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH });
196
+
197
+ expect(serviceBCommitsAfterRefiltering.map(commit => commit.hash)).to.deep.equal(serviceBCommits.map(commit => commit.hash));
198
+ });
199
+ });
200
+
201
+ context('when there is an expected error', () => {
202
+ let inaccessibleContentSpy;
203
+ let versionNotChangedSpy;
204
+
205
+ before(async () => {
206
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
207
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
208
+ app = new Archivist({ recorderConfig: config.get('recorder') });
209
+
210
+ await app.initialize();
211
+ await app.trackChanges(serviceIds);
212
+
213
+ app.serviceDeclarations[SERVICE_A_ID].getDocumentDeclaration(SERVICE_A_TYPE).pages[0].contentSelectors = 'inexistant-selector';
214
+ inaccessibleContentSpy = sinon.spy();
215
+ versionNotChangedSpy = sinon.spy();
216
+ app.on('inaccessibleContent', inaccessibleContentSpy);
217
+ app.on('versionNotChanged', versionNotChangedSpy);
218
+ await app.refilterAndRecord(serviceIds);
219
+ });
220
+
221
+ after(resetGitRepositories);
222
+
223
+ it('emits an inaccessibleContent event when an error happens during refiltering', async () => {
224
+ expect(inaccessibleContentSpy).to.have.been.called;
225
+ });
226
+
227
+ it('still refilters other services', async () => {
228
+ expect(versionNotChangedSpy).to.have.been.calledWith(SERVICE_B_ID, SERVICE_B_TYPE);
229
+ });
230
+ });
231
+ });
232
+ });
233
+
234
+ describe('events', () => {
235
+ const spies = {};
236
+
237
/**
 * Clears the recorded call history of every spy stored in `spies`, so that
 * assertions in the next context only see calls made after this point.
 */
function resetSpiesHistory() {
  for (const spy of Object.values(spies)) {
    spy.resetHistory();
  }
}
240
+
241
/**
 * Registers one test case per event of `AVAILABLE_EVENTS` that is NOT listed
 * in `eventNames`; each case asserts that the matching spy was never called.
 *
 * @param {string[]} eventNames - Names of the events that are allowed to be emitted in the current context.
 */
function emitsOnly(eventNames) {
  AVAILABLE_EVENTS
    .filter(event => !eventNames.includes(event))
    .forEach(event => {
      // Spies are looked up under `on<EventName>`; `slice` replaces the deprecated `substr`.
      const handlerName = `on${event[0].toUpperCase()}${event.slice(1)}`;

      it(`emits no "${event}" event`, () => {
        expect(spies[handlerName]).to.have.not.been.called;
      });
    });
}
250
+
251
// Creates and initializes a fresh Archivist, then attaches one spy per
// available event so the contexts below can assert which events were emitted.
before(async () => {
  app = new Archivist({ recorderConfig: config.get('recorder') });
  await app.initialize();

  AVAILABLE_EVENTS.forEach(event => {
    // Each spy is stored under `on<EventName>`; `slice` replaces the deprecated `substr`.
    const handlerName = `on${event[0].toUpperCase()}${event.slice(1)}`;

    spies[handlerName] = sinon.spy();
    app.on(event, spies[handlerName]);
  });
});
262
+
263
+ describe('#recordSnapshot', () => {
264
+ context('when it is the first record', () => {
265
+ before(async () => app.recordSnapshot({
266
+ content: 'document content 3',
267
+ serviceId: SERVICE_A_ID,
268
+ documentType: SERVICE_A_TYPE,
269
+ mimeType: MIME_TYPE,
270
+ fetchDate: FETCH_DATE,
271
+ }));
272
+
273
+ after(() => {
274
+ resetSpiesHistory();
275
+
276
+ return resetGitRepositories();
277
+ });
278
+
279
+ it('emits "firstSnapshotRecorded" event', async () => {
280
+ expect(spies.onFirstSnapshotRecorded).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
281
+ });
282
+
283
+ emitsOnly(['firstSnapshotRecorded']);
284
+ });
285
+
286
+ context('when it is not the first record', () => {
287
+ context('when there are changes', () => {
288
+ before(async () => {
289
+ await app.recordSnapshot({
290
+ content: 'document content',
291
+ serviceId: SERVICE_A_ID,
292
+ documentType: SERVICE_A_TYPE,
293
+ mimeType: MIME_TYPE,
294
+ fetchDate: FETCH_DATE,
295
+ });
296
+ resetSpiesHistory();
297
+ await app.recordSnapshot({
298
+ content: 'document content modified',
299
+ serviceId: SERVICE_A_ID,
300
+ documentType: SERVICE_A_TYPE,
301
+ mimeType: MIME_TYPE,
302
+ fetchDate: FETCH_DATE,
303
+ });
304
+ });
305
+
306
+ after(() => {
307
+ resetSpiesHistory();
308
+
309
+ return resetGitRepositories();
310
+ });
311
+
312
+ it('emits "snapshotRecorded" event', async () => {
313
+ expect(spies.onSnapshotRecorded).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
314
+ });
315
+
316
+ emitsOnly(['snapshotRecorded']);
317
+ });
318
+
319
+ context('when there are no changes', () => {
320
+ before(async () => {
321
+ await app.recordSnapshot({
322
+ content: 'document content',
323
+ serviceId: SERVICE_A_ID,
324
+ documentType: SERVICE_A_TYPE,
325
+ mimeType: MIME_TYPE,
326
+ fetchDate: FETCH_DATE,
327
+ });
328
+ resetSpiesHistory();
329
+ await app.recordSnapshot({
330
+ content: 'document content',
331
+ serviceId: SERVICE_A_ID,
332
+ documentType: SERVICE_A_TYPE,
333
+ mimeType: MIME_TYPE,
334
+ fetchDate: FETCH_DATE,
335
+ });
336
+ });
337
+
338
+ after(() => {
339
+ resetSpiesHistory();
340
+
341
+ return resetGitRepositories();
342
+ });
343
+
344
+ it('emits "snapshotNotChanged" event', async () => {
345
+ expect(spies.onSnapshotNotChanged).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
346
+ });
347
+
348
+ emitsOnly(['snapshotNotChanged']);
349
+ });
350
+ });
351
+ });
352
+
353
+ describe('#recordVersion', () => {
354
+ context('when it is the first record', () => {
355
+ before(async () =>
356
+ app.recordVersion({
357
+ content: serviceASnapshotExpectedContent,
358
+ snapshotIds: ['sha'],
359
+ mimeType: MIME_TYPE,
360
+ fetchDate: FETCH_DATE,
361
+ serviceId: SERVICE_A_ID,
362
+ documentType: SERVICE_A_TYPE,
363
+ }));
364
+
365
+ after(() => {
366
+ resetSpiesHistory();
367
+
368
+ return resetGitRepositories();
369
+ });
370
+
371
+ it('emits "firstVersionRecorded" event', async () => {
372
+ expect(spies.onFirstVersionRecorded).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
373
+ });
374
+
375
+ emitsOnly(['firstVersionRecorded']);
376
+ });
377
+
378
+ context('when it is not the first record', () => {
379
+ context('when there are changes', () => {
380
+ before(async () => {
381
+ await app.recordVersion({
382
+ content: serviceASnapshotExpectedContent,
383
+ mimeType: MIME_TYPE,
384
+ fetchDate: FETCH_DATE,
385
+ snapshotIds: ['sha'],
386
+ serviceId: SERVICE_A_ID,
387
+ documentType: SERVICE_A_TYPE,
388
+ });
389
+ resetSpiesHistory();
390
+ await app.recordVersion({
391
+ content: serviceBSnapshotExpectedContent,
392
+ mimeType: MIME_TYPE,
393
+ fetchDate: FETCH_DATE,
394
+ snapshotIds: ['sha'],
395
+ serviceId: SERVICE_A_ID,
396
+ documentType: SERVICE_A_TYPE,
397
+ });
398
+ });
399
+
400
+ after(() => {
401
+ resetSpiesHistory();
402
+
403
+ return resetGitRepositories();
404
+ });
405
+
406
+ it('emits "versionRecorded" event', async () => {
407
+ expect(spies.onVersionRecorded).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
408
+ });
409
+
410
+ emitsOnly(['versionRecorded']);
411
+ });
412
+
413
+ context('when there are no changes', () => {
414
+ before(async () => {
415
+ await app.recordVersion({
416
+ content: serviceASnapshotExpectedContent,
417
+ snapshotIds: ['sha'],
418
+ mimeType: MIME_TYPE,
419
+ fetchDate: FETCH_DATE,
420
+ serviceId: SERVICE_A_ID,
421
+ documentType: SERVICE_A_TYPE,
422
+ });
423
+ resetSpiesHistory();
424
+ await app.recordVersion({
425
+ content: serviceASnapshotExpectedContent,
426
+ snapshotIds: ['sha'],
427
+ mimeType: MIME_TYPE,
428
+ fetchDate: FETCH_DATE,
429
+ serviceId: SERVICE_A_ID,
430
+ documentType: SERVICE_A_TYPE,
431
+ });
432
+ });
433
+
434
+ after(() => {
435
+ resetSpiesHistory();
436
+
437
+ return resetGitRepositories();
438
+ });
439
+
440
+ it('emits "versionNotChanged" event', async () => {
441
+ expect(spies.onVersionNotChanged).to.have.been.calledWith(SERVICE_A_ID, SERVICE_A_TYPE);
442
+ });
443
+
444
+ emitsOnly(['versionNotChanged']);
445
+ });
446
+ });
447
+ });
448
+
449
+ context('when tracking changes on new services', () => {
450
+ before(async () => {
451
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
452
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
453
+
454
+ return app.trackChanges(serviceIds);
455
+ });
456
+
457
+ after(() => {
458
+ resetSpiesHistory();
459
+
460
+ return resetGitRepositories();
461
+ });
462
+
463
+ it('emits "trackingStarted" event', async () => {
464
+ expect(spies.onTrackingStarted).to.have.been.calledOnce;
465
+ });
466
+
467
+ it('emits "firstSnapshotRecorded" events', async () => {
468
+ expect(spies.onFirstSnapshotRecorded).to.have.been.calledTwice;
469
+ });
470
+
471
+ it('emits "firstVersionRecorded" events', async () => {
472
+ expect(spies.onFirstVersionRecorded).to.have.been.calledTwice;
473
+ });
474
+
475
+ it('emits "firstVersionRecorded" events after "firstSnapshotRecorded" events', async () => {
476
+ expect(spies.onFirstVersionRecorded).to.have.been.calledAfter(spies.onFirstSnapshotRecorded);
477
+ });
478
+
479
+ it('emits "trackingCompleted" event', async () => {
480
+ expect(spies.onTrackingCompleted).to.have.been.calledAfter(spies.onTrackingStarted);
481
+ });
482
+
483
+ emitsOnly([
484
+ 'firstSnapshotRecorded',
485
+ 'firstVersionRecorded',
486
+ 'trackingStarted',
487
+ 'trackingCompleted',
488
+ ]);
489
+ });
490
+
491
+ context('when tracking changes on already tracked services', () => {
492
+ context('when services did not change', () => {
493
+ before(async () => {
494
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
495
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
496
+
497
+ await app.trackChanges(serviceIds);
498
+
499
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
500
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
501
+
502
+ resetSpiesHistory();
503
+
504
+ return app.trackChanges(serviceIds);
505
+ });
506
+
507
+ after(() => {
508
+ resetSpiesHistory();
509
+
510
+ return resetGitRepositories();
511
+ });
512
+
513
+ it('emits "trackingStarted" event', async () => {
514
+ expect(spies.onTrackingStarted).to.have.been.calledOnce;
515
+ });
516
+
517
+ it('emits "snapshotNotChanged" events', async () => {
518
+ expect(spies.onSnapshotNotChanged).to.have.been.calledTwice;
519
+ });
520
+
521
+ it('emits "versionNotChanged" events', async () => {
522
+ expect(spies.onVersionNotChanged).to.have.been.calledTwice;
523
+ });
524
+
525
// The title names the event whose spy is actually compared against ("snapshotNotChanged"):
// in this context no snapshot is recorded, so "snapshotRecorded" is never emitted.
it('emits "versionNotChanged" events after "snapshotNotChanged" events', async () => {
  expect(spies.onVersionNotChanged).to.have.been.calledAfter(spies.onSnapshotNotChanged);
});
528
+
529
+ it('emits "trackingCompleted" event', async () => {
530
+ expect(spies.onTrackingCompleted).to.have.been.calledAfter(spies.onTrackingStarted);
531
+ });
532
+
533
+ emitsOnly([
534
+ 'snapshotNotChanged',
535
+ 'versionNotChanged',
536
+ 'trackingStarted',
537
+ 'trackingCompleted',
538
+ ]);
539
+ });
540
+
541
+ context('when a service changed', () => {
542
+ before(async () => {
543
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' });
544
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
545
+
546
+ await app.trackChanges(serviceIds);
547
+
548
+ nock('https://www.servicea.example').get('/tos').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'text/html' });
549
+ nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' });
550
+
551
+ resetSpiesHistory();
552
+ await app.trackChanges(serviceIds);
553
+ });
554
+
555
+ after(() => {
556
+ resetSpiesHistory();
557
+
558
+ return resetGitRepositories();
559
+ });
560
+
561
+ it('emits "trackingStarted" event', async () => {
562
+ expect(spies.onTrackingStarted).to.have.been.calledOnce;
563
+ });
564
+
565
+ it('emits "snapshotNotChanged" event for the service that was not changed', async () => {
566
+ expect(spies.onSnapshotNotChanged).to.have.been.calledOnceWith(SERVICE_B_ID, SERVICE_B_TYPE);
567
+ });
568
+
569
+ it('emits "snapshotRecorded" event for the service that was changed', async () => {
570
+ expect(spies.onSnapshotRecorded).to.have.been.calledOnceWith(SERVICE_A_ID, SERVICE_A_TYPE);
571
+ });
572
+
573
+ it('emits "versionNotChanged" events for the service that was not changed', async () => {
574
+ expect(spies.onVersionNotChanged).to.have.been.calledOnceWith(SERVICE_B_ID, SERVICE_B_TYPE);
575
+ });
576
+
577
+ it('emits "versionRecorded" event for the service that was changed', async () => {
578
+ expect(spies.onVersionRecorded).to.have.been.calledOnceWith(SERVICE_A_ID, SERVICE_A_TYPE);
579
+ });
580
+
581
// The title states the ordering that is actually asserted: the version spy
// must have been called after the snapshot spy (the previous title said the opposite).
it('emits "versionRecorded" events after "snapshotRecorded" events', async () => {
  expect(spies.onVersionRecorded).to.have.been.calledAfter(spies.onSnapshotRecorded);
});
584
+
585
+ it('emits "trackingCompleted" event', async () => {
586
+ expect(spies.onTrackingCompleted).to.have.been.calledAfter(spies.onTrackingStarted);
587
+ });
588
+
589
+ emitsOnly([
590
+ 'snapshotNotChanged',
591
+ 'snapshotRecorded',
592
+ 'versionNotChanged',
593
+ 'versionRecorded',
594
+ 'trackingStarted',
595
+ 'trackingCompleted',
596
+ ]);
597
+ });
598
+ });
599
+ });
600
+ });
@@ -0,0 +1,77 @@
1
+ import mime from 'mime';
2
+
3
+ import Record from './record.js';
4
+ import RepositoryFactory from './repositories/factory.js';
5
+
6
/**
 * Throws an `Error` carrying `message` when `value` is falsy.
 *
 * @param {*} value - Attribute to check for presence.
 * @param {string} message - Message of the error thrown when `value` is missing.
 * @throws {Error} When `value` is falsy.
 */
function assertProvided(value, message) {
  if (!value) {
    throw new Error(message);
  }
}

/**
 * Validates snapshots and versions, then stores them as `Record`s in the
 * repositories created from the given storage configuration.
 */
export default class Recorder {
  /**
   * @param {object} config - Must expose `versions.storage` and `snapshots.storage`,
   *   each passed as-is to `RepositoryFactory.create`.
   */
  constructor(config) {
    this.versionsRepository = RepositoryFactory.create(config.versions.storage);
    this.snapshotsRepository = RepositoryFactory.create(config.snapshots.storage);
  }

  /**
   * Initializes both repositories concurrently.
   *
   * @returns {Promise<Array>} Results of the versions and snapshots repositories' `initialize()`, in that order.
   */
  async initialize() {
    return Promise.all([ this.versionsRepository.initialize(), this.snapshotsRepository.initialize() ]);
  }

  /**
   * Finalizes both repositories concurrently.
   *
   * @returns {Promise<Array>} Results of the versions and snapshots repositories' `finalize()`, in that order.
   */
  async finalize() {
    return Promise.all([ this.versionsRepository.finalize(), this.snapshotsRepository.finalize() ]);
  }

  /**
   * Delegates to the snapshots repository's `findLatest` with the same arguments.
   *
   * @param {string} serviceId
   * @param {string} documentType
   * @param {string} [pageId]
   * @returns {Promise<*>} Whatever the snapshots repository resolves with.
   */
  async getLatestSnapshot(serviceId, documentType, pageId) {
    return this.snapshotsRepository.findLatest(serviceId, documentType, pageId);
  }

  /**
   * Validates the given attributes and saves them as a `Record` in the snapshots repository.
   *
   * @param {object} params
   * @param {string} params.serviceId
   * @param {string} params.documentType
   * @param {string} [params.pageId]
   * @param {Date} params.fetchDate
   * @param {string} params.mimeType
   * @param {*} params.content
   * @returns {Promise<*>} Result of the snapshots repository's `save`.
   * @throws {Error} When `serviceId`, `documentType`, `fetchDate`, `content` or `mimeType` is missing
   *   (checked in that order).
   */
  async recordSnapshot({ serviceId, documentType, pageId, fetchDate, mimeType, content }) {
    assertProvided(serviceId, 'A service ID is required');
    assertProvided(documentType, 'A document type is required');
    assertProvided(fetchDate, 'The fetch date of the snapshot is required to ensure data consistency');
    assertProvided(content, 'A document content is required');
    assertProvided(mimeType, 'A document mime type is required to ensure data consistency');

    return this.snapshotsRepository.save(new Record({ serviceId, documentType, pageId, fetchDate, mimeType, content }));
  }

  /**
   * Validates the given attributes and saves them as a `Record` in the versions repository.
   *
   * @param {object} params
   * @param {string} params.serviceId
   * @param {string} params.documentType
   * @param {string[]} params.snapshotIds - Must contain at least one ID.
   * @param {Date} params.fetchDate
   * @param {*} params.content
   * @param {boolean} [params.isRefilter]
   * @returns {Promise<*>} Result of the versions repository's `save`.
   * @throws {Error} When `serviceId`, `documentType`, `snapshotIds`, `fetchDate` or `content` is missing
   *   (checked in that order).
   */
  async recordVersion({ serviceId, documentType, snapshotIds, fetchDate, content, isRefilter }) {
    assertProvided(serviceId, 'A service ID is required');
    assertProvided(documentType, 'A document type is required');
    assertProvided(snapshotIds?.length, `At least one snapshot ID is required to ensure data consistency for ${serviceId}'s ${documentType}`);
    assertProvided(fetchDate, 'The fetch date of the snapshot is required to ensure data consistency');
    assertProvided(content, 'A document content is required');

    const mimeType = mime.getType('markdown'); // A version is always in markdown format

    return this.versionsRepository.save(new Record({ serviceId, documentType, snapshotIds, fetchDate, mimeType, content, isRefilter }));
  }

  /**
   * Records a version flagged as a refilter.
   *
   * @param {object} params - Same attributes as `recordVersion`.
   * @returns {Promise<*>} Result of `recordVersion`.
   */
  async recordRefilter(params) {
    // `isRefilter` is spread last so that a value present in `params` cannot clear the flag.
    return this.recordVersion({ ...params, isRefilter: true });
  }
}