@opentermsarchive/engine 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/.eslintrc.yaml +116 -0
- package/.github/workflows/deploy.yml +50 -0
- package/.github/workflows/release.yml +64 -0
- package/.github/workflows/test.yml +77 -0
- package/CHANGELOG.md +14 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/CONTRIBUTING.md +143 -0
- package/LICENSE +153 -0
- package/MIGRATING.md +42 -0
- package/README.fr.md +110 -0
- package/README.md +438 -0
- package/Vagrantfile +38 -0
- package/ansible.cfg +13 -0
- package/bin/.env.js +1 -0
- package/bin/lint-declarations.js +31 -0
- package/bin/track.js +26 -0
- package/bin/validate-declarations.js +68 -0
- package/config/ci.json +5 -0
- package/config/contrib.json +35 -0
- package/config/dating.json +37 -0
- package/config/default.json +71 -0
- package/config/france.json +40 -0
- package/config/p2b-compliance.json +40 -0
- package/config/pga.json +40 -0
- package/config/production.json +27 -0
- package/config/test.json +49 -0
- package/config/vagrant.json +24 -0
- package/decision-records/0001-service-name-and-id.md +73 -0
- package/decision-records/0002-service-history.md +212 -0
- package/decision-records/0003-snapshots-database.md +123 -0
- package/ops/README.md +280 -0
- package/ops/app.yml +5 -0
- package/ops/infra.yml +6 -0
- package/ops/inventories/dev.yml +7 -0
- package/ops/inventories/production.yml +27 -0
- package/ops/roles/infra/defaults/main.yml +2 -0
- package/ops/roles/infra/files/.gitconfig +3 -0
- package/ops/roles/infra/files/mongod.conf +18 -0
- package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
- package/ops/roles/infra/tasks/main.yml +78 -0
- package/ops/roles/infra/tasks/mongo.yml +40 -0
- package/ops/roles/infra/templates/ssh_config.j2 +5 -0
- package/ops/roles/ota/defaults/main.yml +14 -0
- package/ops/roles/ota/files/.env +21 -0
- package/ops/roles/ota/tasks/database.yml +65 -0
- package/ops/roles/ota/tasks/main.yml +110 -0
- package/ops/site.yml +6 -0
- package/package.json +101 -0
- package/pm2.config.cjs +20 -0
- package/scripts/dataset/README.md +37 -0
- package/scripts/dataset/assets/LICENSE +540 -0
- package/scripts/dataset/assets/README.template.js +65 -0
- package/scripts/dataset/export/index.js +106 -0
- package/scripts/dataset/export/index.test.js +155 -0
- package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
- package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
- package/scripts/dataset/index.js +40 -0
- package/scripts/dataset/logger/index.js +17 -0
- package/scripts/dataset/main.js +25 -0
- package/scripts/dataset/publish/index.js +39 -0
- package/scripts/declarations/lint/index.js +36 -0
- package/scripts/declarations/utils/index.js +81 -0
- package/scripts/declarations/validate/definitions.js +63 -0
- package/scripts/declarations/validate/index.mocha.js +262 -0
- package/scripts/declarations/validate/service.history.schema.js +86 -0
- package/scripts/declarations/validate/service.schema.js +91 -0
- package/scripts/history/logger/index.js +39 -0
- package/scripts/history/migrate-services.js +212 -0
- package/scripts/history/update-to-full-hash.js +61 -0
- package/scripts/history/utils/index.js +23 -0
- package/scripts/import/README.md +59 -0
- package/scripts/import/config/import.json +12 -0
- package/scripts/import/index.js +224 -0
- package/scripts/import/loadCommits.js +66 -0
- package/scripts/import/logger/index.js +43 -0
- package/scripts/rewrite/README.md +131 -0
- package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
- package/scripts/rewrite/config/rewrite-versions.json +32 -0
- package/scripts/rewrite/initializer/files/license +428 -0
- package/scripts/rewrite/initializer/files/readme.md +8 -0
- package/scripts/rewrite/initializer/index.js +44 -0
- package/scripts/rewrite/rewrite-snapshots.js +108 -0
- package/scripts/rewrite/rewrite-versions.js +160 -0
- package/scripts/rewrite/utils.js +33 -0
- package/scripts/utils/renamer/README.md +49 -0
- package/scripts/utils/renamer/index.js +45 -0
- package/scripts/utils/renamer/rules/documentTypes.json +25 -0
- package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
- package/scripts/utils/renamer/rules/serviceNames.json +92 -0
- package/src/archivist/errors.js +9 -0
- package/src/archivist/fetcher/errors.js +6 -0
- package/src/archivist/fetcher/exports.js +18 -0
- package/src/archivist/fetcher/fullDomFetcher.js +84 -0
- package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
- package/src/archivist/fetcher/index.js +35 -0
- package/src/archivist/fetcher/index.test.js +239 -0
- package/src/archivist/filter/exports.js +3 -0
- package/src/archivist/filter/index.js +178 -0
- package/src/archivist/filter/index.test.js +561 -0
- package/src/archivist/index.js +276 -0
- package/src/archivist/index.test.js +600 -0
- package/src/archivist/recorder/index.js +77 -0
- package/src/archivist/recorder/index.test.js +463 -0
- package/src/archivist/recorder/record.js +35 -0
- package/src/archivist/recorder/record.test.js +91 -0
- package/src/archivist/recorder/repositories/factory.js +23 -0
- package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
- package/src/archivist/recorder/repositories/git/git.js +122 -0
- package/src/archivist/recorder/repositories/git/git.test.js +86 -0
- package/src/archivist/recorder/repositories/git/index.js +182 -0
- package/src/archivist/recorder/repositories/git/index.test.js +714 -0
- package/src/archivist/recorder/repositories/interface.js +108 -0
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
- package/src/archivist/recorder/repositories/mongo/index.js +121 -0
- package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
- package/src/archivist/services/documentDeclaration.js +26 -0
- package/src/archivist/services/documentDeclaration.test.js +85 -0
- package/src/archivist/services/documentTypes.json +386 -0
- package/src/archivist/services/index.js +255 -0
- package/src/archivist/services/index.test.js +327 -0
- package/src/archivist/services/pageDeclaration.js +51 -0
- package/src/archivist/services/pageDeclaration.test.js +224 -0
- package/src/archivist/services/service.js +60 -0
- package/src/archivist/services/service.test.js +164 -0
- package/src/exports.js +3 -0
- package/src/index.js +59 -0
- package/src/logger/README.md +1 -0
- package/src/logger/index.js +131 -0
- package/src/main.js +18 -0
- package/src/notifier/README.md +1 -0
- package/src/notifier/index.js +150 -0
- package/src/tracker/README.md +1 -0
- package/src/tracker/index.js +215 -0
- package/test/fixtures/service_A.js +22 -0
- package/test/fixtures/service_A_terms.md +10 -0
- package/test/fixtures/service_A_terms_snapshot.html +14 -0
- package/test/fixtures/service_B.js +22 -0
- package/test/fixtures/service_with_declaration_history.js +65 -0
- package/test/fixtures/service_with_filters_history.js +155 -0
- package/test/fixtures/service_with_history.js +188 -0
- package/test/fixtures/service_with_multipage_document.js +100 -0
- package/test/fixtures/service_without_history.js +31 -0
- package/test/fixtures/services.js +19 -0
- package/test/fixtures/terms.pdf +0 -0
- package/test/fixtures/termsFromPDF.md +25 -0
- package/test/fixtures/termsModified.pdf +0 -0
- package/test/services/service_A.json +9 -0
- package/test/services/service_B.json +9 -0
- package/test/services/service_with_declaration_history.filters.js +7 -0
- package/test/services/service_with_declaration_history.history.json +17 -0
- package/test/services/service_with_declaration_history.json +13 -0
- package/test/services/service_with_filters_history.filters.history.js +29 -0
- package/test/services/service_with_filters_history.filters.js +7 -0
- package/test/services/service_with_filters_history.json +13 -0
- package/test/services/service_with_history.filters.history.js +29 -0
- package/test/services/service_with_history.filters.js +7 -0
- package/test/services/service_with_history.history.json +26 -0
- package/test/services/service_with_history.json +17 -0
- package/test/services/service_with_multipage_document.filters.js +7 -0
- package/test/services/service_with_multipage_document.history.json +37 -0
- package/test/services/service_with_multipage_document.json +28 -0
- package/test/services/service_without_history.filters.js +7 -0
- package/test/services/service_without_history.json +13 -0
|
@@ -0,0 +1,714 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
import chai from 'chai';
|
|
6
|
+
import config from 'config';
|
|
7
|
+
import mime from 'mime';
|
|
8
|
+
|
|
9
|
+
import Record from '../../record.js';
|
|
10
|
+
|
|
11
|
+
import { DOCUMENT_TYPE_AND_PAGE_ID_SEPARATOR, SNAPSHOT_ID_MARKER } from './dataMapper.js';
|
|
12
|
+
import Git from './git.js';
|
|
13
|
+
|
|
14
|
+
import GitRepository from './index.js';
|
|
15
|
+
|
|
16
|
+
// Chai's BDD-style assertion entry point used by every spec below.
const { expect } = chai;

// ES modules do not provide `__dirname`; rebuild it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Absolute path of the git storage under test: the configured path is resolved from three directories above this file.
const RECORDER_PATH = path.resolve(__dirname, '../../../', config.get('recorder.versions.storage.git.path'));

// Identity and content of the fixture document saved by the specs.
const SERVICE_PROVIDER_ID = 'test_service';
const DOCUMENT_TYPE = 'Terms of Service';
const PAGE_ID = 'community-standards-hate-speech';
const CONTENT = 'ToS fixture data with UTF-8 çhãràčtęrs';

// Locations where the specs expect the repository to write the document.
const EXPECTED_FILE_PATH = `${RECORDER_PATH}/${SERVICE_PROVIDER_ID}/${DOCUMENT_TYPE}.html`;
const EXPECTED_FILE_PATH_WITH_PAGE_ID = `${RECORDER_PATH}/${SERVICE_PROVIDER_ID}/${DOCUMENT_TYPE}${DOCUMENT_TYPE_AND_PAGE_ID_SEPARATOR}${PAGE_ID}.html`;
// Swap only the trailing extension: an unanchored 'html' → 'pdf' replacement would rewrite the
// first "html" found anywhere in the absolute path (for instance in a parent directory name).
const EXPECTED_PDF_FILE_PATH = EXPECTED_FILE_PATH.replace(/\.html$/, '.pdf');

// Three fetch dates, deliberately out of insertion order in some specs to exercise chronological sorting.
const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z');
const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z');
const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z');

const SNAPSHOT_ID = '513fadb2ae415c87747047e33287805d59e2dd55';
const MIME_TYPE = 'text/html';
// The PDF fixture is read as a UTF-8 string; the specs that read the stored file back use the
// same encoding, so both sides of the comparison are decoded the same way.
const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/terms.pdf'), { encoding: 'utf8' });
const PDF_MIME_TYPE = 'application/pdf';

// Low-level Git client, assigned in the root `before` hook and used by the specs to inspect the commit log.
let git;
|
|
37
|
+
|
|
38
|
+
describe('GitRepository', () => {
|
|
39
|
+
let subject;
|
|
40
|
+
|
|
41
|
+
// Suite-wide setup: prepares a raw Git client (used by the specs to read the commit log)
// and the GitRepository under test, both pointing at RECORDER_PATH.
before(async () => {
  const author = {
    name: config.get('recorder.versions.storage.git.author.name'),
    email: config.get('recorder.versions.storage.git.author.email'),
  };

  git = new Git({ path: RECORDER_PATH, author });

  await git.initialize();

  // Start from the configured git storage options, overriding the path with its absolute form.
  const repositoryOptions = {
    ...config.get('recorder.versions.storage.git'),
    path: RECORDER_PATH,
  };

  subject = new GitRepository(repositoryOptions);

  return subject.initialize();
});
|
|
59
|
+
|
|
60
|
+
// Specs for GitRepository#save: each context saves one or more records in its `before` hook,
// captures the outcome in the variables below, and wipes the repository in its `after` hook.
describe('#save', () => {
  // Every record saved in this suite targets the same service and document type.
  const trackedDocument = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  let savedId;
  let savedAsFirstRecord;
  let loggedCommit;
  let commitCountBefore;
  let commitCountAfter;

  // Number of entries currently returned by the git log.
  const countCommits = async () => {
    const commits = await git.log();

    return commits.length;
  };

  // First entry returned by the git log; the specs treat it as the commit created by the last save.
  const readFirstLoggedCommit = async () => {
    const [firstCommit] = await git.log();

    return firstCommit;
  };

  // Saves a record built from `params` and captures the returned id and first-record flag.
  const saveAndCapture = async params => {
    const outcome = await subject.save(new Record(params));

    savedId = outcome.id;
    savedAsFirstRecord = outcome.isFirstRecord;
  };

  // Text expected in a commit body for the given snapshot ID, built from the configured template.
  const snapshotIdentifierFor = snapshotId => config
    .get('recorder.versions.storage.git.snapshotIdentiferTemplate')
    .replace(SNAPSHOT_ID_MARKER, snapshotId);

  context('when it is the first record', () => {
    before(async () => {
      commitCountBefore = await countCommits();

      await saveAndCapture({
        ...trackedDocument,
        pageId: PAGE_ID,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      });

      commitCountAfter = await countCommits();
      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('saves the record', () => {
      expect(commitCountAfter).to.equal(commitCountBefore + 1);
    });

    it('returns the record id', () => {
      expect(loggedCommit.hash).to.include(savedId);
    });

    it('returns a boolean to know if it is the first record', () => {
      expect(savedAsFirstRecord).to.be.true;
    });

    it('stores the service ID', () => {
      expect(loggedCommit.message).to.include(SERVICE_PROVIDER_ID);
    });

    it('stores the document type', () => {
      expect(loggedCommit.message).to.include(DOCUMENT_TYPE);
    });

    it('stores information that it is the first record for this specific document', () => {
      expect(loggedCommit.message).to.include('Start tracking');
    });

    it('stores the proper content', () => {
      const storedContent = fs.readFileSync(EXPECTED_FILE_PATH_WITH_PAGE_ID, { encoding: 'utf8' });

      expect(storedContent).to.equal(CONTENT);
    });

    context('when provided', () => {
      it('stores the fetch date', () => {
        const commitTimestamp = new Date(loggedCommit.date).getTime();

        expect(commitTimestamp).to.equal(FETCH_DATE.getTime());
      });

      it('stores the MIME type', () => {
        // The MIME type is inferred from the extension of the expected file path.
        expect(mime.getType(EXPECTED_FILE_PATH_WITH_PAGE_ID)).to.equal(MIME_TYPE);
      });

      it('stores the snapshot ID', () => {
        expect(loggedCommit.body).to.include(SNAPSHOT_ID);
      });

      it('stores the page ID', () => {
        expect(loggedCommit.body).to.include(PAGE_ID);
      });
    });
  });

  context('when it is not the first record', () => {
    const UPDATED_CONTENT = `${CONTENT} updated`;

    before(async () => {
      // Seed a first record so that the save under test is a subsequent one.
      await subject.save(new Record({
        ...trackedDocument,
        content: CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FETCH_DATE,
      }));

      commitCountBefore = await countCommits();

      await saveAndCapture({
        ...trackedDocument,
        content: UPDATED_CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      });

      commitCountAfter = await countCommits();
      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('saves the record', () => {
      expect(commitCountAfter).to.equal(commitCountBefore + 1);
    });

    it('returns the record id', () => {
      expect(loggedCommit.hash).to.include(savedId);
    });

    it('returns a boolean to know if it is the first record', () => {
      expect(savedAsFirstRecord).to.be.false;
    });
  });

  context('when the content has not changed', () => {
    before(async () => {
      await subject.save(new Record({
        ...trackedDocument,
        content: CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FETCH_DATE,
      }));

      commitCountBefore = await countCommits();

      // Same content as the seed record: no new commit is expected.
      await saveAndCapture({
        ...trackedDocument,
        content: CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FETCH_DATE,
      });

      commitCountAfter = await countCommits();
    });

    after(() => subject.removeAll());

    it('does not save the record', () => {
      expect(commitCountAfter).to.equal(commitCountBefore);
    });

    it('returns no id', () => {
      expect(savedId).to.equal(undefined);
    });
  });

  context('when it is a refilter', () => {
    const REFILTERED_CONTENT = `${CONTENT} refiltered`;

    before(async () => {
      await subject.save(new Record({
        ...trackedDocument,
        content: CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FETCH_DATE_EARLIER,
      })); // A refilter cannot be the first record

      commitCountBefore = await countCommits();

      await saveAndCapture({
        ...trackedDocument,
        content: REFILTERED_CONTENT,
        fetchDate: FETCH_DATE,
        isRefilter: true,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      });

      commitCountAfter = await countCommits();
      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('saves the record', () => {
      expect(commitCountAfter).to.equal(commitCountBefore + 1);
    });

    it('returns the record id', () => {
      expect(loggedCommit.hash).to.include(savedId);
    });

    it('stores information that it is a refilter of this specific document', () => {
      expect(loggedCommit.message).to.include('Refilter');
    });
  });

  context('with PDF document', () => {
    before(async () => {
      commitCountBefore = await countCommits();

      await saveAndCapture({
        ...trackedDocument,
        content: PDF_CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: PDF_MIME_TYPE,
      });

      commitCountAfter = await countCommits();
      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('saves the record', () => {
      expect(commitCountAfter).to.equal(commitCountBefore + 1);
    });

    it('returns the record id', () => {
      expect(loggedCommit.hash).to.include(savedId);
    });

    it('stores the proper content', () => {
      const storedContent = fs.readFileSync(EXPECTED_PDF_FILE_PATH, { encoding: 'utf8' });

      expect(storedContent).to.equal(PDF_CONTENT);
    });

    it('stores the MIME type', () => {
      expect(mime.getType(EXPECTED_PDF_FILE_PATH)).to.equal(PDF_MIME_TYPE);
    });
  });

  context('when there is no snapshots IDs specified', () => {
    before(async () => {
      await saveAndCapture({
        ...trackedDocument,
        pageId: PAGE_ID,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        mimeType: MIME_TYPE,
      });

      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('does not store snapshots IDs', () => {
      // With no snapshot IDs, the commit body is expected to hold only the page ID line.
      expect(loggedCommit.body).to.be.equal(`Page ID ${PAGE_ID}\n`);
    });

    it('stores the service ID', () => {
      expect(loggedCommit.message).to.include(SERVICE_PROVIDER_ID);
    });

    it('stores the document type', () => {
      expect(loggedCommit.message).to.include(DOCUMENT_TYPE);
    });

    it('stores the page ID', () => {
      expect(loggedCommit.body).to.include(PAGE_ID);
    });
  });

  context('when one snapshot ID is specified', () => {
    const SINGLE_SNAPSHOT_ID = 'c01533c0e546ef430eea84d23c1b18a2b8420dfb';
    const snapshotIds = [SINGLE_SNAPSHOT_ID];

    before(async () => {
      await saveAndCapture({
        ...trackedDocument,
        pageId: PAGE_ID,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        mimeType: MIME_TYPE,
        snapshotIds,
      });

      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('stores snapshot ID', () => {
      expect(loggedCommit.body).to.include(snapshotIdentifierFor(SINGLE_SNAPSHOT_ID));
    });

    it('stores the service ID', () => {
      expect(loggedCommit.message).to.include(SERVICE_PROVIDER_ID);
    });

    it('stores the document type', () => {
      expect(loggedCommit.message).to.include(DOCUMENT_TYPE);
    });

    it('stores the page ID', () => {
      expect(loggedCommit.body).to.include(PAGE_ID);
    });
  });

  context('when there are many snapshots IDs specified', () => {
    const SNAPSHOT_ID_1 = 'c01533c0e546ef430eea84d23c1b18a2b8420dfb';
    const SNAPSHOT_ID_2 = '0fd16cca9e1a86a2267bd587107c485f06099d7d';
    const snapshotIds = [ SNAPSHOT_ID_1, SNAPSHOT_ID_2 ];

    before(async () => {
      await saveAndCapture({
        ...trackedDocument,
        pageId: PAGE_ID,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        mimeType: MIME_TYPE,
        snapshotIds,
      });

      loggedCommit = await readFirstLoggedCommit();
    });

    after(() => subject.removeAll());

    it('stores snapshots IDs', () => {
      expect(loggedCommit.body).to.include(snapshotIdentifierFor(SNAPSHOT_ID_1));
      expect(loggedCommit.body).to.include(snapshotIdentifierFor(SNAPSHOT_ID_2));
    });

    it('stores number of pages', () => {
      expect(loggedCommit.body).to.include(`${snapshotIds.length} pages`);
    });

    it('stores the service ID', () => {
      expect(loggedCommit.message).to.include(SERVICE_PROVIDER_ID);
    });

    it('stores the document type', () => {
      expect(loggedCommit.message).to.include(DOCUMENT_TYPE);
    });

    it('stores the page ID', () => {
      expect(loggedCommit.body).to.include(PAGE_ID);
    });
  });
});
|
|
406
|
+
|
|
407
|
+
// Specs for GitRepository#findById: saves a single record, looks it up by the id returned
// by `save`, and checks each field of the record that comes back.
describe('#findById', () => {
  let savedId;
  let foundRecord;

  before(async () => {
    const saved = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      pageId: PAGE_ID,
      content: CONTENT,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    savedId = saved.id;
    foundRecord = await subject.findById(savedId);
  });

  after(() => subject.removeAll());

  it('returns the record id', () => {
    expect(foundRecord.id).to.include(savedId);
  });

  it('returns a boolean to know if it is the first record', () => {
    expect(foundRecord.isFirstRecord).to.be.true;
  });

  it('returns the service ID', () => {
    expect(foundRecord.serviceId).to.equal(SERVICE_PROVIDER_ID);
  });

  it('returns the document type', () => {
    expect(foundRecord.documentType).to.equal(DOCUMENT_TYPE);
  });

  it('returns the content', () => {
    expect(foundRecord.content).to.equal(CONTENT);
  });

  it('returns the fetch date', () => {
    const returnedTimestamp = new Date(foundRecord.fetchDate).getTime();

    expect(returnedTimestamp).to.equal(FETCH_DATE.getTime());
  });

  it('returns the MIME type', () => {
    expect(foundRecord.mimeType).to.equal(MIME_TYPE);
  });

  it('returns the snapshot ID', () => {
    expect(foundRecord.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
  });

  it('returns the page ID', () => {
    expect(foundRecord.pageId).to.equal(PAGE_ID);
  });

  context('when requested record does not exist', () => {
    it('returns null', async () => {
      const missingRecord = await subject.findById('inexistantID');

      expect(missingRecord).to.equal(null);
    });
  });
});
|
|
469
|
+
|
|
470
|
+
// Specs for GitRepository#findAll: saves three records whose fetch dates are out of
// insertion order (the last one, a refilter, carries the earliest date), then checks
// that `findAll` returns all of them as Record instances in ascending fetch-date order.
describe('#findAll', () => {
  let records;
  // Ids returned by `save`, in insertion order; compared against the ids returned by `findAll`.
  const expectedIds = [];

  before(async function () {
    // Regular function (not an arrow) so that `this` is the Mocha context and the timeout can be raised.
    this.timeout(5000);

    const { id: id1 } = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: CONTENT,
      fetchDate: FETCH_DATE,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    expectedIds.push(id1);

    const { id: id2 } = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: `${CONTENT} - updated`,
      fetchDate: FETCH_DATE_LATER,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    expectedIds.push(id2);

    const { id: id3 } = await subject.save(new Record({
      serviceId: SERVICE_PROVIDER_ID,
      documentType: DOCUMENT_TYPE,
      content: `${CONTENT} - updated 2`,
      isRefilter: true,
      fetchDate: FETCH_DATE_EARLIER,
      snapshotIds: [SNAPSHOT_ID],
      mimeType: MIME_TYPE,
    }));

    expectedIds.push(id3);

    records = await subject.findAll();
  });

  after(async () => subject.removeAll());

  it('returns all records', () => {
    expect(records.length).to.equal(3);
  });

  // Mirrors the id check of the `#iterate` suite; `expectedIds` was previously collected but never asserted.
  it('returns the ids of all saved records', () => {
    expect(records.map(record => record.id)).to.have.members(expectedIds);
  });

  it('returns Record objects', () => {
    for (const record of records) {
      expect(record).to.be.an.instanceof(Record);
    }
  });

  it('returns records in ascending order', () => {
    expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
  });
});
|
|
530
|
+
|
|
531
|
+
// Specs for GitRepository#count: saves three distinct records, then checks the reported total.
describe('#count', () => {
  let recordCount;

  before(async function () {
    // Regular function (not an arrow) so that `this` is the Mocha context and the timeout can be raised.
    this.timeout(5000);

    const documentIdentity = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

    const recordsParams = [
      {
        ...documentIdentity,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
      {
        ...documentIdentity,
        content: `${CONTENT} - updated`,
        fetchDate: FETCH_DATE_LATER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
      {
        ...documentIdentity,
        content: `${CONTENT} - updated 2`,
        isRefilter: true,
        fetchDate: FETCH_DATE_EARLIER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
    ];

    // Saves must run one after the other, in this order.
    for (const params of recordsParams) {
      await subject.save(new Record(params));
    }

    recordCount = await subject.count();
  });

  after(() => subject.removeAll());

  it('returns the proper count', () => {
    expect(recordCount).to.equal(3);
  });
});
|
|
572
|
+
|
|
573
|
+
// Specs for GitRepository#findLatest: checks the id, content and MIME type of the latest
// record for HTML and PDF documents, and the null result when nothing has been saved.
describe('#findLatest', () => {
  const documentIdentity = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

  const findLatestForDocument = () => subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE);

  context('when there are records for the given service', () => {
    // Id returned by the most recent `save` call of the running context.
    let latestSavedId;
    let latestRecord;

    context('with HTML document', () => {
      const UPDATED_FILE_CONTENT = `${CONTENT} (with additional content to trigger a record)`;

      before(async () => {
        await subject.save(new Record({
          ...documentIdentity,
          content: CONTENT,
          mimeType: MIME_TYPE,
          fetchDate: FETCH_DATE_EARLIER,
        }));

        const saved = await subject.save(new Record({
          ...documentIdentity,
          content: UPDATED_FILE_CONTENT,
          mimeType: MIME_TYPE,
          fetchDate: FETCH_DATE,
        }));

        latestSavedId = saved.id;
        latestRecord = await findLatestForDocument();
      });

      after(() => subject.removeAll());

      it('returns the latest record id', () => {
        expect(latestRecord.id).to.include(latestSavedId);
      });

      it('returns the latest record content', () => {
        expect(latestRecord.content.toString('utf8')).to.equal(UPDATED_FILE_CONTENT);
      });

      it('returns the latest record mime type', () => {
        expect(latestRecord.mimeType).to.equal(MIME_TYPE);
      });
    });

    context('with PDF document', () => {
      before(async () => {
        const saved = await subject.save(new Record({
          ...documentIdentity,
          content: PDF_CONTENT,
          mimeType: PDF_MIME_TYPE,
          fetchDate: FETCH_DATE,
        }));

        latestSavedId = saved.id;
        latestRecord = await findLatestForDocument();
      });

      after(() => subject.removeAll());

      it('returns the latest record id', () => {
        expect(latestRecord.id).to.include(latestSavedId);
      });

      it('returns the latest record content', () => {
        expect(latestRecord.content.toString('utf8')).to.equal(PDF_CONTENT);
      });

      it('returns the latest record mime type', () => {
        expect(latestRecord.mimeType).to.equal(PDF_MIME_TYPE);
      });
    });
  });

  context('when there are no records for the given service', () => {
    let latestRecord;

    before(async () => {
      latestRecord = await findLatestForDocument();
    });

    it('returns null', () => {
      expect(latestRecord).to.equal(null);
    });
  });
});
|
|
657
|
+
|
|
658
|
+
// Specs for GitRepository#iterate: saves three records whose fetch dates are out of
// insertion order, walks the async iterator once, and checks the ids and ordering seen.
describe('#iterate', () => {
  const expectedIds = [];
  const iteratedIds = [];
  const iteratedFetchDates = [];

  before(async () => {
    const documentIdentity = { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE };

    const recordsParams = [
      {
        ...documentIdentity,
        content: CONTENT,
        fetchDate: FETCH_DATE,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
      {
        ...documentIdentity,
        content: `${CONTENT} - updated`,
        fetchDate: FETCH_DATE_LATER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
      {
        ...documentIdentity,
        content: `${CONTENT} - updated 2`,
        isRefilter: true,
        fetchDate: FETCH_DATE_EARLIER,
        snapshotIds: [SNAPSHOT_ID],
        mimeType: MIME_TYPE,
      },
    ];

    // Saves must run one after the other, in this order; keep the id returned by each.
    for (const params of recordsParams) {
      const saved = await subject.save(new Record(params));

      expectedIds.push(saved.id);
    }

    for await (const { id, fetchDate } of subject.iterate()) {
      iteratedIds.push(id);
      iteratedFetchDates.push(fetchDate);
    }
  });

  after(() => subject.removeAll());

  it('iterates through all records', () => {
    expect(iteratedIds).to.have.members(expectedIds);
  });

  it('iterates in ascending order', () => {
    expect(iteratedFetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
  });
});
|
|
714
|
+
});
|