@opentermsarchive/engine 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/.eslintrc.yaml +116 -0
- package/.github/workflows/deploy.yml +50 -0
- package/.github/workflows/release.yml +64 -0
- package/.github/workflows/test.yml +77 -0
- package/CHANGELOG.md +14 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/CONTRIBUTING.md +143 -0
- package/LICENSE +153 -0
- package/MIGRATING.md +42 -0
- package/README.fr.md +110 -0
- package/README.md +438 -0
- package/Vagrantfile +38 -0
- package/ansible.cfg +13 -0
- package/bin/.env.js +1 -0
- package/bin/lint-declarations.js +31 -0
- package/bin/track.js +26 -0
- package/bin/validate-declarations.js +68 -0
- package/config/ci.json +5 -0
- package/config/contrib.json +35 -0
- package/config/dating.json +37 -0
- package/config/default.json +71 -0
- package/config/france.json +40 -0
- package/config/p2b-compliance.json +40 -0
- package/config/pga.json +40 -0
- package/config/production.json +27 -0
- package/config/test.json +49 -0
- package/config/vagrant.json +24 -0
- package/decision-records/0001-service-name-and-id.md +73 -0
- package/decision-records/0002-service-history.md +212 -0
- package/decision-records/0003-snapshots-database.md +123 -0
- package/ops/README.md +280 -0
- package/ops/app.yml +5 -0
- package/ops/infra.yml +6 -0
- package/ops/inventories/dev.yml +7 -0
- package/ops/inventories/production.yml +27 -0
- package/ops/roles/infra/defaults/main.yml +2 -0
- package/ops/roles/infra/files/.gitconfig +3 -0
- package/ops/roles/infra/files/mongod.conf +18 -0
- package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
- package/ops/roles/infra/tasks/main.yml +78 -0
- package/ops/roles/infra/tasks/mongo.yml +40 -0
- package/ops/roles/infra/templates/ssh_config.j2 +5 -0
- package/ops/roles/ota/defaults/main.yml +14 -0
- package/ops/roles/ota/files/.env +21 -0
- package/ops/roles/ota/tasks/database.yml +65 -0
- package/ops/roles/ota/tasks/main.yml +110 -0
- package/ops/site.yml +6 -0
- package/package.json +101 -0
- package/pm2.config.cjs +20 -0
- package/scripts/dataset/README.md +37 -0
- package/scripts/dataset/assets/LICENSE +540 -0
- package/scripts/dataset/assets/README.template.js +65 -0
- package/scripts/dataset/export/index.js +106 -0
- package/scripts/dataset/export/index.test.js +155 -0
- package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
- package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
- package/scripts/dataset/index.js +40 -0
- package/scripts/dataset/logger/index.js +17 -0
- package/scripts/dataset/main.js +25 -0
- package/scripts/dataset/publish/index.js +39 -0
- package/scripts/declarations/lint/index.js +36 -0
- package/scripts/declarations/utils/index.js +81 -0
- package/scripts/declarations/validate/definitions.js +63 -0
- package/scripts/declarations/validate/index.mocha.js +262 -0
- package/scripts/declarations/validate/service.history.schema.js +86 -0
- package/scripts/declarations/validate/service.schema.js +91 -0
- package/scripts/history/logger/index.js +39 -0
- package/scripts/history/migrate-services.js +212 -0
- package/scripts/history/update-to-full-hash.js +61 -0
- package/scripts/history/utils/index.js +23 -0
- package/scripts/import/README.md +59 -0
- package/scripts/import/config/import.json +12 -0
- package/scripts/import/index.js +224 -0
- package/scripts/import/loadCommits.js +66 -0
- package/scripts/import/logger/index.js +43 -0
- package/scripts/rewrite/README.md +131 -0
- package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
- package/scripts/rewrite/config/rewrite-versions.json +32 -0
- package/scripts/rewrite/initializer/files/license +428 -0
- package/scripts/rewrite/initializer/files/readme.md +8 -0
- package/scripts/rewrite/initializer/index.js +44 -0
- package/scripts/rewrite/rewrite-snapshots.js +108 -0
- package/scripts/rewrite/rewrite-versions.js +160 -0
- package/scripts/rewrite/utils.js +33 -0
- package/scripts/utils/renamer/README.md +49 -0
- package/scripts/utils/renamer/index.js +45 -0
- package/scripts/utils/renamer/rules/documentTypes.json +25 -0
- package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
- package/scripts/utils/renamer/rules/serviceNames.json +92 -0
- package/src/archivist/errors.js +9 -0
- package/src/archivist/fetcher/errors.js +6 -0
- package/src/archivist/fetcher/exports.js +18 -0
- package/src/archivist/fetcher/fullDomFetcher.js +84 -0
- package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
- package/src/archivist/fetcher/index.js +35 -0
- package/src/archivist/fetcher/index.test.js +239 -0
- package/src/archivist/filter/exports.js +3 -0
- package/src/archivist/filter/index.js +178 -0
- package/src/archivist/filter/index.test.js +561 -0
- package/src/archivist/index.js +276 -0
- package/src/archivist/index.test.js +600 -0
- package/src/archivist/recorder/index.js +77 -0
- package/src/archivist/recorder/index.test.js +463 -0
- package/src/archivist/recorder/record.js +35 -0
- package/src/archivist/recorder/record.test.js +91 -0
- package/src/archivist/recorder/repositories/factory.js +23 -0
- package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
- package/src/archivist/recorder/repositories/git/git.js +122 -0
- package/src/archivist/recorder/repositories/git/git.test.js +86 -0
- package/src/archivist/recorder/repositories/git/index.js +182 -0
- package/src/archivist/recorder/repositories/git/index.test.js +714 -0
- package/src/archivist/recorder/repositories/interface.js +108 -0
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
- package/src/archivist/recorder/repositories/mongo/index.js +121 -0
- package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
- package/src/archivist/services/documentDeclaration.js +26 -0
- package/src/archivist/services/documentDeclaration.test.js +85 -0
- package/src/archivist/services/documentTypes.json +386 -0
- package/src/archivist/services/index.js +255 -0
- package/src/archivist/services/index.test.js +327 -0
- package/src/archivist/services/pageDeclaration.js +51 -0
- package/src/archivist/services/pageDeclaration.test.js +224 -0
- package/src/archivist/services/service.js +60 -0
- package/src/archivist/services/service.test.js +164 -0
- package/src/exports.js +3 -0
- package/src/index.js +59 -0
- package/src/logger/README.md +1 -0
- package/src/logger/index.js +131 -0
- package/src/main.js +18 -0
- package/src/notifier/README.md +1 -0
- package/src/notifier/index.js +150 -0
- package/src/tracker/README.md +1 -0
- package/src/tracker/index.js +215 -0
- package/test/fixtures/service_A.js +22 -0
- package/test/fixtures/service_A_terms.md +10 -0
- package/test/fixtures/service_A_terms_snapshot.html +14 -0
- package/test/fixtures/service_B.js +22 -0
- package/test/fixtures/service_with_declaration_history.js +65 -0
- package/test/fixtures/service_with_filters_history.js +155 -0
- package/test/fixtures/service_with_history.js +188 -0
- package/test/fixtures/service_with_multipage_document.js +100 -0
- package/test/fixtures/service_without_history.js +31 -0
- package/test/fixtures/services.js +19 -0
- package/test/fixtures/terms.pdf +0 -0
- package/test/fixtures/termsFromPDF.md +25 -0
- package/test/fixtures/termsModified.pdf +0 -0
- package/test/services/service_A.json +9 -0
- package/test/services/service_B.json +9 -0
- package/test/services/service_with_declaration_history.filters.js +7 -0
- package/test/services/service_with_declaration_history.history.json +17 -0
- package/test/services/service_with_declaration_history.json +13 -0
- package/test/services/service_with_filters_history.filters.history.js +29 -0
- package/test/services/service_with_filters_history.filters.js +7 -0
- package/test/services/service_with_filters_history.json +13 -0
- package/test/services/service_with_history.filters.history.js +29 -0
- package/test/services/service_with_history.filters.js +7 -0
- package/test/services/service_with_history.history.json +26 -0
- package/test/services/service_with_history.json +17 -0
- package/test/services/service_with_multipage_document.filters.js +7 -0
- package/test/services/service_with_multipage_document.history.json +37 -0
- package/test/services/service_with_multipage_document.json +28 -0
- package/test/services/service_without_history.filters.js +7 -0
- package/test/services/service_without_history.json +13 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import fsApi from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
import archiver from 'archiver';
|
|
6
|
+
import config from 'config';
|
|
7
|
+
|
|
8
|
+
import RepositoryFactory from '../../../src/archivist/recorder/repositories/factory.js';
|
|
9
|
+
import * as renamer from '../../utils/renamer/index.js';
|
|
10
|
+
import readme from '../assets/README.template.js';
|
|
11
|
+
import logger from '../logger/index.js';
|
|
12
|
+
|
|
13
|
+
// ES modules do not provide `__dirname`; rebuild it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Promise-based flavour of the file system API; `fsApi` stays available for stream creation.
const { promises: fs } = fsApi;

// Format handed to `archiver`; for supported formats, see https://www.archiverjs.com/docs/archive-formats
const ARCHIVE_FORMAT = 'zip';
|
|
18
|
+
|
|
19
|
+
/**
 * Generate a dataset archive containing every recorded version, a README and a LICENSE.
 *
 * Versions are read from the repository configured under `recorder.versions.storage`,
 * renamed through the renamer rules and stored in the archive under
 * `<archive basename>/<serviceId>/<documentType>/<fetchDate>.md`.
 *
 * @param {object} params
 * @param {string} params.archivePath - Path of the archive file to write
 * @param {Date} params.releaseDate - Release date forwarded to the README template
 * @returns {Promise<{servicesCount: number, firstVersionDate: Date, lastVersionDate: Date}>}
 *   Number of distinct services and the earliest/latest fetch dates found in the exported versions
 */
export default async function generate({ archivePath, releaseDate }) {
  const versionsRepository = await RepositoryFactory.create(config.get('recorder.versions.storage')).initialize();

  // Everything after the repository is initialized runs in a `try` so that the repository
  // is always finalized, even when reading versions or writing the archive fails.
  try {
    const archive = await initializeArchive(archivePath);

    await renamer.loadRules();

    const services = new Set();

    // Sentinel values: any real fetch date is earlier than "now" and later than the epoch.
    // They are returned unchanged when the repository contains no version.
    let firstVersionDate = new Date();
    let lastVersionDate = new Date(0);

    let index = 1; // 1-based counter, only used for logging progress

    for await (const version of versionsRepository.iterate()) {
      const { content, fetchDate } = version;
      const { serviceId, documentType } = renamer.applyRules(version.serviceId, version.documentType);

      if (firstVersionDate > fetchDate) {
        firstVersionDate = fetchDate;
      }

      if (fetchDate > lastVersionDate) {
        lastVersionDate = fetchDate;
      }

      services.add(serviceId);

      const versionPath = generateVersionPath({ serviceId, documentType, fetchDate });

      logger.info({ message: versionPath, counter: index, hash: version.id });

      archive.stream.append(
        content,
        { name: `${archive.basename}/${versionPath}` },
      );
      index++;
    }

    archive.stream.append(
      readme({
        servicesCount: services.size,
        releaseDate,
        firstVersionDate,
        lastVersionDate,
      }),
      { name: `${archive.basename}/README.md` },
    );

    // Read the license asynchronously to avoid blocking the event loop inside an async function.
    const license = await fs.readFile(path.resolve(__dirname, '../assets/LICENSE'));

    archive.stream.append(
      license,
      { name: `${archive.basename}/LICENSE` },
    );

    archive.stream.finalize();

    // Wait until the archive has been completely written to disk.
    await archive.done;

    return {
      servicesCount: services.size,
      firstVersionDate,
      lastVersionDate,
    };
  } finally {
    await versionsRepository.finalize();
  }
}
|
|
82
|
+
|
|
83
|
+
/**
 * Create the parent directory of `targetPath` if needed and open an archive stream piped to that file.
 *
 * @param {string} targetPath - Path of the archive file to create
 * @returns {Promise<{basename: string, stream: object, done: Promise<void>}>}
 *   `basename` is the file name of `targetPath` without its extension,
 *   `stream` is the archiver instance to append entries to,
 *   `done` settles once the output file is closed, or rejects if writing or archiving fails
 */
async function initializeArchive(targetPath) {
  await fs.mkdir(path.dirname(targetPath), { recursive: true });

  const basename = path.basename(targetPath, path.extname(targetPath));

  const output = fsApi.createWriteStream(targetPath);
  const stream = archiver(ARCHIVE_FORMAT, { zlib: { level: 9 } }); // set compression to max level

  const done = new Promise((resolve, reject) => {
    output.on('close', resolve);
    // Without these listeners a failure would be an unhandled `error` event, or `done` would never settle.
    output.on('error', reject);
    stream.on('error', reject);
  });

  // Callers only await `done` after appending all entries; mark the promise as handled so an
  // early failure does not raise an unhandled rejection. Awaiting `done` still rejects.
  done.catch(() => {});

  stream.pipe(output);

  return { basename, stream, done };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Build the path of a version inside the archive, relative to the archive root.
 *
 * @param {object} params
 * @param {string} params.serviceId - Service identifier, used as first path segment
 * @param {string} params.documentType - Document type, used as second path segment
 * @param {Date} params.fetchDate - Fetch date, used to build the file name
 * @returns {string} `<serviceId>/<documentType>/<date>.md`, where the date is the ISO 8601
 *   representation without milliseconds and with `:` replaced by `-`
 */
function generateVersionPath({ serviceId, documentType, fetchDate }) {
  const isoDate = fetchDate.toISOString();
  const dateWithoutMilliseconds = isoDate.replace(/\.\d{3}/, ''); // remove milliseconds
  const fsCompliantDate = dateWithoutMilliseconds.replace(/:|\./g, '-'); // replace `:` and `.` by `-` to be compliant with the file system

  return `${serviceId}/${documentType}/${fsCompliantDate}.md`;
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
import chai from 'chai';
|
|
6
|
+
import config from 'config';
|
|
7
|
+
import dircompare from 'dir-compare';
|
|
8
|
+
import mime from 'mime';
|
|
9
|
+
import StreamZip from 'node-stream-zip';
|
|
10
|
+
|
|
11
|
+
import Record from '../../../src/archivist/recorder/record.js';
|
|
12
|
+
import GitRepository from '../../../src/archivist/recorder/repositories/git/index.js';
|
|
13
|
+
|
|
14
|
+
import generateArchive from './index.js';
|
|
15
|
+
|
|
16
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
|
|
18
|
+
const { expect } = chai;
|
|
19
|
+
|
|
20
|
+
// Services the fixture versions are recorded for
const FIRST_SERVICE_PROVIDER_ID = 'ServiceA';
const SECOND_SERVICE_PROVIDER_ID = 'ServiceB';

// Document types of the fixture versions
const FIRST_DOCUMENT_TYPE = 'Terms of Service';
const SECOND_DOCUMENT_TYPE = 'Privacy Policy';

// Fetch dates of the fixture versions; note that the fourth date is earlier than the third one
const FIRST_FETCH_DATE = '2021-01-01T11:27:00.000Z';
const SECOND_FETCH_DATE = '2021-01-11T11:32:47.000Z';
const THIRD_FETCH_DATE = '2022-01-06T11:32:47.000Z';
const FOURTH_FETCH_DATE = '2022-01-01T12:12:24.000Z';

// Contents of the fixture versions
const FIRST_CONTENT = 'First Content';
const SECOND_CONTENT = 'Second Content';

// MIME type shared by all fixture versions
const MIME_TYPE = 'text/markdown';

// Snapshot identifier shared by all fixture versions
const SNAPSHOT_ID = '721ce4a63ad399ecbdb548a66d6d327e7bc97876';

// Release date passed to the archive generator
const RELEASE_DATE = '2022-01-01T18:21:00.000Z';
|
|
39
|
+
|
|
40
|
+
// End-to-end check of the dataset export: versions are recorded in a Git repository,
// exported as a zip archive, extracted, and compared with the expected fixture directory.
describe('Export', () => {
  describe('#generateArchive', () => {
    const ARCHIVE_NAME = 'test-dataset';
    const ARCHIVE_PATH = path.resolve(__dirname, `./tmp/${ARCHIVE_NAME}.zip`);
    const TMP_PATH = path.resolve(__dirname, './tmp');
    const EXPECTED_DATASET_PATH = path.resolve(__dirname, './test/fixtures/dataset');

    let repository;
    let zip;

    before(async function () {
      this.timeout(10000);
      repository = new GitRepository({
        ...config.get('recorder.versions.storage.git'),
        path: path.resolve(__dirname, '../../../', config.get('recorder.versions.storage.git.path')),
      });

      await repository.initialize();

      await repository.save(new Record({
        serviceId: FIRST_SERVICE_PROVIDER_ID,
        documentType: FIRST_DOCUMENT_TYPE,
        content: FIRST_CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FIRST_FETCH_DATE,
        snapshotId: SNAPSHOT_ID,
      }));

      await repository.save(new Record({
        serviceId: FIRST_SERVICE_PROVIDER_ID,
        documentType: FIRST_DOCUMENT_TYPE,
        content: SECOND_CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: SECOND_FETCH_DATE,
        snapshotId: SNAPSHOT_ID,
      }));

      await repository.save(new Record({
        serviceId: SECOND_SERVICE_PROVIDER_ID,
        documentType: FIRST_DOCUMENT_TYPE,
        content: FIRST_CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: THIRD_FETCH_DATE,
        snapshotId: SNAPSHOT_ID,
      }));

      await repository.save(new Record({
        serviceId: SECOND_SERVICE_PROVIDER_ID,
        documentType: SECOND_DOCUMENT_TYPE,
        content: FIRST_CONTENT,
        mimeType: MIME_TYPE,
        fetchDate: FOURTH_FETCH_DATE,
        snapshotId: SNAPSHOT_ID,
      }));

      await generateArchive({
        archivePath: ARCHIVE_PATH,
        releaseDate: new Date(RELEASE_DATE),
      });

      zip = new StreamZip.async({ file: ARCHIVE_PATH });

      // Close the archive handle even when extraction fails, so the file is not left open.
      try {
        await zip.extract('', TMP_PATH);
      } finally {
        await zip.close();
      }
    });

    after(async () => {
      // `force` keeps the cleanup going when the temporary directory was never created
      // (e.g. `before` failed early), so the repository is still emptied below.
      await fs.rm(TMP_PATH, { recursive: true, force: true });
      await repository.removeAll();
    });

    it('is an archive', () => {
      const mimeType = mime.getType(ARCHIVE_PATH);

      expect(mimeType).to.equal('application/zip');
    });

    it('has the proper contents', () => {
      expect(`${TMP_PATH}/${ARCHIVE_NAME}`).to.have.sameContentAs(EXPECTED_DATASET_PATH);
    });
  });
});
|
|
121
|
+
|
|
122
|
+
// Register a custom chai assertion, `sameContentAs`, that compares two directories
// (file names and file contents) and lists every difference on failure.
chai.use(({ Assertion }) => {
  Assertion.addMethod('sameContentAs', function (expectedContentPath) {
    const givenContentPath = this._obj;

    const result = dircompare.compareSync(givenContentPath, expectedContentPath, {
      excludeFilter: '.DS_Store',
      compareContent: true,
    });

    this.assert(
      result.same,
      generateFailureMessage(result),
      `expected ${givenContentPath} to have a different content as ${expectedContentPath}`,
    );

    // Build the failure message: a summary line followed by one line per non-equal entry.
    function generateFailureMessage(comparison) {
      const header = `expected ${givenContentPath} to have the same content as ${expectedContentPath}\n\nThere are ${comparison.differences} differences:\n`;

      const details = comparison.diffSet
        .filter(diff => diff.state !== 'equal')
        .map(diff => ` ${diff.reason} on file ${diff.name1} | ${diff.name2}\n`);

      return header + details.join('');
    }
  });
});
|