@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168):
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,108 @@
1
/**
 * Interface for classes that model a collection of domain objects with querying capabilities
 * @see {@link https://martinfowler.com/eaaCatalog/repository.html|Repository}
 * @interface
 */
export default class RepositoryInterface {
  /**
   * [Optional] Initialize repository
   * Override this method if the repository needs some asynchronous initialization code (open database connection and create collections, initialize Git…)
   *
   * @returns {Promise<RepositoryInterface>} Promise that will be resolved with the current repository instance
   */
  async initialize() {
    return this;
  }

  /**
   * [Optional] Finalize repository
   * Override this method if the repository needs some asynchronous code to properly close the repository (close database connection, push changes on Git remote…)
   *
   * @returns {Promise<RepositoryInterface>} Promise that will be resolved with the current repository instance
   */
  async finalize() {
    return this;
  }

  /**
   * Persist the given record if it does not already exist in repository
   *
   * @param {Record} record - Record to persist
   * @returns {Promise<Record>} Promise that will be resolved with the given record when it has been persisted
   */
  async save(record) {
    throw new Error(`#save method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Find the most recent record that matches the given service ID and document type and optionally the page ID
   * In case of snapshots, if the record is related to a multipage document, the page ID is required to find the corresponding snapshot
   *
   * @param {string} serviceId - Service ID of record to find
   * @param {string} documentType - Document type of record to find
   * @param {string} [pageId] - Page ID of record to find. Used to differentiate pages of multipage document. Not necessary for single page document
   * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given criteria
   */
  async findLatest(serviceId, documentType, pageId) {
    throw new Error(`#findLatest method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Find the record that matches the given record ID
   *
   * @param {string} recordId - Record ID of the record to find
   * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given ID
   */
  async findById(recordId) {
    throw new Error(`#findById method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Find all records
   * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records
   *
   * @see RepositoryInterface#loadRecordContent
   * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of all records
   */
  async findAll() {
    throw new Error(`#findAll method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Count the total number of records in the repository
   * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository
   *
   * @returns {Promise<number>} Promise that will be resolved with the total number of records
   */
  async count() {
    throw new Error(`#count method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Iterate over all records in the repository, from oldest to most recent
   * The default implementation rejects on the first call to `next()`, since an async generator body only runs once iteration starts
   *
   * @yields {Record}
   */
  async* iterate() {
    throw new Error(`#iterate method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Remove all records
   *
   * @returns {Promise} Promise that will be resolved when all records are removed
   */
  async removeAll() {
    throw new Error(`#removeAll method is not implemented in ${this.constructor.name}`);
  }

  /**
   * Load content of the given record
   *
   * @param {Record} record - Record of which to populate content
   * @returns {Promise<Record>} Promise that will be resolved with the given record when its content has been loaded
   */
  async loadRecordContent(record) {
    throw new Error(`#loadRecordContent method is not implemented in ${this.constructor.name}`);
  }
}
@@ -0,0 +1,32 @@
1
+ import { ObjectId } from 'mongodb';
2
+
3
+ import Record from '../../record.js';
4
+
5
/**
 * Convert a domain record into the plain document that is stored in MongoDB
 * The given record is not modified: its own enumerable properties are copied into a new object
 *
 * @param {Record} record - Record to convert
 * @returns {object} Document fields ready to be inserted, with `snapshotIds` converted to ObjectIds and a `created_at` date set to the current time
 */
export function toPersistence(record) {
  const documentFields = Object.fromEntries(Object.entries(record));

  if (documentFields.snapshotIds) {
    documentFields.snapshotIds = record.snapshotIds.map(snapshotId => new ObjectId(snapshotId));
  }

  // Read `content` explicitly: it is picked up even when it is not an own enumerable property of the record (e.g. exposed through an accessor)
  documentFields.content = record.content;
  documentFields.created_at = new Date();

  return documentFields;
}
17
+
18
/**
 * Convert a document read from MongoDB into a domain record
 * The `content` field is not mapped here; it is left to the caller to load it on the resulting record
 *
 * @param {object} document - MongoDB document to convert
 * @returns {Record} Record built from the document, with ObjectIds converted to strings
 */
export function toDomain(document) {
  const { _id, serviceId, documentType, pageId, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotIds } = document;

  return new Record({
    id: _id.toString(),
    serviceId,
    documentType,
    pageId,
    mimeType,
    fetchDate: new Date(fetchDate),
    isFirstRecord: Boolean(isFirstRecord),
    isRefilter: Boolean(isRefilter),
    snapshotIds: snapshotIds?.map(snapshotId => snapshotId.toString()) ?? [], // documents without `snapshotIds` get an empty list
  });
}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * This module is the boundary beyond which the usage of MongoDB is abstracted.
3
+ * Object IDs are used as opaque unique IDs.
4
+ */
5
+
6
+ import { MongoClient, ObjectId, Binary } from 'mongodb';
7
+
8
+ import RepositoryInterface from '../interface.js';
9
+
10
+ import * as DataMapper from './dataMapper.js';
11
+
12
/**
 * Repository that stores records as documents of a MongoDB collection
 */
export default class MongoRepository extends RepositoryInterface {
  /**
   * @param {object} params
   * @param {string} params.database - Name of the database to use
   * @param {string} params.collection - Name of the collection in which records are stored
   * @param {string} params.connectionURI - URI given to the MongoDB client
   */
  constructor({ database: databaseName, collection: collectionName, connectionURI }) {
    super();

    this.client = new MongoClient(connectionURI);
    this.databaseName = databaseName;
    this.collectionName = collectionName;
  }

  /**
   * Connect the client and select the collection
   *
   * @returns {Promise<MongoRepository>} Promise that will be resolved with the current repository instance
   */
  async initialize() {
    await this.client.connect();
    const db = this.client.db(this.databaseName);

    this.collection = db.collection(this.collectionName);

    return this;
  }

  /**
   * Close the client connection
   *
   * @returns {Promise<MongoRepository>} Promise that will be resolved with the current repository instance
   */
  async finalize() {
    await this.client.close();

    return this;
  }

  /**
   * Persist the given record, unless its content is identical to the content of the latest record
   * for the same service ID, document type and (when set) page ID
   * The given record is mutated: `isFirstRecord` is set when nullish and `id` is set after insertion
   *
   * @param {Record} record - Record to persist
   * @returns {Promise<Record|object>} Promise that will be resolved with the given record once inserted, or with an empty object when nothing was inserted
   */
  async save(record) {
    const { serviceId, documentType, pageId } = record;

    if (record.isFirstRecord === undefined || record.isFirstRecord === null) {
      record.isFirstRecord = !await this.collection.findOne({ serviceId, documentType });
    }

    const documentFields = await this.#toPersistence(record);
    // Compare with the latest record of the same page, so that pages of a multipage document are not compared with each other
    const previousRecord = await this.findLatest(serviceId, documentType, pageId);

    if (previousRecord && MongoRepository.#isSameContent(previousRecord.content, documentFields.content)) {
      return {}; // Nothing inserted: callers get an object without `id`
    }

    const insertResult = await this.collection.insertOne(documentFields);

    record.id = insertResult.insertedId.toString();

    return record;
  }

  /**
   * Find the most recent record, by fetch date, that matches the given criteria
   *
   * @param {string} serviceId - Service ID of record to find
   * @param {string} documentType - Document type of record to find
   * @param {string} [pageId] - Page ID of record to find; when omitted, records are not filtered by page
   * @returns {Promise<Record|null>} Promise that will be resolved with the found record, content loaded, or `null` if none match
   */
  async findLatest(serviceId, documentType, pageId) {
    const query = { serviceId, documentType };

    if (pageId) {
      query.pageId = pageId;
    }

    const [mongoDocument] = await this.collection.find(query).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find`

    return this.#toDomain(mongoDocument);
  }

  /**
   * Find the record that matches the given record ID
   *
   * @param {string} recordId - Record ID of the record to find
   * @returns {Promise<Record|null>} Promise that will be resolved with the found record, content loaded, or `null` if none match
   */
  async findById(recordId) {
    const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) });

    return this.#toDomain(mongoDocument);
  }

  /**
   * Find all records, from oldest to most recent fetch date, without loading their content
   *
   * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of all records
   */
  async findAll() {
    const mongoDocuments = await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray();

    return Promise.all(mongoDocuments.map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true })));
  }

  /**
   * Count the total number of records in the collection
   *
   * @returns {Promise<number>} Promise that will be resolved with the total number of records
   */
  async count() {
    return this.collection.countDocuments();
  }

  /**
   * Iterate over all records, from oldest to most recent fetch date, with their content loaded
   *
   * @yields {Record}
   */
  async* iterate() {
    const cursor = this.collection.find().sort({ fetchDate: 1 });

    try {
      /* eslint-disable no-await-in-loop */
      while (await cursor.hasNext()) {
        const mongoDocument = await cursor.next();

        yield this.#toDomain(mongoDocument);
      }
      /* eslint-enable no-await-in-loop */
    } finally {
      await cursor.close(); // Release the cursor even when the consumer stops iterating early or an error is thrown
    }
  }

  /**
   * Remove all records of the collection
   *
   * @returns {Promise} Promise that will be resolved with the result of the deletion
   */
  async removeAll() {
    return this.collection.deleteMany({});
  }

  /**
   * Load content of the given record from the collection, using the record ID
   *
   * @param {Record} record - Record of which to populate content
   * @returns {Promise<Record>} Promise that will be resolved with the given record when its content has been loaded
   * @throws {Error} If no document matches the record ID
   */
  async loadRecordContent(record) {
    const mongoDocument = await this.collection.findOne({ _id: new ObjectId(record.id) }, { projection: { content: 1 } });

    if (!mongoDocument) {
      throw new Error(`Cannot load content: no document found with id "${record.id}" in collection "${this.collectionName}"`);
    }

    const { content } = mongoDocument;

    record.content = content instanceof Binary ? content.buffer : content;

    return record;
  }

  /**
   * Tell whether two contents are identical
   * Binary contents are compared byte by byte, as two distinct buffers are never equal by reference
   */
  static #isSameContent(previousContent, newContent) {
    if (previousContent instanceof Uint8Array && newContent instanceof Uint8Array) { // `Buffer` is a subclass of `Uint8Array`
      return Buffer.compare(previousContent, newContent) === 0;
    }

    return previousContent === newContent;
  }

  /**
   * Convert a MongoDB document into a record, loading its content unless `deferContentLoading` is set
   * Resolves with `null` when no document is given
   */
  async #toDomain(document, { deferContentLoading } = {}) {
    if (!document) {
      return null;
    }

    const record = DataMapper.toDomain(document);

    if (deferContentLoading) {
      return record;
    }

    await this.loadRecordContent(record);

    return record;
  }

  /**
   * Convert a record into document fields, loading its content first when it is missing
   */
  async #toPersistence(record) {
    if (record.content === undefined || record.content === null) {
      await this.loadRecordContent(record);
    }

    return DataMapper.toPersistence(record);
  }
}