@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,224 @@
1
+ import chai from 'chai';
2
+
3
+ import PageDeclaration from './pageDeclaration.js';
4
+
5
+ const { expect } = chai;
6
+
7
/**
 * Unit tests for PageDeclaration.
 *
 * Covers the `cssSelectors` accessor (string, range and mixed selectors given as
 * content selectors, noise selectors, or both) and the `toPersistence()` conversion.
 */
describe('PageDeclaration', () => {
  const PAGE_LOCATION = 'https://www.service.example/terms';

  // Fixtures are produced by factories so that every test receives fresh objects
  // and no reference is shared between a declaration and an expected value.
  const rangeSelector = () => ({
    startBefore: '#startBefore',
    endBefore: '#endBefore',
  });
  const mixedSelectors = () => [ rangeSelector(), 'body' ];

  describe('#getCssSelectors', () => {
    const RANGE_CSS = [ '#startBefore', '#endBefore' ];
    const MIXED_CSS = [ '#startBefore', '#endBefore', 'body' ];

    // Each suite becomes a `context`; each case becomes a nested `context` holding one test.
    const suites = [
      {
        title: 'with "select" property',
        cases: [
          {
            title: 'with string selector',
            selectors: () => ({ contentSelectors: 'body' }),
            expected: ['body'],
          },
          {
            title: 'with range selector',
            selectors: () => ({ contentSelectors: rangeSelector() }),
            expected: RANGE_CSS,
          },
          {
            title: 'with an array of mixed selectors',
            selectors: () => ({ contentSelectors: mixedSelectors() }),
            expected: MIXED_CSS,
          },
        ],
      },
      {
        title: 'with "remove" property',
        cases: [
          {
            title: 'with string selector',
            selectors: () => ({ noiseSelectors: 'body' }),
            expected: ['body'],
          },
          {
            title: 'with range selector',
            selectors: () => ({ noiseSelectors: rangeSelector() }),
            expected: RANGE_CSS,
          },
          {
            title: 'with an array of mixed selectors',
            selectors: () => ({ noiseSelectors: mixedSelectors() }),
            expected: MIXED_CSS,
          },
        ],
      },
      {
        title: 'with both "select" and "remove" property',
        cases: [
          {
            title: 'with string selector',
            selectors: () => ({ contentSelectors: 'body', noiseSelectors: 'h1' }),
            expected: [ 'body', 'h1' ],
          },
          {
            title: 'with range selector',
            selectors: () => ({ contentSelectors: rangeSelector(), noiseSelectors: rangeSelector() }),
            expected: [ ...RANGE_CSS, ...RANGE_CSS ],
          },
          {
            title: 'with an array of mixed selectors',
            selectors: () => ({ contentSelectors: mixedSelectors(), noiseSelectors: mixedSelectors() }),
            expected: [ ...MIXED_CSS, ...MIXED_CSS ],
          },
        ],
      },
    ];

    suites.forEach(suite => {
      context(suite.title, () => {
        suite.cases.forEach(({ title, selectors, expected }) => {
          context(title, () => {
            it('extracts selectors', async () => {
              const result = new PageDeclaration({ location: PAGE_LOCATION, ...selectors() }).cssSelectors;

              expect(result).to.deep.equal(expected);
            });
          });
        });
      });
    });
  });

  describe('#toPersistence', () => {
    it('converts basic page declaration into JSON representation', async () => {
      const declaration = new PageDeclaration({
        location: PAGE_LOCATION,
        contentSelectors: 'body',
      });

      // Unset optional fields are expected to be present with an `undefined` value.
      expect(declaration.toPersistence()).to.deep.equal({
        fetch: PAGE_LOCATION,
        select: 'body',
        remove: undefined,
        filter: undefined,
        executeClientScripts: undefined,
      });
    });

    it('converts page declaration with all fields to JSON representation', async () => {
      const declaration = new PageDeclaration({
        location: PAGE_LOCATION,
        contentSelectors: mixedSelectors(),
        noiseSelectors: mixedSelectors(),
        filters: [function filterSomething() {}],
        executeClientScripts: true,
      });

      // Filter functions are expected to be persisted by name.
      expect(declaration.toPersistence()).to.deep.equal({
        fetch: PAGE_LOCATION,
        select: mixedSelectors(),
        remove: mixedSelectors(),
        filter: ['filterSomething'],
        executeClientScripts: true,
      });
    });
  });
});
@@ -0,0 +1,60 @@
1
/**
 * Returns the entry stored under `documentType` as an OWN property of `documents`.
 *
 * `Service#documents` is a Map instance used as a plain dictionary, so a bare
 * `documents[documentType]` lookup would also resolve inherited members such as
 * `get`, `set` or `constructor`. Restricting the lookup to own properties avoids that.
 *
 * @param {object} documents - The `documents` dictionary of a service.
 * @param {string} documentType - The document type to look up.
 * @returns {object|undefined} The `{ latest, history }` entry, or `undefined` when absent.
 */
function getOwnDocumentEntry(documents, documentType) {
  return Object.prototype.hasOwnProperty.call(documents, documentType)
    ? documents[documentType]
    : undefined;
}

/**
 * A service and the declarations of its documents.
 *
 * For each document type, `documents[type]` holds:
 * - `latest`: the declaration without `validUntil`, i.e. the currently valid one;
 * - `history`: the declarations with a `validUntil` date, sorted by ascending date.
 */
export default class Service {
  // Kept as a Map instance for backward compatibility, but entries are stored as
  // plain properties keyed by document type (the Map API itself is not used).
  documents = new Map();

  /**
   * @param {object} params
   * @param {string} params.id - Identifier of the service.
   * @param {string} params.name - Name of the service.
   */
  constructor({ id, name }) {
    this.id = id;
    this.name = name;
  }

  /**
   * Returns the declaration of the given document type that applies at `date`.
   *
   * @param {string} documentType - The document type to look up.
   * @param {Date|string} [date] - When omitted, the latest declaration is returned.
   * @returns {object|null|undefined} The first history entry whose `validUntil` is on or after
   *   `date`, otherwise the latest declaration (which may be `undefined` if none was added);
   *   `null` when the document type is unknown.
   */
  getDocumentDeclaration(documentType, date) {
    const entry = getOwnDocumentEntry(this.documents, documentType);

    if (!entry) {
      return null;
    }

    const { latest: currentlyValidDocumentDeclaration, history } = entry;

    if (!date) {
      return currentlyValidDocumentDeclaration;
    }

    // Parsed once rather than for every history entry.
    const requestedDate = new Date(date);

    return (
      history?.find(historyEntry => requestedDate <= new Date(historyEntry.validUntil))
      || currentlyValidDocumentDeclaration
    );
  }

  /**
   * @returns {string[]} The document types that have at least one declaration.
   */
  getDocumentTypes() {
    return Object.keys(this.documents);
  }

  /**
   * Registers a document declaration, as `latest` when it has no `validUntil`,
   * otherwise in the date-sorted history of its type.
   *
   * Sets `document.service` to this service when it is not already set.
   *
   * @param {object} document - Declaration exposing `type` and, optionally, `validUntil` and `service`.
   */
  addDocumentDeclaration(document) {
    if (!document.service) {
      document.service = this;
    }

    // Only reuse an own entry: reusing an inherited member (e.g. `Map.prototype.get`
    // for a type named "get") would write `latest`/`history` onto a shared function.
    const entry = getOwnDocumentEntry(this.documents, document.type) || {};

    this.documents[document.type] = entry;

    if (!document.validUntil) {
      entry.latest = document;

      return;
    }

    entry.history = entry.history || [];
    entry.history.push(document);
    entry.history.sort((a, b) => new Date(a.validUntil) - new Date(b.validUntil));
  }

  /**
   * @param {string} documentType - The document type to look up.
   * @returns {Array} The `validUntil` values of the history entries, in ascending order;
   *   an empty array when the type is unknown or has no history.
   */
  getHistoryDates(documentType) {
    const history = getOwnDocumentEntry(this.documents, documentType)?.history || [];

    return history.map(historyEntry => historyEntry.validUntil);
  }

  /**
   * @returns {number} The number of distinct document types declared.
   */
  getNumberOfDocuments() {
    return this.getDocumentTypes().length;
  }

  /**
   * @returns {boolean} Whether at least one document type has history entries.
   */
  hasHistory() {
    // If a service is loaded without its history it could return false even if a history declaration file exists.
    return Object.values(this.documents).some(entry => Boolean(entry?.history));
  }
}
@@ -0,0 +1,164 @@
1
+ import chai from 'chai';
2
+
3
+ import DocumentDeclaration from './documentDeclaration.js';
4
+ import Service from './service.js';
5
+
6
+ const { expect } = chai;
7
+
8
/**
 * Unit tests for Service: registering document declarations, resolving the
 * declaration applicable at a given date, and listing declared document types.
 */
describe('Service', () => {
  const DOCUMENT_TYPE = 'Terms of Service';
  const buildService = () => new Service({ id: 'serviceID', name: 'serviceName' });

  let subject;

  describe('#addDocumentDeclaration', () => {
    let currentDeclaration;

    before(async () => {
      // `service` receives whatever `subject` holds when this hook runs;
      // the nested contexts (re)assign `subject` in their own hooks.
      currentDeclaration = new DocumentDeclaration({
        type: DOCUMENT_TYPE,
        service: subject,
        pages: [{
          location: 'https://www.service.example/tos',
          contentSelectors: 'body',
        }],
      });
    });

    context('when document declaration has no validity date', () => {
      before(async () => {
        subject = buildService();
        subject.addDocumentDeclaration(currentDeclaration);
      });

      it('adds the document as the last valid document declaration', async () => {
        const found = subject.getDocumentDeclaration(DOCUMENT_TYPE);

        expect(found).to.deep.eql(currentDeclaration);
      });
    });

    context('when document declaration has a validity date', () => {
      const VALIDITY_DATE = new Date('2020-07-22T11:30:21.000Z');
      let expiredDeclaration;

      before(async () => {
        subject = buildService();
        expiredDeclaration = new DocumentDeclaration({
          type: 'Terms of Service',
          service: subject,
          validUntil: VALIDITY_DATE,
          pages: [{
            location: 'https://www.service.example/terms',
            contentSelectors: 'main',
          }],
        });

        // The expired declaration is registered first, then the current one.
        [ expiredDeclaration, currentDeclaration ].forEach(declaration => subject.addDocumentDeclaration(declaration));
      });

      it('adds the document with the proper validity date', async () => {
        const found = subject.getDocumentDeclaration(DOCUMENT_TYPE, VALIDITY_DATE);

        expect(found).to.deep.eql(expiredDeclaration);
      });
    });
  });

  describe('#getDocumentDeclaration', () => {
    const DATE_BEFORE_SECOND_EXPIRY = '2020-08-21T11:30:21.000Z';
    let service;

    const lastDeclaration = new DocumentDeclaration({
      type: 'Terms of Service',
      location: 'https://www.service.example/tos',
      contentSelectors: 'body',
    });

    context('when there is no history', () => {
      before(async () => {
        service = buildService();
        service.addDocumentDeclaration(lastDeclaration);
      });

      context('without given date', () => {
        it('returns the last document declaration', async () => {
          const found = service.getDocumentDeclaration(DOCUMENT_TYPE);

          expect(found).to.eql(lastDeclaration);
        });
      });

      context('with a date', () => {
        it('returns the last document declaration', async () => {
          const found = service.getDocumentDeclaration(DOCUMENT_TYPE, DATE_BEFORE_SECOND_EXPIRY);

          expect(found).to.eql(lastDeclaration);
        });
      });
    });

    context('when the document has a history', () => {
      const firstDeclaration = new DocumentDeclaration({
        type: 'Terms of Service',
        location: 'https://www.service.example/terms',
        contentSelectors: 'main',
        validUntil: '2020-07-22T11:30:21.000Z',
      });

      const secondDeclaration = new DocumentDeclaration({
        type: 'Terms of Service',
        location: 'https://www.service.example/terms-of-service',
        contentSelectors: 'main',
        validUntil: '2020-08-22T11:30:21.000Z',
      });

      before(async () => {
        service = buildService();

        // Registration order: latest declaration, then both historical ones.
        [ lastDeclaration, firstDeclaration, secondDeclaration ].forEach(declaration => service.addDocumentDeclaration(declaration));
      });

      context('without given date', () => {
        it('returns the last document declaration', async () => {
          const found = service.getDocumentDeclaration(DOCUMENT_TYPE);

          expect(found).to.eql(lastDeclaration);
        });
      });

      context('with a date', () => {
        it('returns the document declaration according to the given date', async () => {
          const found = service.getDocumentDeclaration(DOCUMENT_TYPE, DATE_BEFORE_SECOND_EXPIRY);

          expect(found).to.eql(secondDeclaration);
        });

        context('strictly equal to a document declaration validity date', () => {
          it('returns the document declaration with the validity date equal to the given date', async () => {
            const found = service.getDocumentDeclaration(DOCUMENT_TYPE, secondDeclaration.validUntil);

            expect(found).to.eql(secondDeclaration);
          });
        });
      });
    });
  });

  describe('#getDocumentTypes', () => {
    let service;
    let termsOfServiceDeclaration;
    let privacyPolicyDeclaration;

    before(async () => {
      service = buildService();

      termsOfServiceDeclaration = new DocumentDeclaration({
        type: 'Terms of Service',
        location: 'https://www.service.example/tos',
        contentSelectors: 'body',
      });

      privacyPolicyDeclaration = new DocumentDeclaration({
        type: 'Privacy Policy',
        location: 'https://www.service.example/terms',
        contentSelectors: 'main',
        validUntil: '2020-07-22T11:30:21.000Z',
      });

      service.addDocumentDeclaration(termsOfServiceDeclaration);
      service.addDocumentDeclaration(privacyPolicyDeclaration);
    });

    it('returns the service document types', async () => {
      const expectedTypes = [
        termsOfServiceDeclaration.type,
        privacyPolicyDeclaration.type,
      ];

      expect(service.getDocumentTypes()).to.have.members(expectedTypes);
    });
  });
});
package/src/exports.js ADDED
@@ -0,0 +1,3 @@
1
+ export { default as pageDeclaration } from './archivist/services/pageDeclaration.js';
2
+ export { default as filter } from './archivist/filter/exports.js';
3
+ export { default as fetch } from './archivist/fetcher/exports.js';
package/src/index.js ADDED
@@ -0,0 +1,59 @@
1
+ import config from 'config';
2
+ import cron from 'croner';
3
+
4
+ import Archivist from './archivist/index.js';
5
+ import logger from './logger/index.js';
6
+ import Notifier from './notifier/index.js';
7
+ import Tracker from './tracker/index.js';
8
+
9
/**
 * Entry point of the engine: refilters existing records, then tracks changes,
 * once or on a recurring schedule.
 *
 * @param {object} options
 * @param {string[]} [options.services=[]] - Service IDs to handle; IDs without a matching
 *   declaration are reported with a warning and ignored.
 * @param {string[]} [options.documentTypes] - Document types to handle.
 * @param {boolean} [options.refilterOnly] - When truthy, stop after the refiltering step.
 * @param {boolean} [options.schedule] - When truthy, keep tracking on the cron schedule below.
 * @returns {Promise<void>}
 */
export default async function track({ services = [], documentTypes, refilterOnly, schedule }) {
  const archivist = new Archivist({ recorderConfig: config.get('recorder') });

  archivist.attach(logger);

  await archivist.initialize();

  logger.info('Start Open Terms Archive\n');

  // Keeps only the IDs for which a declaration exists, warning about the others.
  const keepDeclaredService = serviceId => {
    const declaration = archivist.serviceDeclarations[serviceId];

    if (!declaration) {
      logger.warn(`Parameter "${serviceId}" was interpreted as a service ID to update, but no matching declaration was found; it will be ignored`);
    }

    return declaration;
  };

  // Left undefined when no service was requested.
  const serviceIds = services.length ? services.filter(keepDeclaredService) : undefined;

  const trackChanges = () => archivist.trackChanges(serviceIds, documentTypes);

  await archivist.refilterAndRecord(serviceIds, documentTypes);

  if (refilterOnly) {
    return;
  }

  // The notifier is only attached in production.
  if (process.env.NODE_ENV === 'production') {
    archivist.attach(new Notifier(archivist.serviceDeclarations));
  }

  // The tracker is only attached when a GitHub token is provided.
  if (process.env.GITHUB_TOKEN) {
    const tracker = new Tracker(config.get('tracker'));

    await tracker.initialize();
    archivist.attach(tracker);
  }

  await trackChanges();

  if (schedule) {
    logger.info('The scheduler is running…');
    logger.info('Documents will be tracked every six hours starting at half past midnight');

    cron('30 */6 * * *', trackChanges);
  }
}
@@ -0,0 +1 @@
1
+ This module is intended to be considered as a simple consumer of the Archivist application's events API and therefore to be extracted from this repository. It is still in this repository to facilitate quick iterations but is expected to be extracted as soon as the Open Terms Archive app is stabilized.
@@ -0,0 +1,131 @@
1
+ import os from 'os';
2
+
3
+ import config from 'config';
4
+ import dotenv from 'dotenv';
5
+ import winston from 'winston';
6
+ import 'winston-mail';
7
+
8
+ dotenv.config();
9
+ const { combine, timestamp, printf, colorize } = winston.format;
10
+
11
// Log line format: colorized level, timestamp, then a fixed-width "service — type:page"
// prefix so that messages start at the same column.
const alignedWithColorsAndTime = combine(
  colorize(),
  timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
  printf(info => {
    const { level, message, serviceId, type, pageId } = info;
    const PREFIX_WIDTH = 75;

    // The document part is only shown when both the service and the type are known.
    const documentLabel = serviceId && type ? `${serviceId} — ${type}` : '';
    const fullLabel = pageId ? `${documentLabel}:${pageId}` : documentLabel;

    // Overlong prefixes are cut and terminated by an ellipsis to fit the column width.
    const prefix = fullLabel.length > PREFIX_WIDTH
      ? `${fullLabel.substring(0, PREFIX_WIDTH - 1)}…`
      : fullLabel;

    return `${info.timestamp} ${level.padEnd(15)} ${prefix.padEnd(PREFIX_WIDTH)} ${message}`;
  }),
);
32
+
33
// Logs always go to the console; mail transports are added depending on configuration.
const transports = [new winston.transports.Console()];

if (config.get('logger.sendMailOnError')) {
  // Settings shared by every mail transport.
  const sharedMailSettings = {
    to: config.get('logger.sendMailOnError.to'),
    from: config.get('logger.sendMailOnError.from'),
    host: config.get('logger.smtp.host'),
    username: config.get('logger.smtp.username'),
    password: process.env.SMTP_PASSWORD,
    ssl: true,
    timeout: 30 * 1000,
    formatter: args => args[Object.getOwnPropertySymbols(args)[1]], // Returns the full error message, the same visible in the console. It is referenced in the argument object with a Symbol of which we do not have the reference but we know it is the second one.
    exitOnError: true,
  };

  const createMailTransport = (level, subject) => new winston.transports.Mail({
    ...sharedMailSettings,
    level,
    subject,
  });

  transports.push(createMailTransport('error', `[OTA] Error Report — ${os.hostname()}`));

  // Warnings are mailed only when explicitly enabled.
  if (config.get('logger.sendMailOnError.sendWarnings')) {
    transports.push(createMailTransport('warn', `[OTA] Inaccessible content — ${os.hostname()}`));
  }
}
64
+
65
// Counters reported in the completion summaries. They start at zero so that an
// increment happening before any `*Started` handler has run cannot yield NaN.
let recordedSnapshotsCount = 0;
let recordedVersionsCount = 0;

// The same transports also handle unhandled promise rejections.
const logger = winston.createLogger({
  format: alignedWithColorsAndTime,
  transports,
  rejectionHandlers: transports,
});

// Event handlers, named `on<EventName>`. Each one logs a message tagged with the
// document it relates to; some also update the counters above.

logger.onFirstSnapshotRecorded = (serviceId, type, pageId, snapshotId) => {
  logger.info({ message: `Recorded first snapshot with id ${snapshotId}`, serviceId, type, pageId });
  recordedSnapshotsCount++;
};

logger.onSnapshotRecorded = (serviceId, type, pageId, snapshotId) => {
  logger.info({ message: `Recorded snapshot with id ${snapshotId}`, serviceId, type, pageId });
  recordedSnapshotsCount++;
};

logger.onSnapshotNotChanged = (serviceId, type, pageId) => {
  logger.info({ message: 'No changes, did not record snapshot', serviceId, type, pageId });
};

logger.onFirstVersionRecorded = (serviceId, type, versionId) => {
  logger.info({ message: `Recorded first version with id ${versionId}`, serviceId, type });
  recordedVersionsCount++;
};

logger.onVersionRecorded = (serviceId, type, versionId) => {
  logger.info({ message: `Recorded version with id ${versionId}`, serviceId, type });
  recordedVersionsCount++;
};

logger.onVersionNotChanged = (serviceId, type) => {
  logger.info({ message: 'No changes after filtering, did not record version', serviceId, type });
};

// Refiltering only resets and reports the versions counter.
logger.onRefilteringStarted = (numberOfServices, numberOfDocuments) => {
  logger.info(`Examining ${numberOfDocuments} documents from ${numberOfServices} services for refiltering…`);
  recordedVersionsCount = 0;
};

logger.onRefilteringCompleted = (numberOfServices, numberOfDocuments) => {
  logger.info(`Examined ${numberOfDocuments} documents from ${numberOfServices} services for refiltering`);
  logger.info(`Recorded ${recordedVersionsCount} new versions\n`);
};

// Tracking resets and reports both counters.
logger.onTrackingStarted = (numberOfServices, numberOfDocuments) => {
  logger.info(`Tracking changes of ${numberOfDocuments} documents from ${numberOfServices} services…`);
  recordedSnapshotsCount = 0;
  recordedVersionsCount = 0;
};

logger.onTrackingCompleted = (numberOfServices, numberOfDocuments) => {
  logger.info(`Tracked changes of ${numberOfDocuments} documents from ${numberOfServices} services`);
  logger.info(`Recorded ${recordedSnapshotsCount} new snapshots and ${recordedVersionsCount} new versions\n`);
};

// Inaccessible content is logged as a warning, using only the error's message.
logger.onInaccessibleContent = ({ message }, serviceId, type) => {
  logger.warn({ message, serviceId, type });
};

// Other errors are logged with their full stack trace.
logger.onError = (error, serviceId, type, pageId) => {
  logger.error({ message: error.stack, serviceId, type, pageId });
};
130
+
131
+ export default logger;
package/src/main.js ADDED
@@ -0,0 +1,18 @@
1
+ import fs from 'fs';
2
+
3
+ import { program } from 'commander';
4
+
5
+ import track from './index.js';
6
+
7
// CLI metadata is read from the package manifest, resolved relative to this module.
const { name, description, version } = JSON.parse(fs.readFileSync(new URL('../package.json', import.meta.url)).toString());

// Command-line interface definition.
program
  .name(name)
  .description(description)
  .version(version)
  .option('-s, --services [serviceId...]', 'service IDs of services to handle')
  .option('-d, --documentTypes [documentType...]', 'document types to handle')
  .option('-r, --refilter-only', 'only refilter existing snapshots with last declarations and engine\'s updates')
  .option('--schedule', 'schedule automatic document tracking');

// The parsed options object is handed to `track` as is.
track(program.parse(process.argv).opts());
@@ -0,0 +1 @@
1
+ This module is intended to be considered as a simple consumer of the Archivist application's events API and therefore to be extracted from this repository. It is still in this repository to facilitate quick iterations but is expected to be extracted as soon as the Open Terms Archive app is stabilized.