@opentermsarchive/engine 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/.env.example +3 -0
  2. package/.eslintrc.yaml +116 -0
  3. package/.github/workflows/deploy.yml +50 -0
  4. package/.github/workflows/release.yml +64 -0
  5. package/.github/workflows/test.yml +77 -0
  6. package/CHANGELOG.md +14 -0
  7. package/CODE_OF_CONDUCT.md +128 -0
  8. package/CONTRIBUTING.md +143 -0
  9. package/LICENSE +153 -0
  10. package/MIGRATING.md +42 -0
  11. package/README.fr.md +110 -0
  12. package/README.md +438 -0
  13. package/Vagrantfile +38 -0
  14. package/ansible.cfg +13 -0
  15. package/bin/.env.js +1 -0
  16. package/bin/lint-declarations.js +31 -0
  17. package/bin/track.js +26 -0
  18. package/bin/validate-declarations.js +68 -0
  19. package/config/ci.json +5 -0
  20. package/config/contrib.json +35 -0
  21. package/config/dating.json +37 -0
  22. package/config/default.json +71 -0
  23. package/config/france.json +40 -0
  24. package/config/p2b-compliance.json +40 -0
  25. package/config/pga.json +40 -0
  26. package/config/production.json +27 -0
  27. package/config/test.json +49 -0
  28. package/config/vagrant.json +24 -0
  29. package/decision-records/0001-service-name-and-id.md +73 -0
  30. package/decision-records/0002-service-history.md +212 -0
  31. package/decision-records/0003-snapshots-database.md +123 -0
  32. package/ops/README.md +280 -0
  33. package/ops/app.yml +5 -0
  34. package/ops/infra.yml +6 -0
  35. package/ops/inventories/dev.yml +7 -0
  36. package/ops/inventories/production.yml +27 -0
  37. package/ops/roles/infra/defaults/main.yml +2 -0
  38. package/ops/roles/infra/files/.gitconfig +3 -0
  39. package/ops/roles/infra/files/mongod.conf +18 -0
  40. package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
  41. package/ops/roles/infra/tasks/main.yml +78 -0
  42. package/ops/roles/infra/tasks/mongo.yml +40 -0
  43. package/ops/roles/infra/templates/ssh_config.j2 +5 -0
  44. package/ops/roles/ota/defaults/main.yml +14 -0
  45. package/ops/roles/ota/files/.env +21 -0
  46. package/ops/roles/ota/tasks/database.yml +65 -0
  47. package/ops/roles/ota/tasks/main.yml +110 -0
  48. package/ops/site.yml +6 -0
  49. package/package.json +101 -0
  50. package/pm2.config.cjs +20 -0
  51. package/scripts/dataset/README.md +37 -0
  52. package/scripts/dataset/assets/LICENSE +540 -0
  53. package/scripts/dataset/assets/README.template.js +65 -0
  54. package/scripts/dataset/export/index.js +106 -0
  55. package/scripts/dataset/export/index.test.js +155 -0
  56. package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
  57. package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
  58. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
  59. package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
  60. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
  61. package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
  62. package/scripts/dataset/index.js +40 -0
  63. package/scripts/dataset/logger/index.js +17 -0
  64. package/scripts/dataset/main.js +25 -0
  65. package/scripts/dataset/publish/index.js +39 -0
  66. package/scripts/declarations/lint/index.js +36 -0
  67. package/scripts/declarations/utils/index.js +81 -0
  68. package/scripts/declarations/validate/definitions.js +63 -0
  69. package/scripts/declarations/validate/index.mocha.js +262 -0
  70. package/scripts/declarations/validate/service.history.schema.js +86 -0
  71. package/scripts/declarations/validate/service.schema.js +91 -0
  72. package/scripts/history/logger/index.js +39 -0
  73. package/scripts/history/migrate-services.js +212 -0
  74. package/scripts/history/update-to-full-hash.js +61 -0
  75. package/scripts/history/utils/index.js +23 -0
  76. package/scripts/import/README.md +59 -0
  77. package/scripts/import/config/import.json +12 -0
  78. package/scripts/import/index.js +224 -0
  79. package/scripts/import/loadCommits.js +66 -0
  80. package/scripts/import/logger/index.js +43 -0
  81. package/scripts/rewrite/README.md +131 -0
  82. package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
  83. package/scripts/rewrite/config/rewrite-versions.json +32 -0
  84. package/scripts/rewrite/initializer/files/license +428 -0
  85. package/scripts/rewrite/initializer/files/readme.md +8 -0
  86. package/scripts/rewrite/initializer/index.js +44 -0
  87. package/scripts/rewrite/rewrite-snapshots.js +108 -0
  88. package/scripts/rewrite/rewrite-versions.js +160 -0
  89. package/scripts/rewrite/utils.js +33 -0
  90. package/scripts/utils/renamer/README.md +49 -0
  91. package/scripts/utils/renamer/index.js +45 -0
  92. package/scripts/utils/renamer/rules/documentTypes.json +25 -0
  93. package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
  94. package/scripts/utils/renamer/rules/serviceNames.json +92 -0
  95. package/src/archivist/errors.js +9 -0
  96. package/src/archivist/fetcher/errors.js +6 -0
  97. package/src/archivist/fetcher/exports.js +18 -0
  98. package/src/archivist/fetcher/fullDomFetcher.js +84 -0
  99. package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
  100. package/src/archivist/fetcher/index.js +35 -0
  101. package/src/archivist/fetcher/index.test.js +239 -0
  102. package/src/archivist/filter/exports.js +3 -0
  103. package/src/archivist/filter/index.js +178 -0
  104. package/src/archivist/filter/index.test.js +561 -0
  105. package/src/archivist/index.js +276 -0
  106. package/src/archivist/index.test.js +600 -0
  107. package/src/archivist/recorder/index.js +77 -0
  108. package/src/archivist/recorder/index.test.js +463 -0
  109. package/src/archivist/recorder/record.js +35 -0
  110. package/src/archivist/recorder/record.test.js +91 -0
  111. package/src/archivist/recorder/repositories/factory.js +23 -0
  112. package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
  113. package/src/archivist/recorder/repositories/git/git.js +122 -0
  114. package/src/archivist/recorder/repositories/git/git.test.js +86 -0
  115. package/src/archivist/recorder/repositories/git/index.js +182 -0
  116. package/src/archivist/recorder/repositories/git/index.test.js +714 -0
  117. package/src/archivist/recorder/repositories/interface.js +108 -0
  118. package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
  119. package/src/archivist/recorder/repositories/mongo/index.js +121 -0
  120. package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
  121. package/src/archivist/services/documentDeclaration.js +26 -0
  122. package/src/archivist/services/documentDeclaration.test.js +85 -0
  123. package/src/archivist/services/documentTypes.json +386 -0
  124. package/src/archivist/services/index.js +255 -0
  125. package/src/archivist/services/index.test.js +327 -0
  126. package/src/archivist/services/pageDeclaration.js +51 -0
  127. package/src/archivist/services/pageDeclaration.test.js +224 -0
  128. package/src/archivist/services/service.js +60 -0
  129. package/src/archivist/services/service.test.js +164 -0
  130. package/src/exports.js +3 -0
  131. package/src/index.js +59 -0
  132. package/src/logger/README.md +1 -0
  133. package/src/logger/index.js +131 -0
  134. package/src/main.js +18 -0
  135. package/src/notifier/README.md +1 -0
  136. package/src/notifier/index.js +150 -0
  137. package/src/tracker/README.md +1 -0
  138. package/src/tracker/index.js +215 -0
  139. package/test/fixtures/service_A.js +22 -0
  140. package/test/fixtures/service_A_terms.md +10 -0
  141. package/test/fixtures/service_A_terms_snapshot.html +14 -0
  142. package/test/fixtures/service_B.js +22 -0
  143. package/test/fixtures/service_with_declaration_history.js +65 -0
  144. package/test/fixtures/service_with_filters_history.js +155 -0
  145. package/test/fixtures/service_with_history.js +188 -0
  146. package/test/fixtures/service_with_multipage_document.js +100 -0
  147. package/test/fixtures/service_without_history.js +31 -0
  148. package/test/fixtures/services.js +19 -0
  149. package/test/fixtures/terms.pdf +0 -0
  150. package/test/fixtures/termsFromPDF.md +25 -0
  151. package/test/fixtures/termsModified.pdf +0 -0
  152. package/test/services/service_A.json +9 -0
  153. package/test/services/service_B.json +9 -0
  154. package/test/services/service_with_declaration_history.filters.js +7 -0
  155. package/test/services/service_with_declaration_history.history.json +17 -0
  156. package/test/services/service_with_declaration_history.json +13 -0
  157. package/test/services/service_with_filters_history.filters.history.js +29 -0
  158. package/test/services/service_with_filters_history.filters.js +7 -0
  159. package/test/services/service_with_filters_history.json +13 -0
  160. package/test/services/service_with_history.filters.history.js +29 -0
  161. package/test/services/service_with_history.filters.js +7 -0
  162. package/test/services/service_with_history.history.json +26 -0
  163. package/test/services/service_with_history.json +17 -0
  164. package/test/services/service_with_multipage_document.filters.js +7 -0
  165. package/test/services/service_with_multipage_document.history.json +37 -0
  166. package/test/services/service_with_multipage_document.json +28 -0
  167. package/test/services/service_without_history.filters.js +7 -0
  168. package/test/services/service_without_history.json +13 -0
@@ -0,0 +1,37 @@
1
+ {
2
+ "services": {
3
+ "repository": "https://github.com/OpenTermsArchive/dating-declarations.git"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "snapshotIdentiferTemplate": "https://github.com/OpenTermsArchive/dating-snapshots/commit/%SNAPSHOT_ID",
10
+ "repository": "git@github.com:OpenTermsArchive/dating-versions.git"
11
+ }
12
+ }
13
+ },
14
+ "snapshots": {
15
+ "storage": {
16
+ "git": {
17
+ "repository": "git@github.com:OpenTermsArchive/dating-snapshots.git"
18
+ }
19
+ }
20
+ }
21
+ },
22
+ "notifier": {
23
+ "sendInBlue": {
24
+ "updatesListId": 594,
25
+ "updateTemplateId": 40
26
+ }
27
+ },
28
+ "tracker": {
29
+ "githubIssues": {
30
+ "repository": "OpenTermsArchive/dating-declarations"
31
+ }
32
+ },
33
+ "dataset": {
34
+ "title": "dating",
35
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/dating-versions"
36
+ }
37
+ }
@@ -0,0 +1,71 @@
1
+ {
2
+ "services": {
3
+ "declarationsPath": "../declarations/declarations"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "type": "git",
9
+ "git": {
10
+ "path": "./data/versions",
11
+ "publish": false,
12
+ "snapshotIdentiferTemplate": "./data/snapshots/%SNAPSHOT_ID",
13
+ "author": {
14
+ "name": "Open Terms Archive Bot",
15
+ "email": "bot@opentermsarchive.org"
16
+ }
17
+ }
18
+ }
19
+ },
20
+ "snapshots": {
21
+ "storage": {
22
+ "type": "git",
23
+ "git": {
24
+ "path": "./data/snapshots",
25
+ "publish": false,
26
+ "author": {
27
+ "name": "Open Terms Archive Bot",
28
+ "email": "bot@opentermsarchive.org"
29
+ }
30
+ },
31
+ "mongo": {
32
+ "connectionURI": "mongodb://127.0.0.1:27017",
33
+ "database": "open-terms-archive",
34
+ "collection": "snapshots"
35
+ }
36
+ }
37
+ }
38
+ },
39
+ "fetcher": {
40
+ "waitForElementsTimeout": 10000,
41
+ "navigationTimeout": 30000,
42
+ "language": "en"
43
+ },
44
+ "logger": {
45
+ "smtp": {
46
+ "host": "smtp-relay.sendinblue.com",
47
+ "username": "admin@opentermsarchive.org"
48
+ },
49
+ "sendMailOnError": false
50
+ },
51
+ "notifier": {
52
+ "sendInBlue": {
53
+ "updatesListId": 850,
54
+ "updateTemplateId": 7
55
+ }
56
+ },
57
+ "tracker": {
58
+ "githubIssues": {
59
+ "repository": "OpenTermsArchive/sandbox",
60
+ "label": {
61
+ "name": "bot-report",
62
+ "color": "FEF2C0",
63
+ "description": "Automatically created by Open Terms Archive when a document cannot be fetched"
64
+ }
65
+ }
66
+ },
67
+ "dataset": {
68
+ "title": "sandbox",
69
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox"
70
+ }
71
+ }
@@ -0,0 +1,40 @@
1
+ {
2
+ "services": {
3
+ "repository": "https://github.com/OpenTermsArchive/france-declarations.git"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "snapshotIdentiferTemplate": "https://github.com/OpenTermsArchive/france-snapshots/commit/%SNAPSHOT_ID",
10
+ "repository": "git@github.com:OpenTermsArchive/france-versions.git"
11
+ }
12
+ }
13
+ },
14
+ "snapshots": {
15
+ "storage": {
16
+ "git": {
17
+ "repository": "git@github.com:OpenTermsArchive/france-snapshots.git"
18
+ }
19
+ }
20
+ }
21
+ },
22
+ "fetcher": {
23
+ "language": "fr"
24
+ },
25
+ "notifier": {
26
+ "sendInBlue": {
27
+ "updatesListId": 595,
28
+ "updateTemplateId": 42
29
+ }
30
+ },
31
+ "tracker": {
32
+ "githubIssues": {
33
+ "repository": "OpenTermsArchive/france-declarations"
34
+ }
35
+ },
36
+ "dataset": {
37
+ "title": "france",
38
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/france-versions"
39
+ }
40
+ }
@@ -0,0 +1,40 @@
1
+ {
2
+ "services": {
3
+ "repository": "https://github.com/OpenTermsArchive/p2b-compliance-declarations.git"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "snapshotIdentiferTemplate": "https://github.com/OpenTermsArchive/p2b-compliance-snapshots/commit/%SNAPSHOT_ID",
10
+ "repository": "git@github.com:OpenTermsArchive/p2b-compliance-versions.git"
11
+ }
12
+ }
13
+ },
14
+ "snapshots": {
15
+ "storage": {
16
+ "git": {
17
+ "repository": "git@github.com:OpenTermsArchive/p2b-compliance-snapshots.git"
18
+ }
19
+ }
20
+ }
21
+ },
22
+ "fetcher": {
23
+ "language": "en"
24
+ },
25
+ "notifier": {
26
+ "sendInBlue": {
27
+ "updatesListId": 861,
28
+ "updateTemplateId": 56
29
+ }
30
+ },
31
+ "tracker": {
32
+ "githubIssues": {
33
+ "repository": "OpenTermsArchive/p2b-compliance-declarations"
34
+ }
35
+ },
36
+ "dataset": {
37
+ "title": "p2b-compliance",
38
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/p2b-compliance-versions"
39
+ }
40
+ }
@@ -0,0 +1,40 @@
1
+ {
2
+ "services": {
3
+ "repository": "https://github.com/OpenTermsArchive/pga-declarations.git"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "snapshotIdentiferTemplate": "https://github.com/OpenTermsArchive/pga-snapshots/commit/%SNAPSHOT_ID",
10
+ "repository": "git@github.com:OpenTermsArchive/pga-versions.git"
11
+ }
12
+ }
13
+ },
14
+ "snapshots": {
15
+ "storage": {
16
+ "git": {
17
+ "repository": "git@github.com:OpenTermsArchive/pga-snapshots.git"
18
+ }
19
+ }
20
+ }
21
+ },
22
+ "fetcher": {
23
+ "language": "en"
24
+ },
25
+ "notifier": {
26
+ "sendInBlue": {
27
+ "updatesListId": 855,
28
+ "updateTemplateId": 41
29
+ }
30
+ },
31
+ "tracker": {
32
+ "githubIssues": {
33
+ "repository": "OpenTermsArchive/pga-declarations"
34
+ }
35
+ },
36
+ "dataset": {
37
+ "title": "pga",
38
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/pga-versions"
39
+ }
40
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "recorder": {
3
+ "versions": {
4
+ "storage": {
5
+ "git": {
6
+ "path": "../versions",
7
+ "publish": true
8
+ }
9
+ }
10
+ },
11
+ "snapshots": {
12
+ "storage": {
13
+ "git": {
14
+ "path": "../snapshots",
15
+ "publish": true
16
+ }
17
+ }
18
+ }
19
+ },
20
+ "logger": {
21
+ "sendMailOnError": {
22
+ "to": "admin@opentermsarchive.org",
23
+ "from": "noreply@opentermsarchive.org",
24
+ "sendWarnings": false
25
+ }
26
+ }
27
+ }
@@ -0,0 +1,49 @@
1
+ {
2
+ "services": {
3
+ "declarationsPath": "./test/services"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "path": "./test/data/versions",
10
+ "publish": false,
11
+ "snapshotIdentiferTemplate": "https://github.com/ambanum/OpenTermsArchive-snapshots/commit/%SNAPSHOT_ID",
12
+ "author": {
13
+ "name": "Open Terms Archive Testing Bot",
14
+ "email": "bot@opentermsarchive.org"
15
+ }
16
+ },
17
+ "mongo": {
18
+ "connectionURI": "mongodb://127.0.0.1:27017",
19
+ "database": "open-terms-archive-test",
20
+ "collection": "versions"
21
+ }
22
+ }
23
+ },
24
+ "snapshots": {
25
+ "storage": {
26
+ "git": {
27
+ "path": "./test/data/snapshots",
28
+ "publish": false,
29
+ "author": {
30
+ "name": "Open Terms Archive Testing Bot",
31
+ "email": "bot@opentermsarchive.org"
32
+ }
33
+ },
34
+ "mongo": {
35
+ "connectionURI": "mongodb://127.0.0.1:27017",
36
+ "database": "open-terms-archive-test",
37
+ "collection": "snapshots"
38
+ }
39
+ }
40
+ }
41
+ },
42
+ "fetcher": {
43
+ "waitForElementsTimeout": 1000
44
+ },
45
+ "dataset": {
46
+ "title": "sandbox",
47
+ "versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox"
48
+ }
49
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "services": {
3
+ "repository": "https://github.com/OpenTermsArchive/contrib-declarations.git"
4
+ },
5
+ "recorder": {
6
+ "versions": {
7
+ "storage": {
8
+ "git": {
9
+ "repository": "git@github.com:OpenTermsArchive/sandbox-versions.git"
10
+ }
11
+ }
12
+ },
13
+ "snapshots": {
14
+ "storage": {
15
+ "git": {
16
+ "repository": "git@github.com:OpenTermsArchive/sandbox-snapshots.git"
17
+ }
18
+ }
19
+ }
20
+ },
21
+ "logger": {
22
+ "sendMailOnError": false
23
+ }
24
+ }
@@ -0,0 +1,73 @@
1
+ # Choosing service name and service ID
2
+
3
+ - Date: 2020-10-14
4
+
5
+ ## Context and Problem Statement
6
+
7
+ To scale up from 50 to 5,000 services, we need a clear way for choosing the service name and the service ID.
8
+
9
+ ### We need
10
+
11
+ A name that reflects the common name used by the provider itself, to be exposed in a GUI. This name is currently exposed as the name property in the service declaration.
12
+ An ID of sorts that can be represented in the filesystem. This ID is currently exposed as the filename of the service declaration, without the .json extension.
13
+
14
+ ### Use cases
15
+
16
+ The service name is presented to end users. It should reflect as closely as possible the official service name, so that it can be identified easily.
17
+ The ID is used internally and exposed for analysis. It should be easy to handle with scripts and other tools.
18
+
19
+ ### Constraints for the ID
20
+
21
+ As long as this ID is stored in the filesystem:
22
+
23
+ - No `/` for UNIX.
24
+ - No `\` for Windows.
25
+ - No `:` for APFS and HFS.
26
+ - No case-sensitive duplicates to support case-insensitive filesystems.
27
+ - No more than 255 characters to support transfer over [FAT32](https://en.wikipedia.org/wiki/File_Allocation_Table#FAT32).
28
+
29
+ UTF, spaces and capitals are all supported, even on case-insensitive filesystems.
30
+
31
+ ### However
32
+
33
+ - UTF in filenames can be [a (fixable) problem with Git and HFS+](https://stackoverflow.com/questions/5581857/git-and-the-umlaut-problem-on-mac-os-x).
34
+ - UTF in filenames is by default quoted in Git, leading for example `été.txt` to be displayed as `"\303\251t\303\251.txt"`.
35
+ - Most online services align their brand name with their domain name. Even though UTF is now officially supported in domain names, support is limited and most services, even non-Western, have an official ASCII transliteration used at least in their domain name (e.g. “qq” by Tencent, “rzd.ru” for “РЖД”, “yahoo” for “Yahoo!”).
36
+ - We currently use GitHub as a GUI, so the service ID is presented to the user instead of the service name. The name is used in email notifications.
37
+
38
+ ## Decision Outcome
39
+
40
+ 1. The service name should be the one used by the service itself, no matter the alphabet.
41
+
42
+ - _Example: `туту.ру`_.
43
+
44
+ 2. We don't support non-ASCII characters in service IDs, at least as long as the database is Git and the filesystem, in order to minimise risk. Service IDs are derived from the service name through normalization into ASCII.
45
+
46
+ - _Example: `туту.ру` → `tutu.ru`_.
47
+ - _Example: `historielærer.dk` → `historielaerer.dk`_.
48
+ - _Example: `RTÉ` → `RTE`_.
49
+
50
+ 3. We support punctuation, except characters that have meaning at filesystem level (`:`, `/`, `\`). These are replaced with a dash (`-`).
51
+
52
+ - _Example: `Yahoo!` → `Yahoo!`_.
53
+ - _Example: `Last.fm` → `Last.fm`_.
54
+ - _Example: `re:start` → `re-start`_.
55
+ - _Example: `we://` → `we---`_.
56
+
57
+ 4. We support capitals. Casing is expected to reflect the official service name casing.
58
+
59
+ - _Example: `hi5` → `hi5`_.
60
+ - _Example: `DeviantArt` → `DeviantArt`_.
61
+ - _Example: `LINE` → `LINE`_.
62
+
63
+ 5. We support spaces. Spaces are expected to reflect the official service name spacing.
64
+
65
+ - _Example: `App Store` → `App Store`_.
66
+ - _Example: `DeviantArt` → `DeviantArt`_.
67
+
68
+ 6. We prefix the service name by the provider name when self-references are ambiguous, separated by a space. For example, Facebook refers to their Self-serve Ads service simply as “Ads”, which we cannot use in a shared database. We thus call the service “Facebook Ads”.
69
+
70
+ - _Example: `Ads` (by Facebook) → `Facebook Ads`_.
71
+ - _Example: `Analytics` (by Google) → `Google Analytics`_.
72
+ - _Example: `Firebase` (by Google) → `Firebase`_.
73
+ - _Example: `App Store` (by Apple) → `App Store`_.
@@ -0,0 +1,212 @@
1
+ # Defining a service history system
2
+
3
+ - Date: 2020-11-23
4
+
5
+ ## Context and Problem Statement
6
+
7
+ We need to be able to regenerate versions from snapshots. As documents are bound to change over time (location or filters), we can't rely on the latest version of the declaration to regenerate a version from an old snapshot. We therefore need a system to keep track of declaration changes; this is what we call declarations and filters versioning.
8
+
9
+ ## Solutions considered
10
+
11
+ At this time, we see three solutions which have in common the following rules:
12
+
13
+ - `history` is optional
14
+ - the current valid declaration has no date and should be clearly identifiable
15
+ - the `valid_until` date is an inclusive expiration date. It should be the exact authored date of the last snapshot commit for which the declaration is still valid.
16
+
17
+ ## Option 1: Add a `history` field in service declaration
18
+
19
+ In `services/ASKfm.json`:
20
+
21
+ ```
22
+ {
23
+ "name": "ASKfm",
24
+ "documents": {
25
+ "Terms of Service": {
26
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
27
+ "select": ".selection",
28
+ "filter": [ "add" ],
29
+ "history": [
30
+ {
31
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
32
+ "select": "body",
33
+ "filter": [ "add" ],
34
+ "valid_until": "2020-08-24T14:02:39Z"
35
+ },
36
+ {
37
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
38
+ "select": "body",
39
+ "valid_until": "2020-08-23T14:02:39Z"
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ }
45
+ ```
46
+
47
+ Note: When no historisation is needed the file may have no mention of history.
48
+
49
+ **Pros:**
50
+
51
+ - Everything is in the same file:
52
+ - might prevent forgetting to update the existing history
53
+ - might help users to know that history is a thing and encourage them to learn about it if they feel the need
54
+ - no (pseudo-)hidden knowledge about history
55
+
56
+ **Cons:**
57
+
58
+ - Apparent complexity can discourage new contributors
59
+ - With time, the file can become huge
60
+
61
+ ## Option 2: Add a `serviceId.history.json` file
62
+
63
+ In `services/ASKfm.json`:
64
+
65
+ ```
66
+ {
67
+ "name": "ASKfm",
68
+ "documents": {
69
+ "Terms of Service": {
70
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
71
+ "select": ".selection",
72
+ "filter": [ "add" ]
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ In `services/ASKfm.history.json`:
79
+
80
+ ```
81
+ {
82
+ "name": "ASKfm",
83
+ "documents": {
84
+ "Terms of Service": [
85
+ {
86
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
87
+ "select": "body",
88
+ "filter": [ "add" ],
89
+ "valid_until": "2020-08-24T14:02:39Z"
90
+ },
91
+ {
92
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
93
+ "select": "body",
94
+ "valid_until": "2020-08-23T14:02:39Z"
95
+ }
96
+ ]
97
+ }
98
+ }
99
+ ```
100
+
101
+ **Pros:**
102
+
103
+ - Service declarations stay small and simple
104
+ - History file is kept close to the service declaration so users might see them
105
+
106
+ **Cons:**
107
+
108
+ - Makes the discovery of history capabilities harder
109
+ - Increases the probability of forgetting to update the history file when making a change in the service declaration
110
+
111
+ ## Option 2A
112
+
113
+ Same as option 2, but the history file should only contain the document declarations to avoid divergence on service properties with the one in the original file.
114
+
115
+ In `services/ASKfm.json`, **called the “service declaration”**:
116
+
117
+ ```
118
+ {
119
+ "name": "ASKfm",
120
+ "documents": {
121
+ "Terms of Service": {
122
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
123
+ "select": ".selection",
124
+ "filter": [ "add" ]
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ In `services/ASKfm.history.json`, **called the “service history”**:
131
+
132
+ ```
133
+ {
134
+ "Terms of Service": [
135
+ {
136
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
137
+ "select": "body",
138
+ "filter": [ "add" ],
139
+ "valid_until": "2020-08-24T14:02:39Z"
140
+ },
141
+ {
142
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
143
+ "select": "body",
144
+ "valid_until": "2020-08-23T14:02:39Z"
145
+ }
146
+ ]
147
+ }
148
+ ```
149
+
150
+ ## Option 3: Add a history service declaration file in `services/history` folder
151
+
152
+ In `services/ASKfm.json`:
153
+
154
+ ```
155
+ {
156
+ "name": "ASKfm",
157
+ "documents": {
158
+ "Terms of Service": {
159
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
160
+ "select": ".selection",
161
+ "filter": [ "add" ]
162
+ }
163
+ }
164
+ }
165
+ ```
166
+
167
+ In `services/history/ASKfm.json`:
168
+
169
+ ```
170
+ {
171
+ "name": "ASKfm",
172
+ "documents": {
173
+ "Terms of Service": [
174
+ {
175
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
176
+ "select": "body",
177
+ "filter": [ "add" ],
178
+ "valid_until": "2020-08-24T14:02:39Z"
179
+ },
180
+ {
181
+ "fetch": "https://ask.fm/docs/terms_of_use/?lang=en",
182
+ "select": "body",
183
+ "valid_until": "2020-08-23T14:02:39Z"
184
+ }
185
+ ]
186
+ }
187
+ }
188
+ ```
189
+
190
+ **Pros:**
191
+
192
+ - Service declarations stay small and simple
193
+ - All history updates are reserved for users with the relevant knowledge, who might act as gatekeepers
194
+
195
+ **Cons:**
196
+
197
+ - All history updates are reserved for users with the relevant knowledge, who might act as gatekeepers :)
198
+ - Need to rely on people with knowledge to keep the history
199
+
200
+ ## Some thoughts
201
+
202
+ ### Community
203
+
204
+ The choice might have implications for the community that will grow around the project.
205
+
206
+ _Option 1_ shows everything to everyone. It might frighten some contributors with some apparent complexity (once there is history in the declaration file), but it might also encourage them to learn about it if they want or feel the need to. All contributors will share the same view and knowledge about the system. This might encourage collaboration between them to learn and improve together.
207
+
208
+ _Option 2_ and _Option 3_ hide the complexity of history management in separate files, and only the most adventurous contributors will find them by themselves. Contributions to those files will probably be made by specific contributors who will be taught to manage those files. This creates two different kinds of contributors: those who will stay with the basic service declaration, not knowing that more complex options exist, and those who will have the knowledge of history management, whose work might stay in the shadows or who might act as gatekeepers.
209
+
210
+ ## Decision Outcome
211
+
212
+ [After consulting the community](https://github.com/ambanum/OpenTermsArchive/issues/156), option 2A is retained, as it hides the complexity of the history (compared to Option 1) while increasing its discoverability (compared to Option 3) for contributors who might become more “adventurous”.