@opentermsarchive/engine 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/.eslintrc.yaml +116 -0
- package/.github/workflows/deploy.yml +50 -0
- package/.github/workflows/release.yml +64 -0
- package/.github/workflows/test.yml +77 -0
- package/CHANGELOG.md +14 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/CONTRIBUTING.md +143 -0
- package/LICENSE +153 -0
- package/MIGRATING.md +42 -0
- package/README.fr.md +110 -0
- package/README.md +438 -0
- package/Vagrantfile +38 -0
- package/ansible.cfg +13 -0
- package/bin/.env.js +1 -0
- package/bin/lint-declarations.js +31 -0
- package/bin/track.js +26 -0
- package/bin/validate-declarations.js +68 -0
- package/config/ci.json +5 -0
- package/config/contrib.json +35 -0
- package/config/dating.json +37 -0
- package/config/default.json +71 -0
- package/config/france.json +40 -0
- package/config/p2b-compliance.json +40 -0
- package/config/pga.json +40 -0
- package/config/production.json +27 -0
- package/config/test.json +49 -0
- package/config/vagrant.json +24 -0
- package/decision-records/0001-service-name-and-id.md +73 -0
- package/decision-records/0002-service-history.md +212 -0
- package/decision-records/0003-snapshots-database.md +123 -0
- package/ops/README.md +280 -0
- package/ops/app.yml +5 -0
- package/ops/infra.yml +6 -0
- package/ops/inventories/dev.yml +7 -0
- package/ops/inventories/production.yml +27 -0
- package/ops/roles/infra/defaults/main.yml +2 -0
- package/ops/roles/infra/files/.gitconfig +3 -0
- package/ops/roles/infra/files/mongod.conf +18 -0
- package/ops/roles/infra/files/ota-bot-key.private_key +26 -0
- package/ops/roles/infra/tasks/main.yml +78 -0
- package/ops/roles/infra/tasks/mongo.yml +40 -0
- package/ops/roles/infra/templates/ssh_config.j2 +5 -0
- package/ops/roles/ota/defaults/main.yml +14 -0
- package/ops/roles/ota/files/.env +21 -0
- package/ops/roles/ota/tasks/database.yml +65 -0
- package/ops/roles/ota/tasks/main.yml +110 -0
- package/ops/site.yml +6 -0
- package/package.json +101 -0
- package/pm2.config.cjs +20 -0
- package/scripts/dataset/README.md +37 -0
- package/scripts/dataset/assets/LICENSE +540 -0
- package/scripts/dataset/assets/README.template.js +65 -0
- package/scripts/dataset/export/index.js +106 -0
- package/scripts/dataset/export/index.test.js +155 -0
- package/scripts/dataset/export/test/fixtures/dataset/LICENSE +540 -0
- package/scripts/dataset/export/test/fixtures/dataset/README.md +40 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-01T11-27-00Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceA/Terms of Service/2021-01-11T11-32-47Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Privacy Policy/2022-01-01T12-12-24Z.md +1 -0
- package/scripts/dataset/export/test/fixtures/dataset/ServiceB/Terms of Service/2022-01-06T11-32-47Z.md +1 -0
- package/scripts/dataset/index.js +40 -0
- package/scripts/dataset/logger/index.js +17 -0
- package/scripts/dataset/main.js +25 -0
- package/scripts/dataset/publish/index.js +39 -0
- package/scripts/declarations/lint/index.js +36 -0
- package/scripts/declarations/utils/index.js +81 -0
- package/scripts/declarations/validate/definitions.js +63 -0
- package/scripts/declarations/validate/index.mocha.js +262 -0
- package/scripts/declarations/validate/service.history.schema.js +86 -0
- package/scripts/declarations/validate/service.schema.js +91 -0
- package/scripts/history/logger/index.js +39 -0
- package/scripts/history/migrate-services.js +212 -0
- package/scripts/history/update-to-full-hash.js +61 -0
- package/scripts/history/utils/index.js +23 -0
- package/scripts/import/README.md +59 -0
- package/scripts/import/config/import.json +12 -0
- package/scripts/import/index.js +224 -0
- package/scripts/import/loadCommits.js +66 -0
- package/scripts/import/logger/index.js +43 -0
- package/scripts/rewrite/README.md +131 -0
- package/scripts/rewrite/config/rewrite-snapshots.json +32 -0
- package/scripts/rewrite/config/rewrite-versions.json +32 -0
- package/scripts/rewrite/initializer/files/license +428 -0
- package/scripts/rewrite/initializer/files/readme.md +8 -0
- package/scripts/rewrite/initializer/index.js +44 -0
- package/scripts/rewrite/rewrite-snapshots.js +108 -0
- package/scripts/rewrite/rewrite-versions.js +160 -0
- package/scripts/rewrite/utils.js +33 -0
- package/scripts/utils/renamer/README.md +49 -0
- package/scripts/utils/renamer/index.js +45 -0
- package/scripts/utils/renamer/rules/documentTypes.json +25 -0
- package/scripts/utils/renamer/rules/documentTypesByService.json +170 -0
- package/scripts/utils/renamer/rules/serviceNames.json +92 -0
- package/src/archivist/errors.js +9 -0
- package/src/archivist/fetcher/errors.js +6 -0
- package/src/archivist/fetcher/exports.js +18 -0
- package/src/archivist/fetcher/fullDomFetcher.js +84 -0
- package/src/archivist/fetcher/htmlOnlyFetcher.js +62 -0
- package/src/archivist/fetcher/index.js +35 -0
- package/src/archivist/fetcher/index.test.js +239 -0
- package/src/archivist/filter/exports.js +3 -0
- package/src/archivist/filter/index.js +178 -0
- package/src/archivist/filter/index.test.js +561 -0
- package/src/archivist/index.js +276 -0
- package/src/archivist/index.test.js +600 -0
- package/src/archivist/recorder/index.js +77 -0
- package/src/archivist/recorder/index.test.js +463 -0
- package/src/archivist/recorder/record.js +35 -0
- package/src/archivist/recorder/record.test.js +91 -0
- package/src/archivist/recorder/repositories/factory.js +23 -0
- package/src/archivist/recorder/repositories/git/dataMapper.js +83 -0
- package/src/archivist/recorder/repositories/git/git.js +122 -0
- package/src/archivist/recorder/repositories/git/git.test.js +86 -0
- package/src/archivist/recorder/repositories/git/index.js +182 -0
- package/src/archivist/recorder/repositories/git/index.test.js +714 -0
- package/src/archivist/recorder/repositories/interface.js +108 -0
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +32 -0
- package/src/archivist/recorder/repositories/mongo/index.js +121 -0
- package/src/archivist/recorder/repositories/mongo/index.test.js +721 -0
- package/src/archivist/services/documentDeclaration.js +26 -0
- package/src/archivist/services/documentDeclaration.test.js +85 -0
- package/src/archivist/services/documentTypes.json +386 -0
- package/src/archivist/services/index.js +255 -0
- package/src/archivist/services/index.test.js +327 -0
- package/src/archivist/services/pageDeclaration.js +51 -0
- package/src/archivist/services/pageDeclaration.test.js +224 -0
- package/src/archivist/services/service.js +60 -0
- package/src/archivist/services/service.test.js +164 -0
- package/src/exports.js +3 -0
- package/src/index.js +59 -0
- package/src/logger/README.md +1 -0
- package/src/logger/index.js +131 -0
- package/src/main.js +18 -0
- package/src/notifier/README.md +1 -0
- package/src/notifier/index.js +150 -0
- package/src/tracker/README.md +1 -0
- package/src/tracker/index.js +215 -0
- package/test/fixtures/service_A.js +22 -0
- package/test/fixtures/service_A_terms.md +10 -0
- package/test/fixtures/service_A_terms_snapshot.html +14 -0
- package/test/fixtures/service_B.js +22 -0
- package/test/fixtures/service_with_declaration_history.js +65 -0
- package/test/fixtures/service_with_filters_history.js +155 -0
- package/test/fixtures/service_with_history.js +188 -0
- package/test/fixtures/service_with_multipage_document.js +100 -0
- package/test/fixtures/service_without_history.js +31 -0
- package/test/fixtures/services.js +19 -0
- package/test/fixtures/terms.pdf +0 -0
- package/test/fixtures/termsFromPDF.md +25 -0
- package/test/fixtures/termsModified.pdf +0 -0
- package/test/services/service_A.json +9 -0
- package/test/services/service_B.json +9 -0
- package/test/services/service_with_declaration_history.filters.js +7 -0
- package/test/services/service_with_declaration_history.history.json +17 -0
- package/test/services/service_with_declaration_history.json +13 -0
- package/test/services/service_with_filters_history.filters.history.js +29 -0
- package/test/services/service_with_filters_history.filters.js +7 -0
- package/test/services/service_with_filters_history.json +13 -0
- package/test/services/service_with_history.filters.history.js +29 -0
- package/test/services/service_with_history.filters.js +7 -0
- package/test/services/service_with_history.history.json +26 -0
- package/test/services/service_with_history.json +17 -0
- package/test/services/service_with_multipage_document.filters.js +7 -0
- package/test/services/service_with_multipage_document.history.json +37 -0
- package/test/services/service_with_multipage_document.json +28 -0
- package/test/services/service_without_history.filters.js +7 -0
- package/test/services/service_without_history.json +13 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
|
|
4
|
+
import config from 'config';
|
|
5
|
+
|
|
6
|
+
import { InaccessibleContentError } from '../../src/archivist/errors.js';
|
|
7
|
+
import filter from '../../src/archivist/filter/index.js';
|
|
8
|
+
import Recorder from '../../src/archivist/recorder/index.js';
|
|
9
|
+
import Git from '../../src/archivist/recorder/repositories/git/git.js';
|
|
10
|
+
import GitRepository from '../../src/archivist/recorder/repositories/git/index.js';
|
|
11
|
+
import * as services from '../../src/archivist/services/index.js';
|
|
12
|
+
import * as renamer from '../utils/renamer/index.js';
|
|
13
|
+
|
|
14
|
+
import * as initializer from './initializer/index.js';
|
|
15
|
+
import { loadFile } from './utils.js';
|
|
16
|
+
|
|
17
|
+
// ES modules do not provide `__dirname`; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Two directories above this script's directory; base used to resolve the configured paths below.
const ROOT_PATH = path.resolve(__dirname, '../../');
// MIME type passed to the recorder for every version recorded by this script.
const MARKDOWN_MIME_TYPE = 'text/markdown';

// Absolute path of the Git repository whose history is read (config key `rewrite.snapshotsSourcePath`).
export const SNAPSHOTS_SOURCE_PATH = path.resolve(
  ROOT_PATH,
  config.get('rewrite.snapshotsSourcePath'),
);
// Absolute path of the Git repository in which versions are recorded (config key `recorder.versions.storage.git.path`).
export const VERSIONS_TARGET_PATH = path.resolve(ROOT_PATH, config.get('recorder.versions.storage.git.path'));

// True when the script is invoked with the `--init` command-line flag.
const initialize = process.argv.includes('--init');

// Per-outcome tallies, incremented while commits are processed and printed in the final summary.
const COUNTERS = {
  rewritten: 0,
  skippedNoChanges: 0,
  skippedInaccessibleContent: 0,
  skippedUnknownError: 0,
};

// Recorder instance; assigned once the script has set up its repositories.
let recorder;
|
|
38
|
+
|
|
39
|
+
/**
 * Script entry point: regenerates versions from the snapshots history.
 *
 * Steps:
 *  1. Load the renaming rules and the service declarations (with their history).
 *  2. Read the source repository log and sort commits from oldest to newest.
 *  3. When `--init` was given, initialize the target repository with a readme and a license,
 *     dated like the oldest source commit.
 *  4. For every commit whose message starts with "Start tracking" or "Update": check it out,
 *     load the first file listed in its diff, derive the service ID (directory name) and
 *     document type (file name without extension), apply the renaming rules, filter the
 *     content with the matching declaration and record the result as a version.
 *  5. Print a summary of the per-outcome counters.
 *
 * Commits skipped because their service or document type is unknown are not counted in
 * `COUNTERS`, so the final "does not match" warning is printed whenever such skips happened.
 */
(async () => {
  console.time('Total time');
  console.log('Start rewriting history.');

  await renamer.loadRules();
  const servicesDeclarations = await services.loadWithHistory();
  const sourceRepo = new Git({ path: SNAPSHOTS_SOURCE_PATH, author: config.get('recorder.snapshots.storage.git.author') });

  await sourceRepo.initialize();

  console.log('Waiting for git log… (this can take a while)');
  // Oldest commit first, so that versions are recorded in chronological order.
  const commits = (await sourceRepo.log(['--stat=4096'])).sort((a, b) => new Date(a.date) - new Date(b.date));

  console.log(`Source repo contains ${commits.length} commits.\n`);

  if (initialize) {
    const targetRepo = await initializer.initTargetRepo(VERSIONS_TARGET_PATH);
    const [readmeCommit] = commits;

    await initializer.initReadmeAndLicense(targetRepo, VERSIONS_TARGET_PATH, readmeCommit.date);
  }

  recorder = new Recorder({
    versionsRepository: new GitRepository({
      ...config.get('recorder.versions.storage.git'),
      path: VERSIONS_TARGET_PATH,
    }),
    snapshotsRepository: new GitRepository({
      ...config.get('recorder.snapshots.storage.git'),
      path: SNAPSHOTS_SOURCE_PATH,
    }),
  });

  await recorder.initialize();

  // Only commits that record a snapshot are replayed.
  const filteredCommits = commits.filter(({ message }) =>
    message.match(/^(Start tracking|Update)/));

  /* eslint-disable no-await-in-loop */
  /* eslint-disable no-continue */
  for (const commit of filteredCommits) {
    console.log(Date.now(), commit.hash, commit.date, commit.message);

    await sourceRepo.checkout(commit.hash);

    // Only the first file listed in the commit diff is processed.
    const [{ file: relativeFilePath }] = commit.diff.files;

    const { content, mimeType } = await loadFile(SNAPSHOTS_SOURCE_PATH, relativeFilePath);

    // Snapshot paths are read as `<service ID>/<document type>.<extension>`.
    let serviceId = path.dirname(relativeFilePath);
    let documentType = path.basename(relativeFilePath, path.extname(relativeFilePath));

    ({ serviceId, documentType } = renamer.applyRules(serviceId, documentType));

    if (!servicesDeclarations[serviceId]) {
      console.log(`⌙ Skip unknown service "${serviceId}"`);
      continue;
    }

    // The commit date is passed along with the document type to select the declaration.
    const documentDeclaration = servicesDeclarations[serviceId].getDocumentDeclaration(
      documentType,
      commit.date,
    );

    if (!documentDeclaration) {
      console.log(`⌙ Skip unknown document type "${documentType}" for service "${serviceId}"`);
      continue;
    }

    if (documentDeclaration.validUntil) {
      console.log(`⌙ Use declaration valid until ${documentDeclaration.validUntil}`);
    }

    try {
      const document = await filter({
        content,
        mimeType,
        documentDeclaration,
      });

      const { id: versionId } = await recorder.recordVersion({
        serviceId,
        documentType,
        content: document,
        mimeType: MARKDOWN_MIME_TYPE, // The result of the `filter` function is always in markdown format
        fetchDate: commit.date,
        snapshotId: commit.hash,
      });

      // A record without an ID is counted as "no changes".
      if (versionId) {
        COUNTERS.rewritten++;
      } else {
        COUNTERS.skippedNoChanges++;
      }
    } catch (error) {
      if (error instanceof InaccessibleContentError) {
        console.log('⌙ Skip inaccessible content');
        COUNTERS.skippedInaccessibleContent++;
      } else {
        console.log('⌙ Unknown error:', error);
        COUNTERS.skippedUnknownError++;
      }
    }
  }

  const totalTreatedCommits = Object.values(COUNTERS).reduce((acc, value) => acc + value, 0);

  console.log(`\nCommits treated: ${totalTreatedCommits} on ${filteredCommits.length}`);
  console.log(`⌙ Commits rewritten: ${COUNTERS.rewritten}`);
  console.log(`⌙ Skipped not changed commits: ${COUNTERS.skippedNoChanges}`);
  console.log(`⌙ Skipped inaccessible content: ${COUNTERS.skippedInaccessibleContent}`);
  console.log(`⌙ Skipped unknown error: ${COUNTERS.skippedUnknownError}`);
  console.timeEnd('Total time');

  if (totalTreatedCommits !== filteredCommits.length) {
    console.error('\n⚠ WARNING: Total treated commits does not match the total number of commits to be treated! ⚠');
  }

  if (COUNTERS.skippedUnknownError) {
    console.error('\n⚠ WARNING: Some unknown errors occurred, check log! ⚠');
  }
})().catch(error => {
  // Do not leave the promise floating: report the failure and signal it through the exit code.
  console.error(error);
  process.exitCode = 1;
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import fsApi from 'fs';
|
|
2
|
+
|
|
3
|
+
import mime from 'mime';
|
|
4
|
+
|
|
5
|
+
const fs = fsApi.promises;
|
|
6
|
+
|
|
7
|
+
/**
 * Tell whether a path exists on the file system.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when the path can be accessed, `false` when it does not exist (`ENOENT`).
 * @throws Any access error other than `ENOENT` is rethrown instead of being silently
 *   turned into an `undefined` result.
 */
export async function fileExists(filePath) {
  try {
    await fs.access(filePath);

    return true;
  } catch (error) {
    if (error.code === 'ENOENT') {
      return false;
    }

    // Anything else is not a "file is missing" answer; let the caller see it.
    throw error;
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Read a file from a repository working directory, along with its MIME type.
 *
 * The MIME type is derived from the file path by `mime.getType`. Files whose type starts
 * with `text/` are read as UTF-8 strings; any other file, including one whose type cannot
 * be determined, is read without an encoding.
 *
 * @param {string} repoPath - Path of the repository directory.
 * @param {string} relativeFilePath - Path of the file, relative to `repoPath`.
 * @returns {Promise<{content: (string|Buffer), mimeType: (string|null)}>} The file content and its detected MIME type.
 */
export async function loadFile(repoPath, relativeFilePath) {
  const absoluteFilePath = `${repoPath}/${relativeFilePath}`;

  const mimeType = mime.getType(absoluteFilePath);
  const readFileOptions = {};

  // `mime.getType` yields `null` when the type is unknown; guard before calling `startsWith`.
  if (mimeType && mimeType.startsWith('text/')) {
    readFileOptions.encoding = 'utf8';
  }

  return {
    content: await fs.readFile(absoluteFilePath, readFileOptions),
    mimeType,
  };
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Renamer
|
|
2
|
+
|
|
3
|
+
This module is used to apply renaming rules to service IDs and document types.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
You can use it in your other scripts like this:
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
await renamer.loadRules();
|
|
11
|
+
const { serviceId: renamedServiceId, documentType: renamedDocumentType } = renamer.applyRules(serviceId, documentType);
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Adding renaming rules
|
|
15
|
+
|
|
16
|
+
### Service
|
|
17
|
+
|
|
18
|
+
To rename a service, add a rule in `./rules/serviceNames.json`. For example, to rename "GoogleAds" to "Google Ads", add the following line in the file:
|
|
19
|
+
|
|
20
|
+
```json
|
|
21
|
+
{
|
|
22
|
+
…
|
|
23
|
+
"GoogleAds": "Google Ads"
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Document type
|
|
28
|
+
|
|
29
|
+
To rename a document type, add a rule in `./rules/documentTypes.json`, for example, to rename "Program Policies" to "Acceptable Use Policy", add the following line in the file:
|
|
30
|
+
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
…
|
|
34
|
+
"Program Policies": "Acceptable Use Policy"
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Document type for a specific service
|
|
39
|
+
|
|
40
|
+
To rename a document type only for a specific service, add a rule in `./rules/documentTypesByService.json`. For example, to rename "Program Policies" to "Acceptable Use Policy" only for Skype, add the following lines in the file:
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
…
|
|
45
|
+
"Skype": {
|
|
46
|
+
"Program Policies": "Acceptable Use Policy"
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
```
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import fsApi from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
const fs = fsApi.promises;
|
|
6
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
|
|
8
|
+
let renamingRules;
|
|
9
|
+
|
|
10
|
+
/**
 * Load the renaming rules from the JSON files of the `rules` directory located next to
 * this module, and store them in the module-level `renamingRules` variable.
 *
 * The three files are independent, so they are read concurrently. They are read as UTF-8
 * text rather than relying on `JSON.parse` coercing a Buffer to a string.
 *
 * @returns {Promise<void>} Resolves once all rules are loaded.
 * @throws If a rules file cannot be read or does not contain valid JSON.
 */
export async function loadRules() {
  const readRules = async fileName => JSON.parse(await fs.readFile(path.resolve(__dirname, './rules', fileName), 'utf8'));

  const [ serviceNames, documentTypes, documentTypesByService ] = await Promise.all([
    readRules('serviceNames.json'),
    readRules('documentTypes.json'),
    readRules('documentTypesByService.json'),
  ]);

  renamingRules = {
    serviceNames,
    documentTypes,
    documentTypesByService,
  };
}
|
|
17
|
+
|
|
18
|
+
/**
 * Apply the loaded renaming rules to a service ID and a document type.
 *
 * Rules are applied in this order:
 *  1. service renaming (`serviceNames`);
 *  2. generic document type renaming (`documentTypes`);
 *  3. service-specific document type renaming (`documentTypesByService`), looked up with
 *     the document type resulting from step 2.
 *
 * Service-specific rules are searched under the renamed service ID first, then under the
 * ID the service had before step 1. Without that fallback, a rule keyed by the old ID of a
 * renamed service could never match.
 *
 * `loadRules()` must have completed before this function is called.
 *
 * @param {string} serviceId - Service ID to rename.
 * @param {string} documentType - Document type to rename.
 * @returns {{serviceId: string, documentType: string}} The renamed pair; a value is returned unchanged when no rule matches it.
 */
export function applyRules(serviceId, documentType) {
  let resultingServiceId = serviceId;
  let resultingDocumentType = documentType;

  const renamedServiceId = renamingRules.serviceNames[serviceId];

  if (renamedServiceId) {
    console.log(`⌙ Rename service "${serviceId}" to "${renamedServiceId}"`);
    resultingServiceId = renamedServiceId;
  }

  const renamedDocumentType = renamingRules.documentTypes[documentType];

  if (renamedDocumentType) {
    console.log(`⌙ Rename document type "${documentType}" to "${renamedDocumentType}" of "${resultingServiceId}" service`);
    resultingDocumentType = renamedDocumentType;
  }

  // Look up the specific rule for the current document type under a given service ID.
  const findSpecificRule = id => renamingRules.documentTypesByService[id]
    && renamingRules.documentTypesByService[id][resultingDocumentType];

  const renamedServiceDocumentType = findSpecificRule(resultingServiceId) || findSpecificRule(serviceId);

  if (renamedServiceDocumentType) {
    console.log(`⌙ Specific rename document type "${resultingDocumentType}" to "${renamedServiceDocumentType}" of "${resultingServiceId}" service`);
    resultingDocumentType = renamedServiceDocumentType;
  }

  return {
    serviceId: resultingServiceId,
    documentType: resultingDocumentType,
  };
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"acceptable_use_policy": "Acceptable Use Policy",
|
|
3
|
+
"brand_guidelines": "Brand Guidelines",
|
|
4
|
+
"commercial_terms": "Commercial Terms",
|
|
5
|
+
"community_guidelines": "Community Guidelines",
|
|
6
|
+
"controller_controller_data_protection_terms": "Data Controller Agreement",
|
|
7
|
+
"Cookies Policy": "Trackers Policy",
|
|
8
|
+
"cookies_policy": "Trackers Policy",
|
|
9
|
+
"copyright_policy": "Copyright Claims Policy",
|
|
10
|
+
"data_processing_terms": "Data Processor Agreement",
|
|
11
|
+
"developer_agreement": "Developer Terms",
|
|
12
|
+
"developer_policy": "Developer Terms",
|
|
13
|
+
"in_app_purchases_policy": "In-App Purchases Policy",
|
|
14
|
+
"law_enforcement_guidelines": "Law Enforcement Guidelines",
|
|
15
|
+
"privacy_policy": "Privacy Policy",
|
|
16
|
+
"Program Policies": "Acceptable Use Policy",
|
|
17
|
+
"review_guidelines": "Review Guidelines",
|
|
18
|
+
"software_license_agreement": "Software License Agreement",
|
|
19
|
+
"Terms Of Service": "Terms of Service",
|
|
20
|
+
"terms_of_service_parent_company": "Parent Organization Terms",
|
|
21
|
+
"terms_of_service": "Terms of Service",
|
|
22
|
+
"tos_parent": "Parent Organization Terms",
|
|
23
|
+
"user_consent_policy": "User Consent Policy",
|
|
24
|
+
"WEBSITE TERMS OF USE": "Website Terms of Use"
|
|
25
|
+
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
{
|
|
2
|
+
"Skype": {
|
|
3
|
+
"undefined": "Parent Organization Terms"
|
|
4
|
+
},
|
|
5
|
+
"Aegean Airlines": {
|
|
6
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
7
|
+
},
|
|
8
|
+
"Aigle": {
|
|
9
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
10
|
+
},
|
|
11
|
+
"Air Corsica": {
|
|
12
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
13
|
+
},
|
|
14
|
+
"Air Europa": {
|
|
15
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
16
|
+
},
|
|
17
|
+
"BackMarket": {
|
|
18
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
19
|
+
},
|
|
20
|
+
"Benetton": {
|
|
21
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
22
|
+
},
|
|
23
|
+
"Bershka": {
|
|
24
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
25
|
+
},
|
|
26
|
+
"Bizzbee": {
|
|
27
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
28
|
+
},
|
|
29
|
+
"BlaBlaCar": {
|
|
30
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
31
|
+
},
|
|
32
|
+
"Boulanger": {
|
|
33
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
34
|
+
},
|
|
35
|
+
"Brice": {
|
|
36
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
37
|
+
},
|
|
38
|
+
"C-A": {
|
|
39
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
40
|
+
},
|
|
41
|
+
"Camaieu": {
|
|
42
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
43
|
+
},
|
|
44
|
+
"Caroll": {
|
|
45
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
46
|
+
},
|
|
47
|
+
"Cdiscount": {
|
|
48
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
49
|
+
},
|
|
50
|
+
"Comptoir des Cotonniers": {
|
|
51
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
52
|
+
},
|
|
53
|
+
"Cop-Copine": {
|
|
54
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
55
|
+
},
|
|
56
|
+
"Damart": {
|
|
57
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
58
|
+
},
|
|
59
|
+
"Darty": {
|
|
60
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
61
|
+
},
|
|
62
|
+
"Decathlon": {
|
|
63
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
64
|
+
},
|
|
65
|
+
"Devred": {
|
|
66
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
67
|
+
},
|
|
68
|
+
"E.Leclerc": {
|
|
69
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
70
|
+
},
|
|
71
|
+
"Esprit": {
|
|
72
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
73
|
+
},
|
|
74
|
+
"Etam": {
|
|
75
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
76
|
+
},
|
|
77
|
+
"Eurolines": {
|
|
78
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
79
|
+
},
|
|
80
|
+
"FlixBus": {
|
|
81
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
82
|
+
},
|
|
83
|
+
"G7": {
|
|
84
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
85
|
+
},
|
|
86
|
+
"Jennyfer": {
|
|
87
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
88
|
+
},
|
|
89
|
+
"Jules": {
|
|
90
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
91
|
+
},
|
|
92
|
+
"Kaporal": {
|
|
93
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
94
|
+
},
|
|
95
|
+
"Kappa": {
|
|
96
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
97
|
+
},
|
|
98
|
+
"Kookai": {
|
|
99
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
100
|
+
},
|
|
101
|
+
"La Redoute": {
|
|
102
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
103
|
+
},
|
|
104
|
+
"Lacoste": {
|
|
105
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
106
|
+
},
|
|
107
|
+
"Lafuma": {
|
|
108
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
109
|
+
},
|
|
110
|
+
"Le Coq Sportif": {
|
|
111
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
112
|
+
},
|
|
113
|
+
"LeCab": {
|
|
114
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
115
|
+
},
|
|
116
|
+
"Levis": {
|
|
117
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
118
|
+
},
|
|
119
|
+
"Morgan": {
|
|
120
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
121
|
+
},
|
|
122
|
+
"Naf Naf": {
|
|
123
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
124
|
+
},
|
|
125
|
+
"New Balance": {
|
|
126
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
127
|
+
},
|
|
128
|
+
"Nike": {
|
|
129
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
130
|
+
},
|
|
131
|
+
"Printemps": {
|
|
132
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
133
|
+
},
|
|
134
|
+
"Rue du Commerce": {
|
|
135
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
136
|
+
},
|
|
137
|
+
"SHEIN": {
|
|
138
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
139
|
+
},
|
|
140
|
+
"SNCF Connect": {
|
|
141
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
142
|
+
},
|
|
143
|
+
"Salto": {
|
|
144
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
145
|
+
},
|
|
146
|
+
"Showroomprive.com": {
|
|
147
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
148
|
+
},
|
|
149
|
+
"Spartoo": {
|
|
150
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
151
|
+
},
|
|
152
|
+
"Tunisair": {
|
|
153
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
154
|
+
},
|
|
155
|
+
"Vans": {
|
|
156
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
157
|
+
},
|
|
158
|
+
"Veepee": {
|
|
159
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
160
|
+
},
|
|
161
|
+
"Verbaudet": {
|
|
162
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
163
|
+
},
|
|
164
|
+
"Vinted": {
|
|
165
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
166
|
+
},
|
|
167
|
+
"Zalando": {
|
|
168
|
+
"Commercial Terms": "General Conditions of Sale"
|
|
169
|
+
}
|
|
170
|
+
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
{
|
|
2
|
+
"123 Greetings": "123Greetings",
|
|
3
|
+
"Academia": "Academia.edu",
|
|
4
|
+
"Alibaba": "Alibaba.com",
|
|
5
|
+
"Amazon": "Amazon.com",
|
|
6
|
+
"AmazonAlexa": "Alexa",
|
|
7
|
+
"AmazonAmazonAppStoreAndroid": "Amazon Appstore",
|
|
8
|
+
"AmazonCoins": "Amazon Coins",
|
|
9
|
+
"AmazonDevice": "Amazon Device",
|
|
10
|
+
"AmazonDriveAndPrimePhotos": "Amazon Photos",
|
|
11
|
+
"AmazonKindleStore": "Amazon Kindle Store",
|
|
12
|
+
"AmazonMaps": "Amazon Maps",
|
|
13
|
+
"AmazonMonthlyPayments": "Amazon Monthly Payments",
|
|
14
|
+
"AmazonMusic": "Amazon Music",
|
|
15
|
+
"AmazonPrime": "Amazon Prime",
|
|
16
|
+
"AmazonSilk": "Amazon Silk",
|
|
17
|
+
"AppleAppStore": "App Store",
|
|
18
|
+
"AppleGameCenter": "Apple Game Center",
|
|
19
|
+
"AppleICloud": "iCloud",
|
|
20
|
+
"AskFM": "ASKfm",
|
|
21
|
+
"Asus": "ASUS",
|
|
22
|
+
"Bbc": "BBC",
|
|
23
|
+
"Blizzard": "Blizzard Entertainment",
|
|
24
|
+
"Cnn": "CNN International",
|
|
25
|
+
"Commentcamarche": "CommentCaMarche",
|
|
26
|
+
"Cvs": "CVS Pharmacy",
|
|
27
|
+
"Dailymail": "Daily Mail",
|
|
28
|
+
"deviantART": "DeviantArt",
|
|
29
|
+
"Discordapp": "Discord",
|
|
30
|
+
"Duckduckgo": "DuckDuckGo",
|
|
31
|
+
"Ebay": "eBay",
|
|
32
|
+
"FacebookAds": "Facebook Ads",
|
|
33
|
+
"FacebookPayments": "Facebook Payments",
|
|
34
|
+
"Fedex": "FedEx",
|
|
35
|
+
"Foxnews": "Fox News",
|
|
36
|
+
"Ft": "Financial Times",
|
|
37
|
+
"Github": "GitHub",
|
|
38
|
+
"Gitlab": "GitLab",
|
|
39
|
+
"GoogleAdMob": "AdMob",
|
|
40
|
+
"GoogleAds": "Google Ads",
|
|
41
|
+
"GoogleAdSense": "AdSense",
|
|
42
|
+
"GoogleAnalytics": "Google Analytics",
|
|
43
|
+
"GooglePlayStore": "Google Play",
|
|
44
|
+
"Hsbc": "HSBC",
|
|
45
|
+
"Ign": "IGN Entertainment",
|
|
46
|
+
"Imdb": "IMDb",
|
|
47
|
+
"LastFm": "Last.fm",
|
|
48
|
+
"LinguaLeo": "Lingualeo",
|
|
49
|
+
"Linkedin": "LinkedIn",
|
|
50
|
+
"Microsoftstore": "Microsoft Store",
|
|
51
|
+
"Msn": "MSN",
|
|
52
|
+
"Nationalgeographic": "National Geographic",
|
|
53
|
+
"Nytimes": "The New York Times",
|
|
54
|
+
"Okcupid": "OkCupid",
|
|
55
|
+
"Oreilly": "O'Reilly",
|
|
56
|
+
"Reuters": "thomsonreuters.com",
|
|
57
|
+
"Shockwave": "Shockwave.com",
|
|
58
|
+
"Sonic": "Sonic.net",
|
|
59
|
+
"Sony": "sony.com",
|
|
60
|
+
"StackOverflow": "Stack Overflow",
|
|
61
|
+
"Surveymonkey": "SurveyMonkey",
|
|
62
|
+
"Theguardian": "The Guardian",
|
|
63
|
+
"Theregister": "The Register",
|
|
64
|
+
"Time": "TIME",
|
|
65
|
+
"Timeanddate": "timeanddate.com",
|
|
66
|
+
"Toyota": "Toyota Connected Services",
|
|
67
|
+
"Tp-link": "TP-Link",
|
|
68
|
+
"Ubuntu": "ubuntu.com",
|
|
69
|
+
"Ucla": "ucla.edu",
|
|
70
|
+
"Ui": "Ubiquiti",
|
|
71
|
+
"Ulster": "ulster.ac.uk",
|
|
72
|
+
"Urbandictionary": "UrbanDictionary.com",
|
|
73
|
+
"Usa": "USA.gov",
|
|
74
|
+
"Veracitypayments": "Vanco Payment Solutions",
|
|
75
|
+
"Veranda": "Hearst.com",
|
|
76
|
+
"W3schools": "W3Schools",
|
|
77
|
+
"Waterhavens": "WaterHavens",
|
|
78
|
+
"Webcrawler": "WebCrawler",
|
|
79
|
+
"Webpronews": "WebProNews",
|
|
80
|
+
"WeChatOpenPlatform": "WeChat Open Platform",
|
|
81
|
+
"WeHeartIt": "We Heart It",
|
|
82
|
+
"Wikimediafoundation": "Wikimedia Foundation",
|
|
83
|
+
"Wineverygame": "WinEveryGame",
|
|
84
|
+
"Wolframalpha": "WolframAlpha",
|
|
85
|
+
"Womansday": "Womans Day",
|
|
86
|
+
"Wordpress": "WordPress.com",
|
|
87
|
+
"Worldmarket": "World Market",
|
|
88
|
+
"XfinityResidentialSubscription": "Xfinity Residential Services",
|
|
89
|
+
"XfinityWebServices": "Xfinity Web Services",
|
|
90
|
+
"Youtube": "YouTube",
|
|
91
|
+
"Verbaudet": "Vertbaudet"
|
|
92
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Error raised when a document cannot be accessed or its content cannot be selected.
 *
 * The reason given to the constructor is appended to a fixed prefix:
 *  - a string is appended as is;
 *  - a non-empty array is rendered as a list, one ` - ` item per line;
 *  - when no reason is given (`undefined`, `null` or an empty array), only the fixed
 *    sentence is used, instead of appending the text "undefined" or an empty list item.
 */
export class InaccessibleContentError extends Error {
  /**
   * @param {string|string[]} [message] - Reason, or list of reasons, why the content is inaccessible.
   */
  constructor(message) {
    const prefix = 'The document cannot be accessed or its content can not be selected';
    let details = '';

    if (Array.isArray(message)) {
      if (message.length) {
        details = `:\n - ${message.join('\n - ')}`;
      }
    } else if (message !== undefined && message !== null) {
      details = `:${message}`;
    }

    super(`${prefix}${details}`);
    this.name = 'InaccessibleContentError';
  }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import '../../../bin/.env.js'; // Workaround to ensure `SUPPRESS_NO_CONFIG_WARNING` is set before config is imported
|
|
2
|
+
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
|
|
7
|
+
import config from 'config';
|
|
8
|
+
|
|
9
|
+
import fetcher from './index.js';
|
|
10
|
+
|
|
11
|
+
// ES modules do not provide `__dirname`; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parse the `config/default.json` file located three directories above this module.
const defaultConfigs = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../../../config/default.json')));

// Register the `fetcher` section of that file as module defaults of the `config` package.
// NOTE(review): presumably so the fetcher works when imported by a project without its own `fetcher` configuration — confirm.
config.util.setModuleDefaults('fetcher', defaultConfigs.fetcher);

// Re-export the browser lifecycle helpers alongside the default fetcher.
export { launchHeadlessBrowser, stopHeadlessBrowser } from './index.js';

export default fetcher;
|