@opentermsarchive/engine 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/bin/ota-dataset.js +33 -0
- package/bin/ota-track.js +1 -1
- package/bin/ota-validate.js +1 -1
- package/bin/ota.js +1 -0
- package/package.json +3 -3
- package/src/index.js +1 -1
- package/scripts/dataset/README.md +0 -37
- package/scripts/dataset/main.js +0 -25
package/README.md
CHANGED
|
@@ -184,7 +184,7 @@ npx ota track --services "<service_id>" ["<service_id>"...]
|
|
|
184
184
|
##### Track specific terms of specific services
|
|
185
185
|
|
|
186
186
|
```sh
|
|
187
|
-
npx ota track --services "<service_id>" ["<service_id>"...] --
|
|
187
|
+
npx ota track --services "<service_id>" ["<service_id>"...] --terms-types "<terms_type>" ["<terms_type>"...]
|
|
188
188
|
```
|
|
189
189
|
|
|
190
190
|
##### Track documents four times a day
|
|
@@ -196,7 +196,7 @@ npx ota track --schedule
|
|
|
196
196
|
#### `ota validate`
|
|
197
197
|
|
|
198
198
|
```sh
|
|
199
|
-
npx ota validate [--services <service_id>...] [--
|
|
199
|
+
npx ota validate [--services <service_id>...] [--terms-types <terms_type>...]
|
|
200
200
|
```
|
|
201
201
|
|
|
202
202
|
Check that all declarations allow recording a snapshot and a version properly.
|
|
@@ -206,7 +206,7 @@ If one or several `<service_id>` are provided, check only those services.
|
|
|
206
206
|
##### Validate schema only
|
|
207
207
|
|
|
208
208
|
```sh
|
|
209
|
-
npx ota validate --schema-only [--services <service_id>...] [--
|
|
209
|
+
npx ota validate --schema-only [--services <service_id>...] [--terms-types <terms_type>...]
|
|
210
210
|
```
|
|
211
211
|
|
|
212
212
|
Check that all declarations are readable by the engine.
|
|
@@ -227,6 +227,38 @@ Automatically correct formatting mistakes and ensure that all declarations are s
|
|
|
227
227
|
|
|
228
228
|
If one or several `<service_id>` are provided, check only those services.
|
|
229
229
|
|
|
230
|
+
#### `ota dataset`
|
|
231
|
+
|
|
232
|
+
Export the versions dataset into a ZIP file and publish it to GitHub releases.
|
|
233
|
+
|
|
234
|
+
The dataset title and the URL of the versions repository are defined in the [configuration](#configuring).
|
|
235
|
+
|
|
236
|
+
To export the dataset into a local ZIP file:
|
|
237
|
+
|
|
238
|
+
```sh
|
|
239
|
+
npx ota dataset [--file <filename>]
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
To export the dataset into a ZIP file and publish it on GitHub releases:
|
|
243
|
+
|
|
244
|
+
```sh
|
|
245
|
+
GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
The `GITHUB_TOKEN` can also be defined in a [`.env` file](#environment-variables).
|
|
249
|
+
|
|
250
|
+
To export, publish the dataset and remove the local copy that was created after it has been uploaded:
|
|
251
|
+
|
|
252
|
+
```sh
|
|
253
|
+
GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish --remove-local-copy
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
To schedule export, publishing and local copy removal:
|
|
257
|
+
|
|
258
|
+
```sh
|
|
259
|
+
GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --schedule --publish --remove-local-copy
|
|
260
|
+
```
|
|
261
|
+
|
|
230
262
|
### API
|
|
231
263
|
|
|
232
264
|
Once added as a dependency, the engine exposes a JavaScript API that can be called in your own code. The following modules are available.
|
|
@@ -277,10 +309,6 @@ import pageDeclaration from '@opentermsarchive/engine/page-declaration';
|
|
|
277
309
|
|
|
278
310
|
The `PageDeclaration` format is defined [in source code](./src/archivist/services/pageDeclaration.js).
|
|
279
311
|
|
|
280
|
-
### Dataset generation
|
|
281
|
-
|
|
282
|
-
See the [`dataset` script documentation](./scripts/dataset/README.md).
|
|
283
|
-
|
|
284
312
|
## Configuring
|
|
285
313
|
|
|
286
314
|
### Configuration file
|
|
package/bin/ota-dataset.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#! /usr/bin/env node
|
|
2
|
+
import './env.js';
|
|
3
|
+
|
|
4
|
+
import { program } from 'commander';
|
|
5
|
+
import cron from 'croner';
|
|
6
|
+
|
|
7
|
+
import { release } from '../scripts/dataset/index.js';
|
|
8
|
+
import logger from '../src/logger/index.js';
|
|
9
|
+
|
|
10
|
+
program
|
|
11
|
+
.name('ota dataset')
|
|
12
|
+
.description('Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
|
|
13
|
+
.option('-f, --file <filename>', 'file name of the generated dataset')
|
|
14
|
+
.option('-p, --publish', 'publish dataset to GitHub releases on versions repository. Mandatory authentication to GitHub is provided through the `GITHUB_TOKEN` environment variable')
|
|
15
|
+
.option('-r, --remove-local-copy', 'remove local copy of dataset after publishing. Works only in combination with --publish option')
|
|
16
|
+
.option('--schedule', 'schedule automatic dataset generation');
|
|
17
|
+
|
|
18
|
+
const { schedule, publish, removeLocalCopy, file: fileName } = program.parse().opts();
|
|
19
|
+
|
|
20
|
+
const options = {
|
|
21
|
+
fileName,
|
|
22
|
+
shouldPublish: publish,
|
|
23
|
+
shouldRemoveLocalCopy: removeLocalCopy,
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
if (!schedule) {
|
|
27
|
+
await release(options);
|
|
28
|
+
} else {
|
|
29
|
+
logger.info('The scheduler is running…');
|
|
30
|
+
logger.info('Dataset will be published every Monday at 08:30 in the timezone of this machine');
|
|
31
|
+
|
|
32
|
+
cron('30 8 * * MON', () => release(options));
|
|
33
|
+
}
|
package/bin/ota-track.js
CHANGED
|
@@ -14,7 +14,7 @@ program
|
|
|
14
14
|
.name('ota track')
|
|
15
15
|
.description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
|
|
16
16
|
.option('-s, --services [serviceId...]', 'service IDs of services to track')
|
|
17
|
-
.option('-t, --
|
|
17
|
+
.option('-t, --terms-types [termsType...]', 'terms types to track')
|
|
18
18
|
.option('-r, --refilter-only', 'refilter existing snapshots with latest declarations and engine, without recording new snapshots')
|
|
19
19
|
.option('--schedule', 'schedule automatic document tracking');
|
|
20
20
|
|
package/bin/ota-validate.js
CHANGED
|
@@ -21,7 +21,7 @@ program
|
|
|
21
21
|
.name('ota validate')
|
|
22
22
|
.description('Run a series of tests to check the validity of document declarations')
|
|
23
23
|
.option('-s, --services [serviceId...]', 'service IDs of services to validate')
|
|
24
|
-
.option('-t, --
|
|
24
|
+
.option('-t, --terms-types [termsType...]', 'terms types to validate')
|
|
25
25
|
.option('-m, --modified', 'target only services modified in the current git branch')
|
|
26
26
|
.option('-o, --schema-only', 'much faster check of declarations, but does not check that the documents are actually accessible');
|
|
27
27
|
|
package/bin/ota.js
CHANGED
|
@@ -13,4 +13,5 @@ program
|
|
|
13
13
|
.command('track', 'Track the current terms of services according to provided declarations')
|
|
14
14
|
.command('validate', 'Run a series of tests to check the validity of document declarations')
|
|
15
15
|
.command('lint', 'Check format and stylistic errors in declarations and auto fix them')
|
|
16
|
+
.command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
|
|
16
17
|
.parse(process.argv);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opentermsarchive/engine",
|
|
3
|
-
"version": "0.20.0",
|
|
3
|
+
"version": "0.21.0",
|
|
4
4
|
"description": "Tracks and makes visible changes to the terms of online services",
|
|
5
5
|
"homepage": "https://github.com/ambanum/OpenTermsArchive#readme",
|
|
6
6
|
"bugs": {
|
|
@@ -30,8 +30,8 @@
|
|
|
30
30
|
".eslintrc.yaml"
|
|
31
31
|
],
|
|
32
32
|
"scripts": {
|
|
33
|
-
"dataset:generate": "node scripts/dataset/main.js",
|
|
34
|
-
"dataset:release": "node scripts/dataset/main.js --publish --remove-local-copy",
|
|
33
|
+
"dataset:generate": "node bin/ota.js dataset",
|
|
34
|
+
"dataset:release": "node bin/ota.js dataset --publish --remove-local-copy",
|
|
35
35
|
"dataset:scheduler": "npm run dataset:release -- --schedule",
|
|
36
36
|
"declarations:lint": "node bin/ota.js lint",
|
|
37
37
|
"declarations:validate": "node bin/ota.js validate",
|
package/src/index.js
CHANGED
|
@@ -6,7 +6,7 @@ import logger from './logger/index.js';
|
|
|
6
6
|
import Notifier from './notifier/index.js';
|
|
7
7
|
import Tracker from './tracker/index.js';
|
|
8
8
|
|
|
9
|
-
export default async function track({ services = [], documentTypes, refilterOnly, schedule }) {
|
|
9
|
+
export default async function track({ services = [], termsTypes: documentTypes, refilterOnly, schedule }) {
|
|
10
10
|
const archivist = new Archivist({ recorderConfig: config.get('recorder') });
|
|
11
11
|
|
|
12
12
|
archivist.attach(logger);
|
|
package/scripts/dataset/README.md
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# Dataset release
|
|
2
|
-
|
|
3
|
-
Export the versions dataset into a ZIP file and publish it to GitHub releases.
|
|
4
|
-
|
|
5
|
-
## Configuring
|
|
6
|
-
|
|
7
|
-
You can change the configuration in the appropriate config file in the `config` folder. See the [main README](../../README.md#configuring) for documentation on using the configuration file.
|
|
8
|
-
|
|
9
|
-
## Running
|
|
10
|
-
|
|
11
|
-
To export the dataset into a local ZIP file:
|
|
12
|
-
|
|
13
|
-
```sh
|
|
14
|
-
node scripts/dataset/main.js [$filename]
|
|
15
|
-
```
|
|
16
|
-
|
|
17
|
-
To export the dataset into a ZIP file and publish it on GitHub releases:
|
|
18
|
-
|
|
19
|
-
```sh
|
|
20
|
-
node scripts/dataset/main.js --publish
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
To export, publish the dataset and remove the local copy that was created after it has been uploaded:
|
|
24
|
-
|
|
25
|
-
```sh
|
|
26
|
-
node scripts/dataset/main.js --publish --remove-local-copy
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
To schedule export, publishing and local copy removal:
|
|
30
|
-
|
|
31
|
-
```sh
|
|
32
|
-
node scripts/dataset/main.js --schedule --publish --remove-local-copy
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## Adding renaming rules
|
|
36
|
-
|
|
37
|
-
See the [renamer module documentation](../utils/renamer/README.md).
|
package/scripts/dataset/main.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import cron from 'croner';
|
|
2
|
-
|
|
3
|
-
import logger from './logger/index.js';
|
|
4
|
-
|
|
5
|
-
import { release } from './index.js';
|
|
6
|
-
|
|
7
|
-
const args = process.argv.slice(2);
|
|
8
|
-
const argsWithoutOptions = args.filter(arg => !arg.startsWith('--'));
|
|
9
|
-
const [fileName] = argsWithoutOptions;
|
|
10
|
-
const shouldSchedule = args.includes('--schedule');
|
|
11
|
-
|
|
12
|
-
const options = {
|
|
13
|
-
fileName,
|
|
14
|
-
shouldPublish: args.includes('--publish'),
|
|
15
|
-
shouldRemoveLocalCopy: args.includes('--remove-local-copy'),
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
if (!shouldSchedule) {
|
|
19
|
-
release(options);
|
|
20
|
-
} else {
|
|
21
|
-
logger.info('The scheduler is running…');
|
|
22
|
-
logger.info('Dataset will be published at 08:30 on every Monday');
|
|
23
|
-
|
|
24
|
-
cron('30 8 * * MON', () => release(options));
|
|
25
|
-
}
|