@opentermsarchive/engine 4.0.2 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/ota-lint.js CHANGED
@@ -12,7 +12,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
  const LINT_TEST_FILEPATH = '../scripts/declarations/lint/index.mocha.js';
  const LINT_PATH = path.resolve(__dirname, LINT_TEST_FILEPATH);

- // Mocha catches unhandled rejection from the user code and re-emits them to the process (see https://github.com/mochajs/mocha/blob/master/lib/runner.js#L198)
+ // Mocha catches unhandled rejection from the user code and re-emits them to the process
  process.on('unhandledRejection', reason => {
  // Re-throw them so that the validation command fails in these cases (for example, if there is a syntax error when parsing JSON declaration files)
  throw reason;
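
Note on the pattern this hunk touches: only the comment changes, but the surrounding re-throw of unhandled rejections is what makes the CLI fail when an asynchronous error (such as a JSON parse error in a declaration file) would otherwise be swallowed. A minimal standalone sketch of that behaviour, not the engine's actual file:

```js
// Minimal sketch (not the engine's file): re-throwing inside the
// 'unhandledRejection' handler turns the rejection into an uncaught
// exception, so the Node.js process exits with a non-zero status.
process.on('unhandledRejection', reason => {
  throw reason;
});

// A rejected promise that nothing awaits, e.g. a declaration file
// failing to parse during an asynchronous load.
Promise.reject(new Error('Unexpected token in JSON'));
```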
@@ -1,6 +1,5 @@
  #! /usr/bin/env node
  import './env.js';
-
  import path from 'path';
  import { fileURLToPath } from 'url';

@@ -9,49 +8,71 @@ import Mocha from 'mocha';

  const __dirname = path.dirname(fileURLToPath(import.meta.url));

- const VALIDATE_TEST_FILEPATH = '../scripts/declarations/validate/index.mocha.js';
- const VALIDATE_PATH = path.resolve(__dirname, VALIDATE_TEST_FILEPATH);
+ export function createMocha({ delay = false, reporter = 'spec' } = {}) {
+ return new Mocha({
+ delay,
+ failZero: true,
+ reporter,
+ });
+ }
+
+ export async function runMochaTests(mocha, testPath) {
+ try {
+ mocha.addFile(testPath); // With `delay` option, this statement will not load the file directly, `loadFilesAsync` is required.
+ await mocha.loadFilesAsync(); // Load files previously added to the Mocha cache with `addFile`.
+
+ return new Promise(resolve => {
+ let hasFailedTests = false;
+
+ mocha.run()
+ .on('fail', () => { hasFailedTests = true; })
+ .on('end', () => { resolve(hasFailedTests ? 1 : 0); });
+ });
+ } catch (error) {
+ console.error('Error running tests:', error);
+
+ return 2;
+ }
+ }

- // Mocha catches unhandled rejection from the user code and re-emits them to the process (see https://github.com/mochajs/mocha/blob/master/lib/runner.js#L198)
- process.on('unhandledRejection', reason => {
- // Re-throw them so that the validation command fails in these cases (for example, if there is a syntax error when parsing JSON declaration files)
- throw reason;
+ process.on('unhandledRejection', reason => { // Mocha catches unhandled rejection from the user code and re-emits them to the process
+ throw reason; // Re-throw them so that the validation command fails in these cases (for example, if there is a syntax error when parsing JSON declaration files)
  });

  program
  .name('ota validate')
+ .description('Validate terms declarations and metadata files');
+
+ program.command('declarations')
  .description('Run a series of tests to check the validity of terms declarations')
  .option('-s, --services [serviceId...]', 'service IDs of services to validate')
  .option('-t, --types [termsType...]', 'terms types to validate')
  .option('-m, --modified', 'target only services modified in the current git branch')
- .option('-o, --schema-only', 'much faster check of declarations, but does not check that the documents are actually accessible');
+ .option('-o, --schema-only', 'much faster check of declarations, but does not check that the documents are actually accessible')
+ .action(async options => {
+ const VALIDATE_TEST_FILEPATH = '../scripts/declarations/validate/index.mocha.js';
+ const VALIDATE_PATH = path.resolve(__dirname, VALIDATE_TEST_FILEPATH);

- const mocha = new Mocha({
- delay: true, // as the validation script performs an asynchronous load before running the tests, the execution of the tests are delayed until run() is called
- failZero: true, // consider that being called with no service to validate is a failure
- });
+ const mocha = createMocha({ delay: true }); // as the validation script performs an asynchronous load before running the tests, the execution of the tests are delayed until run() is called
+ const generateValidationTestSuite = (await import(VALIDATE_TEST_FILEPATH)).default;

- (async () => {
- mocha.addFile(VALIDATE_PATH); // As `delay` has been called, this statement will not load the file directly, `loadFilesAsync` is required.
- await mocha.loadFilesAsync() // Load files previously added to the Mocha cache with `addFile`.
- .catch(error => {
- console.error(error);
- process.exit(2);
- });
+ generateValidationTestSuite(options);

- let hasFailedTests = false;
+ const exitCode = await runMochaTests(mocha, VALIDATE_PATH);

- const generateValidationTestSuite = (await import(VALIDATE_TEST_FILEPATH)).default;
+ process.exit(exitCode);
+ });

- generateValidationTestSuite(program.parse().opts());
+ program.command('metadata')
+ .description('Validate the metadata file structure')
+ .action(async () => {
+ const VALIDATE_TEST_FILEPATH = '../scripts/metadata/index.mocha.js';
+ const VALIDATE_PATH = path.resolve(__dirname, VALIDATE_TEST_FILEPATH);

- mocha.run()
- .on('fail', () => { hasFailedTests = true; })
- .on('end', () => {
- if (hasFailedTests) {
- process.exit(1);
- }
+ const mocha = createMocha();
+ const exitCode = await runMochaTests(mocha, VALIDATE_PATH);

- process.exit(0);
- });
- })();
+ process.exit(exitCode);
+ });
+
+ program.parse();
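
The rewrite above splits the former single-purpose validate script into two commander subcommands (invoked as `node bin/ota.js validate declarations` and `node bin/ota.js validate metadata`, per the package.json scripts below) and extracts the Mocha plumbing into exported `createMocha` and `runMochaTests` helpers. A hedged sketch of how those helpers compose, assuming the file can be imported as a module; the import path and test file name are illustrative, not confirmed by this diff:

```js
// Hedged sketch: reusing the exported helpers from another ESM script.
// './ota-validate.js' and 'some-suite.mocha.js' are assumed names for illustration.
import path from 'path';
import { fileURLToPath } from 'url';

import { createMocha, runMochaTests } from './ota-validate.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// `delay: true` defers execution until the loaded suite calls run() itself,
// which the declarations suite needs because it builds its tests asynchronously.
const mocha = createMocha({ delay: true });

// Exit codes mirror runMochaTests: 0 = passed, 1 = failures, 2 = suite could not run.
const exitCode = await runMochaTests(mocha, path.resolve(__dirname, 'some-suite.mocha.js'));

process.exit(exitCode);
```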
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@opentermsarchive/engine",
- "version": "4.0.2",
+ "version": "4.2.0",
  "description": "Tracks and makes visible changes to the terms of online services",
  "homepage": "https://opentermsarchive.org",
  "bugs": {
@@ -36,10 +36,11 @@
  "dataset:release": "node bin/ota.js dataset --publish --remove-local-copy",
  "dataset:scheduler": "npm run dataset:release -- --schedule",
  "declarations:lint": "node bin/ota.js lint",
- "declarations:validate": "node bin/ota.js validate",
+ "declarations:validate": "node bin/ota.js validate declarations",
  "declarations:validate:schema": "npm run declarations:validate -- --schema-only",
  "lint": "eslint src test scripts bin",
  "lint:fix": "npm run lint -- --fix",
+ "metadata:validate": "node bin/ota.js validate metadata",
  "start": "node -r dotenv/config --max-http-header-size=32768 bin/ota.js track",
  "start:api": "node bin/ota.js serve",
  "start:scheduler": "npm start -- --schedule",
@@ -54,7 +55,8 @@
  "@opentermsarchive/turndown": "^7.1.3",
  "@stylistic/eslint-plugin-js": "^1.4.1",
  "abort-controller": "^3.0.0",
- "ajv": "^6.12.6",
+ "ajv": "^8.17.1",
+ "ajv-formats": "^3.0.1",
  "archiver": "^5.3.0",
  "async": "^3.2.2",
  "chai": "^4.3.4",
@@ -2,6 +2,7 @@ import fsApi from 'fs';
  import path from 'path';

  import Ajv from 'ajv';
+ import addFormats from 'ajv-formats';
  import { expect } from 'chai';
  import config from 'config';
  import jsonSourceMap from 'json-source-map';
@@ -178,10 +179,9 @@ export default async options => {
  run();
  };

- const validator = new Ajv({
- allErrors: true,
- jsonPointers: true,
- });
+ const validator = new Ajv({ allErrors: true });
+
+ addFormats(validator);

  function assertValid(schema, subject) {
  const valid = validator.validate(schema, subject);
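
This hunk tracks the ajv ^6 → ^8 bump in package.json: the Ajv 6-only `jsonPointers` option is gone (error paths are always JSON Pointers in v8) and string formats such as `uri` now come from the separate ajv-formats plugin. A hedged sketch of the v8 pattern with a throwaway schema, not the engine's validator:

```js
// Throwaway example of the Ajv 8 + ajv-formats pattern; not the engine's schema.
import Ajv from 'ajv';
import addFormats from 'ajv-formats';

const ajv = new Ajv({ allErrors: true }); // `jsonPointers` was removed in Ajv 8
addFormats(ajv); // formats like 'uri', 'email' or 'date' now come from ajv-formats

const schema = {
  type: 'object',
  required: ['homepage'],
  properties: { homepage: { type: 'string', format: 'uri' } },
};

const validate = ajv.compile(schema);

if (!validate({ homepage: 'not a url' })) {
  // Ajv 8 reports error locations as `instancePath` (named `dataPath` in Ajv 6).
  console.log(validate.errors.map(error => `${error.instancePath} ${error.message}`));
}
```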
@@ -193,7 +193,6 @@ function assertValid(schema, subject) {
  const jsonLines = sourceMap.json.split('\n');

  validator.errors.forEach(error => {
- console.log('error', error);
  errorMessage += `\n\n${validator.errorsText([error])}`;
  const errorPointer = sourceMap.pointers[error.dataPath];

@@ -67,7 +67,10 @@ const schema = {
  singleSourceDocumentTerms: {
  allOf: [
  { $ref: '#/definitions/sourceDocument' },
- { required: [ 'fetch', 'select' ] },
+ {
+ type: 'object',
+ required: [ 'fetch', 'select' ],
+ },
  ],
  },
  multipleSourceDocumentsTerms: {
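
Context for the added `type: 'object'`: in JSON Schema, `required` only constrains instances that are already objects, so on its own it silently accepts strings, numbers or arrays. A standalone Ajv sketch of the difference (not the engine's schema):

```js
// Standalone sketch (not the engine's schema): `required` alone ignores
// non-object instances, which is what adding `type: 'object'` fixes.
import Ajv from 'ajv';

const ajv = new Ajv();

const requiredOnly = { required: ['fetch', 'select'] };
const withType = { type: 'object', required: ['fetch', 'select'] };

console.log(ajv.validate(requiredOnly, 'just a string')); // true: nothing to check on a string
console.log(ajv.validate(withType, 'just a string')); // false: must be an object
console.log(ajv.validate(withType, { fetch: 'https://example.com', select: 'body' })); // true
```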
@@ -0,0 +1,85 @@
+ import fs from 'fs/promises';
+ import path from 'path';
+
+ import Ajv from 'ajv';
+ import addFormats from 'ajv-formats';
+ import config from 'config';
+ import Croner from 'croner';
+ import yaml from 'js-yaml';
+
+ import specsRouter from '../../src/collection-api/routes/docs.js';
+
+ describe('Metadata file validation', () => {
+ const formatValidators = {
+ 'iso639-1': code => /^[a-z]{2}$/.test(code),
+ 'iso3166-2': code => /^[A-Z]{2}(-[A-Z0-9]{1,3})?$/.test(code),
+ 'cron-expression': cronExpression => {
+ try {
+ Croner(cronExpression); // eslint-disable-line new-cap
+
+ return true;
+ } catch {
+ return false;
+ }
+ },
+ };
+
+ const formatMessages = {
+ 'iso639-1': 'must be a valid ISO 639-1 language code (two lowercase letters, e.g., "en", "fr")',
+ 'iso3166-2': 'must be a valid ISO 3166-2 region code (two uppercase letters, e.g., "FR", "US")',
+ 'cron-expression': 'must be a valid cron expression (see https://en.wikipedia.org/wiki/Cron#Cron_expression)',
+ };
+
+ let metadata;
+ let validate;
+
+ before(async () => {
+ const { specs } = specsRouter(''); // Extract Metadata OpenAPI specification from JSDoc comments in the collection API router to validate the metadata schema. Can be achieved until API specification and Metadata file schema diverge
+ const metadataSchema = specs.components.schemas.Metadata;
+ const collectionPath = path.resolve(process.cwd(), config.get('@opentermsarchive/engine.collectionPath'));
+ const metadataContent = await fs.readFile(path.join(collectionPath, 'metadata.yml'), 'utf8');
+
+ metadata = yaml.load(metadataContent, { schema: yaml.CORE_SCHEMA }); // Use CORE_SCHEMA to parse dates as strings rather than JavaScript Date objects
+
+ const ajv = new Ajv({ allErrors: true });
+
+ addFormats(ajv);
+
+ Object.entries(formatValidators).forEach(([ format, validator ]) => {
+ ajv.addFormat(format, { type: 'string', validate: validator });
+ });
+
+ validate = ajv.compile(metadataSchema);
+ validate(metadata);
+ });
+
+ it('is valid', () => {
+ if (!validate.errors) {
+ return;
+ }
+
+ const errors = validate.errors.map(error => {
+ const instancePath = error.instancePath.split('/').slice(1);
+ const actualValue = instancePath.reduce((obj, key) => obj?.[key], metadata);
+ const basePath = error.instancePath || '/root';
+
+ if (error.keyword === 'additionalProperties') {
+ return `- ${basePath}: Found unexpected property "${error.params.additionalProperty}"`;
+ }
+
+ if (error.keyword === 'format' && formatMessages[error.params.format]) {
+ return `- ${basePath}: "${actualValue}" ${formatMessages[error.params.format]}`;
+ }
+
+ let message = `- ${basePath}: "${actualValue}" ${error.message}`;
+
+ if (error.keyword === 'enum') {
+ message += ` "${error.params.allowedValues.join('", "')}"`;
+ }
+
+ return message;
+ });
+
+ throw new Error(`\n${errors.join('\n')}`);
+ });
+ });
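
One detail worth calling out in the new suite: `yaml.CORE_SCHEMA` is what keeps `startDate` values as strings. With js-yaml's default schema, an unquoted ISO date is resolved to a JavaScript Date, which would then fail the schema's `type: string` checks. A small hedged sketch with a throwaway document:

```js
// Throwaway example of the CORE_SCHEMA behaviour relied on above.
import yaml from 'js-yaml';

const document = 'startDate: 2023-01-01';

const withDefaultSchema = yaml.load(document);
const withCoreSchema = yaml.load(document, { schema: yaml.CORE_SCHEMA });

console.log(withDefaultSchema.startDate instanceof Date); // true: resolved as a timestamp
console.log(typeof withCoreSchema.startDate); // 'string': left as '2023-01-01'
```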
@@ -34,5 +34,7 @@ export default function specsRouter(basePath) {
  return swaggerUi.setup(specs)(req, res);
  });

+ router.specs = specs;
+
  return router;
  }
@@ -19,6 +19,7 @@ const PACKAGE_JSON_PATH = '../../../package.json';
  * Metadata:
  * type: object
  * description: Collection metadata
+ * additionalProperties: false
  * properties:
  * id:
  * type: string
@@ -69,18 +70,21 @@ const PACKAGE_JSON_PATH = '../../../package.json';
  * description: URL to the collection logo
  * languages:
  * type: array
+ * description: List of ISO 639-1 (two-letter) language codes representing languages allowed by the collection
  * items:
  * type: string
- * description: List of ISO 639 language codes representing languages allowed by the collection
+ * format: iso639-1
  * jurisdictions:
  * type: array
+ * description: List of ISO 3166-2 country codes representing jurisdictions covered by the collection
  * items:
  * type: string
- * description: List of ISO 3166-2 country codes representing jurisdictions covered by the collection
+ * format: iso3166-2
  * trackingPeriods:
  * type: array
  * items:
  * type: object
+ * additionalProperties: false
  * properties:
  * startDate:
  * type: string
@@ -88,6 +92,7 @@ const PACKAGE_JSON_PATH = '../../../package.json';
  * description: The date when tracking started for this period
  * schedule:
  * type: string
+ * format: cron-expression
  * description: A cron expression defining when terms are tracked (e.g. "0 0 * * *" for daily at midnight)
  * serverLocation:
  * type: string
@@ -100,6 +105,7 @@ const PACKAGE_JSON_PATH = '../../../package.json';
  * type: object
  * additionalProperties:
  * type: object
+ * additionalProperties: false
  * properties:
  * url:
  * type: string
@@ -115,6 +121,11 @@ const PACKAGE_JSON_PATH = '../../../package.json';
  * type: string
  * enum: [host, administrator, curator, maintainer, sponsor]
  * description: Roles of the entity within the governance
+ * i18n:
+ * type: object
+ * description: Internationalization of any of the Metadata properties (except i18n itself) for different language codes
+ * additionalProperties:
+ * type: object
  */
  export default async function metadataRouter(collectionPath, services) {
  const router = express.Router();
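
The schema changes above tighten the documented metadata format: `additionalProperties: false` at several levels, custom `iso639-1`, `iso3166-2` and `cron-expression` formats, and a new `i18n` block. Purely for illustration, an object shaped like the documented properties; every value is invented and only fields visible in this diff are included, so this is not an authoritative metadata example:

```js
// Invented, illustrative values only; fields are limited to those visible in this diff.
const metadata = {
  languages: ['en', 'fr'], // format: iso639-1 (two lowercase letters)
  jurisdictions: ['FR'], // format: iso3166-2 (uppercase region code)
  trackingPeriods: [
    {
      startDate: '2023-01-01',
      schedule: '0 0 * * *', // format: cron-expression, checked with Croner in the test suite
      serverLocation: 'Paris, France',
    },
  ],
  i18n: { // per-language overrides of other metadata properties
    fr: {},
  },
};

console.log(metadata.trackingPeriods[0].schedule);
```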
@@ -18,9 +18,7 @@ export default class GitLab {
  static ISSUE_STATE_ALL = 'all';

  constructor(repository, baseURL = BASE_URL, apiBaseURL = API_BASE_URL) {
- const [ owner, repo ] = repository.split('/');
-
- this.commonParams = { owner, repo };
+ this.repositoryPath = repository;
  this.projectId = null;
  this.baseURL = baseURL;
  console.log('this.baseURL', this.baseURL);
@@ -31,9 +29,8 @@ export default class GitLab {
  const options = GitLab.baseOptionsHttpReq();

  try {
- const repositoryPath = `${this.commonParams.owner}/${this.commonParams.repo}`;
  const response = await nodeFetch(
- `${this.apiBaseURL}/projects/${encodeURIComponent(repositoryPath)}`,
+ `${this.apiBaseURL}/projects/${encodeURIComponent(this.repositoryPath)}`,
  options,
  );

@@ -42,7 +39,7 @@ export default class GitLab {
  if (response.ok) {
  this.projectId = res.id;
  } else {
- logger.error(`Error while obtaining projectId: ${JSON.strinfigy(res)}`);
+ logger.error(`Error while obtaining projectId: ${JSON.stringify(res)}`);
  this.projectId = null;
  }
  } catch (error) {
@@ -367,15 +364,15 @@ export default class GitLab {
  }

  generateDeclarationURL(serviceName) {
- return `${this.baseURL}/${this.commonParams.owner}/${this.commonParams.repo}/-/blob/main/declarations/${encodeURIComponent(serviceName)}.json`;
+ return `${this.baseURL}/${this.repositoryPath}/-/blob/main/declarations/${encodeURIComponent(serviceName)}.json`;
  }

  generateVersionURL(serviceName, termsType) {
- return `${this.baseURL}/${this.commonParams.owner}/${this.commonParams.repo}/-/blob/main/${encodeURIComponent(serviceName)}/${encodeURIComponent(serviceName, termsType)}.md`;
+ return `${this.baseURL}/${this.repositoryPath}/-/blob/main/${encodeURIComponent(serviceName)}/${encodeURIComponent(serviceName, termsType)}.md`;
  }

  generateSnapshotsBaseUrl(serviceName, termsType) {
- return `${this.baseURL}/${this.commonParams.owner}/${this.commonParams.repo}/-/blob/main/${encodeURIComponent(serviceName)}/${encodeURIComponent(termsType)}`;
+ return `${this.baseURL}/${this.repositoryPath}/-/blob/main/${encodeURIComponent(serviceName)}/${encodeURIComponent(termsType)}`;
  }

  // GitLab API responses are not cached unlike GitHub, so this method only exists to satisfy the Reporter interface contract
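
The GitLab reporter change above stops splitting the repository into `owner`/`repo` and keeps the full path, which survives URL-encoding for the Projects API and, presumably, supports projects nested in subgroups. A hedged sketch of the difference with a made-up repository path:

```js
// Made-up repository path; sketch of why keeping the full path matters.
const repository = 'opentermsarchive/demo/versions'; // a project nested in a subgroup

// Splitting keeps only the first two segments and drops the subgroup.
const [owner, repo] = repository.split('/');
console.log(`${owner}/${repo}`); // 'opentermsarchive/demo'

// Keeping the whole path and URL-encoding it targets the right project.
const apiBaseURL = 'https://gitlab.com/api/v4';
console.log(`${apiBaseURL}/projects/${encodeURIComponent(repository)}`);
// 'https://gitlab.com/api/v4/projects/opentermsarchive%2Fdemo%2Fversions'
```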