@opentermsarchive/engine 0.33.1 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "0.33.1",
3
+ "version": "0.34.0",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://github.com/OpenTermsArchive/engine#readme",
6
6
  "bugs": {
@@ -71,6 +71,7 @@
71
71
  "eslint-plugin-import": "^2.29.0",
72
72
  "eslint-plugin-json-format": "^2.0.1",
73
73
  "express": "^4.18.2",
74
+ "express-async-errors": "^3.1.1",
74
75
  "fs-extra": "^10.0.0",
75
76
  "helmet": "^6.0.1",
76
77
  "http-proxy-agent": "^5.0.0",
@@ -2,5 +2,5 @@ import logger from '../logger.js';
2
2
 
3
3
  export default function errorsMiddleware(err, req, res, next) {
4
4
  logger.error(err.stack);
5
- res.status(500).send('Something went wrong!');
5
+ res.status(500).json({ error: err.message });
6
6
  }
@@ -13,7 +13,7 @@ export default function specsRouter(basePath) {
13
13
  definition: {
14
14
  openapi: '3.1.0',
15
15
  info: {
16
- title: 'Open Terms Archive API',
16
+ title: 'Open Terms Archive API [Beta]',
17
17
  version: '1.0.0',
18
18
  license: {
19
19
  name: 'EUPL-1.2',
@@ -10,7 +10,7 @@ describe('Docs API', () => {
10
10
  describe('GET /docs', () => {
11
11
  let response;
12
12
 
13
- context('When requested as JSON', () => {
13
+ context('when requested as JSON', () => {
14
14
  before(async () => {
15
15
  response = await request(app).get(`${basePath}/v1/docs/`).set('Accept', 'application/json');
16
16
  });
@@ -50,11 +50,15 @@ describe('Docs API', () => {
50
50
  it('/service/{serviceId}', () => {
51
51
  expect(subject).to.have.property('/service/{serviceId}');
52
52
  });
53
+
54
+ it('/version/{serviceId}/{termsType}/{date}', () => {
55
+ expect(subject).to.have.property('/version/{serviceId}/{termsType}/{date}');
56
+ });
53
57
  });
54
58
  });
55
59
  });
56
60
 
57
- context('When requested as HTML', () => {
61
+ context('when requested as HTML', () => {
58
62
  before(async () => {
59
63
  response = await request(app).get(`${basePath}/v1/docs/`);
60
64
  });
@@ -3,6 +3,7 @@ import helmet from 'helmet';
3
3
 
4
4
  import docsRouter from './docs.js';
5
5
  import servicesRouter from './services.js';
6
+ import versionsRouter from './versions.js';
6
7
 
7
8
  export default function apiRouter(basePath) {
8
9
  const router = express.Router();
@@ -27,6 +28,7 @@ export default function apiRouter(basePath) {
27
28
  });
28
29
 
29
30
  router.use(servicesRouter);
31
+ router.use(versionsRouter);
30
32
 
31
33
  return router;
32
34
  }
@@ -106,7 +106,7 @@ describe('Services API', () => {
106
106
  });
107
107
  });
108
108
 
109
- context('With a case-insensitive service ID parameter', () => {
109
+ context('with a case-insensitive service ID parameter', () => {
110
110
  before(async () => {
111
111
  response = await request(app).get(`${basePath}/v1/service/${encodeURI(CASE_INSENSITIVE_SERVICE_ID)}`);
112
112
  });
@@ -152,7 +152,7 @@ describe('Services API', () => {
152
152
  });
153
153
  });
154
154
 
155
- context('When no matching service is found', () => {
155
+ context('when no matching service is found', () => {
156
156
  before(async () => {
157
157
  response = await request(app).get(`${basePath}/v1/service/nonExistentService`);
158
158
  });
@@ -0,0 +1,110 @@
1
+ import config from 'config';
2
+ import express from 'express';
3
+
4
+ import RepositoryFactory from '../../archivist/recorder/repositories/factory.js';
5
+ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
6
+
7
+ /**
8
+ * @swagger
9
+ * tags:
10
+ * name: Versions
11
+ * description: Versions API
12
+ * components:
13
+ * schemas:
14
+ * Version:
15
+ * type: object
16
+ * description: Version content and metadata
17
+ * properties:
18
+ * fetchDate:
19
+ * type: string
20
+ * format: date-time
21
+ * description: The date and time at which the version was recorded, as an ISO 8601 string.
22
+ * id:
23
+ * type: string
24
+ * description: The ID of the version.
25
+ * content:
26
+ * type: string
27
+ * description: The JSON-escaped Markdown content of the version.
28
+ */
29
+ const router = express.Router();
30
+
31
+ const versionsRepository = await RepositoryFactory.create(config.get('recorder.versions.storage')).initialize();
32
+
33
+ /**
34
+ * @swagger
35
+ * /version/{serviceId}/{termsType}/{date}:
36
+ * get:
37
+ * summary: Get a specific version of some terms at a given date.
38
+ * tags: [Versions]
39
+ * produces:
40
+ * - application/json
41
+ * parameters:
42
+ * - in: path
43
+ * name: serviceId
44
+ * description: The ID of the service whose version will be returned.
45
+ * schema:
46
+ * type: string
47
+ * required: true
48
+ * - in: path
49
+ * name: termsType
50
+ * description: The type of terms whose version will be returned.
51
+ * schema:
52
+ * type: string
53
+ * required: true
54
+ * - in: path
55
+ * name: date
56
+ * description: The date and time for which the version is requested, in ISO 8601 format.
57
+ * schema:
58
+ * type: string
59
+ * format: date-time
60
+ * required: true
61
+ * responses:
62
+ * 200:
63
+ * description: A JSON object containing the version content and metadata.
64
+ * content:
65
+ * application/json:
66
+ * schema:
67
+ * $ref: '#/components/schemas/Version'
68
+ * 404:
69
+ * description: No version found for the specified combination of service ID, terms type and date.
70
+ * content:
71
+ * application/json:
72
+ * schema:
73
+ * type: object
74
+ * properties:
75
+ * error:
76
+ * type: string
77
+ * description: Error message indicating that no version is found.
78
+ * 416:
79
+ * description: The requested date is in the future.
80
+ * content:
81
+ * application/json:
82
+ * schema:
83
+ * type: object
84
+ * properties:
85
+ * error:
86
+ * type: string
87
+ * description: Error message indicating that the requested date is in the future.
88
+ */
89
+ router.get('/version/:serviceId/:termsType/:date', async (req, res) => {
90
+ const { serviceId, termsType, date } = req.params;
91
+ const requestedDate = new Date(date);
92
+
93
+ if (requestedDate > new Date()) {
94
+ return res.status(416).json({ error: 'Requested version is in the future' });
95
+ }
96
+
97
+ const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate);
98
+
99
+ if (!version) {
100
+ return res.status(404).json({ error: `No version found for date ${date}` });
101
+ }
102
+
103
+ return res.status(200).json({
104
+ id: version.id,
105
+ fetchDate: toISODateWithoutMilliseconds(version.fetchDate),
106
+ content: version.content,
107
+ });
108
+ });
109
+
110
+ export default router;
@@ -0,0 +1,120 @@
1
+ import chai from 'chai';
2
+ import config from 'config';
3
+ import supertest from 'supertest';
4
+
5
+ import RepositoryFactory from '../../archivist/recorder/repositories/factory.js';
6
+ import Version from '../../archivist/recorder/version.js';
7
+ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
8
+ import app from '../server.js';
9
+
10
+ const basePath = config.get('api.basePath');
11
+
12
+ const { expect } = chai;
13
+ const request = supertest(app);
14
+
15
+ describe('Versions API', () => {
16
+ describe('GET /version/:serviceId/:termsType/:date', () => {
17
+ let expectedResult;
18
+ let versionsRepository;
19
+ const FETCH_DATE = new Date('2023-01-01T12:00:00Z');
20
+ const VERSION_COMMON_ATTRIBUTES = {
21
+ serviceId: 'service-1',
22
+ termsType: 'Terms of Service',
23
+ snapshotId: ['snapshot_id'],
24
+ };
25
+
26
+ before(async () => {
27
+ versionsRepository = RepositoryFactory.create(config.get('recorder.versions.storage'));
28
+
29
+ await versionsRepository.initialize();
30
+
31
+ const ONE_HOUR = 60 * 60 * 1000;
32
+
33
+ await versionsRepository.save(new Version({
34
+ ...VERSION_COMMON_ATTRIBUTES,
35
+ content: 'initial content',
36
+ fetchDate: new Date(new Date(FETCH_DATE).getTime() - ONE_HOUR),
37
+ }));
38
+
39
+ const version = new Version({
40
+ ...VERSION_COMMON_ATTRIBUTES,
41
+ content: 'updated content',
42
+ fetchDate: FETCH_DATE,
43
+ });
44
+
45
+ await versionsRepository.save(version);
46
+
47
+ await versionsRepository.save(new Version({
48
+ ...VERSION_COMMON_ATTRIBUTES,
49
+ content: 'latest content',
50
+ fetchDate: new Date(new Date(FETCH_DATE).getTime() + ONE_HOUR),
51
+ }));
52
+
53
+ expectedResult = {
54
+ id: version.id,
55
+ fetchDate: toISODateWithoutMilliseconds(version.fetchDate),
56
+ content: version.content,
57
+ };
58
+ });
59
+
60
+ after(async () => versionsRepository.removeAll());
61
+
62
+ let response;
63
+
64
+ context('when a version is found', () => {
65
+ before(async () => {
66
+ response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`);
67
+ });
68
+
69
+ it('responds with 200 status code', () => {
70
+ expect(response.status).to.equal(200);
71
+ });
72
+
73
+ it('responds with Content-Type application/json', () => {
74
+ expect(response.type).to.equal('application/json');
75
+ });
76
+
77
+ it('returns the expected version', () => {
78
+ expect(response.body).to.deep.equal(expectedResult);
79
+ });
80
+ });
81
+
82
+ context('when the requested date is anterior to the first available version', () => {
83
+ before(async () => {
84
+ response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/2000-01-01T12:00:00Z`);
85
+ });
86
+
87
+ it('responds with 404 status code', () => {
88
+ expect(response.status).to.equal(404);
89
+ });
90
+
91
+ it('responds with Content-Type application/json', () => {
92
+ expect(response.type).to.equal('application/json');
93
+ });
94
+
95
+ it('returns an error message', () => {
96
+ expect(response.body.error).to.contain('No version found').and.to.contain('2000-01-01T12:00:00Z');
97
+ });
98
+ });
99
+
100
+ context('when the requested date is in the future', () => {
101
+ before(async () => {
102
+ const dateInTheFuture = new Date(Date.now() + 60000); // 1 minute in the future
103
+
104
+ response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(dateInTheFuture))}`);
105
+ });
106
+
107
+ it('responds with 416 status code', () => {
108
+ expect(response.status).to.equal(416);
109
+ });
110
+
111
+ it('responds with Content-Type application/json', () => {
112
+ expect(response.type).to.equal('application/json');
113
+ });
114
+
115
+ it('returns an error message', () => {
116
+ expect(response.body.error).to.equal('Requested version is in the future');
117
+ });
118
+ });
119
+ });
120
+ });
package/src/api/server.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import config from 'config';
2
2
  import express from 'express';
3
3
 
4
+ import 'express-async-errors';
4
5
  import logger from './logger.js';
5
6
  import errorsMiddleware from './middlewares/errors.js';
6
7
  import loggerMiddleware from './middlewares/logger.js';
@@ -12,12 +13,17 @@ if (process.env.NODE_ENV !== 'test') {
12
13
  app.use(loggerMiddleware);
13
14
  }
14
15
 
15
- const basePath = `${config.get('api.basePath')}/v1`;
16
+ const BASE_PATH = `${config.get('api.basePath')}/v1`;
16
17
 
17
- app.use(basePath, apiRouter(basePath));
18
+ app.use(BASE_PATH, apiRouter(BASE_PATH));
18
19
  app.use(errorsMiddleware);
19
20
 
20
- app.listen(config.get('api.port'));
21
- logger.info('Start Open Terms Archive API\n');
21
+ const port = config.get('api.port');
22
+
23
+ app.listen(port);
24
+
25
+ if (process.env.NODE_ENV !== 'test') {
26
+ logger.info(`Start Open Terms Archive API on http://localhost:${port}${BASE_PATH}`);
27
+ }
22
28
 
23
29
  export default app;
@@ -71,6 +71,13 @@ export default class GitRepository extends RepositoryInterface {
71
71
  return this.#toDomain(commit);
72
72
  }
73
73
 
74
+ async findByDate(serviceId, termsType, date, documentId) {
75
+ const filePath = DataMapper.generateFilePath(serviceId, termsType, documentId);
76
+ const commit = await this.git.getCommit([ `--until=${date?.toISOString()}`, filePath ]);
77
+
78
+ return this.#toDomain(commit);
79
+ }
80
+
74
81
  async findById(recordId) {
75
82
  const commit = await this.git.getCommit([recordId]);
76
83
 
@@ -374,6 +374,69 @@ describe('GitRepository', () => {
374
374
  });
375
375
  });
376
376
 
377
+ describe('#findByDate', () => {
378
+ context('when there are records for the given service', () => {
379
+ let recordToFindId;
380
+ let recordFound;
381
+
382
+ context('when a record exists for the requested service and date', () => {
383
+ const UPDATED_FILE_CONTENT = `${CONTENT} (with additional content to trigger a record)`;
384
+
385
+ before(async () => {
386
+ await subject.save(new Version({
387
+ serviceId: SERVICE_PROVIDER_ID,
388
+ termsType: TERMS_TYPE,
389
+ content: CONTENT,
390
+ fetchDate: FETCH_DATE_EARLIER,
391
+ snapshotIds: [SNAPSHOT_ID],
392
+ }));
393
+
394
+ ({ id: recordToFindId } = await subject.save(new Version({
395
+ serviceId: SERVICE_PROVIDER_ID,
396
+ termsType: TERMS_TYPE,
397
+ content: UPDATED_FILE_CONTENT,
398
+ fetchDate: FETCH_DATE,
399
+ snapshotIds: [SNAPSHOT_ID],
400
+ })));
401
+
402
+ await subject.save(new Version({
403
+ serviceId: SERVICE_PROVIDER_ID,
404
+ termsType: TERMS_TYPE,
405
+ content: `${CONTENT}CONTENT`,
406
+ fetchDate: FETCH_DATE_LATER,
407
+ snapshotIds: [SNAPSHOT_ID],
408
+ }));
409
+
410
+ const oneHourBeforeFetchDateLater = new Date(FETCH_DATE_LATER.getTime() - 60 * 60 * 1000);
411
+
412
+ recordFound = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE, oneHourBeforeFetchDateLater);
413
+ });
414
+
415
+ after(async () => subject.removeAll());
416
+
417
+ it('returns a Version object', () => {
418
+ expect(recordFound).to.be.an.instanceof(Version);
419
+ });
420
+
421
+ it('returns the latest record id', () => {
422
+ expect(recordFound.id).to.include(recordToFindId);
423
+ });
424
+ });
425
+ });
426
+
427
+ context('when there are no records for the given service', () => {
428
+ let recordFound;
429
+
430
+ before(async () => {
431
+ recordFound = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE);
432
+ });
433
+
434
+ it('returns null', async () => {
435
+ expect(recordFound).to.equal(null);
436
+ });
437
+ });
438
+ });
439
+
377
440
  describe('#findAll', () => {
378
441
  let records;
379
442
  const expectedIds = [];
@@ -47,6 +47,20 @@ class RepositoryInterface {
47
47
  throw new Error(`#findLatest method is not implemented in ${this.constructor.name}`);
48
48
  }
49
49
 
50
+ /**
51
+ * Find the record that was valid on the given date and that matches the given service ID, terms type and, optionally, document ID.
52
+ * In case of snapshots, if the record is related to terms extracted from multiple source documents, the document ID is required to find the source snapshot
53
+ *
54
+ * @param {string} serviceId - Service ID of record to find
55
+ * @param {string} termsType - Terms type of record to find
56
+ * @param {Date} date - Datetime on which the record to find was valid
57
+ * @param {string} [documentId] - Document ID of record to find. Used to identify the source in terms extracted from multiple source documents. Not necessary for terms with a single source document
58
+ * @returns {Promise<Record>} Promise that will be resolved with the found record or `null` if none match the given criteria
59
+ */
60
+ async findByDate(serviceId, termsType, date, documentId) {
61
+ throw new Error(`#findByDate method is not implemented in ${this.constructor.name}`);
62
+ }
63
+
50
64
  /**
51
65
  * Find the record that matches the given record ID
52
66
  *
@@ -58,6 +58,12 @@ export default class MongoRepository extends RepositoryInterface {
58
58
  return this.#toDomain(mongoDocument);
59
59
  }
60
60
 
61
+ async findByDate(serviceId, termsType, date) {
62
+ const [mongoDocument] = await this.collection.find({ serviceId, termsType, fetchDate: { $lte: new Date(date) } }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one mongo document use `find`
63
+
64
+ return this.#toDomain(mongoDocument);
65
+ }
66
+
61
67
  async findById(recordId) {
62
68
  const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) });
63
69
 
@@ -391,6 +391,57 @@ describe('MongoRepository', () => {
391
391
  });
392
392
  });
393
393
 
394
+ describe('#findByDate', () => {
395
+ context('when there are records for the given service', () => {
396
+ let recordToFindId;
397
+ let recordFound;
398
+
399
+ context('when a record exists for the requested service and date', () => {
400
+ const UPDATED_FILE_CONTENT = `${CONTENT} (with additional content to trigger a record)`;
401
+
402
+ before(async () => {
403
+ await subject.save(new Version({
404
+ serviceId: SERVICE_PROVIDER_ID,
405
+ termsType: TERMS_TYPE,
406
+ content: CONTENT,
407
+ fetchDate: FETCH_DATE_EARLIER,
408
+ snapshotIds: [SNAPSHOT_ID],
409
+ }));
410
+
411
+ ({ id: recordToFindId } = await subject.save(new Version({
412
+ serviceId: SERVICE_PROVIDER_ID,
413
+ termsType: TERMS_TYPE,
414
+ content: UPDATED_FILE_CONTENT,
415
+ fetchDate: FETCH_DATE,
416
+ snapshotIds: [SNAPSHOT_ID],
417
+ })));
418
+
419
+ await subject.save(new Version({
420
+ serviceId: SERVICE_PROVIDER_ID,
421
+ termsType: TERMS_TYPE,
422
+ content: `${CONTENT}CONTENT`,
423
+ fetchDate: FETCH_DATE_LATER,
424
+ snapshotIds: [SNAPSHOT_ID],
425
+ }));
426
+
427
+ const oneHourBeforeFetchDateLater = new Date(FETCH_DATE_LATER.getTime() - 60 * 60 * 1000);
428
+
429
+ recordFound = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE, oneHourBeforeFetchDateLater);
430
+ });
431
+
432
+ after(async () => subject.removeAll());
433
+
434
+ it('returns a Version object', () => {
435
+ expect(recordFound).to.be.an.instanceof(Version);
436
+ });
437
+
438
+ it('returns the latest record id', () => {
439
+ expect(recordFound.id).to.include(recordToFindId);
440
+ });
441
+ });
442
+ });
443
+ });
444
+
394
445
  describe('#findAll', () => {
395
446
  let records;
396
447
  const expectedIds = [];
@@ -0,0 +1,3 @@
1
+ export function toISODateWithoutMilliseconds(date) {
2
+ return new Date(date).toISOString().replace(/\.\d+/, '');
3
+ }
@@ -0,0 +1,28 @@
1
+ import { expect } from 'chai';
2
+
3
+ import { toISODateWithoutMilliseconds } from './date.js';
4
+
5
+ describe('toISODateWithoutMilliseconds', () => {
6
+ const EXPECTED_RESULT = '2023-12-06T12:34:56Z';
7
+ const INPUTS = {
8
+ 'valid ISO 8601 date string': '2023-12-06T12:34:56.789Z',
9
+ 'parsable date string not in ISO 8601 format': 'Wed, 06 Dec 2023 12:34:56 GMT',
10
+ 'a valid ISO 8601 date string without milliseconds': '2023-12-06T12:34:56Z',
11
+ };
12
+
13
+ Object.entries(INPUTS).forEach(([ description, input ]) => {
14
+ context(`with ${description}`, () => {
15
+ it('returns the given date in ISO 8601 format without milliseconds', () => {
16
+ expect(toISODateWithoutMilliseconds(input)).to.equal(EXPECTED_RESULT);
17
+ });
18
+ });
19
+ });
20
+
21
+ context('with an invalid date string', () => {
22
+ it('throws an error', () => {
23
+ const inputDate = 'invalidDateString';
24
+
25
+ expect(() => toISODateWithoutMilliseconds(inputDate)).to.throw(Error);
26
+ });
27
+ });
28
+ });
@@ -1,5 +1,7 @@
1
1
  import mime from 'mime';
2
2
 
3
+ import { toISODateWithoutMilliseconds } from '../archivist/utils/date.js';
4
+
3
5
  import GitHub from './github.js';
4
6
 
5
7
  const CONTRIBUTION_TOOL_URL = 'https://contribute.opentermsarchive.org/en/service';
@@ -78,7 +80,7 @@ No changes were found in the last run, so no new version has been recorded.`,
78
80
  generateDescription({ error, terms }) {
79
81
  const date = new Date();
80
82
  const currentFormattedDate = date.toLocaleDateString('en-GB', { year: 'numeric', month: 'long', day: 'numeric', hour: 'numeric', minute: 'numeric', second: 'numeric', timeZoneName: 'short', timeZone: 'UTC' });
81
- const validUntil = date.toISOString().replace(/\.\d+/, ''); // ISO date without milliseconds
83
+ const validUntil = toISODateWithoutMilliseconds(date);
82
84
 
83
85
  const hasSnapshots = terms.sourceDocuments.every(sourceDocument => sourceDocument.snapshotId);
84
86