@opentermsarchive/engine 5.0.2 → 5.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.yaml CHANGED
@@ -1,5 +1,6 @@
1
1
  extends:
2
2
  - airbnb-base
3
+ - plugin:jsdoc/recommended-error
3
4
  parserOptions:
4
5
  ecmaVersion: 2022
5
6
  env:
@@ -11,7 +12,16 @@ plugins:
11
12
  - import
12
13
  - json-format
13
14
  - no-only-tests
15
+ - jsdoc
14
16
  rules:
17
+ jsdoc/require-jsdoc: 0
18
+ jsdoc/check-tag-names:
19
+ - error
20
+ - definedTags:
21
+ - swagger
22
+ jsdoc/check-line-alignment:
23
+ - error
24
+ - always
15
25
  arrow-parens:
16
26
  - error
17
27
  - as-needed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "5.0.2",
3
+ "version": "5.0.4",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://opentermsarchive.org",
6
6
  "bugs": {
@@ -108,6 +108,7 @@
108
108
  "devDependencies": {
109
109
  "@commitlint/cli": "^19.0.3",
110
110
  "dir-compare": "^4.0.0",
111
+ "eslint-plugin-jsdoc": "^50.6.9",
111
112
  "keep-a-changelog": "^2.5.3",
112
113
  "nock": "^13.2.1",
113
114
  "node-stream-zip": "^1.15.0",
@@ -5,6 +5,8 @@ import turndownPluginGithubFlavouredMarkdown from 'joplin-turndown-plugin-gfm';
5
5
  import jsdom from 'jsdom';
6
6
  import mime from 'mime';
7
7
 
8
+ import SourceDocument from '../services/sourceDocument.js';
9
+
8
10
  import { ExtractDocumentError } from './errors.js';
9
11
 
10
12
  export { ExtractDocumentError } from './errors.js';
@@ -23,11 +25,11 @@ const ciceroMarkTransformer = new CiceroMarkTransformer();
23
25
 
24
26
  /**
25
27
  * Extract content from source document and convert it to Markdown
26
- *
27
28
  * @function extract
28
- * @param {string} sourceDocument - Source document from which to extract content, see {@link ./src/archivist/services/sourceDocument.js}
29
- * @returns {Promise<string>} Promise which is fulfilled once the content is extracted and converted in Markdown. The promise will resolve into a string containing the extracted content in Markdown format
30
- */
29
+ * @param {string} sourceDocument Source document from which to extract content, see {@link SourceDocument}
30
+ * @returns {Promise<string>} Promise which is fulfilled once the content is extracted and converted in Markdown. The promise will resolve into a string containing the extracted content in Markdown format
31
+ * @async
32
+ */
31
33
  export default async function extract(sourceDocument) {
32
34
  try {
33
35
  if (sourceDocument.mimeType == mime.getType('pdf')) {
@@ -62,6 +62,12 @@ export default async function fetch(url, cssSelectors, config) {
62
62
  }
63
63
  }
64
64
 
65
+ /**
66
+ * Launches a headless browser instance using Puppeteer if one is not already running. Returns the existing browser instance if one is already running, otherwise creates and returns a new instance.
67
+ * @function launchHeadlessBrowser
68
+ * @returns {Promise<puppeteer.Browser>} The Puppeteer browser instance.
69
+ * @async
70
+ */
65
71
  export async function launchHeadlessBrowser() {
66
72
  if (browser) {
67
73
  return browser;
@@ -72,6 +78,12 @@ export async function launchHeadlessBrowser() {
72
78
  return browser;
73
79
  }
74
80
 
81
+ /**
82
+ * Stops the headless browser instance if one is running. If no instance exists, it does nothing.
83
+ * @function stopHeadlessBrowser
84
+ * @returns {Promise<void>}
85
+ * @async
86
+ */
75
87
  export async function stopHeadlessBrowser() {
76
88
  if (!browser) {
77
89
  return;
@@ -9,17 +9,17 @@ export { FetchDocumentError } from './errors.js';
9
9
 
10
10
  /**
11
11
  * Fetch a resource from the network, returning a promise which is fulfilled once the response is available
12
- *
13
12
  * @function fetch
14
- * @param {Object} params - Fetcher parameters
15
- * @param {string} params.url - URL of the resource you want to fetch
16
- * @param {boolean} [params.executeClientScripts] - Enable execution of client scripts. When set to `true`, this property loads the page in a headless browser to load all assets and execute client scripts before returning its content
17
- * @param {string|Array} [params.cssSelectors] - List of CSS selectors to await when loading the resource in a headless browser. Can be a CSS selector or an array of CSS selectors. Only relevant when `executeClientScripts` is enabled
18
- * @param {Object} [params.config] - Fetcher configuration
19
- * @param {number} [params.config.navigationTimeout] - Maximum time (in milliseconds) to wait before considering the fetch failed
20
- * @param {string} [params.config.language] - Language (in [ISO 639-1 format](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)) to be passed in request headers
21
- * @param {number} [params.config.waitForElementsTimeout] - Maximum time (in milliseconds) to wait for selectors to exist on page before considering the fetch failed. Only relevant when `executeClientScripts` is enabled
22
- * @returns {Promise} @returns {Promise<Object>} Promise which will be resolved with an object containing the `mimeType` and the `content` of the URL as string or Buffer
13
+ * @param {object} params Fetcher parameters
14
+ * @param {string} params.url URL of the resource you want to fetch
15
+ * @param {boolean} [params.executeClientScripts] Enable execution of client scripts. When set to `true`, this property loads the page in a headless browser to load all assets and execute client scripts before returning its content
16
+ * @param {string|Array} [params.cssSelectors] List of CSS selectors to await when loading the resource in a headless browser. Can be a CSS selector or an array of CSS selectors. Only relevant when `executeClientScripts` is enabled
17
+ * @param {object} [params.config] Fetcher configuration
18
+ * @param {number} [params.config.navigationTimeout] Maximum time (in milliseconds) to wait before considering the fetch failed
19
+ * @param {string} [params.config.language] Language (in [ISO 639-1 format](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)) to be passed in request headers
20
+ * @param {number} [params.config.waitForElementsTimeout] Maximum time (in milliseconds) to wait for selectors to exist on page before considering the fetch failed. Only relevant when `executeClientScripts` is enabled
21
+ * @returns {Promise<{ mimeType: string, content: string | Buffer }>} Promise containing the fetched resource's MIME type and content
22
+ * @async
23
23
  */
24
24
  export default async function fetch({
25
25
  url, executeClientScripts, cssSelectors,
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Abstract Class Record.
3
- *
4
3
  * @class Record
4
+ * @private
5
5
  */
6
6
  export default class Record {
7
7
  #content;
@@ -4,120 +4,113 @@
4
4
  * @interface
5
5
  */
6
6
 
7
+ import Record from '../record.js';
8
+
7
9
  /* eslint-disable require-await */
8
10
  class RepositoryInterface {
9
11
  /**
10
- * [Optional] Initialize repository
11
- * Override this method if the repository needs some asynchronous initialization code (open database connection and create collections, initialize Git…)
12
- *
13
- * @returns {Promise<Repository>} Promise that will be resolved with the current repository instance
14
- */
12
+ * [Optional] Initialize repository
13
+ * Override this method if the repository needs some asynchronous initialization code (open database connection and create collections, initialize Git…)
14
+ * @returns {Promise<RepositoryInterface>} Promise that will be resolved with the current repository instance
15
+ */
15
16
  async initialize() {
16
17
  return this;
17
18
  }
18
19
 
19
20
  /**
20
- * [Optional] Finalize repository
21
- * Override this method if the repository needs some asynchronous code to properly close the repository (close database connection, push changes on Git remote…)
22
- *
23
- * @returns {Promise<Repository>} Promise that will be resolved with the current repository instance
24
- */
21
+ * [Optional] Finalize repository
22
+ * Override this method if the repository needs some asynchronous code to properly close the repository (close database connection, push changes on Git remote…)
23
+ * @returns {Promise<RepositoryInterface>} Promise that will be resolved with the current repository instance
24
+ */
25
25
  async finalize() {
26
26
  return this;
27
27
  }
28
28
 
29
29
  /**
30
- * Persist the given record if it does not already exist in repository
31
- *
32
- * @param {Record} record - Record to persist
33
- * @returns {Promise<Record>} Promise that will be resolved with the given record when it has been persisted
34
- */
30
+ * Persist the given record if it does not already exist in repository
31
+ * @param {Record} record - Record to persist
32
+ * @returns {Promise<Record>} Promise that will be resolved with the given record when it has been persisted
33
+ */
35
34
  async save(record) {
36
35
  throw new Error(`#save method is not implemented in ${this.constructor.name}`);
37
36
  }
38
37
 
39
38
  /**
40
- * Find the most recent record that matches the given service ID and terms type and optionally the document ID
41
- * In case of snapshots, if the record is related to terms extracted from multiple source documents, the document ID is required to find the source snapshot
42
- *
43
- * @param {string} serviceId - Service ID of record to find
44
- * @param {string} termsType - Terms type of record to find
45
- * @param {string} [documentId] - Document ID of record to find. Used to identify the source in terms extracted from multiple source documents. Not necessary for terms with a single source document
46
- * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given criteria
47
- */
39
+ * Find the most recent record that matches the given service ID and terms type and optionally the document ID
40
+ * In case of snapshots, if the record is related to terms extracted from multiple source documents, the document ID is required to find the source snapshot
41
+ * @param {string} serviceId - Service ID of record to find
42
+ * @param {string} termsType - Terms type of record to find
43
+ * @param {string} [documentId] - Document ID of record to find. Used to identify the source in terms extracted from multiple source documents. Not necessary for terms with a single source document
44
+ * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given criteria
45
+ */
48
46
  async findLatest(serviceId, termsType, documentId) {
49
47
  throw new Error(`#findLatest method is not implemented in ${this.constructor.name}`);
50
48
  }
51
49
 
52
50
  /**
53
- * Find the record that was valid on the given date and that matches the given service ID and terms type and optionally the document ID
54
- * In case of snapshots, if the record is related to terms extracted from multiple source documents, the document ID is required to find the source snapshot
55
- *
56
- * @param {string} serviceId - Service ID of record to find
57
- * @param {string} termsType - Terms type of record to find
58
- * @param {date} date - Datetime on which the record to find was valid
59
- * @param {string} [documentId] - Document ID of record to find. Used to identify the source in terms extracted from multiple source documents. Not necessary for terms with a single source document
60
- * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given criteria
61
- */
51
+ * Find the record that was valid on the given date and that matches the given service ID and terms type and optionally the document ID
52
+ * In case of snapshots, if the record is related to terms extracted from multiple source documents, the document ID is required to find the source snapshot
53
+ * @param {string} serviceId - Service ID of record to find
54
+ * @param {string} termsType - Terms type of record to find
55
+ * @param {Date} date - Datetime on which the record to find was valid
56
+ * @param {string} [documentId] - Document ID of record to find. Used to identify the source in terms extracted from multiple source documents. Not necessary for terms with a single source document
57
+ * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given criteria
58
+ */
62
59
  async findByDate(serviceId, termsType, date, documentId) {
63
60
  throw new Error(`#findByDate method is not implemented in ${this.constructor.name}`);
64
61
  }
65
62
 
66
63
  /**
67
- * Find the record that matches the given record ID
68
- *
69
- * @param {string} recordId - Record ID of the record to find
70
- * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given ID
71
- */
64
+ * Find the record that matches the given record ID
65
+ * @param {string} recordId - Record ID of the record to find
66
+ * @returns {Promise<Record>} Promise that will be resolved with the found record or an empty object if none match the given ID
67
+ */
72
68
  async findById(recordId) {
73
69
  throw new Error(`#findById method is not implemented in ${this.constructor.name}`);
74
70
  }
75
71
 
76
72
  /**
77
- * Find all records
78
- * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records
79
- *
80
- * @see RepositoryInterface#loadRecordContent
81
- * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of all records
82
- */
73
+ * Find all records
74
+ * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records
75
+ * @see RepositoryInterface#loadRecordContent
76
+ * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of all records
77
+ */
83
78
  async findAll() {
84
79
  throw new Error(`#findAll method is not implemented in ${this.constructor.name}`);
85
80
  }
86
81
 
87
82
  /**
88
- * Count the total number of records in the repository
89
- * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository
90
- *
91
- * @returns {Promise<number>} Promise that will be resolved with the total number of records
92
- */
83
+ * Count the total number of records in the repository
84
+ * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository
85
+ * @returns {Promise<number>} Promise that will be resolved with the total number of records
86
+ */
93
87
  async count() {
94
88
  throw new Error(`#count method is not implemented in ${this.constructor.name}`);
95
89
  }
96
90
 
91
+ /* eslint-disable jsdoc/require-yields-check */
97
92
  /**
98
- * Iterate over all records in the repository, from oldest to most recent
99
- *
100
- * @yields {Record}
101
- */
93
+ * Iterate over all records in the repository, from oldest to most recent
94
+ * @yields {Record}
95
+ */
102
96
  async* iterate() {
103
97
  throw new Error(`#iterate method is not implemented in ${this.constructor.name}`);
104
98
  }
99
+ /* eslint-enable jsdoc/require-yields-check */
105
100
 
106
101
  /**
107
- * Remove all records
108
- *
109
- * @returns {Promise} Promise that will be resolved when all records are removed
110
- */
102
+ * Remove all records
103
+ * @returns {Promise} Promise that will be resolved when all records are removed
104
+ */
111
105
  async removeAll() {
112
106
  throw new Error(`#removeAll method is not implemented in ${this.constructor.name}`);
113
107
  }
114
108
 
115
109
  /**
116
- * Load content of the given record
117
- *
118
- * @param {Record} record - Record of which to populate content
119
- * @returns {Promise<Record>} Promise that will be resolved with the given record when its content has been loaded
120
- */
110
+ * Load content of the given record
111
+ * @param {Record} record - Record of which to populate content
112
+ * @returns {Promise<Record>} Promise that will be resolved with the given record when its content has been loaded
113
+ */
121
114
  async loadRecordContent(record) {
122
115
  throw new Error(`#loadRecordContent method is not implemented in ${this.constructor.name}`);
123
116
  }
@@ -34,14 +34,14 @@ export default class MongoRepository extends RepositoryInterface {
34
34
  }
35
35
 
36
36
  async save(record) {
37
- const { serviceId, termsType } = record;
37
+ const { serviceId, termsType, documentId } = record;
38
38
 
39
39
  if (record.isFirstRecord === undefined || record.isFirstRecord === null) {
40
- record.isFirstRecord = !await this.collection.findOne({ serviceId, termsType });
40
+ record.isFirstRecord = !await this.collection.findOne({ serviceId, termsType, documentId });
41
41
  }
42
42
 
43
43
  const documentFields = await this.#toPersistence(record);
44
- const previousRecord = await this.findLatest(serviceId, termsType);
44
+ const previousRecord = await this.findLatest(serviceId, termsType, documentId);
45
45
 
46
46
  if (previousRecord?.content == documentFields.content) {
47
47
  return Object(null);
@@ -54,14 +54,26 @@ export default class MongoRepository extends RepositoryInterface {
54
54
  return record;
55
55
  }
56
56
 
57
- async findLatest(serviceId, termsType) {
58
- const [mongoDocument] = await this.collection.find({ serviceId, termsType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one mongo document use `find`
57
+ async findLatest(serviceId, termsType, documentId) {
58
+ const query = { serviceId, termsType };
59
+
60
+ if (documentId !== undefined) {
61
+ query.documentId = documentId;
62
+ }
63
+
64
+ const [mongoDocument] = await this.collection.find(query).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one mongo document use `find`
59
65
 
60
66
  return this.#toDomain(mongoDocument);
61
67
  }
62
68
 
63
- async findByDate(serviceId, termsType, date) {
64
- const [mongoDocument] = await this.collection.find({ serviceId, termsType, fetchDate: { $lte: new Date(date) } }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one mongo document use `find`
69
+ async findByDate(serviceId, termsType, date, documentId) {
70
+ const query = { serviceId, termsType, fetchDate: { $lte: new Date(date) } };
71
+
72
+ if (documentId !== undefined) {
73
+ query.documentId = documentId;
74
+ }
75
+
76
+ const [mongoDocument] = await this.collection.find(query).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one mongo document use `find`
65
77
 
66
78
  return this.#toDomain(mongoDocument);
67
79
  }
@@ -332,6 +332,30 @@ describe('MongoRepository', () => {
332
332
  expect(mongoDocument.termsType).to.include(TERMS_TYPE);
333
333
  });
334
334
  });
335
+
336
+ context('when document ID is specified', () => {
337
+ before(async () => {
338
+ (record = await subject.save(new Version({
339
+ serviceId: SERVICE_PROVIDER_ID,
340
+ termsType: TERMS_TYPE,
341
+ documentId: DOCUMENT_ID,
342
+ content: CONTENT,
343
+ fetchDate: FETCH_DATE,
344
+ snapshotIds: [SNAPSHOT_ID],
345
+ })));
346
+
347
+ (mongoDocument = await collection.findOne({
348
+ serviceId: SERVICE_PROVIDER_ID,
349
+ termsType: TERMS_TYPE,
350
+ }));
351
+ });
352
+
353
+ after(() => subject.removeAll());
354
+
355
+ it('stores the document ID', () => {
356
+ expect(mongoDocument.documentId).to.equal(DOCUMENT_ID);
357
+ });
358
+ });
335
359
  });
336
360
 
337
361
  describe('#findById', () => {
@@ -439,6 +463,46 @@ describe('MongoRepository', () => {
439
463
  expect(recordFound.id).to.include(recordToFindId);
440
464
  });
441
465
  });
466
+
467
+ context('when document ID is specified', () => {
468
+ let recordFound;
469
+ const DIFFERENT_DOCUMENT_ID = 'other-document';
470
+ const UPDATED_CONTENT = `${CONTENT} (with additional content)`;
471
+
472
+ before(async () => {
473
+ await subject.save(new Version({
474
+ serviceId: SERVICE_PROVIDER_ID,
475
+ termsType: TERMS_TYPE,
476
+ documentId: DOCUMENT_ID,
477
+ content: CONTENT,
478
+ fetchDate: FETCH_DATE,
479
+ snapshotIds: [SNAPSHOT_ID],
480
+ }));
481
+
482
+ await subject.save(new Version({
483
+ serviceId: SERVICE_PROVIDER_ID,
484
+ termsType: TERMS_TYPE,
485
+ documentId: DIFFERENT_DOCUMENT_ID,
486
+ content: UPDATED_CONTENT,
487
+ fetchDate: FETCH_DATE_LATER,
488
+ snapshotIds: [SNAPSHOT_ID],
489
+ }));
490
+
491
+ recordFound = await subject.findByDate(
492
+ SERVICE_PROVIDER_ID,
493
+ TERMS_TYPE,
494
+ FETCH_DATE_LATER,
495
+ DOCUMENT_ID,
496
+ );
497
+ });
498
+
499
+ after(() => subject.removeAll());
500
+
501
+ it('returns only records matching the document ID', () => {
502
+ expect(recordFound.documentId).to.equal(DOCUMENT_ID);
503
+ expect(recordFound.content).to.equal(CONTENT);
504
+ });
505
+ });
442
506
  });
443
507
  });
444
508
 
@@ -580,6 +644,44 @@ describe('MongoRepository', () => {
580
644
  expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT);
581
645
  });
582
646
  });
647
+
648
+ context('when document ID is specified', () => {
649
+ let latestRecord;
650
+ const DIFFERENT_DOCUMENT_ID = 'other-document';
651
+
652
+ before(async () => {
653
+ await subject.save(new Version({
654
+ serviceId: SERVICE_PROVIDER_ID,
655
+ termsType: TERMS_TYPE,
656
+ documentId: DOCUMENT_ID,
657
+ content: CONTENT,
658
+ fetchDate: FETCH_DATE_LATER,
659
+ snapshotIds: [SNAPSHOT_ID],
660
+ }));
661
+
662
+ await subject.save(new Version({
663
+ serviceId: SERVICE_PROVIDER_ID,
664
+ termsType: TERMS_TYPE,
665
+ documentId: DIFFERENT_DOCUMENT_ID,
666
+ content: CONTENT,
667
+ fetchDate: FETCH_DATE,
668
+ snapshotIds: [SNAPSHOT_ID],
669
+ }));
670
+
671
+ latestRecord = await subject.findLatest(
672
+ SERVICE_PROVIDER_ID,
673
+ TERMS_TYPE,
674
+ DIFFERENT_DOCUMENT_ID,
675
+ );
676
+ });
677
+
678
+ after(() => subject.removeAll());
679
+
680
+ it('returns only records matching the document ID', () => {
681
+ expect(latestRecord.documentId).to.equal(DIFFERENT_DOCUMENT_ID);
682
+ expect(latestRecord.fetchDate).to.deep.equal(FETCH_DATE);
683
+ });
684
+ });
583
685
  });
584
686
 
585
687
  context('when there are no records for the given service', () => {
@@ -1119,6 +1221,44 @@ describe('MongoRepository', () => {
1119
1221
  expect(latestRecord.mimeType).to.equal(PDF_MIME_TYPE);
1120
1222
  });
1121
1223
  });
1224
+
1225
+ context('when document ID is specified', () => {
1226
+ let latestRecord;
1227
+ const DIFFERENT_DOCUMENT_ID = 'other-document';
1228
+
1229
+ before(async () => {
1230
+ await subject.save(new Snapshot({
1231
+ serviceId: SERVICE_PROVIDER_ID,
1232
+ termsType: TERMS_TYPE,
1233
+ documentId: DOCUMENT_ID,
1234
+ content: CONTENT,
1235
+ fetchDate: FETCH_DATE_LATER,
1236
+ mimeType: HTML_MIME_TYPE,
1237
+ }));
1238
+
1239
+ await subject.save(new Snapshot({
1240
+ serviceId: SERVICE_PROVIDER_ID,
1241
+ termsType: TERMS_TYPE,
1242
+ documentId: DIFFERENT_DOCUMENT_ID,
1243
+ content: CONTENT,
1244
+ fetchDate: FETCH_DATE,
1245
+ mimeType: HTML_MIME_TYPE,
1246
+ }));
1247
+
1248
+ latestRecord = await subject.findLatest(
1249
+ SERVICE_PROVIDER_ID,
1250
+ TERMS_TYPE,
1251
+ DIFFERENT_DOCUMENT_ID,
1252
+ );
1253
+ });
1254
+
1255
+ after(() => subject.removeAll());
1256
+
1257
+ it('returns only records matching the document ID', () => {
1258
+ expect(latestRecord.documentId).to.equal(DIFFERENT_DOCUMENT_ID);
1259
+ expect(latestRecord.fetchDate).to.deep.equal(FETCH_DATE);
1260
+ });
1261
+ });
1122
1262
  });
1123
1263
 
1124
1264
  context('when there are no records for the given service', () => {
@@ -1,4 +1,18 @@
1
1
  export default class SourceDocument {
2
+ /**
3
+ * Represents a source document containing web content and metadata for extraction.
4
+ * Includes the document location, selectors for content inclusion/exclusion,
5
+ * content filters, raw content data, and MIME type information.
6
+ * @class SourceDocument
7
+ * @param {object} params The source document parameters
8
+ * @param {string} params.location The URL location of the document
9
+ * @param {boolean} params.executeClientScripts Whether to execute client-side scripts
10
+ * @param {(string | object | Array)} params.contentSelectors CSS selectors for content to include
11
+ * @param {(string | object | Array)} params.insignificantContentSelectors CSS selectors for content to exclude
12
+ * @param {Array} params.filters Array of filters to apply
13
+ * @param {string} params.content The document content
14
+ * @param {string} params.mimeType The MIME type of the content
15
+ */
2
16
  constructor({ location, executeClientScripts, contentSelectors, insignificantContentSelectors, filters, content, mimeType }) {
3
17
  this.location = location;
4
18
  this.executeClientScripts = executeClientScripts;
@@ -10,6 +10,9 @@ const METADATA_FILENAME = 'metadata.yml';
10
10
  const PACKAGE_JSON_PATH = '../../../package.json';
11
11
 
12
12
  /**
13
+ * @param {string} collectionPath The path to the collection
14
+ * @param {object} services The services of the collection
15
+ * @returns {express.Router} The router instance
13
16
  * @swagger
14
17
  * tags:
15
18
  * name: Metadata
@@ -1,6 +1,8 @@
1
1
  import express from 'express';
2
2
 
3
3
  /**
4
+ * @param {object} services The services to be exposed by the API
5
+ * @returns {express.Router} The router instance
4
6
  * @swagger
5
7
  * tags:
6
8
  * name: Services
@@ -5,6 +5,7 @@ import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'
5
5
  import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
6
6
 
7
7
  /**
8
+ * @private
8
9
  * @swagger
9
10
  * tags:
10
11
  * name: Versions
@@ -24,13 +25,14 @@ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
24
25
  * description: The ID of the version.
25
26
  * content:
26
27
  * type: string
27
- * description: The JSON-escaped Markdown content of the version.
28
+ * description: The JSON-escaped Markdown content of the version
28
29
  */
29
30
  const router = express.Router();
30
31
 
31
32
  const versionsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')).initialize();
32
33
 
33
34
  /**
35
+ * @private
34
36
  * @swagger
35
37
  * /version/{serviceId}/{termsType}/{date}:
36
38
  * get:
@@ -99,7 +99,10 @@ export default class GitHub {
99
99
  }
100
100
 
101
101
  async getRepositoryLabels() {
102
- const { data: labels } = await this.octokit.request('GET /repos/{owner}/{repo}/labels', { ...this.commonParams });
102
+ const labels = await this.octokit.paginate('GET /repos/{owner}/{repo}/labels', {
103
+ ...this.commonParams,
104
+ per_page: 100,
105
+ });
103
106
 
104
107
  return labels;
105
108
  }
@@ -33,6 +33,7 @@ describe('GitHub', function () {
33
33
 
34
34
  nock('https://api.github.com')
35
35
  .get('/repos/owner/repo/labels')
36
+ .query(true)
36
37
  .reply(200, existingLabels);
37
38
 
38
39
  const missingLabels = MANAGED_LABELS.slice(-2);
@@ -61,6 +62,7 @@ describe('GitHub', function () {
61
62
  before(async () => {
62
63
  scope = nock('https://api.github.com')
63
64
  .get('/repos/owner/repo/labels')
65
+ .query(true)
64
66
  .reply(200, LABELS);
65
67
 
66
68
  result = await github.getRepositoryLabels();
@@ -44,8 +44,7 @@ export default class Reporter {
44
44
 
45
45
  /**
46
46
  * Support for legacy config format where reporter configuration was nested under `githubIssues`
47
- * Example:
48
- *
47
+ * @example
49
48
  * ```json
50
49
  * {
51
50
  * "githubIssues": {
@@ -55,8 +54,10 @@ export default class Reporter {
55
54
  * }
56
55
  * }
57
56
  * ```
58
- *
57
+ * @param {object} config - The configuration object to normalize
58
+ * @returns {object} The normalized configuration object
59
59
  * @deprecated
60
+ * @private
60
61
  */
61
62
  static normalizeConfig(config) {
62
63
  if (config.githubIssues) {