@opentermsarchive/engine 5.2.0 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/archivist/fetcher/fullDomFetcher.js +1 -1
- package/src/archivist/fetcher/htmlOnlyFetcher.js +5 -6
- package/src/archivist/fetcher/index.js +66 -12
- package/src/archivist/fetcher/index.test.js +61 -8
- package/src/archivist/index.js +8 -1
- package/src/archivist/recorder/repositories/git/dataMapper.js +12 -6
- package/src/archivist/recorder/repositories/git/git.js +17 -3
- package/src/archivist/recorder/repositories/git/index.js +4 -4
- package/src/archivist/recorder/repositories/git/index.test.js +112 -0
- package/src/archivist/recorder/repositories/git/trailers.js +48 -0
- package/src/archivist/recorder/repositories/git/trailers.test.js +158 -0
- package/src/archivist/recorder/repositories/mongo/dataMapper.js +2 -1
- package/src/archivist/recorder/repositories/mongo/index.js +1 -1
- package/src/archivist/recorder/repositories/mongo/index.test.js +139 -33
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opentermsarchive/engine",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.3.1",
|
|
4
4
|
"description": "Tracks and makes visible changes to the terms of online services",
|
|
5
5
|
"homepage": "https://opentermsarchive.org",
|
|
6
6
|
"bugs": {
|
|
@@ -51,7 +51,6 @@
|
|
|
51
51
|
"dependencies": {
|
|
52
52
|
"@accordproject/markdown-cicero": "^0.15.2",
|
|
53
53
|
"@accordproject/markdown-pdf": "^0.15.2",
|
|
54
|
-
"@opentermsarchive/fetch-charset-detection": "^1.0.1",
|
|
55
54
|
"@opentermsarchive/turndown": "^7.1.3",
|
|
56
55
|
"@stylistic/eslint-plugin-js": "^1.4.1",
|
|
57
56
|
"abort-controller": "^3.0.0",
|
|
@@ -78,6 +77,7 @@
|
|
|
78
77
|
"eslint-plugin-no-only-tests": "^3.1.0",
|
|
79
78
|
"express": "^4.19.2",
|
|
80
79
|
"express-async-errors": "^3.1.1",
|
|
80
|
+
"fetch-charset-detection": "^1.0.1",
|
|
81
81
|
"fs-extra": "^10.0.0",
|
|
82
82
|
"helmet": "^6.0.1",
|
|
83
83
|
"http-proxy-agent": "^5.0.0",
|
|
@@ -33,7 +33,7 @@ export default async function fetch(url, cssSelectors, config) {
|
|
|
33
33
|
throw new Error(`Received HTTP code ${statusCode} when trying to fetch '${url}'`);
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
const waitForSelectorsPromises = selectors.map(selector => page.waitForSelector(selector, { timeout: config.waitForElementsTimeout }));
|
|
36
|
+
const waitForSelectorsPromises = selectors.filter(Boolean).map(selector => page.waitForSelector(selector, { timeout: config.waitForElementsTimeout }));
|
|
37
37
|
|
|
38
38
|
// We expect all elements to be present on the page…
|
|
39
39
|
await Promise.all(waitForSelectorsPromises).catch(error => {
|
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
import convertBody from '@opentermsarchive/fetch-charset-detection'; // eslint-disable-line import/no-unresolved
|
|
2
1
|
import AbortController from 'abort-controller';
|
|
3
|
-
|
|
2
|
+
import convertBody from 'fetch-charset-detection'; // eslint-disable-line import/no-unresolved
|
|
4
3
|
import HttpProxyAgent from 'http-proxy-agent';
|
|
5
4
|
import HttpsProxyAgent from 'https-proxy-agent';
|
|
6
5
|
import nodeFetch, { AbortError } from 'node-fetch';
|
|
7
6
|
|
|
8
|
-
export default async function fetch(url,
|
|
7
|
+
export default async function fetch(url, config) {
|
|
9
8
|
const controller = new AbortController();
|
|
10
|
-
const timeout = setTimeout(() => controller.abort(),
|
|
9
|
+
const timeout = setTimeout(() => controller.abort(), config.navigationTimeout);
|
|
11
10
|
|
|
12
11
|
const nodeFetchOptions = {
|
|
13
12
|
signal: controller.signal,
|
|
14
13
|
credentials: 'include',
|
|
15
|
-
headers: { 'Accept-Language':
|
|
14
|
+
headers: { 'Accept-Language': config.language },
|
|
16
15
|
};
|
|
17
16
|
|
|
18
17
|
if (url.startsWith('https:') && process.env.HTTPS_PROXY) {
|
|
@@ -51,7 +50,7 @@ export default async function fetch(url, configuration) {
|
|
|
51
50
|
};
|
|
52
51
|
} catch (error) {
|
|
53
52
|
if (error instanceof AbortError) {
|
|
54
|
-
throw new Error(`Timed out after ${
|
|
53
|
+
throw new Error(`Timed out after ${config.navigationTimeout / 1000} seconds when trying to fetch '${url}'`);
|
|
55
54
|
}
|
|
56
55
|
|
|
57
56
|
throw new Error(error.message);
|
|
@@ -7,35 +7,89 @@ import fetchHtmlOnly from './htmlOnlyFetcher.js';
|
|
|
7
7
|
export { launchHeadlessBrowser, stopHeadlessBrowser } from './fullDomFetcher.js';
|
|
8
8
|
export { FetchDocumentError } from './errors.js';
|
|
9
9
|
|
|
10
|
+
export const FETCHER_TYPES = {
|
|
11
|
+
FULL_DOM: 'fullDom',
|
|
12
|
+
HTML_ONLY: 'htmlOnly',
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
const LIKELY_BOT_BLOCKING_ERRORS = [
|
|
16
|
+
'HTTP code 403',
|
|
17
|
+
'HTTP code 406',
|
|
18
|
+
'HTTP code 502',
|
|
19
|
+
'ECONNRESET',
|
|
20
|
+
];
|
|
21
|
+
|
|
10
22
|
/**
|
|
11
23
|
* Fetch a resource from the network, returning a promise which is fulfilled once the response is available
|
|
12
24
|
* @function fetch
|
|
13
|
-
* @param {object}
|
|
14
|
-
* @param {string}
|
|
15
|
-
* @param {boolean}
|
|
16
|
-
* @param {string|Array}
|
|
17
|
-
* @param {object}
|
|
18
|
-
* @param {number}
|
|
19
|
-
* @param {string}
|
|
20
|
-
* @param {number}
|
|
21
|
-
* @returns {Promise<{ mimeType: string, content: string | Buffer }>} Promise containing the fetched resource's MIME type and
|
|
25
|
+
* @param {object} params Fetcher parameters
|
|
26
|
+
* @param {string} params.url URL of the resource you want to fetch
|
|
27
|
+
* @param {boolean} [params.executeClientScripts] Enable execution of client scripts. When set to `true`, this property loads the page in a headless browser to load all assets and execute client scripts before returning its content. If undefined, the engine will automatically balance performance and tracking success rate, defaulting to not executing scripts and escalating to headless browser if needed
|
|
28
|
+
* @param {string|Array} [params.cssSelectors] List of CSS selectors to await when loading the resource in a headless browser. Can be a CSS selector or an array of CSS selectors. Only relevant when `executeClientScripts` is enabled
|
|
29
|
+
* @param {object} [params.config] Fetcher configuration
|
|
30
|
+
* @param {number} [params.config.navigationTimeout] Maximum time (in milliseconds) to wait before considering the fetch failed
|
|
31
|
+
* @param {string} [params.config.language] Language (in [ISO 639-1 format](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)) to be passed in request headers
|
|
32
|
+
* @param {number} [params.config.waitForElementsTimeout] Maximum time (in milliseconds) to wait for selectors to exist on page before considering the fetch failed. Only relevant when `executeClientScripts` is enabled
|
|
33
|
+
* @returns {Promise<{ mimeType: string, content: string | Buffer, fetcher: string }>} Promise containing the fetched resource's MIME type, content, and fetcher type
|
|
34
|
+
* @throws {FetchDocumentError} When the fetch operation fails
|
|
22
35
|
* @async
|
|
23
36
|
*/
|
|
24
37
|
export default async function fetch({
|
|
25
|
-
url,
|
|
38
|
+
url,
|
|
39
|
+
executeClientScripts,
|
|
40
|
+
cssSelectors,
|
|
26
41
|
config: {
|
|
27
42
|
navigationTimeout = config.get('@opentermsarchive/engine.fetcher.navigationTimeout'),
|
|
28
43
|
language = config.get('@opentermsarchive/engine.fetcher.language'),
|
|
29
44
|
waitForElementsTimeout = config.get('@opentermsarchive/engine.fetcher.waitForElementsTimeout'),
|
|
30
45
|
} = {},
|
|
31
46
|
}) {
|
|
47
|
+
if (!url) {
|
|
48
|
+
throw new FetchDocumentError('URL is required');
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const fetcherConfig = {
|
|
52
|
+
navigationTimeout,
|
|
53
|
+
language,
|
|
54
|
+
waitForElementsTimeout,
|
|
55
|
+
executeClientScripts,
|
|
56
|
+
};
|
|
57
|
+
|
|
32
58
|
try {
|
|
33
59
|
if (executeClientScripts) {
|
|
34
|
-
return await
|
|
60
|
+
return await fetchWithFullDom(url, cssSelectors, fetcherConfig);
|
|
35
61
|
}
|
|
36
62
|
|
|
37
|
-
return await
|
|
63
|
+
return await fetchWithFallback(url, cssSelectors, fetcherConfig);
|
|
38
64
|
} catch (error) {
|
|
39
65
|
throw new FetchDocumentError(error.message);
|
|
40
66
|
}
|
|
41
67
|
}
|
|
68
|
+
|
|
69
|
+
async function fetchWithFallback(url, cssSelectors, fetcherConfig) {
|
|
70
|
+
try {
|
|
71
|
+
return await fetchWithHtmlOnly(url, fetcherConfig);
|
|
72
|
+
} catch (error) {
|
|
73
|
+
const isBotBlockingError = LIKELY_BOT_BLOCKING_ERRORS.some(code => error.message.includes(code));
|
|
74
|
+
|
|
75
|
+
if (!isBotBlockingError || fetcherConfig.executeClientScripts === false) {
|
|
76
|
+
throw error;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
return fetchWithFullDom(url, cssSelectors, fetcherConfig);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
async function fetchWithFullDom(url, cssSelectors, fetcherConfig) {
|
|
84
|
+
return {
|
|
85
|
+
...await fetchFullDom(url, cssSelectors, fetcherConfig),
|
|
86
|
+
fetcher: FETCHER_TYPES.FULL_DOM,
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
async function fetchWithHtmlOnly(url, fetcherConfig) {
|
|
91
|
+
return {
|
|
92
|
+
...await fetchHtmlOnly(url, fetcherConfig),
|
|
93
|
+
fetcher: FETCHER_TYPES.HTML_ONLY,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
@@ -7,7 +7,7 @@ import chai from 'chai';
|
|
|
7
7
|
import chaiAsPromised from 'chai-as-promised';
|
|
8
8
|
import iconv from 'iconv-lite';
|
|
9
9
|
|
|
10
|
-
import fetch, { launchHeadlessBrowser, stopHeadlessBrowser, FetchDocumentError } from './index.js';
|
|
10
|
+
import fetch, { launchHeadlessBrowser, stopHeadlessBrowser, FetchDocumentError, FETCHER_TYPES } from './index.js';
|
|
11
11
|
|
|
12
12
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
|
|
@@ -31,6 +31,8 @@ describe('Fetcher', function () {
|
|
|
31
31
|
let expectedPDFContent;
|
|
32
32
|
|
|
33
33
|
before(done => {
|
|
34
|
+
let blockCount = 0;
|
|
35
|
+
|
|
34
36
|
temporaryServer = http.createServer((request, response) => {
|
|
35
37
|
if (request.url === '/') {
|
|
36
38
|
response.writeHead(200, { 'Content-Type': 'text/html' }).write(termsHTML);
|
|
@@ -46,9 +48,19 @@ describe('Fetcher', function () {
|
|
|
46
48
|
}
|
|
47
49
|
if (request.url == '/terms.pdf') {
|
|
48
50
|
expectedPDFContent = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf'));
|
|
49
|
-
|
|
50
51
|
response.writeHead(200, { 'Content-Type': 'application/pdf' }).write(expectedPDFContent);
|
|
51
52
|
}
|
|
53
|
+
if (request.url === '/block-once') {
|
|
54
|
+
if (blockCount === 0) {
|
|
55
|
+
blockCount++;
|
|
56
|
+
response.writeHead(403, { 'Content-Type': 'text/html' }).write('<!DOCTYPE html><html><body>Access Denied - Bot Detected</body></html>');
|
|
57
|
+
} else {
|
|
58
|
+
response.writeHead(200, { 'Content-Type': 'text/html' }).write(termsHTML);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (request.url === '/always-block') {
|
|
62
|
+
response.writeHead(403, { 'Content-Type': 'text/html' }).write('<!DOCTYPE html><html><body>Access Denied - Bot Detected</body></html>');
|
|
63
|
+
}
|
|
52
64
|
|
|
53
65
|
return response.end();
|
|
54
66
|
}).listen(SERVER_PORT);
|
|
@@ -66,11 +78,12 @@ describe('Fetcher', function () {
|
|
|
66
78
|
context('when html page is available', () => {
|
|
67
79
|
let content;
|
|
68
80
|
let mimeType;
|
|
81
|
+
let fetcher;
|
|
69
82
|
const url = `http://127.0.0.1:${SERVER_PORT}`;
|
|
70
83
|
|
|
71
84
|
context('when expected selectors are present', () => {
|
|
72
85
|
before(async () => {
|
|
73
|
-
({ content, mimeType } = await fetch({ url, cssSelectors: 'body' }));
|
|
86
|
+
({ content, mimeType, fetcher } = await fetch({ url, cssSelectors: 'body' }));
|
|
74
87
|
});
|
|
75
88
|
|
|
76
89
|
it('returns the web page content of the given URL', () => {
|
|
@@ -81,9 +94,13 @@ describe('Fetcher', function () {
|
|
|
81
94
|
expect(mimeType).to.equal('text/html');
|
|
82
95
|
});
|
|
83
96
|
|
|
97
|
+
it('uses HTML-only fetcher by default', () => {
|
|
98
|
+
expect(fetcher).to.equal(FETCHER_TYPES.HTML_ONLY);
|
|
99
|
+
});
|
|
100
|
+
|
|
84
101
|
context('with client script enabled', () => {
|
|
85
102
|
before(async () => {
|
|
86
|
-
({ content, mimeType } = await fetch({ url, cssSelectors: 'body', executeClientScripts: true }));
|
|
103
|
+
({ content, mimeType, fetcher } = await fetch({ url, cssSelectors: 'body', executeClientScripts: true }));
|
|
87
104
|
});
|
|
88
105
|
|
|
89
106
|
it('returns the web page content of the given URL', () => {
|
|
@@ -93,6 +110,10 @@ describe('Fetcher', function () {
|
|
|
93
110
|
it('returns the MIME type of the given URL', () => {
|
|
94
111
|
expect(mimeType).to.equal('text/html');
|
|
95
112
|
});
|
|
113
|
+
|
|
114
|
+
it('uses full DOM fetcher when client scripts are enabled', () => {
|
|
115
|
+
expect(fetcher).to.equal(FETCHER_TYPES.FULL_DOM);
|
|
116
|
+
});
|
|
96
117
|
});
|
|
97
118
|
});
|
|
98
119
|
|
|
@@ -100,7 +121,7 @@ describe('Fetcher', function () {
|
|
|
100
121
|
const NOT_PRESENT_SELECTOR = 'h2';
|
|
101
122
|
|
|
102
123
|
before(async () => {
|
|
103
|
-
({ content, mimeType } = await fetch({ url, cssSelectors: NOT_PRESENT_SELECTOR }));
|
|
124
|
+
({ content, mimeType, fetcher } = await fetch({ url, cssSelectors: NOT_PRESENT_SELECTOR }));
|
|
104
125
|
});
|
|
105
126
|
|
|
106
127
|
it('returns the web page content of the given URL', () => {
|
|
@@ -111,9 +132,13 @@ describe('Fetcher', function () {
|
|
|
111
132
|
expect(mimeType).to.equal('text/html');
|
|
112
133
|
});
|
|
113
134
|
|
|
135
|
+
it('uses HTML-only fetcher by default', () => {
|
|
136
|
+
expect(fetcher).to.equal(FETCHER_TYPES.HTML_ONLY);
|
|
137
|
+
});
|
|
138
|
+
|
|
114
139
|
context('with client script enabled', () => {
|
|
115
140
|
before(async () => {
|
|
116
|
-
({ content, mimeType } = await fetch({ url, cssSelectors: NOT_PRESENT_SELECTOR, executeClientScripts: true }));
|
|
141
|
+
({ content, mimeType, fetcher } = await fetch({ url, cssSelectors: NOT_PRESENT_SELECTOR, executeClientScripts: true }));
|
|
117
142
|
});
|
|
118
143
|
|
|
119
144
|
it('returns the web page content of the given URL', () => {
|
|
@@ -123,32 +148,42 @@ describe('Fetcher', function () {
|
|
|
123
148
|
it('returns the MIME type of the given URL', () => {
|
|
124
149
|
expect(mimeType).to.equal('text/html');
|
|
125
150
|
});
|
|
151
|
+
|
|
152
|
+
it('uses full DOM fetcher when client scripts are enabled', () => {
|
|
153
|
+
expect(fetcher).to.equal(FETCHER_TYPES.FULL_DOM);
|
|
154
|
+
});
|
|
126
155
|
});
|
|
127
156
|
});
|
|
128
157
|
});
|
|
129
158
|
|
|
130
159
|
context('when html page is in different charset', () => {
|
|
131
160
|
let content;
|
|
161
|
+
let fetcher;
|
|
132
162
|
const url = `http://127.0.0.1:${SERVER_PORT}/other-charset`;
|
|
133
163
|
|
|
134
164
|
context('when expected selectors are present', () => {
|
|
135
165
|
before(async () => {
|
|
136
|
-
({ content } = await fetch({ url, cssSelectors: 'body' }));
|
|
166
|
+
({ content, fetcher } = await fetch({ url, cssSelectors: 'body' }));
|
|
137
167
|
});
|
|
138
168
|
|
|
139
169
|
it('returns the web page content of the given URL', () => {
|
|
140
170
|
expect(content).to.equal(termsWithOtherCharsetHTML);
|
|
141
171
|
});
|
|
172
|
+
|
|
173
|
+
it('uses HTML-only fetcher by default', () => {
|
|
174
|
+
expect(fetcher).to.equal(FETCHER_TYPES.HTML_ONLY);
|
|
175
|
+
});
|
|
142
176
|
});
|
|
143
177
|
});
|
|
144
178
|
|
|
145
179
|
context('when url targets a PDF file', () => {
|
|
146
180
|
let content;
|
|
147
181
|
let mimeType;
|
|
182
|
+
let fetcher;
|
|
148
183
|
const pdfUrl = `http://127.0.0.1:${SERVER_PORT}/terms.pdf`;
|
|
149
184
|
|
|
150
185
|
before(async () => {
|
|
151
|
-
({ content, mimeType } = await fetch({ url: pdfUrl }));
|
|
186
|
+
({ content, mimeType, fetcher } = await fetch({ url: pdfUrl }));
|
|
152
187
|
});
|
|
153
188
|
|
|
154
189
|
it('returns a buffer for PDF content', () => {
|
|
@@ -162,6 +197,10 @@ describe('Fetcher', function () {
|
|
|
162
197
|
it('returns a blob with the file content', () => {
|
|
163
198
|
expect(content.equals(expectedPDFContent)).to.be.true;
|
|
164
199
|
});
|
|
200
|
+
|
|
201
|
+
it('returns the fetcher used to fetch the PDF file', () => {
|
|
202
|
+
expect(fetcher).to.equal(FETCHER_TYPES.HTML_ONLY);
|
|
203
|
+
});
|
|
165
204
|
});
|
|
166
205
|
|
|
167
206
|
context('when server responds with empty content', () => {
|
|
@@ -245,6 +284,20 @@ describe('Fetcher', function () {
|
|
|
245
284
|
});
|
|
246
285
|
});
|
|
247
286
|
});
|
|
287
|
+
|
|
288
|
+
describe('when bot blocking is detected', () => {
|
|
289
|
+
it('falls back to full DOM fetcher when bot blocking is detected', async () => {
|
|
290
|
+
const { content, mimeType, fetcher } = await fetch({ url: `http://127.0.0.1:${SERVER_PORT}/block-once` });
|
|
291
|
+
|
|
292
|
+
expect(content).to.equal(termsHTML);
|
|
293
|
+
expect(mimeType).to.equal('text/html');
|
|
294
|
+
expect(fetcher).to.equal(FETCHER_TYPES.FULL_DOM);
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
it('still throws FetchDocumentError if both fetchers fail', async () => {
|
|
298
|
+
await expect(fetch({ url: `http://127.0.0.1:${SERVER_PORT}/always-block` })).to.be.rejectedWith(FetchDocumentError);
|
|
299
|
+
});
|
|
300
|
+
});
|
|
248
301
|
});
|
|
249
302
|
});
|
|
250
303
|
});
|
package/src/archivist/index.js
CHANGED
|
@@ -183,10 +183,11 @@ export default class Archivist extends events.EventEmitter {
|
|
|
183
183
|
const { location: url, executeClientScripts, cssSelectors } = sourceDocument;
|
|
184
184
|
|
|
185
185
|
try {
|
|
186
|
-
const { mimeType, content } = await this.fetch({ url, executeClientScripts, cssSelectors });
|
|
186
|
+
const { mimeType, content, fetcher } = await this.fetch({ url, executeClientScripts, cssSelectors });
|
|
187
187
|
|
|
188
188
|
sourceDocument.content = content;
|
|
189
189
|
sourceDocument.mimeType = mimeType;
|
|
190
|
+
sourceDocument.fetcher = fetcher;
|
|
190
191
|
} catch (error) {
|
|
191
192
|
if (!(error instanceof FetchDocumentError)) {
|
|
192
193
|
throw error;
|
|
@@ -248,6 +249,7 @@ export default class Archivist extends events.EventEmitter {
|
|
|
248
249
|
termsType: terms.type,
|
|
249
250
|
fetchDate: terms.fetchDate,
|
|
250
251
|
isExtractOnly: extractOnly,
|
|
252
|
+
metadata: { 'x-engine-version': process.env.npm_package_version },
|
|
251
253
|
});
|
|
252
254
|
|
|
253
255
|
await this.recorder.record(record);
|
|
@@ -272,6 +274,11 @@ export default class Archivist extends events.EventEmitter {
|
|
|
272
274
|
fetchDate: terms.fetchDate,
|
|
273
275
|
content: sourceDocument.content,
|
|
274
276
|
mimeType: sourceDocument.mimeType,
|
|
277
|
+
metadata: {
|
|
278
|
+
'x-engine-version': process.env.npm_package_version,
|
|
279
|
+
'x-fetcher': sourceDocument.fetcher,
|
|
280
|
+
'x-source-document-location': sourceDocument.location,
|
|
281
|
+
},
|
|
275
282
|
});
|
|
276
283
|
|
|
277
284
|
await this.recorder.record(record);
|
|
@@ -24,7 +24,7 @@ const MULTIPLE_SOURCE_DOCUMENTS_PREFIX = 'This version was recorded after extrac
|
|
|
24
24
|
export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${Object.values(COMMIT_MESSAGE_PREFIXES).join('|')})`);
|
|
25
25
|
|
|
26
26
|
export function toPersistence(record, snapshotIdentiferTemplate) {
|
|
27
|
-
const { serviceId, termsType, documentId, isExtractOnly, snapshotIds = [], mimeType, isFirstRecord } = record;
|
|
27
|
+
const { serviceId, termsType, documentId, isExtractOnly, snapshotIds = [], mimeType, isFirstRecord, metadata } = record;
|
|
28
28
|
|
|
29
29
|
let prefix = isExtractOnly ? COMMIT_MESSAGE_PREFIXES.extractOnly : COMMIT_MESSAGE_PREFIXES.update;
|
|
30
30
|
|
|
@@ -46,11 +46,12 @@ export function toPersistence(record, snapshotIdentiferTemplate) {
|
|
|
46
46
|
message: `${subject}\n\n${documentIdMessage || ''}\n\n${snapshotIdsMessage || ''}`,
|
|
47
47
|
content: record.content,
|
|
48
48
|
filePath,
|
|
49
|
+
metadata,
|
|
49
50
|
};
|
|
50
51
|
}
|
|
51
52
|
|
|
52
53
|
export function toDomain(commit) {
|
|
53
|
-
const { hash, date, message, body, diff } = commit;
|
|
54
|
+
const { hash, date, message, body, diff, trailers = {} } = commit;
|
|
54
55
|
|
|
55
56
|
const modifiedFilesInCommit = diff.files.map(({ file }) => file);
|
|
56
57
|
|
|
@@ -68,17 +69,22 @@ export function toDomain(commit) {
|
|
|
68
69
|
serviceId: path.dirname(relativeFilePath),
|
|
69
70
|
termsType,
|
|
70
71
|
documentId,
|
|
71
|
-
mimeType: mime.getType(relativeFilePath),
|
|
72
72
|
fetchDate: new Date(date),
|
|
73
73
|
isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIXES.startTracking) || message.startsWith(COMMIT_MESSAGE_PREFIXES.deprecated_startTracking),
|
|
74
|
-
|
|
75
|
-
snapshotIds: snapshotIdsMatch || [],
|
|
74
|
+
metadata: { ...trailers },
|
|
76
75
|
};
|
|
77
76
|
|
|
78
|
-
|
|
77
|
+
const mimeTypeValue = mime.getType(relativeFilePath);
|
|
78
|
+
|
|
79
|
+
if (mimeTypeValue == mime.getType('markdown')) {
|
|
80
|
+
attributes.isExtractOnly = message.startsWith(COMMIT_MESSAGE_PREFIXES.extractOnly) || message.startsWith(COMMIT_MESSAGE_PREFIXES.deprecated_refilter);
|
|
81
|
+
attributes.snapshotIds = snapshotIdsMatch;
|
|
82
|
+
|
|
79
83
|
return new Version(attributes);
|
|
80
84
|
}
|
|
81
85
|
|
|
86
|
+
attributes.mimeType = mimeTypeValue;
|
|
87
|
+
|
|
82
88
|
return new Snapshot(attributes);
|
|
83
89
|
}
|
|
84
90
|
|
|
@@ -3,6 +3,8 @@ import path from 'path';
|
|
|
3
3
|
|
|
4
4
|
import simpleGit from 'simple-git';
|
|
5
5
|
|
|
6
|
+
import { parseTrailers, formatTrailers } from './trailers.js';
|
|
7
|
+
|
|
6
8
|
process.env.LC_ALL = 'en_GB'; // Ensure git messages will be in English as some errors are handled by analysing the message content
|
|
7
9
|
|
|
8
10
|
const fs = fsApi.promises;
|
|
@@ -38,7 +40,7 @@ export default class Git {
|
|
|
38
40
|
return this.git.add(this.relativePath(filePath));
|
|
39
41
|
}
|
|
40
42
|
|
|
41
|
-
async commit({ filePath, message, date = new Date() }) {
|
|
43
|
+
async commit({ filePath, message, date = new Date(), trailers = {} }) {
|
|
42
44
|
const commitDate = new Date(date).toISOString();
|
|
43
45
|
let summary;
|
|
44
46
|
|
|
@@ -46,7 +48,10 @@ export default class Git {
|
|
|
46
48
|
process.env.GIT_AUTHOR_DATE = commitDate;
|
|
47
49
|
process.env.GIT_COMMITTER_DATE = commitDate;
|
|
48
50
|
|
|
49
|
-
|
|
51
|
+
const trailersSection = formatTrailers(trailers);
|
|
52
|
+
const finalMessage = trailersSection ? `${message}\n\n${trailersSection}` : message;
|
|
53
|
+
|
|
54
|
+
summary = await this.git.commit(finalMessage, filePath, ['--no-verify']); // Skip pre-commit and commit-msg hooks, as commits are programmatically managed, to optimize performance
|
|
50
55
|
} finally {
|
|
51
56
|
process.env.GIT_AUTHOR_DATE = '';
|
|
52
57
|
process.env.GIT_COMMITTER_DATE = '';
|
|
@@ -70,14 +75,23 @@ export default class Git {
|
|
|
70
75
|
async getCommit(options) {
|
|
71
76
|
const [commit] = await this.listCommits([ '-1', ...options ]); // Returns only the most recent commit matching the given options
|
|
72
77
|
|
|
78
|
+
if (commit) {
|
|
79
|
+
commit.trailers = parseTrailers(commit.body);
|
|
80
|
+
}
|
|
81
|
+
|
|
73
82
|
return commit;
|
|
74
83
|
}
|
|
75
84
|
|
|
76
85
|
async log(options = []) {
|
|
77
86
|
try {
|
|
78
87
|
const logSummary = await this.git.log(options);
|
|
88
|
+
const commits = logSummary.all;
|
|
89
|
+
|
|
90
|
+
commits.forEach(commit => {
|
|
91
|
+
commit.trailers = parseTrailers(commit.body);
|
|
92
|
+
});
|
|
79
93
|
|
|
80
|
-
return
|
|
94
|
+
return commits;
|
|
81
95
|
} catch (error) {
|
|
82
96
|
if (/unknown revision or path not in the working tree|does not have any commits yet/.test(error.message)) {
|
|
83
97
|
return [];
|
|
@@ -41,12 +41,12 @@ export default class GitRepository extends RepositoryInterface {
|
|
|
41
41
|
record.isFirstRecord = !await this.#isTracked(serviceId, termsType, documentId);
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
const { message, content, filePath: relativeFilePath } = await this.#toPersistence(record);
|
|
44
|
+
const { message, content, filePath: relativeFilePath, metadata } = await this.#toPersistence(record);
|
|
45
45
|
|
|
46
46
|
const filePath = path.join(this.path, relativeFilePath);
|
|
47
47
|
|
|
48
48
|
await GitRepository.writeFile({ filePath, content });
|
|
49
|
-
const sha = await this.#commit({ filePath, message, date: fetchDate });
|
|
49
|
+
const sha = await this.#commit({ filePath, message, date: fetchDate, trailers: metadata });
|
|
50
50
|
|
|
51
51
|
if (!sha) {
|
|
52
52
|
return Object(null);
|
|
@@ -153,11 +153,11 @@ export default class GitRepository extends RepositoryInterface {
|
|
|
153
153
|
return filePath;
|
|
154
154
|
}
|
|
155
155
|
|
|
156
|
-
async #commit({ filePath, message, date }) {
|
|
156
|
+
async #commit({ filePath, message, date, trailers }) {
|
|
157
157
|
try {
|
|
158
158
|
await this.git.add(filePath);
|
|
159
159
|
|
|
160
|
-
return await this.git.commit({ filePath, message, date });
|
|
160
|
+
return await this.git.commit({ filePath, message, date, trailers });
|
|
161
161
|
} catch (error) {
|
|
162
162
|
throw new Error(`Could not commit ${filePath} with message "${message}" due to error: "${error}"`);
|
|
163
163
|
}
|
|
@@ -41,6 +41,11 @@ const HTML_MIME_TYPE = mime.getType('html');
|
|
|
41
41
|
const PDF_MIME_TYPE = mime.getType('pdf');
|
|
42
42
|
const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/terms.pdf'), { encoding: 'utf8' });
|
|
43
43
|
|
|
44
|
+
const METADATA = {
|
|
45
|
+
fetcher: 'test-fetcher',
|
|
46
|
+
'engine-version': '5.0.0',
|
|
47
|
+
};
|
|
48
|
+
|
|
44
49
|
describe('GitRepository', () => {
|
|
45
50
|
let git;
|
|
46
51
|
let subject;
|
|
@@ -314,6 +319,26 @@ describe('GitRepository', () => {
|
|
|
314
319
|
expect(commit.message).to.include(TERMS_TYPE);
|
|
315
320
|
});
|
|
316
321
|
});
|
|
322
|
+
|
|
323
|
+
context('when metadata is provided', () => {
|
|
324
|
+
before(async () => {
|
|
325
|
+
({ id, isFirstRecord } = await subject.save(new Version({
|
|
326
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
327
|
+
termsType: TERMS_TYPE,
|
|
328
|
+
content: CONTENT,
|
|
329
|
+
fetchDate: FETCH_DATE,
|
|
330
|
+
metadata: METADATA,
|
|
331
|
+
})));
|
|
332
|
+
|
|
333
|
+
([commit] = await git.log());
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
after(() => subject.removeAll());
|
|
337
|
+
|
|
338
|
+
it('stores metadata as commit trailers', () => {
|
|
339
|
+
expect(commit.trailers).to.deep.equal(METADATA);
|
|
340
|
+
});
|
|
341
|
+
});
|
|
317
342
|
});
|
|
318
343
|
|
|
319
344
|
describe('#findById', () => {
|
|
@@ -328,6 +353,7 @@ describe('GitRepository', () => {
|
|
|
328
353
|
fetchDate: FETCH_DATE,
|
|
329
354
|
snapshotIds: [SNAPSHOT_ID],
|
|
330
355
|
mimeType: HTML_MIME_TYPE,
|
|
356
|
+
metadata: METADATA,
|
|
331
357
|
})));
|
|
332
358
|
|
|
333
359
|
(record = await subject.findById(id));
|
|
@@ -367,6 +393,10 @@ describe('GitRepository', () => {
|
|
|
367
393
|
expect(record.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
|
|
368
394
|
});
|
|
369
395
|
|
|
396
|
+
it('returns metadata', () => {
|
|
397
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
398
|
+
});
|
|
399
|
+
|
|
370
400
|
context('when requested record does not exist', () => {
|
|
371
401
|
it('returns null', async () => {
|
|
372
402
|
expect(await subject.findById('inexistantID')).to.equal(null);
|
|
@@ -435,6 +465,28 @@ describe('GitRepository', () => {
|
|
|
435
465
|
expect(recordFound).to.equal(null);
|
|
436
466
|
});
|
|
437
467
|
});
|
|
468
|
+
|
|
469
|
+
context('when metadata is provided', () => {
|
|
470
|
+
let record;
|
|
471
|
+
|
|
472
|
+
before(async () => {
|
|
473
|
+
await subject.save(new Version({
|
|
474
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
475
|
+
termsType: TERMS_TYPE,
|
|
476
|
+
content: CONTENT,
|
|
477
|
+
fetchDate: FETCH_DATE,
|
|
478
|
+
metadata: METADATA,
|
|
479
|
+
}));
|
|
480
|
+
|
|
481
|
+
record = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE, FETCH_DATE);
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
after(() => subject.removeAll());
|
|
485
|
+
|
|
486
|
+
it('retrieves metadata', () => {
|
|
487
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
488
|
+
});
|
|
489
|
+
});
|
|
438
490
|
});
|
|
439
491
|
|
|
440
492
|
describe('#findAll', () => {
|
|
@@ -557,6 +609,7 @@ describe('GitRepository', () => {
|
|
|
557
609
|
content: UPDATED_FILE_CONTENT,
|
|
558
610
|
fetchDate: FETCH_DATE,
|
|
559
611
|
snapshotIds: [SNAPSHOT_ID],
|
|
612
|
+
metadata: METADATA,
|
|
560
613
|
})));
|
|
561
614
|
|
|
562
615
|
latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
|
|
@@ -575,6 +628,10 @@ describe('GitRepository', () => {
|
|
|
575
628
|
it('returns the latest record content', () => {
|
|
576
629
|
expect(latestRecord.content.toString('utf8')).to.equal(UPDATED_FILE_CONTENT);
|
|
577
630
|
});
|
|
631
|
+
|
|
632
|
+
it('returns metadata', () => {
|
|
633
|
+
expect(latestRecord.metadata).to.deep.equal(METADATA);
|
|
634
|
+
});
|
|
578
635
|
});
|
|
579
636
|
});
|
|
580
637
|
|
|
@@ -901,6 +958,28 @@ describe('GitRepository', () => {
|
|
|
901
958
|
expect(mime.getType(EXPECTED_PDF_SNAPSHOT_FILE_PATH)).to.equal(PDF_MIME_TYPE);
|
|
902
959
|
});
|
|
903
960
|
});
|
|
961
|
+
|
|
962
|
+
context('when metadata is provided', () => {
|
|
963
|
+
before(async () => {
|
|
964
|
+
({ id, isFirstRecord } = await subject.save(new Snapshot({
|
|
965
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
966
|
+
termsType: TERMS_TYPE,
|
|
967
|
+
documentId: DOCUMENT_ID,
|
|
968
|
+
content: CONTENT,
|
|
969
|
+
fetchDate: FETCH_DATE,
|
|
970
|
+
mimeType: HTML_MIME_TYPE,
|
|
971
|
+
metadata: METADATA,
|
|
972
|
+
})));
|
|
973
|
+
|
|
974
|
+
([commit] = await git.log());
|
|
975
|
+
});
|
|
976
|
+
|
|
977
|
+
after(() => subject.removeAll());
|
|
978
|
+
|
|
979
|
+
it('stores metadata as commit trailers', () => {
|
|
980
|
+
expect(commit.trailers).to.deep.equal(METADATA);
|
|
981
|
+
});
|
|
982
|
+
});
|
|
904
983
|
});
|
|
905
984
|
|
|
906
985
|
describe('#findById', () => {
|
|
@@ -915,6 +994,7 @@ describe('GitRepository', () => {
|
|
|
915
994
|
content: CONTENT,
|
|
916
995
|
fetchDate: FETCH_DATE,
|
|
917
996
|
mimeType: HTML_MIME_TYPE,
|
|
997
|
+
metadata: METADATA,
|
|
918
998
|
})));
|
|
919
999
|
|
|
920
1000
|
(record = await subject.findById(id));
|
|
@@ -958,6 +1038,10 @@ describe('GitRepository', () => {
|
|
|
958
1038
|
expect(record.documentId).to.equal(DOCUMENT_ID);
|
|
959
1039
|
});
|
|
960
1040
|
|
|
1041
|
+
it('returns metadata', () => {
|
|
1042
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
1043
|
+
});
|
|
1044
|
+
|
|
961
1045
|
context('when requested record does not exist', () => {
|
|
962
1046
|
it('returns null', async () => {
|
|
963
1047
|
expect(await subject.findById('inexistantID')).to.equal(null);
|
|
@@ -1086,6 +1170,7 @@ describe('GitRepository', () => {
|
|
|
1086
1170
|
content: UPDATED_FILE_CONTENT,
|
|
1087
1171
|
mimeType: HTML_MIME_TYPE,
|
|
1088
1172
|
fetchDate: FETCH_DATE,
|
|
1173
|
+
metadata: METADATA,
|
|
1089
1174
|
})));
|
|
1090
1175
|
|
|
1091
1176
|
latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
|
|
@@ -1108,6 +1193,10 @@ describe('GitRepository', () => {
|
|
|
1108
1193
|
it('returns the latest record mime type', () => {
|
|
1109
1194
|
expect(latestRecord.mimeType).to.equal(HTML_MIME_TYPE);
|
|
1110
1195
|
});
|
|
1196
|
+
|
|
1197
|
+
it('returns metadata', () => {
|
|
1198
|
+
expect(latestRecord.metadata).to.deep.equal(METADATA);
|
|
1199
|
+
});
|
|
1111
1200
|
});
|
|
1112
1201
|
|
|
1113
1202
|
context('with PDF document', () => {
|
|
@@ -1205,6 +1294,29 @@ describe('GitRepository', () => {
|
|
|
1205
1294
|
expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]);
|
|
1206
1295
|
});
|
|
1207
1296
|
});
|
|
1297
|
+
|
|
1298
|
+
context('when metadata is provided', () => {
|
|
1299
|
+
let record;
|
|
1300
|
+
|
|
1301
|
+
before(async () => {
|
|
1302
|
+
await subject.save(new Snapshot({
|
|
1303
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
1304
|
+
termsType: TERMS_TYPE,
|
|
1305
|
+
content: CONTENT,
|
|
1306
|
+
fetchDate: FETCH_DATE,
|
|
1307
|
+
mimeType: HTML_MIME_TYPE,
|
|
1308
|
+
metadata: METADATA,
|
|
1309
|
+
}));
|
|
1310
|
+
|
|
1311
|
+
record = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE, FETCH_DATE);
|
|
1312
|
+
});
|
|
1313
|
+
|
|
1314
|
+
after(() => subject.removeAll());
|
|
1315
|
+
|
|
1316
|
+
it('retrieves metadata', () => {
|
|
1317
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
1318
|
+
});
|
|
1319
|
+
});
|
|
1208
1320
|
});
|
|
1209
1321
|
|
|
1210
1322
|
context('backwards compatibility with deprecated commit messages', () => {
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Parses Git-style trailers (`Key: value` lines) from the last paragraph of a commit message.
 *
 * Only the last blank-line-separated section of the message is considered, mirroring
 * Git's own trailer convention. Keys are normalized to lowercase; lines that do not
 * match the trailer key format, or have an empty value, are skipped.
 *
 * @param {string} message - Full commit message; may be null/undefined/empty.
 * @returns {Object<string, string>} Map of lowercased trailer keys to their values; empty object when no trailers are found.
 */
export function parseTrailers(message) {
  const trailers = {};

  if (!message) { // Guard: a null, undefined or empty message has no trailers
    return trailers;
  }

  // Trim trailing whitespace first: a message ending with a blank line would otherwise
  // produce an empty last section and silently drop all trailers
  const sections = message.trimEnd().split(/\n\n+/);
  const trailersSection = sections[sections.length - 1];

  if (!trailersSection.includes(':')) {
    return trailers;
  }

  const validTrailerKeyRegex = /^[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*:$/; // Accepts either a single word or multiple words separated by dashes

  for (const line of trailersSection.split('\n')) {
    const trimmedLine = line.trim();

    if (!trimmedLine) { // Skip empty lines
      continue;
    }

    const colonIndex = trimmedLine.indexOf(':');

    if (colonIndex === -1) { // Skip lines without a colon
      continue;
    }

    const key = trimmedLine.slice(0, colonIndex + 1); // Includes the colon so the regex can reject keys with trailing spaces
    const value = trimmedLine.slice(colonIndex + 1).trim();

    if (validTrailerKeyRegex.test(key) && value) {
      const keyWithoutColon = key.slice(0, -1);

      trailers[keyWithoutColon.toLowerCase()] = value;
    }
  }

  return trailers;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Serializes a trailers object into Git-style trailer lines (`Key: value`).
 *
 * Each key is capitalized (first letter uppercase, remainder lowercase); entries
 * whose value is the empty string are omitted.
 *
 * @param {Object<string, string>} trailers - Map of trailer keys to values.
 * @returns {string} Newline-joined trailer lines, or the empty string when there is nothing to format.
 */
export function formatTrailers(trailers) {
  const entries = Object.entries(trailers);

  if (entries.length === 0) {
    return '';
  }

  const lines = [];

  for (const [ key, value ] of entries) {
    if (value === '') { // Omit trailers whose value is the empty string
      continue;
    }

    const capitalizedKey = key[0].toUpperCase() + key.slice(1).toLowerCase();

    lines.push(`${capitalizedKey}: ${value}`);
  }

  return lines.join('\n');
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { expect } from 'chai';
|
|
2
|
+
|
|
3
|
+
import { parseTrailers, formatTrailers } from './trailers.js';
|
|
4
|
+
|
|
5
|
+
describe('trailers', () => {
  describe('#parseTrailers', () => {
    it('returns empty object for message without trailers', () => {
      expect(parseTrailers('A simple commit message\n\nWith a body')).to.deep.equal({});
    });

    it('returns empty object when last section has no colon', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nNo trailers here')).to.deep.equal({});
    });

    it('parses single word trailer key', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher: my-fetcher')).to.deep.equal({ fetcher: 'my-fetcher' });
    });

    it('parses multi-word trailer key with dashes', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFeature-Request: my-feature')).to.deep.equal({ 'feature-request': 'my-feature' });
    });

    it('parses multiple trailers with different key formats', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher: my-fetcher\nFeature-Request: my-feature')).to.deep.equal({
        fetcher: 'my-fetcher',
        'feature-request': 'my-feature',
      });
    });

    it('handles case-insensitive keys', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFETCHER: my-fetcher\nFeature-Request: my-feature')).to.deep.equal({
        fetcher: 'my-fetcher',
        'feature-request': 'my-feature',
      });
    });

    it('handles trailers with colons in values', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher: my:fetcher:with:colons')).to.deep.equal({ fetcher: 'my:fetcher:with:colons' });
    });

    it('ignores malformed trailer lines', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher: my-fetcher\nInvalid line\nReviewer: john-doe')).to.deep.equal({
        fetcher: 'my-fetcher',
        reviewer: 'john-doe',
      });
    });

    it('ignores trailer keys with spaces', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFeature Request: my-feature\nFetcher: my-fetcher')).to.deep.equal({ fetcher: 'my-fetcher' });
    });

    it('ignores trailer keys with spaces before colon', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher : my-fetcher\nFeature-Request: my-feature')).to.deep.equal({ 'feature-request': 'my-feature' });
    });

    it('ignores trailer keys ending with dash', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFeature-: my-feature\nFetcher: my-fetcher')).to.deep.equal({ fetcher: 'my-fetcher' });
    });

    it('only keeps trailers from the last section', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher: my-fetcher\n\nFeature-Request: my-feature')).to.deep.equal({ 'feature-request': 'my-feature' });
    });

    it('ignores trailers with empty values', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nFetcher:\nFeature-request: my-feature')).to.deep.equal({ 'feature-request': 'my-feature' });
    });

    it('handles keys with numbers', () => {
      expect(parseTrailers('A commit message\n\nWith a body\n\nIssue-123: my-issue\nFetcher: my-fetcher')).to.deep.equal({
        'issue-123': 'my-issue',
        fetcher: 'my-fetcher',
      });
    });

    it('handles multiple consecutive empty lines in message', () => {
      expect(parseTrailers('A commit message\n\n\n\nWith a body\n\nFetcher: my-fetcher')).to.deep.equal({ fetcher: 'my-fetcher' });
    });
  });

  describe('#formatTrailers', () => {
    it('returns empty string when no trailers', () => {
      expect(formatTrailers({})).to.equal('');
    });

    it('formats single word trailer key', () => {
      expect(formatTrailers({ fetcher: 'my-fetcher' })).to.equal('Fetcher: my-fetcher');
    });

    it('formats multi-word trailer key with dashes', () => {
      expect(formatTrailers({ 'feature-request': 'my-feature' })).to.equal('Feature-request: my-feature');
    });

    it('formats multiple trailers with different key formats', () => {
      const trailers = {
        fetcher: 'my-fetcher',
        'feature-request': 'my-feature',
      };

      expect(formatTrailers(trailers)).to.equal('Fetcher: my-fetcher\nFeature-request: my-feature');
    });

    it('capitalizes trailer keys', () => {
      const trailers = {
        fetcher: 'my-fetcher',
        'feature-request': 'my-feature',
      };

      expect(formatTrailers(trailers)).to.equal('Fetcher: my-fetcher\nFeature-request: my-feature');
    });

    it('handles case-insensitive keys', () => {
      const trailers = {
        FETCHER: 'my-fetcher',
        'FEATURE-REQUEST': 'my-feature',
      };

      expect(formatTrailers(trailers)).to.equal('Fetcher: my-fetcher\nFeature-request: my-feature');
    });

    it('skips empty string values', () => {
      const trailers = {
        fetcher: '',
        'feature-request': 'my-feature',
      };

      expect(formatTrailers(trailers)).to.equal('Feature-request: my-feature');
    });

    it('handles keys with numbers', () => {
      const trailers = {
        'issue-123': 'my-issue',
        fetcher: 'my-fetcher',
      };

      expect(formatTrailers(trailers)).to.equal('Issue-123: my-issue\nFetcher: my-fetcher');
    });
  });
});
|
@@ -17,7 +17,7 @@ export function toPersistence(record) {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
export function toDomain(mongoDocument) {
|
|
20
|
-
const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isExtractOnly, isRefilter, isFirstRecord, snapshotIds } = mongoDocument;
|
|
20
|
+
const { _id, serviceId, termsType, documentId, fetchDate, mimeType, isExtractOnly, isRefilter, isFirstRecord, snapshotIds, metadata } = mongoDocument;
|
|
21
21
|
|
|
22
22
|
const attributes = {
|
|
23
23
|
id: _id.toString(),
|
|
@@ -29,6 +29,7 @@ export function toDomain(mongoDocument) {
|
|
|
29
29
|
isFirstRecord: Boolean(isFirstRecord),
|
|
30
30
|
isExtractOnly: Boolean(isExtractOnly) || Boolean(isRefilter),
|
|
31
31
|
snapshotIds: snapshotIds?.map(snapshotId => snapshotId.toString()) || [],
|
|
32
|
+
metadata,
|
|
32
33
|
};
|
|
33
34
|
|
|
34
35
|
if (snapshotIds) {
|
|
@@ -34,6 +34,11 @@ const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test
|
|
|
34
34
|
const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/termsModified.pdf'));
|
|
35
35
|
const PDF_MIME_TYPE = mime.getType('pdf');
|
|
36
36
|
|
|
37
|
+
const METADATA = {
|
|
38
|
+
fetcher: 'test-fetcher',
|
|
39
|
+
'engine-version': '5.0.0',
|
|
40
|
+
};
|
|
41
|
+
|
|
37
42
|
let collection;
|
|
38
43
|
|
|
39
44
|
describe('MongoRepository', () => {
|
|
@@ -57,10 +62,10 @@ describe('MongoRepository', () => {
|
|
|
57
62
|
|
|
58
63
|
context('when it is the first record', () => {
|
|
59
64
|
before(async () => {
|
|
60
|
-
numberOfRecordsBefore = await collection.
|
|
65
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
61
66
|
serviceId: SERVICE_PROVIDER_ID,
|
|
62
67
|
termsType: TERMS_TYPE,
|
|
63
|
-
})
|
|
68
|
+
});
|
|
64
69
|
|
|
65
70
|
(record = await subject.save(new Version({
|
|
66
71
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -68,12 +73,13 @@ describe('MongoRepository', () => {
|
|
|
68
73
|
content: CONTENT,
|
|
69
74
|
fetchDate: FETCH_DATE,
|
|
70
75
|
snapshotIds: [SNAPSHOT_ID],
|
|
76
|
+
metadata: METADATA,
|
|
71
77
|
})));
|
|
72
78
|
|
|
73
|
-
numberOfRecordsAfter = await collection.
|
|
79
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
74
80
|
serviceId: SERVICE_PROVIDER_ID,
|
|
75
81
|
termsType: TERMS_TYPE,
|
|
76
|
-
})
|
|
82
|
+
});
|
|
77
83
|
|
|
78
84
|
(mongoDocument = await collection.findOne({
|
|
79
85
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -132,10 +138,10 @@ describe('MongoRepository', () => {
|
|
|
132
138
|
snapshotIds: [SNAPSHOT_ID],
|
|
133
139
|
})));
|
|
134
140
|
|
|
135
|
-
numberOfRecordsBefore = await collection.
|
|
141
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
136
142
|
serviceId: SERVICE_PROVIDER_ID,
|
|
137
143
|
termsType: TERMS_TYPE,
|
|
138
|
-
})
|
|
144
|
+
});
|
|
139
145
|
|
|
140
146
|
(record = await subject.save(new Version({
|
|
141
147
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -145,10 +151,10 @@ describe('MongoRepository', () => {
|
|
|
145
151
|
snapshotIds: [SNAPSHOT_ID],
|
|
146
152
|
})));
|
|
147
153
|
|
|
148
|
-
numberOfRecordsAfter = await collection.
|
|
154
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
149
155
|
serviceId: SERVICE_PROVIDER_ID,
|
|
150
156
|
termsType: TERMS_TYPE,
|
|
151
|
-
})
|
|
157
|
+
});
|
|
152
158
|
|
|
153
159
|
([mongoDocument] = await collection.find({
|
|
154
160
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -181,10 +187,10 @@ describe('MongoRepository', () => {
|
|
|
181
187
|
snapshotIds: [SNAPSHOT_ID],
|
|
182
188
|
}));
|
|
183
189
|
|
|
184
|
-
numberOfRecordsBefore = await collection.
|
|
190
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
185
191
|
serviceId: SERVICE_PROVIDER_ID,
|
|
186
192
|
termsType: TERMS_TYPE,
|
|
187
|
-
})
|
|
193
|
+
});
|
|
188
194
|
|
|
189
195
|
(record = await subject.save(new Version({
|
|
190
196
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -194,10 +200,10 @@ describe('MongoRepository', () => {
|
|
|
194
200
|
snapshotIds: [SNAPSHOT_ID],
|
|
195
201
|
})));
|
|
196
202
|
|
|
197
|
-
numberOfRecordsAfter = await collection.
|
|
203
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
198
204
|
serviceId: SERVICE_PROVIDER_ID,
|
|
199
205
|
termsType: TERMS_TYPE,
|
|
200
|
-
})
|
|
206
|
+
});
|
|
201
207
|
});
|
|
202
208
|
|
|
203
209
|
after(() => subject.removeAll());
|
|
@@ -223,10 +229,10 @@ describe('MongoRepository', () => {
|
|
|
223
229
|
snapshotIds: [SNAPSHOT_ID],
|
|
224
230
|
})); // An extracted only version cannot be the first record
|
|
225
231
|
|
|
226
|
-
numberOfRecordsBefore = await collection.
|
|
232
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
227
233
|
serviceId: SERVICE_PROVIDER_ID,
|
|
228
234
|
termsType: TERMS_TYPE,
|
|
229
|
-
})
|
|
235
|
+
});
|
|
230
236
|
|
|
231
237
|
(record = await subject.save(new Version({
|
|
232
238
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -237,10 +243,10 @@ describe('MongoRepository', () => {
|
|
|
237
243
|
isExtractOnly: true,
|
|
238
244
|
})));
|
|
239
245
|
|
|
240
|
-
numberOfRecordsAfter = await collection.
|
|
246
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
241
247
|
serviceId: SERVICE_PROVIDER_ID,
|
|
242
248
|
termsType: TERMS_TYPE,
|
|
243
|
-
})
|
|
249
|
+
});
|
|
244
250
|
|
|
245
251
|
([mongoDocument] = await collection.find({
|
|
246
252
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -356,6 +362,29 @@ describe('MongoRepository', () => {
|
|
|
356
362
|
expect(mongoDocument.documentId).to.equal(DOCUMENT_ID);
|
|
357
363
|
});
|
|
358
364
|
});
|
|
365
|
+
|
|
366
|
+
context('when metadata is provided', () => {
|
|
367
|
+
before(async () => {
|
|
368
|
+
await subject.save(new Version({
|
|
369
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
370
|
+
termsType: TERMS_TYPE,
|
|
371
|
+
content: CONTENT,
|
|
372
|
+
fetchDate: FETCH_DATE,
|
|
373
|
+
metadata: METADATA,
|
|
374
|
+
}));
|
|
375
|
+
|
|
376
|
+
(mongoDocument = await collection.findOne({
|
|
377
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
378
|
+
termsType: TERMS_TYPE,
|
|
379
|
+
}));
|
|
380
|
+
});
|
|
381
|
+
|
|
382
|
+
after(() => subject.removeAll());
|
|
383
|
+
|
|
384
|
+
it('stores metadata as commit trailers', () => {
|
|
385
|
+
expect(mongoDocument.metadata).to.deep.equal(METADATA);
|
|
386
|
+
});
|
|
387
|
+
});
|
|
359
388
|
});
|
|
360
389
|
|
|
361
390
|
describe('#findById', () => {
|
|
@@ -369,6 +398,7 @@ describe('MongoRepository', () => {
|
|
|
369
398
|
content: CONTENT,
|
|
370
399
|
fetchDate: FETCH_DATE,
|
|
371
400
|
snapshotIds: [SNAPSHOT_ID],
|
|
401
|
+
metadata: METADATA,
|
|
372
402
|
})));
|
|
373
403
|
|
|
374
404
|
(record = await subject.findById(id));
|
|
@@ -408,6 +438,10 @@ describe('MongoRepository', () => {
|
|
|
408
438
|
expect(record.snapshotIds).to.deep.equal([SNAPSHOT_ID]);
|
|
409
439
|
});
|
|
410
440
|
|
|
441
|
+
it('returns the metadata', () => {
|
|
442
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
443
|
+
});
|
|
444
|
+
|
|
411
445
|
context('when requested record does not exist', () => {
|
|
412
446
|
it('returns null', async () => {
|
|
413
447
|
expect(await subject.findById('inexistantID')).to.equal(null);
|
|
@@ -504,6 +538,28 @@ describe('MongoRepository', () => {
|
|
|
504
538
|
});
|
|
505
539
|
});
|
|
506
540
|
});
|
|
541
|
+
|
|
542
|
+
context('when metadata is provided', () => {
|
|
543
|
+
let record;
|
|
544
|
+
|
|
545
|
+
before(async () => {
|
|
546
|
+
await subject.save(new Version({
|
|
547
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
548
|
+
termsType: TERMS_TYPE,
|
|
549
|
+
content: CONTENT,
|
|
550
|
+
fetchDate: FETCH_DATE,
|
|
551
|
+
metadata: METADATA,
|
|
552
|
+
}));
|
|
553
|
+
|
|
554
|
+
record = await subject.findByDate(SERVICE_PROVIDER_ID, TERMS_TYPE, FETCH_DATE);
|
|
555
|
+
});
|
|
556
|
+
|
|
557
|
+
after(() => subject.removeAll());
|
|
558
|
+
|
|
559
|
+
it('retrieves metadata', () => {
|
|
560
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
561
|
+
});
|
|
562
|
+
});
|
|
507
563
|
});
|
|
508
564
|
|
|
509
565
|
describe('#findAll', () => {
|
|
@@ -695,6 +751,28 @@ describe('MongoRepository', () => {
|
|
|
695
751
|
expect(latestRecord).to.equal(null);
|
|
696
752
|
});
|
|
697
753
|
});
|
|
754
|
+
|
|
755
|
+
context('when metadata is provided', () => {
|
|
756
|
+
let record;
|
|
757
|
+
|
|
758
|
+
before(async () => {
|
|
759
|
+
await subject.save(new Version({
|
|
760
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
761
|
+
termsType: TERMS_TYPE,
|
|
762
|
+
content: CONTENT,
|
|
763
|
+
fetchDate: FETCH_DATE,
|
|
764
|
+
metadata: METADATA,
|
|
765
|
+
}));
|
|
766
|
+
|
|
767
|
+
record = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
|
|
768
|
+
});
|
|
769
|
+
|
|
770
|
+
after(() => subject.removeAll());
|
|
771
|
+
|
|
772
|
+
it('retrieves metadata', () => {
|
|
773
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
774
|
+
});
|
|
775
|
+
});
|
|
698
776
|
});
|
|
699
777
|
|
|
700
778
|
describe('#iterate', () => {
|
|
@@ -770,10 +848,10 @@ describe('MongoRepository', () => {
|
|
|
770
848
|
|
|
771
849
|
context('when it is the first record', () => {
|
|
772
850
|
before(async () => {
|
|
773
|
-
numberOfRecordsBefore = await collection.
|
|
851
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
774
852
|
serviceId: SERVICE_PROVIDER_ID,
|
|
775
853
|
termsType: TERMS_TYPE,
|
|
776
|
-
})
|
|
854
|
+
});
|
|
777
855
|
|
|
778
856
|
(record = await subject.save(new Snapshot({
|
|
779
857
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -784,10 +862,10 @@ describe('MongoRepository', () => {
|
|
|
784
862
|
fetchDate: FETCH_DATE,
|
|
785
863
|
})));
|
|
786
864
|
|
|
787
|
-
numberOfRecordsAfter = await collection.
|
|
865
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
788
866
|
serviceId: SERVICE_PROVIDER_ID,
|
|
789
867
|
termsType: TERMS_TYPE,
|
|
790
|
-
})
|
|
868
|
+
});
|
|
791
869
|
|
|
792
870
|
(mongoDocument = await collection.findOne({
|
|
793
871
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -850,10 +928,10 @@ describe('MongoRepository', () => {
|
|
|
850
928
|
fetchDate: FETCH_DATE,
|
|
851
929
|
})));
|
|
852
930
|
|
|
853
|
-
numberOfRecordsBefore = await collection.
|
|
931
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
854
932
|
serviceId: SERVICE_PROVIDER_ID,
|
|
855
933
|
termsType: TERMS_TYPE,
|
|
856
|
-
})
|
|
934
|
+
});
|
|
857
935
|
|
|
858
936
|
(record = await subject.save(new Snapshot({
|
|
859
937
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -863,10 +941,10 @@ describe('MongoRepository', () => {
|
|
|
863
941
|
fetchDate: FETCH_DATE,
|
|
864
942
|
})));
|
|
865
943
|
|
|
866
|
-
numberOfRecordsAfter = await collection.
|
|
944
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
867
945
|
serviceId: SERVICE_PROVIDER_ID,
|
|
868
946
|
termsType: TERMS_TYPE,
|
|
869
|
-
})
|
|
947
|
+
});
|
|
870
948
|
|
|
871
949
|
([mongoDocument] = await collection.find({
|
|
872
950
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -899,10 +977,10 @@ describe('MongoRepository', () => {
|
|
|
899
977
|
fetchDate: FETCH_DATE,
|
|
900
978
|
}));
|
|
901
979
|
|
|
902
|
-
numberOfRecordsBefore = await collection.
|
|
980
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
903
981
|
serviceId: SERVICE_PROVIDER_ID,
|
|
904
982
|
termsType: TERMS_TYPE,
|
|
905
|
-
})
|
|
983
|
+
});
|
|
906
984
|
|
|
907
985
|
(record = await subject.save(new Snapshot({
|
|
908
986
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -912,10 +990,10 @@ describe('MongoRepository', () => {
|
|
|
912
990
|
fetchDate: FETCH_DATE_LATER,
|
|
913
991
|
})));
|
|
914
992
|
|
|
915
|
-
numberOfRecordsAfter = await collection.
|
|
993
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
916
994
|
serviceId: SERVICE_PROVIDER_ID,
|
|
917
995
|
termsType: TERMS_TYPE,
|
|
918
|
-
})
|
|
996
|
+
});
|
|
919
997
|
});
|
|
920
998
|
|
|
921
999
|
after(() => subject.removeAll());
|
|
@@ -931,12 +1009,12 @@ describe('MongoRepository', () => {
|
|
|
931
1009
|
|
|
932
1010
|
context('with PDF document', () => {
|
|
933
1011
|
before(async () => {
|
|
934
|
-
numberOfRecordsBefore = await collection.
|
|
1012
|
+
numberOfRecordsBefore = await collection.countDocuments({
|
|
935
1013
|
serviceId: SERVICE_PROVIDER_ID,
|
|
936
1014
|
termsType: TERMS_TYPE,
|
|
937
1015
|
content: PDF_CONTENT,
|
|
938
1016
|
mimeType: PDF_MIME_TYPE,
|
|
939
|
-
})
|
|
1017
|
+
});
|
|
940
1018
|
|
|
941
1019
|
(record = await subject.save(new Snapshot({
|
|
942
1020
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -946,10 +1024,10 @@ describe('MongoRepository', () => {
|
|
|
946
1024
|
fetchDate: FETCH_DATE,
|
|
947
1025
|
})));
|
|
948
1026
|
|
|
949
|
-
numberOfRecordsAfter = await collection.
|
|
1027
|
+
numberOfRecordsAfter = await collection.countDocuments({
|
|
950
1028
|
serviceId: SERVICE_PROVIDER_ID,
|
|
951
1029
|
termsType: TERMS_TYPE,
|
|
952
|
-
})
|
|
1030
|
+
});
|
|
953
1031
|
|
|
954
1032
|
(mongoDocument = await collection.findOne({
|
|
955
1033
|
serviceId: SERVICE_PROVIDER_ID,
|
|
@@ -991,9 +1069,10 @@ describe('MongoRepository', () => {
|
|
|
991
1069
|
content: CONTENT,
|
|
992
1070
|
fetchDate: FETCH_DATE,
|
|
993
1071
|
mimeType: HTML_MIME_TYPE,
|
|
1072
|
+
metadata: METADATA,
|
|
994
1073
|
})));
|
|
995
1074
|
|
|
996
|
-
|
|
1075
|
+
record = await subject.findById(id);
|
|
997
1076
|
});
|
|
998
1077
|
|
|
999
1078
|
after(() => subject.removeAll());
|
|
@@ -1034,6 +1113,10 @@ describe('MongoRepository', () => {
|
|
|
1034
1113
|
expect(record.documentId).to.equal(DOCUMENT_ID);
|
|
1035
1114
|
});
|
|
1036
1115
|
|
|
1116
|
+
it('returns the metadata', () => {
|
|
1117
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
1118
|
+
});
|
|
1119
|
+
|
|
1037
1120
|
context('when requested record does not exist', () => {
|
|
1038
1121
|
it('returns null', async () => {
|
|
1039
1122
|
expect(await subject.findById('inexistantID')).to.equal(null);
|
|
@@ -1272,6 +1355,29 @@ describe('MongoRepository', () => {
|
|
|
1272
1355
|
expect(latestRecord).to.equal(null);
|
|
1273
1356
|
});
|
|
1274
1357
|
});
|
|
1358
|
+
|
|
1359
|
+
context('when metadata is provided', () => {
|
|
1360
|
+
let record;
|
|
1361
|
+
|
|
1362
|
+
before(async () => {
|
|
1363
|
+
await subject.save(new Snapshot({
|
|
1364
|
+
serviceId: SERVICE_PROVIDER_ID,
|
|
1365
|
+
termsType: TERMS_TYPE,
|
|
1366
|
+
content: CONTENT,
|
|
1367
|
+
fetchDate: FETCH_DATE,
|
|
1368
|
+
mimeType: HTML_MIME_TYPE,
|
|
1369
|
+
metadata: METADATA,
|
|
1370
|
+
}));
|
|
1371
|
+
|
|
1372
|
+
record = await subject.findLatest(SERVICE_PROVIDER_ID, TERMS_TYPE);
|
|
1373
|
+
});
|
|
1374
|
+
|
|
1375
|
+
after(() => subject.removeAll());
|
|
1376
|
+
|
|
1377
|
+
it('retrieves metadata', () => {
|
|
1378
|
+
expect(record.metadata).to.deep.equal(METADATA);
|
|
1379
|
+
});
|
|
1380
|
+
});
|
|
1275
1381
|
});
|
|
1276
1382
|
|
|
1277
1383
|
describe('#iterate', () => {
|