reffy 6.2.0 → 6.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +158 -158
- package/index.js +11 -11
- package/package.json +53 -53
- package/reffy.js +248 -248
- package/src/browserlib/canonicalize-url.mjs +50 -50
- package/src/browserlib/create-outline.mjs +352 -352
- package/src/browserlib/extract-cssdfn.mjs +319 -319
- package/src/browserlib/extract-dfns.mjs +686 -686
- package/src/browserlib/extract-elements.mjs +205 -205
- package/src/browserlib/extract-headings.mjs +48 -48
- package/src/browserlib/extract-ids.mjs +28 -28
- package/src/browserlib/extract-links.mjs +28 -28
- package/src/browserlib/extract-references.mjs +203 -203
- package/src/browserlib/extract-webidl.mjs +134 -134
- package/src/browserlib/get-absolute-url.mjs +21 -21
- package/src/browserlib/get-generator.mjs +26 -26
- package/src/browserlib/get-lastmodified-date.mjs +13 -13
- package/src/browserlib/get-title.mjs +11 -11
- package/src/browserlib/informative-selector.mjs +16 -16
- package/src/browserlib/map-ids-to-headings.mjs +136 -136
- package/src/browserlib/reffy.json +53 -53
- package/src/cli/check-missing-dfns.js +609 -609
- package/src/cli/generate-idlnames.js +430 -430
- package/src/cli/generate-idlparsed.js +139 -139
- package/src/cli/merge-crawl-results.js +128 -128
- package/src/cli/parse-webidl.js +430 -430
- package/src/lib/css-grammar-parse-tree.schema.json +109 -109
- package/src/lib/css-grammar-parser.js +440 -440
- package/src/lib/fetch.js +55 -55
- package/src/lib/nock-server.js +119 -119
- package/src/lib/specs-crawler.js +605 -603
- package/src/lib/util.js +898 -898
- package/src/specs/missing-css-rules.json +197 -197
- package/src/specs/spec-equivalents.json +149 -149
- package/src/browserlib/extract-editors.mjs~ +0 -14
- package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
- package/src/cli/csstree-grammar-check.js +0 -28
- package/src/cli/csstree-grammar-check.js~ +0 -10
- package/src/cli/csstree-grammar-parser.js +0 -11
- package/src/cli/csstree-grammar-parser.js~ +0 -1
- package/src/cli/extract-editors.js~ +0 -38
- package/src/cli/process-specs.js~ +0 -28
|
@@ -1,139 +1,139 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* The parsed IDL generator takes a crawl report or a single spec as input, and
|
|
4
|
-
* generates (or re-generates if it already exists) a parsed IDL structure from
|
|
5
|
-
* the raw IDL that the spec defines. Result is dumped to the console or saved
|
|
6
|
-
* to the given folder.
|
|
7
|
-
*
|
|
8
|
-
* The parsed IDL generator is used by the crawler to create and save the parsed
|
|
9
|
-
* IDL structures. It is also useful to re-generate the parsed IDL info when
|
|
10
|
-
* an IDL patch has been applied to the raw IDL.
|
|
11
|
-
*
|
|
12
|
-
* The parsed IDL generator can be called directly through:
|
|
13
|
-
*
|
|
14
|
-
* `node generate-idlparsed.js [crawl report] [save folder]`
|
|
15
|
-
*
|
|
16
|
-
* where `crawl report` is the path to the folder that contains the
|
|
17
|
-
* `index.json` file and all other crawl results produced by specs-crawler.js,
|
|
18
|
-
* and `save folder` is an optional folder (which must exist) where IDL
|
|
19
|
-
* parse results are to be saved. In the absence of this parameter, the report
|
|
20
|
-
* is written to the console.
|
|
21
|
-
*
|
|
22
|
-
* When a folder is provided, the IDL parse results are saved as a JSON
|
|
23
|
-
* structure in an `idlparsed` subfolder.
|
|
24
|
-
*/
|
|
25
|
-
|
|
26
|
-
const fs = require('fs');
|
|
27
|
-
const path = require('path');
|
|
28
|
-
const webidlParser = require('../cli/parse-webidl');
|
|
29
|
-
const {
|
|
30
|
-
expandCrawlResult,
|
|
31
|
-
requireFromWorkingDirectory,
|
|
32
|
-
createFolderIfNeeded
|
|
33
|
-
} = require('../lib/util');
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
/**
 * Parse the raw IDL of a spec and attach the outcome to the spec object.
 *
 * @function
 * @public
 * @param {Object} spec The spec object to update. The raw IDL is read from
 *   its `idl` property, which is left untouched.
 * @return {Object} The same spec object, updated in place. When raw IDL was
 *   found, the object gains an `idlparsed` property that holds either the
 *   structure returned by the Web IDL parser (completed with a
 *   `hasObsoleteIdl` property), or the error converted to a string when the
 *   IDL could not be processed. A spec without IDL is returned as-is.
 */
async function generateIdlParsed(spec) {
  const rawIdl = spec?.idl;
  if (rawIdl) {
    try {
      const parsed = await webidlParser.parse(rawIdl);
      spec.idlparsed = parsed;
      parsed.hasObsoleteIdl = webidlParser.hasObsoleteIdl(rawIdl);
    }
    catch (err) {
      // The IDL is invalid: record the error in place of the parsed
      // structure so that it may still be reported or saved to a file.
      spec.idlparsed = err.toString();
    }
  }
  return spec;
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
/**
 * Load the crawl report found in the given folder and generate the parsed
 * IDL of each spec it contains.
 *
 * @function
 * @param {String} crawlPath Path to the folder that contains the `index.json`
 *   file of the crawl report.
 * @return {Promise<Object>} The crawl report, as returned by
 *   `expandCrawlResult` for the `idl` property, once `generateIdlParsed` has
 *   run on every entry of its `results` list.
 */
async function generateIdlParsedFromPath(crawlPath) {
  const indexFile = path.resolve(crawlPath, 'index.json');
  const report = await expandCrawlResult(
    requireFromWorkingDirectory(indexFile), crawlPath, ['idl']);
  const pending = report.results.map(generateIdlParsed);
  await Promise.all(pending);
  return report;
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
/**
 * Save the `idlparsed` extract of a spec to a JSON file.
 *
 * The file is named after the spec's shortname and gets created in an
 * `idlparsed` subfolder of the given folder. The subfolder is created when
 * needed, even if the spec turns out to have nothing to save.
 *
 * @function
 * @public
 * @param {Object} spec Spec object with the parsed IDL in `idlparsed`
 * @param {String} folder Path to root folder where `idlparsed` folder needs to
 *   appear.
 * @return {String} The relative path from the root folder to the generated
 *   file, or `undefined` when the spec has no `idlparsed` property.
 */
async function saveIdlParsed(spec, folder) {
  const targetFolder = path.join(folder, 'idlparsed');
  await createFolderIfNeeded(targetFolder);

  if (!spec?.idlparsed) {
    return;
  }

  // Only keep what identifies the spec next to the parsed IDL
  const extract = {
    spec: {
      title: spec.title,
      url: spec.crawled
    },
    idlparsed: spec.idlparsed
  };
  const target = path.join(targetFolder, spec.shortname + '.json');
  await fs.promises.writeFile(target, JSON.stringify(extract, null, 2));
  return `idlparsed/${spec.shortname}.json`;
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
/**************************************************
|
|
112
|
-
Export methods for use as module
|
|
113
|
-
**************************************************/
|
|
114
|
-
module.exports.generateIdlParsed = generateIdlParsed;
|
|
115
|
-
module.exports.saveIdlParsed = saveIdlParsed;
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
/**************************************************
|
|
119
|
-
Code run if the code is run as a stand-alone module
|
|
120
|
-
**************************************************/
|
|
121
|
-
// Command-line entry point: expects the path to the crawl results folder as
// first argument, and an optional folder where results get saved as second
// argument. Without a save folder, the whole report is printed to the console.
if (require.main === module) {
  const crawlPath = process.argv[2];
  if (!crawlPath) {
    console.error('Required path to crawl results folder is missing');
    process.exit(2);
  }

  const savePath = process.argv[3];
  generateIdlParsedFromPath(crawlPath)
    .then(report => {
      if (savePath) {
        return Promise.all(report.results.map(
          spec => saveIdlParsed(spec, savePath)));
      }
      else {
        console.log(JSON.stringify(report, null, 2));
      }
    })
    .catch(err => {
      // Report the failure and exit with a non-zero code instead of leaving
      // the rejection unhandled.
      console.error(err);
      process.exit(64);
    });
}
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* The parsed IDL generator takes a crawl report or a single spec as input, and
|
|
4
|
+
* generates (or re-generates if it already exists) a parsed IDL structure from
|
|
5
|
+
* the raw IDL that the spec defines. Result is dumped to the console or saved
|
|
6
|
+
* to the given folder.
|
|
7
|
+
*
|
|
8
|
+
* The parsed IDL generator is used by the crawler to create and save the parsed
|
|
9
|
+
* IDL structures. It is also useful to re-generate the parsed IDL info when
|
|
10
|
+
* an IDL patch has been applied to the raw IDL.
|
|
11
|
+
*
|
|
12
|
+
* The parsed IDL generator can be called directly through:
|
|
13
|
+
*
|
|
14
|
+
* `node generate-idlparsed.js [crawl report] [save folder]`
|
|
15
|
+
*
|
|
16
|
+
* where `crawl report` is the path to the folder that contains the
|
|
17
|
+
* `index.json` file and all other crawl results produced by specs-crawler.js,
|
|
18
|
+
* and `save folder` is an optional folder (which must exist) where IDL
|
|
19
|
+
* parse results are to be saved. In the absence of this parameter, the report
|
|
20
|
+
* is written to the console.
|
|
21
|
+
*
|
|
22
|
+
* When a folder is provided, the IDL parse results are saved as a JSON
|
|
23
|
+
* structure in an `idlparsed` subfolder.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const fs = require('fs');
|
|
27
|
+
const path = require('path');
|
|
28
|
+
const webidlParser = require('../cli/parse-webidl');
|
|
29
|
+
const {
|
|
30
|
+
expandCrawlResult,
|
|
31
|
+
requireFromWorkingDirectory,
|
|
32
|
+
createFolderIfNeeded
|
|
33
|
+
} = require('../lib/util');
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/**
 * Parse the raw IDL of a spec and attach the outcome to the spec object.
 *
 * @function
 * @public
 * @param {Object} spec The spec object to update. The raw IDL is read from
 *   its `idl` property, which is left untouched.
 * @return {Object} The same spec object, updated in place. When raw IDL was
 *   found, the object gains an `idlparsed` property that holds either the
 *   structure returned by the Web IDL parser (completed with a
 *   `hasObsoleteIdl` property), or the error converted to a string when the
 *   IDL could not be processed. A spec without IDL is returned as-is.
 */
async function generateIdlParsed(spec) {
  const rawIdl = spec?.idl;
  if (rawIdl) {
    try {
      const parsed = await webidlParser.parse(rawIdl);
      spec.idlparsed = parsed;
      parsed.hasObsoleteIdl = webidlParser.hasObsoleteIdl(rawIdl);
    }
    catch (err) {
      // The IDL is invalid: record the error in place of the parsed
      // structure so that it may still be reported or saved to a file.
      spec.idlparsed = err.toString();
    }
  }
  return spec;
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/**
 * Load the crawl report found in the given folder and generate the parsed
 * IDL of each spec it contains.
 *
 * @function
 * @param {String} crawlPath Path to the folder that contains the `index.json`
 *   file of the crawl report.
 * @return {Promise<Object>} The crawl report, as returned by
 *   `expandCrawlResult` for the `idl` property, once `generateIdlParsed` has
 *   run on every entry of its `results` list.
 */
async function generateIdlParsedFromPath(crawlPath) {
  const indexFile = path.resolve(crawlPath, 'index.json');
  const report = await expandCrawlResult(
    requireFromWorkingDirectory(indexFile), crawlPath, ['idl']);
  const pending = report.results.map(generateIdlParsed);
  await Promise.all(pending);
  return report;
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
/**
 * Save the `idlparsed` extract of a spec to a JSON file.
 *
 * The file is named after the spec's shortname and gets created in an
 * `idlparsed` subfolder of the given folder. The subfolder is created when
 * needed, even if the spec turns out to have nothing to save.
 *
 * @function
 * @public
 * @param {Object} spec Spec object with the parsed IDL in `idlparsed`
 * @param {String} folder Path to root folder where `idlparsed` folder needs to
 *   appear.
 * @return {String} The relative path from the root folder to the generated
 *   file, or `undefined` when the spec has no `idlparsed` property.
 */
async function saveIdlParsed(spec, folder) {
  const targetFolder = path.join(folder, 'idlparsed');
  await createFolderIfNeeded(targetFolder);

  if (!spec?.idlparsed) {
    return;
  }

  // Only keep what identifies the spec next to the parsed IDL
  const extract = {
    spec: {
      title: spec.title,
      url: spec.crawled
    },
    idlparsed: spec.idlparsed
  };
  const target = path.join(targetFolder, spec.shortname + '.json');
  await fs.promises.writeFile(target, JSON.stringify(extract, null, 2));
  return `idlparsed/${spec.shortname}.json`;
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
/**************************************************
|
|
112
|
+
Export methods for use as module
|
|
113
|
+
**************************************************/
|
|
114
|
+
module.exports.generateIdlParsed = generateIdlParsed;
|
|
115
|
+
module.exports.saveIdlParsed = saveIdlParsed;
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
/**************************************************
|
|
119
|
+
Code run if the code is run as a stand-alone module
|
|
120
|
+
**************************************************/
|
|
121
|
+
// Command-line entry point: expects the path to the crawl results folder as
// first argument, and an optional folder where results get saved as second
// argument. Without a save folder, the whole report is printed to the console.
if (require.main === module) {
  const crawlPath = process.argv[2];
  if (!crawlPath) {
    console.error('Required path to crawl results folder is missing');
    process.exit(2);
  }

  const savePath = process.argv[3];
  generateIdlParsedFromPath(crawlPath)
    .then(report => {
      if (savePath) {
        return Promise.all(report.results.map(
          spec => saveIdlParsed(spec, savePath)));
      }
      else {
        console.log(JSON.stringify(report, null, 2));
      }
    })
    .catch(err => {
      // Report the failure and exit with a non-zero code instead of leaving
      // the rejection unhandled.
      console.error(err);
      process.exit(64);
    });
}
|
|
@@ -1,128 +1,128 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* The crawl report merger can be used to merge a new crawl report into a
|
|
4
|
-
* reference one. This tool is typically useful to make incremental updates to a
|
|
5
|
-
* reference crawl, used as knowledge database. It replaces the crawl results of
|
|
6
|
-
* a given spec by the new results where appropriate.
|
|
7
|
-
*
|
|
8
|
-
* The crawl report merger can be called directly through:
|
|
9
|
-
*
|
|
10
|
-
* `node merge-crawl-results.js [new report] [ref report] [merged report]`
|
|
11
|
-
*
|
|
12
|
-
* where `new report` is the name of the new report to merge into `ref report`
|
|
13
|
-
* to produce the `merged report` file.
|
|
14
|
-
*
|
|
15
|
-
* @module merger
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
const fs = require('fs');
|
|
19
|
-
const requireFromWorkingDirectory = require('../lib/util').requireFromWorkingDirectory;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
/**
 * Compares specs for ordering by URL.
 *
 * A spec without a `url` is compared as if its URL were the empty string, so
 * that it sorts first instead of making the comparison throw.
 *
 * @param {Object} a First spec to compare
 * @param {Object} b Second spec to compare
 * @return {Number} A negative number if `a` sorts before `b`, a positive
 *   number if it sorts after, 0 if both URLs are equivalent
 */
const byURL = (a, b) => (a.url || '').localeCompare(b.url || '');
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
/**
 * Merge given crawl results with the given reference crawl results and return
 * the new results.
 *
 * A reference spec is dropped when a new spec shares its `url`, `html`,
 * `latest` or `shortname` value, one of its `versions`, or its `title` when
 * the `matchTitle` option is set. All new specs are kept. The merged list is
 * sorted with `byURL`.
 *
 * Errors raised while merging (e.g. when a crawl object is missing) are
 * reported through the returned promise rather than thrown synchronously.
 *
 * @function
 * @param {Object} newCrawl The crawl results to merge
 * @param {Object} refCrawl The reference crawl results
 * @param {Object} options Merge options. Only "matchTitle" is supported for now
 * @return {Promise<Object>} The promise to get a new crawl results object
 *   (`title`, optional `description`, `date`, `stats` and `results`) that
 *   contains the results of the merge
 */
async function mergeCrawlResults(newCrawl, refCrawl, options) {
  const matchTitle = !!(options && options.matchTitle);
  const newResults = newCrawl.results || [];
  const refResults = refCrawl.results || [];

  // Both values must be set and strictly equal to be considered a match
  const sameValue = (refValue, newValue) =>
    !!refValue && !!newValue && (refValue === newValue);

  // Tells whether one of the new specs replaces the given reference spec
  const isReplaced = refSpec => newResults.some(newSpec =>
    sameValue(refSpec.url, newSpec.url) ||
    sameValue(refSpec.html, newSpec.html) ||
    sameValue(refSpec.latest, newSpec.latest) ||
    sameValue(refSpec.shortname, newSpec.shortname) ||
    (refSpec.versions && newSpec.versions &&
      refSpec.versions.some(refVersion =>
        newSpec.versions.some(newVersion => refVersion === newVersion))) ||
    (matchTitle && sameValue(refSpec.title, newSpec.title)));

  const results = refResults
    .filter(refSpec => !isReplaced(refSpec))
    .concat(newResults)
    .sort(byURL);

  // Properties are added in the order in which they need to be serialized
  const crawlData = {
    title: newCrawl.title || refCrawl.title || 'Reffy crawl'
  };
  const description = newCrawl.description || refCrawl.description;
  if (description) {
    crawlData.description = description;
  }
  crawlData.date = (new Date()).toJSON();
  crawlData.stats = {
    crawled: results.length,
    errors: results.filter(spec => !!spec.error).length
  };
  crawlData.results = results;

  return crawlData;
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
/**
 * Merge the crawl results in the first JSON file with the crawl results in the
 * second JSON file, and create a third JSON file with the results.
 *
 * Any failure, including a failure to load one of the input files, is reported
 * through the returned promise rather than thrown synchronously.
 *
 * @function
 * @param {String} newCrawlPath The JSON file that contains the results to merge
 * @param {String} refCrawlPath The JSON file that contains the reference results
 * @param {String} resPath The JSON file that will contain the result of the merge
 * @param {Object} options Merge options. Only "matchTitle" is supported for now
 * @return {Promise} The promise to have merged the two JSON files into one.
 *   The promise resolves with no value.
 */
async function mergeCrawlFiles(newCrawlPath, refCrawlPath, resPath, options) {
  const newCrawl = requireFromWorkingDirectory(newCrawlPath);
  const refCrawl = requireFromWorkingDirectory(refCrawlPath);
  const filedata = await mergeCrawlResults(newCrawl, refCrawl, options || {});
  await fs.promises.writeFile(resPath, JSON.stringify(filedata, null, 2));
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
/**************************************************
|
|
97
|
-
Export the methods for use as module
|
|
98
|
-
**************************************************/
|
|
99
|
-
module.exports.mergeCrawlResults = mergeCrawlResults;
|
|
100
|
-
module.exports.mergeCrawlFiles = mergeCrawlFiles;
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
/**************************************************
|
|
104
|
-
Code run if the code is run as a stand-alone module
|
|
105
|
-
**************************************************/
|
|
106
|
-
// Command-line entry point: merges the crawl results file given as first
// argument into the reference file given as second argument, and saves the
// outcome to the file given as third argument. Specs are also matched on
// their title in that mode.
if (require.main === module) {
  const [newCrawlPath, refCrawlPath, resPath] = process.argv.slice(2);
  if (!newCrawlPath || !refCrawlPath || !resPath) {
    [
      'Command needs 3 filename parameters:',
      ' 1. the crawl results to merge into the reference crawl results',
      ' 2. the reference crawl results',
      ' 3. where to save the result of the merge'
    ].forEach(line => console.error(line));
    process.exit(2);
  }

  console.log('Merging crawl files into: ' + resPath);
  mergeCrawlFiles(newCrawlPath, refCrawlPath, resPath, { matchTitle: true })
    .then(() => console.log('Finished'))
    .catch(err => {
      console.error(err);
      process.exit(64);
    });
}
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* The crawl report merger can be used to merge a new crawl report into a
|
|
4
|
+
* reference one. This tool is typically useful to make incremental updates to a
|
|
5
|
+
* reference crawl, used as knowledge database. It replaces the crawl results of
|
|
6
|
+
* a given spec by the new results where appropriate.
|
|
7
|
+
*
|
|
8
|
+
* The crawl report merger can be called directly through:
|
|
9
|
+
*
|
|
10
|
+
* `node merge-crawl-results.js [new report] [ref report] [merged report]`
|
|
11
|
+
*
|
|
12
|
+
* where `new report` is the name of the new report to merge into `ref report`
|
|
13
|
+
* to produce the `merged report` file.
|
|
14
|
+
*
|
|
15
|
+
* @module merger
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const fs = require('fs');
|
|
19
|
+
const requireFromWorkingDirectory = require('../lib/util').requireFromWorkingDirectory;
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
/**
 * Compares specs for ordering by URL.
 *
 * A spec without a `url` is compared as if its URL were the empty string, so
 * that it sorts first instead of making the comparison throw.
 *
 * @param {Object} a First spec to compare
 * @param {Object} b Second spec to compare
 * @return {Number} A negative number if `a` sorts before `b`, a positive
 *   number if it sorts after, 0 if both URLs are equivalent
 */
const byURL = (a, b) => (a.url || '').localeCompare(b.url || '');
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
/**
 * Merge given crawl results with the given reference crawl results and return
 * the new results.
 *
 * A reference spec is dropped when a new spec shares its `url`, `html`,
 * `latest` or `shortname` value, one of its `versions`, or its `title` when
 * the `matchTitle` option is set. All new specs are kept. The merged list is
 * sorted with `byURL`.
 *
 * Errors raised while merging (e.g. when a crawl object is missing) are
 * reported through the returned promise rather than thrown synchronously.
 *
 * @function
 * @param {Object} newCrawl The crawl results to merge
 * @param {Object} refCrawl The reference crawl results
 * @param {Object} options Merge options. Only "matchTitle" is supported for now
 * @return {Promise<Object>} The promise to get a new crawl results object
 *   (`title`, optional `description`, `date`, `stats` and `results`) that
 *   contains the results of the merge
 */
async function mergeCrawlResults(newCrawl, refCrawl, options) {
  const matchTitle = !!(options && options.matchTitle);
  const newResults = newCrawl.results || [];
  const refResults = refCrawl.results || [];

  // Both values must be set and strictly equal to be considered a match
  const sameValue = (refValue, newValue) =>
    !!refValue && !!newValue && (refValue === newValue);

  // Tells whether one of the new specs replaces the given reference spec
  const isReplaced = refSpec => newResults.some(newSpec =>
    sameValue(refSpec.url, newSpec.url) ||
    sameValue(refSpec.html, newSpec.html) ||
    sameValue(refSpec.latest, newSpec.latest) ||
    sameValue(refSpec.shortname, newSpec.shortname) ||
    (refSpec.versions && newSpec.versions &&
      refSpec.versions.some(refVersion =>
        newSpec.versions.some(newVersion => refVersion === newVersion))) ||
    (matchTitle && sameValue(refSpec.title, newSpec.title)));

  const results = refResults
    .filter(refSpec => !isReplaced(refSpec))
    .concat(newResults)
    .sort(byURL);

  // Properties are added in the order in which they need to be serialized
  const crawlData = {
    title: newCrawl.title || refCrawl.title || 'Reffy crawl'
  };
  const description = newCrawl.description || refCrawl.description;
  if (description) {
    crawlData.description = description;
  }
  crawlData.date = (new Date()).toJSON();
  crawlData.stats = {
    crawled: results.length,
    errors: results.filter(spec => !!spec.error).length
  };
  crawlData.results = results;

  return crawlData;
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
/**
 * Merge the crawl results in the first JSON file with the crawl results in the
 * second JSON file, and create a third JSON file with the results.
 *
 * Any failure, including a failure to load one of the input files, is reported
 * through the returned promise rather than thrown synchronously.
 *
 * @function
 * @param {String} newCrawlPath The JSON file that contains the results to merge
 * @param {String} refCrawlPath The JSON file that contains the reference results
 * @param {String} resPath The JSON file that will contain the result of the merge
 * @param {Object} options Merge options. Only "matchTitle" is supported for now
 * @return {Promise} The promise to have merged the two JSON files into one.
 *   The promise resolves with no value.
 */
async function mergeCrawlFiles(newCrawlPath, refCrawlPath, resPath, options) {
  const newCrawl = requireFromWorkingDirectory(newCrawlPath);
  const refCrawl = requireFromWorkingDirectory(refCrawlPath);
  const filedata = await mergeCrawlResults(newCrawl, refCrawl, options || {});
  await fs.promises.writeFile(resPath, JSON.stringify(filedata, null, 2));
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
/**************************************************
|
|
97
|
+
Export the methods for use as module
|
|
98
|
+
**************************************************/
|
|
99
|
+
module.exports.mergeCrawlResults = mergeCrawlResults;
|
|
100
|
+
module.exports.mergeCrawlFiles = mergeCrawlFiles;
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
/**************************************************
|
|
104
|
+
Code run if the code is run as a stand-alone module
|
|
105
|
+
**************************************************/
|
|
106
|
+
// Command-line entry point: merges the crawl results file given as first
// argument into the reference file given as second argument, and saves the
// outcome to the file given as third argument. Specs are also matched on
// their title in that mode.
if (require.main === module) {
  const [newCrawlPath, refCrawlPath, resPath] = process.argv.slice(2);
  if (!newCrawlPath || !refCrawlPath || !resPath) {
    [
      'Command needs 3 filename parameters:',
      ' 1. the crawl results to merge into the reference crawl results',
      ' 2. the reference crawl results',
      ' 3. where to save the result of the merge'
    ].forEach(line => console.error(line));
    process.exit(2);
  }

  console.log('Merging crawl files into: ' + resPath);
  mergeCrawlFiles(newCrawlPath, refCrawlPath, resPath, { matchTitle: true })
    .then(() => console.log('Finished'))
    .catch(err => {
      console.error(err);
      process.exit(64);
    });
}
|