reffy 6.1.2 → 6.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/reffy.js +12 -0
- package/src/lib/nock-server.js +9 -1
- package/src/lib/specs-crawler.js +60 -7
- package/src/lib/util.js +97 -64
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "reffy",
|
|
3
|
-
"version": "6.1
|
|
3
|
+
"version": "6.2.1",
|
|
4
4
|
"description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -32,20 +32,20 @@
|
|
|
32
32
|
"bin": "./reffy.js",
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"abortcontroller-polyfill": "1.7.3",
|
|
35
|
-
"browser-specs": "2.
|
|
36
|
-
"commander": "
|
|
35
|
+
"browser-specs": "2.27.0",
|
|
36
|
+
"commander": "9.0.0",
|
|
37
37
|
"fetch-filecache-for-crawling": "4.0.2",
|
|
38
|
-
"puppeteer": "13.1.
|
|
38
|
+
"puppeteer": "13.1.3",
|
|
39
39
|
"semver": "^7.3.5",
|
|
40
40
|
"webidl2": "24.2.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
|
-
"chai": "4.3.
|
|
43
|
+
"chai": "4.3.6",
|
|
44
44
|
"mocha": "9.2.0",
|
|
45
45
|
"nock": "13.2.2",
|
|
46
|
-
"respec": "
|
|
46
|
+
"respec": "29.0.5",
|
|
47
47
|
"respec-hljs": "2.1.1",
|
|
48
|
-
"rollup": "2.
|
|
48
|
+
"rollup": "2.67.0"
|
|
49
49
|
},
|
|
50
50
|
"scripts": {
|
|
51
51
|
"test": "mocha --recursive tests/"
|
package/reffy.js
CHANGED
|
@@ -70,6 +70,7 @@ program
|
|
|
70
70
|
.usage('[options]')
|
|
71
71
|
.description('Crawls and processes a list of Web specifications')
|
|
72
72
|
.option('-d, --debug', 'debug mode, crawl one spec at a time')
|
|
73
|
+
.option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
|
|
73
74
|
.option('-m, --module <modules...>', 'spec processing modules')
|
|
74
75
|
.option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
|
|
75
76
|
.option('-q, --quiet', 'do not report progress and other warnings to the console')
|
|
@@ -92,6 +93,7 @@ will dump ~100MB of data to the console:
|
|
|
92
93
|
}
|
|
93
94
|
const crawlOptions = {
|
|
94
95
|
debug: options.debug,
|
|
96
|
+
fallback: options.fallback,
|
|
95
97
|
output: options.output,
|
|
96
98
|
publishedVersion: options.release,
|
|
97
99
|
quiet: options.quiet,
|
|
@@ -143,6 +145,16 @@ Description:
|
|
|
143
145
|
strongly recommended.
|
|
144
146
|
|
|
145
147
|
Usage notes for some of the options:
|
|
148
|
+
-f, --fallback <jsondata>
|
|
149
|
+
Provides an existing JSON crawl data file to use as a source of fallback data
|
|
150
|
+
for specs that fail to be crawled.
|
|
151
|
+
|
|
152
|
+
The fallback data gets copied as-is. It is the responsibility of the caller
|
|
153
|
+
to make sure that the extracts it may link to actually exist and match the ones
|
|
154
|
+
that the crawl would produce in the absence of errors (e.g. same modules).
|
|
155
|
+
|
|
156
|
+
The "error" property is set on specs for which fallback data was used.
|
|
157
|
+
|
|
146
158
|
-m, --module <modules...>
|
|
147
159
|
If processing modules are not specified, the crawler runs all core processing
|
|
148
160
|
modules defined in:
|
package/src/lib/nock-server.js
CHANGED
|
@@ -97,7 +97,15 @@ nock("https://www.w3.org")
|
|
|
97
97
|
{ "Content-Type": "application/js" })
|
|
98
98
|
.get("/Tools/respec/respec-w3c").replyWithFile(200,
|
|
99
99
|
path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
|
|
100
|
-
{ "Content-Type": "application/js" })
|
|
100
|
+
{ "Content-Type": "application/js" })
|
|
101
|
+
.get("/TR/idontexist/").reply(404, '');
|
|
102
|
+
|
|
103
|
+
nock("https://drafts.csswg.org")
|
|
104
|
+
.persist()
|
|
105
|
+
.get("/server-hiccup/").reply(200,
|
|
106
|
+
`<html><title>Server hiccup</title>
|
|
107
|
+
<h1> Index of Server Hiccup Module Level 42 </h1>`,
|
|
108
|
+
{ 'Content-Type': 'text/html' });
|
|
101
109
|
|
|
102
110
|
nock.emitter.on('error', function (err) {
|
|
103
111
|
console.error(err);
|
package/src/lib/specs-crawler.js
CHANGED
|
@@ -20,6 +20,7 @@ const {
|
|
|
20
20
|
completeWithAlternativeUrls,
|
|
21
21
|
expandBrowserModules,
|
|
22
22
|
expandCrawlResult,
|
|
23
|
+
expandSpecResult,
|
|
23
24
|
getGeneratedIDLNamesByCSSProperty,
|
|
24
25
|
isLatestLevelThatPasses,
|
|
25
26
|
processSpecification,
|
|
@@ -29,6 +30,36 @@ const {
|
|
|
29
30
|
} = require('./util');
|
|
30
31
|
|
|
31
32
|
|
|
33
|
+
/**
|
|
34
|
+
* Return the spec if crawl succeeded or crawl result from given fallback list
|
|
35
|
+
* if crawl yielded an error (and fallback does exist).
|
|
36
|
+
*
|
|
37
|
+
* The function keeps the "error" property on the crawl result it returns so
|
|
38
|
+
* that the error does not get entirely lost.
|
|
39
|
+
*
|
|
40
|
+
* @function
|
|
41
|
+
* @param {Object} spec Actual spec crawl result
|
|
42
|
+
*
|
|
43
|
+
* @param {String} fallbackFolder The folder that contains fallback extracts
|
|
44
|
+
* @param {Array<Object>} fallbackData A list of crawl results to use as
|
|
45
|
+
* fallback when needed
|
|
46
|
+
* @return {Object} The given crawl result or a new one that reuses fallback
|
|
47
|
+
* content if needed
|
|
48
|
+
*/
|
|
49
|
+
async function specOrFallback(spec, fallbackFolder, fallbackData) {
|
|
50
|
+
if (spec.error && fallbackData) {
|
|
51
|
+
const fallback = fallbackData.find(s => s.url === spec.url);
|
|
52
|
+
if (fallback) {
|
|
53
|
+
const copy = Object.assign({}, fallback);
|
|
54
|
+
const result = await expandSpecResult(copy, fallbackFolder);
|
|
55
|
+
result.error = spec.error;
|
|
56
|
+
return result;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
return spec;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
|
|
32
63
|
/**
|
|
33
64
|
* Load and parse the given spec.
|
|
34
65
|
*
|
|
@@ -43,9 +74,11 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
43
74
|
spec.crawled = crawlOptions.publishedVersion ?
|
|
44
75
|
(spec.release ? spec.release : spec.nightly) :
|
|
45
76
|
spec.nightly;
|
|
77
|
+
const fallbackFolder = crawlOptions.fallback ?
|
|
78
|
+
path.dirname(crawlOptions.fallback) : '';
|
|
46
79
|
|
|
47
80
|
if (spec.error) {
|
|
48
|
-
return spec;
|
|
81
|
+
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
|
|
49
82
|
}
|
|
50
83
|
|
|
51
84
|
try {
|
|
@@ -150,7 +183,7 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
150
183
|
spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
|
|
151
184
|
}
|
|
152
185
|
|
|
153
|
-
return spec;
|
|
186
|
+
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
|
|
154
187
|
}
|
|
155
188
|
|
|
156
189
|
|
|
@@ -308,10 +341,23 @@ async function saveSpecResults(spec, settings) {
|
|
|
308
341
|
* specification descriptions
|
|
309
342
|
*/
|
|
310
343
|
async function crawlList(speclist, crawlOptions) {
|
|
311
|
-
|
|
344
|
+
// Make a shallow copy of crawl options object since we're going
|
|
345
|
+
// to modify properties in place
|
|
346
|
+
crawlOptions = Object.assign({}, crawlOptions);
|
|
312
347
|
|
|
313
|
-
//
|
|
348
|
+
// Expand list of processing modules to use if not already done
|
|
314
349
|
crawlOptions.modules = expandBrowserModules(crawlOptions.modules);
|
|
350
|
+
|
|
351
|
+
// Load fallback data if necessary
|
|
352
|
+
if (crawlOptions.fallback) {
|
|
353
|
+
try {
|
|
354
|
+
crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
|
|
355
|
+
} catch (e) {
|
|
356
|
+
throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
// Prepare Puppeteer instance
|
|
315
361
|
await setupBrowser(crawlOptions.modules);
|
|
316
362
|
|
|
317
363
|
const list = speclist.map(completeWithAlternativeUrls);
|
|
@@ -397,10 +443,9 @@ async function adjustExtractsPerSeries(data, property, settings) {
|
|
|
397
443
|
}
|
|
398
444
|
else if (spec[property]) {
|
|
399
445
|
// Not the right full level in the series, drop created extract
|
|
400
|
-
// and link to the series extract instead
|
|
401
446
|
const pathname = path.resolve(settings.output, spec[property]);
|
|
402
447
|
fs.unlinkSync(pathname);
|
|
403
|
-
|
|
448
|
+
delete spec[property];
|
|
404
449
|
}
|
|
405
450
|
});
|
|
406
451
|
|
|
@@ -493,16 +538,24 @@ function crawlSpecs(options) {
|
|
|
493
538
|
});
|
|
494
539
|
}
|
|
495
540
|
|
|
496
|
-
const requestedList =
|
|
541
|
+
const requestedList = options?.specs ?
|
|
497
542
|
prepareListOfSpecs(options.specs) :
|
|
498
543
|
specs;
|
|
499
544
|
|
|
545
|
+
// Make a shallow copy of passed options parameter and expand modules
|
|
546
|
+
// in place.
|
|
547
|
+
options = Object.assign({}, options);
|
|
548
|
+
options.modules = expandBrowserModules(options.modules);
|
|
549
|
+
|
|
500
550
|
return crawlList(requestedList, options)
|
|
501
551
|
.then(async results => {
|
|
502
552
|
// Merge extracts per series when necessary (CSS/IDL extracts)
|
|
503
553
|
for (const mod of options.modules) {
|
|
504
554
|
if (mod.extractsPerSeries) {
|
|
505
555
|
await adjustExtractsPerSeries(results, mod.property, options);
|
|
556
|
+
if (mod.property === 'idl') {
|
|
557
|
+
await adjustExtractsPerSeries(results, 'idlparsed', options);
|
|
558
|
+
}
|
|
506
559
|
}
|
|
507
560
|
}
|
|
508
561
|
return results;
|
package/src/lib/util.js
CHANGED
|
@@ -496,11 +496,15 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
496
496
|
};
|
|
497
497
|
|
|
498
498
|
// Load the page
|
|
499
|
+
// (note HTTP status is 0 when `file://` URLs are loaded)
|
|
499
500
|
if (spec.html) {
|
|
500
501
|
await page.setContent(spec.html, loadOptions);
|
|
501
502
|
}
|
|
502
503
|
else {
|
|
503
|
-
await page.goto(spec.url, loadOptions);
|
|
504
|
+
const result = await page.goto(spec.url, loadOptions);
|
|
505
|
+
if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
|
|
506
|
+
throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
|
|
507
|
+
}
|
|
504
508
|
}
|
|
505
509
|
|
|
506
510
|
// Handle multi-page specs
|
|
@@ -516,7 +520,11 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
516
520
|
await subCdp.send('Fetch.enable');
|
|
517
521
|
subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
|
|
518
522
|
try {
|
|
519
|
-
|
|
523
|
+
// (Note HTTP status is 0 when `file://` URLs are loaded)
|
|
524
|
+
const subresult = await subPage.goto(url, loadOptions);
|
|
525
|
+
if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
|
|
526
|
+
throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
|
|
527
|
+
}
|
|
520
528
|
const html = await subPage.evaluate(() => {
|
|
521
529
|
return document.body.outerHTML
|
|
522
530
|
.replace(/<body/, '<section')
|
|
@@ -542,6 +550,14 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
542
550
|
|
|
543
551
|
// Wait until the generation of the spec is completely over
|
|
544
552
|
await page.evaluate(async () => {
|
|
553
|
+
// Detect draft CSS server hiccups as done in browser-specs:
|
|
554
|
+
// https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
|
|
555
|
+
const title = (window.document.querySelector('h1')?.textContent || '')
|
|
556
|
+
.replace(/\n/g, '').trim();
|
|
557
|
+
if (title.startsWith('Index of ')) {
|
|
558
|
+
throw new Error(`CSS server issue detected`);
|
|
559
|
+
}
|
|
560
|
+
|
|
545
561
|
const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
|
|
546
562
|
window.document.head.querySelector("script[src*='respec']");
|
|
547
563
|
|
|
@@ -694,6 +710,79 @@ function isLatestLevelThatPasses(spec, list, predicate) {
|
|
|
694
710
|
}
|
|
695
711
|
|
|
696
712
|
|
|
713
|
+
/**
|
|
714
|
+
* Takes the results of a crawl for a given spec and expands it to include the
|
|
715
|
+
* contents of referenced files.
|
|
716
|
+
*
|
|
717
|
+
* The function handles both files and HTTPS resources, using either filesystem
|
|
718
|
+
* functions (for files) or fetch (for HTTPS resources).
|
|
719
|
+
*
|
|
720
|
+
* Note the spec object is expanded in place.
|
|
721
|
+
*
|
|
722
|
+
* @function
|
|
723
|
+
* @public
|
|
724
|
+
* @param {Object} spec Spec crawl result that needs to be expanded
|
|
725
|
+
* @param {string} baseFolder The base folder that contains the crawl file, or
|
|
726
|
+
* the base HTTPS URI to resolve relative links in the crawl object.
|
|
727
|
+
* @param {Array(string)} properties An explicit list of properties to expand
|
|
728
|
+
* (no value means "expand all possible properties")
|
|
729
|
+
* @return {Promise(object)} The promise to get an expanded crawl object that
|
|
730
|
+
* contains the contents of referenced files and no longer references external
|
|
731
|
+
* files (for the requested properties)
|
|
732
|
+
*/
|
|
733
|
+
async function expandSpecResult(spec, baseFolder, properties) {
|
|
734
|
+
baseFolder = baseFolder || '';
|
|
735
|
+
await Promise.all(Object.keys(spec).map(async property => {
|
|
736
|
+
// Only consider properties explicitly requested
|
|
737
|
+
if (properties && !properties.includes(property)) {
|
|
738
|
+
return;
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// Only consider properties that link to an extract, i.e. an IDL
|
|
742
|
+
// or JSON file in subfolder.
|
|
743
|
+
if (!spec[property] ||
|
|
744
|
+
(typeof spec[property] !== 'string') ||
|
|
745
|
+
!spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
|
|
746
|
+
return;
|
|
747
|
+
}
|
|
748
|
+
let contents = null;
|
|
749
|
+
if (baseFolder.startsWith('https:')) {
|
|
750
|
+
const url = (new URL(spec[property], baseFolder)).toString();
|
|
751
|
+
const response = await fetch(url, { nolog: true });
|
|
752
|
+
contents = await response.text();
|
|
753
|
+
}
|
|
754
|
+
else {
|
|
755
|
+
const filename = path.join(baseFolder, spec[property]);
|
|
756
|
+
contents = await fs.readFile(filename, 'utf8');
|
|
757
|
+
}
|
|
758
|
+
if (spec[property].endsWith('.json')) {
|
|
759
|
+
contents = JSON.parse(contents);
|
|
760
|
+
}
|
|
761
|
+
if (property === 'css') {
|
|
762
|
+
// Special case for CSS where the "css" level does not exist
|
|
763
|
+
// in the generated files
|
|
764
|
+
const css = Object.assign({}, contents);
|
|
765
|
+
delete css.spec;
|
|
766
|
+
spec[property] = css;
|
|
767
|
+
}
|
|
768
|
+
else if (property === 'idl') {
|
|
769
|
+
// Special case for raw IDL extracts, which are text extracts.
|
|
770
|
+
// Also drop header that may have been added when extract was
|
|
771
|
+
// serialized.
|
|
772
|
+
if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
|
|
773
|
+
const endOfHeader = contents.indexOf('\n\n');
|
|
774
|
+
contents = contents.substring(endOfHeader + 2);
|
|
775
|
+
}
|
|
776
|
+
spec.idl = contents;
|
|
777
|
+
}
|
|
778
|
+
else {
|
|
779
|
+
spec[property] = contents[property];
|
|
780
|
+
}
|
|
781
|
+
}));
|
|
782
|
+
return spec;
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
|
|
697
786
|
/**
|
|
698
787
|
* Takes the results of a crawl (typically the contents of the index.json file)
|
|
699
788
|
* and expands it to include the contents of all referenced files.
|
|
@@ -708,73 +797,16 @@ function isLatestLevelThatPasses(spec, list, predicate) {
|
|
|
708
797
|
* @param {Object} crawl Crawl index object that needs to be expanded
|
|
709
798
|
* @param {string} baseFolder The base folder that contains the crawl file, or
|
|
710
799
|
* the base HTTPS URI to resolve relative links in the crawl object.
|
|
711
|
-
* @param {Array(string)} An explicit list of properties to expand
|
|
712
|
-
* means "expand all possible properties")
|
|
800
|
+
* @param {Array(string)} properties An explicit list of properties to expand
|
|
801
|
+
* (no value means "expand all possible properties")
|
|
713
802
|
* @return {Promise(object)} The promise to get an expanded crawl object that
|
|
714
803
|
* contains the entire crawl report (and no longer references external files)
|
|
715
804
|
*/
|
|
716
805
|
async function expandCrawlResult(crawl, baseFolder, properties) {
|
|
717
806
|
baseFolder = baseFolder || '';
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
// Only consider properties explicitly requested
|
|
722
|
-
if (properties && !properties.includes(property)) {
|
|
723
|
-
return;
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
// Only consider properties that link to an extract, i.e. an IDL
|
|
727
|
-
// or JSON file in subfolder.
|
|
728
|
-
if (!spec[property] ||
|
|
729
|
-
(typeof spec[property] !== 'string') ||
|
|
730
|
-
!spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
|
|
731
|
-
return;
|
|
732
|
-
}
|
|
733
|
-
let contents = null;
|
|
734
|
-
if (baseFolder.startsWith('https:')) {
|
|
735
|
-
const url = (new URL(spec[property], baseFolder)).toString();
|
|
736
|
-
const response = await fetch(url, { nolog: true });
|
|
737
|
-
contents = await response.text();
|
|
738
|
-
}
|
|
739
|
-
else {
|
|
740
|
-
const filename = path.join(baseFolder, spec[property]);
|
|
741
|
-
contents = await fs.readFile(filename, 'utf8');
|
|
742
|
-
}
|
|
743
|
-
|
|
744
|
-
// Force UNIX-style line endings
|
|
745
|
-
// (Git may auto-convert LF to CRLF on Windows machines and we
|
|
746
|
-
// want to store multiline IDL fragments as values of properties
|
|
747
|
-
// in parsed IDL trees)
|
|
748
|
-
contents = contents.replace(/\r\n/g, '\n');
|
|
749
|
-
|
|
750
|
-
if (spec[property].endsWith('.json')) {
|
|
751
|
-
contents = JSON.parse(contents);
|
|
752
|
-
}
|
|
753
|
-
if (property === 'css') {
|
|
754
|
-
// Special case for CSS where the "css" level does not exist
|
|
755
|
-
// in the generated files
|
|
756
|
-
const css = Object.assign({}, contents);
|
|
757
|
-
delete css.spec;
|
|
758
|
-
spec[property] = css;
|
|
759
|
-
}
|
|
760
|
-
else if (property === 'idl') {
|
|
761
|
-
// Special case for raw IDL extracts, which are text extracts.
|
|
762
|
-
// Also drop header that may have been added when extract was
|
|
763
|
-
// serialized.
|
|
764
|
-
if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
|
|
765
|
-
const endOfHeader = contents.indexOf('\n\n');
|
|
766
|
-
contents = contents.substring(endOfHeader + 2);
|
|
767
|
-
}
|
|
768
|
-
spec.idl = contents;
|
|
769
|
-
}
|
|
770
|
-
else {
|
|
771
|
-
spec[property] = contents[property];
|
|
772
|
-
}
|
|
773
|
-
}));
|
|
774
|
-
return spec;
|
|
775
|
-
}
|
|
776
|
-
|
|
777
|
-
crawl.results = await Promise.all(crawl.results.map(expandSpec));
|
|
807
|
+
crawl.results = await Promise.all(
|
|
808
|
+
crawl.results.map(spec => expandSpecResult(spec, baseFolder, properties))
|
|
809
|
+
);
|
|
778
810
|
return crawl;
|
|
779
811
|
}
|
|
780
812
|
|
|
@@ -860,6 +892,7 @@ module.exports = {
|
|
|
860
892
|
completeWithAlternativeUrls,
|
|
861
893
|
isLatestLevelThatPasses,
|
|
862
894
|
expandCrawlResult,
|
|
895
|
+
expandSpecResult,
|
|
863
896
|
getGeneratedIDLNamesByCSSProperty,
|
|
864
897
|
createFolderIfNeeded
|
|
865
898
|
};
|