reffy 6.2.2 → 6.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/reffy.js +1 -1
- package/src/lib/fetch.js +2 -2
- package/src/lib/nock-server.js +10 -3
- package/src/lib/specs-crawler.js +22 -5
- package/src/lib/util.js +55 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "reffy",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.4.2",
|
|
4
4
|
"description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -32,20 +32,20 @@
|
|
|
32
32
|
"bin": "./reffy.js",
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"abortcontroller-polyfill": "1.7.3",
|
|
35
|
-
"browser-specs": "2.27.0",
|
|
36
35
|
"commander": "9.0.0",
|
|
37
|
-
"fetch-filecache-for-crawling": "4.0
|
|
38
|
-
"puppeteer": "13.
|
|
36
|
+
"fetch-filecache-for-crawling": "4.1.0",
|
|
37
|
+
"puppeteer": "13.3.2",
|
|
39
38
|
"semver": "^7.3.5",
|
|
39
|
+
"web-specs": "1.1.0",
|
|
40
40
|
"webidl2": "24.2.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"chai": "4.3.6",
|
|
44
44
|
"mocha": "9.2.0",
|
|
45
|
-
"nock": "13.2.
|
|
46
|
-
"respec": "
|
|
45
|
+
"nock": "13.2.4",
|
|
46
|
+
"respec": "30.0.5",
|
|
47
47
|
"respec-hljs": "2.1.1",
|
|
48
|
-
"rollup": "2.67.
|
|
48
|
+
"rollup": "2.67.2"
|
|
49
49
|
},
|
|
50
50
|
"scripts": {
|
|
51
51
|
"test": "mocha --recursive tests/"
|
package/reffy.js
CHANGED
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
const commander = require('commander');
|
|
25
25
|
const satisfies = require('semver/functions/satisfies');
|
|
26
|
-
const specs = require('browser-specs');
|
|
26
|
+
const specs = require('web-specs');
|
|
27
27
|
const { version, engines } = require('./package.json');
|
|
28
28
|
const { requireFromWorkingDirectory } = require('./src/lib/util');
|
|
29
29
|
const { crawlSpecs } = require('./src/lib/specs-crawler');
|
package/src/lib/fetch.js
CHANGED
|
@@ -33,7 +33,7 @@ catch (err) {
|
|
|
33
33
|
* @return {Promise(Response)} Promise to get an HTTP response
|
|
34
34
|
*/
|
|
35
35
|
async function fetch(url, options) {
|
|
36
|
-
options = Object.assign({}, options);
|
|
36
|
+
options = Object.assign({headers: {}}, options);
|
|
37
37
|
['cacheFolder', 'resetCache', 'cacheRefresh', 'logToConsole'].forEach(param => {
|
|
38
38
|
let fetchParam = (param === 'cacheRefresh') ? 'refresh' : param;
|
|
39
39
|
if (config[param] && !options.hasOwnProperty(fetchParam)) {
|
|
@@ -53,4 +53,4 @@ async function fetch(url, options) {
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
module.exports = fetch;
|
|
56
|
+
module.exports = fetch;
|
package/src/lib/nock-server.js
CHANGED
|
@@ -82,7 +82,7 @@ Object.keys(mockSpecs).forEach(path => {
|
|
|
82
82
|
// Handling requests generated by ReSpec documents
|
|
83
83
|
nock("https://api.specref.org")
|
|
84
84
|
.persist()
|
|
85
|
-
.get("/bibrefs?refs=webidl
|
|
85
|
+
.get("/bibrefs?refs=webidl").reply(200,
|
|
86
86
|
{ webidl: { href: "https://webidl.spec.whatwg.org/" } },
|
|
87
87
|
{ "Access-Control-Allow-Origin": "*" }
|
|
88
88
|
);
|
|
@@ -98,7 +98,14 @@ nock("https://www.w3.org")
|
|
|
98
98
|
.get("/Tools/respec/respec-w3c").replyWithFile(200,
|
|
99
99
|
path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
|
|
100
100
|
{ "Content-Type": "application/js" })
|
|
101
|
-
.get("/TR/idontexist/").reply(404, '')
|
|
101
|
+
.get("/TR/idontexist/").reply(404, '')
|
|
102
|
+
.get("/TR/ididnotchange/").reply(function() {
|
|
103
|
+
if (this.req.headers['if-modified-since'][0] === "Fri, 11 Feb 2022 00:00:42 GMT") {
|
|
104
|
+
return [304, ''];
|
|
105
|
+
} else {
|
|
106
|
+
return [200, 'Unexpected path'];
|
|
107
|
+
}
|
|
108
|
+
});
|
|
102
109
|
|
|
103
110
|
nock("https://drafts.csswg.org")
|
|
104
111
|
.persist()
|
|
@@ -117,4 +124,4 @@ nock.emitter.on('no match', function(req, options, requestBody) {
|
|
|
117
124
|
}
|
|
118
125
|
});
|
|
119
126
|
|
|
120
|
-
module.exports = nock;
|
|
127
|
+
module.exports = nock;
|
package/src/lib/specs-crawler.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
const fs = require('fs');
|
|
14
14
|
const path = require('path');
|
|
15
|
-
const specs = require('browser-specs');
|
|
15
|
+
const specs = require('web-specs');
|
|
16
16
|
const cssDfnParser = require('./css-grammar-parser');
|
|
17
17
|
const { generateIdlParsed, saveIdlParsed } = require('../cli/generate-idlparsed');
|
|
18
18
|
const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
|
|
@@ -29,6 +29,7 @@ const {
|
|
|
29
29
|
createFolderIfNeeded
|
|
30
30
|
} = require('./util');
|
|
31
31
|
|
|
32
|
+
const {version: reffyVersion} = require('../../package.json');
|
|
32
33
|
|
|
33
34
|
/**
|
|
34
35
|
* Return the spec if crawl succeeded or crawl result from given fallback list
|
|
@@ -78,10 +79,15 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
78
79
|
path.dirname(crawlOptions.fallback) : '';
|
|
79
80
|
|
|
80
81
|
if (spec.error) {
|
|
81
|
-
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
|
|
82
|
+
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData?.results);
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
try {
|
|
86
|
+
const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
|
|
87
|
+
let cacheInfo = {};
|
|
88
|
+
if (crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
|
|
89
|
+
cacheInfo = Object.assign({}, fallback?.crawlCacheInfo);
|
|
90
|
+
}
|
|
85
91
|
const result = await processSpecification(
|
|
86
92
|
spec.crawled,
|
|
87
93
|
(spec, modules) => {
|
|
@@ -97,8 +103,14 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
97
103
|
},
|
|
98
104
|
[spec, crawlOptions.modules],
|
|
99
105
|
{ quiet: crawlOptions.quiet,
|
|
100
|
-
forceLocalFetch: crawlOptions.forceLocalFetch
|
|
106
|
+
forceLocalFetch: crawlOptions.forceLocalFetch,
|
|
107
|
+
...cacheInfo}
|
|
101
108
|
);
|
|
109
|
+
if (result.status === "notmodified" && fallback) {
|
|
110
|
+
crawlOptions.quiet ?? console.warn(`skipping ${spec.url}, no change`);
|
|
111
|
+
const copy = Object.assign({}, fallback);
|
|
112
|
+
return expandSpecResult(copy, fallbackFolder);
|
|
113
|
+
}
|
|
102
114
|
|
|
103
115
|
// Specific rule for IDL extracts:
|
|
104
116
|
// parse the extracted WebIdl content
|
|
@@ -169,6 +181,9 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
169
181
|
|
|
170
182
|
// Copy results back into initial spec object
|
|
171
183
|
spec.crawled = result.crawled;
|
|
184
|
+
if (result.crawlCacheInfo) {
|
|
185
|
+
spec.crawlCacheInfo = result.crawlCacheInfo;
|
|
186
|
+
}
|
|
172
187
|
crawlOptions.modules.forEach(mod => {
|
|
173
188
|
if (result[mod.property]) {
|
|
174
189
|
spec[mod.property] = result[mod.property];
|
|
@@ -183,7 +198,7 @@ async function crawlSpec(spec, crawlOptions) {
|
|
|
183
198
|
spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
|
|
184
199
|
}
|
|
185
200
|
|
|
186
|
-
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
|
|
201
|
+
return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData?.results);
|
|
187
202
|
}
|
|
188
203
|
|
|
189
204
|
|
|
@@ -351,7 +366,7 @@ async function crawlList(speclist, crawlOptions) {
|
|
|
351
366
|
// Load fallback data if necessary
|
|
352
367
|
if (crawlOptions.fallback) {
|
|
353
368
|
try {
|
|
354
|
-
crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback))
|
|
369
|
+
crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback));
|
|
355
370
|
} catch (e) {
|
|
356
371
|
throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
|
|
357
372
|
}
|
|
@@ -469,12 +484,14 @@ async function saveResults(data, settings) {
|
|
|
469
484
|
|
|
470
485
|
// Save all results to an index.json file
|
|
471
486
|
const indexFilename = path.join(settings.output, 'index.json');
|
|
487
|
+
|
|
472
488
|
const contents = {
|
|
473
489
|
type: 'crawl',
|
|
474
490
|
title: 'Reffy crawl',
|
|
475
491
|
date: (new Date()).toJSON(),
|
|
476
492
|
options: settings,
|
|
477
493
|
stats: {},
|
|
494
|
+
crawler: `reffy-${reffyVersion}`,
|
|
478
495
|
results: data
|
|
479
496
|
};
|
|
480
497
|
contents.options.modules = contents.options.modules.map(mod => mod.property);
|
package/src/lib/util.js
CHANGED
|
@@ -13,7 +13,6 @@ const specEquivalents = require('../specs/spec-equivalents.json');
|
|
|
13
13
|
|
|
14
14
|
const reffyModules = require('../browserlib/reffy.json');
|
|
15
15
|
|
|
16
|
-
|
|
17
16
|
/**
|
|
18
17
|
* Maximum depth difference supported between Reffy's install path and custom
|
|
19
18
|
* modules that may be provided on the command-line
|
|
@@ -22,7 +21,6 @@ const reffyModules = require('../browserlib/reffy.json');
|
|
|
22
21
|
*/
|
|
23
22
|
const maxPathDepth = 20;
|
|
24
23
|
|
|
25
|
-
|
|
26
24
|
/**
|
|
27
25
|
* Returns a range array from 0 to the number provided (not included)
|
|
28
26
|
*/
|
|
@@ -325,7 +323,8 @@ async function teardownBrowser() {
|
|
|
325
323
|
* flag tells the function that all network requests need to be only handled
|
|
326
324
|
* by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
|
|
327
325
|
* network and caching logic), which is useful to keep full control of network
|
|
328
|
-
* requests in tests.
|
|
326
|
+
* requests in tests. The "etag" and "lastModified" options give input
|
|
327
|
+
* to the conditional fetch request sent for the primary crawled URL
|
|
329
328
|
* @return {Promise} The promise to get the results of the processing function
|
|
330
329
|
*/
|
|
331
330
|
async function processSpecification(spec, processFunction, args, options) {
|
|
@@ -333,6 +332,7 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
333
332
|
processFunction = processFunction || function () {};
|
|
334
333
|
args = args || [];
|
|
335
334
|
options = options || {};
|
|
335
|
+
let prefetchedResponse = {};
|
|
336
336
|
|
|
337
337
|
if (!browser) {
|
|
338
338
|
throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
|
|
@@ -409,17 +409,18 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
409
409
|
await cdp.send('Fetch.continueRequest', { requestId });
|
|
410
410
|
return;
|
|
411
411
|
}
|
|
412
|
+
const response = prefetchedResponse[request.url] ?? await fetch(request.url, { signal: controller.signal, headers: request.headers });
|
|
412
413
|
|
|
413
|
-
const response = await fetch(request.url, { signal: controller.signal });
|
|
414
414
|
const body = await response.buffer();
|
|
415
|
+
|
|
415
416
|
await cdp.send('Fetch.fulfillRequest', {
|
|
416
417
|
requestId,
|
|
417
418
|
responseCode: response.status,
|
|
418
419
|
responseHeaders: Object.keys(response.headers.raw()).map(header => {
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
420
|
+
return {
|
|
421
|
+
name: header,
|
|
422
|
+
value: response.headers.raw()[header].join(',')
|
|
423
|
+
};
|
|
423
424
|
}),
|
|
424
425
|
body: body.toString('base64')
|
|
425
426
|
});
|
|
@@ -442,8 +443,8 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
442
443
|
await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
|
|
443
444
|
}
|
|
444
445
|
else {
|
|
445
|
-
options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
|
|
446
446
|
try {
|
|
447
|
+
options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
|
|
447
448
|
await cdp.send('Fetch.continueRequest', { requestId });
|
|
448
449
|
}
|
|
449
450
|
catch (err) {
|
|
@@ -457,6 +458,34 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
457
458
|
}
|
|
458
459
|
|
|
459
460
|
try {
|
|
461
|
+
// Fetch the spec URL if using https
|
|
462
|
+
// This allow to skip launching a browser
|
|
463
|
+
// if we have a fallback data source
|
|
464
|
+
// with a defined cache target for the spec
|
|
465
|
+
if (!spec.url.startsWith('file://')) {
|
|
466
|
+
let response;
|
|
467
|
+
// We set a conditional request header
|
|
468
|
+
// Use If-Modified-Since in preference as it is in practice
|
|
469
|
+
// more reliable for conditional requests
|
|
470
|
+
let headers = {'Accept-Encoding': 'gzip, deflate, br', 'Upgrade-Insecure-Requests': 1, 'User-Agent': browser.userAgent()};
|
|
471
|
+
if (options.lastModified) {
|
|
472
|
+
headers["If-Modified-Since"] = options.lastModified;
|
|
473
|
+
} else if (options.etag) {
|
|
474
|
+
headers["If-None-Match"] = options.etag;
|
|
475
|
+
}
|
|
476
|
+
try {
|
|
477
|
+
response = await fetch(spec.url, {headers});
|
|
478
|
+
if (response.status === 304) {
|
|
479
|
+
return {status: "notmodified"};
|
|
480
|
+
}
|
|
481
|
+
prefetchedResponse[spec.url] = response;
|
|
482
|
+
} catch (err) {
|
|
483
|
+
throw new Error(`Loading ${spec.url} triggered network error ${err}`);
|
|
484
|
+
}
|
|
485
|
+
if (response.status !== 200) {
|
|
486
|
+
throw new Error(`Loading ${spec.url} triggered HTTP status ${response.status}`);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
460
489
|
const page = await browser.newPage();
|
|
461
490
|
|
|
462
491
|
// Disable cache if caller wants to handle all network requests
|
|
@@ -497,13 +526,27 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
497
526
|
|
|
498
527
|
// Load the page
|
|
499
528
|
// (note HTTP status is 0 when `file://` URLs are loaded)
|
|
529
|
+
let cacheInfo;
|
|
500
530
|
if (spec.html) {
|
|
501
531
|
await page.setContent(spec.html, loadOptions);
|
|
502
532
|
}
|
|
503
533
|
else {
|
|
504
|
-
|
|
534
|
+
let result;
|
|
535
|
+
try {
|
|
536
|
+
result = await page.goto(spec.url, loadOptions);
|
|
537
|
+
} catch (err) {
|
|
538
|
+
throw new Error(`Loading ${spec.url} triggered network error ${err}`);
|
|
539
|
+
}
|
|
505
540
|
if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
|
|
506
|
-
|
|
541
|
+
throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
|
|
542
|
+
}
|
|
543
|
+
const responseHeaders = result.headers();
|
|
544
|
+
// Use Last-Modified in preference as it is in practice
|
|
545
|
+
// more reliable for conditional requests
|
|
546
|
+
if (responseHeaders['last-modified']) {
|
|
547
|
+
cacheInfo = {lastModified: responseHeaders['last-modified']};
|
|
548
|
+
} else if (responseHeaders.etag) {
|
|
549
|
+
cacheInfo = {etag: responseHeaders.etag};
|
|
507
550
|
}
|
|
508
551
|
}
|
|
509
552
|
|
|
@@ -613,7 +656,7 @@ async function processSpecification(spec, processFunction, args, options) {
|
|
|
613
656
|
|
|
614
657
|
// Run the processFunction method in the browser context
|
|
615
658
|
const results = await page.evaluate(processFunction, ...args);
|
|
616
|
-
|
|
659
|
+
results.crawlCacheInfo = cacheInfo;
|
|
617
660
|
// Pending network requests may still be in the queue, flag the page
|
|
618
661
|
// as closed not to send commands on a CDP session that's no longer
|
|
619
662
|
// attached to anything
|