reffy 6.2.2 → 6.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "6.2.2",
3
+ "version": "6.4.2",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,20 +32,20 @@
32
32
  "bin": "./reffy.js",
33
33
  "dependencies": {
34
34
  "abortcontroller-polyfill": "1.7.3",
35
- "browser-specs": "2.27.0",
36
35
  "commander": "9.0.0",
37
- "fetch-filecache-for-crawling": "4.0.2",
38
- "puppeteer": "13.1.3",
36
+ "fetch-filecache-for-crawling": "4.1.0",
37
+ "puppeteer": "13.3.2",
39
38
  "semver": "^7.3.5",
39
+ "web-specs": "1.1.0",
40
40
  "webidl2": "24.2.0"
41
41
  },
42
42
  "devDependencies": {
43
43
  "chai": "4.3.6",
44
44
  "mocha": "9.2.0",
45
- "nock": "13.2.2",
46
- "respec": "29.0.5",
45
+ "nock": "13.2.4",
46
+ "respec": "30.0.5",
47
47
  "respec-hljs": "2.1.1",
48
- "rollup": "2.67.0"
48
+ "rollup": "2.67.2"
49
49
  },
50
50
  "scripts": {
51
51
  "test": "mocha --recursive tests/"
package/reffy.js CHANGED
@@ -23,7 +23,7 @@
23
23
 
24
24
  const commander = require('commander');
25
25
  const satisfies = require('semver/functions/satisfies');
26
- const specs = require('browser-specs');
26
+ const specs = require('web-specs');
27
27
  const { version, engines } = require('./package.json');
28
28
  const { requireFromWorkingDirectory } = require('./src/lib/util');
29
29
  const { crawlSpecs } = require('./src/lib/specs-crawler');
package/src/lib/fetch.js CHANGED
@@ -33,7 +33,7 @@ catch (err) {
33
33
  * @return {Promise<Response>} Promise to get an HTTP response
34
34
  */
35
35
  async function fetch(url, options) {
36
- options = Object.assign({}, options);
36
+ options = Object.assign({headers: {}}, options);
37
37
  ['cacheFolder', 'resetCache', 'cacheRefresh', 'logToConsole'].forEach(param => {
38
38
  let fetchParam = (param === 'cacheRefresh') ? 'refresh' : param;
39
39
  if (config[param] && !options.hasOwnProperty(fetchParam)) {
@@ -53,4 +53,4 @@ async function fetch(url, options) {
53
53
  }
54
54
 
55
55
 
56
- module.exports = fetch;
56
+ module.exports = fetch;
@@ -82,7 +82,7 @@ Object.keys(mockSpecs).forEach(path => {
82
82
  // Handling requests generated by ReSpec documents
83
83
  nock("https://api.specref.org")
84
84
  .persist()
85
- .get("/bibrefs?refs=webidl,html").reply(200,
85
+ .get("/bibrefs?refs=webidl").reply(200,
86
86
  { webidl: { href: "https://webidl.spec.whatwg.org/" } },
87
87
  { "Access-Control-Allow-Origin": "*" }
88
88
  );
@@ -98,7 +98,14 @@ nock("https://www.w3.org")
98
98
  .get("/Tools/respec/respec-w3c").replyWithFile(200,
99
99
  path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
100
100
  { "Content-Type": "application/js" })
101
- .get("/TR/idontexist/").reply(404, '');
101
+ .get("/TR/idontexist/").reply(404, '')
102
+ .get("/TR/ididnotchange/").reply(function() {
103
+ if (this.req.headers['if-modified-since'][0] === "Fri, 11 Feb 2022 00:00:42 GMT") {
104
+ return [304, ''];
105
+ } else {
106
+ return [200, 'Unexpected path'];
107
+ }
108
+ });
102
109
 
103
110
  nock("https://drafts.csswg.org")
104
111
  .persist()
@@ -117,4 +124,4 @@ nock.emitter.on('no match', function(req, options, requestBody) {
117
124
  }
118
125
  });
119
126
 
120
- module.exports = nock;
127
+ module.exports = nock;
@@ -12,7 +12,7 @@
12
12
 
13
13
  const fs = require('fs');
14
14
  const path = require('path');
15
- const specs = require('browser-specs');
15
+ const specs = require('web-specs');
16
16
  const cssDfnParser = require('./css-grammar-parser');
17
17
  const { generateIdlParsed, saveIdlParsed } = require('../cli/generate-idlparsed');
18
18
  const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
@@ -29,6 +29,7 @@ const {
29
29
  createFolderIfNeeded
30
30
  } = require('./util');
31
31
 
32
+ const {version: reffyVersion} = require('../../package.json');
32
33
 
33
34
  /**
34
35
  * Return the spec if crawl succeeded or crawl result from given fallback list
@@ -78,10 +79,15 @@ async function crawlSpec(spec, crawlOptions) {
78
79
  path.dirname(crawlOptions.fallback) : '';
79
80
 
80
81
  if (spec.error) {
81
- return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
82
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData?.results);
82
83
  }
83
84
 
84
85
  try {
86
+ const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
87
+ let cacheInfo = {};
88
+ if (crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
89
+ cacheInfo = Object.assign({}, fallback?.crawlCacheInfo);
90
+ }
85
91
  const result = await processSpecification(
86
92
  spec.crawled,
87
93
  (spec, modules) => {
@@ -97,8 +103,14 @@ async function crawlSpec(spec, crawlOptions) {
97
103
  },
98
104
  [spec, crawlOptions.modules],
99
105
  { quiet: crawlOptions.quiet,
100
- forceLocalFetch: crawlOptions.forceLocalFetch }
106
+ forceLocalFetch: crawlOptions.forceLocalFetch,
107
+ ...cacheInfo}
101
108
  );
109
+ if (result.status === "notmodified" && fallback) {
110
+ crawlOptions.quiet ?? console.warn(`skipping ${spec.url}, no change`);
111
+ const copy = Object.assign({}, fallback);
112
+ return expandSpecResult(copy, fallbackFolder);
113
+ }
102
114
 
103
115
  // Specific rule for IDL extracts:
104
116
  // parse the extracted WebIdl content
@@ -169,6 +181,9 @@ async function crawlSpec(spec, crawlOptions) {
169
181
 
170
182
  // Copy results back into initial spec object
171
183
  spec.crawled = result.crawled;
184
+ if (result.crawlCacheInfo) {
185
+ spec.crawlCacheInfo = result.crawlCacheInfo;
186
+ }
172
187
  crawlOptions.modules.forEach(mod => {
173
188
  if (result[mod.property]) {
174
189
  spec[mod.property] = result[mod.property];
@@ -183,7 +198,7 @@ async function crawlSpec(spec, crawlOptions) {
183
198
  spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
184
199
  }
185
200
 
186
- return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
201
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData?.results);
187
202
  }
188
203
 
189
204
 
@@ -351,7 +366,7 @@ async function crawlList(speclist, crawlOptions) {
351
366
  // Load fallback data if necessary
352
367
  if (crawlOptions.fallback) {
353
368
  try {
354
- crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
369
+ crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback));
355
370
  } catch (e) {
356
371
  throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
357
372
  }
@@ -469,12 +484,14 @@ async function saveResults(data, settings) {
469
484
 
470
485
  // Save all results to an index.json file
471
486
  const indexFilename = path.join(settings.output, 'index.json');
487
+
472
488
  const contents = {
473
489
  type: 'crawl',
474
490
  title: 'Reffy crawl',
475
491
  date: (new Date()).toJSON(),
476
492
  options: settings,
477
493
  stats: {},
494
+ crawler: `reffy-${reffyVersion}`,
478
495
  results: data
479
496
  };
480
497
  contents.options.modules = contents.options.modules.map(mod => mod.property);
package/src/lib/util.js CHANGED
@@ -13,7 +13,6 @@ const specEquivalents = require('../specs/spec-equivalents.json');
13
13
 
14
14
  const reffyModules = require('../browserlib/reffy.json');
15
15
 
16
-
17
16
  /**
18
17
  * Maximum depth difference supported between Reffy's install path and custom
19
18
  * modules that may be provided on the command-line
@@ -22,7 +21,6 @@ const reffyModules = require('../browserlib/reffy.json');
22
21
  */
23
22
  const maxPathDepth = 20;
24
23
 
25
-
26
24
  /**
27
25
  * Returns a range array from 0 to the number provided (not included)
28
26
  */
@@ -325,7 +323,8 @@ async function teardownBrowser() {
325
323
  * flag tells the function that all network requests need to be only handled
326
324
  * by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
327
325
  * network and caching logic), which is useful to keep full control of network
328
- * requests in tests.
326
+ * requests in tests. The "etag" and "lastModified" options give input
327
+ * to the conditional fetch request sent for the primary crawled URL.
329
328
  * @return {Promise} The promise to get the results of the processing function
330
329
  */
331
330
  async function processSpecification(spec, processFunction, args, options) {
@@ -333,6 +332,7 @@ async function processSpecification(spec, processFunction, args, options) {
333
332
  processFunction = processFunction || function () {};
334
333
  args = args || [];
335
334
  options = options || {};
335
+ let prefetchedResponse = {};
336
336
 
337
337
  if (!browser) {
338
338
  throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
@@ -409,17 +409,18 @@ async function processSpecification(spec, processFunction, args, options) {
409
409
  await cdp.send('Fetch.continueRequest', { requestId });
410
410
  return;
411
411
  }
412
+ const response = prefetchedResponse[request.url] ?? await fetch(request.url, { signal: controller.signal, headers: request.headers });
412
413
 
413
- const response = await fetch(request.url, { signal: controller.signal });
414
414
  const body = await response.buffer();
415
+
415
416
  await cdp.send('Fetch.fulfillRequest', {
416
417
  requestId,
417
418
  responseCode: response.status,
418
419
  responseHeaders: Object.keys(response.headers.raw()).map(header => {
419
- return {
420
- name: header,
421
- value: response.headers.raw()[header].join(',')
422
- };
420
+ return {
421
+ name: header,
422
+ value: response.headers.raw()[header].join(',')
423
+ };
423
424
  }),
424
425
  body: body.toString('base64')
425
426
  });
@@ -442,8 +443,8 @@ async function processSpecification(spec, processFunction, args, options) {
442
443
  await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
443
444
  }
444
445
  else {
445
- options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
446
446
  try {
447
+ options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
447
448
  await cdp.send('Fetch.continueRequest', { requestId });
448
449
  }
449
450
  catch (err) {
@@ -457,6 +458,34 @@ async function processSpecification(spec, processFunction, args, options) {
457
458
  }
458
459
 
459
460
  try {
461
+ // Fetch the spec URL if using https
462
+ // This allows us to skip launching a browser
463
+ // if we have a fallback data source
464
+ // with a defined cache target for the spec
465
+ if (!spec.url.startsWith('file://')) {
466
+ let response;
467
+ // We set a conditional request header
468
+ // Use If-Modified-Since in preference as it is in practice
469
+ // more reliable for conditional requests
470
+ let headers = {'Accept-Encoding': 'gzip, deflate, br', 'Upgrade-Insecure-Requests': 1, 'User-Agent': browser.userAgent()};
471
+ if (options.lastModified) {
472
+ headers["If-Modified-Since"] = options.lastModified;
473
+ } else if (options.etag) {
474
+ headers["If-None-Match"] = options.etag;
475
+ }
476
+ try {
477
+ response = await fetch(spec.url, {headers});
478
+ if (response.status === 304) {
479
+ return {status: "notmodified"};
480
+ }
481
+ prefetchedResponse[spec.url] = response;
482
+ } catch (err) {
483
+ throw new Error(`Loading ${spec.url} triggered network error ${err}`);
484
+ }
485
+ if (response.status !== 200) {
486
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${response.status}`);
487
+ }
488
+ }
460
489
  const page = await browser.newPage();
461
490
 
462
491
  // Disable cache if caller wants to handle all network requests
@@ -497,13 +526,27 @@ async function processSpecification(spec, processFunction, args, options) {
497
526
 
498
527
  // Load the page
499
528
  // (note HTTP status is 0 when `file://` URLs are loaded)
529
+ let cacheInfo;
500
530
  if (spec.html) {
501
531
  await page.setContent(spec.html, loadOptions);
502
532
  }
503
533
  else {
504
- const result = await page.goto(spec.url, loadOptions);
534
+ let result;
535
+ try {
536
+ result = await page.goto(spec.url, loadOptions);
537
+ } catch (err) {
538
+ throw new Error(`Loading ${spec.url} triggered network error ${err}`);
539
+ }
505
540
  if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
506
- throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
541
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
542
+ }
543
+ const responseHeaders = result.headers();
544
+ // Use Last-Modified in preference as it is in practice
545
+ // more reliable for conditional requests
546
+ if (responseHeaders['last-modified']) {
547
+ cacheInfo = {lastModified: responseHeaders['last-modified']};
548
+ } else if (responseHeaders.etag) {
549
+ cacheInfo = {etag: responseHeaders.etag};
507
550
  }
508
551
  }
509
552
 
@@ -613,7 +656,7 @@ async function processSpecification(spec, processFunction, args, options) {
613
656
 
614
657
  // Run the processFunction method in the browser context
615
658
  const results = await page.evaluate(processFunction, ...args);
616
-
659
+ results.crawlCacheInfo = cacheInfo;
617
660
  // Pending network requests may still be in the queue, flag the page
618
661
  // as closed not to send commands on a CDP session that's no longer
619
662
  // attached to anything