reffy 6.1.3 → 6.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "6.1.3",
3
+ "version": "6.2.2",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,10 +32,10 @@
32
32
  "bin": "./reffy.js",
33
33
  "dependencies": {
34
34
  "abortcontroller-polyfill": "1.7.3",
35
- "browser-specs": "2.23.0",
36
- "commander": "8.3.0",
35
+ "browser-specs": "2.27.0",
36
+ "commander": "9.0.0",
37
37
  "fetch-filecache-for-crawling": "4.0.2",
38
- "puppeteer": "13.1.2",
38
+ "puppeteer": "13.1.3",
39
39
  "semver": "^7.3.5",
40
40
  "webidl2": "24.2.0"
41
41
  },
@@ -43,9 +43,9 @@
43
43
  "chai": "4.3.6",
44
44
  "mocha": "9.2.0",
45
45
  "nock": "13.2.2",
46
- "respec": "28.2.8",
46
+ "respec": "29.0.5",
47
47
  "respec-hljs": "2.1.1",
48
- "rollup": "2.66.1"
48
+ "rollup": "2.67.0"
49
49
  },
50
50
  "scripts": {
51
51
  "test": "mocha --recursive tests/"
package/reffy.js CHANGED
@@ -70,6 +70,7 @@ program
70
70
  .usage('[options]')
71
71
  .description('Crawls and processes a list of Web specifications')
72
72
  .option('-d, --debug', 'debug mode, crawl one spec at a time')
73
+ .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
73
74
  .option('-m, --module <modules...>', 'spec processing modules')
74
75
  .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
75
76
  .option('-q, --quiet', 'do not report progress and other warnings to the console')
@@ -92,6 +93,7 @@ will dump ~100MB of data to the console:
92
93
  }
93
94
  const crawlOptions = {
94
95
  debug: options.debug,
96
+ fallback: options.fallback,
95
97
  output: options.output,
96
98
  publishedVersion: options.release,
97
99
  quiet: options.quiet,
@@ -143,6 +145,16 @@ Description:
143
145
  strongly recommended.
144
146
 
145
147
  Usage notes for some of the options:
148
+ -f, --fallback <jsondata>
149
+ Provides an existing JSON crawl data file to use as a source of fallback data
150
+ for specs that fail to be crawled.
151
+
152
+ The fallback data gets copied as-is. It is the responsibility of the caller
153
+ to make sure that extracts it may link to actually exist and match the ones
154
+ that the crawl would produce in the absence of errors (e.g. same modules).
155
+
156
+ The "error" property is set on specs for which fallback data was used.
157
+
146
158
  -m, --module <modules...>
147
159
  If processing modules are not specified, the crawler runs all core processing
148
160
  modules defined in:
@@ -97,7 +97,15 @@ nock("https://www.w3.org")
97
97
  { "Content-Type": "application/js" })
98
98
  .get("/Tools/respec/respec-w3c").replyWithFile(200,
99
99
  path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
100
- { "Content-Type": "application/js" });
100
+ { "Content-Type": "application/js" })
101
+ .get("/TR/idontexist/").reply(404, '');
102
+
103
+ nock("https://drafts.csswg.org")
104
+ .persist()
105
+ .get("/server-hiccup/").reply(200,
106
+ `<html><title>Server hiccup</title>
107
+ <h1> Index of Server Hiccup Module Level 42 </h1>`,
108
+ { 'Content-Type': 'text/html' });
101
109
 
102
110
  nock.emitter.on('error', function (err) {
103
111
  console.error(err);
@@ -20,6 +20,7 @@ const {
20
20
  completeWithAlternativeUrls,
21
21
  expandBrowserModules,
22
22
  expandCrawlResult,
23
+ expandSpecResult,
23
24
  getGeneratedIDLNamesByCSSProperty,
24
25
  isLatestLevelThatPasses,
25
26
  processSpecification,
@@ -29,6 +30,36 @@ const {
29
30
  } = require('./util');
30
31
 
31
32
 
33
+ /**
34
+ * Return the spec if crawl succeeded or crawl result from given fallback list
35
+ * if crawl yielded an error (and fallback does exist).
36
+ *
37
+ * The function keeps the "error" property on the crawl result it returns so
38
+ * that the error does not get entirely lost.
39
+ *
40
+ * @function
41
+ * @param {Object} spec Actual spec crawl result
42
+ *
43
+ * @param {String} fallbackFolder The folder that contains fallback extracts
44
+ * @param {Array<Object>} fallbackData A list of crawl results to use as
45
+ * fallback when needed
46
+ * @return {Object} The given crawl result or a new one that reuses fallback
47
+ * content if needed
48
+ */
49
+ async function specOrFallback(spec, fallbackFolder, fallbackData) {
50
+ if (spec.error && fallbackData) {
51
+ const fallback = fallbackData.find(s => s.url === spec.url);
52
+ if (fallback) {
53
+ const copy = Object.assign({}, fallback);
54
+ const result = await expandSpecResult(copy, fallbackFolder);
55
+ result.error = spec.error;
56
+ return result;
57
+ }
58
+ }
59
+ return spec;
60
+ }
61
+
62
+
32
63
  /**
33
64
  * Load and parse the given spec.
34
65
  *
@@ -43,9 +74,11 @@ async function crawlSpec(spec, crawlOptions) {
43
74
  spec.crawled = crawlOptions.publishedVersion ?
44
75
  (spec.release ? spec.release : spec.nightly) :
45
76
  spec.nightly;
77
+ const fallbackFolder = crawlOptions.fallback ?
78
+ path.dirname(crawlOptions.fallback) : '';
46
79
 
47
80
  if (spec.error) {
48
- return spec;
81
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
49
82
  }
50
83
 
51
84
  try {
@@ -150,7 +183,7 @@ async function crawlSpec(spec, crawlOptions) {
150
183
  spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
151
184
  }
152
185
 
153
- return spec;
186
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
154
187
  }
155
188
 
156
189
 
@@ -308,10 +341,23 @@ async function saveSpecResults(spec, settings) {
308
341
  * specification descriptions
309
342
  */
310
343
  async function crawlList(speclist, crawlOptions) {
311
- crawlOptions = crawlOptions || {};
344
+ // Make a shallow copy of crawl options object since we're going
345
+ // to modify properties in place
346
+ crawlOptions = Object.assign({}, crawlOptions);
312
347
 
313
- // Prepare Puppeteer instance
348
+ // Expand list of processing modules to use if not already done
314
349
  crawlOptions.modules = expandBrowserModules(crawlOptions.modules);
350
+
351
+ // Load fallback data if necessary
352
+ if (crawlOptions.fallback) {
353
+ try {
354
+ crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
355
+ } catch (e) {
356
+ throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
357
+ }
358
+ }
359
+
360
+ // Prepare Puppeteer instance
315
361
  await setupBrowser(crawlOptions.modules);
316
362
 
317
363
  const list = speclist.map(completeWithAlternativeUrls);
@@ -397,10 +443,9 @@ async function adjustExtractsPerSeries(data, property, settings) {
397
443
  }
398
444
  else if (spec[property]) {
399
445
  // Not the right full level in the series, drop created extract
400
- // and link to the series extract instead
401
446
  const pathname = path.resolve(settings.output, spec[property]);
402
447
  fs.unlinkSync(pathname);
403
- spec[property] = `${property}/${spec.series.shortname}${path.extname(spec[property])}`;
448
+ delete spec[property];
404
449
  }
405
450
  });
406
451
 
@@ -493,16 +538,24 @@ function crawlSpecs(options) {
493
538
  });
494
539
  }
495
540
 
496
- const requestedList = (options && options.specs) ?
541
+ const requestedList = options?.specs ?
497
542
  prepareListOfSpecs(options.specs) :
498
543
  specs;
499
544
 
545
+ // Make a shallow copy of passed options parameter and expand modules
546
+ // in place.
547
+ options = Object.assign({}, options);
548
+ options.modules = expandBrowserModules(options.modules);
549
+
500
550
  return crawlList(requestedList, options)
501
551
  .then(async results => {
502
552
  // Merge extracts per series when necessary (CSS/IDL extracts)
503
553
  for (const mod of options.modules) {
504
554
  if (mod.extractsPerSeries) {
505
555
  await adjustExtractsPerSeries(results, mod.property, options);
556
+ if (mod.property === 'idl') {
557
+ await adjustExtractsPerSeries(results, 'idlparsed', options);
558
+ }
506
559
  }
507
560
  }
508
561
  return results;
package/src/lib/util.js CHANGED
@@ -496,11 +496,15 @@ async function processSpecification(spec, processFunction, args, options) {
496
496
  };
497
497
 
498
498
  // Load the page
499
+ // (note HTTP status is 0 when `file://` URLs are loaded)
499
500
  if (spec.html) {
500
501
  await page.setContent(spec.html, loadOptions);
501
502
  }
502
503
  else {
503
- await page.goto(spec.url, loadOptions);
504
+ const result = await page.goto(spec.url, loadOptions);
505
+ if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
506
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
507
+ }
504
508
  }
505
509
 
506
510
  // Handle multi-page specs
@@ -516,7 +520,11 @@ async function processSpecification(spec, processFunction, args, options) {
516
520
  await subCdp.send('Fetch.enable');
517
521
  subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
518
522
  try {
519
- await subPage.goto(url, loadOptions);
523
+ // (Note HTTP status is 0 when `file://` URLs are loaded)
524
+ const subresult = await subPage.goto(url, loadOptions);
525
+ if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
526
+ throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
527
+ }
520
528
  const html = await subPage.evaluate(() => {
521
529
  return document.body.outerHTML
522
530
  .replace(/<body/, '<section')
@@ -542,6 +550,14 @@ async function processSpecification(spec, processFunction, args, options) {
542
550
 
543
551
  // Wait until the generation of the spec is completely over
544
552
  await page.evaluate(async () => {
553
+ // Detect draft CSS server hiccups as done in browser-specs:
554
+ // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
555
+ const title = (window.document.querySelector('h1')?.textContent || '')
556
+ .replace(/\n/g, '').trim();
557
+ if (title.startsWith('Index of ')) {
558
+ throw new Error(`CSS server issue detected`);
559
+ }
560
+
545
561
  const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
546
562
  window.document.head.querySelector("script[src*='respec']");
547
563
 
@@ -694,6 +710,81 @@ function isLatestLevelThatPasses(spec, list, predicate) {
694
710
  }
695
711
 
696
712
 
713
+ /**
714
+ * Takes the results of a crawl for a given spec and expands it to include the
715
+ * contents of referenced files.
716
+ *
717
+ * The function handles both files and HTTPS resources, using either filesystem
718
+ * functions (for files) or fetch (for HTTPS resources).
719
+ *
720
+ * Note the spec object is expanded in place.
721
+ *
722
+ * @function
723
+ * @public
724
+ * @param {Object} spec Spec crawl result that needs to be expanded
725
+ * @param {string} baseFolder The base folder that contains the crawl file, or
726
+ * the base HTTPS URI to resolve relative links in the crawl object.
727
+ * @param {Array(string)} properties An explicit list of properties to expand
728
+ * (no value means "expand all possible properties")
729
+ * @return {Promise(object)} The promise to get an expanded crawl object that
730
+ * contains the contents of referenced files and no longer references external
731
+ * files (for the requested properties)
732
+ */
733
+ async function expandSpecResult(spec, baseFolder, properties) {
734
+ baseFolder = baseFolder || '';
735
+ await Promise.all(Object.keys(spec).map(async property => {
736
+ // Only consider properties explicitly requested
737
+ if (properties && !properties.includes(property)) {
738
+ return;
739
+ }
740
+
741
+ // Only consider properties that link to an extract, i.e. an IDL
742
+ // or JSON file in subfolder.
743
+ if (!spec[property] ||
744
+ (typeof spec[property] !== 'string') ||
745
+ !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
746
+ return;
747
+ }
748
+ let contents = null;
749
+ if (baseFolder.startsWith('https:')) {
750
+ const url = (new URL(spec[property], baseFolder)).toString();
751
+ const response = await fetch(url, { nolog: true });
752
+ contents = await response.text();
753
+ }
754
+ else {
755
+ const filename = path.join(baseFolder, spec[property]);
756
+ contents = await fs.readFile(filename, 'utf8');
757
+ }
758
+ if (spec[property].endsWith('.json')) {
759
+ contents = JSON.parse(contents);
760
+ }
761
+ if (property === 'css') {
762
+ // Special case for CSS where the "css" level does not exist
763
+ // in the generated files
764
+ const css = Object.assign({}, contents);
765
+ delete css.spec;
766
+ spec[property] = css;
767
+ }
768
+ else if (property === 'idl') {
769
+ // Special case for raw IDL extracts, which are text extracts.
770
+ // Also drop header that may have been added when extract was
771
+ // serialized.
772
+ if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
773
+ const endOfHeader = contents.indexOf('\n\n');
774
+ contents = contents.substring(endOfHeader + 2)
775
+ // remove trailing newline added in saveIdl
776
+ .slice(0, -1);
777
+ }
778
+ spec.idl = contents;
779
+ }
780
+ else {
781
+ spec[property] = contents[property];
782
+ }
783
+ }));
784
+ return spec;
785
+ }
786
+
787
+
697
788
  /**
698
789
  * Takes the results of a crawl (typically the contents of the index.json file)
699
790
  * and expands it to include the contents of all referenced files.
@@ -708,73 +799,16 @@ function isLatestLevelThatPasses(spec, list, predicate) {
708
799
  * @param {Object} crawl Crawl index object that needs to be expanded
709
800
  * @param {string} baseFolder The base folder that contains the crawl file, or
710
801
  * the base HTTPS URI to resolve relative links in the crawl object.
711
- * @param {Array(string)} An explicit list of properties to expand (no value
712
- * means "expand all possible properties")
802
+ * @param {Array(string)} properties An explicit list of properties to expand
803
+ * (no value means "expand all possible properties")
713
804
  * @return {Promise(object)} The promise to get an expanded crawl object that
714
805
  * contains the entire crawl report (and no longer references external files)
715
806
  */
716
807
  async function expandCrawlResult(crawl, baseFolder, properties) {
717
808
  baseFolder = baseFolder || '';
718
-
719
- async function expandSpec(spec) {
720
- await Promise.all(Object.keys(spec).map(async property => {
721
- // Only consider properties explicitly requested
722
- if (properties && !properties.includes(property)) {
723
- return;
724
- }
725
-
726
- // Only consider properties that link to an extract, i.e. an IDL
727
- // or JSON file in subfolder.
728
- if (!spec[property] ||
729
- (typeof spec[property] !== 'string') ||
730
- !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
731
- return;
732
- }
733
- let contents = null;
734
- if (baseFolder.startsWith('https:')) {
735
- const url = (new URL(spec[property], baseFolder)).toString();
736
- const response = await fetch(url, { nolog: true });
737
- contents = await response.text();
738
- }
739
- else {
740
- const filename = path.join(baseFolder, spec[property]);
741
- contents = await fs.readFile(filename, 'utf8');
742
- }
743
-
744
- // Force UNIX-style line endings
745
- // (Git may auto-convert LF to CRLF on Windows machines and we
746
- // want to store multiline IDL fragments as values of properties
747
- // in parsed IDL trees)
748
- contents = contents.replace(/\r\n/g, '\n');
749
-
750
- if (spec[property].endsWith('.json')) {
751
- contents = JSON.parse(contents);
752
- }
753
- if (property === 'css') {
754
- // Special case for CSS where the "css" level does not exist
755
- // in the generated files
756
- const css = Object.assign({}, contents);
757
- delete css.spec;
758
- spec[property] = css;
759
- }
760
- else if (property === 'idl') {
761
- // Special case for raw IDL extracts, which are text extracts.
762
- // Also drop header that may have been added when extract was
763
- // serialized.
764
- if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
765
- const endOfHeader = contents.indexOf('\n\n');
766
- contents = contents.substring(endOfHeader + 2);
767
- }
768
- spec.idl = contents;
769
- }
770
- else {
771
- spec[property] = contents[property];
772
- }
773
- }));
774
- return spec;
775
- }
776
-
777
- crawl.results = await Promise.all(crawl.results.map(expandSpec));
809
+ crawl.results = await Promise.all(
810
+ crawl.results.map(spec => expandSpecResult(spec, baseFolder, properties))
811
+ );
778
812
  return crawl;
779
813
  }
780
814
 
@@ -860,6 +894,7 @@ module.exports = {
860
894
  completeWithAlternativeUrls,
861
895
  isLatestLevelThatPasses,
862
896
  expandCrawlResult,
897
+ expandSpecResult,
863
898
  getGeneratedIDLNamesByCSSProperty,
864
899
  createFolderIfNeeded
865
900
  };