reffy 6.1.4 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "6.1.4",
3
+ "version": "6.2.0",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,7 +32,7 @@
32
32
  "bin": "./reffy.js",
33
33
  "dependencies": {
34
34
  "abortcontroller-polyfill": "1.7.3",
35
- "browser-specs": "2.25.0",
35
+ "browser-specs": "2.26.0",
36
36
  "commander": "9.0.0",
37
37
  "fetch-filecache-for-crawling": "4.0.2",
38
38
  "puppeteer": "13.1.3",
@@ -45,7 +45,7 @@
45
45
  "nock": "13.2.2",
46
46
  "respec": "29.0.4",
47
47
  "respec-hljs": "2.1.1",
48
- "rollup": "2.66.1"
48
+ "rollup": "2.67.0"
49
49
  },
50
50
  "scripts": {
51
51
  "test": "mocha --recursive tests/"
package/reffy.js CHANGED
@@ -70,6 +70,7 @@ program
70
70
  .usage('[options]')
71
71
  .description('Crawls and processes a list of Web specifications')
72
72
  .option('-d, --debug', 'debug mode, crawl one spec at a time')
73
+ .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
73
74
  .option('-m, --module <modules...>', 'spec processing modules')
74
75
  .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
75
76
  .option('-q, --quiet', 'do not report progress and other warnings to the console')
@@ -92,6 +93,7 @@ will dump ~100MB of data to the console:
92
93
  }
93
94
  const crawlOptions = {
94
95
  debug: options.debug,
96
+ fallback: options.fallback,
95
97
  output: options.output,
96
98
  publishedVersion: options.release,
97
99
  quiet: options.quiet,
@@ -143,6 +145,16 @@ Description:
143
145
  strongly recommended.
144
146
 
145
147
  Usage notes for some of the options:
148
+ -f, --fallback <json>
149
+ Provides an existing JSON crawl data file to use as a source of fallback data
150
+ for specs that fail to be crawled.
151
+
152
+ The fallback data gets copied as-is. It is the responsibility of the caller
153
+ to make sure that extracts it may link to actually exist and match the ones
154
+ that the crawl would produce in the absence of errors (e.g. same modules).
155
+
156
+ The "error" property is set on specs for which fallback data was used.
157
+
146
158
  -m, --module <modules...>
147
159
  If processing modules are not specified, the crawler runs all core processing
148
160
  modules defined in:
@@ -97,7 +97,15 @@ nock("https://www.w3.org")
97
97
  { "Content-Type": "application/js" })
98
98
  .get("/Tools/respec/respec-w3c").replyWithFile(200,
99
99
  path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
100
- { "Content-Type": "application/js" });
100
+ { "Content-Type": "application/js" })
101
+ .get("/TR/idontexist/").reply(404, '');
102
+
103
+ nock("https://drafts.csswg.org")
104
+ .persist()
105
+ .get("/server-hiccup/").reply(200,
106
+ `<html><title>Server hiccup</title>
107
+ <h1> Index of Server Hiccup Module Level 42 </h1>`,
108
+ { 'Content-Type': 'text/html' });
101
109
 
102
110
  nock.emitter.on('error', function (err) {
103
111
  console.error(err);
@@ -20,6 +20,7 @@ const {
20
20
  completeWithAlternativeUrls,
21
21
  expandBrowserModules,
22
22
  expandCrawlResult,
23
+ expandSpecResult,
23
24
  getGeneratedIDLNamesByCSSProperty,
24
25
  isLatestLevelThatPasses,
25
26
  processSpecification,
@@ -29,6 +30,36 @@ const {
29
30
  } = require('./util');
30
31
 
31
32
 
33
+ /**
34
+ * Return the spec if crawl succeeded or crawl result from given fallback list
35
+ * if crawl yielded an error (and fallback does exist).
36
+ *
37
+ * The function keeps the "error" property on the crawl result it returns so
38
+ * that the error does not get entirely lost.
39
+ *
40
+ * @function
41
+ * @param {Object} spec Actual spec crawl result
42
+ *
43
+ * @param {String} fallbackFolder The folder that contains fallback extracts
44
+ * @param {Array<Object>} fallbackData A list of crawl results to use as
45
+ * fallback when needed
46
+ * @return {Object} The given crawl result or a new one that reuses fallback
47
+ * content if needed
48
+ */
49
+ async function specOrFallback(spec, fallbackFolder, fallbackData) {
50
+ if (spec.error && fallbackData) {
51
+ const fallback = fallbackData.find(s => s.url === spec.url);
52
+ if (fallback) {
53
+ const copy = Object.assign({}, fallback);
54
+ const result = await expandSpecResult(copy, fallbackFolder);
55
+ result.error = spec.error;
56
+ return result;
57
+ }
58
+ }
59
+ return spec;
60
+ }
61
+
62
+
32
63
  /**
33
64
  * Load and parse the given spec.
34
65
  *
@@ -43,9 +74,11 @@ async function crawlSpec(spec, crawlOptions) {
43
74
  spec.crawled = crawlOptions.publishedVersion ?
44
75
  (spec.release ? spec.release : spec.nightly) :
45
76
  spec.nightly;
77
+ const fallbackFolder = crawlOptions.fallback ?
78
+ path.dirname(crawlOptions.fallback) : '';
46
79
 
47
80
  if (spec.error) {
48
- return spec;
81
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
49
82
  }
50
83
 
51
84
  try {
@@ -150,7 +183,7 @@ async function crawlSpec(spec, crawlOptions) {
150
183
  spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
151
184
  }
152
185
 
153
- return spec;
186
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
154
187
  }
155
188
 
156
189
 
@@ -308,10 +341,23 @@ async function saveSpecResults(spec, settings) {
308
341
  * specification descriptions
309
342
  */
310
343
  async function crawlList(speclist, crawlOptions) {
311
- crawlOptions = crawlOptions || {};
344
+ // Make a shallow copy of crawl options object since we're going
345
+ // to modify properties in place
346
+ crawlOptions = Object.assign({}, crawlOptions);
312
347
 
313
- // Prepare Puppeteer instance
348
+ // Expand list of processing modules to use if not already done
314
349
  crawlOptions.modules = expandBrowserModules(crawlOptions.modules);
350
+
351
+ // Load fallback data if necessary
352
+ if (crawlOptions.fallback) {
353
+ try {
354
+ crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
355
+ } catch (e) {
356
+ throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
357
+ }
358
+ }
359
+
360
+ // Prepare Puppeteer instance
315
361
  await setupBrowser(crawlOptions.modules);
316
362
 
317
363
  const list = speclist.map(completeWithAlternativeUrls);
@@ -493,10 +539,15 @@ function crawlSpecs(options) {
493
539
  });
494
540
  }
495
541
 
496
- const requestedList = (options && options.specs) ?
542
+ const requestedList = options?.specs ?
497
543
  prepareListOfSpecs(options.specs) :
498
544
  specs;
499
545
 
546
+ // Make a shallow copy of passed options parameter and expand modules
547
+ // in place.
548
+ options = Object.assign({}, options);
549
+ options.modules = expandBrowserModules(options.modules);
550
+
500
551
  return crawlList(requestedList, options)
501
552
  .then(async results => {
502
553
  // Merge extracts per series when necessary (CSS/IDL extracts)
package/src/lib/util.js CHANGED
@@ -496,11 +496,15 @@ async function processSpecification(spec, processFunction, args, options) {
496
496
  };
497
497
 
498
498
  // Load the page
499
+ // (note HTTP status is 0 when `file://` URLs are loaded)
499
500
  if (spec.html) {
500
501
  await page.setContent(spec.html, loadOptions);
501
502
  }
502
503
  else {
503
- await page.goto(spec.url, loadOptions);
504
+ const result = await page.goto(spec.url, loadOptions);
505
+ if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
506
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
507
+ }
504
508
  }
505
509
 
506
510
  // Handle multi-page specs
@@ -516,7 +520,11 @@ async function processSpecification(spec, processFunction, args, options) {
516
520
  await subCdp.send('Fetch.enable');
517
521
  subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
518
522
  try {
519
- await subPage.goto(url, loadOptions);
523
+ // (Note HTTP status is 0 when `file://` URLs are loaded)
524
+ const subresult = await subPage.goto(url, loadOptions);
525
+ if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
526
+ throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
527
+ }
520
528
  const html = await subPage.evaluate(() => {
521
529
  return document.body.outerHTML
522
530
  .replace(/<body/, '<section')
@@ -542,6 +550,14 @@ async function processSpecification(spec, processFunction, args, options) {
542
550
 
543
551
  // Wait until the generation of the spec is completely over
544
552
  await page.evaluate(async () => {
553
+ // Detect draft CSS server hiccups as done in browser-specs:
554
+ // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
555
+ const title = (window.document.querySelector('h1')?.textContent || '')
556
+ .replace(/\n/g, '').trim();
557
+ if (title.startsWith('Index of ')) {
558
+ throw new Error(`CSS server issue detected`);
559
+ }
560
+
545
561
  const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
546
562
  window.document.head.querySelector("script[src*='respec']");
547
563
 
@@ -694,6 +710,79 @@ function isLatestLevelThatPasses(spec, list, predicate) {
694
710
  }
695
711
 
696
712
 
713
+ /**
714
+ * Takes the results of a crawl for a given spec and expands it to include the
715
+ * contents of referenced files.
716
+ *
717
+ * The function handles both files and HTTPS resources, using either filesystem
718
+ * functions (for files) or fetch (for HTTPS resources).
719
+ *
720
+ * Note the spec object is expanded in place.
721
+ *
722
+ * @function
723
+ * @public
724
+ * @param {Object} spec Spec crawl result that needs to be expanded
725
+ * @param {string} baseFolder The base folder that contains the crawl file, or
726
+ * the base HTTPS URI to resolve relative links in the crawl object.
727
+ * @param {Array(string)} properties An explicit list of properties to expand
728
+ * (no value means "expand all possible properties")
729
+ * @return {Promise(object)} The promise to get an expanded crawl object that
730
+ * contains the contents of referenced files and no longer references external
731
+ * files (for the requested properties)
732
+ */
733
+ async function expandSpecResult(spec, baseFolder, properties) {
734
+ baseFolder = baseFolder || '';
735
+ await Promise.all(Object.keys(spec).map(async property => {
736
+ // Only consider properties explicitly requested
737
+ if (properties && !properties.includes(property)) {
738
+ return;
739
+ }
740
+
741
+ // Only consider properties that link to an extract, i.e. an IDL
742
+ // or JSON file in subfolder.
743
+ if (!spec[property] ||
744
+ (typeof spec[property] !== 'string') ||
745
+ !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
746
+ return;
747
+ }
748
+ let contents = null;
749
+ if (baseFolder.startsWith('https:')) {
750
+ const url = (new URL(spec[property], baseFolder)).toString();
751
+ const response = await fetch(url, { nolog: true });
752
+ contents = await response.text();
753
+ }
754
+ else {
755
+ const filename = path.join(baseFolder, spec[property]);
756
+ contents = await fs.readFile(filename, 'utf8');
757
+ }
758
+
+ // Force UNIX-style line endings
+ // (Git may auto-convert LF to CRLF on Windows machines and we
+ // want to store multiline IDL fragments as values of properties
+ // in parsed IDL trees)
+ contents = contents.replace(/\r\n/g, '\n');
+
+ if (spec[property].endsWith('.json')) {
759
+ contents = JSON.parse(contents);
760
+ }
761
+ if (property === 'css') {
762
+ // Special case for CSS where the "css" level does not exist
763
+ // in the generated files
764
+ const css = Object.assign({}, contents);
765
+ delete css.spec;
766
+ spec[property] = css;
767
+ }
768
+ else if (property === 'idl') {
769
+ // Special case for raw IDL extracts, which are text extracts.
770
+ // Also drop header that may have been added when extract was
771
+ // serialized.
772
+ if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
773
+ const endOfHeader = contents.indexOf('\n\n');
774
+ contents = contents.substring(endOfHeader + 2);
775
+ }
776
+ spec.idl = contents;
777
+ }
778
+ else {
779
+ spec[property] = contents[property];
780
+ }
781
+ }));
782
+ return spec;
783
+ }
784
+
785
+
697
786
  /**
698
787
  * Takes the results of a crawl (typically the contents of the index.json file)
699
788
  * and expands it to include the contents of all referenced files.
@@ -708,73 +797,16 @@ function isLatestLevelThatPasses(spec, list, predicate) {
708
797
  * @param {Object} crawl Crawl index object that needs to be expanded
709
798
  * @param {string} baseFolder The base folder that contains the crawl file, or
710
799
  * the base HTTPS URI to resolve relative links in the crawl object.
711
- * @param {Array(string)} An explicit list of properties to expand (no value
712
- * means "expand all possible properties")
800
+ * @param {Array(string)} properties An explicit list of properties to expand
801
+ * (no value means "expand all possible properties")
713
802
  * @return {Promise(object)} The promise to get an expanded crawl object that
714
803
  * contains the entire crawl report (and no longer references external files)
715
804
  */
716
805
  async function expandCrawlResult(crawl, baseFolder, properties) {
717
806
  baseFolder = baseFolder || '';
718
-
719
- async function expandSpec(spec) {
720
- await Promise.all(Object.keys(spec).map(async property => {
721
- // Only consider properties explicitly requested
722
- if (properties && !properties.includes(property)) {
723
- return;
724
- }
725
-
726
- // Only consider properties that link to an extract, i.e. an IDL
727
- // or JSON file in subfolder.
728
- if (!spec[property] ||
729
- (typeof spec[property] !== 'string') ||
730
- !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
731
- return;
732
- }
733
- let contents = null;
734
- if (baseFolder.startsWith('https:')) {
735
- const url = (new URL(spec[property], baseFolder)).toString();
736
- const response = await fetch(url, { nolog: true });
737
- contents = await response.text();
738
- }
739
- else {
740
- const filename = path.join(baseFolder, spec[property]);
741
- contents = await fs.readFile(filename, 'utf8');
742
- }
743
-
744
- // Force UNIX-style line endings
745
- // (Git may auto-convert LF to CRLF on Windows machines and we
746
- // want to store multiline IDL fragments as values of properties
747
- // in parsed IDL trees)
748
- contents = contents.replace(/\r\n/g, '\n');
749
-
750
- if (spec[property].endsWith('.json')) {
751
- contents = JSON.parse(contents);
752
- }
753
- if (property === 'css') {
754
- // Special case for CSS where the "css" level does not exist
755
- // in the generated files
756
- const css = Object.assign({}, contents);
757
- delete css.spec;
758
- spec[property] = css;
759
- }
760
- else if (property === 'idl') {
761
- // Special case for raw IDL extracts, which are text extracts.
762
- // Also drop header that may have been added when extract was
763
- // serialized.
764
- if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
765
- const endOfHeader = contents.indexOf('\n\n');
766
- contents = contents.substring(endOfHeader + 2);
767
- }
768
- spec.idl = contents;
769
- }
770
- else {
771
- spec[property] = contents[property];
772
- }
773
- }));
774
- return spec;
775
- }
776
-
777
- crawl.results = await Promise.all(crawl.results.map(expandSpec));
807
+ crawl.results = await Promise.all(
808
+ crawl.results.map(spec => expandSpecResult(spec, baseFolder, properties))
809
+ );
778
810
  return crawl;
779
811
  }
780
812
 
@@ -860,6 +892,7 @@ module.exports = {
860
892
  completeWithAlternativeUrls,
861
893
  isLatestLevelThatPasses,
862
894
  expandCrawlResult,
895
+ expandSpecResult,
863
896
  getGeneratedIDLNamesByCSSProperty,
864
897
  createFolderIfNeeded
865
898
  };