reffy 6.1.4 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "6.1.4",
3
+ "version": "6.2.0",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,7 +32,7 @@
32
32
  "bin": "./reffy.js",
33
33
  "dependencies": {
34
34
  "abortcontroller-polyfill": "1.7.3",
35
- "browser-specs": "2.25.0",
35
+ "browser-specs": "2.26.0",
36
36
  "commander": "9.0.0",
37
37
  "fetch-filecache-for-crawling": "4.0.2",
38
38
  "puppeteer": "13.1.3",
@@ -45,7 +45,7 @@
45
45
  "nock": "13.2.2",
46
46
  "respec": "29.0.4",
47
47
  "respec-hljs": "2.1.1",
48
- "rollup": "2.66.1"
48
+ "rollup": "2.67.0"
49
49
  },
50
50
  "scripts": {
51
51
  "test": "mocha --recursive tests/"
package/reffy.js CHANGED
@@ -70,6 +70,7 @@ program
70
70
  .usage('[options]')
71
71
  .description('Crawls and processes a list of Web specifications')
72
72
  .option('-d, --debug', 'debug mode, crawl one spec at a time')
73
+ .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
73
74
  .option('-m, --module <modules...>', 'spec processing modules')
74
75
  .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
75
76
  .option('-q, --quiet', 'do not report progress and other warnings to the console')
@@ -92,6 +93,7 @@ will dump ~100MB of data to the console:
92
93
  }
93
94
  const crawlOptions = {
94
95
  debug: options.debug,
96
+ fallback: options.fallback,
95
97
  output: options.output,
96
98
  publishedVersion: options.release,
97
99
  quiet: options.quiet,
@@ -143,6 +145,16 @@ Description:
143
145
  strongly recommended.
144
146
 
145
147
  Usage notes for some of the options:
148
+ -f, --fallback <json>
149
+ Provides an existing JSON crawl data file to use as a source of fallback data
150
+ for specs that fail to be crawled.
151
+
152
+ The fallback data gets copied as-is. It is the responsibility of the caller
153
+ to make sure that extracts it may link to actually exist and match the ones
154
+ that the crawl would produce in the absence of errors (e.g. same modules).
155
+
156
+ The "error" property is set on specs for which fallback data was used.
157
+
146
158
  -m, --module <modules...>
147
159
  If processing modules are not specified, the crawler runs all core processing
148
160
  modules defined in:
@@ -97,7 +97,15 @@ nock("https://www.w3.org")
97
97
  { "Content-Type": "application/js" })
98
98
  .get("/Tools/respec/respec-w3c").replyWithFile(200,
99
99
  path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
100
- { "Content-Type": "application/js" });
100
+ { "Content-Type": "application/js" })
101
+ .get("/TR/idontexist/").reply(404, '');
102
+
103
+ nock("https://drafts.csswg.org")
104
+ .persist()
105
+ .get("/server-hiccup/").reply(200,
106
+ `<html><title>Server hiccup</title>
107
+ <h1> Index of Server Hiccup Module Level 42 </h1>`,
108
+ { 'Content-Type': 'text/html' });
101
109
 
102
110
  nock.emitter.on('error', function (err) {
103
111
  console.error(err);
@@ -20,6 +20,7 @@ const {
20
20
  completeWithAlternativeUrls,
21
21
  expandBrowserModules,
22
22
  expandCrawlResult,
23
+ expandSpecResult,
23
24
  getGeneratedIDLNamesByCSSProperty,
24
25
  isLatestLevelThatPasses,
25
26
  processSpecification,
@@ -29,6 +30,36 @@ const {
29
30
  } = require('./util');
30
31
 
31
32
 
33
+ /**
34
+ * Return the spec if crawl succeeded or crawl result from given fallback list
35
+ * if crawl yielded an error (and fallback does exist).
36
+ *
37
+ * The function keeps the "error" property on the crawl result it returns so
38
+ * that the error does not get entirely lost.
39
+ *
40
+ * @function
41
+ * @param {Object} spec Actual spec crawl result
42
+ *
43
+ * @param {String} fallbackFolder The folder that contains fallback extracts
44
+ * @param {Array<Object>} fallbackData A list of crawl results to use as
45
+ * fallback when needed
46
+ * @return {Object} The given crawl result or a new one that reuses fallback
47
+ * content if needed
48
+ */
49
+ async function specOrFallback(spec, fallbackFolder, fallbackData) {
50
+ if (spec.error && fallbackData) {
51
+ const fallback = fallbackData.find(s => s.url === spec.url);
52
+ if (fallback) {
53
+ const copy = Object.assign({}, fallback);
54
+ const result = await expandSpecResult(copy, fallbackFolder);
55
+ result.error = spec.error;
56
+ return result;
57
+ }
58
+ }
59
+ return spec;
60
+ }
61
+
62
+
32
63
  /**
33
64
  * Load and parse the given spec.
34
65
  *
@@ -43,9 +74,11 @@ async function crawlSpec(spec, crawlOptions) {
43
74
  spec.crawled = crawlOptions.publishedVersion ?
44
75
  (spec.release ? spec.release : spec.nightly) :
45
76
  spec.nightly;
77
+ const fallbackFolder = crawlOptions.fallback ?
78
+ path.dirname(crawlOptions.fallback) : '';
46
79
 
47
80
  if (spec.error) {
48
- return spec;
81
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
49
82
  }
50
83
 
51
84
  try {
@@ -150,7 +183,7 @@ async function crawlSpec(spec, crawlOptions) {
150
183
  spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
151
184
  }
152
185
 
153
- return spec;
186
+ return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
154
187
  }
155
188
 
156
189
 
@@ -308,10 +341,23 @@ async function saveSpecResults(spec, settings) {
308
341
  * specification descriptions
309
342
  */
310
343
  async function crawlList(speclist, crawlOptions) {
311
- crawlOptions = crawlOptions || {};
344
+ // Make a shallow copy of crawl options object since we're going
345
+ // to modify properties in place
346
+ crawlOptions = Object.assign({}, crawlOptions);
312
347
 
313
- // Prepare Puppeteer instance
348
+ // Expand list of processing modules to use if not already done
314
349
  crawlOptions.modules = expandBrowserModules(crawlOptions.modules);
350
+
351
+ // Load fallback data if necessary
352
+ if (crawlOptions.fallback) {
353
+ try {
354
+ crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
355
+ } catch (e) {
356
+ throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
357
+ }
358
+ }
359
+
360
+ // Prepare Puppeteer instance
315
361
  await setupBrowser(crawlOptions.modules);
316
362
 
317
363
  const list = speclist.map(completeWithAlternativeUrls);
@@ -493,10 +539,15 @@ function crawlSpecs(options) {
493
539
  });
494
540
  }
495
541
 
496
- const requestedList = (options && options.specs) ?
542
+ const requestedList = options?.specs ?
497
543
  prepareListOfSpecs(options.specs) :
498
544
  specs;
499
545
 
546
+ // Make a shallow copy of passed options parameter and expand modules
547
+ // in place.
548
+ options = Object.assign({}, options);
549
+ options.modules = expandBrowserModules(options.modules);
550
+
500
551
  return crawlList(requestedList, options)
501
552
  .then(async results => {
502
553
  // Merge extracts per series when necessary (CSS/IDL extracts)
package/src/lib/util.js CHANGED
@@ -496,11 +496,15 @@ async function processSpecification(spec, processFunction, args, options) {
496
496
  };
497
497
 
498
498
  // Load the page
499
+ // (note HTTP status is 0 when `file://` URLs are loaded)
499
500
  if (spec.html) {
500
501
  await page.setContent(spec.html, loadOptions);
501
502
  }
502
503
  else {
503
- await page.goto(spec.url, loadOptions);
504
+ const result = await page.goto(spec.url, loadOptions);
505
+ if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
506
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
507
+ }
504
508
  }
505
509
 
506
510
  // Handle multi-page specs
@@ -516,7 +520,11 @@ async function processSpecification(spec, processFunction, args, options) {
516
520
  await subCdp.send('Fetch.enable');
517
521
  subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
518
522
  try {
519
- await subPage.goto(url, loadOptions);
523
+ // (Note HTTP status is 0 when `file://` URLs are loaded)
524
+ const subresult = await subPage.goto(url, loadOptions);
525
+ if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
526
+ throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
527
+ }
520
528
  const html = await subPage.evaluate(() => {
521
529
  return document.body.outerHTML
522
530
  .replace(/<body/, '<section')
@@ -542,6 +550,14 @@ async function processSpecification(spec, processFunction, args, options) {
542
550
 
543
551
  // Wait until the generation of the spec is completely over
544
552
  await page.evaluate(async () => {
553
+ // Detect draft CSS server hiccups as done in browser-specs:
554
+ // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
555
+ const title = (window.document.querySelector('h1')?.textContent || '')
556
+ .replace(/\n/g, '').trim();
557
+ if (title.startsWith('Index of ')) {
558
+ throw new Error(`CSS server issue detected`);
559
+ }
560
+
545
561
  const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
546
562
  window.document.head.querySelector("script[src*='respec']");
547
563
 
@@ -694,6 +710,79 @@ function isLatestLevelThatPasses(spec, list, predicate) {
694
710
  }
695
711
 
696
712
 
713
+ /**
714
+ * Takes the results of a crawl for a given spec and expands it to include the
715
+ * contents of referenced files.
716
+ *
717
+ * The function handles both files and HTTPS resources, using either filesystem
718
+ * functions (for files) or fetch (for HTTPS resources).
719
+ *
720
+ * Note the spec object is expanded in place.
721
+ *
722
+ * @function
723
+ * @public
724
+ * @param {Object} spec Spec crawl result that needs to be expanded
725
+ * @param {string} baseFolder The base folder that contains the crawl file, or
726
+ * the base HTTPS URI to resolve relative links in the crawl object.
727
+ * @param {Array(string)} properties An explicit list of properties to expand
728
+ * (no value means "expand all possible properties")
729
+ * @return {Promise(object)} The promise to get an expanded crawl object that
730
+ * contains the contents of referenced files and no longer references external
731
+ * files (for the requested properties)
732
+ */
733
+ async function expandSpecResult(spec, baseFolder, properties) {
734
+ baseFolder = baseFolder || '';
735
+ await Promise.all(Object.keys(spec).map(async property => {
736
+ // Only consider properties explicitly requested
737
+ if (properties && !properties.includes(property)) {
738
+ return;
739
+ }
740
+
741
+ // Only consider properties that link to an extract, i.e. an IDL
742
+ // or JSON file in subfolder.
743
+ if (!spec[property] ||
744
+ (typeof spec[property] !== 'string') ||
745
+ !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
746
+ return;
747
+ }
748
+ let contents = null;
749
+ if (baseFolder.startsWith('https:')) {
750
+ const url = (new URL(spec[property], baseFolder)).toString();
751
+ const response = await fetch(url, { nolog: true });
752
+ contents = await response.text();
753
+ }
754
+ else {
755
+ const filename = path.join(baseFolder, spec[property]);
756
+ contents = await fs.readFile(filename, 'utf8');
757
+ }
758
+
+ // Force UNIX-style line endings
+ // (Git may auto-convert LF to CRLF on Windows machines and we
+ // want to store multiline IDL fragments as values of properties
+ // in parsed IDL trees)
+ contents = contents.replace(/\r\n/g, '\n');
+
+ if (spec[property].endsWith('.json')) {
759
+ contents = JSON.parse(contents);
760
+ }
761
+ if (property === 'css') {
762
+ // Special case for CSS where the "css" level does not exist
763
+ // in the generated files
764
+ const css = Object.assign({}, contents);
765
+ delete css.spec;
766
+ spec[property] = css;
767
+ }
768
+ else if (property === 'idl') {
769
+ // Special case for raw IDL extracts, which are text extracts.
770
+ // Also drop header that may have been added when extract was
771
+ // serialized.
772
+ if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
773
+ const endOfHeader = contents.indexOf('\n\n');
774
+ contents = contents.substring(endOfHeader + 2);
775
+ }
776
+ spec.idl = contents;
777
+ }
778
+ else {
779
+ spec[property] = contents[property];
780
+ }
781
+ }));
782
+ return spec;
783
+ }
784
+
785
+
697
786
  /**
698
787
  * Takes the results of a crawl (typically the contents of the index.json file)
699
788
  * and expands it to include the contents of all referenced files.
@@ -708,73 +797,16 @@ function isLatestLevelThatPasses(spec, list, predicate) {
708
797
  * @param {Object} crawl Crawl index object that needs to be expanded
709
798
  * @param {string} baseFolder The base folder that contains the crawl file, or
710
799
  * the base HTTPS URI to resolve relative links in the crawl object.
711
- * @param {Array(string)} An explicit list of properties to expand (no value
712
- * means "expand all possible properties")
800
+ * @param {Array(string)} properties An explicit list of properties to expand
801
+ * (no value means "expand all possible properties")
713
802
  * @return {Promise(object)} The promise to get an expanded crawl object that
714
803
  * contains the entire crawl report (and no longer references external files)
715
804
  */
716
805
  async function expandCrawlResult(crawl, baseFolder, properties) {
717
806
  baseFolder = baseFolder || '';
718
-
719
- async function expandSpec(spec) {
720
- await Promise.all(Object.keys(spec).map(async property => {
721
- // Only consider properties explicitly requested
722
- if (properties && !properties.includes(property)) {
723
- return;
724
- }
725
-
726
- // Only consider properties that link to an extract, i.e. an IDL
727
- // or JSON file in subfolder.
728
- if (!spec[property] ||
729
- (typeof spec[property] !== 'string') ||
730
- !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
731
- return;
732
- }
733
- let contents = null;
734
- if (baseFolder.startsWith('https:')) {
735
- const url = (new URL(spec[property], baseFolder)).toString();
736
- const response = await fetch(url, { nolog: true });
737
- contents = await response.text();
738
- }
739
- else {
740
- const filename = path.join(baseFolder, spec[property]);
741
- contents = await fs.readFile(filename, 'utf8');
742
- }
743
-
744
- // Force UNIX-style line endings
745
- // (Git may auto-convert LF to CRLF on Windows machines and we
746
- // want to store multiline IDL fragments as values of properties
747
- // in parsed IDL trees)
748
- contents = contents.replace(/\r\n/g, '\n');
749
-
750
- if (spec[property].endsWith('.json')) {
751
- contents = JSON.parse(contents);
752
- }
753
- if (property === 'css') {
754
- // Special case for CSS where the "css" level does not exist
755
- // in the generated files
756
- const css = Object.assign({}, contents);
757
- delete css.spec;
758
- spec[property] = css;
759
- }
760
- else if (property === 'idl') {
761
- // Special case for raw IDL extracts, which are text extracts.
762
- // Also drop header that may have been added when extract was
763
- // serialized.
764
- if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
765
- const endOfHeader = contents.indexOf('\n\n');
766
- contents = contents.substring(endOfHeader + 2);
767
- }
768
- spec.idl = contents;
769
- }
770
- else {
771
- spec[property] = contents[property];
772
- }
773
- }));
774
- return spec;
775
- }
776
-
777
- crawl.results = await Promise.all(crawl.results.map(expandSpec));
807
+ crawl.results = await Promise.all(
808
+ crawl.results.map(spec => expandSpecResult(spec, baseFolder, properties))
809
+ );
778
810
  return crawl;
779
811
  }
780
812
 
@@ -860,6 +892,7 @@ module.exports = {
860
892
  completeWithAlternativeUrls,
861
893
  isLatestLevelThatPasses,
862
894
  expandCrawlResult,
895
+ expandSpecResult,
863
896
  getGeneratedIDLNamesByCSSProperty,
864
897
  createFolderIfNeeded
865
898
  };