npm - reffy - Versions diffs - 5.2.0 → 6.0.0 - Mend

reffy 5.2.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +4 -0
package/package.json +6 -6
package/src/cli/check-missing-dfns.js +1 -1
package/src/cli/generate-idlnames.js +7 -18
package/src/cli/generate-idlparsed.js +139 -0
package/src/lib/nock-server.js +40 -23
package/src/lib/specs-crawler.js +18 -37
package/src/lib/util.js +76 -42

package/README.md CHANGED Viewed

@@ -93,6 +93,10 @@ To create the WebIDL extract in the first place, you will need to run the `idl`
 reffy --spec fetch --module idl > fetch.idl
 ```
+### Parsed WebIDL generator
+The **Parsed WebIDL generator** takes the results of a crawl as input and applies the WebIDL parser to all specs it contains to create JSON extracts in an `idlparsed` folder. To run the generator: `node src/cli/generate-idlparsed.js [crawl folder] [save folder]`
 ### WebIDL names generator

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "reffy",
-  "version": "5.2.0",
+  "version": "6.0.0",
   "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
   "repository": {
     "type": "git",
@@ -32,20 +32,20 @@
   "bin": "./reffy.js",
   "dependencies": {
     "abortcontroller-polyfill": "1.7.3",
-    "browser-specs": "2.16.0",
+    "browser-specs": "2.21.0",
     "commander": "8.3.0",
     "fetch-filecache-for-crawling": "4.0.2",
-    "puppeteer": "11.0.0",
+    "puppeteer": "13.0.1",
     "semver": "^7.3.5",
     "webidl2": "24.2.0"
   },
   "devDependencies": {
     "chai": "4.3.4",
     "mocha": "9.1.3",
-    "nock": "13.2.0",
-    "respec": "28.0.0",
+    "nock": "13.2.1",
+    "respec": "28.1.0",
     "respec-hljs": "2.1.1",
-    "rollup": "2.59.0"
+    "rollup": "2.63.0"
   },
   "scripts": {
     "test": "mocha --recursive tests/"

package/src/cli/check-missing-dfns.js CHANGED Viewed

@@ -399,7 +399,7 @@ function checkSpecDefinitions(spec, options = {}) {
     (spec.css || {});
   const idl = (typeof spec.idlparsed === "string") ?
     require(path.resolve(options.rootFolder, spec.idlparsed)).idlparsed :
-    spec.idl;
+    spec.idlparsed;
   // Make sure that all expected CSS definitions exist in the dfns extract
   const expectedCSSDfns = getExpectedDfnsFromCSS(css);

package/src/cli/generate-idlnames.js CHANGED Viewed

@@ -28,7 +28,8 @@ const { matchIdlDfn, getExpectedDfnFromIdlDesc } = require('./check-missing-dfns
 const {
   expandCrawlResult,
   isLatestLevelThatPasses,
-  requireFromWorkingDirectory
+  requireFromWorkingDirectory,
+  createFolderIfNeeded
 } = require('../lib/util');
@@ -118,7 +119,7 @@ function generateIdlNames(results, options = {}) {
   const names = {};
   function defineIDLContent(spec) {
-    return spec.idl && (spec.idl.idlNames || spec.idl.idlExtendedNames);
+    return spec.idlparsed?.idlNames || spec.idlparsed?.idlExtendedNames;
   }
   // Only keep latest version of specs and delta specs that define some IDL
@@ -129,10 +130,10 @@ function generateIdlNames(results, options = {}) {
   // Add main definitions of all IDL names
   // (using the latest version of a spec that defines some IDL)
   results.forEach(spec => {
-    if (!spec.idl || !spec.idl.idlNames) {
+    if (!spec.idlparsed.idlNames) {
       return;
     }
-    Object.entries(spec.idl.idlNames).forEach(([name, idl]) => {
+    Object.entries(spec.idlparsed.idlNames).forEach(([name, idl]) => {
       const desc = Object.assign(specInfo(spec), { fragment: idl.fragment });
       fragments[idl.fragment] = idl;
@@ -157,10 +158,10 @@ function generateIdlNames(results, options = {}) {
   // Add definitions that extend base definitions
   results.forEach(spec => {
-    if (!spec.idl || !spec.idl.idlExtendedNames) {
+    if (!spec.idlparsed.idlExtendedNames) {
       return;
     }
-    Object.entries(spec.idl.idlExtendedNames).forEach(([name, extensions]) =>
+    Object.entries(spec.idlparsed.idlExtendedNames).forEach(([name, extensions]) =>
       extensions.forEach(idl => {
         const desc = Object.assign(specInfo(spec), { fragment: idl.fragment });
         fragments[idl.fragment] = idl;
@@ -308,18 +309,6 @@ async function generateIdlNamesFromPath(crawlPath, options = {}) {
 }
-async function createFolderIfNeeded(name) {
-  try {
-    await fs.promises.mkdir(name);
-  }
-  catch (err) {
-    if (err.code !== 'EEXIST') {
-      throw err;
-    }
-  }
-}
 /**
  * Save IDL names to individual JSON files in the given folder
  *

package/src/cli/generate-idlparsed.js ADDED Viewed

@@ -0,0 +1,139 @@
+#!/usr/bin/env node
+/**
+ * The parsed IDL generator takes a crawl report or a single spec as input, and
+ * generates (or re-generates if it already exists) a parsed IDL structure from
+ * the raw IDL that the spec defines. Result is dumped to the console or saved
+ * to the given folder.
+ *
+ * The parsed IDL generator is used by the crawler to create and save the parsed
+ * IDL structures. It is also useful to re-generate the parsed IDL info when
+ * an IDL patch has been applied to the raw IDL.
+ *
+ * The parsed IDL generator can be called directly through:
+ *
+ * `node generate-idlparsed.js [crawl report] [save folder]`
+ *
+ * where `crawl report` is the path to the folder that contains the
+ * `index.json` file and all other crawl results produced by specs-crawler.js,
+ * and `save folder` is an optional folder (which must exist) where parsed
+ * IDL extracts are to be saved. In the absence of this parameter, the report
+ * is written to the console.
+ *
+ * When a folder is provided, the parsed IDL extracts are saved as JSON
+ * files (one per spec) in an `idlparsed` subfolder.
+ */
+const fs = require('fs');
+const path = require('path');
+const webidlParser = require('../cli/parse-webidl');
+const {
+  expandCrawlResult,
+  requireFromWorkingDirectory,
+  createFolderIfNeeded
+} = require('../lib/util');
+/**
+ * Update the spec object in place with parsed IDL information.
+ *
+ * @function
+ * @public
+ * @param {Object} spec The spec object to update. The function looks for the
+ *   raw IDL in the `idl` property.
+ * @return {Object} The updated spec with an `idlparsed` property that contains
+ *   the parsed version of the IDL, or the parsing error if the raw IDL is
+ *   invalid. The raw IDL remains in the `idl` property. Note the spec object is
+ *   updated in place.
+ */
+async function generateIdlParsed(spec) {
+  if (!spec?.idl) {
+    return spec;
+  }
+  try {
+    spec.idlparsed = await webidlParser.parse(spec.idl);
+    spec.idlparsed.hasObsoleteIdl = webidlParser.hasObsoleteIdl(spec.idl);
+  }
+  catch (err) {
+    // IDL content is invalid and cannot be parsed.
+    // Let's store the error in place of the parsed IDL. The raw IDL
+    // content remains available under `spec.idl`.
+    spec.idlparsed = err;
+  }
+  return spec;
+}
+async function generateIdlParsedFromPath(crawlPath) {
+  const crawlIndex = requireFromWorkingDirectory(path.resolve(crawlPath, 'index.json'));
+  const crawlResults = await expandCrawlResult(crawlIndex, crawlPath, ['idl']);
+  await Promise.all(crawlResults.results.map(generateIdlParsed));
+  return crawlResults;
+}
+/**
+ * Generate the `idlparsed` export for the spec.
+ *
+ * Note that the spec object itself is left untouched: the raw IDL (under
+ * `spec.idl`) is not deleted in the process.
+ *
+ * @function
+ * @public
+ * @param {Object} spec Spec object with the parsed IDL
+ * @param {String} folder Path to root folder where `idlparsed` folder needs to
+ *   appear.
+ * @return {String} The relative path from the root folder to the generated file
+ */
+async function saveIdlParsed(spec, folder) {
+  function specInfo(spec) {
+    return {
+      spec: {
+        title: spec.title,
+        url: spec.crawled
+      }
+    };
+  }
+  const subfolder = path.join(folder, 'idlparsed');
+  await createFolderIfNeeded(subfolder);
+  if (!spec?.idlparsed) {
+    return;
+  }
+  const json = JSON.stringify(
+    Object.assign(specInfo(spec), { idlparsed: spec.idlparsed }),
+    null, 2);
+  const filename = path.join(subfolder, spec.shortname + '.json');
+  await fs.promises.writeFile(filename, json);
+  return `idlparsed/${spec.shortname}.json`;
+}
+/**************************************************
+Export methods for use as module
+**************************************************/
+module.exports.generateIdlParsed = generateIdlParsed;
+module.exports.saveIdlParsed = saveIdlParsed;
+/**************************************************
+Code run if the code is run as a stand-alone module
+**************************************************/
+if (require.main === module) {
+  const crawlPath = process.argv[2];
+  if (!crawlPath) {
+    console.error('Required path to crawl results folder is missing');
+    process.exit(2);
+  }
+  const savePath = process.argv[3];
+  generateIdlParsedFromPath(crawlPath)
+    .then(report => {
+      if (savePath) {
+        return Promise.all(report.results.map(
+          spec => saveIdlParsed(spec, savePath)));
+      }
+      else {
+        console.log(JSON.stringify(report, null, 2));
+      }
+    });
+}

package/src/lib/nock-server.js CHANGED Viewed

@@ -9,7 +9,6 @@ const nock = require("nock");
 const path = require("path");
 const { existsSync } = require('fs');
 /**
  * Determine the path to the "node_modules" folder. The path depends on whether
  * Reffy is run directly, or installed as a library.
@@ -30,14 +29,29 @@ const modulesFolder = getModulesFolder();
 const mockSpecs = {
   "/woff/woff2/": {
-    html: `<title>WOFF2</title><body><dfn id='foo'>Foo</dfn><a href="https://www.w3.org/TR/bar/#baz">bar</a><ul class='toc'><li><a href='page.html'>page</a></ul>`,
+    html: `
+      <title>WOFF2</title>
+      <body>
+        <dfn id='foo'>Foo</dfn>
+        <a href="https://www.w3.org/TR/bar/#baz">bar</a>
+        <ul class='toc'><li><a href='page.html'>page</a></ul>`,
     pages: {
       "page.html": `<h2 id='bar'>Heading in subpage</h2>`
     }
   },
-  "/mediacapture-output/": `<script>respecConfig = { shortName: 'test' };</script><script src='https://www.w3.org/Tools/respec/respec-w3c'></script><div id=abstract></div><pre class='idl'>[Exposed=Window] interface Foo { attribute DOMString bar; };</pre>`,
-  "/accelerometer/": `<html><h2>Normative references</h2><dl><dt>FOO</dt><dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd></dl>`,
-  "/pointerlock/": `<html><h1>Pointer Lock 2.0`
+  "/mediacapture-output/": `
+    <script>respecConfig = { shortName: 'test' };</script>
+    <script src='https://www.w3.org/Tools/respec/respec-w3c'></script>
+    <div id=abstract></div>
+    <pre class='idl'>[Exposed=Window] interface Foo { attribute DOMString bar; };</pre>`,
+  "/accelerometer/": `<html>
+    <h2>Normative references</h2>
+    <dl>
+      <dt>FOO</dt>
+      <dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd>
+    </dl>`,
+  "/pointerlock/": `<html>
+    <h1>Pointer Lock 2.0`
 };
 nock.disableNetConnect();
@@ -48,47 +62,50 @@ Object.keys(mockSpecs).forEach(path => {
   nock("https://w3c.github.io")
     .persist()
     .get(path)
-    .reply(200, typeof mockSpecs[path] === "string" ? mockSpecs[path] : mockSpecs[path].html, {'Content-Type': 'text/html'});
+    .reply(200,
+      typeof mockSpecs[path] === "string" ? mockSpecs[path] : mockSpecs[path].html,
+      { 'Content-Type': 'text/html' }
+    );
   Object.keys(mockSpecs[path].pages || {}).forEach(page => {
     nock("https://w3c.github.io")
       .persist()
       .get(path + page)
-      .reply(200, mockSpecs[path].pages[page], {'Content-Type': 'text/html'});
+      .reply(200,
+        mockSpecs[path].pages[page],
+        { 'Content-Type': 'text/html' });
   });
 });
-// Handling requests generated by ReSpec document
-nock("https://respec.org")
-  .persist()
-  .options("/xref/").reply(204, '', {"Access-Control-Allow-Methods": "POST,GET",
-                                     "Access-Control-Allow-Origin": "*"}).
-  post("/xref/").reply(200, {"result":[["cc15613180c92a877452c092012792b9572ad189",[{"shortname":"webidl","spec":"webidl","type":"extended-attribute","normative":true,"uri":"#Exposed"}]],["a28dcf4738f5492eb05f1fd8a27b8ce0ae124d21",[{"shortname":"webidl","spec":"webidl","type":"interface","normative":true,"uri":"#idl-DOMString"}]],["2eb09984ad7f314b43fefeb75a6feedb049ad595",[]]]});
+// Handling requests generated by ReSpec documents
 nock("https://api.specref.org")
   .persist()
-  .get("/bibrefs?refs=webidl,html").reply(200, {webidl:{href:"https://webidl.spec.whatwg.org/"}}, {"Access-Control-Allow-Origin": "*"})
-  .get("/bibrefs?refs=HTML").reply(200, {HTML:{href:"https://html.spec.whatwg.org/multipage/"}}, {"Access-Control-Allow-Origin": "*"});
+  .get("/bibrefs?refs=webidl,html").reply(200,
+    { webidl: { href: "https://webidl.spec.whatwg.org/" } },
+    { "Access-Control-Allow-Origin": "*" }
+  );
 nock("https://www.w3.org")
   .persist()
   .get("/scripts/TR/2021/fixup.js").reply(200, '')
   .get("/StyleSheets/TR/2021/logos/W3C").reply(200, '')
   .get("/StyleSheets/TR/2021/base.css").reply(200, '')
-  .get("/Tools/respec/respec-highlight").replyWithFile(200, path.join(modulesFolder, "respec-hljs", "dist", "respec-highlight.js"), {"Content-Type": "application/js"})
-  .get("/Tools/respec/respec-w3c").replyWithFile(200, path.join(modulesFolder, "respec", "builds", "respec-w3c.js"), {"Content-Type": "application/js"});
+  .get("/Tools/respec/respec-highlight").replyWithFile(200,
+    path.join(modulesFolder, "respec-hljs", "dist", "respec-highlight.js"),
+    { "Content-Type": "application/js" })
+  .get("/Tools/respec/respec-w3c").replyWithFile(200,
+    path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
+    { "Content-Type": "application/js" });
-nock.emitter.on('error', function(err) {
-  throw(err);
+nock.emitter.on('error', function (err) {
+  console.error(err);
 });
 nock.emitter.on('no match', function(req, options, requestBody) {
   // 127.0.0.1 is used by the devtool protocol, we ignore it
   if (req && req.hostname !== '127.0.0.1') {
-    const error = new Error("No match for nock request on " + (options ? options.href : req.href));
-    throw(error);
+    console.error("No match for nock request on " + (options ? options.href : req.href));
   }
 });

package/src/lib/specs-crawler.js CHANGED Viewed

@@ -13,8 +13,8 @@
 const fs = require('fs');
 const path = require('path');
 const specs = require('browser-specs');
-const webidlParser = require('../cli/parse-webidl');
 const cssDfnParser = require('./css-grammar-parser');
+const { generateIdlParsed, saveIdlParsed } = require('../cli/generate-idlparsed');
 const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
 const {
     completeWithAlternativeUrls,
@@ -24,7 +24,8 @@ const {
     isLatestLevelThatPasses,
     processSpecification,
     setupBrowser,
-    teardownBrowser
+    teardownBrowser,
+    createFolderIfNeeded
 } = require('./util');
@@ -62,26 +63,13 @@ async function crawlSpec(spec, crawlOptions) {
                 return res;
             },
             [spec, crawlOptions.modules],
-            { quiet: crawlOptions.quiet }
+            { quiet: crawlOptions.quiet,
+              forceLocalFetch: crawlOptions.forceLocalFetch }
         );
         // Specific rule for IDL extracts:
         // parse the extracted WebIdl content
-        if (result.idl !== undefined) {
-            try {
-                const parsedIdl = await webidlParser.parse(result.idl);
-                parsedIdl.hasObsoleteIdl = webidlParser.hasObsoleteIdl(result.idl);
-                parsedIdl.idl = result.idl;
-                result.idl = parsedIdl;
-            }
-            catch (err) {
-                // IDL content is invalid and cannot be parsed.
-                // Let's return the error, along with the raw IDL
-                // content so that it may be saved to a file.
-                err.idl = result.idl;
-                result.idl = err;
-            }
-        }
+        await generateIdlParsed(result);
         if (result.css) {
             // Specific rule for CSS properties:
@@ -151,6 +139,9 @@ async function crawlSpec(spec, crawlOptions) {
         crawlOptions.modules.forEach(mod => {
             if (result[mod.property]) {
                 spec[mod.property] = result[mod.property];
+                if (mod.property === 'idl') {
+                    spec.idlparsed = result.idlparsed;
+                }
             }
         });
     }
@@ -186,14 +177,7 @@ async function saveSpecResults(spec, settings) {
     async function getSubfolder(name) {
         let subfolder = path.join(settings.output, name);
-        try {
-            await fs.promises.mkdir(subfolder);
-        }
-        catch (err) {
-            if (err.code !== 'EEXIST') {
-                throw err;
-            }
-        }
+        await createFolderIfNeeded(subfolder);
         return subfolder;
     }
@@ -242,13 +226,14 @@ async function saveSpecResults(spec, settings) {
             // (https://github.com/w3c/webref)
             // Source: ${spec.title} (${spec.crawled})`;
         idlHeader = idlHeader.replace(/^\s+/gm, '').trim() + '\n\n';
-        let idl = spec.idl.idl
+        let idl = spec.idl
             .replace(/\s+$/gm, '\n')
             .replace(/\t/g, '  ')
             .trim();
         idl = idlHeader + idl + '\n';
         await fs.promises.writeFile(
             path.join(folders.idl, spec.shortname + '.idl'), idl);
+        return `idl/${spec.shortname}.idl`;
     };
     async function saveCss(spec) {
@@ -265,19 +250,15 @@ async function saveSpecResults(spec, settings) {
         }, 2) + '\n';
         const pathname = path.join(folders.css, spec.shortname + '.json')
         await fs.promises.writeFile(pathname, json);
-        spec.css = `css/${spec.shortname}.json`;
+        return `css/${spec.shortname}.json`;
     };
     // Save IDL dumps
-    if (spec.idl && spec.idl.idl) {
-        await saveIdl(spec);
-        delete spec.idl.idl;
-        spec.idlparsed = spec.idl;
-        spec.idl = `idl/${spec.shortname}.idl`;
-        await saveExtract(spec, 'idlparsed', spec => spec.idlparsed);
+    if (spec.idl) {
+        spec.idl = await saveIdl(spec);
     }
-    else if (spec.idl) {
-        delete spec.idl;
+    if (spec.idlparsed) {
+        spec.idlparsed = await saveIdlParsed(spec, settings.output);
     }
     // Save CSS dumps
@@ -288,7 +269,7 @@ async function saveSpecResults(spec, settings) {
             (Object.keys(spec.css.valuespaces || {}).length > 0));
     }
     if (defineCSSContent(spec)) {
-        await saveCss(spec);
+        spec.css = await saveCss(spec);
     }
     // Specs that define CSS now have a "css" key that point to the CSS extract.

package/src/lib/util.js CHANGED Viewed

@@ -83,6 +83,14 @@ const modulesFolder = getModulesFolder();
  */
 let browser = null;
+/**
+ * Promise resolved when there is no running instance of Puppeteer. This makes
+ * it possible to serialize calls to setupBrowser (and thus to crawlList and
+ * crawlSpecs in specs-crawler.js)
+ */
+let browserClosed = Promise.resolve();
+let resolveBrowserClosed = null;
 /**
  * The browser JS library that will be loaded onto every crawled page
  */
@@ -228,9 +236,13 @@ window.reffy.${module.name} = ${module.name};
  * @public
  */
 async function setupBrowser(modules) {
-    // Create browser instance (one per specification. Switch "headless" to
-    // "false" (and commenting out the call to "browser.close()") is typically
-    // useful when something goes wrong to access dev tools and debug)
+    // There can be only one crawl running at a time
+    await browserClosed;
+    browserClosed = new Promise(resolve => resolveBrowserClosed = resolve);
+    // Create browser instance
+    // Note: switch "headless" to "false" (and comment out the call to
+    // "browser.close()") to access dev tools in debug mode
     browser = await puppeteer.launch({ headless: true });
     setupBrowserlib(modules);
 }
@@ -248,6 +260,8 @@ async function teardownBrowser() {
     if (browser) {
         await browser.close();
         browser = null;
+        resolveBrowserClosed();
+        resolveBrowserClosed = null;
     }
 }
@@ -306,8 +320,12 @@ async function teardownBrowser() {
  *   These arguments typically make it possible to pass contextual information
  *   to the processing function (such as the spec object that describes the
  *   spec being processed, or the list of processing modules to run)
- * @param {Object} options Processing options. The only supported option is
- *   "quiet", which tells the function not to report warnings to the console
+ * @param {Object} options Processing options. The "quiet" flag tells the
+ *   function not to report warnings to the console. The "forceLocalFetch"
+ *   flag tells the function that all network requests need to be only handled
+ *   by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
+ *   network and caching logic), which is useful to keep full control of network
+ *   requests in tests.
  * @return {Promise} The promise to get the results of the processing function
  */
 async function processSpecification(spec, processFunction, args, options) {
@@ -416,15 +434,22 @@ async function processSpecification(spec, processFunction, args, options) {
                     return;
                 }
-                // Fetch from file cache failed somehow, report a warning
-                // and let Puppeteer handle the request as fallback
-                options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
-                try {
-                    await cdp.send('Fetch.continueRequest', { requestId });
+                // Fetch from file cache failed somehow
+                // Let Puppeteer handle the request as fallback unless
+                // calling function asked us not to do that
+                if (options.forceLocalFetch) {
+                    options.quiet ?? console.warn(`[warn] Network request for ${request.url} failed`, err);
+                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                 }
-                catch (err) {
-                    if (!controller.signal.aborted) {
-                        options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
+                else {
+                    options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
+                    try {
+                        await cdp.send('Fetch.continueRequest', { requestId });
+                    }
+                    catch (err) {
+                        if (!controller.signal.aborted) {
+                            options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
+                        }
                     }
                 }
             }
@@ -434,6 +459,9 @@ async function processSpecification(spec, processFunction, args, options) {
     try {
         const page = await browser.newPage();
+        // Disable cache if caller wants to handle all network requests
+        await page.setCacheEnabled(!options.forceLocalFetch);
         // Intercept all network requests to use our own version of "fetch"
         // that makes use of the local file cache.
         const cdp = await page.target().createCDPSession();
@@ -462,17 +490,17 @@ async function processSpecification(spec, processFunction, args, options) {
         // network connections in the past 500ms. This should be enough to
         // handle "redirection" through JS or meta refresh (which would not
         // have time to run if we used "load").
-        const options = {
+        const loadOptions = {
             timeout: 120000,
             waitUntil: 'networkidle0'
         };
         // Load the page
         if (spec.html) {
-            await page.setContent(spec.html, options);
+            await page.setContent(spec.html, loadOptions);
         }
         else {
-            await page.goto(spec.url, options);
+            await page.goto(spec.url, loadOptions);
         }
         // Handle multi-page specs
@@ -483,11 +511,12 @@ async function processSpecification(spec, processFunction, args, options) {
             for (const url of pageUrls) {
                 const subAbort = new AbortController();
                 const subPage = await browser.newPage();
+                await subPage.setCacheEnabled(!options.forceLocalFetch);
                 const subCdp = await subPage.target().createCDPSession();
                 await subCdp.send('Fetch.enable');
                 subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
                 try {
-                    await subPage.goto(url, options);
+                    await subPage.goto(url, loadOptions);
                     const html = await subPage.evaluate(() => {
                         return document.body.outerHTML
                             .replace(/<body/, '<section')
@@ -688,23 +717,6 @@ async function expandCrawlResult(crawl, baseFolder, properties) {
     baseFolder = baseFolder || '';
     async function expandSpec(spec) {
-        // Special case for "idl" that must be processed first
-        if (spec.idl && (typeof spec.idl === 'string') &&
-                (!properties || properties.includes('idl') || properties.includes('idlparsed'))) {
-            if (baseFolder.startsWith('https:')) {
-                const url = (new URL(spec.idl, baseFolder)).toString();
-                let response = await fetch(url, { nolog: true });
-                spec.idl = {
-                    idl: await response.text()
-                };
-            }
-            else {
-                spec.idl = {
-                    idl: await fs.readFile(path.join(baseFolder, spec.idl), 'utf8')
-                };
-            }
-        }
         await Promise.all(Object.keys(spec).map(async property => {
             // Only consider properties explicitly requested
             if (properties && !properties.includes(property)) {
@@ -738,14 +750,15 @@ async function expandCrawlResult(crawl, baseFolder, properties) {
                 delete css.spec;
                 spec[property] = css;
             }
-            else if (property === 'idlparsed') {
-                // Special case for parsed IDL extracts, as result needs to be
-                // attached to "idl"
-                if (!spec.idl) {
-                    spec.idl = {};
+            else if (property === 'idl') {
+                // Special case for raw IDL extracts, which are text extracts.
+                // Also drop header that may have been added when extract was
+                // serialized.
+                if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
+                    const endOfHeader = contents.indexOf('\n\n');
+                    contents = contents.substring(endOfHeader + 2);
                 }
-                Object.assign(spec.idl, contents[property]);
-                delete spec.idlparsed;
+                spec.idl = contents;
             }
             else {
                 spec[property] = contents[property];
@@ -810,6 +823,26 @@ function getGeneratedIDLNamesByCSSProperty(property) {
 };
+/**
+ * Creates the given folder if it does not exist yet.
+ *
+ * @function
+ * @public
+ * @param {String} folder Path to folder to create
+ *   (from current working directory)
+ */
+async function createFolderIfNeeded(folder) {
+    try {
+        await fs.mkdir(folder);
+    }
+    catch (err) {
+        if (err.code !== 'EEXIST') {
+            throw err;
+        }
+    }
+}
 module.exports = {
     fetch,
     requireFromWorkingDirectory,
@@ -820,5 +853,6 @@ module.exports = {
     completeWithAlternativeUrls,
     isLatestLevelThatPasses,
     expandCrawlResult,
-    getGeneratedIDLNamesByCSSProperty
+    getGeneratedIDLNamesByCSSProperty,
+    createFolderIfNeeded
 };