reffy 5.2.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,6 +93,10 @@ To create the WebIDL extract in the first place, you will need to run the `idl`
93
93
  reffy --spec fetch --module idl > fetch.idl
94
94
  ```
95
95
 
96
+ ### Parsed WebIDL generator
97
+
98
+ The **Parsed WebIDL generator** takes the results of a crawl as input and applies the WebIDL parser to all specs it contains to create JSON extracts in an `idlparsed` folder. To run the generator: `node src/cli/generate-idlparsed.js [crawl folder] [save folder]`
99
+
96
100
 
97
101
  ### WebIDL names generator
98
102
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "5.2.0",
3
+ "version": "6.0.0",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,20 +32,20 @@
32
32
  "bin": "./reffy.js",
33
33
  "dependencies": {
34
34
  "abortcontroller-polyfill": "1.7.3",
35
- "browser-specs": "2.16.0",
35
+ "browser-specs": "2.21.0",
36
36
  "commander": "8.3.0",
37
37
  "fetch-filecache-for-crawling": "4.0.2",
38
- "puppeteer": "11.0.0",
38
+ "puppeteer": "13.0.1",
39
39
  "semver": "^7.3.5",
40
40
  "webidl2": "24.2.0"
41
41
  },
42
42
  "devDependencies": {
43
43
  "chai": "4.3.4",
44
44
  "mocha": "9.1.3",
45
- "nock": "13.2.0",
46
- "respec": "28.0.0",
45
+ "nock": "13.2.1",
46
+ "respec": "28.1.0",
47
47
  "respec-hljs": "2.1.1",
48
- "rollup": "2.59.0"
48
+ "rollup": "2.63.0"
49
49
  },
50
50
  "scripts": {
51
51
  "test": "mocha --recursive tests/"
@@ -399,7 +399,7 @@ function checkSpecDefinitions(spec, options = {}) {
399
399
  (spec.css || {});
400
400
  const idl = (typeof spec.idlparsed === "string") ?
401
401
  require(path.resolve(options.rootFolder, spec.idlparsed)).idlparsed :
402
- spec.idl;
402
+ spec.idlparsed;
403
403
 
404
404
  // Make sure that all expected CSS definitions exist in the dfns extract
405
405
  const expectedCSSDfns = getExpectedDfnsFromCSS(css);
@@ -28,7 +28,8 @@ const { matchIdlDfn, getExpectedDfnFromIdlDesc } = require('./check-missing-dfns
28
28
  const {
29
29
  expandCrawlResult,
30
30
  isLatestLevelThatPasses,
31
- requireFromWorkingDirectory
31
+ requireFromWorkingDirectory,
32
+ createFolderIfNeeded
32
33
  } = require('../lib/util');
33
34
 
34
35
 
@@ -118,7 +119,7 @@ function generateIdlNames(results, options = {}) {
118
119
  const names = {};
119
120
 
120
121
  function defineIDLContent(spec) {
121
- return spec.idl && (spec.idl.idlNames || spec.idl.idlExtendedNames);
122
+ return spec.idlparsed?.idlNames || spec.idlparsed?.idlExtendedNames;
122
123
  }
123
124
 
124
125
  // Only keep latest version of specs and delta specs that define some IDL
@@ -129,10 +130,10 @@ function generateIdlNames(results, options = {}) {
129
130
  // Add main definitions of all IDL names
130
131
  // (using the latest version of a spec that defines some IDL)
131
132
  results.forEach(spec => {
132
- if (!spec.idl || !spec.idl.idlNames) {
133
+ if (!spec.idlparsed.idlNames) {
133
134
  return;
134
135
  }
135
- Object.entries(spec.idl.idlNames).forEach(([name, idl]) => {
136
+ Object.entries(spec.idlparsed.idlNames).forEach(([name, idl]) => {
136
137
  const desc = Object.assign(specInfo(spec), { fragment: idl.fragment });
137
138
  fragments[idl.fragment] = idl;
138
139
 
@@ -157,10 +158,10 @@ function generateIdlNames(results, options = {}) {
157
158
 
158
159
  // Add definitions that extend base definitions
159
160
  results.forEach(spec => {
160
- if (!spec.idl || !spec.idl.idlExtendedNames) {
161
+ if (!spec.idlparsed.idlExtendedNames) {
161
162
  return;
162
163
  }
163
- Object.entries(spec.idl.idlExtendedNames).forEach(([name, extensions]) =>
164
+ Object.entries(spec.idlparsed.idlExtendedNames).forEach(([name, extensions]) =>
164
165
  extensions.forEach(idl => {
165
166
  const desc = Object.assign(specInfo(spec), { fragment: idl.fragment });
166
167
  fragments[idl.fragment] = idl;
@@ -308,18 +309,6 @@ async function generateIdlNamesFromPath(crawlPath, options = {}) {
308
309
  }
309
310
 
310
311
 
311
- async function createFolderIfNeeded(name) {
312
- try {
313
- await fs.promises.mkdir(name);
314
- }
315
- catch (err) {
316
- if (err.code !== 'EEXIST') {
317
- throw err;
318
- }
319
- }
320
- }
321
-
322
-
323
312
  /**
324
313
  * Save IDL names to individual JSON files in the given folder
325
314
  *
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * The parsed IDL generator takes a crawl report or a single spec as input, and
4
+ * generates (or re-generates if it already exists) a parsed IDL structure from
5
+ * the raw IDL that the spec defines. Result is dumped to the console or saved
6
+ * to the given folder.
7
+ *
8
+ * The parsed IDL generator is used by the crawler to create and save the parsed
9
+ * IDL structures. It is also useful to re-generate the parsed IDL info when
10
+ * an IDL patch has been applied to the raw IDL.
11
+ *
12
+ * The parsed IDL generator can be called directly through:
13
+ *
14
+ * `node generate-idlparsed.js [crawl report] [save folder]`
15
+ *
16
+ * where `crawl report` is the path to the folder that contains the
17
+ * `index.json` file and all other crawl results produced by specs-crawler.js,
18
+ * and `save folder` is an optional folder (which must exist) where parsed
19
+ * IDL extracts are to be saved. In the absence of this parameter, the report
20
+ * is written to the console.
21
+ *
22
+ * When a folder is provided, the parsed IDL extracts are saved as a JSON
23
+ * structure in an `idlparsed` subfolder.
24
+ */
25
+
26
+ const fs = require('fs');
27
+ const path = require('path');
28
+ const webidlParser = require('../cli/parse-webidl');
29
+ const {
30
+ expandCrawlResult,
31
+ requireFromWorkingDirectory,
32
+ createFolderIfNeeded
33
+ } = require('../lib/util');
34
+
35
+
36
+ /**
37
+ * Update the spec object in place with parsed IDL information.
38
+ *
39
+ * @function
40
+ * @public
41
+ * @param {Object} spec The spec object to update. The function looks for the
42
+ * raw IDL in the `idl` property.
43
+ * @return {Object} The updated spec with an `idlparsed` property that contains
44
+ * the parsed version of the IDL, or the parsing error if the raw IDL is
45
+ * invalid. The raw IDL stays in `idl`. The spec object is updated in place.
46
+ */
47
+ async function generateIdlParsed(spec) {
48
+ if (!spec?.idl) {
49
+ return spec;
50
+ }
51
+ try {
52
+ spec.idlparsed = await webidlParser.parse(spec.idl);
53
+ spec.idlparsed.hasObsoleteIdl = webidlParser.hasObsoleteIdl(spec.idl);
54
+ }
55
+ catch (err) {
56
+ // IDL content is invalid and cannot be parsed.
57
+ // Let's return the error, along with the raw IDL
58
+ // content so that it may be saved to a file.
59
+ spec.idlparsed = err;
60
+ }
61
+ return spec;
62
+ }
63
+
64
+
65
+ async function generateIdlParsedFromPath(crawlPath) {
66
+ const crawlIndex = requireFromWorkingDirectory(path.resolve(crawlPath, 'index.json'));
67
+ const crawlResults = await expandCrawlResult(crawlIndex, crawlPath, ['idl']);
68
+ await Promise.all(crawlResults.results.map(generateIdlParsed));
69
+ return crawlResults;
70
+ }
71
+
72
+
73
+ /**
74
+ * Generate the `idlparsed` export for the spec.
75
+ *
76
+ * Note that the spec object is not modified and that the raw IDL is not part of the export.
77
+ *
78
+ * @function
79
+ * @public
80
+ * @param {Object} spec Spec object with the parsed IDL
81
+ * @param {String} folder Path to root folder where `idlparsed` folder needs to
82
+ * appear.
83
+ * @return {String} The relative path from the root folder to the generated file
84
+ */
85
+ async function saveIdlParsed(spec, folder) {
86
+ function specInfo(spec) {
87
+ return {
88
+ spec: {
89
+ title: spec.title,
90
+ url: spec.crawled
91
+ }
92
+ };
93
+ }
94
+
95
+ const subfolder = path.join(folder, 'idlparsed');
96
+ await createFolderIfNeeded(subfolder);
97
+
98
+ if (!spec?.idlparsed) {
99
+ return;
100
+ }
101
+
102
+ const json = JSON.stringify(
103
+ Object.assign(specInfo(spec), { idlparsed: spec.idlparsed }),
104
+ null, 2);
105
+ const filename = path.join(subfolder, spec.shortname + '.json');
106
+ await fs.promises.writeFile(filename, json);
107
+ return `idlparsed/${spec.shortname}.json`;
108
+ }
109
+
110
+
111
+ /**************************************************
112
+ Export methods for use as module
113
+ **************************************************/
114
+ module.exports.generateIdlParsed = generateIdlParsed;
115
+ module.exports.saveIdlParsed = saveIdlParsed;
116
+
117
+
118
+ /**************************************************
119
+ Code run if the code is run as a stand-alone module
120
+ **************************************************/
121
+ if (require.main === module) {
122
+ const crawlPath = process.argv[2];
123
+ if (!crawlPath) {
124
+ console.error('Required path to crawl results folder is missing');
125
+ process.exit(2);
126
+ }
127
+
128
+ const savePath = process.argv[3];
129
+ generateIdlParsedFromPath(crawlPath)
130
+ .then(report => {
131
+ if (savePath) {
132
+ return Promise.all(report.results.map(
133
+ spec => saveIdlParsed(spec, savePath)));
134
+ }
135
+ else {
136
+ console.log(JSON.stringify(report, null, 2));
137
+ }
138
+ });
139
+ }
@@ -9,7 +9,6 @@ const nock = require("nock");
9
9
  const path = require("path");
10
10
  const { existsSync } = require('fs');
11
11
 
12
-
13
12
  /**
14
13
  * Determine the path to the "node_modules" folder. The path depends on whether
15
14
  * Reffy is run directly, or installed as a library.
@@ -30,14 +29,29 @@ const modulesFolder = getModulesFolder();
30
29
 
31
30
  const mockSpecs = {
32
31
  "/woff/woff2/": {
33
- html: `<title>WOFF2</title><body><dfn id='foo'>Foo</dfn><a href="https://www.w3.org/TR/bar/#baz">bar</a><ul class='toc'><li><a href='page.html'>page</a></ul>`,
32
+ html: `
33
+ <title>WOFF2</title>
34
+ <body>
35
+ <dfn id='foo'>Foo</dfn>
36
+ <a href="https://www.w3.org/TR/bar/#baz">bar</a>
37
+ <ul class='toc'><li><a href='page.html'>page</a></ul>`,
34
38
  pages: {
35
39
  "page.html": `<h2 id='bar'>Heading in subpage</h2>`
36
40
  }
37
41
  },
38
- "/mediacapture-output/": `<script>respecConfig = { shortName: 'test' };</script><script src='https://www.w3.org/Tools/respec/respec-w3c'></script><div id=abstract></div><pre class='idl'>[Exposed=Window] interface Foo { attribute DOMString bar; };</pre>`,
39
- "/accelerometer/": `<html><h2>Normative references</h2><dl><dt>FOO</dt><dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd></dl>`,
40
- "/pointerlock/": `<html><h1>Pointer Lock 2.0`
42
+ "/mediacapture-output/": `
43
+ <script>respecConfig = { shortName: 'test' };</script>
44
+ <script src='https://www.w3.org/Tools/respec/respec-w3c'></script>
45
+ <div id=abstract></div>
46
+ <pre class='idl'>[Exposed=Window] interface Foo { attribute DOMString bar; };</pre>`,
47
+ "/accelerometer/": `<html>
48
+ <h2>Normative references</h2>
49
+ <dl>
50
+ <dt>FOO</dt>
51
+ <dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd>
52
+ </dl>`,
53
+ "/pointerlock/": `<html>
54
+ <h1>Pointer Lock 2.0`
41
55
  };
42
56
 
43
57
  nock.disableNetConnect();
@@ -48,47 +62,50 @@ Object.keys(mockSpecs).forEach(path => {
48
62
  nock("https://w3c.github.io")
49
63
  .persist()
50
64
  .get(path)
51
- .reply(200, typeof mockSpecs[path] === "string" ? mockSpecs[path] : mockSpecs[path].html, {'Content-Type': 'text/html'});
65
+ .reply(200,
66
+ typeof mockSpecs[path] === "string" ? mockSpecs[path] : mockSpecs[path].html,
67
+ { 'Content-Type': 'text/html' }
68
+ );
52
69
 
53
70
  Object.keys(mockSpecs[path].pages || {}).forEach(page => {
54
71
  nock("https://w3c.github.io")
55
72
  .persist()
56
73
  .get(path + page)
57
- .reply(200, mockSpecs[path].pages[page], {'Content-Type': 'text/html'});
74
+ .reply(200,
75
+ mockSpecs[path].pages[page],
76
+ { 'Content-Type': 'text/html' });
58
77
 
59
78
  });
60
79
  });
61
80
 
62
81
 
63
- // Handling requests generated by ReSpec document
64
- nock("https://respec.org")
65
- .persist()
66
- .options("/xref/").reply(204, '', {"Access-Control-Allow-Methods": "POST,GET",
67
- "Access-Control-Allow-Origin": "*"}).
68
- post("/xref/").reply(200, {"result":[["cc15613180c92a877452c092012792b9572ad189",[{"shortname":"webidl","spec":"webidl","type":"extended-attribute","normative":true,"uri":"#Exposed"}]],["a28dcf4738f5492eb05f1fd8a27b8ce0ae124d21",[{"shortname":"webidl","spec":"webidl","type":"interface","normative":true,"uri":"#idl-DOMString"}]],["2eb09984ad7f314b43fefeb75a6feedb049ad595",[]]]});
69
-
82
+ // Handling requests generated by ReSpec documents
70
83
  nock("https://api.specref.org")
71
84
  .persist()
72
- .get("/bibrefs?refs=webidl,html").reply(200, {webidl:{href:"https://webidl.spec.whatwg.org/"}}, {"Access-Control-Allow-Origin": "*"})
73
- .get("/bibrefs?refs=HTML").reply(200, {HTML:{href:"https://html.spec.whatwg.org/multipage/"}}, {"Access-Control-Allow-Origin": "*"});
85
+ .get("/bibrefs?refs=webidl,html").reply(200,
86
+ { webidl: { href: "https://webidl.spec.whatwg.org/" } },
87
+ { "Access-Control-Allow-Origin": "*" }
88
+ );
74
89
 
75
90
  nock("https://www.w3.org")
76
91
  .persist()
77
92
  .get("/scripts/TR/2021/fixup.js").reply(200, '')
78
93
  .get("/StyleSheets/TR/2021/logos/W3C").reply(200, '')
79
94
  .get("/StyleSheets/TR/2021/base.css").reply(200, '')
80
- .get("/Tools/respec/respec-highlight").replyWithFile(200, path.join(modulesFolder, "respec-hljs", "dist", "respec-highlight.js"), {"Content-Type": "application/js"})
81
- .get("/Tools/respec/respec-w3c").replyWithFile(200, path.join(modulesFolder, "respec", "builds", "respec-w3c.js"), {"Content-Type": "application/js"});
82
-
95
+ .get("/Tools/respec/respec-highlight").replyWithFile(200,
96
+ path.join(modulesFolder, "respec-hljs", "dist", "respec-highlight.js"),
97
+ { "Content-Type": "application/js" })
98
+ .get("/Tools/respec/respec-w3c").replyWithFile(200,
99
+ path.join(modulesFolder, "respec", "builds", "respec-w3c.js"),
100
+ { "Content-Type": "application/js" });
83
101
 
84
- nock.emitter.on('error', function(err) {
85
- throw(err);
102
+ nock.emitter.on('error', function (err) {
103
+ console.error(err);
86
104
  });
87
105
  nock.emitter.on('no match', function(req, options, requestBody) {
88
106
  // 127.0.0.1 is used by the devtool protocol, we ignore it
89
107
  if (req && req.hostname !== '127.0.0.1') {
90
- const error = new Error("No match for nock request on " + (options ? options.href : req.href));
91
- throw(error);
108
+ console.error("No match for nock request on " + (options ? options.href : req.href));
92
109
  }
93
110
  });
94
111
 
@@ -13,8 +13,8 @@
13
13
  const fs = require('fs');
14
14
  const path = require('path');
15
15
  const specs = require('browser-specs');
16
- const webidlParser = require('../cli/parse-webidl');
17
16
  const cssDfnParser = require('./css-grammar-parser');
17
+ const { generateIdlParsed, saveIdlParsed } = require('../cli/generate-idlparsed');
18
18
  const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
19
19
  const {
20
20
  completeWithAlternativeUrls,
@@ -24,7 +24,8 @@ const {
24
24
  isLatestLevelThatPasses,
25
25
  processSpecification,
26
26
  setupBrowser,
27
- teardownBrowser
27
+ teardownBrowser,
28
+ createFolderIfNeeded
28
29
  } = require('./util');
29
30
 
30
31
 
@@ -62,26 +63,13 @@ async function crawlSpec(spec, crawlOptions) {
62
63
  return res;
63
64
  },
64
65
  [spec, crawlOptions.modules],
65
- { quiet: crawlOptions.quiet }
66
+ { quiet: crawlOptions.quiet,
67
+ forceLocalFetch: crawlOptions.forceLocalFetch }
66
68
  );
67
69
 
68
70
  // Specific rule for IDL extracts:
69
71
  // parse the extracted WebIdl content
70
- if (result.idl !== undefined) {
71
- try {
72
- const parsedIdl = await webidlParser.parse(result.idl);
73
- parsedIdl.hasObsoleteIdl = webidlParser.hasObsoleteIdl(result.idl);
74
- parsedIdl.idl = result.idl;
75
- result.idl = parsedIdl;
76
- }
77
- catch (err) {
78
- // IDL content is invalid and cannot be parsed.
79
- // Let's return the error, along with the raw IDL
80
- // content so that it may be saved to a file.
81
- err.idl = result.idl;
82
- result.idl = err;
83
- }
84
- }
72
+ await generateIdlParsed(result);
85
73
 
86
74
  if (result.css) {
87
75
  // Specific rule for CSS properties:
@@ -151,6 +139,9 @@ async function crawlSpec(spec, crawlOptions) {
151
139
  crawlOptions.modules.forEach(mod => {
152
140
  if (result[mod.property]) {
153
141
  spec[mod.property] = result[mod.property];
142
+ if (mod.property === 'idl') {
143
+ spec.idlparsed = result.idlparsed;
144
+ }
154
145
  }
155
146
  });
156
147
  }
@@ -186,14 +177,7 @@ async function saveSpecResults(spec, settings) {
186
177
 
187
178
  async function getSubfolder(name) {
188
179
  let subfolder = path.join(settings.output, name);
189
- try {
190
- await fs.promises.mkdir(subfolder);
191
- }
192
- catch (err) {
193
- if (err.code !== 'EEXIST') {
194
- throw err;
195
- }
196
- }
180
+ await createFolderIfNeeded(subfolder);
197
181
  return subfolder;
198
182
  }
199
183
 
@@ -242,13 +226,14 @@ async function saveSpecResults(spec, settings) {
242
226
  // (https://github.com/w3c/webref)
243
227
  // Source: ${spec.title} (${spec.crawled})`;
244
228
  idlHeader = idlHeader.replace(/^\s+/gm, '').trim() + '\n\n';
245
- let idl = spec.idl.idl
229
+ let idl = spec.idl
246
230
  .replace(/\s+$/gm, '\n')
247
231
  .replace(/\t/g, ' ')
248
232
  .trim();
249
233
  idl = idlHeader + idl + '\n';
250
234
  await fs.promises.writeFile(
251
235
  path.join(folders.idl, spec.shortname + '.idl'), idl);
236
+ return `idl/${spec.shortname}.idl`;
252
237
  };
253
238
 
254
239
  async function saveCss(spec) {
@@ -265,19 +250,15 @@ async function saveSpecResults(spec, settings) {
265
250
  }, 2) + '\n';
266
251
  const pathname = path.join(folders.css, spec.shortname + '.json')
267
252
  await fs.promises.writeFile(pathname, json);
268
- spec.css = `css/${spec.shortname}.json`;
253
+ return `css/${spec.shortname}.json`;
269
254
  };
270
255
 
271
256
  // Save IDL dumps
272
- if (spec.idl && spec.idl.idl) {
273
- await saveIdl(spec);
274
- delete spec.idl.idl;
275
- spec.idlparsed = spec.idl;
276
- spec.idl = `idl/${spec.shortname}.idl`;
277
- await saveExtract(spec, 'idlparsed', spec => spec.idlparsed);
257
+ if (spec.idl) {
258
+ spec.idl = await saveIdl(spec);
278
259
  }
279
- else if (spec.idl) {
280
- delete spec.idl;
260
+ if (spec.idlparsed) {
261
+ spec.idlparsed = await saveIdlParsed(spec, settings.output);
281
262
  }
282
263
 
283
264
  // Save CSS dumps
@@ -288,7 +269,7 @@ async function saveSpecResults(spec, settings) {
288
269
  (Object.keys(spec.css.valuespaces || {}).length > 0));
289
270
  }
290
271
  if (defineCSSContent(spec)) {
291
- await saveCss(spec);
272
+ spec.css = await saveCss(spec);
292
273
  }
293
274
 
294
275
  // Specs that define CSS now have a "css" key that point to the CSS extract.
package/src/lib/util.js CHANGED
@@ -83,6 +83,14 @@ const modulesFolder = getModulesFolder();
83
83
  */
84
84
  let browser = null;
85
85
 
86
+ /**
87
+ * Promise resolved when there is no running instance of Puppeteer. This makes
88
+ * it possible to serialize calls to setupBrowser (and thus to crawlList and
89
+ * crawlSpecs in specs-crawler.js)
90
+ */
91
+ let browserClosed = Promise.resolve();
92
+ let resolveBrowserClosed = null;
93
+
86
94
  /**
87
95
  * The browser JS library that will be loaded onto every crawled page
88
96
  */
@@ -228,9 +236,13 @@ window.reffy.${module.name} = ${module.name};
228
236
  * @public
229
237
  */
230
238
  async function setupBrowser(modules) {
231
- // Create browser instance (one per specification. Switch "headless" to
232
- // "false" (and commenting out the call to "browser.close()") is typically
233
- // useful when something goes wrong to access dev tools and debug)
239
+ // There can be only one crawl running at a time
240
+ await browserClosed;
241
+ browserClosed = new Promise(resolve => resolveBrowserClosed = resolve);
242
+
243
+ // Create browser instance
244
+ // Note: switch "headless" to "false" (and comment out the call to
245
+ // "browser.close()") to access dev tools in debug mode
234
246
  browser = await puppeteer.launch({ headless: true });
235
247
  setupBrowserlib(modules);
236
248
  }
@@ -248,6 +260,8 @@ async function teardownBrowser() {
248
260
  if (browser) {
249
261
  await browser.close();
250
262
  browser = null;
263
+ resolveBrowserClosed();
264
+ resolveBrowserClosed = null;
251
265
  }
252
266
  }
253
267
 
@@ -306,8 +320,12 @@ async function teardownBrowser() {
306
320
  * These arguments typically make it possible to pass contextual information
307
321
  * to the processing function (such as the spec object that describes the
308
322
  * spec being processed, or the list of processing modules to run)
309
- * @param {Object} options Processing options. The only supported option is
310
- * "quiet", which tells the function not to report warnings to the console
323
+ * @param {Object} options Processing options. The "quiet" flag tells the
324
+ * function not to report warnings to the console. The "forceLocalFetch"
325
+ * flag tells the function that all network requests must be handled only
326
+ * by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
327
+ * network and caching logic), which is useful to keep full control of network
328
+ * requests in tests.
311
329
  * @return {Promise} The promise to get the results of the processing function
312
330
  */
313
331
  async function processSpecification(spec, processFunction, args, options) {
@@ -416,15 +434,22 @@ async function processSpecification(spec, processFunction, args, options) {
416
434
  return;
417
435
  }
418
436
 
419
- // Fetch from file cache failed somehow, report a warning
420
- // and let Puppeteer handle the request as fallback
421
- options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
422
- try {
423
- await cdp.send('Fetch.continueRequest', { requestId });
437
+ // Fetch from file cache failed somehow
438
+ // Let Puppeteer handle the request as fallback unless
439
+ // calling function asked us not to do that
440
+ if (options.forceLocalFetch) {
441
+ options.quiet ?? console.warn(`[warn] Network request for ${request.url} failed`, err);
442
+ await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
424
443
  }
425
- catch (err) {
426
- if (!controller.signal.aborted) {
427
- options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
444
+ else {
445
+ options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
446
+ try {
447
+ await cdp.send('Fetch.continueRequest', { requestId });
448
+ }
449
+ catch (err) {
450
+ if (!controller.signal.aborted) {
451
+ options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
452
+ }
428
453
  }
429
454
  }
430
455
  }
@@ -434,6 +459,9 @@ async function processSpecification(spec, processFunction, args, options) {
434
459
  try {
435
460
  const page = await browser.newPage();
436
461
 
462
+ // Disable cache if caller wants to handle all network requests
463
+ await page.setCacheEnabled(!options.forceLocalFetch);
464
+
437
465
  // Intercept all network requests to use our own version of "fetch"
438
466
  // that makes use of the local file cache.
439
467
  const cdp = await page.target().createCDPSession();
@@ -462,17 +490,17 @@ async function processSpecification(spec, processFunction, args, options) {
462
490
  // network connections in the past 500ms. This should be enough to
463
491
  // handle "redirection" through JS or meta refresh (which would not
464
492
  // have time to run if we used "load").
465
- const options = {
493
+ const loadOptions = {
466
494
  timeout: 120000,
467
495
  waitUntil: 'networkidle0'
468
496
  };
469
497
 
470
498
  // Load the page
471
499
  if (spec.html) {
472
- await page.setContent(spec.html, options);
500
+ await page.setContent(spec.html, loadOptions);
473
501
  }
474
502
  else {
475
- await page.goto(spec.url, options);
503
+ await page.goto(spec.url, loadOptions);
476
504
  }
477
505
 
478
506
  // Handle multi-page specs
@@ -483,11 +511,12 @@ async function processSpecification(spec, processFunction, args, options) {
483
511
  for (const url of pageUrls) {
484
512
  const subAbort = new AbortController();
485
513
  const subPage = await browser.newPage();
514
+ await subPage.setCacheEnabled(!options.forceLocalFetch);
486
515
  const subCdp = await subPage.target().createCDPSession();
487
516
  await subCdp.send('Fetch.enable');
488
517
  subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
489
518
  try {
490
- await subPage.goto(url, options);
519
+ await subPage.goto(url, loadOptions);
491
520
  const html = await subPage.evaluate(() => {
492
521
  return document.body.outerHTML
493
522
  .replace(/<body/, '<section')
@@ -688,23 +717,6 @@ async function expandCrawlResult(crawl, baseFolder, properties) {
688
717
  baseFolder = baseFolder || '';
689
718
 
690
719
  async function expandSpec(spec) {
691
- // Special case for "idl" that must be processed first
692
- if (spec.idl && (typeof spec.idl === 'string') &&
693
- (!properties || properties.includes('idl') || properties.includes('idlparsed'))) {
694
- if (baseFolder.startsWith('https:')) {
695
- const url = (new URL(spec.idl, baseFolder)).toString();
696
- let response = await fetch(url, { nolog: true });
697
- spec.idl = {
698
- idl: await response.text()
699
- };
700
- }
701
- else {
702
- spec.idl = {
703
- idl: await fs.readFile(path.join(baseFolder, spec.idl), 'utf8')
704
- };
705
- }
706
- }
707
-
708
720
  await Promise.all(Object.keys(spec).map(async property => {
709
721
  // Only consider properties explicitly requested
710
722
  if (properties && !properties.includes(property)) {
@@ -738,14 +750,15 @@ async function expandCrawlResult(crawl, baseFolder, properties) {
738
750
  delete css.spec;
739
751
  spec[property] = css;
740
752
  }
741
- else if (property === 'idlparsed') {
742
- // Special case for parsed IDL extracts, as result needs to be
743
- // attached to "idl"
744
- if (!spec.idl) {
745
- spec.idl = {};
753
+ else if (property === 'idl') {
754
+ // Special case for raw IDL extracts, which are text extracts.
755
+ // Also drop header that may have been added when extract was
756
+ // serialized.
757
+ if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
758
+ const endOfHeader = contents.indexOf('\n\n');
759
+ contents = contents.substring(endOfHeader + 2);
746
760
  }
747
- Object.assign(spec.idl, contents[property]);
748
- delete spec.idlparsed;
761
+ spec.idl = contents;
749
762
  }
750
763
  else {
751
764
  spec[property] = contents[property];
@@ -810,6 +823,26 @@ function getGeneratedIDLNamesByCSSProperty(property) {
810
823
  };
811
824
 
812
825
 
826
+ /**
827
+ * Creates the given folder if it does not exist yet.
828
+ *
829
+ * @function
830
+ * @public
831
+ * @param {String} folder Path to folder to create
832
+ * (from current working directory)
833
+ */
834
+ async function createFolderIfNeeded(folder) {
835
+ try {
836
+ await fs.mkdir(folder);
837
+ }
838
+ catch (err) {
839
+ if (err.code !== 'EEXIST') {
840
+ throw err;
841
+ }
842
+ }
843
+ }
844
+
845
+
813
846
  module.exports = {
814
847
  fetch,
815
848
  requireFromWorkingDirectory,
@@ -820,5 +853,6 @@ module.exports = {
820
853
  completeWithAlternativeUrls,
821
854
  isLatestLevelThatPasses,
822
855
  expandCrawlResult,
823
- getGeneratedIDLNamesByCSSProperty
856
+ getGeneratedIDLNamesByCSSProperty,
857
+ createFolderIfNeeded
824
858
  };