reffy 6.2.0 → 6.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +158 -158
- package/index.js +11 -11
- package/package.json +53 -53
- package/reffy.js +248 -248
- package/src/browserlib/canonicalize-url.mjs +50 -50
- package/src/browserlib/create-outline.mjs +352 -352
- package/src/browserlib/extract-cssdfn.mjs +319 -319
- package/src/browserlib/extract-dfns.mjs +686 -686
- package/src/browserlib/extract-elements.mjs +205 -205
- package/src/browserlib/extract-headings.mjs +48 -48
- package/src/browserlib/extract-ids.mjs +28 -28
- package/src/browserlib/extract-links.mjs +28 -28
- package/src/browserlib/extract-references.mjs +203 -203
- package/src/browserlib/extract-webidl.mjs +134 -134
- package/src/browserlib/get-absolute-url.mjs +21 -21
- package/src/browserlib/get-generator.mjs +26 -26
- package/src/browserlib/get-lastmodified-date.mjs +13 -13
- package/src/browserlib/get-title.mjs +11 -11
- package/src/browserlib/informative-selector.mjs +16 -16
- package/src/browserlib/map-ids-to-headings.mjs +136 -136
- package/src/browserlib/reffy.json +53 -53
- package/src/cli/check-missing-dfns.js +609 -609
- package/src/cli/generate-idlnames.js +430 -430
- package/src/cli/generate-idlparsed.js +139 -139
- package/src/cli/merge-crawl-results.js +128 -128
- package/src/cli/parse-webidl.js +430 -430
- package/src/lib/css-grammar-parse-tree.schema.json +109 -109
- package/src/lib/css-grammar-parser.js +440 -440
- package/src/lib/fetch.js +55 -55
- package/src/lib/nock-server.js +119 -119
- package/src/lib/specs-crawler.js +605 -603
- package/src/lib/util.js +898 -898
- package/src/specs/missing-css-rules.json +197 -197
- package/src/specs/spec-equivalents.json +149 -149
- package/src/browserlib/extract-editors.mjs~ +0 -14
- package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
- package/src/cli/csstree-grammar-check.js +0 -28
- package/src/cli/csstree-grammar-check.js~ +0 -10
- package/src/cli/csstree-grammar-parser.js +0 -11
- package/src/cli/csstree-grammar-parser.js~ +0 -1
- package/src/cli/extract-editors.js~ +0 -38
- package/src/cli/process-specs.js~ +0 -28
package/src/lib/util.js
CHANGED
|
@@ -1,898 +1,898 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* A bunch of utility functions common to multiple scripts
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
const fs = require('fs').promises;
|
|
6
|
-
const { existsSync } = require('fs');
|
|
7
|
-
const path = require('path');
|
|
8
|
-
const puppeteer = require('puppeteer');
|
|
9
|
-
const crypto = require('crypto');
|
|
10
|
-
const { AbortController } = require('abortcontroller-polyfill/dist/cjs-ponyfill');
|
|
11
|
-
const fetch = require('./fetch');
|
|
12
|
-
const specEquivalents = require('../specs/spec-equivalents.json');
|
|
13
|
-
|
|
14
|
-
const reffyModules = require('../browserlib/reffy.json');
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
/**
 * Maximum depth difference supported between Reffy's install path and custom
 * modules that may be provided on the command-line
 *
 * TODO: Find a way to get rid of that, there should be no limit
 */
const maxPathDepth = 20;


/**
 * Returns an array of consecutive integers from 0 to n (excluded)
 */
const range = n => Array.from({ length: n }, (_, index) => index);


/**
 * Shortcut that returns a property extractor function for the given key
 */
const prop = key => obj => obj[key];
|
37
|
-
|
|
38
|
-
/**
 * Wrapper around the "require" function to require files relative to the
 * current working directory (CWD), instead of relative to the current JS
 * file.
 *
 * This is typically needed to be able to use "require" to load JSON config
 * files provided as command-line arguments.
 *
 * @function
 * @param {String} filename The path to the file to require
 * @return {Object} The result of requiring the file relative to the current
 *   working directory, or null when the file cannot be loaded.
 */
function requireFromWorkingDirectory(filename) {
    try {
        // Resolving against the CWD makes relative paths behave the way a
        // command-line user expects
        return require(path.resolve(filename));
    }
    catch (err) {
        // Best effort on purpose: a missing or invalid file is reported to
        // callers as null, not as an exception
        return null;
    }
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
/**
 * Determine the path to the "node_modules" folder to resolve relative links
 * in the ES6 browser lib modules. The path depends on whether Reffy is run
 * directly, or installed as a library.
 *
 * @function
 * @return {String} Path to the node_modules folder.
 */
function getModulesFolder() {
    const rootFolder = path.resolve(__dirname, '../..');
    // When Reffy is run directly, dependencies sit in its own node_modules
    const localFolder = path.resolve(rootFolder, 'node_modules');
    if (existsSync(localFolder)) {
        return localFolder;
    }
    // When Reffy is installed as a library, it lives in the node_modules
    // folder of the embedding project, one level up
    return path.resolve(rootFolder, '..');
}
const modulesFolder = getModulesFolder();
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
/**
 * Puppeteer browser instance used to load and process specifications
 */
let browser = null;

/**
 * Promise resolved when there is no running instance of Puppeteer. This allows
 * to serialize calls to setupBrowser (and thus to crawlList and crawlSpecs in
 * specs-crawler.js)
 */
let browserClosed = Promise.resolve();
// Resolver of the browserClosed promise above; set by setupBrowser and
// called (then cleared) by teardownBrowser
let resolveBrowserClosed = null;

/**
 * The browser JS library that will be loaded onto every crawled page
 * (generated by setupBrowserlib as a string of ES module code)
 */
let browserlib = null;
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
/**
 * Expand list of browser modules with right set of descriptive properties
 *
 * User may specify a browser module as:
 * - a name which must match one of the existing modules in browserlib
 * - a relative path to an .mjs file which must exist
 * - an object with an "href" property that is a relative path to an .mjs file
 * which must exist
 *
 * Relative paths provided by the user are interpreted as relative to the
 * current working directory, and converted to be relative to the browserlib
 * directory.
 *
 * @function
 * @public
 * @param {Array} modules List of modules to expand. A falsy value means
 * "use all of Reffy's core modules".
 * @return {Array(Object)} List of modules with an href, name and property keys
 */
function expandBrowserModules(modules) {
    // Helper function to create a camelCase name out of a module path
    // (e.g. "extract-dfns.mjs" yields "extractDfns")
    function getCamelCaseName(href) {
        const filename = href.replace(/([^\/\\]+)\.mjs$/, '$1');
        const nameParts = filename.split('-');
        let name;
        let namePart;
        while (namePart = nameParts.shift()) {
            // Drop non-word characters so the result is a valid identifier
            namePart = namePart.replace(/\W/g, '');
            if (name) {
                name += namePart.substring(0, 1).toUpperCase() + namePart.substring(1);
            }
            else {
                name = namePart;
            }
        }
        return name;
    }

    const browserlibPath = path.resolve(__dirname, '..', 'browserlib');
    if (!modules) {
        // No explicit list: expand to all of Reffy's core modules
        return reffyModules.map(mod => Object.assign({
            name: getCamelCaseName(mod.href),
            expanded: true
        }, mod));
    }

    modules = modules.map(mod => {
        if (typeof mod === 'string') {
            if (mod.endsWith('.mjs')) {
                // Custom module file: convert CWD-relative path to a path
                // relative to the browserlib folder (forward slashes so the
                // result is usable in an import statement)
                const name = getCamelCaseName(mod);
                return {
                    href: path.relative(browserlibPath, path.join(process.cwd(), mod)).replace(/\\/g, '/'),
                    name,
                    property: name,
                    expanded: true
                };
            }
            else if (mod === 'core') {
                // "core" is a shortcut for all of Reffy's core modules
                // (returns an array, flattened at the end of the function)
                return reffyModules.map(mod => Object.assign({
                    name: getCamelCaseName(mod.href),
                    expanded: true
                }, mod));
            }
            else {
                // Name of one of Reffy's core modules (matched against the
                // module's href, camelCase name, or property name)
                const res = reffyModules.find(m => m.href === mod ||
                    getCamelCaseName(m.href) === mod || m.property === mod);
                if (!res) {
                    throw new Error(`Unknown browserlib module ${mod}`);
                }
                return Object.assign({
                    name: getCamelCaseName(res.href),
                    expanded: true
                }, res);
            }
        }
        else if (mod.expanded) {
            // Module object was already expanded, return as-is
            return mod;
        }
        else {
            // Module object provided by the user: fill in missing properties
            // (note the object is mutated in place)
            if (!mod.href) {
                throw new Error('Browserlib module does not have an "href" property');
            }
            mod.href = path.relative(browserlibPath, path.join(process.cwd(), mod.href)).replace(/\\/g, '/');
            if (!mod.name) {
                mod.name = getCamelCaseName(mod.href);
            }
            if (!mod.property) {
                mod.property = mod.name;
            }
            mod.expanded = true;
            return mod;
        }
    });

    // "core" entries expanded to arrays of modules, flatten the result
    return modules.flat();
}
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
/**
 * Prepare the browserlib script that will be loaded in every crawled page.
 *
 * The script exposes a global reffy namespace with the requested modules.
 *
 * The function must be called before any attempt to call `processSpecification`
 * and should only be called once. The `setupBrowser` function takes care of it.
 *
 * @function
 * @private
 * @param {Array} modules List of browser modules to expose (see
 * expandBrowserModules for the accepted formats)
 */
function setupBrowserlib(modules) {
    modules = expandBrowserModules(modules);
    // Start from (or preserve) the window.reffy namespace
    browserlib = 'window.reffy = window.reffy ?? {};\n';

    // Expose the ID-to-heading mapping helper once if any module needs it
    if (modules.find(module => module.needsIdToHeadingMap)) {
        browserlib += `
import mapIdsToHeadings from './map-ids-to-headings.mjs';
window.reffy.mapIdsToHeadings = mapIdsToHeadings;\n`;
    }

    // Import each module and attach it to the window.reffy namespace
    browserlib += modules.map(module => `
import ${module.name} from '${module.href}';
window.reffy.${module.name} = ${module.name};
`).join('\n');
}
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
/**
 * Setup and launch browser instance to use to load and process specifications.
 *
 * The function must be called before any attempt to call `processSpecification`
 * and should only be called once.
 *
 * The function also generates the code that will inject the `reffy` namespace
 * in each processed page.
 *
 * Note: Switch `headless` to `false` to access dev tools and debug processing
 *
 * @function
 * @public
 * @param {Array} modules Browser modules to expose to processed pages
 */
async function setupBrowser(modules) {
    // Serialize crawls: wait for any previous Puppeteer instance to be gone,
    // then create the promise that the matching teardown call will resolve
    await browserClosed;
    browserClosed = new Promise(resolve => {
        resolveBrowserClosed = resolve;
    });

    // Launch the browser instance
    // (switch "headless" to "false" and comment out the call to
    // "browser.close()" to access dev tools in debug mode)
    browser = await puppeteer.launch({ headless: true });

    // Generate the script that exposes the reffy namespace in crawled pages
    setupBrowserlib(modules);
}
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
/**
 * Close and destroy browser instance.
 *
 * The function should be called once at the end of the processing.
 *
 * @function
 * @public
 */
async function teardownBrowser() {
    // Nothing to do when no browser instance is running
    if (!browser) {
        return;
    }
    await browser.close();
    browser = null;
    // Signal that a new crawl may start (see setupBrowser)
    resolveBrowserClosed();
    resolveBrowserClosed = null;
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
/**
 * Load and process the given specification.
 *
 * The method automatically exposes Reffy's library functions in a window.reffy
 * namespace (see setupBrowserlib) so that the callback function can
 * call them directly. Additional callback arguments that would need to be
 * passed to the browser context can be provided through the "args" parameter.
 *
 * A crawl will typically fetch and render hundreds of specs, triggering a lot
 * of network requests. Given that some of these requests (e.g. those on images)
 * are of no interest for the processing, that it is wasteful to fetch the same
 * resource again and again during a crawl, and that it is useful to have an
 * offline mode for debugging purpose, the method will intercept network
 * requests made by the browser, fail those that don't seem needed, and serve
 * requests on resources that have already been fetched from a local file cache
 * (the "cacheRefresh" setting in "config.json" allows to adjust this behavior).
 *
 * This triggers a few hiccups and needs for workarounds though:
 * - Puppeteer's page.setRequestInterception does not play nicely with workers
 * (which Respec typically uses) for the time being, so code uses the Chrome
 * DevTools Protocol (CDP) directly, see:
 * https://github.com/puppeteer/puppeteer/issues/4208
 * - Tampering with network requests means that the loaded page gets
 * automatically flagged as "non secure". That's mostly fine but means that
 * "window.crypto.subtle" is not available and Respec needs that to generate
 * hashes. The code re-creates that method manually.
 * - A few specs send HTTP requests that return "streams". This does not work
 * well with Puppeteer's "networkidle0" option (to detect when a spec is mostly
 * done loading), and that does not work with a file cache approach either.
 * These requests get intercepted.
 *
 * A couple of additional notes:
 * - Requests to CSS stylesheets are not intercepted because Respec dynamically
 * loads a few CSS resources, and intercepting them could perhaps impact the
 * rest of the generation.
 * - SVG images are not intercepted because a couple of specs have a PNG
 * fallback mechanism that, when interception is on, make the browser spin
 * forever, see discussion in: https://github.com/w3c/accelerometer/pull/55
 *
 * Strictly speaking, intercepting request is only needed to be able to use the
 * "networkidle0" option. The whole interception logic could be dropped (and
 * "networkidle2" could be used instead) if it proves too unstable.
 *
 * @function
 * @public
 * @param {Object|String} spec The spec to load. Must either be a URL string or
 * an object with a "url" property. If the object contains an "html" property,
 * the HTML content is loaded instead.
 * @param {function} processFunction Processing function that will be evaluated
 * in the browser context where the spec gets loaded
 * @param {Arrays} args List of arguments to pass to the processing function.
 * These arguments typically make it possible to pass contextual information
 * to the processing function (such as the spec object that describes the
 * spec being processed, or the list of processing modules to run)
 * @param {Object} options Processing options. The "quiet" flag tells the
 * function not to report warnings to the console. The "forceLocalFetch"
 * flag tells the function that all network requests need to be only handled
 * by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
 * network and caching logic), which is useful to keep full control of network
 * requests in tests.
 * @return {Promise} The promise to get the results of the processing function
 */
async function processSpecification(spec, processFunction, args, options) {
    spec = (typeof spec === 'string') ? { url: spec } : spec;
    processFunction = processFunction || function () {};
    args = args || [];
    options = options || {};

    if (!browser) {
        throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
    }

    // Helper to report warnings unless the caller asked for silence.
    // Fix: the code used to do `options.quiet ?? console.warn(...)`, which
    // only suppressed warnings when "quiet" was null/undefined, so an
    // explicit `quiet: false` wrongly silenced all warnings.
    function warn(...params) {
        if (!options.quiet) {
            console.warn(...params);
        }
    }

    // Create an abort controller for network requests directly handled by the
    // Node.js code (and not by Puppeteer)
    const abortController = new AbortController();

    // Inner function that returns a network interception method suitable for
    // a given CDP session.
    function interceptRequest(cdp, controller) {
        return async function ({ requestId, request }) {
            try {
                // Abort network requests to common image formats
                if (/\.(gif|ico|jpg|jpeg|png|ttf|woff)$/i.test(request.url)) {
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                    return;
                }

                // Abort network requests that return a "stream", they won't
                // play well with Puppeteer's "networkidle0" option, and our
                // custom "fetch" function does not handle streams in any case
                if (request.url.startsWith('https://drafts.csswg.org/api/drafts/') ||
                        request.url.startsWith('https://drafts.css-houdini.org/api/drafts/') ||
                        request.url.startsWith('https://drafts.fxtf.org/api/drafts/') ||
                        request.url.startsWith('https://api.csswg.org/shepherd/')) {
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                    return;
                }

                // The request needs to be intercepted, either because it
                // targets one of the local script files, or because we would
                // like to use our local cache to avoid sending network requests
                // when possible.
                const reffyPath = '/reffy/scripts/';
                const webidl2Path = '/node_modules/webidl2/';
                if (request.url.includes(reffyPath) || request.url.includes(webidl2Path)) {
                    let body;
                    if (request.url.endsWith('reffy.mjs')) {
                        // Serve the generated browserlib script (see
                        // setupBrowserlib)
                        body = Buffer.from(browserlib);
                    }
                    else if (request.url.includes(webidl2Path)) {
                        // Serve the WebIDL2.js library from node_modules
                        const file = path.resolve(modulesFolder, 'webidl2',
                            request.url.substring(request.url.indexOf(webidl2Path) + webidl2Path.length));
                        body = await fs.readFile(file);
                    }
                    else {
                        // The "__" folders are just a means to resolve
                        // relative paths that are higher than the "browserlib"
                        // folder on the storage drive
                        const requestPath = request.url.substring(request.url.indexOf(reffyPath) + reffyPath.length);
                        let depth = requestPath.lastIndexOf('__/') / 3;
                        const filename = requestPath.substring(requestPath.lastIndexOf('__/') + 3);
                        let filePath = path.resolve(__dirname, '..', 'browserlib');
                        while (depth < maxPathDepth - 1) {
                            filePath = path.resolve(filePath, '..');
                            depth += 1;
                        }
                        const file = path.resolve(filePath, filename);
                        body = await fs.readFile(file);
                    }
                    await cdp.send('Fetch.fulfillRequest', {
                        requestId,
                        responseCode: 200,
                        responseHeaders: [{ name: 'Content-Type', value: 'application/javascript' }],
                        body: body.toString('base64')
                    });
                }
                else {
                    // Only GET requests on http(s) resources go through our
                    // cache-aware "fetch" function; let the rest through
                    if ((request.method !== 'GET') ||
                            (!request.url.startsWith('http:') && !request.url.startsWith('https:'))) {
                        await cdp.send('Fetch.continueRequest', { requestId });
                        return;
                    }

                    const response = await fetch(request.url, { signal: controller.signal });
                    const body = await response.buffer();
                    await cdp.send('Fetch.fulfillRequest', {
                        requestId,
                        responseCode: response.status,
                        responseHeaders: Object.keys(response.headers.raw()).map(header => {
                            return {
                                name: header,
                                value: response.headers.raw()[header].join(',')
                            };
                        }),
                        body: body.toString('base64')
                    });
                }
            }
            catch (err) {
                if (controller.signal.aborted) {
                    // All is normal, processing was over, page and CDP session
                    // have been closed, and network requests have been aborted
                    return;
                }

                // Fetch from file cache failed somehow
                // Let Puppeteer handle the request as fallback unless
                // calling function asked us not to do that
                if (options.forceLocalFetch) {
                    warn(`[warn] Network request for ${request.url} failed`, err);
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                }
                else {
                    warn(`[warn] Fall back to regular network request for ${request.url}`, err);
                    try {
                        await cdp.send('Fetch.continueRequest', { requestId });
                    }
                    catch (continueErr) {
                        if (!controller.signal.aborted) {
                            warn(`[warn] Fall back to regular network request for ${request.url} failed`, continueErr);
                        }
                    }
                }
            }
        }
    }

    try {
        const page = await browser.newPage();

        // Disable cache if caller wants to handle all network requests
        await page.setCacheEnabled(!options.forceLocalFetch);

        // Intercept all network requests to use our own version of "fetch"
        // that makes use of the local file cache.
        const cdp = await page.target().createCDPSession();
        await cdp.send('Fetch.enable');
        cdp.on('Fetch.requestPaused', interceptRequest(cdp, abortController));

        // Quick and dirty workaround to re-create the "window.crypto.digest"
        // function that Respec needs (context is seen as unsecure because we're
        // tampering with network requests)
        await page.exposeFunction('hashdigest', (algorithm, buffer) => {
            return crypto.createHash(algorithm).update(Buffer.from(Object.values(buffer))).digest();
        });
        await page.evaluateOnNewDocument(() => {
            window.crypto.subtle = {
                digest: function (algorithm, buffer) {
                    const res = window.hashdigest('sha1', buffer);
                    return res.then(buf => {
                        return Uint8Array.from(buf.data);
                    });
                }
            };
        });

        // Common loading option to give the browser enough time to load large
        // specs, and to consider navigation done when there haven't been
        // network connections in the past 500ms. This should be enough to
        // handle "redirection" through JS or meta refresh (which would not
        // have time to run if we used "load").
        const loadOptions = {
            timeout: 120000,
            waitUntil: 'networkidle0'
        };

        // Load the page
        // (note HTTP status is 0 when `file://` URLs are loaded)
        if (spec.html) {
            await page.setContent(spec.html, loadOptions);
        }
        else {
            const result = await page.goto(spec.url, loadOptions);
            if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
                throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
            }
        }

        // Handle multi-page specs: load each sub-page in its own page with
        // the same interception logic, then graft its contents onto the main
        // page in a marked <section> so processing modules see one document
        const pageUrls = spec.pages || [];

        if (pageUrls.length > 0) {
            const pages = [];
            for (const url of pageUrls) {
                const subAbort = new AbortController();
                const subPage = await browser.newPage();
                await subPage.setCacheEnabled(!options.forceLocalFetch);
                const subCdp = await subPage.target().createCDPSession();
                await subCdp.send('Fetch.enable');
                subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
                try {
                    // (Note HTTP status is 0 when `file://` URLs are loaded)
                    const subresult = await subPage.goto(url, loadOptions);
                    if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
                        // Fix: the message used to reference `spec.url` and
                        // `result.status()`, but `result` is scoped to the
                        // single-page branch above, so a non-200 sub-page
                        // raised a ReferenceError instead of this error
                        throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
                    }
                    const html = await subPage.evaluate(() => {
                        return document.body.outerHTML
                            .replace(/<body/, '<section')
                            .replace(/<\/body/, '</section');
                    });
                    pages.push({ url, html });
                }
                finally {
                    subAbort.abort();
                    await subCdp.detach();
                    await subPage.close();
                }
            }
            await page.evaluate(pages => {
                for (const subPage of pages) {
                    const section = document.createElement('section');
                    section.setAttribute('data-reffy-page', subPage.url);
                    section.innerHTML = subPage.html;
                    document.body.appendChild(section);
                }
            }, pages);
        }

        // Wait until the generation of the spec is completely over
        await page.evaluate(async () => {
            // Detect draft CSS server hiccups as done in browser-specs:
            // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
            const title = (window.document.querySelector('h1')?.textContent || '')
                .replace(/\n/g, '').trim();
            if (title.startsWith('Index of ')) {
                throw new Error(`CSS server issue detected`);
            }

            const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
                window.document.head.querySelector("script[src*='respec']");

            function sleep(ms) {
                return new Promise(resolve => setTimeout(resolve, ms));
            }

            // Poll until Respec signals completion (or give up after ~60s)
            async function isReady(counter) {
                counter = counter || 0;
                if (counter > 60) {
                    throw new Error('Respec generation took too long');
                }
                if (window.document.respec?.ready) {
                    await window.document.respec.ready;
                }
                else if (usesRespec) {
                    await sleep(1000);
                    await isReady(counter + 1);
                }
            }

            await isReady();
        });

        // Capture and report Reffy's browserlib warnings
        page.on('console', msg => {
            const text = msg.text();
            if (text.startsWith('[reffy] ')) {
                warn(spec.url, `[${msg.type()}]`, msg.text().substr('[reffy] '.length));
            }
        });

        // Capture and report when page throws an error
        page.on('pageerror', err => {
            warn(err);
        });

        // Expose additional functions defined in src/browserlib/ to the
        // browser context, under a window.reffy namespace, so that processing
        // script may call them. The script is an ES6 module and needs to be
        // loaded as such.
        // Note that we're using a fake relative URL on purpose. In practice,
        // the request will be processed by "interceptRequest", which will
        // respond with the contents of the script file. Also, there are
        // multiple path levels in that fake URL on purpose as well, because
        // scripts import the WebIDL2.js library with a URL like
        // "../../node_modules/[...]" and may import other scripts that are
        // higher in the folder tree.
        await page.addScriptTag({
            url: `reffy/scripts/${range(maxPathDepth).map(n => '__').join('/')}/reffy.mjs`,
            type: 'module'
        });

        // Run the processFunction method in the browser context
        const results = await page.evaluate(processFunction, ...args);

        // Pending network requests may still be in the queue, flag the page
        // as closed not to send commands on a CDP session that's no longer
        // attached to anything
        abortController.abort();

        // Close CDP session and page
        // Note that gets done no matter what when browser.close() gets called.
        await cdp.detach();
        await page.close();

        return results;
    }
    finally {
        // Signal abortion again (in case an exception was thrown)
        abortController.abort();
    }
}
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
/**
 * Enrich the spec description with alternative URLs (versions and equivalents)
 *
 * TODO: The list used to contain published versions of TR specs retrieved from
 * the W3C API. They are useful to improve the relevance of reported anomalies.
 *
 * @function
 * @param {Object} spec Spec description structure (only the URL is useful)
 * @return {Object} The same structure, enriched with a "versions" array that
 *   lists the known alternative URLs for the spec (deduplicated)
 */
function completeWithAlternativeUrls(spec) {
    // A Set collects the URLs, so that duplicates are silently dropped while
    // insertion order is preserved
    const versions = new Set([spec.url]);
    if (spec.release) {
        versions.add(spec.release.url);
    }
    if (spec.nightly) {
        versions.add(spec.nightly.url);
    }

    // Add known equivalent URLs, if any
    const equivalents = specEquivalents[spec.url];
    if (equivalents) {
        for (const url of equivalents) {
            versions.add(url);
        }
    }

    spec.versions = [...versions];
    return spec;
}
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
/**
 * Returns true when the given spec is the latest "fullest" level of that spec
 * in the given list of specs that passes the given predicate.
 *
 * "Fullest" means "not a delta spec, unless that is the only level that passes
 * the predicate".
 *
 * @function
 * @public
 * @param {Object} spec Spec to check
 * @param {Array(Object)} list List of specs (must include the spec to check)
 * @param {function} predicate Predicate function that the spec must pass. Must
 * be a function that takes a spec as argument and returns a boolean.
 * @return {Boolean} true if the spec is the latest "fullest" level in the list
 * that passes the predicate.
 */
function isLatestLevelThatPasses(spec, list, predicate) {
    predicate = predicate || (_ => true);
    if (!predicate(spec)) {
        return false;
    }
    if (spec.seriesComposition === 'delta') {
        // A delta spec only counts when no previous full level passes
        while (spec.seriesPrevious) {
            spec = list.find(s => s.shortname === spec.seriesPrevious);
            if (!spec) {
                break;
            }
            if ((spec.seriesComposition === 'full') && predicate(spec)) {
                return false;
            }
        }
        return true;
    }
    // A full spec only counts when no later full level passes
    while (spec.seriesNext) {
        // Fix: test the result of the lookup *after* it runs. The guard used
        // to sit before the reassignment, so a dangling "seriesNext"
        // reference (next level not in the list) made the loop condition
        // throw a TypeError on undefined instead of being ignored, as the
        // "seriesPrevious" loop above does.
        spec = list.find(s => s.shortname === spec.seriesNext);
        if (!spec) {
            break;
        }
        if ((spec.seriesComposition === 'full') && predicate(spec)) {
            return false;
        }
    }
    return true;
}
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
/**
 * Takes the results of a crawl for a given spec and expands it to include the
 * contents of referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the spec object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} spec Spec crawl result that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 *   the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 *   (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 *   contains the contents of referenced files and no longer references
 *   external files (for the requested properties)
 */
async function expandSpecResult(spec, baseFolder, properties) {
  const base = baseFolder || '';

  // Pattern of properties that link to an extract, i.e. an IDL or JSON file
  // in a subfolder
  const extractRe = /^[^\/]+\/[^\/]+\.(json|idl)$/;

  // Retrieve the raw contents of an extract, over HTTPS or from disk
  async function loadContents(relPath) {
    if (base.startsWith('https:')) {
      const url = (new URL(relPath, base)).toString();
      const response = await fetch(url, { nolog: true });
      return response.text();
    }
    return fs.readFile(path.join(base, relPath), 'utf8');
  }

  const expandProperty = async property => {
    // Only consider properties explicitly requested
    if (properties && !properties.includes(property)) {
      return;
    }

    // Only consider properties that actually link to an extract
    const value = spec[property];
    if ((typeof value !== 'string') || !extractRe.test(value)) {
      return;
    }

    let contents = await loadContents(value);
    if (value.endsWith('.json')) {
      contents = JSON.parse(contents);
    }

    if (property === 'css') {
      // Special case for CSS where the "css" level does not exist in the
      // generated files
      const css = Object.assign({}, contents);
      delete css.spec;
      spec[property] = css;
    }
    else if (property === 'idl') {
      // Special case for raw IDL extracts, which are text extracts. Also
      // drop header that may have been added when extract was serialized.
      if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
        const endOfHeader = contents.indexOf('\n\n');
        contents = contents.substring(endOfHeader + 2);
      }
      spec.idl = contents;
    }
    else {
      spec[property] = contents[property];
    }
  };

  await Promise.all(Object.keys(spec).map(expandProperty));
  return spec;
}
|
784
|
-
|
|
785
|
-
|
|
786
|
-
/**
 * Takes the results of a crawl (typically the contents of the index.json file)
 * and expands it to include the contents of all referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the crawl object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} crawl Crawl index object that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 *   the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 *   (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 *   contains the entire crawl report (and no longer references external files)
 */
async function expandCrawlResult(crawl, baseFolder, properties) {
  const folder = baseFolder || '';
  // Expand all spec results in parallel
  const expansions = crawl.results.map(
    spec => expandSpecResult(spec, folder, properties));
  crawl.results = await Promise.all(expansions);
  return crawl;
}
|
812
|
-
|
|
813
|
-
|
|
814
|
-
/**
 * Retrieves the list of IDL attribute names that the CSS property generates
 * per the CSSOM spec, see:
 * https://drafts.csswg.org/cssom/#ref-for-css-property-to-idl-attribute
 *
 * @function
 * @param {String} property CSS property name
 * @return {Array(String)} An array of IDL attribute names, dashed attribute
 *   first, then camel-cased attribute if different, then webkit-cased
 *   attribute name if needed
 */
function getGeneratedIDLNamesByCSSProperty(property) {
  // Converts a CSS property to an IDL attribute name per the CSSOM spec:
  // https://drafts.csswg.org/cssom/#css-property-to-idl-attribute
  // Each character that follows a "-" gets uppercased and the "-" dropped;
  // when lowercaseFirst is set, the leading character is dropped first.
  function cssPropertyToIDLAttribute(property, lowercaseFirst) {
    const source = lowercaseFirst ? property.substring(1) : property;
    const [head, ...tail] = source.split('-');
    return head + tail
      .map(part => part.charAt(0).toUpperCase() + part.substring(1))
      .join('');
  }

  // Start with dashed attribute
  const names = [property];

  // Add camel-cased attribute if different
  const camelCased = cssPropertyToIDLAttribute(property, false);
  if (camelCased !== property) {
    names.push(camelCased);
  }

  // Add webkit-cased attribute if needed
  if (property.startsWith('-webkit-')) {
    names.push(cssPropertyToIDLAttribute(property, true));
  }

  return names;
};
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
/**
 * Creates the given folder if it does not exist yet.
 *
 * @function
 * @public
 * @param {String} folder Path to folder to create
 *   (from current working directory)
 */
async function createFolderIfNeeded(folder) {
  const creation = fs.mkdir(folder);
  await creation.catch(err => {
    // The folder already existing is the expected "nothing to do" case;
    // any other error is a real problem and gets rethrown
    if (err.code !== 'EEXIST') {
      throw err;
    }
  });
}
|
883
|
-
|
|
884
|
-
|
|
885
|
-
// Public exports of this utility module
module.exports = {
  fetch,
  requireFromWorkingDirectory,
  expandBrowserModules,
  setupBrowser,
  teardownBrowser,
  processSpecification,
  completeWithAlternativeUrls,
  isLatestLevelThatPasses,
  expandCrawlResult,
  expandSpecResult,
  getGeneratedIDLNamesByCSSProperty,
  createFolderIfNeeded
};
|
|
1
|
+
/**
|
|
2
|
+
* A bunch of utility functions common to multiple scripts
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
const fs = require('fs').promises;
|
|
6
|
+
const { existsSync } = require('fs');
|
|
7
|
+
const path = require('path');
|
|
8
|
+
const puppeteer = require('puppeteer');
|
|
9
|
+
const crypto = require('crypto');
|
|
10
|
+
const { AbortController } = require('abortcontroller-polyfill/dist/cjs-ponyfill');
|
|
11
|
+
const fetch = require('./fetch');
|
|
12
|
+
const specEquivalents = require('../specs/spec-equivalents.json');
|
|
13
|
+
|
|
14
|
+
const reffyModules = require('../browserlib/reffy.json');
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
/**
 * Maximum depth difference supported between Reffy's install path and custom
 * modules that may be provided on the command-line
 *
 * TODO: Find a way to get rid of that, there should be no limit
 */
const maxPathDepth = 20;


/**
 * Returns a range array from 0 to the number provided (not included)
 */
const range = n => Array.from({ length: n }, (_, index) => index);


/**
 * Shortcut that returns a property extractor iterator
 */
const prop = key => obj => obj[key];
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
/**
 * Wrapper around the "require" function to require files relative to the
 * current working directory (CWD), instead of relative to the current JS
 * file.
 *
 * This is typically needed to be able to use "require" to load JSON config
 * files provided as command-line arguments.
 *
 * @function
 * @param {String} filename The path to the file to require
 * @return {Object} The result of requiring the file relative to the current
 *   working directory, or null when the file cannot be loaded.
 */
function requireFromWorkingDirectory(filename) {
  try {
    const absolutePath = path.resolve(filename);
    return require(absolutePath);
  }
  catch (err) {
    // Best effort: a file that cannot be resolved or loaded yields null
    return null;
  }
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
/**
 * Determine the path to the "node_modules" folder to resolve relative links
 * in the ES6 browser lib modules. The path depends on whether Reffy is run
 * directly, or installed as a library.
 *
 * @function
 * @return {String} Path to the node_modules folder.
 */
function getModulesFolder() {
  const rootFolder = path.resolve(__dirname, '../..');
  const localModules = path.resolve(rootFolder, 'node_modules');
  if (existsSync(localModules)) {
    // Reffy run directly: dependencies live next to its own package
    return localModules;
  }
  // Reffy installed as a library: packages live in the parent folder
  return path.resolve(rootFolder, '..');
}
const modulesFolder = getModulesFolder();
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
/**
 * Puppeteer browser instance used to load and process specifications.
 * Created by setupBrowser, destroyed by teardownBrowser.
 */
let browser = null;

/**
 * Promise resolved when there is no running instance of Puppeteer. This allows
 * to serialize calls to setupBrowser (and thus to crawlList and crawlSpecs in
 * specs-crawler.js)
 */
let browserClosed = Promise.resolve();
// Resolver for the browserClosed promise above; set by setupBrowser and
// invoked (then reset) by teardownBrowser
let resolveBrowserClosed = null;

/**
 * The browser JS library that will be loaded onto every crawled page.
 * Generated by setupBrowserlib.
 */
let browserlib = null;
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
/**
 * Expand list of browser modules with right set of descriptive properties
 *
 * User may specify a browser module as:
 * - a name which must match one of the existing modules in browserlib
 * - a relative path to an .mjs file which must exist
 * - an object with an "href" property that is a relative path to an .mjs file
 * which must exist
 *
 * Relative paths provided by the user are interpreted as relative to the
 * current working directory, and converted to be relative to the browserlib
 * directory.
 *
 * @function
 * @public
 * @param {Array} modules List of modules to expand. No value means "use the
 *   default set of modules from browserlib/reffy.json".
 * @return {Array(Object)} List of modules with an href, name and property keys
 */
function expandBrowserModules(modules) {
  // Helper function to create a camelCase name out of a module path,
  // e.g. "extract-dfns.mjs" becomes "extractDfns"
  function getCamelCaseName(href) {
    const filename = href.replace(/([^\/\\]+)\.mjs$/, '$1');
    const nameParts = filename.split('-');
    let name;
    let namePart;
    while (namePart = nameParts.shift()) {
      // Drop non-word characters before gluing the parts together
      namePart = namePart.replace(/\W/g, '');
      if (name) {
        name += namePart.substring(0, 1).toUpperCase() + namePart.substring(1);
      }
      else {
        name = namePart;
      }
    }
    return name;
  }

  const browserlibPath = path.resolve(__dirname, '..', 'browserlib');

  // No modules specified: return the default set from reffy.json
  if (!modules) {
    return reffyModules.map(mod => Object.assign({
      name: getCamelCaseName(mod.href),
      expanded: true
    }, mod));
  }

  modules = modules.map(mod => {
    if (typeof mod === 'string') {
      if (mod.endsWith('.mjs')) {
        // Custom module given as a path relative to the current working
        // directory; convert to a path relative to the browserlib folder
        // (with forward slashes so it can be used in an import statement)
        const name = getCamelCaseName(mod);
        return {
          href: path.relative(browserlibPath, path.join(process.cwd(), mod)).replace(/\\/g, '/'),
          name,
          property: name,
          expanded: true
        };
      }
      else if (mod === 'core') {
        // "core" expands to the entire default set of modules
        // (the resulting nested array gets flattened below)
        return reffyModules.map(mod => Object.assign({
          name: getCamelCaseName(mod.href),
          expanded: true
        }, mod));
      }
      else {
        // Name of one of the default modules (href, camelCase name,
        // or property name)
        const res = reffyModules.find(m => m.href === mod ||
          getCamelCaseName(m.href) === mod || m.property === mod);
        if (!res) {
          throw new Error(`Unknown browserlib module ${mod}`);
        }
        return Object.assign({
          name: getCamelCaseName(res.href),
          expanded: true
        }, res);
      }
    }
    else if (mod.expanded) {
      // Module was already expanded by a previous call
      return mod;
    }
    else {
      // Custom module given as an object: href is mandatory, name and
      // property get derived from it when absent
      if (!mod.href) {
        throw new Error('Browserlib module does not have an "href" property');
      }
      mod.href = path.relative(browserlibPath, path.join(process.cwd(), mod.href)).replace(/\\/g, '/');
      if (!mod.name) {
        mod.name = getCamelCaseName(mod.href);
      }
      if (!mod.property) {
        mod.property = mod.name;
      }
      mod.expanded = true;
      return mod;
    }
  });

  // Flatten possible nested arrays created by "core" entries
  return modules.flat();
}
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
/**
 * Prepare the browserlib script that will be loaded in every crawled page.
 *
 * The script exposes a global reffy namespace with the requested modules.
 *
 * The function must be called before any attempt to call `processSpecification`
 * and should only be called once. The `setupBrowser` function takes care of it.
 *
 * @function
 * @private
 * @param {Array} modules Browser modules to inject (see expandBrowserModules)
 */
function setupBrowserlib(modules) {
  modules = expandBrowserModules(modules);
  browserlib = 'window.reffy = window.reffy ?? {};\n';

  // Inject the ids-to-headings map once when at least one module needs it
  if (modules.find(module => module.needsIdToHeadingMap)) {
    browserlib += `
import mapIdsToHeadings from './map-ids-to-headings.mjs';
window.reffy.mapIdsToHeadings = mapIdsToHeadings;\n`;
  }

  // Import each module and expose it under window.reffy
  browserlib += modules.map(module => `
import ${module.name} from '${module.href}';
window.reffy.${module.name} = ${module.name};
`).join('\n');
}
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
/**
 * Setup and launch browser instance to use to load and process specifications.
 *
 * The function must be called before any attempt to call `processSpecification`
 * and should only be called once.
 *
 * The function also generates the code that will inject the `reffy` namespace
 * in each processed page.
 *
 * Note: Switch `headless` to `false` to access dev tools and debug processing
 *
 * @function
 * @public
 * @param {Array} modules Browser modules to inject (see expandBrowserModules)
 */
async function setupBrowser(modules) {
  // There can be only one crawl running at a time: wait until a possible
  // previous browser instance has been torn down, then flag ourselves as
  // the running instance (teardownBrowser resolves the promise)
  await browserClosed;
  browserClosed = new Promise(resolve => resolveBrowserClosed = resolve);

  // Create browser instance
  // Note: switch "headless" to "false" (and comment out the call to
  // "browser.close()") to access dev tools in debug mode
  browser = await puppeteer.launch({ headless: true });
  setupBrowserlib(modules);
}
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
/**
 * Close and destroy browser instance.
 *
 * The function should be called once at the end of the processing.
 *
 * @function
 * @public
 */
async function teardownBrowser() {
  // Nothing to tear down when no browser instance is running
  if (!browser) {
    return;
  }
  await browser.close();
  browser = null;

  // Signal that a new crawl may start (see setupBrowser)
  resolveBrowserClosed();
  resolveBrowserClosed = null;
}
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* Load and process the given specification.
|
|
271
|
+
*
|
|
272
|
+
* The method automatically exposes Reffy's library functions in a window.reffy
|
|
273
|
+
* namespace (see setupBrowserlib) so that the callback function can
|
|
274
|
+
* call them directly. Additional callback arguments that would need to be
|
|
275
|
+
* passed to the browser context can be provided through the "args" parameter.
|
|
276
|
+
*
|
|
277
|
+
* A crawl will typically fetch and render hundreds of specs, triggering a lot
|
|
278
|
+
* of network requests. Given that some of these requests (e.g. those on images)
|
|
279
|
+
* are of no interest for the processing, that it is wasteful to fetch the same
|
|
280
|
+
* resource again and again during a crawl, and that it is useful to have an
|
|
281
|
+
* offline mode for debugging purpose, the method will intercept network
|
|
282
|
+
* requests made by the browser, fail those that don't seem needed, and serve
|
|
283
|
+
* requests on resources that have already been fetched from a local file cache
|
|
284
|
+
* (the "cacheRefresh" setting in "config.json" allows to adjust this behavior).
|
|
285
|
+
*
|
|
286
|
+
* This triggers a few hiccups and needs for workarounds though:
|
|
287
|
+
* - Puppeteer's page.setRequestInterception does not play nicely with workers
|
|
288
|
+
* (which Respec typically uses) for the time being, so code uses the Chrome
|
|
289
|
+
* DevTools Protocol (CDP) directly, see:
|
|
290
|
+
* https://github.com/puppeteer/puppeteer/issues/4208
|
|
291
|
+
* - Tampering with network requests means that the loaded page gets
|
|
292
|
+
* automatically flagged as "non secure". That's mostly fine but means that
|
|
293
|
+
* "window.crypto.subtle" is not available and Respec needs that to generate
|
|
294
|
+
* hashes. The code re-creates that method manually.
|
|
295
|
+
* - A few specs send HTTP requests that return "streams". This does not work
|
|
296
|
+
* well with Puppeteer's "networkidle0" option (to detect when a spec is mostly
|
|
297
|
+
* done loading), and that does not work with a file cache approach either.
|
|
298
|
+
* These requests get intercepted.
|
|
299
|
+
*
|
|
300
|
+
* A couple of additional notes:
|
|
301
|
+
* - Requests to CSS stylesheets are not intercepted because Respec dynamically
|
|
302
|
+
* loads a few CSS resources, and intercepting them could perhaps impact the
|
|
303
|
+
* rest of the generation.
|
|
304
|
+
* - SVG images are not intercepted because a couple of specs have a PNG
|
|
305
|
+
* fallback mechanism that, when interception is on, make the browser spin
|
|
306
|
+
* forever, see discussion in: https://github.com/w3c/accelerometer/pull/55
|
|
307
|
+
*
|
|
308
|
+
* Strictly speaking, intercepting request is only needed to be able to use the
|
|
309
|
+
* "networkidle0" option. The whole interception logic could be dropped (and
|
|
310
|
+
* "networkidle2" could be used instead) if it proves too unstable.
|
|
311
|
+
*
|
|
312
|
+
* @function
|
|
313
|
+
* @public
|
|
314
|
+
* @param {Object|String} spec The spec to load. Must either be a URL string or
|
|
315
|
+
* an object with a "url" property. If the object contains an "html" property,
|
|
316
|
+
* the HTML content is loaded instead.
|
|
317
|
+
* @param {function} processFunction Processing function that will be evaluated
|
|
318
|
+
* in the browser context where the spec gets loaded
|
|
319
|
+
* @param {Arrays} args List of arguments to pass to the processing function.
|
|
320
|
+
* These arguments typically make it possible to pass contextual information
|
|
321
|
+
* to the processing function (such as the spec object that describes the
|
|
322
|
+
* spec being processed, or the list of processing modules to run)
|
|
323
|
+
* @param {Object} options Processing options. The "quiet" flag tells the
|
|
324
|
+
* function not to report warnings to the console. The "forceLocalFetch"
|
|
325
|
+
* flag tells the function that all network requests need to be only handled
|
|
326
|
+
* by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
|
|
327
|
+
* network and caching logic), which is useful to keep full control of network
|
|
328
|
+
* requests in tests.
|
|
329
|
+
* @return {Promise} The promise to get the results of the processing function
|
|
330
|
+
*/
|
|
331
|
+
async function processSpecification(spec, processFunction, args, options) {
|
|
332
|
+
spec = (typeof spec === 'string') ? { url: spec } : spec;
|
|
333
|
+
processFunction = processFunction || function () {};
|
|
334
|
+
args = args || [];
|
|
335
|
+
options = options || {};
|
|
336
|
+
|
|
337
|
+
if (!browser) {
|
|
338
|
+
throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
// Create an abort controller for network requests directly handled by the
|
|
342
|
+
// Node.js code (and not by Puppeteer)
|
|
343
|
+
const abortController = new AbortController();
|
|
344
|
+
|
|
345
|
+
// Inner function that returns a network interception method suitable for
|
|
346
|
+
// a given CDP session.
|
|
347
|
+
function interceptRequest(cdp, controller) {
|
|
348
|
+
return async function ({ requestId, request }) {
|
|
349
|
+
try {
|
|
350
|
+
// Abort network requests to common image formats
|
|
351
|
+
if (/\.(gif|ico|jpg|jpeg|png|ttf|woff)$/i.test(request.url)) {
|
|
352
|
+
await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
|
|
353
|
+
return;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Abort network requests that return a "stream", they won't
|
|
357
|
+
// play well with Puppeteer's "networkidle0" option, and our
|
|
358
|
+
// custom "fetch" function does not handle streams in any case
|
|
359
|
+
if (request.url.startsWith('https://drafts.csswg.org/api/drafts/') ||
|
|
360
|
+
request.url.startsWith('https://drafts.css-houdini.org/api/drafts/') ||
|
|
361
|
+
request.url.startsWith('https://drafts.fxtf.org/api/drafts/') ||
|
|
362
|
+
request.url.startsWith('https://api.csswg.org/shepherd/')) {
|
|
363
|
+
await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
// The request needs to be intercepted, either because it
|
|
368
|
+
// targets one of the local script files, or because we would
|
|
369
|
+
// like to use our local cache to avoid sending network requests
|
|
370
|
+
// when possible.
|
|
371
|
+
//console.log(`intercept ${request.url}`);
|
|
372
|
+
const reffyPath = '/reffy/scripts/';
|
|
373
|
+
const webidl2Path = '/node_modules/webidl2/';
|
|
374
|
+
if (request.url.includes(reffyPath) || request.url.includes(webidl2Path)) {
|
|
375
|
+
let body;
|
|
376
|
+
if (request.url.endsWith('reffy.mjs')) {
|
|
377
|
+
body = Buffer.from(browserlib);
|
|
378
|
+
}
|
|
379
|
+
else if (request.url.includes(webidl2Path)) {
|
|
380
|
+
const file = path.resolve(modulesFolder, 'webidl2',
|
|
381
|
+
request.url.substring(request.url.indexOf(webidl2Path) + webidl2Path.length));
|
|
382
|
+
body = await fs.readFile(file);
|
|
383
|
+
}
|
|
384
|
+
else {
|
|
385
|
+
// The "__" folders are just a means to resolve
|
|
386
|
+
// relative paths that are higher than the "browserlib"
|
|
387
|
+
// folder on the storage drive
|
|
388
|
+
const requestPath = request.url.substring(request.url.indexOf(reffyPath) + reffyPath.length);
|
|
389
|
+
let depth = requestPath.lastIndexOf('__/') / 3;
|
|
390
|
+
const filename = requestPath.substring(requestPath.lastIndexOf('__/') + 3);
|
|
391
|
+
let filePath = path.resolve(__dirname, '..', 'browserlib');
|
|
392
|
+
while (depth < maxPathDepth - 1) {
|
|
393
|
+
filePath = path.resolve(filePath, '..');
|
|
394
|
+
depth += 1;
|
|
395
|
+
}
|
|
396
|
+
const file = path.resolve(filePath, filename);
|
|
397
|
+
body = await fs.readFile(file);
|
|
398
|
+
}
|
|
399
|
+
await cdp.send('Fetch.fulfillRequest', {
|
|
400
|
+
requestId,
|
|
401
|
+
responseCode: 200,
|
|
402
|
+
responseHeaders: [{ name: 'Content-Type', value: 'application/javascript' }],
|
|
403
|
+
body: body.toString('base64')
|
|
404
|
+
});
|
|
405
|
+
}
|
|
406
|
+
else {
|
|
407
|
+
if ((request.method !== 'GET') ||
|
|
408
|
+
(!request.url.startsWith('http:') && !request.url.startsWith('https:'))) {
|
|
409
|
+
await cdp.send('Fetch.continueRequest', { requestId });
|
|
410
|
+
return;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
const response = await fetch(request.url, { signal: controller.signal });
|
|
414
|
+
const body = await response.buffer();
|
|
415
|
+
await cdp.send('Fetch.fulfillRequest', {
|
|
416
|
+
requestId,
|
|
417
|
+
responseCode: response.status,
|
|
418
|
+
responseHeaders: Object.keys(response.headers.raw()).map(header => {
|
|
419
|
+
return {
|
|
420
|
+
name: header,
|
|
421
|
+
value: response.headers.raw()[header].join(',')
|
|
422
|
+
};
|
|
423
|
+
}),
|
|
424
|
+
body: body.toString('base64')
|
|
425
|
+
});
|
|
426
|
+
}
|
|
427
|
+
//console.log(`intercept ${request.url} - done`);
|
|
428
|
+
}
|
|
429
|
+
catch (err) {
|
|
430
|
+
if (controller.signal.aborted) {
|
|
431
|
+
// All is normal, processing was over, page and CDP session
|
|
432
|
+
// have been closed, and network requests have been aborted
|
|
433
|
+
// console.log(`intercept ${request.url} - aborted`);
|
|
434
|
+
return;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
// Fetch from file cache failed somehow
|
|
438
|
+
// Let Puppeteer handle the request as fallback unless
|
|
439
|
+
// calling function asked us not to do that
|
|
440
|
+
if (options.forceLocalFetch) {
|
|
441
|
+
options.quiet ?? console.warn(`[warn] Network request for ${request.url} failed`, err);
|
|
442
|
+
await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
|
|
443
|
+
}
|
|
444
|
+
else {
|
|
445
|
+
options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
|
|
446
|
+
try {
|
|
447
|
+
await cdp.send('Fetch.continueRequest', { requestId });
|
|
448
|
+
}
|
|
449
|
+
catch (err) {
|
|
450
|
+
if (!controller.signal.aborted) {
|
|
451
|
+
options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
try {
|
|
460
|
+
const page = await browser.newPage();
|
|
461
|
+
|
|
462
|
+
// Disable cache if caller wants to handle all network requests
|
|
463
|
+
await page.setCacheEnabled(!options.forceLocalFetch);
|
|
464
|
+
|
|
465
|
+
// Intercept all network requests to use our own version of "fetch"
|
|
466
|
+
// that makes use of the local file cache.
|
|
467
|
+
const cdp = await page.target().createCDPSession();
|
|
468
|
+
await cdp.send('Fetch.enable');
|
|
469
|
+
cdp.on('Fetch.requestPaused', interceptRequest(cdp, abortController));
|
|
470
|
+
|
|
471
|
+
// Quick and dirty workaround to re-create the "window.crypto.digest"
|
|
472
|
+
// function that Respec needs (context is seen as unsecure because we're
|
|
473
|
+
// tampering with network requests)
|
|
474
|
+
await page.exposeFunction('hashdigest', (algorithm, buffer) => {
|
|
475
|
+
return crypto.createHash(algorithm).update(Buffer.from(Object.values(buffer))).digest();
|
|
476
|
+
});
|
|
477
|
+
await page.evaluateOnNewDocument(() => {
|
|
478
|
+
window.crypto.subtle = {
|
|
479
|
+
digest: function (algorithm, buffer) {
|
|
480
|
+
const res = window.hashdigest('sha1', buffer);
|
|
481
|
+
return res.then(buf => {
|
|
482
|
+
return Uint8Array.from(buf.data);
|
|
483
|
+
});
|
|
484
|
+
}
|
|
485
|
+
};
|
|
486
|
+
});
|
|
487
|
+
|
|
488
|
+
// Common loading option to give the browser enough time to load large
|
|
489
|
+
// specs, and to consider navigation done when there haven't been
|
|
490
|
+
// network connections in the past 500ms. This should be enough to
|
|
491
|
+
// handle "redirection" through JS or meta refresh (which would not
|
|
492
|
+
// have time to run if we used "load").
|
|
493
|
+
const loadOptions = {
|
|
494
|
+
timeout: 120000,
|
|
495
|
+
waitUntil: 'networkidle0'
|
|
496
|
+
};
|
|
497
|
+
|
|
498
|
+
// Load the page
|
|
499
|
+
// (note HTTP status is 0 when `file://` URLs are loaded)
|
|
500
|
+
if (spec.html) {
|
|
501
|
+
await page.setContent(spec.html, loadOptions);
|
|
502
|
+
}
|
|
503
|
+
else {
|
|
504
|
+
const result = await page.goto(spec.url, loadOptions);
|
|
505
|
+
if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
|
|
506
|
+
throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// Handle multi-page specs
|
|
511
|
+
const pageUrls = spec.pages || [];
|
|
512
|
+
|
|
513
|
+
if (pageUrls.length > 0) {
|
|
514
|
+
const pages = [];
|
|
515
|
+
for (const url of pageUrls) {
|
|
516
|
+
const subAbort = new AbortController();
|
|
517
|
+
const subPage = await browser.newPage();
|
|
518
|
+
await subPage.setCacheEnabled(!options.forceLocalFetch);
|
|
519
|
+
const subCdp = await subPage.target().createCDPSession();
|
|
520
|
+
await subCdp.send('Fetch.enable');
|
|
521
|
+
subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
|
|
522
|
+
try {
|
|
523
|
+
// (Note HTTP status is 0 when `file://` URLs are loaded)
|
|
524
|
+
const subresult = await subPage.goto(url, loadOptions);
|
|
525
|
+
if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
|
|
526
|
+
throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
|
|
527
|
+
}
|
|
528
|
+
const html = await subPage.evaluate(() => {
|
|
529
|
+
return document.body.outerHTML
|
|
530
|
+
.replace(/<body/, '<section')
|
|
531
|
+
.replace(/<\/body/, '</section');
|
|
532
|
+
});
|
|
533
|
+
pages.push({ url, html });
|
|
534
|
+
}
|
|
535
|
+
finally {
|
|
536
|
+
subAbort.abort();
|
|
537
|
+
await subCdp.detach();
|
|
538
|
+
await subPage.close();
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
await page.evaluate(pages => {
|
|
542
|
+
for (const subPage of pages) {
|
|
543
|
+
const section = document.createElement('section');
|
|
544
|
+
section.setAttribute('data-reffy-page', subPage.url);
|
|
545
|
+
section.innerHTML = subPage.html;
|
|
546
|
+
document.body.appendChild(section);
|
|
547
|
+
}
|
|
548
|
+
}, pages);
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
// Wait until the generation of the spec is completely over
|
|
552
|
+
await page.evaluate(async () => {
|
|
553
|
+
// Detect draft CSS server hiccups as done in browser-specs:
|
|
554
|
+
// https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
|
|
555
|
+
const title = (window.document.querySelector('h1')?.textContent || '')
|
|
556
|
+
.replace(/\n/g, '').trim();
|
|
557
|
+
if (title.startsWith('Index of ')) {
|
|
558
|
+
throw new Error(`CSS server issue detected`);
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
|
|
562
|
+
window.document.head.querySelector("script[src*='respec']");
|
|
563
|
+
|
|
564
|
+
function sleep(ms) {
|
|
565
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
async function isReady(counter) {
|
|
569
|
+
counter = counter || 0;
|
|
570
|
+
if (counter > 60) {
|
|
571
|
+
throw new Error('Respec generation took too long');
|
|
572
|
+
}
|
|
573
|
+
if (window.document.respec?.ready) {
|
|
574
|
+
await window.document.respec.ready;
|
|
575
|
+
}
|
|
576
|
+
else if (usesRespec) {
|
|
577
|
+
await sleep(1000);
|
|
578
|
+
await isReady(counter + 1);
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
await isReady();
|
|
583
|
+
});
|
|
584
|
+
|
|
585
|
+
// Capture and report Reffy's browserlib warnings
|
|
586
|
+
page.on('console', msg => {
|
|
587
|
+
const text = msg.text();
|
|
588
|
+
if (text.startsWith('[reffy] ')) {
|
|
589
|
+
options.quiet ?? console.warn(spec.url, `[${msg.type()}]`, msg.text().substr('[reffy] '.length));
|
|
590
|
+
}
|
|
591
|
+
});
|
|
592
|
+
|
|
593
|
+
// Capture and report when page throws an error
|
|
594
|
+
page.on('pageerror', err => {
|
|
595
|
+
options.quiet ?? console.warn(err);
|
|
596
|
+
});
|
|
597
|
+
|
|
598
|
+
// Expose additional functions defined in src/browserlib/ to the
|
|
599
|
+
// browser context, under a window.reffy namespace, so that processing
|
|
600
|
+
// script may call them. The script is an ES6 module and needs to be
|
|
601
|
+
// loaded as such.
|
|
602
|
+
// Note that we're using a fake relative URL on purpose. In practice,
|
|
603
|
+
// the request will be processed by "interceptRequest", which will
|
|
604
|
+
// respond with the contents of the script file. Also, there are
|
|
605
|
+
// multiple path levels in that fake URL on purpose as well, because
|
|
606
|
+
// scripts import the WebIDL2.js library with a URL like
|
|
607
|
+
// "../../node_modules/[...]" and may import other scripts that are
|
|
608
|
+
// higher in the folder tree.
|
|
609
|
+
await page.addScriptTag({
|
|
610
|
+
url: `reffy/scripts/${range(maxPathDepth).map(n => '__').join('/')}/reffy.mjs`,
|
|
611
|
+
type: 'module'
|
|
612
|
+
});
|
|
613
|
+
|
|
614
|
+
// Run the processFunction method in the browser context
|
|
615
|
+
const results = await page.evaluate(processFunction, ...args);
|
|
616
|
+
|
|
617
|
+
// Pending network requests may still be in the queue, flag the page
|
|
618
|
+
// as closed not to send commands on a CDP session that's no longer
|
|
619
|
+
// attached to anything
|
|
620
|
+
abortController.abort();
|
|
621
|
+
|
|
622
|
+
// Close CDP session and page
|
|
623
|
+
// Note that gets done no matter what when browser.close() gets called.
|
|
624
|
+
await cdp.detach();
|
|
625
|
+
await page.close();
|
|
626
|
+
|
|
627
|
+
return results;
|
|
628
|
+
}
|
|
629
|
+
finally {
|
|
630
|
+
// Signal abortion again (in case an exception was thrown)
|
|
631
|
+
abortController.abort();
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
/**
 * Enrich the spec description with alternative URLs (versions and equivalents)
 *
 * TODO: The list used to contain published versions of TR specs retrieved from
 * the W3C API. They are useful to improve the relevance of reported anomalies.
 *
 * @function
 * @param {Object} spec Spec description structure (only the URL is useful)
 * @return {Object} The same structure, enriched with the URL of the editor's
 * draft when one is found
 */
function completeWithAlternativeUrls(spec) {
  // Collect known URLs in insertion order, deduplicated through a Set:
  // canonical URL first, then release and nightly URLs when defined
  const versions = new Set([spec.url]);
  if (spec.release) {
    versions.add(spec.release.url);
  }
  if (spec.nightly) {
    versions.add(spec.nightly.url);
  }

  // Append equivalent URLs from the spec-equivalents table, if any
  const equivalents = specEquivalents[spec.url];
  if (equivalents) {
    for (const url of equivalents) {
      versions.add(url);
    }
  }

  spec.versions = [...versions];
  return spec;
}
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
/**
 * Returns true when the given spec is the latest "fullest" level of that spec
 * in the given list of specs that passes the given predicate.
 *
 * "Fullest" means "not a delta spec, unless that is the only level that passes
 * the predicate".
 *
 * @function
 * @public
 * @param {Object} spec Spec to check
 * @param {Array(Object)} list List of specs (must include the spec to check)
 * @param {function} predicate Predicate function that the spec must pass. Must
 * be a function that takes a spec as argument and returns a boolean.
 * @return {Boolean} true if the spec is the latest "fullest" level in the list
 * that passes the predicate.
 */
function isLatestLevelThatPasses(spec, list, predicate) {
  predicate = predicate || (_ => true);
  if (!predicate(spec)) {
    return false;
  }
  if (spec.seriesComposition === 'delta') {
    // A delta spec is only retained when no previous "full" level in the
    // series passes the predicate
    while (spec.seriesPrevious) {
      spec = list.find(s => s.shortname === spec.seriesPrevious);
      if (!spec) {
        // Previous level not in the list, stop looking further back
        break;
      }
      if ((spec.seriesComposition === 'full') && predicate(spec)) {
        return false;
      }
    }
    return true;
  }
  // A full spec is only retained when no subsequent "full" level in the
  // series passes the predicate.
  // Note: the "missing from list" check must run AFTER the lookup (bug fix:
  // the guard used to run before the reassignment, so a seriesNext shortname
  // absent from the list made the next property access throw a TypeError).
  while (spec.seriesNext) {
    spec = list.find(s => s.shortname === spec.seriesNext);
    if (!spec) {
      // Next level not in the list, stop looking further forward
      break;
    }
    if ((spec.seriesComposition === 'full') && predicate(spec)) {
      return false;
    }
  }
  return true;
}
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
/**
 * Takes the results of a crawl for a given spec and expands it to include the
 * contents of referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the spec object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} spec Spec crawl result that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 * the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 * (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 * contains the contents of referenced files and no longer references external
 * files (for the requested properties)
 */
async function expandSpecResult(spec, baseFolder, properties) {
  baseFolder = baseFolder || '';

  // Links to extracts look like "subfolder/file.json" or "subfolder/file.idl"
  const extractLink = /^[^\/]+\/[^\/]+\.(json|idl)$/;

  // Retrieve the raw contents of an extract, over HTTPS or from disk
  async function loadExtract(link) {
    if (baseFolder.startsWith('https:')) {
      const url = new URL(link, baseFolder).toString();
      const response = await fetch(url, { nolog: true });
      return response.text();
    }
    return fs.readFile(path.join(baseFolder, link), 'utf8');
  }

  // Keep only requested properties whose value links to an extract
  const expandable = Object.keys(spec).filter(property =>
    (!properties || properties.includes(property)) &&
    spec[property] &&
    (typeof spec[property] === 'string') &&
    spec[property].match(extractLink));

  await Promise.all(expandable.map(async property => {
    const link = spec[property];
    let contents = await loadExtract(link);
    if (link.endsWith('.json')) {
      contents = JSON.parse(contents);
    }
    if (property === 'css') {
      // Special case for CSS where the "css" level does not exist
      // in the generated files (shallow copy without the "spec" key)
      const { spec: specInfo, ...css } = contents;
      spec[property] = css;
    }
    else if (property === 'idl') {
      // Special case for raw IDL extracts, which are text extracts.
      // Also drop header that may have been added when extract was
      // serialized.
      if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
        contents = contents.substring(contents.indexOf('\n\n') + 2);
      }
      spec.idl = contents;
    }
    else {
      spec[property] = contents[property];
    }
  }));
  return spec;
}
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
/**
 * Takes the results of a crawl (typically the contents of the index.json file)
 * and expands it to include the contents of all referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the crawl object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} crawl Crawl index object that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 * the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 * (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 * contains the entire crawl report (and no longer references external files)
 */
async function expandCrawlResult(crawl, baseFolder, properties) {
  const folder = baseFolder || '';
  // Expand all spec results in parallel
  const expanded = crawl.results.map(
    spec => expandSpecResult(spec, folder, properties));
  crawl.results = await Promise.all(expanded);
  return crawl;
}
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
/**
 * Retrieves the list of IDL attribute names that the CSS property generates
 * per the CSSOM spec, see:
 * https://drafts.csswg.org/cssom/#ref-for-css-property-to-idl-attribute
 *
 * @function
 * @param {String} property CSS property name
 * @return {Array(String)} An array of IDL attribute names, dashed attribute
 * first, then camel-cased attribute if different, then webkit-cased attribute
 * name if needed
 */
function getGeneratedIDLNamesByCSSProperty(property) {
  // Converts a CSS property to an IDL attribute name per the CSSOM spec:
  // https://drafts.csswg.org/cssom/#css-property-to-idl-attribute
  // (each dash is dropped and the character that follows it is uppercased;
  // the first segment is left as-is)
  const cssPropertyToIDLAttribute = (prop, lowercaseFirst) =>
    (lowercaseFirst ? prop.substr(1) : prop)
      .split('-')
      .map((part, idx) => (idx === 0) ?
        part :
        part.charAt(0).toUpperCase() + part.slice(1))
      .join('');

  // Start with dashed attribute
  const names = [property];

  // Add camel-cased attribute if different
  const camelCased = cssPropertyToIDLAttribute(property, false);
  if (camelCased !== property) {
    names.push(camelCased);
  }

  // Add webkit-cased attribute if needed
  if (property.startsWith('-webkit-')) {
    names.push(cssPropertyToIDLAttribute(property, true));
  }

  return names;
};
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
/**
 * Creates the given folder if it does not exist yet.
 *
 * @function
 * @public
 * @param {String} folder Path to folder to create
 * (from current working directory)
 */
async function createFolderIfNeeded(folder) {
  await fs.mkdir(folder).catch(err => {
    // A folder that already exists is fine; re-throw anything else
    if (err.code !== 'EEXIST') {
      throw err;
    }
  });
}
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
// Public API of this module: fetch/require helpers, browser lifecycle and
// spec-processing entry points, crawl-result expansion utilities, and
// filesystem helpers. (Several exported names are defined earlier in this
// file, outside this excerpt.)
module.exports = {
  fetch,
  requireFromWorkingDirectory,
  expandBrowserModules,
  setupBrowser,
  teardownBrowser,
  processSpecification,
  completeWithAlternativeUrls,
  isLatestLevelThatPasses,
  expandCrawlResult,
  expandSpecResult,
  getGeneratedIDLNamesByCSSProperty,
  createFolderIfNeeded
};
|