reffy 6.2.0 → 6.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/LICENSE +21 -21
  2. package/README.md +158 -158
  3. package/index.js +11 -11
  4. package/package.json +53 -53
  5. package/reffy.js +248 -248
  6. package/src/browserlib/canonicalize-url.mjs +50 -50
  7. package/src/browserlib/create-outline.mjs +352 -352
  8. package/src/browserlib/extract-cssdfn.mjs +319 -319
  9. package/src/browserlib/extract-dfns.mjs +686 -686
  10. package/src/browserlib/extract-elements.mjs +205 -205
  11. package/src/browserlib/extract-headings.mjs +48 -48
  12. package/src/browserlib/extract-ids.mjs +28 -28
  13. package/src/browserlib/extract-links.mjs +28 -28
  14. package/src/browserlib/extract-references.mjs +203 -203
  15. package/src/browserlib/extract-webidl.mjs +134 -134
  16. package/src/browserlib/get-absolute-url.mjs +21 -21
  17. package/src/browserlib/get-generator.mjs +26 -26
  18. package/src/browserlib/get-lastmodified-date.mjs +13 -13
  19. package/src/browserlib/get-title.mjs +11 -11
  20. package/src/browserlib/informative-selector.mjs +16 -16
  21. package/src/browserlib/map-ids-to-headings.mjs +136 -136
  22. package/src/browserlib/reffy.json +53 -53
  23. package/src/cli/check-missing-dfns.js +609 -609
  24. package/src/cli/generate-idlnames.js +430 -430
  25. package/src/cli/generate-idlparsed.js +139 -139
  26. package/src/cli/merge-crawl-results.js +128 -128
  27. package/src/cli/parse-webidl.js +430 -430
  28. package/src/lib/css-grammar-parse-tree.schema.json +109 -109
  29. package/src/lib/css-grammar-parser.js +440 -440
  30. package/src/lib/fetch.js +55 -55
  31. package/src/lib/nock-server.js +119 -119
  32. package/src/lib/specs-crawler.js +605 -603
  33. package/src/lib/util.js +898 -898
  34. package/src/specs/missing-css-rules.json +197 -197
  35. package/src/specs/spec-equivalents.json +149 -149
  36. package/src/browserlib/extract-editors.mjs~ +0 -14
  37. package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
  38. package/src/cli/csstree-grammar-check.js +0 -28
  39. package/src/cli/csstree-grammar-check.js~ +0 -10
  40. package/src/cli/csstree-grammar-parser.js +0 -11
  41. package/src/cli/csstree-grammar-parser.js~ +0 -1
  42. package/src/cli/extract-editors.js~ +0 -38
  43. package/src/cli/process-specs.js~ +0 -28
package/src/lib/util.js CHANGED
@@ -1,898 +1,898 @@
1
- /**
2
- * A bunch of utility functions common to multiple scripts
3
- */
4
-
5
- const fs = require('fs').promises;
6
- const { existsSync } = require('fs');
7
- const path = require('path');
8
- const puppeteer = require('puppeteer');
9
- const crypto = require('crypto');
10
- const { AbortController } = require('abortcontroller-polyfill/dist/cjs-ponyfill');
11
- const fetch = require('./fetch');
12
- const specEquivalents = require('../specs/spec-equivalents.json');
13
-
14
- const reffyModules = require('../browserlib/reffy.json');
15
-
16
-
17
- /**
18
- * Maximum depth difference supported between Reffy's install path and custom
19
- * modules that may be provided on the command-line
20
- *
21
- * TODO: Find a way to get right of that, there should be no limit
22
- */
23
- const maxPathDepth = 20;
24
-
25
-
26
/**
 * Builds the array [0, 1, ..., n-1] (upper bound excluded)
 */
const range = n => Array.from({ length: n }, (_, index) => index);
30
-
31
-
32
/**
 * Returns a function that extracts the given property from an object,
 * handy as a `map` callback
 */
const prop = key => obj => obj[key];
36
-
37
-
38
/**
 * Wrapper around "require" that resolves the given path against the current
 * working directory (CWD) instead of against this file's location.
 *
 * This is typically needed to load JSON config files provided as
 * command-line arguments.
 *
 * @function
 * @param {String} filename The path to the file to require
 * @return {Object} The result of requiring the resolved file, or null when
 *   the file cannot be loaded
 */
function requireFromWorkingDirectory(filename) {
    const resolved = path.resolve(filename);
    try {
        return require(resolved);
    }
    catch (err) {
        // Absorb load errors, signal failure to the caller with null
        return null;
    }
}
59
-
60
-
61
/**
 * Determine the path to the "node_modules" folder, used to resolve relative
 * links in the ES6 browser lib modules.
 *
 * When Reffy runs from its own repository, "node_modules" sits at the root
 * next to "src". When Reffy is installed as a library, Reffy itself lives
 * inside "node_modules", so the parent of the root folder is the answer.
 *
 * @function
 * @return {String} Path to the node_modules folder.
 */
function getModulesFolder() {
    const rootFolder = path.resolve(__dirname, '../..');
    const localModules = path.resolve(rootFolder, 'node_modules');
    return existsSync(localModules) ?
        localModules :
        path.resolve(rootFolder, '..');
}
const modulesFolder = getModulesFolder();
79
-
80
-
81
- /**
82
- * Puppeteer browser instance used to load and process specifications
83
- */
84
- let browser = null;
85
-
86
- /**
87
- * Promise resolved when there is no running instance of Puppeteer. This allows
88
- * to serialize calls to setupBrowser (and thus to crawlList and crawlSpecs in
89
- * specs-crawler.js)
90
- */
91
- let browserClosed = Promise.resolve();
92
- let resolveBrowserClosed = null;
93
-
94
- /**
95
- * The browser JS library that will be loaded onto every crawled page
96
- */
97
- let browserlib = null;
98
-
99
-
100
/**
 * Expand a list of browser modules into objects with the full set of
 * descriptive properties (href, name, property, expanded).
 *
 * A module may be specified as:
 * - a name matching one of the existing modules in browserlib
 * - the string "core", meaning all of Reffy's core modules
 * - a relative path to an .mjs file which must exist
 * - an object with an "href" property that is a relative path to an .mjs
 *   file which must exist
 *
 * Relative paths provided by the user are interpreted as relative to the
 * current working directory, and converted to be relative to the browserlib
 * directory.
 *
 * @function
 * @public
 * @param {Array} modules List of module specifiers (falsy means "all core")
 * @return {Array(Object)} List of modules with href, name and property keys
 */
function expandBrowserModules(modules) {
    // Turn an .mjs file path into a camelCase name,
    // e.g. "extract-dfns.mjs" becomes "extractDfns".
    // Note iteration stops at the first empty dash-separated segment,
    // mirroring historical behavior.
    function getCamelCaseName(href) {
        const segments = href.replace(/([^\/\\]+)\.mjs$/, '$1').split('-');
        let name;
        for (const segment of segments) {
            if (!segment) {
                break;
            }
            const cleaned = segment.replace(/\W/g, '');
            name = name ?
                name + cleaned.substring(0, 1).toUpperCase() + cleaned.substring(1) :
                cleaned;
        }
        return name;
    }

    const browserlibPath = path.resolve(__dirname, '..', 'browserlib');

    // No explicit list: return all of Reffy's core modules
    if (!modules) {
        return reffyModules.map(mod => Object.assign({
            name: getCamelCaseName(mod.href),
            expanded: true
        }, mod));
    }

    const expanded = modules.map(mod => {
        if (typeof mod === 'string') {
            // Path to a custom .mjs module, relative to the CWD
            if (mod.endsWith('.mjs')) {
                const name = getCamelCaseName(mod);
                return {
                    href: path.relative(browserlibPath, path.join(process.cwd(), mod)).replace(/\\/g, '/'),
                    name,
                    property: name,
                    expanded: true
                };
            }
            // "core" expands to all of Reffy's modules
            if (mod === 'core') {
                return reffyModules.map(coreMod => Object.assign({
                    name: getCamelCaseName(coreMod.href),
                    expanded: true
                }, coreMod));
            }
            // Otherwise the string must identify a known core module
            const res = reffyModules.find(m => m.href === mod ||
                getCamelCaseName(m.href) === mod || m.property === mod);
            if (!res) {
                throw new Error(`Unknown browserlib module ${mod}`);
            }
            return Object.assign({
                name: getCamelCaseName(res.href),
                expanded: true
            }, res);
        }
        if (mod.expanded) {
            return mod;
        }
        if (!mod.href) {
            throw new Error('Browserlib module does not have an "href" property');
        }
        // Complete the object in place with derived properties
        mod.href = path.relative(browserlibPath, path.join(process.cwd(), mod.href)).replace(/\\/g, '/');
        if (!mod.name) {
            mod.name = getCamelCaseName(mod.href);
        }
        if (!mod.property) {
            mod.property = mod.name;
        }
        mod.expanded = true;
        return mod;
    });

    // "core" entries expand to arrays, flatten the final list
    return expanded.flat();
}
194
-
195
-
196
/**
 * Prepare the browserlib script that will be loaded in every crawled page.
 *
 * The generated script exposes a global reffy namespace with the requested
 * modules. The result is stored in the module-level "browserlib" variable.
 *
 * The function must be called before any attempt to call
 * `processSpecification` and should only be called once. The `setupBrowser`
 * function takes care of it.
 *
 * @function
 * @private
 * @param {Array} modules List of browser module specifiers to expose
 */
function setupBrowserlib(modules) {
    const expanded = expandBrowserModules(modules);

    browserlib = 'window.reffy = window.reffy ?? {};\n';

    // Some modules need the mapping from IDs to headings
    if (expanded.find(module => module.needsIdToHeadingMap)) {
        browserlib += `
      import mapIdsToHeadings from './map-ids-to-headings.mjs';
      window.reffy.mapIdsToHeadings = mapIdsToHeadings;\n`;
    }

    // Import each module and hang it off the reffy namespace
    browserlib += expanded.map(module => `
      import ${module.name} from '${module.href}';
      window.reffy.${module.name} = ${module.name};
    `).join('\n');
}
222
-
223
-
224
/**
 * Setup and launch the browser instance used to load and process
 * specifications, and generate the code that injects the `reffy` namespace
 * in each processed page.
 *
 * The function must be called before any attempt to call
 * `processSpecification` and should only be called once.
 *
 * Note: Switch `headless` to `false` to access dev tools and debug
 * processing.
 *
 * @function
 * @public
 * @param {Array} modules List of browser module specifiers to expose
 */
async function setupBrowser(modules) {
    // Only one crawl may run at a time: wait until any previous Puppeteer
    // instance has been fully torn down, then flag a new one as running
    await browserClosed;
    browserClosed = new Promise(resolve => {
        resolveBrowserClosed = resolve;
    });

    // Note: switch "headless" to "false" (and comment out the call to
    // "browser.close()") to access dev tools in debug mode
    browser = await puppeteer.launch({ headless: true });
    setupBrowserlib(modules);
}
249
-
250
-
251
/**
 * Close and destroy the browser instance.
 *
 * The function should be called once at the end of the processing. It also
 * resolves the "browserClosed" promise so that a subsequent `setupBrowser`
 * call may proceed.
 *
 * @function
 * @public
 */
async function teardownBrowser() {
    if (!browser) {
        return;
    }
    await browser.close();
    browser = null;
    resolveBrowserClosed();
    resolveBrowserClosed = null;
}
267
-
268
-
269
/**
 * Load and process the given specification.
 *
 * The method automatically exposes Reffy's library functions in a
 * window.reffy namespace (see setupBrowserlib) so that the callback function
 * can call them directly. Additional callback arguments that would need to
 * be passed to the browser context can be provided through "args".
 *
 * Network requests made by the browser are intercepted through the Chrome
 * DevTools Protocol (CDP): useless requests (images, fonts, streaming APIs)
 * get aborted, and remaining requests are served through the local "fetch"
 * wrapper (and its file cache) when possible. Puppeteer's
 * page.setRequestInterception does not play nicely with workers (which
 * Respec typically uses), hence the direct use of CDP, see:
 * https://github.com/puppeteer/puppeteer/issues/4208
 *
 * Because interception flags the page as "non secure", window.crypto.subtle
 * is unavailable; a replacement "digest" is exposed manually for Respec.
 *
 * Strictly speaking, intercepting requests is only needed to be able to use
 * the "networkidle0" option. The interception logic could be dropped (and
 * "networkidle2" used instead) if it proves too unstable.
 *
 * @function
 * @public
 * @param {Object|String} spec The spec to load. Either a URL string or an
 *   object with a "url" property. If the object contains an "html" property,
 *   the HTML content is loaded instead. An optional "pages" property lists
 *   the URLs of a multi-page spec, merged into the main page as sections.
 * @param {function} processFunction Processing function evaluated in the
 *   browser context where the spec gets loaded
 * @param {Array} args List of arguments to pass to the processing function
 * @param {Object} options Processing options. "quiet" (truthy) silences
 *   console warnings. "forceLocalFetch" forces all network requests through
 *   Node.js's "fetch" function (no fallback to Puppeteer's network logic),
 *   useful to keep full control of network requests in tests.
 * @return {Promise} The promise to get the results of the processing
 *   function
 * @throws {Error} When setupBrowser was not called first, or when loading a
 *   page returns an unexpected HTTP status
 */
async function processSpecification(spec, processFunction, args, options) {
    spec = (typeof spec === 'string') ? { url: spec } : spec;
    processFunction = processFunction || function () {};
    args = args || [];
    options = options || {};

    if (!browser) {
        throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
    }

    // Abort controller for network requests directly handled by the Node.js
    // code (and not by Puppeteer)
    const abortController = new AbortController();

    // Inner function that returns a network interception method suitable for
    // a given CDP session.
    function interceptRequest(cdp, controller) {
        return async function ({ requestId, request }) {
            try {
                // Abort network requests to common image/font formats
                if (/\.(gif|ico|jpg|jpeg|png|ttf|woff)$/i.test(request.url)) {
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                    return;
                }

                // Abort network requests that return a "stream", they won't
                // play well with Puppeteer's "networkidle0" option, and our
                // custom "fetch" function does not handle streams in any case
                if (request.url.startsWith('https://drafts.csswg.org/api/drafts/') ||
                        request.url.startsWith('https://drafts.css-houdini.org/api/drafts/') ||
                        request.url.startsWith('https://drafts.fxtf.org/api/drafts/') ||
                        request.url.startsWith('https://api.csswg.org/shepherd/')) {
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                    return;
                }

                // The request needs to be intercepted, either because it
                // targets one of the local script files, or because we would
                // like to use our local cache to avoid network requests
                const reffyPath = '/reffy/scripts/';
                const webidl2Path = '/node_modules/webidl2/';
                if (request.url.includes(reffyPath) || request.url.includes(webidl2Path)) {
                    let body;
                    if (request.url.endsWith('reffy.mjs')) {
                        // Serve the generated browserlib script
                        body = Buffer.from(browserlib);
                    }
                    else if (request.url.includes(webidl2Path)) {
                        const file = path.resolve(modulesFolder, 'webidl2',
                            request.url.substring(request.url.indexOf(webidl2Path) + webidl2Path.length));
                        body = await fs.readFile(file);
                    }
                    else {
                        // The "__" folders are just a means to resolve
                        // relative paths that are higher than the
                        // "browserlib" folder on the storage drive
                        const requestPath = request.url.substring(request.url.indexOf(reffyPath) + reffyPath.length);
                        let depth = requestPath.lastIndexOf('__/') / 3;
                        const filename = requestPath.substring(requestPath.lastIndexOf('__/') + 3);
                        let filePath = path.resolve(__dirname, '..', 'browserlib');
                        while (depth < maxPathDepth - 1) {
                            filePath = path.resolve(filePath, '..');
                            depth += 1;
                        }
                        const file = path.resolve(filePath, filename);
                        body = await fs.readFile(file);
                    }
                    await cdp.send('Fetch.fulfillRequest', {
                        requestId,
                        responseCode: 200,
                        responseHeaders: [{ name: 'Content-Type', value: 'application/javascript' }],
                        body: body.toString('base64')
                    });
                }
                else {
                    // Only GET requests on http(s) URLs go through our fetch
                    if ((request.method !== 'GET') ||
                            (!request.url.startsWith('http:') && !request.url.startsWith('https:'))) {
                        await cdp.send('Fetch.continueRequest', { requestId });
                        return;
                    }

                    const response = await fetch(request.url, { signal: controller.signal });
                    const body = await response.buffer();
                    await cdp.send('Fetch.fulfillRequest', {
                        requestId,
                        responseCode: response.status,
                        responseHeaders: Object.keys(response.headers.raw()).map(header => {
                            return {
                                name: header,
                                value: response.headers.raw()[header].join(',')
                            };
                        }),
                        body: body.toString('base64')
                    });
                }
            }
            catch (err) {
                if (controller.signal.aborted) {
                    // All is normal, processing was over, page and CDP
                    // session have been closed, network requests aborted
                    return;
                }

                // Fetch from file cache failed somehow. Let Puppeteer handle
                // the request as fallback unless calling function asked us
                // not to do that.
                // Bug fix: warnings used "options.quiet ?? console.warn(...)"
                // which suppressed them whenever "quiet" was set to ANY
                // value, including false. Use an explicit truthiness check.
                if (options.forceLocalFetch) {
                    if (!options.quiet) {
                        console.warn(`[warn] Network request for ${request.url} failed`, err);
                    }
                    await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
                }
                else {
                    if (!options.quiet) {
                        console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
                    }
                    try {
                        await cdp.send('Fetch.continueRequest', { requestId });
                    }
                    catch (err) {
                        if (!controller.signal.aborted && !options.quiet) {
                            console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
                        }
                    }
                }
            }
        }
    }

    try {
        const page = await browser.newPage();

        // Disable cache if caller wants to handle all network requests
        await page.setCacheEnabled(!options.forceLocalFetch);

        // Intercept all network requests to use our own version of "fetch"
        // that makes use of the local file cache.
        const cdp = await page.target().createCDPSession();
        await cdp.send('Fetch.enable');
        cdp.on('Fetch.requestPaused', interceptRequest(cdp, abortController));

        // Quick and dirty workaround to re-create the
        // "window.crypto.subtle.digest" function that Respec needs (context
        // is seen as unsecure because we're tampering with network requests)
        await page.exposeFunction('hashdigest', (algorithm, buffer) => {
            return crypto.createHash(algorithm).update(Buffer.from(Object.values(buffer))).digest();
        });
        await page.evaluateOnNewDocument(() => {
            window.crypto.subtle = {
                digest: function (algorithm, buffer) {
                    const res = window.hashdigest('sha1', buffer);
                    return res.then(buf => {
                        return Uint8Array.from(buf.data);
                    });
                }
            };
        });

        // Common loading options: give the browser enough time to load large
        // specs, and consider navigation done when there haven't been
        // network connections in the past 500ms. This should be enough to
        // handle "redirection" through JS or meta refresh (which would not
        // have time to run if we used "load").
        const loadOptions = {
            timeout: 120000,
            waitUntil: 'networkidle0'
        };

        // Load the page
        // (note HTTP status is 0 when `file://` URLs are loaded)
        if (spec.html) {
            await page.setContent(spec.html, loadOptions);
        }
        else {
            const result = await page.goto(spec.url, loadOptions);
            if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
                throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
            }
        }

        // Handle multi-page specs: load each sub-page and merge its body
        // into the main page as a section
        const pageUrls = spec.pages || [];
        if (pageUrls.length > 0) {
            const pages = [];
            for (const url of pageUrls) {
                const subAbort = new AbortController();
                const subPage = await browser.newPage();
                await subPage.setCacheEnabled(!options.forceLocalFetch);
                const subCdp = await subPage.target().createCDPSession();
                await subCdp.send('Fetch.enable');
                subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
                try {
                    // (Note HTTP status is 0 when `file://` URLs are loaded)
                    const subresult = await subPage.goto(url, loadOptions);
                    if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
                        // Bug fix: the error used to reference the
                        // out-of-scope `result` variable (a ReferenceError
                        // when triggered) and the main spec's URL instead of
                        // the sub-page's
                        throw new Error(`Loading ${url} triggered HTTP status ${subresult.status()}`);
                    }
                    const html = await subPage.evaluate(() => {
                        return document.body.outerHTML
                            .replace(/<body/, '<section')
                            .replace(/<\/body/, '</section');
                    });
                    pages.push({ url, html });
                }
                finally {
                    subAbort.abort();
                    await subCdp.detach();
                    await subPage.close();
                }
            }
            await page.evaluate(pages => {
                for (const subPage of pages) {
                    const section = document.createElement('section');
                    section.setAttribute('data-reffy-page', subPage.url);
                    section.innerHTML = subPage.html;
                    document.body.appendChild(section);
                }
            }, pages);
        }

        // Wait until the generation of the spec is completely over
        await page.evaluate(async () => {
            // Detect draft CSS server hiccups as done in browser-specs:
            // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
            const title = (window.document.querySelector('h1')?.textContent || '')
                .replace(/\n/g, '').trim();
            if (title.startsWith('Index of ')) {
                throw new Error(`CSS server issue detected`);
            }

            const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
                window.document.head.querySelector("script[src*='respec']");

            function sleep(ms) {
                return new Promise(resolve => setTimeout(resolve, ms));
            }

            // Poll until Respec signals readiness (up to ~60s)
            async function isReady(counter) {
                counter = counter || 0;
                if (counter > 60) {
                    throw new Error('Respec generation took too long');
                }
                if (window.document.respec?.ready) {
                    await window.document.respec.ready;
                }
                else if (usesRespec) {
                    await sleep(1000);
                    await isReady(counter + 1);
                }
            }

            await isReady();
        });

        // Capture and report Reffy's browserlib warnings
        page.on('console', msg => {
            const text = msg.text();
            if (text.startsWith('[reffy] ')) {
                if (!options.quiet) {
                    console.warn(spec.url, `[${msg.type()}]`, text.slice('[reffy] '.length));
                }
            }
        });

        // Capture and report when page throws an error
        page.on('pageerror', err => {
            if (!options.quiet) {
                console.warn(err);
            }
        });

        // Expose additional functions defined in src/browserlib/ to the
        // browser context, under a window.reffy namespace, so that the
        // processing script may call them. The script is an ES6 module and
        // needs to be loaded as such.
        // Note that we're using a fake relative URL on purpose. In practice,
        // the request will be processed by "interceptRequest", which will
        // respond with the contents of the script file. Also, there are
        // multiple path levels in that fake URL on purpose as well, because
        // scripts import the WebIDL2.js library with a URL like
        // "../../node_modules/[...]" and may import other scripts that are
        // higher in the folder tree.
        await page.addScriptTag({
            url: `reffy/scripts/${range(maxPathDepth).map(n => '__').join('/')}/reffy.mjs`,
            type: 'module'
        });

        // Run the processFunction method in the browser context
        const results = await page.evaluate(processFunction, ...args);

        // Pending network requests may still be in the queue, flag the page
        // as closed not to send commands on a CDP session that's no longer
        // attached to anything
        abortController.abort();

        // Close CDP session and page
        // Note that gets done no matter what when browser.close() is called.
        await cdp.detach();
        await page.close();

        return results;
    }
    finally {
        // Signal abortion again (in case an exception was thrown)
        abortController.abort();
    }
}
634
-
635
-
636
/**
 * Enrich the spec description with alternative URLs (versions and
 * equivalents).
 *
 * TODO: The list used to contain published versions of TR specs retrieved
 * from the W3C API. They are useful to improve the relevance of reported
 * anomalies.
 *
 * @function
 * @param {Object} spec Spec description structure (only the URL is useful)
 * @return {Object} The same structure (mutated in place), with a "versions"
 *   array listing the spec URL, release/nightly URLs when present, and any
 *   known equivalent URLs
 */
function completeWithAlternativeUrls(spec) {
    const versions = new Set([spec.url]);
    if (spec.release) {
        versions.add(spec.release.url);
    }
    if (spec.nightly) {
        versions.add(spec.nightly.url);
    }
    // Merge known equivalent URLs, if any
    for (const equivalent of specEquivalents[spec.url] ?? []) {
        versions.add(equivalent);
    }
    spec.versions = [...versions];
    return spec;
}
665
-
666
-
667
/**
 * Returns true when the given spec is the latest "fullest" level of that
 * spec in the given list of specs that passes the given predicate.
 *
 * "Fullest" means "not a delta spec, unless that is the only level that
 * passes the predicate".
 *
 * @function
 * @public
 * @param {Object} spec Spec to check
 * @param {Array(Object)} list List of specs (must include the spec to check)
 * @param {function} predicate Predicate function that the spec must pass.
 *   Must be a function that takes a spec as argument and returns a boolean.
 *   Defaults to "always true" when not provided.
 * @return {Boolean} true if the spec is the latest "fullest" level in the
 *   list that passes the predicate.
 */
function isLatestLevelThatPasses(spec, list, predicate) {
    predicate = predicate || (_ => true);
    if (!predicate(spec)) {
        return false;
    }
    if (spec.seriesComposition === 'delta') {
        // A delta spec only counts if no previous full level passes
        while (spec.seriesPrevious) {
            spec = list.find(s => s.shortname === spec.seriesPrevious);
            if (!spec) {
                break;
            }
            if ((spec.seriesComposition === 'full') && predicate(spec)) {
                return false;
            }
        }
        return true;
    }
    // A full spec only counts if no later full level passes
    while (spec.seriesNext) {
        spec = list.find(s => s.shortname === spec.seriesNext);
        // Bug fix: the dangling-reference guard used to run *before* the
        // lookup (where spec was necessarily defined), so a "seriesNext"
        // shortname absent from the list threw a TypeError on the next line
        if (!spec) {
            break;
        }
        if ((spec.seriesComposition === 'full') && predicate(spec)) {
            return false;
        }
    }
    return true;
}
711
-
712
-
713
/**
 * Takes the results of a crawl for a given spec and expands it to include
 * the contents of referenced files.
 *
 * The function handles both files and HTTPS resources, using either
 * filesystem functions (for files) or fetch (for HTTPS resources).
 *
 * Note the spec object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} spec Spec crawl result that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file,
 *   or the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 *   (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 *   contains the contents of referenced files and no longer references
 *   external files (for the requested properties)
 */
async function expandSpecResult(spec, baseFolder, properties) {
    const base = baseFolder || '';

    // A property value points at an extract when it is a relative link to an
    // IDL or JSON file in a subfolder
    const isExtractLink = value =>
        (typeof value === 'string') &&
        /^[^\/]+\/[^\/]+\.(json|idl)$/.test(value);

    await Promise.all(Object.keys(spec).map(async property => {
        // Only consider properties explicitly requested
        if (properties && !properties.includes(property)) {
            return;
        }

        const value = spec[property];
        if (!isExtractLink(value)) {
            return;
        }

        // Retrieve the extract, over HTTPS or from the filesystem
        let contents;
        if (base.startsWith('https:')) {
            const url = (new URL(value, base)).toString();
            const response = await fetch(url, { nolog: true });
            contents = await response.text();
        }
        else {
            contents = await fs.readFile(path.join(base, value), 'utf8');
        }
        if (value.endsWith('.json')) {
            contents = JSON.parse(contents);
        }

        if (property === 'css') {
            // Special case for CSS where the "css" level does not exist in
            // the generated files
            const css = Object.assign({}, contents);
            delete css.spec;
            spec[property] = css;
        }
        else if (property === 'idl') {
            // Special case for raw IDL extracts, which are text extracts.
            // Also drop the header that may have been added when the extract
            // was serialized.
            if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
                contents = contents.substring(contents.indexOf('\n\n') + 2);
            }
            spec.idl = contents;
        }
        else {
            spec[property] = contents[property];
        }
    }));
    return spec;
}
784
-
785
-
786
- /**
787
- * Takes the results of a crawl (typically the contents of the index.json file)
788
- * and expands it to include the contents of all referenced files.
789
- *
790
- * The function handles both files and HTTPS resources, using either filesystem
791
- * functions (for files) or fetch (for HTTPS resources).
792
- *
793
- * Note the crawl object is expanded in place.
794
- *
795
- * @function
796
- * @public
797
- * @param {Object} crawl Crawl index object that needs to be expanded
798
- * @param {string} baseFolder The base folder that contains the crawl file, or
799
- * the base HTTPS URI to resolve relative links in the crawl object.
800
- * @param {Array(string)} properties An explicit list of properties to expand
801
- * (no value means "expand all possible properties")
802
- * @return {Promise(object)} The promise to get an expanded crawl object that
803
- * contains the entire crawl report (and no longer references external files)
804
- */
805
- async function expandCrawlResult(crawl, baseFolder, properties) {
806
- baseFolder = baseFolder || '';
807
- crawl.results = await Promise.all(
808
- crawl.results.map(spec => expandSpecResult(spec, baseFolder, properties))
809
- );
810
- return crawl;
811
- }
812
-
813
-
814
- /**
815
- * Retrieves the list of IDL attribute names that the CSS property generates
816
- * per the CSSOM spec, see:
817
- * https://drafts.csswg.org/cssom/#ref-for-css-property-to-idl-attribute
818
- *
819
- * @function
820
- * @param {String} property CSS property name
821
- * @return {Array(String)} An array of IDL attribute names, dashed attribute
822
- * first, then camel-cased attribute if different, then webkit-cased attribute
823
- * name if needed
824
- */
825
- function getGeneratedIDLNamesByCSSProperty(property) {
826
- // Converts a CSS property to an IDL attribute name per the CSSOM spec:
827
- // https://drafts.csswg.org/cssom/#css-property-to-idl-attribute
828
- function cssPropertyToIDLAttribute(property, lowercaseFirst) {
829
- let output = '';
830
- let uppercaseNext = false;
831
- if (lowercaseFirst) {
832
- property = property.substr(1);
833
- }
834
- for (const c of property) {
835
- if (c === '-') {
836
- uppercaseNext = true;
837
- } else if (uppercaseNext) {
838
- uppercaseNext = false;
839
- output += c.toUpperCase();
840
- } else {
841
- output += c;
842
- }
843
- }
844
- return output;
845
- }
846
-
847
- // Start with dashed attribute
848
- const res = [property];
849
-
850
- // Add camel-cased attribute if different
851
- const camelCased = cssPropertyToIDLAttribute(property, false);
852
- if (camelCased !== property) {
853
- res.push(camelCased);
854
- }
855
-
856
- // Add webkit-cased attribute if needed
857
- if (property.startsWith('-webkit-')) {
858
- res.push(cssPropertyToIDLAttribute(property, true));
859
- }
860
-
861
- return res;
862
- };
863
-
864
-
865
- /**
866
- * Creates the given folder if it does not exist yet.
867
- *
868
- * @function
869
- * @public
870
- * @param {String} folder Path to folder to create
871
- * (from current working directory)
872
- */
873
- async function createFolderIfNeeded(folder) {
874
- try {
875
- await fs.mkdir(folder);
876
- }
877
- catch (err) {
878
- if (err.code !== 'EEXIST') {
879
- throw err;
880
- }
881
- }
882
- }
883
-
884
-
885
- module.exports = {
886
- fetch,
887
- requireFromWorkingDirectory,
888
- expandBrowserModules,
889
- setupBrowser,
890
- teardownBrowser,
891
- processSpecification,
892
- completeWithAlternativeUrls,
893
- isLatestLevelThatPasses,
894
- expandCrawlResult,
895
- expandSpecResult,
896
- getGeneratedIDLNamesByCSSProperty,
897
- createFolderIfNeeded
898
- };
1
+ /**
2
+ * A bunch of utility functions common to multiple scripts
3
+ */
4
+
5
+ const fs = require('fs').promises;
6
+ const { existsSync } = require('fs');
7
+ const path = require('path');
8
+ const puppeteer = require('puppeteer');
9
+ const crypto = require('crypto');
10
+ const { AbortController } = require('abortcontroller-polyfill/dist/cjs-ponyfill');
11
+ const fetch = require('./fetch');
12
+ const specEquivalents = require('../specs/spec-equivalents.json');
13
+
14
+ const reffyModules = require('../browserlib/reffy.json');
15
+
16
+
17
+ /**
18
+ * Maximum depth difference supported between Reffy's install path and custom
19
+ * modules that may be provided on the command-line
20
+ *
21
+ * TODO: Find a way to get right of that, there should be no limit
22
+ */
23
+ const maxPathDepth = 20;
24
+
25
+
26
+ /**
27
+ * Returns a range array from 0 to the number provided (not included)
28
+ */
29
+ const range = n => Array.from(Array(n).keys());
30
+
31
+
32
+ /**
33
+ * Shortcut that returns a property extractor iterator
34
+ */
35
+ const prop = p => x => x[p];
36
+
37
+
38
+ /**
39
+ * Wrapper around the "require" function to require files relative to the
40
+ * current working directory (CWD), instead of relative to the current JS
41
+ * file.
42
+ *
43
+ * This is typically needed to be able to use "require" to load JSON config
44
+ * files provided as command-line arguments.
45
+ *
46
+ * @function
47
+ * @param {String} filename The path to the file to require
48
+ * @return {Object} The result of requiring the file relative to the current
49
+ * working directory.
50
+ */
51
+ function requireFromWorkingDirectory(filename) {
52
+ try {
53
+ return require(path.resolve(filename));
54
+ }
55
+ catch (err) {
56
+ return null;
57
+ }
58
+ }
59
+
60
+
61
+ /**
62
+ * Determine the path to the "node_modules" folder to resolve relative links
63
+ * in the ES6 browser lib modules. The path depends on whether Reffy is run
64
+ * directly, or installed as a library.
65
+ *
66
+ * @function
67
+ * @return {String} Path to the node_modules folder.
68
+ */
69
+ function getModulesFolder() {
70
+ const rootFolder = path.resolve(__dirname, '../..');
71
+ let folder = path.resolve(rootFolder, 'node_modules');
72
+ if (existsSync(folder)) {
73
+ return folder;
74
+ }
75
+ folder = path.resolve(rootFolder, '..');
76
+ return folder;
77
+ }
78
+ const modulesFolder = getModulesFolder();
79
+
80
+
81
+ /**
82
+ * Puppeteer browser instance used to load and process specifications
83
+ */
84
+ let browser = null;
85
+
86
+ /**
87
+ * Promise resolved when there is no running instance of Puppeteer. This allows
88
+ * to serialize calls to setupBrowser (and thus to crawlList and crawlSpecs in
89
+ * specs-crawler.js)
90
+ */
91
+ let browserClosed = Promise.resolve();
92
+ let resolveBrowserClosed = null;
93
+
94
+ /**
95
+ * The browser JS library that will be loaded onto every crawled page
96
+ */
97
+ let browserlib = null;
98
+
99
+
100
+ /**
101
+ * Expand list of browser modules with right set of descriptive properties
102
+ *
103
+ * User may specify a browser module as:
104
+ * - a name which must match one of the existing modules in browserlib
105
+ * - a relative path to an .mjs file which must exist
106
+ * - an object with an "href" property that is a relative path to an .mjs file
107
+ * which must exist
108
+ *
109
+ * Relative paths provided by the user are interpreted as relative to the
110
+ * current working directory, and converted to be relative to the browserlib
111
+ * directory.
112
+ *
113
+ * @function
114
+ * @public
115
+ * @return {Array(Object)} List of modules with an href, name and property keys
116
+ */
117
+ function expandBrowserModules(modules) {
118
+ // Helper function to create a camelCase name out of a module path
119
+ function getCamelCaseName(href) {
120
+ const filename = href.replace(/([^\/\\]+)\.mjs$/, '$1');
121
+ const nameParts = filename.split('-');
122
+ let name;
123
+ let namePart;
124
+ while (namePart = nameParts.shift()) {
125
+ namePart = namePart.replace(/\W/g, '');
126
+ if (name) {
127
+ name += namePart.substring(0, 1).toUpperCase() + namePart.substring(1);
128
+ }
129
+ else {
130
+ name = namePart;
131
+ }
132
+ }
133
+ return name;
134
+ }
135
+
136
+ const browserlibPath = path.resolve(__dirname, '..', 'browserlib');
137
+ if (!modules) {
138
+ return reffyModules.map(mod => Object.assign({
139
+ name: getCamelCaseName(mod.href),
140
+ expanded: true
141
+ }, mod));
142
+ }
143
+
144
+ modules = modules.map(mod => {
145
+ if (typeof mod === 'string') {
146
+ if (mod.endsWith('.mjs')) {
147
+ const name = getCamelCaseName(mod);
148
+ return {
149
+ href: path.relative(browserlibPath, path.join(process.cwd(), mod)).replace(/\\/g, '/'),
150
+ name,
151
+ property: name,
152
+ expanded: true
153
+ };
154
+ }
155
+ else if (mod === 'core') {
156
+ return reffyModules.map(mod => Object.assign({
157
+ name: getCamelCaseName(mod.href),
158
+ expanded: true
159
+ }, mod));
160
+ }
161
+ else {
162
+ const res = reffyModules.find(m => m.href === mod ||
163
+ getCamelCaseName(m.href) === mod || m.property === mod);
164
+ if (!res) {
165
+ throw new Error(`Unknown browserlib module ${mod}`);
166
+ }
167
+ return Object.assign({
168
+ name: getCamelCaseName(res.href),
169
+ expanded: true
170
+ }, res);
171
+ }
172
+ }
173
+ else if (mod.expanded) {
174
+ return mod;
175
+ }
176
+ else {
177
+ if (!mod.href) {
178
+ throw new Error('Browserlib module does not have an "href" property');
179
+ }
180
+ mod.href = path.relative(browserlibPath, path.join(process.cwd(), mod.href)).replace(/\\/g, '/');
181
+ if (!mod.name) {
182
+ mod.name = getCamelCaseName(mod.href);
183
+ }
184
+ if (!mod.property) {
185
+ mod.property = mod.name;
186
+ }
187
+ mod.expanded = true;
188
+ return mod;
189
+ }
190
+ });
191
+
192
+ return modules.flat();
193
+ }
194
+
195
+
196
+ /**
197
+ * Prepare the browserlib script that will be loaded in every crawled page.
198
+ *
199
+ * The script exposes a global reffy namespace with the requested modules.
200
+ *
201
+ * The function must be called before any attempt to call `processSpecification`
202
+ * and should only be called once. The `setupBrowser` function takes care of it.
203
+ *
204
+ * @function
205
+ * @private
206
+ */
207
+ function setupBrowserlib(modules) {
208
+ modules = expandBrowserModules(modules);
209
+ browserlib = 'window.reffy = window.reffy ?? {};\n';
210
+
211
+ if (modules.find(module => module.needsIdToHeadingMap)) {
212
+ browserlib += `
213
+ import mapIdsToHeadings from './map-ids-to-headings.mjs';
214
+ window.reffy.mapIdsToHeadings = mapIdsToHeadings;\n`;
215
+ }
216
+
217
+ browserlib += modules.map(module => `
218
+ import ${module.name} from '${module.href}';
219
+ window.reffy.${module.name} = ${module.name};
220
+ `).join('\n');
221
+ }
222
+
223
+
224
+ /**
225
+ * Setup and launch browser instance to use to load and process specifications.
226
+ *
227
+ * The function must be called before any attempt to call `processSpecification`
228
+ * and should only be called once.
229
+ *
230
+ * The function also generates the code that will inject the `reffy` namespace
231
+ * in each processed page.
232
+ *
233
+ * Note: Switch `headless` to `false` to access dev tools and debug processing
234
+ *
235
+ * @function
236
+ * @public
237
+ */
238
+ async function setupBrowser(modules) {
239
+ // There can be only one crawl running at a time
240
+ await browserClosed;
241
+ browserClosed = new Promise(resolve => resolveBrowserClosed = resolve);
242
+
243
+ // Create browser instance
244
+ // Note: switch "headless" to "false" (and comment out the call to
245
+ // "browser.close()") to access dev tools in debug mode
246
+ browser = await puppeteer.launch({ headless: true });
247
+ setupBrowserlib(modules);
248
+ }
249
+
250
+
251
+ /**
252
+ * Close and destroy browser instance.
253
+ *
254
+ * The function should be called once at the end of the processing.
255
+ *
256
+ * @function
257
+ * @public
258
+ */
259
+ async function teardownBrowser() {
260
+ if (browser) {
261
+ await browser.close();
262
+ browser = null;
263
+ resolveBrowserClosed();
264
+ resolveBrowserClosed = null;
265
+ }
266
+ }
267
+
268
+
269
+ /**
270
+ * Load and process the given specification.
271
+ *
272
+ * The method automatically exposes Reffy's library functions in a window.reffy
273
+ * namespace (see setupBrowserlib) so that the callback function can
274
+ * call them directly. Additional callback arguments that would need to be
275
+ * passed to the browser context can be provided through the "args" parameter.
276
+ *
277
+ * A crawl will typically fetch and render hundreds of specs, triggering a lot
278
+ * of network requests. Given that some of these requests (e.g. those on images)
279
+ * are of no interest for the processing, that it is wasteful to fetch the same
280
+ * resource again and again during a crawl, and that it is useful to have an
281
+ * offline mode for debugging purpose, the method will intercept network
282
+ * requests made by the browser, fail those that don't seem needed, and serve
283
+ * requests on resources that have already been fetched from a local file cache
284
+ * (the "cacheRefresh" setting in "config.json" allows to adjust this behavior).
285
+ *
286
+ * This triggers a few hiccups and needs for workarounds though:
287
+ * - Puppeteer's page.setRequestInterception does not play nicely with workers
288
+ * (which Respec typically uses) for the time being, so code uses the Chrome
289
+ * DevTools Protocol (CDP) directly, see:
290
+ * https://github.com/puppeteer/puppeteer/issues/4208
291
+ * - Tampering with network requests means that the loaded page gets
292
+ * automatically flagged as "non secure". That's mostly fine but means that
293
+ * "window.crypto.subtle" is not available and Respec needs that to generate
294
+ * hashes. The code re-creates that method manually.
295
+ * - A few specs send HTTP requests that return "streams". This does not work
296
+ * well with Puppeteer's "networkidle0" option (to detect when a spec is mostly
297
+ * done loading), and that does not work with a file cache approach either.
298
+ * These requests get intercepted.
299
+ *
300
+ * A couple of additional notes:
301
+ * - Requests to CSS stylesheets are not intercepted because Respec dynamically
302
+ * loads a few CSS resources, and intercepting them could perhaps impact the
303
+ * rest of the generation.
304
+ * - SVG images are not intercepted because a couple of specs have a PNG
305
+ * fallback mechanism that, when interception is on, make the browser spin
306
+ * forever, see discussion in: https://github.com/w3c/accelerometer/pull/55
307
+ *
308
+ * Strictly speaking, intercepting request is only needed to be able to use the
309
+ * "networkidle0" option. The whole interception logic could be dropped (and
310
+ * "networkidle2" could be used instead) if it proves too unstable.
311
+ *
312
+ * @function
313
+ * @public
314
+ * @param {Object|String} spec The spec to load. Must either be a URL string or
315
+ * an object with a "url" property. If the object contains an "html" property,
316
+ * the HTML content is loaded instead.
317
+ * @param {function} processFunction Processing function that will be evaluated
318
+ * in the browser context where the spec gets loaded
319
+ * @param {Arrays} args List of arguments to pass to the processing function.
320
+ * These arguments typically make it possible to pass contextual information
321
+ * to the processing function (such as the spec object that describes the
322
+ * spec being processed, or the list of processing modules to run)
323
+ * @param {Object} options Processing options. The "quiet" flag tells the
324
+ * function not to report warnings to the console. The "forceLocalFetch"
325
+ * flag tells the function that all network requests need to be only handled
326
+ * by Node.js's "fetch" function (as opposed to falling back to Puppeteer's
327
+ * network and caching logic), which is useful to keep full control of network
328
+ * requests in tests.
329
+ * @return {Promise} The promise to get the results of the processing function
330
+ */
331
+ async function processSpecification(spec, processFunction, args, options) {
332
+ spec = (typeof spec === 'string') ? { url: spec } : spec;
333
+ processFunction = processFunction || function () {};
334
+ args = args || [];
335
+ options = options || {};
336
+
337
+ if (!browser) {
338
+ throw new Error('Browser instance not initialized, setupBrowser() must be called before processSpecification().');
339
+ }
340
+
341
+ // Create an abort controller for network requests directly handled by the
342
+ // Node.js code (and not by Puppeteer)
343
+ const abortController = new AbortController();
344
+
345
+ // Inner function that returns a network interception method suitable for
346
+ // a given CDP session.
347
+ function interceptRequest(cdp, controller) {
348
+ return async function ({ requestId, request }) {
349
+ try {
350
+ // Abort network requests to common image formats
351
+ if (/\.(gif|ico|jpg|jpeg|png|ttf|woff)$/i.test(request.url)) {
352
+ await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
353
+ return;
354
+ }
355
+
356
+ // Abort network requests that return a "stream", they won't
357
+ // play well with Puppeteer's "networkidle0" option, and our
358
+ // custom "fetch" function does not handle streams in any case
359
+ if (request.url.startsWith('https://drafts.csswg.org/api/drafts/') ||
360
+ request.url.startsWith('https://drafts.css-houdini.org/api/drafts/') ||
361
+ request.url.startsWith('https://drafts.fxtf.org/api/drafts/') ||
362
+ request.url.startsWith('https://api.csswg.org/shepherd/')) {
363
+ await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
364
+ return;
365
+ }
366
+
367
+ // The request needs to be intercepted, either because it
368
+ // targets one of the local script files, or because we would
369
+ // like to use our local cache to avoid sending network requests
370
+ // when possible.
371
+ //console.log(`intercept ${request.url}`);
372
+ const reffyPath = '/reffy/scripts/';
373
+ const webidl2Path = '/node_modules/webidl2/';
374
+ if (request.url.includes(reffyPath) || request.url.includes(webidl2Path)) {
375
+ let body;
376
+ if (request.url.endsWith('reffy.mjs')) {
377
+ body = Buffer.from(browserlib);
378
+ }
379
+ else if (request.url.includes(webidl2Path)) {
380
+ const file = path.resolve(modulesFolder, 'webidl2',
381
+ request.url.substring(request.url.indexOf(webidl2Path) + webidl2Path.length));
382
+ body = await fs.readFile(file);
383
+ }
384
+ else {
385
+ // The "__" folders are just a means to resolve
386
+ // relative paths that are higher than the "browserlib"
387
+ // folder on the storage drive
388
+ const requestPath = request.url.substring(request.url.indexOf(reffyPath) + reffyPath.length);
389
+ let depth = requestPath.lastIndexOf('__/') / 3;
390
+ const filename = requestPath.substring(requestPath.lastIndexOf('__/') + 3);
391
+ let filePath = path.resolve(__dirname, '..', 'browserlib');
392
+ while (depth < maxPathDepth - 1) {
393
+ filePath = path.resolve(filePath, '..');
394
+ depth += 1;
395
+ }
396
+ const file = path.resolve(filePath, filename);
397
+ body = await fs.readFile(file);
398
+ }
399
+ await cdp.send('Fetch.fulfillRequest', {
400
+ requestId,
401
+ responseCode: 200,
402
+ responseHeaders: [{ name: 'Content-Type', value: 'application/javascript' }],
403
+ body: body.toString('base64')
404
+ });
405
+ }
406
+ else {
407
+ if ((request.method !== 'GET') ||
408
+ (!request.url.startsWith('http:') && !request.url.startsWith('https:'))) {
409
+ await cdp.send('Fetch.continueRequest', { requestId });
410
+ return;
411
+ }
412
+
413
+ const response = await fetch(request.url, { signal: controller.signal });
414
+ const body = await response.buffer();
415
+ await cdp.send('Fetch.fulfillRequest', {
416
+ requestId,
417
+ responseCode: response.status,
418
+ responseHeaders: Object.keys(response.headers.raw()).map(header => {
419
+ return {
420
+ name: header,
421
+ value: response.headers.raw()[header].join(',')
422
+ };
423
+ }),
424
+ body: body.toString('base64')
425
+ });
426
+ }
427
+ //console.log(`intercept ${request.url} - done`);
428
+ }
429
+ catch (err) {
430
+ if (controller.signal.aborted) {
431
+ // All is normal, processing was over, page and CDP session
432
+ // have been closed, and network requests have been aborted
433
+ // console.log(`intercept ${request.url} - aborted`);
434
+ return;
435
+ }
436
+
437
+ // Fetch from file cache failed somehow
438
+ // Let Puppeteer handle the request as fallback unless
439
+ // calling function asked us not to do that
440
+ if (options.forceLocalFetch) {
441
+ options.quiet ?? console.warn(`[warn] Network request for ${request.url} failed`, err);
442
+ await cdp.send('Fetch.failRequest', { requestId, errorReason: 'Failed' });
443
+ }
444
+ else {
445
+ options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url}`, err);
446
+ try {
447
+ await cdp.send('Fetch.continueRequest', { requestId });
448
+ }
449
+ catch (err) {
450
+ if (!controller.signal.aborted) {
451
+ options.quiet ?? console.warn(`[warn] Fall back to regular network request for ${request.url} failed`, err);
452
+ }
453
+ }
454
+ }
455
+ }
456
+ }
457
+ }
458
+
459
+ try {
460
+ const page = await browser.newPage();
461
+
462
+ // Disable cache if caller wants to handle all network requests
463
+ await page.setCacheEnabled(!options.forceLocalFetch);
464
+
465
+ // Intercept all network requests to use our own version of "fetch"
466
+ // that makes use of the local file cache.
467
+ const cdp = await page.target().createCDPSession();
468
+ await cdp.send('Fetch.enable');
469
+ cdp.on('Fetch.requestPaused', interceptRequest(cdp, abortController));
470
+
471
+ // Quick and dirty workaround to re-create the "window.crypto.digest"
472
+ // function that Respec needs (context is seen as unsecure because we're
473
+ // tampering with network requests)
474
+ await page.exposeFunction('hashdigest', (algorithm, buffer) => {
475
+ return crypto.createHash(algorithm).update(Buffer.from(Object.values(buffer))).digest();
476
+ });
477
+ await page.evaluateOnNewDocument(() => {
478
+ window.crypto.subtle = {
479
+ digest: function (algorithm, buffer) {
480
+ const res = window.hashdigest('sha1', buffer);
481
+ return res.then(buf => {
482
+ return Uint8Array.from(buf.data);
483
+ });
484
+ }
485
+ };
486
+ });
487
+
488
+ // Common loading option to give the browser enough time to load large
489
+ // specs, and to consider navigation done when there haven't been
490
+ // network connections in the past 500ms. This should be enough to
491
+ // handle "redirection" through JS or meta refresh (which would not
492
+ // have time to run if we used "load").
493
+ const loadOptions = {
494
+ timeout: 120000,
495
+ waitUntil: 'networkidle0'
496
+ };
497
+
498
+ // Load the page
499
+ // (note HTTP status is 0 when `file://` URLs are loaded)
500
+ if (spec.html) {
501
+ await page.setContent(spec.html, loadOptions);
502
+ }
503
+ else {
504
+ const result = await page.goto(spec.url, loadOptions);
505
+ if ((result.status() !== 200) && (!spec.url.startsWith('file://') || (result.status() !== 0))) {
506
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
507
+ }
508
+ }
509
+
510
+ // Handle multi-page specs
511
+ const pageUrls = spec.pages || [];
512
+
513
+ if (pageUrls.length > 0) {
514
+ const pages = [];
515
+ for (const url of pageUrls) {
516
+ const subAbort = new AbortController();
517
+ const subPage = await browser.newPage();
518
+ await subPage.setCacheEnabled(!options.forceLocalFetch);
519
+ const subCdp = await subPage.target().createCDPSession();
520
+ await subCdp.send('Fetch.enable');
521
+ subCdp.on('Fetch.requestPaused', interceptRequest(subCdp, subAbort));
522
+ try {
523
+ // (Note HTTP status is 0 when `file://` URLs are loaded)
524
+ const subresult = await subPage.goto(url, loadOptions);
525
+ if ((subresult.status() !== 200) && (!url.startsWith('file://') || (subresult.status() !== 0))) {
526
+ throw new Error(`Loading ${spec.url} triggered HTTP status ${result.status()}`);
527
+ }
528
+ const html = await subPage.evaluate(() => {
529
+ return document.body.outerHTML
530
+ .replace(/<body/, '<section')
531
+ .replace(/<\/body/, '</section');
532
+ });
533
+ pages.push({ url, html });
534
+ }
535
+ finally {
536
+ subAbort.abort();
537
+ await subCdp.detach();
538
+ await subPage.close();
539
+ }
540
+ }
541
+ await page.evaluate(pages => {
542
+ for (const subPage of pages) {
543
+ const section = document.createElement('section');
544
+ section.setAttribute('data-reffy-page', subPage.url);
545
+ section.innerHTML = subPage.html;
546
+ document.body.appendChild(section);
547
+ }
548
+ }, pages);
549
+ }
550
+
551
+ // Wait until the generation of the spec is completely over
552
+ await page.evaluate(async () => {
553
+ // Detect draft CSS server hiccups as done in browser-specs:
554
+ // https://github.com/w3c/browser-specs/blob/b31fc0b03ba67a19162883afc30e01fcec3c600d/src/fetch-info.js#L292
555
+ const title = (window.document.querySelector('h1')?.textContent || '')
556
+ .replace(/\n/g, '').trim();
557
+ if (title.startsWith('Index of ')) {
558
+ throw new Error(`CSS server issue detected`);
559
+ }
560
+
561
+ const usesRespec = (window.respecConfig || window.eval('typeof respecConfig !== "undefined"')) &&
562
+ window.document.head.querySelector("script[src*='respec']");
563
+
564
+ function sleep(ms) {
565
+ return new Promise(resolve => setTimeout(resolve, ms));
566
+ }
567
+
568
+ async function isReady(counter) {
569
+ counter = counter || 0;
570
+ if (counter > 60) {
571
+ throw new Error('Respec generation took too long');
572
+ }
573
+ if (window.document.respec?.ready) {
574
+ await window.document.respec.ready;
575
+ }
576
+ else if (usesRespec) {
577
+ await sleep(1000);
578
+ await isReady(counter + 1);
579
+ }
580
+ }
581
+
582
+ await isReady();
583
+ });
584
+
585
+ // Capture and report Reffy's browserlib warnings
586
+ page.on('console', msg => {
587
+ const text = msg.text();
588
+ if (text.startsWith('[reffy] ')) {
589
+ options.quiet ?? console.warn(spec.url, `[${msg.type()}]`, msg.text().substr('[reffy] '.length));
590
+ }
591
+ });
592
+
593
+ // Capture and report when page throws an error
594
+ page.on('pageerror', err => {
595
+ options.quiet ?? console.warn(err);
596
+ });
597
+
598
+ // Expose additional functions defined in src/browserlib/ to the
599
+ // browser context, under a window.reffy namespace, so that processing
600
+ // script may call them. The script is an ES6 module and needs to be
601
+ // loaded as such.
602
+ // Note that we're using a fake relative URL on purpose. In practice,
603
+ // the request will be processed by "interceptRequest", which will
604
+ // respond with the contents of the script file. Also, there are
605
+ // multiple path levels in that fake URL on purpose as well, because
606
+ // scripts import the WebIDL2.js library with a URL like
607
+ // "../../node_modules/[...]" and may import other scripts that are
608
+ // higher in the folder tree.
609
+ await page.addScriptTag({
610
+ url: `reffy/scripts/${range(maxPathDepth).map(n => '__').join('/')}/reffy.mjs`,
611
+ type: 'module'
612
+ });
613
+
614
+ // Run the processFunction method in the browser context
615
+ const results = await page.evaluate(processFunction, ...args);
616
+
617
+ // Pending network requests may still be in the queue, flag the page
618
+ // as closed not to send commands on a CDP session that's no longer
619
+ // attached to anything
620
+ abortController.abort();
621
+
622
+ // Close CDP session and page
623
+ // Note that gets done no matter what when browser.close() gets called.
624
+ await cdp.detach();
625
+ await page.close();
626
+
627
+ return results;
628
+ }
629
+ finally {
630
+ // Signal abortion again (in case an exception was thrown)
631
+ abortController.abort();
632
+ }
633
+ }
634
+
635
+
636
+ /**
637
+ * Enrich the spec description with alternative URLs (versions and equivalents)
638
+ *
639
+ * TODO: The list used to contain published versions of TR specs retrieved from
640
+ * the W3C API. They are useful to improve the relevance of reported anomalies.
641
+ *
642
+ * @function
643
+ * @param {Object} spec Spec description structure (only the URL is useful)
644
+ * @return {Object} The same structure, enriched with the URL of the editor's
645
+ * draft when one is found
646
+ */
647
+ function completeWithAlternativeUrls(spec) {
648
+ spec.versions = new Set();
649
+ spec.versions.add(spec.url);
650
+ if (spec.release) {
651
+ spec.versions.add(spec.release.url);
652
+ }
653
+ if (spec.nightly) {
654
+ spec.versions.add(spec.nightly.url);
655
+ }
656
+ if (specEquivalents[spec.url]) {
657
+ spec.versions = new Set([
658
+ ...spec.versions,
659
+ ...specEquivalents[spec.url]
660
+ ]);
661
+ }
662
+ spec.versions = [...spec.versions];
663
+ return spec;
664
+ }
665
+
666
+
667
+ /**
668
+ * Returns true when the given spec is the latest "fullest" level of that spec
669
+ * in the given list of specs that passes the given predicate.
670
+ *
671
+ * "Fullest" means "not a delta spec, unless that is the only level that passes
672
+ * the predicate".
673
+ *
674
+ * @function
675
+ * @public
676
+ * @param {Object} spec Spec to check
677
+ * @param {Array(Object)} list List of specs (must include the spec to check)
678
+ * @param {function} predicate Predicate function that the spec must pass. Must
679
+ * be a function that takes a spec as argument and returns a boolean.
680
+ * @return {Boolean} true if the spec is the latest "fullest" level in the list
681
+ * that passes the predicate.
682
+ */
683
+ function isLatestLevelThatPasses(spec, list, predicate) {
684
+ predicate = predicate || (_ => true);
685
+ if (!predicate(spec)) {
686
+ return false;
687
+ }
688
+ if (spec.seriesComposition === 'delta') {
689
+ while (spec.seriesPrevious) {
690
+ spec = list.find(s => s.shortname === spec.seriesPrevious);
691
+ if (!spec) {
692
+ break;
693
+ }
694
+ if ((spec.seriesComposition === 'full') && predicate(spec)) {
695
+ return false;
696
+ }
697
+ }
698
+ return true;
699
+ }
700
+ while (spec.seriesNext) {
701
+ if (!spec) {
702
+ break;
703
+ }
704
+ spec = list.find(s => s.shortname === spec.seriesNext);
705
+ if ((spec.seriesComposition === 'full') && predicate(spec)) {
706
+ return false;
707
+ }
708
+ }
709
+ return true;
710
+ }
711
/**
 * Takes the results of a crawl for a given spec and expands it to include the
 * contents of referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the spec object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} spec Spec crawl result that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 *   the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 *   (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 *   contains the contents of referenced files and no longer references external
 *   files (for the requested properties)
 */
async function expandSpecResult(spec, baseFolder, properties) {
    const base = baseFolder || '';

    // Links to extracts look like "subfolder/file.json" or "subfolder/file.idl"
    const extractLink = /^[^\/]+\/[^\/]+\.(json|idl)$/;

    // Retrieve the raw contents of an extract, over HTTPS or from disk
    // depending on the type of base that was given
    async function loadContents(relativePath) {
        if (base.startsWith('https:')) {
            const url = (new URL(relativePath, base)).toString();
            const response = await fetch(url, { nolog: true });
            return response.text();
        }
        return fs.readFile(path.join(base, relativePath), 'utf8');
    }

    async function expandProperty(property) {
        // Only consider properties explicitly requested
        if (properties && !properties.includes(property)) {
            return;
        }

        // Only consider properties that link to an extract, i.e. an IDL
        // or JSON file in a subfolder.
        const value = spec[property];
        if (!value || (typeof value !== 'string') || !value.match(extractLink)) {
            return;
        }

        let contents = await loadContents(value);
        if (value.endsWith('.json')) {
            contents = JSON.parse(contents);
        }

        if (property === 'css') {
            // Special case for CSS where the "css" level does not exist
            // in the generated files
            const css = Object.assign({}, contents);
            delete css.spec;
            spec[property] = css;
        }
        else if (property === 'idl') {
            // Special case for raw IDL extracts, which are text extracts.
            // Also drop header that may have been added when extract was
            // serialized.
            if (contents.startsWith('// GENERATED CONTENT - DO NOT EDIT')) {
                contents = contents.substring(contents.indexOf('\n\n') + 2);
            }
            spec.idl = contents;
        }
        else {
            spec[property] = contents[property];
        }
    }

    await Promise.all(Object.keys(spec).map(expandProperty));
    return spec;
}
784
/**
 * Takes the results of a crawl (typically the contents of the index.json file)
 * and expands it to include the contents of all referenced files.
 *
 * The function handles both files and HTTPS resources, using either filesystem
 * functions (for files) or fetch (for HTTPS resources).
 *
 * Note the crawl object is expanded in place.
 *
 * @function
 * @public
 * @param {Object} crawl Crawl index object that needs to be expanded
 * @param {string} baseFolder The base folder that contains the crawl file, or
 *   the base HTTPS URI to resolve relative links in the crawl object.
 * @param {Array(string)} properties An explicit list of properties to expand
 *   (no value means "expand all possible properties")
 * @return {Promise(object)} The promise to get an expanded crawl object that
 *   contains the entire crawl report (and no longer references external files)
 */
async function expandCrawlResult(crawl, baseFolder, properties) {
    const base = baseFolder || '';
    // Expand all spec entries in parallel; each entry is updated in place
    const expansions = crawl.results.map(
        spec => expandSpecResult(spec, base, properties));
    crawl.results = await Promise.all(expansions);
    return crawl;
}
812
/**
 * Retrieves the list of IDL attribute names that the CSS property generates
 * per the CSSOM spec, see:
 * https://drafts.csswg.org/cssom/#ref-for-css-property-to-idl-attribute
 *
 * @function
 * @param {String} property CSS property name
 * @return {Array(String)} An array of IDL attribute names, dashed attribute
 *   first, then camel-cased attribute if different, then webkit-cased attribute
 *   name if needed
 */
function getGeneratedIDLNamesByCSSProperty(property) {
    // Converts a CSS property to an IDL attribute name per the CSSOM spec:
    // https://drafts.csswg.org/cssom/#css-property-to-idl-attribute
    function cssPropertyToIDLAttribute(property, lowercaseFirst) {
        let output = '';
        let uppercaseNext = false;
        if (lowercaseFirst) {
            // Drop the leading "-" so that "-webkit-foo" camel-cases to
            // "webkitFoo" instead of "WebkitFoo".
            // (slice replaces the deprecated String.prototype.substr)
            property = property.slice(1);
        }
        for (const c of property) {
            if (c === '-') {
                uppercaseNext = true;
            } else if (uppercaseNext) {
                uppercaseNext = false;
                output += c.toUpperCase();
            } else {
                output += c;
            }
        }
        return output;
    }

    // Start with dashed attribute
    const res = [property];

    // Add camel-cased attribute if different
    const camelCased = cssPropertyToIDLAttribute(property, false);
    if (camelCased !== property) {
        res.push(camelCased);
    }

    // Add webkit-cased attribute if needed
    if (property.startsWith('-webkit-')) {
        res.push(cssPropertyToIDLAttribute(property, true));
    }

    return res;
}
863
/**
 * Creates the given folder if it does not exist yet.
 *
 * @function
 * @public
 * @param {String} folder Path to folder to create
 *   (from current working directory)
 */
async function createFolderIfNeeded(folder) {
    try {
        await fs.mkdir(folder);
    }
    catch (err) {
        // Folder (or a file with that name) already exists: nothing to do.
        // Any other error (e.g. missing parent folder) is propagated.
        if (err.code === 'EEXIST') {
            return;
        }
        throw err;
    }
}
883
/**
 * Public interface of this utility module: fetch/require helpers, browser
 * setup and spec-processing functions, and crawl-result expansion helpers
 * shared by the Reffy CLI tools.
 */
module.exports = {
    fetch,
    requireFromWorkingDirectory,
    expandBrowserModules,
    setupBrowser,
    teardownBrowser,
    processSpecification,
    completeWithAlternativeUrls,
    isLatestLevelThatPasses,
    expandCrawlResult,
    expandSpecResult,
    getGeneratedIDLNamesByCSSProperty,
    createFolderIfNeeded
};