reffy 3.1.0 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -201,6 +201,9 @@ export default function (spec, idToHeading = {}) {
201
201
  case "html":
202
202
  preProcessHTML();
203
203
  break;
204
+ case "ecmascript":
205
+ preProcessEcmascript();
206
+ break;
204
207
  case "SVG2":
205
208
  preProcessSVG2();
206
209
  break;
@@ -225,6 +228,339 @@ export default function (spec, idToHeading = {}) {
225
228
  .map(node => definitionMapper(node, idToHeading));
226
229
  }
227
230
 
231
/**
 * Pre-process an ECMAScript (ecmarkup) document in place so that the terms it
 * defines are exposed through `<dfn>` elements carrying `data-lt`,
 * `data-dfn-type`, `data-dfn-for` and `data-export` / `data-noexport`
 * attributes.
 *
 * The function runs several passes over the global `document`:
 *  1. collect the "abstract method" tables to scope abstract operations;
 *  2. wrap the content of relevant `<h1>` headings in a `<dfn>` (objects,
 *     errors, prototype members, global properties, abstract operations...);
 *  3. wrap `<emu-eqn aoid>` abstract operations;
 *  4. wrap the first cell of "State Components for ..." tables;
 *  5. type, identify and export the remaining `<dfn>` elements;
 *  6. drop "X prototype (object)" duplicates of `%X.prototype%`.
 *
 * Elements located in a section flagged with a `legacy` attribute are ignored.
 * The function takes no argument and returns nothing.
 */
function preProcessEcmascript() {
  // Skip elements in sections marked as legacy
  const legacySectionFilter = n => !n.closest("[legacy]");

  // Move all the children of `el` into a new <dfn>, itself appended to `el`,
  // and return that <dfn>.
  const wrapWithDfn = (el) => {
    const dfn = document.createElement("dfn");
    for (const child of [...el.childNodes]) {
      dfn.appendChild(child);
    }
    el.appendChild(dfn);
    // Copy the id of the parent element, if any. Calling setAttribute with
    // a null value would create the literal id "null".
    const parentId = el.parentNode.getAttribute("id");
    if (parentId !== null) {
      dfn.setAttribute("id", parentId);
    }
    dfn.dataset.ltNodefault = true;
    return dfn;
  };

  // Normalize a method heading à la "foo ( a [ , b ] )" into "foo(a, b)"
  const cleanMethodName = (name) => {
    return name.replace(/\[/g, '')
      .replace(/\]/g, '') // removing brackets used to mark optional args
      .replace(/ \( */, '(')
      .replace(/ *\)/, ')')
      .replace(/ *,/g, ','); // trimming internal spaces
  };

  // Names of the definitions that received a specific type
  const definitionNames = new Set();
  // Definition type ("interface", "namespace", "exception") per object name
  const idlTypes = {};

  // We find the list of abstract methods
  // to help with scoping abstract operations
  const abstractMethods = {};
  const abstractMethodCaptions = [...document.querySelectorAll("figcaption")]
    .filter(el => el.textContent.match(/(abstract|additional) method/i) && el.parentNode.querySelector("emu-xref"));
  for (const figcaption of abstractMethodCaptions) {
    const scope = figcaption.querySelector("emu-xref").textContent;
    const table = figcaption.parentNode.querySelector("tbody");
    // A figure without a table body has no method to list
    if (!table) continue;
    for (const td of table.querySelectorAll("tr td:first-child")) {
      // We only consider the name of the method, not the potential parameters
      // as they're not necessarily consistently named across
      // the list and the definition
      const methodName = td.textContent.trim().split('(')[0];
      abstractMethods[methodName] = scope;
    }
  }

  const globalValuePropertiesId = "sec-value-properties-of-the-global-object";
  const sectionNumberRegExp = /^([A-Z]\.)?[0-9\.]+ /;
  [...document.querySelectorAll("h1")]
    .filter(legacySectionFilter)
    .forEach(el => {
      let dfnName = el.textContent.replace(sectionNumberRegExp, '').trim(); // remove section number
      const dfnId = el.parentNode.id;
      if (dfnId.match(/-objects?$/) && dfnName.match(/ Objects?$/)) {

        // Skip headings that look like object definitions, but aren't
        const notObjectIds = ["sec-global-object", "sec-fundamental-objects", "sec-waiterlist-objects"];
        if (notObjectIds.includes(dfnId)) return;

        // only keep ids that match a credible pattern for object names
        // i.e. a single word
        // there are exceptions to that simple rule
        // RegExp includes its expansion (regular expansion) in the id
        // WeakRef is translated into weak-ref in the id
        const objectsIdsExceptions = ["sec-regexp-regular-expression-objects", "sec-weak-ref-objects", "sec-aggregate-error-objects", "sec-finalization-registry-objects", "sec-async-function-objects"];

        if (!dfnId.match(/sec-[a-z]+-objects?/)
            && !objectsIdsExceptions.includes(dfnId)
           ) return;
        const dfn = wrapWithDfn(el);
        // set data-lt
        dfnName = dfnName
          .replace(/^The /, '')
          .replace(/ Objects?$/, '')
          // regexp def includes "(Regular Expression)"
          .replace(/ \([^\)]*\)/, '');
        dfn.dataset.lt = dfnName;

        // FIXME
        // These interfaces are also defined in WebIDL, which in general is
        // the preferred source for these terms
        // Because bikeshed does not yet support spec-specific imports,
        // we hide these terms as not exported
        // cf https://github.com/w3c/reffy/pull/732#issuecomment-925950287
        const exportExceptions = ["Promise", "DataView", "ArrayBuffer"];
        if (exportExceptions.includes(dfnName)) {
          dfn.dataset.noexport = "";
        }

        if (dfnName.match(/^[A-Z]/)) {
          // set dfn-type
          if (dfnName.match(/Error$/)) {
            dfn.dataset.dfnType = "exception";
          } else if (!el.parentNode.querySelector('[id$="constructor"]')) {
            // Objects without constructors match to the namespace type
            dfn.dataset.dfnType = "namespace";
          } else {
            dfn.dataset.dfnType = "interface";
          }
          // We keep track of types associated with a name
          // to associate the same type to the relevant intrinsic object
          // à la %Math%
          idlTypes[dfnName] = dfn.dataset.dfnType;
        }
        definitionNames.add(dfnName);
      } else if (dfnId.match(/-[a-z]+error$/) && !dfnName.match(/\(/)) {
        const dfn = wrapWithDfn(el);
        dfn.dataset.lt = dfnName;
        dfn.dataset.dfnType = "exception";
        definitionNames.add(dfnName);
        idlTypes[dfnName] = dfn.dataset.dfnType;
      } else if (dfnId.match(/[-\.]prototype[-\.]/)) {
        // methods and attributes on objects

        // Skip headings with a space and no parenthesis
        // (they mention prototype but aren't a prototype property def)
        // with the exception of "set " and "get " headings
        // (which describe setters and getters)
        if (!dfnName.match(/\(/) && (dfnName.match(/ /) && !dfnName.match(/^[gs]et /))) return;

        // Skip unscoped internal methods à la [[SetPrototypeOf]](V)
        if (dfnName.match(/\[\[/)) return;

        // Skip symbol-based property definitions;
        // not clear they're useful as externally referenceable names
        if (dfnName.match(/@@/)) return;

        // Skip .constructor as that cannot be considered as an attribute
        if (dfnName.match(/\.constructor$/)) return;

        const dfn = wrapWithDfn(el);
        // set definition scope
        dfn.dataset.dfnFor = dfnName.replace(/\.prototype\..*/, '')
          .replace(/^[gs]et /, ''); // remove "get"/"set" markers

        // Remove parent object prototype (set as scope)
        dfnName = dfnName.replace(/.*\.prototype\./, '');

        dfn.dataset.lt = dfnName;
        // set dfn-type
        if (dfn.dataset.lt.match(/\(/)) {
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
        } else {
          dfn.dataset.dfnType = "attribute";
        }
      } else if (el.closest(`#${globalValuePropertiesId}`)) {
        // properties of the global object
        // The heading of the section itself is not a property: the id is
        // carried by the parent of the heading (and contains no "#")
        if (el.parentNode.id !== globalValuePropertiesId) {
          const dfn = wrapWithDfn(el);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "attribute";
          dfn.dataset.dfnFor = "globalThis";
        }
      } else {
        // We handle other headings that look like a method / property
        // on an object instance (rather than its prototype)
        // or an abstract op

        // if there is already a dfn element, we move on
        if (el.querySelector("dfn")) return;

        // only dealing with well-known patterns
        // (no nested quantifiers, to keep the matching linear)
        if (!dfnName.match(/^[a-z]+\.[a-z]+/i) // à la JSON.parse
            && !dfnName.match(/^[a-z]+ *\(/i) // à la ArrayCreate ( or decodeURI (
           ) return;
        // Skip symbol-based property definitions
        if (dfnName.match(/@@/)) return;

        // Skip .prototype as that cannot be considered
        // as an attribute
        if (dfnName.match(/\.prototype$/)) return;

        // Skip headings where foo.bar appears as part of a longer phrase
        if (!dfnName.match(/\(/) && dfnName.match(/ /)) return;

        // redundant definitions of constructors on the global object
        // e.g. "Array ( . . . )"
        if (dfnName.match(/\. \. \./)) return;

        const dfn = wrapWithDfn(el);

        if (dfnName.match(/^[a-z]+\.[a-z]+/i)) {
          // set definition scope
          // This assumes that such methods and attributes are only defined
          // one-level deep from the global scope
          dfn.dataset.dfnFor = dfnName.replace(/\..*$/, '');
          dfnName = dfnName.replace(`${dfn.dataset.dfnFor}.`, '');
          if (dfnName.match(/\(/)) {
            dfnName = cleanMethodName(dfnName);
            dfn.dataset.lt = dfnName;
            dfn.dataset.dfnType = "method";
          } else {
            dfn.dataset.lt = dfnName;
            if (dfnName.match(/^[A-Z]+$/)) {
              dfn.dataset.dfnType = "const";
            } else {
              dfn.dataset.dfnType = "attribute";
            }
          }
        } else if (dfnName.match(/^[A-Z][A-Za-z]* *\(/)) { // Abstract ops à la ArrayCreate or global constructor
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          const opName = dfnName.split('(')[0];

          // distinguish global constructors from abstract operations
          if (idlTypes[opName]) {
            dfn.dataset.dfnType = "constructor";
            dfn.dataset.dfnFor = opName;
          } else {
            // If the name is listed as an Abstract Method
            // we set the dfn-for accordingly
            if (abstractMethods[opName]) {
              dfn.dataset.dfnFor = abstractMethods[opName];
            }

            dfn.dataset.dfnType = "abstract-op";
          }
        } else { // methods of the global object
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
          dfn.dataset.dfnFor = "globalThis";
        }
        definitionNames.add(dfnName);
      }
    });

  // Extract abstract operations from <emu-eqn> with aoid attribute
  [...document.querySelectorAll("emu-eqn[aoid]")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions of constant values (e.g. msPerDay)
      if (el.textContent.match(/=/)) return;
      const dfn = wrapWithDfn(el);
      dfn.dataset.lt = el.getAttribute("aoid");
      dfn.dataset.dfnType = "abstract-op";
      dfn.id = el.id;
    });

  // Extract State Components from tables
  [...document.querySelectorAll("figure > table")]
    .filter(legacySectionFilter)
    .forEach(el => {
      const title = el.parentNode.querySelector("figcaption")?.textContent || "";
      if (!title.match(/state components for/i)) return;
      const scope = title.replace(/^.*state components for/i, '').trim();
      // All the components of a table share the id of the enclosing
      // table or clause (when there is one)
      const container = el.closest("emu-table[id],emu-clause[id]");
      for (const td of el.querySelectorAll("tr td:first-child")) {
        const dfn = wrapWithDfn(td);
        dfn.dataset.dfnFor = scope;
        if (container) {
          dfn.id = container.id;
        }
      }
    });

  [...document.querySelectorAll("dfn")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions in conformance page and conventions page
      if (el.closest('section[data-reffy-page$="conformance.html"]') ||
          el.closest('section[data-reffy-page$="notational-conventions.html"]')) {
        el.removeAttribute("id");
        return;
      }

      // rely on the aoid attribute as a hint we're dealing
      // with an abstract-op
      if (el.getAttribute("aoid")) {
        el.dataset.dfnType = "abstract-op";
      }

      // Mark well-known intrinsic objects as the same type as their visible object (if set), defaulting to "interface"
      if (el.textContent.match(/^%[A-Z].*%$/)) {
        el.dataset.dfnType = idlTypes[el.textContent.replace(/%/g, '')] || "interface";
        definitionNames.add(el.textContent.trim());
      }

      // %names% in the global object section are operations of the globalThis object
      if (el.closest('[data-reffy-page$="global-object.html"]') && el.textContent.match(/^%[a-z]+%/i)) {
        el.dataset.dfnFor = "globalThis";
        // TODO: this doesn't capture the arguments
        el.dataset.dfnType = "method";
      }

      // Mark well-known symbols as "const"
      // for lack of a better type, and as the WebIDL spec has been doing
      if (el.textContent.match(/^@@[a-z]*$/i)) {
        el.dataset.dfnType = "const";
      }
      if (el.getAttribute("variants")) {
        el.dataset.lt = `${el.dataset.lt ?? el.textContent.trim()}|${el.getAttribute("variants")}`;
      }

      // Skip definitions that have already been identified
      // with a more specific typing
      if (!el.dataset.dfnType) {
        // we already have a matching typed definition
        if (definitionNames.has(el.textContent.trim())) return;
      }

      // If the <dfn> has no id, we attach it the one from the closest
      // <emu-clause> with an id
      // Note that this means several definitions can share the same id
      if (!el.getAttribute("id")) {
        const clause = el.closest("emu-clause[id]");
        if (clause) {
          el.setAttribute("id", clause.getAttribute("id"));
        }
      }

      // Any generic <dfn> not previously filtered out
      // is deemed to be exported, scoped to ECMAScript
      if (!el.dataset.dfnType) {
        if (!el.dataset.dfnFor) {
          el.dataset.dfnFor = "ECMAScript";
        }
        el.dataset.export = "";
      }
    });

  // Another pass of clean up for duplicates
  // This cannot be done in the first pass
  // because %Foo.prototype% does not necessarily get identified before
  // the equivalent " prototype object" dfn
  [...document.querySelectorAll("dfn[id][data-export]")]
    .filter(legacySectionFilter)
    .forEach(dfn => {
      // we have the syntactic equivalent %x.prototype%
      const m = dfn.textContent.trim().match(/^(.*) prototype( object)?$/);
      if (m && definitionNames.has(`%${m[1].trim()}.prototype%`)) {
        dfn.removeAttribute("id");
        delete dfn.dataset.export;
      }
    });
}
563
+
228
564
  function preProcessHTML() {
229
565
  const headingSelector = [
230
566
  'h2[id]:not([data-dfn-type]) dfn',
@@ -235,7 +571,7 @@ function preProcessHTML() {
235
571
  ].join(',');
236
572
 
237
573
  // we copy the id on the dfn when it is set on the surrounding heading
238
- [...document.querySelectorAll(headingSelector)]
574
+ document.querySelectorAll(headingSelector)
239
575
  .forEach(el => {
240
576
  const headingId = el.closest("h2, h3, h4, h5, h6").id;
241
577
  if (!el.id) {
@@ -246,11 +582,12 @@ function preProcessHTML() {
246
582
  // all the definitions in indices.html are non-normative, so we skip them
247
583
  // to avoid having to properly type them
248
584
  // they're not all that interesting
249
- [...document.querySelectorAll('section[data-reffy-page$="indices.html"] dfn[id]')].forEach(el => {
250
- el.dataset.dfnSkip = true;
251
- });
585
+ document.querySelectorAll('section[data-reffy-page$="indices.html"] dfn[id]')
586
+ .forEach(el => {
587
+ el.dataset.dfnSkip = true;
588
+ });
252
589
 
253
- [...document.querySelectorAll("dfn[id]:not([data-dfn-type]):not([data-skip])")]
590
+ document.querySelectorAll("dfn[id]:not([data-dfn-type]):not([data-skip])")
254
591
  .forEach(el => {
255
592
  // Hard coded rules for special ids
256
593
  // dom-style is defined elsewhere
@@ -281,7 +618,7 @@ function preProcessSVG2() {
281
618
  linkHeading.dataset.lt = "link";
282
619
  }
283
620
 
284
- [...document.querySelectorAll(".attrdef dfn[id]:not([data-dfn-type]):not([data-skip])")]
621
+ document.querySelectorAll(".attrdef dfn[id]:not([data-dfn-type]):not([data-skip])")
285
622
  .forEach(el => {
286
623
  el.dataset.dfnType = "element-attr";
287
624
  const attrDesc = document.querySelector('[data-reffy-page$="attindex.html"] th span.attr-name a[href$="#' + el.id + '"]');
@@ -291,7 +628,8 @@ function preProcessSVG2() {
291
628
  console.error("Could not find description for " + el.textContent);
292
629
  }
293
630
  });
294
- [...document.querySelectorAll("dt[id] > .adef, dt[id] > .property")].forEach(el => {
631
+ document.querySelectorAll("dt[id] > .adef, dt[id] > .property")
632
+ .forEach(el => {
295
633
  const dt = el.parentNode;
296
634
  const newDt = document.createElement("dt");
297
635
  const dfn = document.createElement("dfn");
@@ -309,7 +647,7 @@ function preProcessSVG2() {
309
647
  newDt.appendChild(dfn);
310
648
  dt.replaceWith(newDt);
311
649
  });
312
- [...document.querySelectorAll('b[id^="__svg__"]')].forEach(el => {
650
+ document.querySelectorAll('b[id^="__svg__"]').forEach(el => {
313
651
  const [,, containername, membername] = el.id.split('__');
314
652
  if (containername && membername) {
315
653
  let container = idlTree.find(i => i.name === containername);
@@ -326,14 +664,14 @@ function preProcessSVG2() {
326
664
  }
327
665
  }
328
666
  });
329
- [...document.querySelectorAll('h3[id^="Interface"]:not([data-dfn-type])')].forEach(el => {
667
+ document.querySelectorAll('h3[id^="Interface"]:not([data-dfn-type])').forEach(el => {
330
668
  const name = el.id.slice("Interface".length);
331
669
  if (idlTree.find(i => i.name === name && i.type === "interface")) {
332
670
  el.dataset.dfnType = "interface";
333
671
  el.dataset.lt = name;
334
672
  }
335
673
  });
336
- [...document.querySelectorAll('b[id]:not([data-dfn-type])')].forEach(el => {
674
+ document.querySelectorAll('b[id]:not([data-dfn-type])').forEach(el => {
337
675
  const name = el.textContent;
338
676
  const idlItem = idlTree.find(i => i.name === name) ;
339
677
  if (idlItem) {
@@ -1,6 +1,12 @@
1
1
  import createOutline from './create-outline.mjs';
2
2
  import getAbsoluteUrl from './get-absolute-url.mjs';
3
3
 
4
// Regular expression to capture the numbering of a heading. The expression
// extracts numbers such as "1.", "10.", "A.", "A.3", "13.3.4.". Note: a
// top-level number always ends with a ".", but there may be no final "." in
// sublevels (Bikeshed adds one, ReSpec does not).
// The alternatives are, in order: a top-level appendix letter ("A."), a
// top-level number with one or more digits ("1.", "10."), an appendix
// sublevel ("A.3") and a numbered sublevel ("13.3.4.").
const reNumber = /^([A-Z]\.|[0-9]+\.|[A-Z](\.[0-9]+)+\.?|[0-9]+(\.[0-9]+)+\.?)\s/;
9
+
4
10
  /**
5
11
  * Generate a mapping between elements that have an ID and the closest heading
6
12
  * (that also has an ID) under which these elements appear in the DOM tree.
@@ -21,11 +27,12 @@ import getAbsoluteUrl from './get-absolute-url.mjs';
21
27
  * such a heading.
22
28
  */
23
29
  export default function () {
24
- // Regular expression to capture the numbering of a heading. The expression
25
- // extracts numbers such as "1.", "A.", "A.3", "13.3.4.". Note: a top-level
26
- // number always ends with a ".", but there may be no final "." in sublevels
27
- // (Bikeshed adds one, ReSpec does not).
28
- const reNumber = /^([A-Z0-9]\.|[A-Z](\.[0-9]+)+\.?|[0-9]+(\.[0-9]+)+\.?)\s/;
30
+ // Special-casing ecmascript specs which use special markup for sections
31
+ // <emu-clause>
32
+ if (document.querySelector("emu-clause")) {
33
+ return esMapIdToHeadings();
34
+ }
35
+
29
36
 
30
37
  // Get a flat list of all conceptual sections
31
38
  function flattenSections(outline) {
@@ -86,4 +93,44 @@ export default function () {
86
93
  });
87
94
 
88
95
  return mappingTable;
89
- }
96
+ }
97
+
98
/**
 * Variant of the ID-to-heading mapping for ECMAScript specs, which mark up
 * sections with ecmarkup elements (`<emu-clause>` and friends) instead of
 * regular sectioning and heading elements.
 *
 * Each element with an ID (`<emu-xref>` aside) is mapped to the closest
 * enclosing section that has an ID, described by the first `<h1>` found in
 * that section.
 *
 * @returns {Object} A table indexed by the absolute URL of each element,
 *   whose values are objects with the `id`, `href` and `title` of the section
 *   and, when the heading starts with a section number, a `number` property
 *   (without the final ".").
 */
function esMapIdToHeadings() {
  // Based on https://tc39.es/ecmarkup/
  // and actual emu-* tags used in the ecmascript spec with ids
  const ignoreTags = ["emu-xref"];
  const sectionTags = ["emu-intro", "emu-clause", "emu-annex"];
  const sectionSelector = sectionTags.map(tag => `${tag}[id]`).join(',');

  // Compute once whether we created a single page version out of multiple pages
  const singlePage = !document.querySelector('[data-reffy-page]');

  const mappingTable = {};
  for (const el of document.querySelectorAll(`[id]:not(${ignoreTags.join(',')})`)) {
    const section = el.closest(sectionSelector);

    // These are spec UI-related ids, so not a loss
    if (!section) continue;

    // A section without a heading cannot be described, skip its elements
    // rather than failing the whole mapping
    const heading = section.querySelector("h1");
    if (!heading) continue;

    const trimmedText = heading.textContent.trim();
    const nodeid = getAbsoluteUrl(el, { singlePage });
    const href = getAbsoluteUrl(section, { singlePage });

    const match = trimmedText.match(reNumber);
    const number = match ? match[1] : null;

    mappingTable[nodeid] = {
      id: section.id,
      href,
      title: trimmedText.replace(reNumber, '').trim().replace(/\s+/g, ' ')
    };

    if (number) {
      // Store the number without the final "."
      mappingTable[nodeid].number = number.replace(/\.$/, '');
    }
  }
  return mappingTable;
}
@@ -28,7 +28,7 @@ const path = require('path');
28
28
  const browserSpecs = require('browser-specs');
29
29
  const requireFromWorkingDirectory = require('../lib/util').requireFromWorkingDirectory;
30
30
  const expandCrawlResult = require('../lib/util').expandCrawlResult;
31
- const crawlList = require('./crawl-specs').crawlList;
31
+ const crawlList = require('../lib/specs-crawler').crawlList;
32
32
  const mergeCrawlResults = require('./merge-crawl-results').mergeCrawlResults;
33
33
  const studyCrawl = require('./study-crawl').studyCrawl;
34
34