reffy 21.0.0 → 21.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1094 +1,1097 @@
1
- import extractWebIdl from './extract-webidl.mjs';
2
- import informativeSelector from './informative-selector.mjs';
3
- import getAbsoluteUrl from './get-absolute-url.mjs';
4
- import {parse} from "../../node_modules/webidl2/index.js";
5
- /**
6
- * Extract definitions in the spec that follow the "Definitions data model":
7
- * https://tabatkins.github.io/bikeshed/#dfn-contract
8
- *
9
- * Each definition returned by the function will have the following properties:
10
- * - id: The local ID in the DOM. Should be unique within a spec page.
11
- * - href: The absolute URL to the definition.
12
- * - linkingText: List of linking phrases for references.
13
- * - localLinkingText: List of linking phrases for local references only.
14
- * - type: The definition type. One of the values in
15
- * https://tabatkins.github.io/bikeshed/#dfn-types
16
- * - for: The list of namespaces for the definition
17
- * - access: "public" when definition can be referenced by other specifications,
18
- * "private" when it should be viewed as a local definition.
19
- * - informative: true when definition appears in an informative section,
20
- * false if it is normative
21
- * - heading: Heading under which the term is to be found. An object with "id",
22
- * "title", and "number" properties
23
- * - definedIn: An indication of where the definition appears in the spec. Value
24
- * can be one of "dt", "pre", "table", "heading", "note", "example", or
25
- * "prose" (last one indicates that definition appears in the main body of
26
- * the spec)
27
- * - links: A list of interesting links with IDs that complete the definitions,
28
- * notably non-normative descriptions that target web developers.
29
- *
30
- * The extraction ignores definitions with an unknown type. A warning is issued
31
- * to the console when that happens.
32
- *
33
- * The extraction uses the first definition it finds when it bumps into a term
34
- * that is defined more than once (same "linkingText", same "type", same "for").
35
- * A warning is issued to the console when that happens.
36
- *
37
- * @function
38
- * @public
39
- * @return {Array<Object>} An Array of definitions
40
- */
41
-
42
- import cloneAndClean from './clone-and-clean.mjs';
43
-
44
/**
 * Normalize a string: drop leading and trailing whitespace, and replace each
 * internal run of whitespace characters with a single space.
 *
 * @param {string} str The text to normalize
 * @return {string} The normalized text
 */
function normalize(str) {
  // Once trimmed, the string can only contain internal whitespace runs, so
  // splitting on them and re-joining with a space collapses each run.
  const words = str.trim().split(/\s+/);
  return words.join(' ');
}
47
-
48
/**
 * Tell whether the definition element has a known definition type.
 *
 * Valid types are defined in https://tabatkins.github.io/bikeshed/#dfn-types
 * (+ "namespace", "event" and "permission" which are not yet in the doc).
 * An element without a "data-dfn-type" attribute is considered to be of type
 * "dfn". A warning is issued to the console when the type is unknown.
 *
 * @param {Element} el The definition element to check
 * @return {boolean} true when the type is known, false otherwise
 */
function hasValidType(el) {
  const knownTypes = new Set([
    // CSS types
    'property',
    'descriptor',
    'value',
    'type',
    'at-rule',
    'function',
    'selector',

    // Web IDL types
    'namespace',
    'interface',
    'constructor',
    'method',
    'argument',
    'attribute',
    'callback',
    'dictionary',
    'dict-member',
    'enum',
    'enum-value',
    'exception',
    'const',
    'typedef',
    'stringifier',
    'serializer',
    'iterator',
    'maplike',
    'setlike',
    'extended-attribute',
    'event',
    'permission',

    // Element types
    'element',
    'element-state',
    'element-attr',
    'attr-value',

    // CDDL types
    'cddl-module',
    'cddl-type',
    'cddl-parameter',
    'cddl-key',
    'cddl-value',

    // URL scheme
    'scheme',

    // HTTP header
    'http-header',

    // Grammar type
    'grammar',

    // "English" terms
    'abstract-op',
    'dfn'
  ]);

  const dfnType = el.getAttribute('data-dfn-type') ?? 'dfn';
  if (knownTypes.has(dfnType)) {
    return true;
  }

  console.warn('[reffy]', `"${dfnType}" is an invalid dfn type for "${normalize(el.textContent)}"`);
  return false;
}
119
-
120
/**
 * Filter callback that keeps the first occurrence of an exported definition.
 *
 * Two definitions are duplicates when they are both public, have the same
 * type, the same set of linking texts and the same set of namespaces ("for").
 *
 * @param {Object} dfn The definition being tested
 * @param {number} idx Index of the definition in the list (unused)
 * @param {Array<Object>} list The complete list of definitions
 * @return {boolean} true when the definition is not already defined earlier
 *   in the list, false (with a console warning) when it is a duplicate.
 */
function isNotAlreadyExported(dfn, idx, list) {
  // Membership must be tested with "includes": "find" returns the matched
  // value, which is falsy for an empty string, so lists that contain ""
  // (e.g. data-lt="foo|") would never be reported as equal.
  const sameItems = (a, b) =>
    a.length === b.length && a.every(item => b.includes(item));

  // The definition itself is in the list, so the search always succeeds
  const first = list.find(d => d === dfn ||
    (d.access === 'public' && dfn.access === 'public' &&
     d.type === dfn.type &&
     sameItems(d.linkingText, dfn.linkingText) &&
     sameItems(d.for, dfn.for)));
  if (first !== dfn) {
    console.warn('[reffy]', `Duplicate dfn found for "${dfn.linkingText[0]}", type="${dfn.type}", for="${dfn.for[0]}", dupl=${dfn.href}, first=${first.href}`);
  }
  return first === dfn;
}
135
-
136
/**
 * Extract the element's inner HTML content, removing any complex structure,
 * so that the result can be injected elsewhere without creating problems.
 *
 * Elements that are neither simple grouping content nor text-level semantics
 * are dropped (with a console warning). All attributes are dropped except
 * "href" (converted to an absolute URL), "dir", "lang" and "title".
 *
 * @param {Element} proseEl The element that contains the prose definition
 * @return {string} The trimmed, simplified inner HTML of the element
 */
function getHtmlProseDefinition(proseEl) {
  // Record the page the element belongs to before cloning it.
  // NOTE(review): cloneAndClean presumably returns a detached copy, in which
  // case looking for a `[data-reffy-page]` ancestor from within the copy
  // cannot succeed - confirm against clone-and-clean.mjs.
  const originalPage = proseEl.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');

  // Strip element of all annotations
  const cleanEl = cloneAndClean(proseEl);

  // Keep simple grouping content and text-level semantics elements
  const keepSelector = [
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li',
    'ol', 'p', 'pre', 'ul',
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em',
    'i', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr'
  ].join(',');
  let el;
  while (el = cleanEl.querySelector(`:not(${keepSelector})`)) {
    // The content is more complex than anticipated. It may be worth checking
    // the definition to assess whether the extraction logic needs to become
    // smarter. For lack of a better reporting mechanism for now, let's record
    // a warning.
    console.warn('[reffy]', `Unexpected element "${el.nodeName}" found in textual definition of "${cleanEl.getAttribute('data-defines')}"`);
    el.remove();
  }

  // Drop all attributes except "href", "dir", "lang" and "title"
  // For "href", let's make sure that we have an absolute URL
  [...cleanEl.querySelectorAll('*')].forEach(el => {
    el.getAttributeNames().forEach(attr => {
      if (attr === 'href') {
        // Prefer a page found from within the copy (same result as before
        // when there is one), fall back to the page of the original element,
        // and eventually to the URL of the document.
        const page =
          el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page') ??
          originalPage;
        const url = new URL(el.getAttribute('href'), page ?? window.location.href);
        el.setAttribute('href', url.toString());
      }
      else if (!['dir', 'lang', 'title'].includes(attr)) {
        el.removeAttribute(attr);
      }
    });
  });

  return cleanEl.innerHTML.trim();
}
177
-
178
/**
 * Convert a definition element into a definition object.
 *
 * @param {Element} el The definition element (with an "id" attribute)
 * @param {Object} idToHeading Mapping between the absolute URL of an element
 *   and the heading under which that element appears
 * @param {boolean} usesDfnDataModel Whether the spec uses the definitions
 *   data model. When it does not, all definitions are considered public.
 * @return {Object} The definition, with "id", "href", "linkingText",
 *   "localLinkingText", "type", "for", "access", "informative", "heading",
 *   "definedIn", "links" properties, and an "htmlProse" property when a prose
 *   definition exists for the term.
 */
function definitionMapper(el, idToHeading, usesDfnDataModel) {
  const splitOnPipes = value => value.split('|').map(normalize);

  // Enclosing element under which the definition appears. Value can be one of
  // "dt", "pre", "table", "heading", "note", "example", or "prose" (last one
  // indicates that definition appears in the main body of the specification)
  const container = el.closest('dt,pre,table,h1,h2,h3,h4,h5,h6,.note,.example') || el;
  const containerName = container.nodeName;
  let definedIn;
  if (['DT', 'PRE', 'TABLE'].includes(containerName)) {
    definedIn = containerName.toLowerCase();
  }
  else if (['H1', 'H2', 'H3', 'H4', 'H5', 'H6'].includes(containerName)) {
    definedIn = 'heading';
  }
  else if (container.classList.contains('note')) {
    definedIn = 'note';
  }
  else if (container.classList.contains('example')) {
    definedIn = 'example';
  }
  else {
    definedIn = 'prose';
  }

  // Linking text is given by the data-lt attribute if present, or it is the
  // textual content... but we'll skip section numbers that might have been
  // captured when definition is defined in a heading, as in:
  // https://www.w3.org/TR/ethical-web-principles/#oneweb
  let linkingText;
  if (el.hasAttribute('data-lt')) {
    linkingText = splitOnPipes(el.getAttribute('data-lt'));
  }
  else {
    let textSource = el;
    if (el.querySelector('.secno')) {
      // Work on a copy not to alter the document
      textSource = el.cloneNode(true);
      textSource.querySelector('.secno').remove();
    }
    linkingText = [normalize(textSource.textContent)];
  }

  // Compute the absolute URL with fragment
  // (Note the crawler merges pages of a multi-page spec in the first page
  // to ease parsing logic, and we want to get back to the URL of the page)
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  const id = el.getAttribute('id');
  const target = new URL(page ?? window.location.href);
  target.hash = '#' + encodeURIComponent(id);
  const href = target.toString();

  // Additional linking text can be defined for local references
  const localLt = el.getAttribute('data-local-lt');
  const localLinkingText = localLt ? splitOnPipes(localLt) : [];

  // Definition may be namespaced to other constructs. Note the list is not
  // purely comma-separated due to function parameters. For instance,
  // attribute value may be "method(foo,bar), method()"
  const dfnFor = el.getAttribute('data-dfn-for');
  const namespaces = dfnFor ?
    dfnFor.split(/,(?![^\(]*\))/).map(normalize) :
    [];

  // Definition is public if explicitly marked as exportable or if export has
  // not been explicitly disallowed and its type is not "dfn" or a CDDL type,
  // or if the spec is an old spec that does not use the "data-dfn-type"
  // convention.
  const dfnType = el.getAttribute('data-dfn-type');
  const exportedThroughType =
    !el.hasAttribute('data-noexport') &&
    el.hasAttribute('data-dfn-type') &&
    dfnType !== 'dfn' &&
    !dfnType.startsWith('cddl-');
  const isPublic =
    !usesDfnDataModel ||
    el.hasAttribute('data-export') ||
    exportedThroughType;

  // Heading under which the term is to be found,
  // Defaults to the page or document URL and the spec's title
  const heading = idToHeading[href] ?? {
    href: (new URL(page ?? window.location.href)).toString(),
    title: document.title
  };

  const dfn = {
    // ID is the id attribute
    // (ID may not be unique in a multi-page spec)
    id,
    href,
    linkingText,
    localLinkingText,
    // Link type must be specified, or it is "dfn"
    type: dfnType || 'dfn',
    for: namespaces,
    access: isPublic ? 'public' : 'private',
    // Whether the term is defined in a normative/informative section
    informative: !!el.closest(informativeSelector),
    heading,
    definedIn,
    // Important links that complement the definition
    // (typically: anchors in "for web developers" sections)
    links: []
  };

  // Extract a prose definition in HTML for the term, if available
  const proseEl = document.querySelector(`[data-defines="#${dfn.id}"]`);
  const htmlProse = proseEl ? getHtmlProseDefinition(proseEl) : null;
  if (htmlProse) {
    dfn.htmlProse = htmlProse;
  }

  return dfn;
}
300
-
301
/**
 * Extract the definitions of the spec loaded in the current document.
 *
 * @param {string|Object} spec The shortname of the spec, or an object with a
 *   "shortname" property. The shortname triggers spec-specific preprocessing
 *   for a handful of specs.
 * @param {Object} idToHeading Mapping between the absolute URL of an element
 *   and the heading under which that element appears
 * @return {Array<Object>} The list of definitions
 */
export default function (spec, idToHeading = {}) {
  // re data-lt, see https://github.com/w3c/reffy/issues/336#issuecomment-650339747
  const dfnSelector = 'dfn[id]:not([data-lt=""])';
  // As for `<dfn>` we'll consider that headings without a `data-dfn-type`
  // have an implicit `"data-dfn-type"="dfn"` attribute, provided they also
  // have some other definition related attribute (because we only want to
  // extract headings that want to be seen as definitions)
  const headingSelector = ':is(h2,h3,h4,h5,h6)[id]:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt]):not([data-lt=""])';
  const definitionsSelector = `${dfnSelector},${headingSelector}`;

  // A few specs need to be prepared before definitions can be extracted
  const shortname = (typeof spec === 'string') ? spec : spec.shortname;
  if (shortname === "CSS2") {
    preProcessCSS2();
  }
  else if (shortname === "html") {
    preProcessHTML();
  }
  else if (shortname === "ecmascript") {
    preProcessEcmascript();
  }
  else if (shortname === "SVG2") {
    preProcessSVG2();
  }
  else if (shortname === "rfc8610") {
    // RFC8610 defines CDDL
    preProcessRFC8610();
  }
  else if (shortname === "webgl1") {
    preProcessWebGL1();
  }

  const dfnEls = [...document.querySelectorAll(definitionsSelector)];
  const dataModelAttributes = [
    'data-dfn-type', 'data-dfn-for', 'data-export', 'data-noexport'
  ];
  const usesDfnDataModel = dfnEls.some(dfn =>
    dataModelAttributes.some(attr => dfn.hasAttribute(attr)));

  // 2021-06-21: Temporary preprocessing of invalid "idl" dfn type (used for
  // internal slots) while fix for https://github.com/w3c/respec/issues/3644
  // propagates to all EDs and /TR specs. To be dropped once crawls no
  // longer produce warnings.
  for (const node of dfnEls) {
    if (node.getAttribute('data-dfn-type') !== 'idl') {
      continue;
    }
    const linkingText = node.hasAttribute('data-lt') ?
      node.getAttribute('data-lt').split('|').map(normalize) :
      [normalize(node.textContent)];
    node.setAttribute('data-dfn-type', linkingText[0].endsWith(')') ? 'method' : 'attribute');
    console.warn('[reffy]', `Fixed invalid "idl" dfn type "${normalize(node.textContent)}"`);
  }

  // Exclude IDL terms defined in a block that is flagged as to be excluded
  // or inside a <del>
  const isNotExcluded = node => !node.closest('.exclude,del');

  // When the whole term links to an external spec, the definition is an
  // imported definition. Such definitions are not "real" definitions, let's
  // skip them.
  // One hardcoded exception-to-the-rule, see:
  // https://github.com/w3c/webref/issues/882
  // (pending a proper dfns curation process, see:
  // https://github.com/w3c/webref/issues/789)
  const isNotImported = node => {
    const link =
      node.querySelector('a[href^="http"]') ??
      node.closest('a[href^="http"]');
    if (!link) {
      return true;
    }
    if (node.textContent.trim() !== link.textContent.trim()) {
      return true;
    }
    return link.href === 'https://www.w3.org/TR/CSS2/syndata.html#vendor-keywords';
  };

  // Each step runs on the whole list before the next one starts
  const validEls = dfnEls.filter(hasValidType);
  const includedEls = validEls.filter(isNotExcluded);
  const localEls = includedEls.filter(isNotImported);
  const mappedDfns = localEls.map(node =>
    definitionMapper(node, idToHeading, usesDfnDataModel));
  const definitions = mappedDfns.filter(isNotAlreadyExported);

  // Some specs have informative "For web developers" sections targeted at
  // presenting concepts to web developers. These sections contain anchors
  // that are useful for documentation purpose. The anchors themselves are
  // references to terms defined elsewhere in the spec. We will capture them in
  // a `links` property attached to the underlying definition.
  // Note: Ideally, `.domintro` would be added to the informative selector list
  // but some specs use `.domintro` for lists that define IDL terms. We'll get
  // rid of them by skipping lists that have `dfn`.
  const devSelector = '.domintro dt:not(dt:has(dfn)) a[id]';
  [...document.querySelectorAll(devSelector)].forEach(node => {
    const dfnHref = getAbsoluteUrl(node, { attribute: 'href' });
    const dfn = definitions.find(d => d.href === dfnHref);
    if (!dfn) {
      // When an interface inherits from another, the reference may target
      // a base dfn in another spec. For example:
      // https://encoding.spec.whatwg.org/#ref-for-dom-generictransformstream-readable
      // ... targets the Streams spec. There aren't many occurrences of this
      // pattern and the occurrences do not look super interesting to link to
      // from a documentation perspective. Let's skip them.
      console.warn('[reffy]', `Dev dfn ${node.textContent} (${node.id}) targets unknown/external dfn at ${node.href}`);
      return;
    }

    const href = getAbsoluteUrl(node);
    const page = node.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
    const heading = idToHeading[href] ?? {
      href: (new URL(page ?? window.location.href)).toString(),
      title: document.title
    };
    dfn.links.push({
      type: 'dev',
      id: node.getAttribute('id'),
      name: normalize(node.closest('dt').textContent),
      href,
      heading
    });
  });

  return definitions;
}
418
-
419
/**
 * The ECMAScript spec does not follow the definitions data model. This
 * function prepares the document so that definitions can be extracted:
 * relevant headings, `<emu-eqn>` abstract operations, state components in
 * tables and production rules get wrapped in a `<dfn>` completed with the
 * expected `id` and `data-*` attributes, and existing `<dfn>` get typed,
 * scoped and exported as needed.
 *
 * The function alters the document in place and does not return anything.
 */
function preProcessEcmascript() {
  // Skip elements in sections marked as legacy
  const legacySectionFilter = n => !n.closest("[legacy]");

  // Move the contents of the element into a new <dfn> child, and copy the
  // "id" and "aoid" attributes of the parent element onto that <dfn>
  const wrapWithDfn = (el) => {
    // wrap with a dfn
    const dfn = document.createElement("dfn");
    for (const child of [...el.childNodes]) {
      dfn.appendChild(child);
    }
    el.appendChild(dfn);
    // set id, provided there is one: setAttribute converts a null value to
    // the "null" string, which would create a bogus ID
    const parentId = el.parentNode.getAttribute("id");
    if (parentId !== null) {
      dfn.setAttribute("id", parentId);
    }
    if (el.parentNode.hasAttribute("aoid")) {
      dfn.setAttribute("aoid", el.parentNode.getAttribute("aoid"));
    }
    return dfn;
  };

  const cleanMethodName = (name) => {
    return name.replace(/\[/g, '')
      .replace(/\]/g, '') // removing brackets used to mark optional args
      .replace(/ \( */, '(')
      .replace(/ *\)/, ')')
      .replace(/ *,/g, ','); // trimming internal spaces
  };

  // Names of the definitions that get a specific type, and types associated
  // with object names
  const definitionNames = new Set();
  const idlTypes = {};

  // We find the list of abstract methods
  // to help with scoping abstract operations
  const abstractMethods = {};
  const abstractMethodCaptions = [...document.querySelectorAll("figcaption")]
    .filter(el => el.textContent.match(/(abstract|additional) method/i) && el.parentNode.querySelector("emu-xref"));
  for (const figcaption of abstractMethodCaptions) {
    // The reference may be in the figure but not in the caption, and the
    // figure may not contain a table body. Nothing to extract in such cases.
    const scopeEl = figcaption.querySelector("emu-xref");
    const table = figcaption.parentNode.querySelector("tbody");
    if (!scopeEl || !table) {
      continue;
    }
    let scope = scopeEl.textContent;
    if (scope.endsWith('Environment Records')) {
      // Environment records come with an abstract class, and subclasses:
      // https://tc39.es/ecma262/multipage/executable-code-and-execution-contexts.html#sec-environment-records
      // Methods are defined for each class. We pretend that the scope is the
      // abstract class for now. Exact scope will be determined by looking at
      // the title of the section under which the method is found.
      scope = 'Environment Records';
    }
    for (const td of table.querySelectorAll("tr td:first-child")) {
      // We only consider the name of the method, not the potential parameters
      // as they're not necessarily consistently named across
      // the list and the definition
      const methodName = td.textContent.split('(')[0].trim();
      abstractMethods[methodName] = scope;
    }
  }

  // Regular expression used to drop section numbers from section titles
  const sectionNumberRegExp = /^([A-Z]\.)?[0-9\.]+ /;

  // Regular expression that matches scoped methods à la "JSON.parse"
  const scopedNameRegExp = /^[a-z0-9]+\.[a-z0-9]+/i;

  // Regular expression that matches general unscoped method names à la
  // "ArrayCreate (", "ToInt32 (" or "decodeURI (". The expression also matches
  // constructors.
  // (No nested quantifier: it would match the same names but backtrack
  // exponentially on long names that are not followed by a parenthesis)
  const methodNameRegExp = /^[a-z0-9]+ *\(/i;

  // More specific regular expression that matches abstract operations methods
  // à la "ToInt32 (". Does not match "decodeURI (" for instance as it does not
  // start with an upper case character.
  const abstractOpRegExp = /^[A-Z][a-zA-Z0-9]+ *\(/;

  [...document.querySelectorAll("h1")]
    .filter(legacySectionFilter)
    .forEach(el => {
      let dfnName = el.textContent.replace(sectionNumberRegExp, '').trim(); // remove section number
      const dfnId = el.parentNode.id;
      if (dfnId.match(/-objects?$/) && dfnName.match(/ Objects?$/)) {

        // Skip headings that look like object definitions, but aren't
        const notObjectIds = ["sec-global-object", "sec-fundamental-objects", "sec-waiterlist-objects"];
        if (notObjectIds.includes(dfnId)) return;

        // only keep ids that match a credible pattern for object names
        // i.e. a single word
        // there are exceptions to that simple rule
        // RegExp includes its expansion (regular expansion) in the id
        // WeakRef is translated into weak-ref in the id
        const objectsIdsExceptions = ["sec-regexp-regular-expression-objects", "sec-weak-ref-objects", "sec-aggregate-error-objects", "sec-finalization-registry-objects", "sec-async-function-objects"];

        if (!dfnId.match(/sec-[a-z]+-objects?/)
            && !objectsIdsExceptions.includes(dfnId)
           ) return;
        const dfn = wrapWithDfn(el);
        // set data-lt
        dfnName = dfnName
          .replace(/^The /, '')
          .replace(/ Objects?$/, '')
          // regexp def includes "(Regular Expression)"
          .replace(/ \([^\)]*\)/, '');
        dfn.dataset.lt = dfnName;

        // FIXME
        // These interfaces are also defined in WebIDL, which in general is
        // the preferred source for these terms
        // Because bikeshed does not yet support spec-specific imports,
        // we hide these terms as not exported
        // cf https://github.com/w3c/reffy/pull/732#issuecomment-925950287
        const exportExceptions = [ "Promise", "DataView", "ArrayBuffer" ];
        if (exportExceptions.includes(dfnName)) {
          dfn.dataset.noexport = "";
        }

        if (dfnName.match(/^[A-Z]/)) {
          // set dfn-type
          if (dfnName.match(/Error$/)) {
            dfn.dataset.dfnType = "exception";
          } else if (!el.parentNode.querySelector('[id$="constructor"]')) {
            // Objects without constructors match to the namespace type
            dfn.dataset.dfnType = "namespace";
          } else {
            dfn.dataset.dfnType = "interface";
          }
          // We keep track of types associated with a name
          // to associate the same type to the relevant intrinsic object
          // à la %Math%
          idlTypes[dfnName] = dfn.dataset.dfnType;
        }
        definitionNames.add(dfnName);
      } else if (dfnId.match(/-[a-z]+error$/) && !dfnName.match(/\(/)) {
        const dfn = wrapWithDfn(el);
        dfn.dataset.lt = dfnName;
        dfn.dataset.dfnType = "exception";
        definitionNames.add(dfnName);
        idlTypes[dfnName] = dfn.dataset.dfnType;
      } else if (dfnId.match(/[-\.]prototype[-\.]/)) {
        // methods and attributes on objects

        // Skip headings with a space and no parenthesis
        // (they mention prototype but aren't a prototype property def)
        // with the exception of "set " and "get " headings
        // (which describe setters and getters)
        if (!dfnName.match(/\(/) && (dfnName.match(/ /) && !dfnName.match(/^[gs]et /))) return;

        // Skip unscoped internal methods à la [[SetPrototypeOf]](V)
        if (dfnName.match(/\[\[/)) return;

        // Skip symbol-based property definitions;
        // not clear they're useful as externally referenceable names
        if (dfnName.match(/@@/)) return;

        // Skip .constructor as that cannot be considered as an attribute
        if (dfnName.match(/\.constructor$/)) return;

        const dfn = wrapWithDfn(el);
        // set definition scope
        dfn.dataset.dfnFor = dfnName.replace(/\.prototype\..*/, '')
          .replace(/^[gs]et /, ''); // remove "get"/"set" markers

        // Remove parent object prototype (set as scope)
        dfnName = dfnName.replace(/.*\.prototype\./, '');

        dfn.dataset.lt = dfnName;
        // set dfn-type
        if (dfn.dataset.lt.match(/\(/)) {
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
        } else {
          dfn.dataset.dfnType = "attribute";
        }
      } else if (el.closest("#sec-value-properties-of-the-global-object")) {
        // properties of the global object
        // (the heading of the section itself is not a property. Note the ID
        // is on the parent of the heading and does not start with a "#")
        if (dfnId !== "sec-value-properties-of-the-global-object") {
          const dfn = wrapWithDfn(el);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "attribute";
          dfn.dataset.dfnFor = "globalThis";
        }
      } else {
        // We handle other headings that look like a method / property
        // on an object instance (rather than its prototype)
        // or an abstract op

        // if there is already a dfn element, we move on
        if (el.querySelector("dfn")) return;

        // only dealing with well-known patterns
        if (!dfnName.match(scopedNameRegExp)
            && !dfnName.match(methodNameRegExp)
           ) return;
        // Skip symbol-based property definitions
        if (dfnName.match(/@@/)) return;

        // Skip .prototype as that cannot be considered
        // as an attribute
        if (dfnName.match(/\.prototype$/)) return;

        // Skip headings where foo.bar appears as part of a longer phrase
        if (!dfnName.match(/\(/) && dfnName.match(/ /)) return;

        // redundant definitions of constructors on the global object
        // e.g. "Array ( . . . )"
        if (dfnName.match(/\. \. \./)) return;

        const dfn = wrapWithDfn(el);

        if (dfnName.match(scopedNameRegExp)) {
          // set definition scope
          // This assumes that such methods and attributes are only defined
          // one-level deep from the global scope
          dfn.dataset.dfnFor = dfnName.replace(/\..*$/, '');
          dfnName = dfnName.replace(dfn.dataset.dfnFor + ".", '');
          if (dfnName.match(/\(/)) {
            dfnName = cleanMethodName(dfnName);
            dfn.dataset.lt = dfnName;
            dfn.dataset.dfnType = "method";
          } else {
            dfn.dataset.lt = dfnName;
            if (dfnName.match(/^[A-Z]+$/)) {
              dfn.dataset.dfnType = "const";
            } else {
              dfn.dataset.dfnType = "attribute";
            }
          }
        } else if (dfnName.match(abstractOpRegExp)) {
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          const opName = dfnName.split('(')[0];

          // distinguish global constructors from abstract operations
          if (idlTypes[opName]) {
            dfn.dataset.dfnType = "constructor";
            dfn.dataset.dfnFor = opName;
          } else {
            // If the name is listed as an Abstract Method
            // we set the dfn-for accordingly
            // Note we look for a possibly more specific scope by looking at the
            // title of the containing section. This is useful for
            // "Environment Records" methods.
            if (abstractMethods[opName]) {
              const baseClass = abstractMethods[opName];
              let parent = dfn.parentNode.closest('emu-clause');
              while (parent) {
                const title = parent.querySelector('h1')?.textContent.replace(sectionNumberRegExp, '').trim();
                if (title?.toLowerCase().endsWith(baseClass.toLowerCase())) {
                  dfn.dataset.dfnFor = title;
                  break;
                }
                parent = parent.parentNode.closest('emu-clause');
              }
              if (!dfn.dataset.dfnFor) {
                dfn.dataset.dfnFor = baseClass;
              }
            }
            if (dfn.getAttribute("aoid")) {
              dfn.dataset.lt = dfn.getAttribute("aoid") + '|' + dfn.dataset.lt;
            }
            dfn.dataset.dfnType = "abstract-op";
          }
        } else { // methods of the global object
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
          dfn.dataset.dfnFor = "globalThis";
        }
        definitionNames.add(dfnName);
      }
    });
  // Extract abstract operations from <emu-eqn> with aoid attribute
  [...document.querySelectorAll("emu-eqn[aoid]")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions of constant values (e.g. msPerDay)
      if (el.textContent.match(/=/)) return;
      const dfn = wrapWithDfn(el);
      dfn.dataset.lt = el.getAttribute("aoid");
      dfn.dataset.dfnType = "abstract-op";
      dfn.id = el.id;
    });

  // Extract State Components from tables
  [...document.querySelectorAll("figure > table")]
    .filter(legacySectionFilter)
    .forEach(el => {
      const title = el.parentNode.querySelector("figcaption")?.textContent || "";
      if (!title.match(/state components for/i)) return;
      const scope = title.replace(/^.*state components for/i, '').trim();
      // All state components share the ID of the enclosing table or section
      const container = el.closest("emu-table[id],emu-clause[id]");
      for (const td of el.querySelectorAll("tr td:first-child")) {
        const dfn = wrapWithDfn(td);
        dfn.dataset.dfnFor = scope;
        if (container) {
          dfn.id = container.id;
        }
      }
    });

  // Extract production rules
  [...document.querySelectorAll("emu-grammar[type=definition] emu-production")]
    .forEach(el => {
      const dfn = wrapWithDfn(el);
      dfn.id = el.id;
      dfn.dataset.lt = el.getAttribute("name");
      dfn.dataset.dfnType = "grammar";
      dfn.dataset.noexport = "";
      if (el.closest('[data-reffy-page$="additional-ecmascript-features-for-web-browsers.html"]')) {
        // Production rules in Annex B replace some of the production rules
        // defined in other sections for web browser hosts.
        dfn.dataset.dfnFor = "Web browsers";
      }
    });

  [...document.querySelectorAll("dfn")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions in conformance page and conventions page
      if (el.closest('section[data-reffy-page$="conformance.html"]') ||
          el.closest('section[data-reffy-page$="notational-conventions.html"]')) {
        el.removeAttribute("id");
        return;
      }

      // rely on the aoid attribute as a hint we're dealing
      // with an abstract-op
      if (el.getAttribute("aoid")) {
        el.dataset.dfnType = "abstract-op";
      }

      // Mark well-known intrinsic objects as the same type as their visible object (if set), defaulting to "interface"
      if (el.textContent.match(/^%[A-Z].*%$/)) {
        el.dataset.dfnType = idlTypes[el.textContent.replace(/%/g, '')] || "interface";
        definitionNames.add(el.textContent.trim());
      }

      // %names% in the global object section are operations of the globalThis object
      if (el.closest('[data-reffy-page$="global-object.html"]') && el.textContent.match(/^%[a-z]+%/i)) {
        el.dataset.dfnFor = "globalThis";
        // TODO: this doesn't capture the arguments
        el.dataset.dfnType = "method";
      }

      // Mark well-known symbols as "const"
      // for lack of a better type, and as the WebIDL spec has been doing
      if (el.textContent.match(/^@@[a-z]*$/i)) {
        el.dataset.dfnType = "const";
      }
      if (el.getAttribute("variants")) {
        el.dataset.lt = (el.dataset.lt ?? el.textContent.trim()) + "|" + el.getAttribute("variants");
      }

      // Skip definitions that have already been identified
      // with a more specific typing
      if (!el.dataset.dfnType) {
        // we already have a matching typed definition
        if (definitionNames.has(el.textContent.trim())) return;
      }

      // If the <dfn> has no id, we attach it the one from the closest
      // <emu-clause> with an id
      // Note that this means several definitions can share the same id
      if (!el.getAttribute("id")) {
        if (el.closest("emu-clause[id]")) {
          el.setAttribute("id", el.closest("emu-clause").getAttribute("id"));
        }
      }

      // Any generic <dfn> not previously filtered out
      // is deemed to be exported, scoped to ECMAScript
      if (!el.dataset.dfnType) {
        if (!el.dataset.dfnFor) {
          el.dataset.dfnFor = "ECMAScript";
        }
        el.dataset.export = "";
      }
    });
  // Another pass of clean up for duplicates
  // This cannot be done in the first pass
  // because %Foo.prototype% does not necessarily get identified before
  // the equivalent " prototype object" dfn

  [...document.querySelectorAll("dfn[id][data-export]")]
    .filter(legacySectionFilter)
    .forEach(dfn => {
      // we have the syntactic equivalent %x.prototype%
      const m = dfn.textContent.trim().match(/^(.*) prototype( object)?$/);
      if (m && definitionNames.has(`%${m[1].trim()}.prototype%`)) {
        dfn.removeAttribute("id");
        delete dfn.dataset.export;
        return;
      }
    });
}
808
-
809
/**
 * Pre-processing step for the HTML spec.
 *
 * Targets `<dfn>` elements nested in an h2-h6 heading that has an `id` but
 * none of the definition-related attributes (`data-dfn-type`, `data-dfn-for`,
 * `data-export`, `data-noexport`, `data-lt`). A `<dfn>` that has no ID of its
 * own receives the ID of its closest heading.
 */
function preProcessHTML() {
  const headingSelector = ':is(h2,h3,h4,h5,h6)[id]:not(:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt])) dfn';

  for (const dfn of document.querySelectorAll(headingSelector)) {
    // A dfn that already has its own ID is left untouched
    if (dfn.id) {
      continue;
    }
    dfn.id = dfn.closest("h2, h3, h4, h5, h6").id;
  }
}
821
-
822
/**
 * Pre-processing step for CSS 2.1, which does not follow the definitions data
 * model: turns `span.index-def` markers into `<dfn>` elements that carry the
 * expected `id`, `data-export`, `data-lt` and (when known) `data-dfn-type`.
 *
 * Note: CSS 2.2 does follow the definitions data model, but does not contain
 * any element that matches the `span.index-def` selector, so the function is
 * a no-op for CSS 2.2 and that's a good thing.
 */
function preProcessCSS2() {
  // Drops the "::definition of" / ", definition of" suffixes and the single
  // quotes that may wrap a term
  const cleanLinkingText = text => text
    .replace(/::definition of$/, '')
    .replace(/, definition of$/, '')
    .replace(/^'(.*)'$/, '$1');

  for (const span of document.querySelectorAll('span.index-def')) {
    // The definition ID is the name of an anchor inside or around the span.
    // Without such an anchor, there is nothing to link to.
    const anchor = span.querySelector('a[name]') ?? span.closest('a[name]');
    if (!anchor) {
      continue;
    }

    // A "<dfn>" may already exist inside or around the span. If it already
    // has an ID, it is left as-is.
    let dfn = span.querySelector('dfn') ?? span.closest('dfn');
    if (dfn?.id) {
      continue;
    }

    // Otherwise, wrap the whole content of the span in a new "<dfn>"
    if (!dfn) {
      dfn = document.createElement('dfn');
      Array.from(span.childNodes).forEach(child => dfn.appendChild(child));
      span.appendChild(dfn);
    }

    dfn.id = anchor.getAttribute('name');
    dfn.dataset.export = '';

    // Linking texts come from the span's title when there is one, from the
    // text of the dfn otherwise. Duplicates are dropped, first one wins.
    const rawLinkingText = span.getAttribute('title') ?? dfn.textContent;
    const linkingTexts = rawLinkingText
      .split('|')
      .map(text => cleanLinkingText(normalize(text)));
    dfn.dataset.lt = [...new Set(linkingTexts)].join('|');

    // The class of the anchor tells whether the term is a property or a
    // value; a value written as "<...>" is a type.
    const anchorClass = anchor.getAttribute('class') ?? '';
    if (anchorClass === 'propdef-title') {
      dfn.dataset.dfnType = 'property';
    }
    else if (anchorClass === 'value-def') {
      dfn.dataset.dfnType = /^<.*>$/.test(dfn.dataset.lt) ? 'type' : 'value';
    }
  }
}
886
-
887
/**
 * Pre-processing step for SVG 2, which marks up a number of terms without
 * following the definitions data model.
 *
 * The function extracts and parses the Web IDL of the spec, then rewrites the
 * DOM in place so that the regular extraction logic finds:
 * - the `link` element definition (heading with ID `LinkElement`);
 * - attribute definitions under `.attrdef`, scoped through the attribute index
 *   page;
 * - attribute and property definitions that appear as `dt[id] > .adef` or
 *   `dt[id] > .property`, scoped through the attribute/property index pages;
 * - IDL members anchored as `<b id="__svg__Container__member">`;
 * - interfaces defined in `h3[id^="Interface"]` headings;
 * - any other `<b id>` whose text is the name of a top-level IDL construct.
 */
function preProcessSVG2() {
  const idlTree = parse(extractWebIdl());

  // the only element definition not properly marked up in the SVG spec
  const linkHeading = document.getElementById("LinkElement");
  if (linkHeading && !linkHeading.dataset.dfnType) {
    linkHeading.dataset.dfnType = "element";
    linkHeading.dataset.lt = "link";
  }

  document.querySelectorAll(".attrdef dfn[id]:not([data-dfn-type]):not([data-skip])")
    .forEach(el => {
      el.dataset.dfnType = "element-attr";
      // The row of the attribute index that links to the dfn gives the scope
      const attrDesc = document.querySelector(`[data-reffy-page$="attindex.html"] th span.attr-name a[href$="#${el.id}"]`);
      if (attrDesc) {
        el.dataset.dfnFor = attrDesc.closest('tr').querySelector('td').textContent;
      } else {
        console.error(`Could not find description for ${el.textContent}`);
      }
    });

  document.querySelectorAll("dt[id] > .adef, dt[id] > .property")
    .forEach(el => {
      const isAttribute = el.classList.contains("adef");
      const dt = el.parentNode;
      const newDt = document.createElement("dt");
      const dfn = document.createElement("dfn");
      dfn.id = dt.id;
      dfn.dataset.dfnType = isAttribute ? "element-attr" : "property";
      const indexPage = isAttribute ? "attindex.html" : "propidx.html";
      const attrDesc = document.querySelector(`[data-reffy-page$="${indexPage}"] th a[href$="#${dfn.id}"]`);
      if (attrDesc) {
        // TODO: this doesn't deal with grouping of elements, e.g. "text content elements"
        dfn.dataset.dfnFor = [...attrDesc.closest('tr').querySelectorAll('span.element-name a')]
          .map(n => n.textContent)
          .join(',');
      } else {
        console.error(`Could not find description for ${el.textContent}/${dfn.id}`);
      }
      // The original <dt> gets replaced by a <dt> that only contains the dfn
      dfn.textContent = el.textContent;
      newDt.appendChild(dfn);
      dt.replaceWith(newDt);
    });

  document.querySelectorAll('b[id^="__svg__"]').forEach(el => {
    // IDs look like "__svg__[container]__[member]"
    const [,, containername, membername] = el.id.split('__');
    if (!containername || !membername) {
      return;
    }
    // The IDL item found by name may be a construct that has no "members"
    // (an enum or a typedef for instance). Such anchors are skipped instead
    // of throwing.
    const container = idlTree.find(i => i.name === containername);
    const member = container?.members?.find(m => m.name === membername);
    if (!member) {
      return;
    }
    const dfn = document.createElement("dfn");
    dfn.id = el.id;
    dfn.textContent = el.textContent;
    dfn.dataset.dfnFor = containername;
    dfn.dataset.dfnType = member.type === "operation" ? "method" : member.type;
    el.replaceWith(dfn);
  });

  document.querySelectorAll('h3[id^="Interface"]:not([data-dfn-type])').forEach(el => {
    const name = el.id.slice("Interface".length);
    if (idlTree.find(i => i.name === name && i.type === "interface")) {
      el.dataset.dfnType = "interface";
      el.dataset.lt = name;
    }
  });

  document.querySelectorAll('b[id]:not([data-dfn-type])').forEach(el => {
    const name = el.textContent;
    const idlItem = idlTree.find(i => i.name === name);
    if (idlItem) {
      const dfn = document.createElement("dfn");
      dfn.id = el.id;
      dfn.dataset.dfnType = idlItem.type;
      dfn.textContent = el.textContent;
      el.replaceWith(dfn);
    }
  });
}
965
-
966
/**
 * Pre-processing step for RFC 8610 (CDDL).
 *
 * The CDDL RFC defines a standard prelude with a number of CDDL types that
 * other specs that define CDDL make extensive use of. To be able to link back
 * to these type definitions from other specs, we need these types to appear
 * in the dfns extract of the RFC somehow.
 *
 * The RFC only defines one ID for the appendix that contains the standard
 * prelude, and that ID needs to be "shared" across all types. To avoid
 * introducing definitions that have the same ID and href, which could perhaps
 * confuse tools that ingest the definitions, the function creates a single
 * exported `cddl-type` definition that lists all the types as linking text.
 *
 * The function does nothing when it cannot find the start or the end of the
 * prelude.
 */
function preProcessRFC8610() {
  const preludeStart = /<a [^>]*id=[^>]*>Appendix .<\/a>\.\s+Standard Prelude/;
  const preludeEnd = /Figure \d+: CDDL Prelude/;

  // The RFC is rendered as a series of pages. The prelude starts on the page
  // that has the appendix heading and ends on the first page, from that one
  // onward, that has the figure caption.
  const pages = Array.from(document.querySelectorAll('pre.newpage'));
  const firstIdx = pages.findIndex(page => preludeStart.test(page.innerHTML));
  if (firstIdx === -1) {
    return;
  }
  const lastIdx = pages.findIndex((page, idx) =>
    idx >= firstIdx && preludeEnd.test(page.innerHTML));
  if (lastIdx === -1) {
    return;
  }

  // Types are the names that appear in indented "name = ..." lines
  const preludeTypes = pages
    .slice(firstIdx, lastIdx + 1)
    .flatMap(page => Array.from(
      page.innerHTML.matchAll(/^\s+([a-z0-9\-]+) = .*$/mg),
      match => match[1]));

  // Replace the first anchor with an ID on the starting page with a
  // cddl-type definition that lists all CDDL types
  const anchor = pages[firstIdx].querySelector(`a[id]`);
  const dfn = document.createElement("dfn");
  dfn.id = anchor.id;
  dfn.dataset.dfnType = 'cddl-type';
  dfn.dataset.lt = preludeTypes.join('|');
  dfn.dataset.export = '';
  dfn.textContent = anchor.textContent;
  anchor.replaceWith(dfn);
}
1015
-
1016
-
1017
- /**
1018
- * WebGL 1.0 defines a few (~15) IDL attributes without following the
1019
- * definitions data model. These IDL constructs have IDs that look like
1020
- * `DOM-[interface]-[attr]`.
1021
- *
1022
- * The spec also defines a few IDL methods with anchors so that they can be
1023
- * referenced. BCD and MDN typically link to these.
1024
- *
1025
- * Not much choice to understand what the anchors map to and create the
1026
- * appropriate dfns, we need to extract and parse the whole IDL
1027
- */
1028
- function preProcessWebGL1() {
1029
- const idl = extractWebIdl();
1030
- const idlTree = parse(idl);
1031
-
1032
- const attributes = [...document.querySelectorAll('.attribute-name a[id^=DOM-]')];
1033
- for (const attribute of attributes) {
1034
- const dfn = document.createElement('dfn');
1035
- // Notes:
1036
- // - The interface name appears in the ID but... name cannot be trusted
1037
- // because it targets the concrete interface and not the underlying mixin
1038
- // when one exists, whereas we want to create a dfn scoped to the mixin.
1039
- // - Fortunately, no two interfaces define the same attribute in WebGL1, so
1040
- // we can just match on the attribute name
1041
- const attrName = attribute.textContent.trim();
1042
- const idlItems = idlTree.filter(i => i.members?.find(m =>
1043
- m.type === 'attribute' && m.name === attrName));
1044
- if (idlItems.length === 0) {
1045
- console.warn('[reffy]', `could not find attribute ${attrName}`);
1046
- continue;
1047
- }
1048
- if (idlItems.length > 1) {
1049
- console.warn('[reffy]', `more than one matching attribute found for ${attrName}`);
1050
- continue;
1051
- }
1052
- dfn.id = attribute.id;
1053
- dfn.dataset.dfnType = 'attribute';
1054
- dfn.dataset.dfnFor = idlItems[0].name;
1055
- dfn.textContent = attrName;
1056
- attribute.replaceWith(dfn);
1057
- }
1058
-
1059
- const methods = [...document.querySelectorAll('.idl-code a[name]')];
1060
- for (const method of methods) {
1061
- const dfn = document.createElement('dfn');
1062
- // Notes:
1063
- // - The anchor also wraps possible flags and the return type
1064
- // - The return type is best ignored: The IDL block was fixed to use
1065
- // `undefined` but the prose still uses `void` (sigh!).
1066
- // - The parameter names and types are after the anchor. We need to look at
1067
- // them because some of the anchors target overloaded methods... We'll also
1068
- // use them to create appropriate linking texts for the methods.
1069
- // - We cannot match on parameter names for overloaded methods because the
1070
- // spec uses *different* parameter names in the IDL block and in the prose
1071
- // that defines the method (re-sigh!). Matching on the number of parameters is
1072
- // enough to disambiguate between overloaded methods.
1073
- const methodName = method.textContent.split(' ').pop();
1074
- const methodArgs = method.parentNode.textContent
1075
- // Note the "s" flag as parameters may be split over multiple lines
1076
- .match(/\((.*?)\)/s)[1]
1077
- .split(',')
1078
- .map(arg => arg.split(' ').pop());
1079
- const idlItem = idlTree.find(i => i.members?.find(m =>
1080
- m.type === 'operation' &&
1081
- m.name === methodName &&
1082
- m.arguments.length === methodArgs.length));
1083
- if (!idlItem) {
1084
- console.warn('[reffy]', `could not find method ${methodName}`);
1085
- continue;
1086
- }
1087
- dfn.id = method.getAttribute('name');
1088
- dfn.dataset.dfnType = 'method';
1089
- dfn.dataset.dfnFor = idlItem.name;
1090
- dfn.dataset.lt = `${methodName}(${methodArgs.join(', ')})`;
1091
- dfn.textContent = method.textContent;
1092
- method.replaceWith(dfn);
1093
- }
1
+ import extractWebIdl from './extract-webidl.mjs';
2
+ import informativeSelector from './informative-selector.mjs';
3
+ import getAbsoluteUrl from './get-absolute-url.mjs';
4
+ import {parse} from "../../node_modules/webidl2/index.js";
5
+ /**
6
+ * Extract definitions in the spec that follow the "Definitions data model":
7
+ * https://tabatkins.github.io/bikeshed/#dfn-contract
8
+ *
9
+ * Each definition returned by the function will have the following properties:
10
+ * - id: The local ID in the DOM. Should be unique within a spec page.
11
+ * - href: The absolute URL to the definition.
12
+ * - linkingText: List of linking phrases for references.
13
+ * - localLinkingText: List of linking phrases for local references only.
14
+ * - type: The definition type. One of the values in
15
+ * https://tabatkins.github.io/bikeshed/#dfn-types
16
+ * - for: The list of namespaces for the definition
17
+ * - access: "public" when definition can be referenced by other specifications,
18
+ * "private" when it should be viewed as a local definition.
19
+ * - informative: true when definition appears in an informative section,
20
+ * false if it is normative
21
+ * - heading: Heading under which the term is to be found. An object with "id",
22
+ * "title", and "number" properties
23
+ * - definedIn: An indication of where the definition appears in the spec. Value
24
+ * can be one of "dt", "pre", "table", "heading", "note", "example", or
25
+ * "prose" (last one indicates that definition appears in the main body of
26
+ * the spec)
27
+ * - links: A list of interesting links with IDs that complete the definitions,
28
+ * notably non-normative descriptions that target web developers.
29
+ *
30
+ * The extraction ignores definitions with an unknown type. A warning is issued
31
+ * to the console when that happens.
32
+ *
33
+ * The extraction uses the first definition it finds when it bumps into a term
34
+ * that is defined more than once (same "linkingText", same "type", same "for").
35
+ * A warning is issued to the console when that happens.
36
+ *
37
+ * @function
38
+ * @public
39
+ * @return {Array(Object)} An Array of definitions
40
+ */
41
+
42
+ import cloneAndClean from './clone-and-clean.mjs';
43
+
44
/**
 * Normalize whitespace in a string: each run of whitespace characters becomes
 * a single space, and leading/trailing whitespace is dropped.
 *
 * @param {string} str The string to normalize
 * @return {string} The normalized string
 */
function normalize(str) {
  const collapsed = str.replace(/\s+/g, ' ');
  return collapsed.trim();
}
47
+
48
// Valid types defined in https://tabatkins.github.io/bikeshed/#dfn-types
// (+ "namespace", "event" and "permission" which are not yet in the doc)
// The set is created once: hasValidType runs against every candidate
// definition.
const VALID_DFN_TYPES = new Set([
  // CSS types
  'property',
  'descriptor',
  'value',
  'type',
  'at-rule',
  'function',
  'selector',

  // Web IDL types
  'namespace',
  'interface',
  'constructor',
  'method',
  'argument',
  'attribute',
  'callback',
  'dictionary',
  'dict-member',
  'enum',
  'enum-value',
  'exception',
  'const',
  'typedef',
  'stringifier',
  'serializer',
  'iterator',
  'maplike',
  'setlike',
  'extended-attribute',
  'event',
  'permission',

  // Element types
  'element',
  'element-state',
  'element-attr',
  'attr-value',

  // CDDL types
  'cddl-module',
  'cddl-type',
  'cddl-parameter',
  'cddl-key',
  'cddl-value',

  // URL scheme
  'scheme',

  // HTTP header
  'http-header',

  // Grammar type
  'grammar',

  // "English" terms
  'abstract-op',
  'dfn'
]);

/**
 * Tell whether the definition type of an element is a known one.
 *
 * The type is read from the `data-dfn-type` attribute and defaults to "dfn"
 * when the attribute is absent. Note an empty attribute value is not replaced
 * by the default and is reported as invalid. A warning is issued to the
 * console when the type is invalid.
 *
 * @param {Element} el The element that defines a term
 * @return {boolean} true when the type is valid, false otherwise
 */
function hasValidType(el) {
  const type = el.getAttribute('data-dfn-type') ?? 'dfn';
  const isValid = VALID_DFN_TYPES.has(type);
  if (!isValid) {
    console.warn('[reffy]', `"${type}" is an invalid dfn type for "${normalize(el.textContent)}"`);
  }
  return isValid;
}
119
+
120
/**
 * Filter callback that only keeps the first occurrence of an exported
 * definition.
 *
 * Two definitions are duplicates when they are both public and have the same
 * type, the same number of linking texts with every linking text of one
 * found in the other, and likewise for their "for" namespaces.
 *
 * Membership is tested with `includes`, so that empty-string entries (which
 * are falsy) are compared like any other entry.
 *
 * @param {Object} dfn The definition to check
 * @param {number} idx Index of the definition in the list (unused)
 * @param {Array(Object)} list The list of definitions being filtered
 * @return {boolean} true when the definition is the first of its kind in the
 *   list, false (with a console warning) when an earlier definition matches
 */
function isNotAlreadyExported(dfn, idx, list) {
  const haveSameEntries = (a, b) =>
    a.length === b.length && a.every(entry => b.includes(entry));

  const first = list.find(d => d === dfn ||
    (d.access === 'public' && dfn.access === 'public' &&
     d.type === dfn.type &&
     haveSameEntries(d.linkingText, dfn.linkingText) &&
     haveSameEntries(d.for, dfn.for)));
  if (first !== dfn) {
    console.warn('[reffy]', `Duplicate dfn found for "${dfn.linkingText[0]}", type="${dfn.type}", for="${dfn.for[0]}", dupl=${dfn.href}, first=${first.href}`);
  }
  return first === dfn;
}
135
+
136
/**
 * Extract the inner HTML content of the element that holds the prose
 * definition of a term, removing any complex structure, so that the result
 * can be injected elsewhere without creating problems.
 *
 * The function works on a copy of the element returned by `cloneAndClean`.
 * Elements other than simple grouping content and text-level semantics
 * elements are removed (with a console warning). All attributes are dropped
 * except "href" (converted to an absolute URL), "dir", "lang" and "title".
 *
 * @param {Element} proseEl The element that contains the prose definition
 * @return {string} The trimmed HTML of the simplified copy
 */
function getHtmlProseDefinition(proseEl) {
  // Strip element of all annotations
  const cleaned = cloneAndClean(proseEl);

  // Simple grouping content and text-level semantics elements are kept
  const keepSelector = [
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li',
    'ol', 'p', 'pre', 'ul',
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em',
    'i', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr'
  ].join(',');
  const unexpectedSelector = `:not(${keepSelector})`;

  // Remove unexpected elements one at a time until none remains. The content
  // is more complex than anticipated when that happens. It may be worth
  // checking the definition to assess whether the extraction logic needs to
  // become smarter. For lack of a better reporting mechanism for now, a
  // warning gets recorded.
  for (let unexpected = cleaned.querySelector(unexpectedSelector);
       unexpected;
       unexpected = cleaned.querySelector(unexpectedSelector)) {
    console.warn('[reffy]', `Unexpected element "${unexpected.nodeName}" found in textual definition of "${cleaned.getAttribute('data-defines')}"`);
    unexpected.remove();
  }

  const keptAttributes = ['dir', 'lang', 'title'];
  for (const node of [...cleaned.querySelectorAll('*')]) {
    for (const attr of node.getAttributeNames()) {
      if (attr === 'href') {
        // Links must remain valid once injected elsewhere: make them absolute
        const page = node.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
        const url = new URL(node.getAttribute('href'), page ?? window.location.href);
        node.setAttribute('href', url.toString());
        continue;
      }
      if (keptAttributes.includes(attr)) {
        continue;
      }
      node.removeAttribute(attr);
    }
  }

  return cleaned.innerHTML.trim();
}
177
+
178
/**
 * Convert an element that defines a term into a definition object.
 *
 * @param {Element} el The element that defines the term
 * @param {Object} idToHeading Mapping table from absolute URLs (with
 *   fragment) to heading objects
 * @param {boolean} usesDfnDataModel false when the spec does not use the
 *   definitions data model attributes, in which case all definitions are
 *   considered to be public
 * @return {Object} The definition, with `id`, `href`, `linkingText`,
 *   `localLinkingText`, `type`, `for`, `access`, `informative`, `heading`,
 *   `definedIn` and `links` properties, plus an `htmlProse` property when
 *   some element in the document is flagged as defining the term through a
 *   `data-defines` attribute and its prose is not empty.
 */
function definitionMapper(el, idToHeading, usesDfnDataModel) {
  // Enclosing element under which the definition appears. Value can be one of
  // "dt", "pre", "table", "heading", "note", "example", or "prose" (last one
  // indicates that definition appears in the main body of the specification)
  const container = el.closest('dt,pre,table,h1,h2,h3,h4,h5,h6,.note,.example') || el;
  const containerName = container.nodeName;
  let definedIn;
  if (['DT', 'PRE', 'TABLE'].includes(containerName)) {
    definedIn = containerName.toLowerCase();
  }
  else if (['H1', 'H2', 'H3', 'H4', 'H5', 'H6'].includes(containerName)) {
    definedIn = 'heading';
  }
  else if (container.classList.contains('note')) {
    definedIn = 'note';
  }
  else if (container.classList.contains('example')) {
    definedIn = 'example';
  }
  else {
    definedIn = 'prose';
  }

  // Linking text is given by the data-lt attribute if present, or it is the
  // textual content... without the section number that might have been
  // captured when definition is defined in a heading, as in:
  // https://www.w3.org/TR/ethical-web-principles/#oneweb
  let linkingText;
  if (el.hasAttribute('data-lt')) {
    linkingText = el.getAttribute('data-lt').split('|').map(normalize);
  }
  else if (el.querySelector('.secno')) {
    const withoutSecno = el.cloneNode(true);
    withoutSecno.querySelector('.secno').remove();
    linkingText = [normalize(withoutSecno.textContent)];
  }
  else {
    linkingText = [normalize(el.textContent)];
  }

  // Compute the absolute URL with fragment
  // (Note the crawler merges pages of a multi-page spec in the first page
  // to ease parsing logic, and we want to get back to the URL of the page)
  const id = el.getAttribute('id');
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  const pageUrl = page ?? window.location.href;
  const url = new URL(pageUrl);
  url.hash = '#' + encodeURIComponent(id);
  const href = url.toString();

  const dfnType = el.getAttribute('data-dfn-type');
  const localLt = el.getAttribute('data-local-lt');
  const dfnFor = el.getAttribute('data-dfn-for');

  // Definition is public if explicitly marked as exportable or if export has
  // not been explicitly disallowed and its type is not "dfn" or a CDDL type,
  // or if the spec is an old spec that does not use the "data-dfn-type"
  // convention.
  const isExportedThroughType =
    !el.hasAttribute('data-noexport') &&
    el.hasAttribute('data-dfn-type') &&
    dfnType !== 'dfn' &&
    !dfnType.startsWith('cddl-');
  const isPublic =
    !usesDfnDataModel ||
    el.hasAttribute('data-export') ||
    isExportedThroughType;

  const dfn = {
    // ID may not be unique in a multi-page spec
    id,

    // Absolute URL with fragment
    href,

    linkingText,

    // Additional linking text can be defined for local references
    localLinkingText: localLt ? localLt.split('|').map(normalize) : [],

    // Link type must be specified, or it is "dfn"
    type: dfnType || 'dfn',

    // Definition may be namespaced to other constructs. Note the list is not
    // purely comma-separated due to function parameters. For instance,
    // attribute value may be "method(foo,bar), method()"
    for: dfnFor ? dfnFor.split(/,(?![^\(]*\))/).map(normalize) : [],

    access: isPublic ? 'public' : 'private',

    // Whether the term is defined in a normative/informative section
    informative: !!el.closest(informativeSelector),

    // Heading under which the term is to be found,
    // Defaults to the page or document URL and the spec's title
    heading: idToHeading[href] ?? {
      href: (new URL(pageUrl)).toString(),
      title: document.title
    },

    definedIn,

    // Important links that complement the definition
    // (typically: anchors in "for web developers" sections)
    links: []
  };

  // Extract a prose definition in HTML for the term, if available
  const proseEl = document.querySelector(`[data-defines="#${dfn.id}"]`);
  const htmlProse = proseEl ? getHtmlProseDefinition(proseEl) : null;
  if (htmlProse) {
    dfn.htmlProse = htmlProse;
  }

  return dfn;
}
300
+
301
/**
 * Extract the definitions of the spec loaded in the current document.
 *
 * @param {string|Object} spec The shortname of the spec, or an object with a
 *   `shortname` property. The shortname selects the spec-specific
 *   pre-processing step to run before the extraction, if any.
 * @param {Object} idToHeading Mapping table from absolute URLs (with
 *   fragment) to heading objects, used to set the `heading` property of
 *   definitions and of their links
 * @return {Array(Object)} An Array of definitions
 */
export default function (spec, idToHeading = {}) {
  const definitionsSelector = [
    // re data-lt, see https://github.com/w3c/reffy/issues/336#issuecomment-650339747
    // As for `<dfn>` we'll consider that headings without a `data-dfn-type`
    // have an implicit `"data-dfn-type"="dfn"` attribute, provided they also
    // have some other definition related attribute (because we only want to
    // extract headings that want to be seen as definitions)
    'dfn[id]:not([data-lt=""])',
    ':is(h2,h3,h4,h5,h6)[id]:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt]):not([data-lt=""])'
  ].join(',');

  // Run the pre-processing step that the spec requires, if any
  let shortname;
  if (typeof spec === 'string') {
    shortname = spec;
  }
  else {
    shortname = spec.shortname;
  }
  switch (shortname) {
  case "CSS2":
    preProcessCSS2();
    break;
  case "html":
    preProcessHTML();
    break;
  case "ecmascript":
    preProcessEcmascript();
    break;
  case "SVG2":
    preProcessSVG2();
    break;
  case "rfc8610":
    // RFC8610 defines CDDL
    preProcessRFC8610();
    break;
  case "webgl1":
    preProcessWebGL1();
    break;
  }

  // The spec uses the definitions data model as soon as one definition has
  // one of the attributes of the model
  const dfnEls = Array.from(document.querySelectorAll(definitionsSelector));
  const dataModelAttributes = [
    'data-dfn-type', 'data-dfn-for', 'data-export', 'data-noexport'
  ];
  const usesDfnDataModel = dfnEls.some(dfn =>
    dataModelAttributes.some(attr => dfn.hasAttribute(attr)));

  // 2021-06-21: Temporary preprocessing of invalid "idl" dfn type (used for
  // internal slots) while fix for https://github.com/w3c/respec/issues/3644
  // propagates to all EDs and /TR specs. To be dropped once crawls no
  // longer produce warnings.
  for (const node of dfnEls) {
    if (node.getAttribute('data-dfn-type') !== 'idl') {
      continue;
    }
    let linkingText;
    if (node.hasAttribute('data-lt')) {
      linkingText = node.getAttribute('data-lt').split('|').map(normalize);
    }
    else {
      linkingText = [normalize(node.textContent)];
    }
    node.setAttribute('data-dfn-type', linkingText[0].endsWith(')') ? 'method' : 'attribute');
    console.warn('[reffy]', `Fixed invalid "idl" dfn type "${normalize(node.textContent)}"`);
  }

  // IDL terms defined in a block that is flagged as to be excluded or inside
  // a <del> are not definitions
  const isExcluded = node => !!node.closest('.exclude,del');

  // When the whole term links to an external spec, the definition is an
  // imported definition. Such definitions are not "real" definitions.
  // One hardcoded exception-to-the-rule, see:
  // https://github.com/w3c/webref/issues/882
  // (pending a proper dfns curation process, see:
  // https://github.com/w3c/webref/issues/789)
  const isImported = node => {
    const link =
      node.querySelector('a[href^="http"]') ??
      node.closest('a[href^="http"]');
    if (!link) {
      return false;
    }
    return (node.textContent.trim() === link.textContent.trim()) &&
      (link.href !== 'https://www.w3.org/TR/CSS2/syndata.html#vendor-keywords');
  };

  const definitions = dfnEls
    .filter(hasValidType)
    .filter(node => !isExcluded(node))
    .filter(node => !isImported(node))
    .map(node => definitionMapper(node, idToHeading, usesDfnDataModel))
    .filter(isNotAlreadyExported);

  // Some specs have informative "For web developers" sections targeted at
  // presenting concepts to web developers. These sections contain anchors
  // that are useful for documentation purpose. The anchors themselves are
  // references to terms defined elsewhere in the spec. They get captured in
  // a `links` property attached to the underlying definition.
  // Note: Ideally, `.domintro` would be added to the informative selector list
  // but some specs use `.domintro` for lists that define IDL terms. These
  // are skipped by ignoring lists that have `dfn`.
  const devSelector = '.domintro dt:not(dt:has(dfn)) a[id]';
  for (const node of [...document.querySelectorAll(devSelector)]) {
    const dfnHref = getAbsoluteUrl(node, { attribute: 'href' });
    const dfn = definitions.find(d => d.href === dfnHref);
    if (!dfn) {
      // When an interface inherits from another, the reference may target
      // a base dfn in another spec. For example:
      // https://encoding.spec.whatwg.org/#ref-for-dom-generictransformstream-readable
      // ... targets the Streams spec. There aren't many occurrences of this
      // pattern and the occurrences do not look super interesting to link to
      // from a documentation perspective. Let's skip them.
      console.warn('[reffy]', `Dev dfn ${node.textContent} (${node.id}) targets unknown/external dfn at ${node.href}`);
      continue;
    }

    const href = getAbsoluteUrl(node);
    const page = node.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
    dfn.links.push({
      type: 'dev',
      id: node.getAttribute('id'),
      name: normalize(node.closest('dt').textContent),
      href,
      heading: idToHeading[href] ?? {
        href: (new URL(page ?? window.location.href)).toString(),
        title: document.title
      }
    });
  }

  return definitions;
}
418
+
419
+ function preProcessEcmascript() {
420
+ // Skip elements in sections marked as legacy
421
+ const legacySectionFilter = n => !n.closest("[legacy]");
422
+
423
+ const wrapWithDfn = (el) => {
424
+ // wrap with a dfn
425
+ const dfn = document.createElement("dfn");
426
+ for (let child of [...el.childNodes]) {
427
+ dfn.appendChild(child);
428
+ }
429
+ el.appendChild(dfn);
430
+ // set id
431
+ dfn.setAttribute("id", el.parentNode.getAttribute("id"));
432
+ if (el.parentNode.hasAttribute("aoid")) {
433
+ dfn.setAttribute("aoid", el.parentNode.getAttribute("aoid"));
434
+ }
435
+ return dfn;
436
+ };
437
+
438
+ const cleanMethodName = (name) => {
439
+ return name.replace(/\[/g, '')
440
+ .replace(/\]/g, '') // removing brackets used to mark optional args
441
+ .replace(/ \( */, '(')
442
+ .replace(/ *\)/, ')')
443
+ .replace(/ *,/g, ','); // trimming internal spaces
444
+ };
445
+
446
+ let definitionNames = new Set();
447
+ let idlTypes = {};
448
+
449
+ // We find the list of abstract methods
450
+ // to help with scoping abstract operations
451
+ let abstractMethods = {};
452
+ const abstractMethodCaptions = [...document.querySelectorAll("figcaption")]
453
+ .filter(el => el.textContent.match(/(abstract|additional) method/i) && el.parentNode.querySelector("emu-xref"));
454
+ for (const figcaption of abstractMethodCaptions) {
455
+ let scope = figcaption.querySelector("emu-xref").textContent;
456
+ if (scope.endsWith('Environment Records')) {
457
+ // Environment records come with an abstract class, and subclasses:
458
+ // https://tc39.es/ecma262/multipage/executable-code-and-execution-contexts.html#sec-environment-records
459
+ // Methods are defined for each class. We pretend that the scope is the
460
+ // abstract class for now. Exact scope will be determined by looking at
461
+ // the title of the section under which the method is found.
462
+ scope = 'Environment Records';
463
+ }
464
+ const table = figcaption.parentNode.querySelector("tbody");
465
+ for (const td of table.querySelectorAll("tr td:first-child")) {
466
+ // We only consider the name of the method, not the potential parameters
467
+ // as they're not necessarily consistently named across
468
+ // the list and the definition
469
+ const methodName = td.textContent.split('(')[0].trim();
470
+ abstractMethods[methodName] = scope;
471
+ }
472
+ }
473
+
474
+ // Regular expression used to drop section numbers from section titles
475
+ const sectionNumberRegExp = /^([A-Z]\.)?[0-9\.]+ /;
476
+
477
+ // Regular expression that matches scoped methods à la "JSON.parse"
478
+ const scopedNameRegExp = /^[a-z0-9]+\.[a-z0-9]+/i;
479
+
480
+ // Regular expression that matches general unscoped method names à la
481
+ // "ArrayCreate (", "ToInt32 (" or "decodeURI (". The expression also matches
482
+ // constructors.
483
+ const methodNameRegExp = /^([a-z0-9]+)+ *\(/i;
484
+
485
+ // More specific regular expression that matches abstract operations methods
486
+ // à la "ToInt32 (". Does not match "decodeURI (" for instance as it does not
487
+ // start with an upper case character.
488
+ const abstractOpRegExp = /^[A-Z][a-zA-Z0-9]+ *\(/;
489
+
490
+ [...document.querySelectorAll("h1")]
491
+ .filter(legacySectionFilter)
492
+ .forEach(el => {
493
+ let dfnName = el.textContent.replace(sectionNumberRegExp, '').trim() ;// remove section number
494
+ const dfnId = el.parentNode.id;
495
+ if (dfnId.match(/-objects?$/) && dfnName.match(/ Objects?$/)) {
496
+
497
+ // Skip headings that look like object definitions, but aren't
498
+ const notObjectIds = ["sec-global-object", "sec-fundamental-objects", "sec-waiterlist-objects"];
499
+ if (notObjectIds.includes(dfnId)) return;
500
+
501
+ // only keep ids that match a credible pattern for object names
502
+ // i.e. a single word
503
+ // there are exceptions to that simple rule
504
+ // RegExp includes its expansion (regular expansion) in the id
505
+ // WeakRef is translated into weak-ref in the id
506
+ const objectsIdsExceptions = ["sec-regexp-regular-expression-objects", "sec-weak-ref-objects", "sec-aggregate-error-objects", "sec-finalization-registry-objects", "sec-async-function-objects"];
507
+
508
+ if (!dfnId.match(/sec-[a-z]+-objects?/)
509
+ && !objectsIdsExceptions.includes(dfnId)
510
+ ) return;
511
+ const dfn = wrapWithDfn(el);
512
+ // set data-lt
513
+ dfnName = dfnName
514
+ .replace(/^The /, '')
515
+ .replace(/ Objects?$/, '')
516
+ // regexp def includes "(Regular Expression)"
517
+ .replace(/ \([^\)]*\)/, '') ;
518
+ dfn.dataset.lt = dfnName;
519
+
520
+ // FIXME
521
+ // These interfaces are also defined in WebIDL, which in general is
522
+ // the preferred source for these terms
523
+ // Because bikeshed does not yet support spec-specific imports,
524
+ // we hide these terms as not exported
525
+ // cf https://github.com/w3c/reffy/pull/732#issuecomment-925950287
526
+ const exportExceptions = [ "Promise", "DataView", "ArrayBuffer" ];
527
+ if (exportExceptions.includes(dfnName)) {
528
+ dfn.dataset.noexport = "";
529
+ }
530
+
531
+ if (dfnName.match(/^[A-Z]/)) {
532
+ // set dfn-type
533
+ if (dfnName.match(/Error$/)) {
534
+ dfn.dataset.dfnType = "exception";
535
+ } else if (!el.parentNode.querySelector('[id$="constructor"]')) {
536
+ // Objects without constructors match to the namespace type
537
+ dfn.dataset.dfnType = "namespace";
538
+ } else {
539
+ dfn.dataset.dfnType = "interface";
540
+ }
541
+ // We keep track of types associated with a name
542
+ // to associate the same type to the relevant intrinsic object
543
+ // à la %Math%
544
+ idlTypes[dfnName] = dfn.dataset.dfnType;
545
+ }
546
+ definitionNames.add(dfnName);
547
+ } else if (dfnId.match(/-[a-z]+error$/) && !dfnName.match(/\(/)) {
548
+ const dfn = wrapWithDfn(el);
549
+ dfn.dataset.lt = dfnName;
550
+ dfn.dataset.dfnType = "exception";
551
+ definitionNames.add(dfnName);
552
+ idlTypes[dfnName] = dfn.dataset.dfnType;
553
+ } else if (dfnId.match(/[-\.]prototype[-\.]/)) {
554
+ // methods and attributes on objects
555
+
556
+ // Skip headings with a space and no parenthesis
557
+ // (they mention prototype but aren't a prototype property def)
558
+ // with the exception of "set " and "get " headings
559
+ // (which describe setters and getters)
560
+ if (!dfnName.match(/\(/) && (dfnName.match(/ /) && !dfnName.match(/^[gs]et /))) return;
561
+
562
+ // Skip unscoped internal methods à la [[SetPrototypeOf]](V)
563
+ if (dfnName.match(/\[\[/)) return;
564
+
565
+ // Skip symbol-based property definitions;
566
+ // not clear they're useful as externally referenceable names
567
+ if (dfnName.match(/@@/)) return;
568
+
569
+ // Skip .constructor as that cannot be considered as an attribute
570
+ if (dfnName.match(/\.constructor$/)) return;
571
+
572
+ const dfn = wrapWithDfn(el);
573
+ // set definition scope
574
+ dfn.dataset.dfnFor = dfnName.replace(/\.prototype\..*/, '')
575
+ .replace(/^[gs]et /, ''); // remove "get"/"set" markers
576
+
577
+ // Remove parent object prototype (set as scope)
578
+ dfnName = dfnName.replace(/.*\.prototype\./, '');
579
+
580
+ dfn.dataset.lt = dfnName;
581
+ // set dfn-type
582
+ if (dfn.dataset.lt.match(/\(/)) {
583
+ dfnName = cleanMethodName(dfnName);
584
+ dfn.dataset.lt = dfnName;
585
+ dfn.dataset.dfnType = "method";
586
+ } else {
587
+ dfn.dataset.dfnType = "attribute";
588
+ }
589
+ } else if (el.closest("#sec-value-properties-of-the-global-object")) {
590
+ // properties of the global object
591
+ if (el.id !== "#sec-value-properties-of-the-global-object"){
592
+ const dfn = wrapWithDfn(el);
593
+ dfn.dataset.lt = dfnName;
594
+ dfn.dataset.dfnType = "attribute";
595
+ dfn.dataset.dfnFor = "globalThis";
596
+ }
597
+ } else {
598
+ // We handle other headings that look like a method / property
599
+ // on an object instance (rather than its prototype)
600
+ // or an abstract op
601
+
602
+ // if there is already a dfn element, we move on
603
+ if (el.querySelector("dfn")) return;
604
+
605
+ // only dealing with well-known patterns
606
+ if (!dfnName.match(scopedNameRegExp)
607
+ && !dfnName.match(methodNameRegExp)
608
+ ) return;
609
+ // Skip symbol-based property definitions
610
+ if (dfnName.match(/@@/)) return;
611
+
612
+ // Skip .prototype as that cannot be considered
613
+ // as an attribute
614
+ if (dfnName.match(/\.prototype$/)) return;
615
+
616
+ // Skip headings where foo.bar appears as part of a longer phrase
617
+ if (!dfnName.match(/\(/) && dfnName.match(/ /)) return;
618
+
619
+ // redundant definitions of constructors on the global object
620
+ // e.g. "Array ( . . . )"
621
+ if (dfnName.match(/\. \. \./)) return;
622
+
623
+ const dfn = wrapWithDfn(el);
624
+
625
+ if (dfnName.match(scopedNameRegExp)) {
626
+ // set definition scope
627
+ // This assumes that such methods and attributes are only defined
628
+ // one-level deep from the global scope
629
+ dfn.dataset.dfnFor = dfnName.replace(/\..*$/, '');
630
+ dfnName = dfnName.replace(dfn.dataset.dfnFor + ".", '');
631
+ if (dfnName.match(/\(/)) {
632
+ dfnName = cleanMethodName(dfnName);
633
+ dfn.dataset.lt = dfnName;
634
+ dfn.dataset.dfnType = "method";
635
+ } else {
636
+ dfn.dataset.lt = dfnName;
637
+ if (dfnName.match(/^[A-Z]+$/)) {
638
+ dfn.dataset.dfnType = "const";
639
+ } else {
640
+ dfn.dataset.dfnType = "attribute";
641
+ }
642
+ }
643
+ } else if (dfnName.match(abstractOpRegExp)) {
644
+ dfnName = cleanMethodName(dfnName);
645
+ dfn.dataset.lt = dfnName;
646
+ const opName = dfnName.split('(')[0];
647
+
648
+ // distinguish global constructors from abstract operations
649
+ if (idlTypes[opName]) {
650
+ dfn.dataset.dfnType = "constructor";
651
+ dfn.dataset.dfnFor = opName;
652
+ } else {
653
+ // If the name is listed as an Abstract Method
654
+ // we set the dfn-for accordingly
655
+ // Note we look for a possibly more specific scope by looking at the
656
+ // title of the containing section. This is useful for
657
+ // "Environment Records" methods.
658
+ if (abstractMethods[opName]) {
659
+ const baseClass = abstractMethods[opName];
660
+ let parent = dfn.parentNode.closest('emu-clause');
661
+ while (parent) {
662
+ const title = parent.querySelector('h1')?.textContent.replace(sectionNumberRegExp, '').trim();
663
+ if (title?.toLowerCase().endsWith(baseClass.toLowerCase())) {
664
+ dfn.dataset.dfnFor = title;
665
+ break;
666
+ }
667
+ parent = parent.parentNode.closest('emu-clause');
668
+ }
669
+ if (!dfn.dataset.dfnFor) {
670
+ dfn.dataset.dfnFor = baseClass;
671
+ }
672
+ }
673
+ if (dfn.getAttribute("aoid")) {
674
+ dfn.dataset.lt = dfn.getAttribute("aoid") + '|' + dfn.dataset.lt;
675
+ }
676
+ dfn.dataset.dfnType = "abstract-op";
677
+ }
678
+ } else { // methods of the global object
679
+ dfnName = cleanMethodName(dfnName);
680
+ dfn.dataset.lt = dfnName;
681
+ dfn.dataset.dfnType = "method";
682
+ dfn.dataset.dfnFor = "globalThis";
683
+ }
684
+ definitionNames.add(dfnName);
685
+ }
686
+ });
687
+ // Extract abstract operations from <emu-eqn> with aoid attribute
688
+ [...document.querySelectorAll("emu-eqn[aoid]")]
689
+ .filter(legacySectionFilter)
690
+ .forEach(el => {
691
+ // Skip definitions of constant values (e.g. msPerDay)
692
+ if (el.textContent.match(/=/)) return;
693
+ const dfn = wrapWithDfn(el);
694
+ dfn.dataset.lt = el.getAttribute("aoid");
695
+ dfn.dataset.dfnType = "abstract-op";
696
+ dfn.id = el.id;
697
+ });
698
+
699
+ // Extract State Components from tables
700
+ [...document.querySelectorAll("figure > table")]
701
+ .filter(legacySectionFilter)
702
+ .forEach(el => {
703
+ const title = el.parentNode.querySelector("figcaption")?.textContent || "";
704
+ if (!title.match(/state components for/i)) return;
705
+ const scope = title.replace(/^.*state components for/i, '').trim();
706
+ for (const td of el.querySelectorAll("tr td:first-child")) {
707
+ const dfn = wrapWithDfn(td);
708
+ dfn.dataset.dfnFor = scope;
709
+ dfn.id = el.closest("emu-table[id],emu-clause[id]").id;
710
+ }
711
+ });
712
+
713
+ // Extract production rules
714
+ [...document.querySelectorAll("emu-grammar[type=definition] emu-production")]
715
+ .forEach(el => {
716
+ const dfn = wrapWithDfn(el);
717
+ dfn.id = el.id;
718
+ dfn.dataset.lt = el.getAttribute("name");
719
+ dfn.dataset.dfnType = "grammar";
720
+ dfn.dataset.noexport = "";
721
+ if (el.closest('[data-reffy-page$="additional-ecmascript-features-for-web-browsers.html"]')) {
722
+ // Production rules in Annex B replace some of the production rules
723
+ // defined in other sections for web browser hosts.
724
+ dfn.dataset.dfnFor = "Web browsers";
725
+ }
726
+ });
727
+
728
+ [...document.querySelectorAll("dfn")]
729
+ .filter(legacySectionFilter)
730
+ .forEach(el => {
731
+ // Skip definitions in conformance page and conventions page
732
+ if (el.closest('section[data-reffy-page$="conformance.html"]') ||
733
+ el.closest('section[data-reffy-page$="notational-conventions.html"]')) {
734
+ el.removeAttribute("id");
735
+ return;
736
+ }
737
+
738
+ // rely on the aoid attribute as a hint we're dealing
739
+ // with an abstract-op
740
+ if (el.getAttribute("aoid")) {
741
+ el.dataset.dfnType = "abstract-op";
742
+ }
743
+
744
+ // Mark well-known intrinsic objects as the same type as their visible object (if set), defaulting to "interface"
745
+ if (el.textContent.match(/^%[A-Z].*%$/)) {
746
+ el.dataset.dfnType = idlTypes[el.textContent.replace(/%/g, '')] || "interface";
747
+ definitionNames.add(el.textContent.trim());
748
+ }
749
+
750
+ // %names% in the global object section are operations of the globalThis object
751
+ if (el.closest('[data-reffy-page$="global-object.html"]') && el.textContent.match(/^%[a-z]+%/i)) {
752
+ el.dataset.dfnFor = "globalThis";
753
+ // TODO: this doesn't capture the arguments
754
+ el.dataset.dfnType = "method";
755
+ }
756
+
757
+ // Mark well-known symbols as "const"
758
+ // for lack of a better type, and as the WebIDL spec has been doing
759
+ if (el.textContent.match(/^@@[a-z]*$/i)) {
760
+ el.dataset.dfnType = "const";
761
+ }
762
+ if (el.getAttribute("variants")) {
763
+ el.dataset.lt = (el.dataset.lt ?? el.textContent.trim()) + "|" + el.getAttribute("variants");
764
+ }
765
+
766
+ // Skip definitions that have already been identified
767
+ // with a more specific typing
768
+ if (!el.dataset.dfnType) {
769
+ // we already have a matching typed definition
770
+ if (definitionNames.has(el.textContent.trim())) return;
771
+ }
772
+
773
+ // If the <dfn> has no id, we attach it the one from the closest
774
+ // <emu-clause> with an id
775
+ // Note that this means several definitions can share the same id
776
+ if (!el.getAttribute("id")) {
777
+ if (el.closest("emu-clause[id]")) {
778
+ el.setAttribute("id", el.closest("emu-clause").getAttribute("id"));
779
+ }
780
+ }
781
+
782
+ // Any generic <dfn> not previously filtered out
783
+ // is deemed to be exported, scoped to ECMAScript
784
+ if (!el.dataset.dfnType) {
785
+ if (!el.dataset.dfnFor) {
786
+ el.dataset.dfnFor = "ECMAScript";
787
+ }
788
+ el.dataset.export = "";
789
+ }
790
+ });
791
+ // Another pass of clean up for duplicates
792
+ // This cannot be done in the first pass
793
+ // because %Foo.prototype% does not necessarily get identified before
794
+ // the equivalent " prototype object" dfn
795
+
796
+ [...document.querySelectorAll("dfn[id][data-export]")]
797
+ .filter(legacySectionFilter)
798
+ .forEach(dfn => {
799
+ // we have the syntactic equivalent %x.prototype%
800
+ let m = dfn.textContent.trim().match(/^(.*) prototype( object)?$/);
801
+ if (m && definitionNames.has(`%${m[1].trim()}.prototype%`)) {
802
+ dfn.removeAttribute("id");
803
+ delete dfn.dataset.export;
804
+ return;
805
+ }
806
+ });
807
+ }
808
+
809
/**
 * Copy heading IDs down to the definitions they wrap.
 *
 * Targets every `<dfn>` found inside an `h2`-`h6` heading that has an ID and
 * that carries none of the definition attributes (`data-dfn-type`,
 * `data-dfn-for`, `data-export`, `data-noexport`, `data-lt`). A `<dfn>` that
 * already has an ID of its own is left untouched.
 */
function preProcessHTML() {
  const dfnsInHeadings = document.querySelectorAll(
    ':is(h2,h3,h4,h5,h6)[id]:not(:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt])) dfn'
  );

  for (const dfn of dfnsInHeadings) {
    // Resolve the enclosing heading first, then only hand its ID over when
    // the definition does not have one yet.
    const heading = dfn.closest("h2, h3, h4, h5, h6");
    const inheritedId = heading.id;
    if (dfn.id) {
      continue;
    }
    dfn.id = inheritedId;
  }
}
821
+
822
/**
 * Turn the `span.index-def` markers used by CSS 2.1 into definitions that
 * follow the definitions data model (CSS 2.1 predates that model).
 *
 * For each marker, the definition ID comes from the `name` attribute of a
 * nearby anchor, the linking texts come from the marker's `title` attribute
 * (or from the definition text when there is no title), and the definition
 * type is derived from the anchor's class when it is a known one.
 *
 * Note: CSS 2.2 follows the definitions data model and has no
 * `span.index-def` element, so nothing happens for CSS 2.2, as intended.
 */
function preProcessCSS2() {
  for (const span of document.querySelectorAll('span.index-def')) {
    // The anchor that holds the definition ID is either inside the marker
    // or wrapped around it. No anchor, no definition.
    const anchor = span.querySelector('a[name]') ?? span.closest('a[name]');
    if (!anchor) {
      continue;
    }

    // Some markers already come with a "<dfn>"; if that "<dfn>" has an ID,
    // it already follows the data model and must be left alone.
    let dfn = span.querySelector('dfn') ?? span.closest('dfn');
    if (dfn?.id) {
      continue;
    }

    // Otherwise wrap the whole content of the marker in a fresh "<dfn>"
    if (!dfn) {
      dfn = document.createElement('dfn');
      [...span.childNodes].forEach(node => dfn.appendChild(node));
      span.appendChild(dfn);
    }

    dfn.id = anchor.getAttribute('name');
    dfn.dataset.export = '';

    // Linking texts are "|"-separated. Suffixes such as "::definition of"
    // and wrapping single quotes are dropped; duplicates are then removed
    // while keeping the first occurrence of each text.
    const rawLinkingText = span.getAttribute('title') ?? dfn.textContent;
    const phrases = rawLinkingText
      .split('|')
      .map(normalize)
      .map(phrase => phrase
        .replace(/::definition of$/, '')
        .replace(/, definition of$/, '')
        .replace(/^'(.*)'$/, '$1'));
    dfn.dataset.lt = [...new Set(phrases)].join('|');

    // Only two anchor classes map to a definition type
    const anchorClass = anchor.getAttribute('class') ?? '';
    if (anchorClass === 'propdef-title') {
      dfn.dataset.dfnType = 'property';
    }
    else if (anchorClass === 'value-def') {
      // "<foo>" denotes a type, anything else is a value
      dfn.dataset.dfnType = /^<.*>$/.test(dfn.dataset.lt) ? 'type' : 'value';
    }
  }
}
886
+
887
/**
 * Pre-process the SVG 2 specification so that constructs that are not marked
 * up as typed definitions become ones:
 * - the element with ID "LinkElement" is flagged as an "element" definition;
 * - untyped `<dfn id>` under `.attrdef` become "element-attr" definitions,
 *   scoped using the matching row of the attribute index page;
 * - `<dt id>` that directly contain an `.adef` or `.property` are replaced by
 *   a `<dt>` that wraps an equivalent `<dfn>`;
 * - `<b id="__svg__[container]__[member]">` are replaced by a `<dfn>` typed
 *   after the matching IDL member;
 * - `<h3 id="Interface[name]">` are flagged as "interface" definitions when
 *   the IDL defines an interface with that name;
 * - remaining `<b id>` whose text is the name of an IDL construct are
 *   replaced by a `<dfn>` typed after that construct.
 *
 * The function parses the Web IDL extracted from the spec to type IDL
 * constructs. It returns nothing: the DOM is updated in place.
 */
function preProcessSVG2() {
  const idl = extractWebIdl();
  const idlTree = parse(idl);

  // the only element definition not properly marked up in the SVG spec
  const linkHeading = document.getElementById("LinkElement");
  if (linkHeading && !linkHeading.dataset.dfnType) {
    linkHeading.dataset.dfnType = "element";
    linkHeading.dataset.lt = "link";
  }

  document.querySelectorAll(".attrdef dfn[id]:not([data-dfn-type]):not([data-skip])")
    .forEach(el => {
      el.dataset.dfnType = "element-attr";
      // The attribute index page tells which element(s) the attribute is for
      const attrDesc = document.querySelector(
        `[data-reffy-page$="attindex.html"] th span.attr-name a[href$="#${el.id}"]`);
      if (attrDesc) {
        el.dataset.dfnFor = attrDesc.closest('tr').querySelector('td').textContent;
      } else {
        console.error(`Could not find description for ${el.textContent}`);
      }
    });

  document.querySelectorAll("dt[id] > .adef, dt[id] > .property")
    .forEach(el => {
      const dt = el.parentNode;
      const isAttribute = el.classList.contains("adef");
      const newDt = document.createElement("dt");
      const dfn = document.createElement("dfn");
      dfn.id = dt.id;
      dfn.dataset.dfnType = isAttribute ? "element-attr" : "property";
      // Attributes and properties are listed in different index pages
      const indexPage = isAttribute ? "attindex.html" : "propidx.html";
      const attrDesc = document.querySelector(
        `[data-reffy-page$="${indexPage}"] th a[href$="#${dfn.id}"]`);
      if (attrDesc) {
        // TODO: this doesn't deal with grouping of elements, e.g. "text content elements"
        dfn.dataset.dfnFor = [...attrDesc.closest('tr').querySelectorAll('span.element-name a')]
          .map(n => n.textContent)
          .join(',');
      } else {
        console.error(`Could not find description for ${el.textContent}/${dfn.id}`);
      }
      dfn.textContent = el.textContent;
      newDt.appendChild(dfn);
      dt.replaceWith(newDt);
    });

  document.querySelectorAll('b[id^="__svg__"]').forEach(el => {
    // IDs look like "__svg__[container]__[member]"
    const [,, containername, membername] = el.id.split('__');
    if (!containername || !membername) {
      return;
    }
    // The name may match an IDL construct that has no members (e.g. an enum
    // or a typedef), in which case there is nothing to look for.
    const container = idlTree.find(i => i.name === containername);
    const member = container?.members?.find(m => m.name === membername);
    if (!member) {
      return;
    }
    const dfn = document.createElement("dfn");
    dfn.id = el.id;
    dfn.textContent = el.textContent;
    dfn.dataset.dfnFor = containername;
    // IDL operations are "method" definitions in the definitions data model
    dfn.dataset.dfnType = member.type === "operation" ? "method" : member.type;
    el.replaceWith(dfn);
  });

  document.querySelectorAll('h3[id^="Interface"]:not([data-dfn-type])').forEach(el => {
    const name = el.id.slice("Interface".length);
    if (idlTree.some(i => i.name === name && i.type === "interface")) {
      el.dataset.dfnType = "interface";
      el.dataset.lt = name;
    }
  });

  document.querySelectorAll('b[id]:not([data-dfn-type])').forEach(el => {
    const name = el.textContent;
    const idlItem = idlTree.find(i => i.name === name);
    if (!idlItem) {
      return;
    }
    const dfn = document.createElement("dfn");
    dfn.id = el.id;
    dfn.dataset.dfnType = idlItem.type;
    dfn.textContent = el.textContent;
    el.replaceWith(dfn);
  });
}
965
+
966
/**
 * Expose the CDDL types of the standard prelude defined in the CDDL RFC.
 *
 * Other specs that define CDDL make extensive use of these types and need to
 * be able to link back to them, so the types must appear in the definitions
 * extracted from the RFC.
 *
 * The RFC only has one ID for the whole appendix that contains the standard
 * prelude. Rather than creating one definition per type, all with the same
 * ID and href (which could confuse tools that ingest the definitions), the
 * appendix heading is converted into a single definition whose linking texts
 * are the names of all the types.
 */
function preProcessRFC8610() {
  // The RFC is a succession of <pre> pages. The prelude starts on the page
  // that contains the appendix heading. Matching is done on text content
  // because the markup may contain HTML comments, and the page must also
  // contain an anchor with an ID so as not to match the table of contents.
  const pages = Array.from(document.querySelectorAll('pre.newpage'));

  const firstPage = pages.findIndex(page =>
    page.textContent.match(/Appendix .\.\s+Standard Prelude/) &&
    page.querySelector('a[id]'));
  if (firstPage === -1) {
    // No appendix heading, nothing to convert
    return;
  }

  // The prelude ends on the page that contains the figure caption
  const lastPage = pages.findIndex((page, pos) =>
    pos >= firstPage && page.textContent.match(/Figure \d+: CDDL Prelude/));
  if (lastPage === -1) {
    // No figure caption, the extent of the prelude is unknown
    return;
  }

  // Collect the left-hand side of all "name = ..." lines in the prelude
  const typeNames = [];
  for (const page of pages.slice(firstPage, lastPage + 1)) {
    for (const rule of page.innerHTML.matchAll(/^\s+([a-z0-9\-]+) = .*$/mg)) {
      typeNames.push(rule[1]);
    }
  }

  // Replace the anchor in the appendix heading with a cddl-type definition
  // that has all type names as linking texts
  const headingAnchor = pages[firstPage].querySelector(`a[id]`);
  const dfn = document.createElement("dfn");
  dfn.id = headingAnchor.id;
  dfn.textContent = headingAnchor.textContent;
  dfn.dataset.dfnType = 'cddl-type';
  dfn.dataset.lt = typeNames.join('|');
  dfn.dataset.export = '';
  headingAnchor.replaceWith(dfn);
}
1018
+
1019
+
1020
/**
 * WebGL 1.0 defines a few (~15) IDL attributes without following the
 * definitions data model. These IDL constructs have IDs that look like
 * `DOM-[interface]-[attr]`.
 *
 * The spec also defines a few IDL methods with anchors so that they can be
 * referenced. BCD and MDN typically link to these.
 *
 * There is not much choice to understand what the anchors map to and create
 * the appropriate dfns: the whole IDL needs to be extracted and parsed.
 *
 * The function replaces each anchor that can be mapped to an IDL attribute
 * or operation with a `<dfn>` scoped to the IDL construct that defines it.
 * Anchors that cannot be mapped are reported through `console.warn` and left
 * untouched. The function returns nothing: the DOM is updated in place.
 */
function preProcessWebGL1() {
  const idl = extractWebIdl();
  const idlTree = parse(idl);

  const attributes = [...document.querySelectorAll('.attribute-name a[id^=DOM-]')];
  for (const attribute of attributes) {
    // Notes:
    // - The interface name appears in the ID but... name cannot be trusted
    // because it targets the concrete interface and not the underlying mixin
    // when one exists, whereas we want to create a dfn scoped to the mixin.
    // - Fortunately, no two interfaces define the same attribute in WebGL1, so
    // we can just match on the attribute name
    const attrName = attribute.textContent.trim();
    const idlItems = idlTree.filter(i => i.members?.some(m =>
      m.type === 'attribute' && m.name === attrName));
    if (idlItems.length === 0) {
      console.warn('[reffy]', `could not find attribute ${attrName}`);
      continue;
    }
    if (idlItems.length > 1) {
      console.warn('[reffy]', `more than one matching attribute found for ${attrName}`);
      continue;
    }
    const dfn = document.createElement('dfn');
    dfn.id = attribute.id;
    dfn.dataset.dfnType = 'attribute';
    dfn.dataset.dfnFor = idlItems[0].name;
    dfn.textContent = attrName;
    attribute.replaceWith(dfn);
  }

  const methods = [...document.querySelectorAll('.idl-code a[name]')];
  for (const method of methods) {
    // Notes:
    // - The anchor also wraps possible flags and the return type
    // - The return type is best ignored: The IDL block was fixed to use
    // `undefined` but the prose still uses `void` (sigh!).
    // - The parameter names and types are after the anchor. We need to look at
    // them because some of the anchors target overloaded methods... We'll also
    // use them to create appropriate linking texts for the methods.
    // - We cannot match on parameter names for overloaded methods because the
    // spec uses *different* parameter names in the IDL block and in the prose
    // that defines the method (re-sigh!). Matching on the number of parameters is
    // enough to disambiguate between overloaded methods.
    const methodName = method.textContent.trim().split(/\s+/).pop();

    // Note the "s" flag as parameters may be split over multiple lines
    const signature = method.parentNode.textContent.match(/\((.*?)\)/s);
    if (!signature) {
      console.warn('[reffy]', `could not find parameters of method ${methodName}`);
      continue;
    }

    // An empty list of parameters must yield an empty array: splitting an
    // empty string would return an array with one (empty) item and the
    // method would then never match an IDL operation without parameters.
    const rawArgs = signature[1].trim();
    const methodArgs = rawArgs === '' ?
      [] :
      rawArgs.split(',').map(arg => arg.trim().split(/\s+/).pop());

    const idlItem = idlTree.find(i => i.members?.some(m =>
      m.type === 'operation' &&
      m.name === methodName &&
      m.arguments.length === methodArgs.length));
    if (!idlItem) {
      console.warn('[reffy]', `could not find method ${methodName}`);
      continue;
    }
    const dfn = document.createElement('dfn');
    dfn.id = method.getAttribute('name');
    dfn.dataset.dfnType = 'method';
    dfn.dataset.dfnFor = idlItem.name;
    dfn.dataset.lt = `${methodName}(${methodArgs.join(', ')})`;
    dfn.textContent = method.textContent;
    method.replaceWith(dfn);
  }
}