reffy 20.0.13 → 20.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +151 -151
  3. package/index.js +29 -29
  4. package/package.json +5 -5
  5. package/reffy.js +324 -324
  6. package/schemas/browserlib/extract-algorithms.json +52 -52
  7. package/schemas/browserlib/extract-cssdfn.json +108 -108
  8. package/schemas/browserlib/extract-dfns.json +90 -90
  9. package/schemas/browserlib/extract-elements.json +17 -17
  10. package/schemas/browserlib/extract-events.json +31 -31
  11. package/schemas/browserlib/extract-headings.json +19 -19
  12. package/schemas/browserlib/extract-ids.json +7 -7
  13. package/schemas/browserlib/extract-links.json +12 -12
  14. package/schemas/browserlib/extract-refs.json +12 -12
  15. package/schemas/common.json +876 -876
  16. package/schemas/files/extracts/algorithms.json +12 -12
  17. package/schemas/files/extracts/css.json +16 -16
  18. package/schemas/files/extracts/dfns.json +12 -12
  19. package/schemas/files/extracts/elements.json +12 -12
  20. package/schemas/files/extracts/events.json +12 -12
  21. package/schemas/files/extracts/headings.json +12 -12
  22. package/schemas/files/extracts/ids.json +12 -12
  23. package/schemas/files/extracts/links.json +12 -12
  24. package/schemas/files/extracts/refs.json +12 -12
  25. package/schemas/files/index.json +59 -59
  26. package/schemas/postprocessing/events.json +50 -50
  27. package/schemas/postprocessing/idlnames-parsed.json +27 -27
  28. package/schemas/postprocessing/idlnames.json +17 -17
  29. package/schemas/postprocessing/idlparsed.json +67 -67
  30. package/src/browserlib/clone-and-clean.mjs +24 -24
  31. package/src/browserlib/create-outline.mjs +353 -353
  32. package/src/browserlib/extract-algorithms.mjs +723 -723
  33. package/src/browserlib/extract-cddl.mjs +125 -125
  34. package/src/browserlib/extract-dfns.mjs +1093 -1093
  35. package/src/browserlib/extract-headings.mjs +76 -76
  36. package/src/browserlib/extract-ids.mjs +28 -28
  37. package/src/browserlib/extract-links.mjs +45 -45
  38. package/src/browserlib/extract-references.mjs +308 -308
  39. package/src/browserlib/extract-webidl.mjs +89 -89
  40. package/src/browserlib/get-absolute-url.mjs +29 -29
  41. package/src/browserlib/get-code-elements.mjs +20 -20
  42. package/src/browserlib/get-generator.mjs +26 -26
  43. package/src/browserlib/get-lastmodified-date.mjs +13 -13
  44. package/src/browserlib/get-revision.mjs +12 -12
  45. package/src/browserlib/get-title.mjs +14 -14
  46. package/src/browserlib/informative-selector.mjs +24 -24
  47. package/src/browserlib/map-ids-to-headings.mjs +173 -173
  48. package/src/browserlib/reffy.json +85 -85
  49. package/src/browserlib/trim-spaces.mjs +35 -35
  50. package/src/cli/check-missing-dfns.js +587 -587
  51. package/src/cli/merge-crawl-results.js +132 -132
  52. package/src/cli/parse-webidl.js +447 -447
  53. package/src/lib/css-grammar-parse-tree.schema.json +109 -109
  54. package/src/lib/css-grammar-parser.js +440 -440
  55. package/src/lib/fetch.js +51 -51
  56. package/src/lib/markdown-report.js +360 -360
  57. package/src/lib/mock-server.js +218 -218
  58. package/src/lib/post-processor.js +322 -322
  59. package/src/lib/throttled-queue.js +129 -129
  60. package/src/postprocessing/annotate-links.js +41 -41
  61. package/src/postprocessing/csscomplete.js +48 -48
  62. package/src/postprocessing/idlnames.js +391 -391
  63. package/src/postprocessing/idlparsed.js +179 -179
  64. package/src/postprocessing/patch-dfns.js +51 -51
  65. package/src/specs/missing-css-rules.json +197 -197
  66. package/src/specs/spec-equivalents.json +149 -149
  67. package/src/browserlib/extract-editors.mjs~ +0 -14
  68. package/src/browserlib/extract-events.mjs~ +0 -3
  69. package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
  70. package/src/browserlib/get-revision.mjs~ +0 -7
  71. package/src/cli/csstree-grammar-check.js +0 -28
  72. package/src/cli/csstree-grammar-check.js~ +0 -10
  73. package/src/cli/csstree-grammar-parser.js +0 -11
  74. package/src/cli/csstree-grammar-parser.js~ +0 -1
  75. package/src/cli/extract-editors.js~ +0 -38
  76. package/src/cli/process-specs.js~ +0 -28
  77. package/src/postprocessing/annotate-links.js~ +0 -8
  78. package/src/postprocessing/events.js~ +0 -245
@@ -1,723 +1,723 @@
1
- /**
2
- * Extract normative algorithms defined in specs.
3
- *
4
- * An algorithm extract is essentially an object with the following keys:
5
- * - `name`: The name of the algorithm, when one exists
6
- * - `href`: The URL with fragment to reach the algorithm, when one exists
7
- * - `html`: Some introductory prose for the algorithm. That prose may well
8
- * contain actual algorithmic operations, e.g.: "When invoked, run the following
9
- * steps in parallel". href/src attributes in the HTML have absolute URLs.
10
- * - `rationale`: A short string indicating the rationale for selecting the
11
- * algorithm. This property is mainly intended for helping with debugging.
12
- * Example values include ".algorithm" when the algorithm comes with an
13
- * "algorithm" class, "let" when a step was found with a related operation,
14
- * etc. Any verb in `stepOperations` may appear, as well as a few other regular
15
- * expressions (serialized as a string).
16
- * - `steps`: Atomic algorithm steps.
17
- *
18
- * Each step is essentially an object that follows the same structure as an
19
- * algorithm, except that it does not have `name`, `href` and `rationale` keys,
20
- * and may also have the following keys:
21
- * - `operation`: Gives the name of the main operation performed by the step,
22
- * for example "switch", "let", "set", "if", "return", "resolve", "reject",
23
- * "queue a task", "fire an event", etc.
24
- * - `case`: Used in switch steps to identify the switch condition that
25
- * triggers the step.
26
- * - `ignored`: Ordered lists found at the step level that do no look like
27
- * algorithm steps. Or maybe they are? The lists should get reviewed: they
28
- * usually describe inputs/outputs or conditions, but they may signal parts
29
- * where the extraction logic needs to be improved. The lists are reported as
30
- * text prose.
31
- * - `additional`: Each step should contain one and only one algorithm. When
32
- * other algorithms are found at the same level, they get reported in that
33
- * property. That usually either signals that the spec could be improved
34
- * because if fails to use different list items for different steps, and/or
35
- * that the extraction logic needs to be smarter.
36
- *
37
- * TODO: flag step operation when understood (queue a task, fire an event,
38
- * run in parallel, etc.) to ease analysis.
39
- * (the property is only set for identified "switch" constructs for now)
40
- * TODO: handle "read requests"
41
- * https://fetch.spec.whatwg.org/#incrementally-read-loop
42
- * https://w3c.github.io/webcodecs/#imagedecoder-fetch-stream-data-loop
43
- * TODO: handle "fetch" process request/response algorithms
44
- * https://wicg.github.io/background-fetch/#complete-a-record
45
- * https://wicg.github.io/nav-speculation/prefetch.html#create-navigation-params-by-fetching
46
- * TODO: support a switch without a ".switch" class
47
- * https://w3c.github.io/webcodecs/#dom-videoframe-videoframe
48
- * https://w3c.github.io/web-nfc/#dfn-map-text-to-ndef
49
- * TODO: support a switch that is not phrased as a switch
50
- * https://w3c.github.io/clipboard-apis/#to-os-specific-well-known-format
51
- * TODO: support a switch where cases don't have <dd>
52
- * https://fidoalliance.org/specs/fido-v2.1-ps-20210615/fido-client-to-authenticator-protocol-v2.1-ps-errata-20220621.html#sctn-minpinlength-extension
53
- * TODO: don't get confused by conditions that look like steps
54
- * (code reports them as "ignored", that's a good start, ignore them fully!)
55
- * https://w3c.github.io/webcodecs/#imagedecoder-decode-complete-frame
56
- * https://w3c.github.io/presentation-api/#dom-presentationrequest-start
57
- * https://w3c.github.io/clipboard-apis/#dom-clipboard-read
58
- * TODO: don't get confused by informative "algorithms"
59
- * (noting informative sections are not flagged as such in Bikeshed)
60
- * https://drafts.csswg.org/css-view-transitions-2/#lifecycle
61
- * TODO: convert branching operations to substeps when needed ("if")
62
- * https://drafts.css-houdini.org/css-layout-api-1/#construct-a-fragment-result
63
- * https://w3c.github.io/webappsec-credential-management/#dom-passwordcredential-store-slot
64
- * https://dom.spec.whatwg.org/#concept-create-element
65
- * TODO: don't get confused by intermediary notes that jeopardize steps lists
66
- * (but then, the specs need fixing!)
67
- * https://w3c.github.io/secure-payment-confirmation/#sctn-steps-to-check-if-a-payment-can-be-made
68
- * https://w3c.github.io/ServiceWorker/#on-fetch-request-algorithm
69
- * https://wicg.github.io/turtledove/#dom-navigator-createauctionnonce
70
- * TODO: convert inline operations to substeps when needed
71
- * TODO: filter out CSS algorithms that are not JS algorithms
72
- * https://drafts.fxtf.org/filter-effects/#interpolation-of-filter-functions
73
- * TODO: improve the algorithm steps detection mechanism. It's relatively easy
74
- * to miss steps.
75
- * TODO: don't skip intermediary <dl> levels and/or support "struct with keys"
76
- * https://w3c.github.io/webdriver-bidi/#parse-url-pattern
77
- * TODO: don't get confused by a switch that follows steps
78
- * https://w3c.github.io/geolocation/#dfn-acquire-a-position
79
- * TODO: support TC39 specs with <emu-alg> clauses
80
- * https://tc39.es/ecma402/
81
- * TODO: skip monkeypatching identified as such?
82
- * https://wicg.github.io/scroll-to-text-fragment/
83
- *
84
- * And then later:
85
- * TODO: extract algorithm parameters
86
- *
87
- * @function
88
- * @public
89
- * @return {Array(Object)} An Array of algorithms
90
- */
91
-
92
- import informativeSelector from './informative-selector.mjs';
93
- import getAbsoluteUrl from './get-absolute-url.mjs';
94
- import cloneAndClean from './clone-and-clean.mjs';
95
-
96
-
97
- /**
98
- * Algorithm steps typically start with verbs that define the operation to
99
- * perform.
100
- *
101
- * The following list of verbs is used to assess whether a set of steps "looks
102
- * like" a set of algorithm steps, so as to avoid extracting lists that are not
103
- * algorithms.
104
- *
105
- * The list is completed with a few branching operations that are not verbs:
106
- * "for", "if", "while".
107
- *
108
- * Using a growing list of verbs may not be a good idea. That said, it is an
109
- * instructive exercise to analyze the diversity of operations being used,
110
- * and their meaning (or lack of).
111
- *
112
- * Note some steps may start with an adverb, e.g., "Additionally",
113
- * "Optionally", "Asynchronously", or with contextualizations such as
114
- * "In step 6". These forms are not captured here. They will be captured
115
- * through the inline operations (see below) or need to be handled separately.
116
- * They will be reported in the `ignored` property otherwise.
117
- *
118
- * Note "Asynchronously", typically used in Service Workers, does not mean much
119
- * in a browsing context. It should probably rather be re-written using
120
- * "in parallel"
121
- * https://w3c.github.io/ServiceWorker/
122
- */
123
- const stepOperations = [
124
- 'abort',
125
- 'acknowledge',
126
- 'activate',
127
- 'add',
128
- 'adopt',
129
- 'advance',
130
- 'append',
131
- 'apply',
132
- 'ask',
133
- 'assert',
134
- 'assign',
135
- 'attach',
136
- 'attempt',
137
- 'batch',
138
- 'block',
139
- 'branch',
140
- 'call',
141
- 'check',
142
- 'cancel',
143
- 'cause',
144
- 'change',
145
- 'choose',
146
- 'clamp',
147
- 'clean',
148
- 'clear',
149
- 'close',
150
- 'collect',
151
- 'complete',
152
- 'compute',
153
- 'consume',
154
- 'continue',
155
- 'convert',
156
- 'copy',
157
- 'create',
158
- 'deactivate',
159
- 'decrease',
160
- 'decrement',
161
- 'decrypt',
162
- 'define',
163
- 'delete',
164
- 'dequeue',
165
- 'destroy',
166
- 'determine',
167
- 'discard',
168
- 'dismiss',
169
- 'dispatch',
170
- 'display',
171
- 'down-mix',
172
- 'do',
173
- 'dump',
174
- 'emit',
175
- 'empty',
176
- 'end',
177
- 'enqueue',
178
- 'ensure',
179
- 'error',
180
- 'establish',
181
- 'execute',
182
- 'extend',
183
- 'extract',
184
- 'fail',
185
- 'fetch',
186
- 'finalize',
187
- 'find',
188
- 'finish',
189
- 'fire',
190
- 'gather',
191
- 'generate',
192
- 'give',
193
- 'handle',
194
- 'hand-off',
195
- 'increase',
196
- 'increment',
197
- 'initialize',
198
- 'insert',
199
- 'interpret',
200
- 'invoke',
201
- 'issue',
202
- 'jump',
203
- 'let',
204
- 'load',
205
- 'make',
206
- 'mark',
207
- 'match',
208
- 'move',
209
- 'multiply',
210
- 'navigate',
211
- 'paint',
212
- 'parse',
213
- 'perform',
214
- 'place',
215
- 'pop',
216
- 'populate',
217
- 'prepare',
218
- 'prepend',
219
- 'process',
220
- 'prompt',
221
- 'push',
222
- 'query',
223
- 'queue',
224
- 'recalculate',
225
- 'rectify',
226
- 'reference',
227
- 'register',
228
- 'reinitialize',
229
- 'reject',
230
- 'release',
231
- 'remove',
232
- 'replace',
233
- 'reset',
234
- 'resolve',
235
- 'resolve',
236
- 'restore',
237
- 'render',
238
- 'remap',
239
- 'report',
240
- 'return',
241
- 'run',
242
- 'score',
243
- 'scroll',
244
- 'send',
245
- 'serialize',
246
- 'set',
247
- 'shuffle',
248
- 'skip',
249
- 'sort',
250
- 'split',
251
- 'spin',
252
- 'start',
253
- 'stop',
254
- 'store',
255
- 'strip',
256
- 'suspend',
257
- 'switch',
258
- 'take',
259
- 'terminate',
260
- 'throw',
261
- 'trap',
262
- 'try',
263
- 'undisplay',
264
- 'unset',
265
- 'up-mix',
266
- 'update',
267
- 'update',
268
- 'upgrade',
269
- 'use',
270
- 'validate',
271
- 'verify',
272
- 'visit',
273
- 'wait',
274
-
275
- 'for',
276
- 'if',
277
- 'while'
278
- ];
279
-
280
-
281
- /**
282
- * When the step does not start with a verb, or when that verb is not followed
283
- * by a white space, the following constructs help detect the actual operation.
284
- */
285
- const stepInlineOperations = [
286
- 'abort all these steps',
287
- 'abort these steps',
288
- 'fire a simple event',
289
- 'fire an event',
290
- 'in parallel',
291
- 'reject',
292
- 'resolve',
293
- 'run the following steps',
294
- 'run these steps',
295
- 'terminate these steps',
296
- /queue a( \w+)? task/i
297
- ];
298
-
299
-
300
- /**
301
- * Additional anchors that suggest algorithm steps
302
- */
303
- const stepAnchors = [
304
- /^⌛/,
305
- 'in parallel',
306
- /^otherwise(\,| )/i,
307
- ];
308
-
309
-
310
- /**
311
- * Return the normalized text content for the given DOM element, removing all
312
- * annotations
313
- */
314
- function getTextContent(el) {
315
- const clone = cloneAndClean(el);
316
- return normalize(clone.textContent);
317
- }
318
-
319
-
320
- /**
321
- * Return the normalized HTML content for the given DOM element, removing all
322
- * annotations
323
- */
324
- function getHTMLContent(el) {
325
- // Prepare mapping table to turn relative links to absolute ones
326
- // (we cannot do that once the element has been cloned because cloning
327
- // removes the element from the DOM tree)
328
- const relativeUrlSelector = '[href]:not([href^="http"]),[src]:not([src^="http"])';
329
- const relativeToAbsolute = {};
330
- const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
331
- for (const linkEl of el.querySelectorAll(relativeUrlSelector)) {
332
- const attr = linkEl.getAttribute('href') ? 'href' : 'src';
333
- const url = new URL(linkEl.getAttribute(attr), page ?? window.location.href);
334
- relativeToAbsolute[linkEl.getAttribute(attr)] = url.toString();
335
- }
336
-
337
- const clone = cloneAndClean(el);
338
- let ol;
339
- while (ol = clone.querySelector('ol')) {
340
- ol.remove();
341
- }
342
- for (const linkEl of clone.querySelectorAll(relativeUrlSelector)) {
343
- const attr = linkEl.getAttribute('href') ? 'href' : 'src';
344
- linkEl.setAttribute(attr, relativeToAbsolute[linkEl.getAttribute(attr)]);
345
- }
346
- return clone.innerHTML.trim();
347
- }
348
-
349
- /**
350
- * Normalize a text for serialization purpose
351
- */
352
- function normalize(str) {
353
- return str.replace(/\r|\n/g, ' ').replace(/\s+/g, ' ').trim();
354
- }
355
-
356
- /**
357
- * Return the name and href of the first dfn contained in the given element
358
- */
359
- function getDefinedNameIn(el) {
360
- const dfn = el.nodeName === 'DFN' ?
361
- el :
362
- el.querySelector('dfn,h2[data-dfn-type],h3[data-dfn-type],h4[data-dfn-type],h5[data-dfn-type],h6[data-dfn-type]');
363
- if (dfn) {
364
- let name = '';
365
- if (dfn.getAttribute('data-dfn-for')) {
366
- name = normalize(dfn.getAttribute('data-dfn-for').split(/,(?![^\(]*\))/)[0]) + '/';
367
- }
368
- if (dfn.getAttribute('data-lt')) {
369
- name += normalize(dfn.getAttribute('data-lt').split('|')[0]);
370
- }
371
- else {
372
- name += getTextContent(dfn);
373
- }
374
- if (dfn.id) {
375
- return { name, href: getAbsoluteUrl(dfn) };
376
- }
377
- else {
378
- // Two known exceptions to the rule:
379
- // - one due to CSS 2.1 not following the definitions data model:
380
- // https://www.w3.org/TR/CSS21/visudet.html#containing-block-details
381
- // - the other due to HTML still containing dfns without IDs as well,
382
- // including one for an algorithm:
383
- // https://html.spec.whatwg.org/multipage/server-sent-events.html#processField
384
- // It's possible to find an ID in both cases. But it's not clear that
385
- // CSS 2.1 algorithms are real algorithms; and it seems doable to fix the
386
- // HTML spec. Let's just return the name without href, not to end up
387
- // with a null `href` that the JSON schema forbids.
388
- return { name };
389
- }
390
- }
391
- else {
392
- const heading = el.querySelector('h2[id],h3[id],h4[id],h5[id],h6[id]');
393
- if (heading) {
394
- return { name: getTextContent(heading), href: getAbsoluteUrl(heading) };
395
- }
396
- }
397
- return {};
398
- }
399
-
400
-
401
- /**
402
- * Retrieve a pointer to the introductory paragraph for the algorithm, if
403
- * there's one.
404
- */
405
- function findIntroParagraph(algo) {
406
- let paragraph;
407
- let container = algo.root.closest('li,.algorithm');
408
- while (container) {
409
- const dfn = container.querySelector('dfn');
410
- if (dfn && !algo.root.contains(dfn)) {
411
- paragraph = dfn.closest('p,div,li');
412
- break;
413
- }
414
- if (container.nodeName === 'LI') {
415
- break;
416
- }
417
- container = container.parentElement.closest('li,.algorithm');
418
- }
419
-
420
- if (!paragraph) {
421
- // Consider that the introductory paragraph is the previous paragraph.
422
- // That's not going to be 100% correct. For example, we will incorrectly
423
- // capture an intermediary paragraph as in:
424
- // https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-serialized-csp
425
- // TODO: improve!
426
- paragraph = algo.root;
427
- while (paragraph && (paragraph.nodeName !== 'P' || paragraph.matches(informativeSelector))) {
428
- paragraph = paragraph.previousElementSibling;
429
- }
430
- }
431
-
432
- return paragraph;
433
- }
434
-
435
-
436
- /**
437
- * Find information about an algorithm (name and href).
438
- *
439
- * The name is given by a nearby `dfn`. If there's no nearby `dfn`, the
440
- * name is the content of the preceding paragraph.
441
- */
442
- function getAlgorithmInfo(algo, context) {
443
- // Look for a name in the algorithm container, if there's one.
444
- // Note some specs add the "algorithm" class to the `<ol>` and to the
445
- // wrapping container, and define the name in the wrapping container.
446
- let info = {};
447
-
448
- let container = algo.root.closest('.algorithm');
449
- if (!context?.nested) {
450
- while (container) {
451
- if (container.getAttribute('data-algorithm')) {
452
- info.name = normalize(container.getAttribute('data-algorithm'));
453
- if (container.getAttribute('data-algorithm-for')) {
454
- info.name = normalize(container.getAttribute('data-algorithm-for')) +
455
- '/' + info.name;
456
- }
457
- if (container.id) {
458
- // Use the container ID as anchor
459
- info.href = getAbsoluteUrl(container);
460
- }
461
- else {
462
- // Container has no ID but if there's a dfn in there, that's probably
463
- // the right anchor
464
- const dfn = getDefinedNameIn(container);
465
- if (dfn) {
466
- info.href = dfn.href;
467
- }
468
- }
469
- }
470
- else {
471
- info = getDefinedNameIn(container);
472
- if (info.name || info.href) {
473
- break;
474
- }
475
- }
476
- container = container.parentElement.closest('.algorithm');
477
- }
478
- }
479
-
480
- // Get the introductory prose from the previous paragraph
481
- let paragraph = algo.intro;
482
- if (paragraph) {
483
- // Also look for a definition in the paragraph if we don't have a name and
484
- // href already.
485
- if (!context?.nested && !(info.name && info.href)) {
486
- info = Object.assign(getDefinedNameIn(paragraph), info);
487
- }
488
- info.html = getHTMLContent(paragraph);
489
- }
490
- else if (['LI', 'DD', 'DIV'].includes(algo.root.parentElement.nodeName)) {
491
- // If there's no paragraph, we may be in a list or definition list, the
492
- // introductory prose is whatever text exists before the algorithm
493
- const textEl = document.createElement('div');
494
- let node = algo.root.parentElement.firstChild;
495
- while (node !== algo.root) {
496
- textEl.appendChild(node.cloneNode(true));
497
- node = node.nextSibling;
498
- }
499
- if (!context?.nested && !(info.name && info.href)) {
500
- info = Object.assign(getDefinedNameIn(textEl), info);
501
- }
502
- info.html = getHTMLContent(textEl);
503
- }
504
-
505
- if (!context?.nested && !(info.name && info.href) &&
506
- algo.root.parentElement.nodeName === 'DD') {
507
- let dt = algo.root.parentElement.previousElementSibling;
508
- while (dt && dt.nodeName !== 'DT') {
509
- dt = dt.previousElementSibling;
510
- }
511
- if (dt) {
512
- info = Object.assign(getDefinedNameIn(dt), info);
513
- }
514
- }
515
-
516
- // TODO: look for the closest heading?
517
- return info;
518
- }
519
-
520
- /**
521
- * Serialize the given algorithm
522
- *
523
- * Context object allows to distinguish between top-level algorithms and
524
- * nested ones. Nested ones typically don't have names.
525
- */
526
- function serializeAlgorithm(algo, context) {
527
- let res = getAlgorithmInfo(algo, context);
528
- res.rationale = algo.rationale;
529
- const steps = serializeSteps(algo.root);
530
- if (steps.length > 0) {
531
- res.steps = steps;
532
- }
533
- return res;
534
- }
535
-
536
- /**
537
- * Serialize the given steps contained in the given root element.
538
- */
539
- function serializeSteps(root) {
540
- if (root.nodeName === 'DL') {
541
- return [
542
- {
543
- operation: 'switch',
544
- steps: [...root.querySelectorAll('& > dt')].map(option => {
545
- let dd = option.nextElementSibling;
546
- while (dd && dd.nodeName !== 'DD') {
547
- dd = dd.nextElementSibling;
548
- }
549
- if (!dd) {
550
- throw new Error('Switch option without <dd> found: ' + option.textContent);
551
- }
552
- return Object.assign(
553
- { 'case': getTextContent(option) },
554
- serializeStep(dd));
555
- })
556
- }
557
- ]
558
- }
559
- else if (root.nodeName === 'OL') {
560
- return [...root.querySelectorAll('& > li')].map(li => serializeStep(li));
561
- }
562
- else {
563
- return [];
564
- }
565
- }
566
-
567
- /**
568
- * Serialize an algorithm step
569
- */
570
- function serializeStep(li) {
571
- let res = {};
572
- const candidateAlgorithms = findAlgorithms(li, { includeIgnored: true });
573
- const algorithms = candidateAlgorithms.filter(algo => !!algo.rationale);
574
- if (algorithms.length > 0) {
575
- res = serializeAlgorithm(algorithms[0], { nested: true });
576
- }
577
- if (!res.html) {
578
- res.html = getHTMLContent(li);
579
- }
580
- if (algorithms.length > 1) {
581
- res.additional = algorithms.slice(1)
582
- .map(algo => serializeAlgorithm(algo, { nested: true }));
583
- }
584
- const ignoredAlgorithms = candidateAlgorithms.filter(algo => !algo.rationale);
585
- if (ignoredAlgorithms.length > 0) {
586
- res.ignored = ignoredAlgorithms.map(algo => getTextContent(algo.root));
587
-
588
- }
589
- return res;
590
- }
591
-
592
- /**
593
- * Parse a list element looking for algorithmic operations or other anchors
594
- * that should allow us to assess that the steps are indeed part of an
595
- * algorithm. Return a string representation of that rationale.
596
- */
597
- function findRationale(ol) {
598
- let rationale = null;
599
-
600
- if (ol.matches('.algorithm')) {
601
- return '.algorithm';
602
- }
603
- [...ol.querySelectorAll('li')].find(li => {
604
- const text = getTextContent(li).toLowerCase();
605
- rationale = stepOperations.find(op => {
606
- return text.match(new RegExp(`^${op}(\\.|:| )`, 'i'));
607
- });
608
-
609
- if (!rationale) {
610
- rationale = stepInlineOperations.find(op => {
611
- if (typeof op === 'string') {
612
- return text.includes(op);
613
- }
614
- else {
615
- return text.match(op);
616
- }
617
- });
618
- }
619
-
620
- if (!rationale) {
621
- rationale = stepAnchors.find(anchor => {
622
- if (typeof anchor === 'string') {
623
- return text.includes(anchor);
624
- }
625
- else {
626
- return text.match(anchor);
627
- }
628
- });
629
- }
630
-
631
- return !!rationale;
632
- });
633
-
634
- return rationale?.toString();
635
- }
636
-
637
-
638
- /**
639
- * Find the list of normative algorithms defined in the document's section
640
- */
641
- function findAlgorithms(section, { includeIgnored } = { includeIgnored: false }) {
642
- // Well-behaved algorithms have an "algorithm" class and start with an <ol>,
643
- // or they have a "switch" class, à la:
644
- // https://dom.spec.whatwg.org/#locate-a-namespace
645
- const actual = [...section.querySelectorAll('.algorithm,.switch')]
646
- .filter(el => !el.closest(informativeSelector))
647
- .map(el => Object.assign({
648
- rationale: el.matches('.algorithm') ? '.algorithm' : '.switch',
649
- root: el
650
- }))
651
- .map(algo => {
652
- if (algo.root.nodeName !== 'DL' && algo.root.nodeName !== 'OL') {
653
- algo.root = algo.root.querySelector('ol');
654
- }
655
- return algo;
656
- })
657
- .filter(algo => !!algo.root);
658
-
659
- // Probable algorithms do not have an "algorithm" class but start with an <ol>
660
- const probable = [...section.querySelectorAll('ol')]
661
- .filter(ol => !ol.closest(informativeSelector))
662
- .filter(ol => !ol.closest('nav,.toc,#toc'))
663
- .filter(ol => !actual.find(algo => algo.root.contains(ol)))
664
- // Find an interesting anchor in there to filter out
665
- // lists that don't look like steps
666
- .map(ol => {
667
- const rationale = findRationale(ol);
668
- return { rationale: rationale?.toString(), root: ol };
669
- })
670
- .filter(algo => includeIgnored || !!algo.rationale);
671
-
672
- // Merge actual and probable algorithms, dropping duplicates and algorithms
673
- // that are nested under other algorithms.
674
- let all = actual.concat(probable);
675
- all = all.filter((algo, idx) => all.findIndex(al => al.root === algo.root) === idx);
676
- all = all.filter(algo1 => !all.find(algo2 => algo1 !== algo2 && algo2.root.contains(algo1.root)));
677
-
678
- // Look for the "intro" paragraph for the algorithms, if there's one.
679
- // This will be used right after to extract "one-step" algorithms.
680
- for (const algo of all) {
681
- algo.intro = findIntroParagraph(algo);
682
- }
683
-
684
- // Complete the list with probable "one-step" algorithms: those defined in a
685
- // paragraph, that start with "To " followed by an exported definition of
686
- // type "dfn" or "abstract-op", and that don't have any steps (in other
687
- // words, that haven't been captured yet).
688
- const candidateDfnSelectors = [
689
- 'dfn[data-export][data-dfn-type="dfn"]',
690
- 'dfn[data-export][data-dfn-type="abstract-op"]'
691
- ];
692
- const probableOneLine = [...section.querySelectorAll(candidateDfnSelectors.map(s => `p:has(${s})`).join(','))]
693
- .filter(p => p.textContent.startsWith('To ' + p.querySelector(candidateDfnSelectors.join(',')).textContent))
694
- .filter(p => !all.find(algo => algo.intro === p))
695
- .map(p => {
696
- return { rationale: 'To <dfn>', root: p, intro: p };
697
- })
698
- all = all.concat(probableOneLine);
699
-
700
- // Consider algorithms in document order
701
- // (if we find more than one at the same level, first one will be reported as
702
- // the actual algorithm, the other ones as "additional" algorithms)
703
- all.sort((algo1, algo2) => {
704
- const cmp = algo1.root.compareDocumentPosition(algo2.root);
705
- if (cmp & Node.DOCUMENT_POSITION_PRECEDING) {
706
- return 1;
707
- }
708
- else if (algo1.root !== algo2.root) {
709
- return -1;
710
- }
711
- });
712
- return all;
713
- }
714
-
715
-
716
- export default function (spec, idToHeading = {}) {
717
- // ECMA specs typically use <emu-alg> clauses, not supported for now.
718
- if (spec.organization === 'Ecma International') {
719
- return [];
720
- }
721
- const algorithms = findAlgorithms(document);
722
- return algorithms.map(algo => serializeAlgorithm(algo));
723
- }
1
+ /**
2
+ * Extract normative algorithms defined in specs.
3
+ *
4
+ * An algorithm extract is essentially an object with the following keys:
5
+ * - `name`: The name of the algorithm, when one exists
6
+ * - `href`: The URL with fragment to reach the algorithm, when one exists
7
+ * - `html`: Some introductory prose for the algorithm. That prose may well
8
+ * contain actual algorithmic operations, e.g.: "When invoked, run the following
9
+ * steps in parallel". href/src attributes in the HTML have absolute URLs.
10
+ * - `rationale`: A short string indicating the rationale for selecting the
11
+ * algorithm. This property is mainly intended for helping with debugging.
12
+ * Example values include ".algorithm" when the algorithm comes with an
13
+ * "algorithm" class, "let" when a step was found with a related operation,
14
+ * etc. Any verb in `stepOperations` may appear, as well as a few other regular
15
+ * expressions (serialized as a string).
16
+ * - `steps`: Atomic algorithm steps.
17
+ *
18
+ * Each step is essentially an object that follows the same structure as an
19
+ * algorithm, except that it does not have `name`, `href` and `rationale` keys,
20
+ * and may also have the following keys:
21
+ * - `operation`: Gives the name of the main operation performed by the step,
22
+ * for example "switch", "let", "set", "if", "return", "resolve", "reject",
23
+ * "queue a task", "fire an event", etc.
24
+ * - `case`: Used in switch steps to identify the switch condition that
25
+ * triggers the step.
26
+ * - `ignored`: Ordered lists found at the step level that do no look like
27
+ * algorithm steps. Or maybe they are? The lists should get reviewed: they
28
+ * usually describe inputs/outputs or conditions, but they may signal parts
29
+ * where the extraction logic needs to be improved. The lists are reported as
30
+ * text prose.
31
+ * - `additional`: Each step should contain one and only one algorithm. When
32
+ * other algorithms are found at the same level, they get reported in that
33
+ * property. That usually either signals that the spec could be improved
34
+ * because if fails to use different list items for different steps, and/or
35
+ * that the extraction logic needs to be smarter.
36
+ *
37
+ * TODO: flag step operation when understood (queue a task, fire an event,
38
+ * run in parallel, etc.) to ease analysis.
39
+ * (the property is only set for identified "switch" constructs for now)
40
+ * TODO: handle "read requests"
41
+ * https://fetch.spec.whatwg.org/#incrementally-read-loop
42
+ * https://w3c.github.io/webcodecs/#imagedecoder-fetch-stream-data-loop
43
+ * TODO: handle "fetch" process request/response algorithms
44
+ * https://wicg.github.io/background-fetch/#complete-a-record
45
+ * https://wicg.github.io/nav-speculation/prefetch.html#create-navigation-params-by-fetching
46
+ * TODO: support a switch without a ".switch" class
47
+ * https://w3c.github.io/webcodecs/#dom-videoframe-videoframe
48
+ * https://w3c.github.io/web-nfc/#dfn-map-text-to-ndef
49
+ * TODO: support a switch that is not phrased as a switch
50
+ * https://w3c.github.io/clipboard-apis/#to-os-specific-well-known-format
51
+ * TODO: support a switch where cases don't have <dd>
52
+ * https://fidoalliance.org/specs/fido-v2.1-ps-20210615/fido-client-to-authenticator-protocol-v2.1-ps-errata-20220621.html#sctn-minpinlength-extension
53
+ * TODO: don't get confused by conditions that look like steps
54
+ * (code reports them as "ignored", that's a good start, ignore them fully!)
55
+ * https://w3c.github.io/webcodecs/#imagedecoder-decode-complete-frame
56
+ * https://w3c.github.io/presentation-api/#dom-presentationrequest-start
57
+ * https://w3c.github.io/clipboard-apis/#dom-clipboard-read
58
+ * TODO: don't get confused by informative "algorithms"
59
+ * (noting informative sections are not flagged as such in Bikeshed)
60
+ * https://drafts.csswg.org/css-view-transitions-2/#lifecycle
61
+ * TODO: convert branching operations to substeps when needed ("if")
62
+ * https://drafts.css-houdini.org/css-layout-api-1/#construct-a-fragment-result
63
+ * https://w3c.github.io/webappsec-credential-management/#dom-passwordcredential-store-slot
64
+ * https://dom.spec.whatwg.org/#concept-create-element
65
+ * TODO: don't get confused by intermediary notes that jeopardize steps lists
66
+ * (but then, the specs need fixing!)
67
+ * https://w3c.github.io/secure-payment-confirmation/#sctn-steps-to-check-if-a-payment-can-be-made
68
+ * https://w3c.github.io/ServiceWorker/#on-fetch-request-algorithm
69
+ * https://wicg.github.io/turtledove/#dom-navigator-createauctionnonce
70
+ * TODO: convert inline operations to substeps when needed
71
+ * TODO: filter out CSS algorithms that are not JS algorithms
72
+ * https://drafts.fxtf.org/filter-effects/#interpolation-of-filter-functions
73
+ * TODO: improve the algorithm steps detection mechanism. It's relatively easy
74
+ * to miss steps.
75
+ * TODO: don't skip intermediary <dl> levels and/or support "struct with keys"
76
+ * https://w3c.github.io/webdriver-bidi/#parse-url-pattern
77
+ * TODO: don't get confused by a switch that follows steps
78
+ * https://w3c.github.io/geolocation/#dfn-acquire-a-position
79
+ * TODO: support TC39 specs with <emu-alg> clauses
80
+ * https://tc39.es/ecma402/
81
+ * TODO: skip monkeypatching identified as such?
82
+ * https://wicg.github.io/scroll-to-text-fragment/
83
+ *
84
+ * And then later:
85
+ * TODO: extract algorithm parameters
86
+ *
87
+ * @function
88
+ * @public
89
+ * @return {Array(Object)} An Array of algorithms
90
+ */
91
+
92
+ import informativeSelector from './informative-selector.mjs';
93
+ import getAbsoluteUrl from './get-absolute-url.mjs';
94
+ import cloneAndClean from './clone-and-clean.mjs';
95
+
96
+
97
/**
 * Algorithm steps typically start with verbs that define the operation to
 * perform.
 *
 * The following list of verbs is used to assess whether a set of steps "looks
 * like" a set of algorithm steps, so as to avoid extracting lists that are not
 * algorithms. A verb only counts when it starts the step and is immediately
 * followed by ".", ":" or a space, which means the order of the entries does
 * not influence which verb gets reported.
 *
 * The list is completed with a few branching operations that are not verbs:
 * "for", "if", "while".
 *
 * Each entry must appear only once: a duplicate cannot change the outcome and
 * only adds a useless comparison for every step being analyzed.
 *
 * Using a growing list of verbs may not be a good idea. That said, it is an
 * instructive exercise to analyze the diversity of operations being used,
 * and their meaning (or lack thereof).
 *
 * Note some steps may start with an adverb, e.g., "Additionally",
 * "Optionally", "Asynchronously", or with contextualizations such as
 * "In step 6". These forms are not captured here. They will be captured
 * through the inline operations (see `stepInlineOperations`) or need to be
 * handled separately. They will be reported in the `ignored` property
 * otherwise.
 *
 * Note "Asynchronously", typically used in Service Workers, does not mean much
 * in a browsing context. It should probably rather be re-written using
 * "in parallel"
 * https://w3c.github.io/ServiceWorker/
 */
const stepOperations = [
  'abort',
  'acknowledge',
  'activate',
  'add',
  'adopt',
  'advance',
  'append',
  'apply',
  'ask',
  'assert',
  'assign',
  'attach',
  'attempt',
  'batch',
  'block',
  'branch',
  'call',
  'check',
  'cancel',
  'cause',
  'change',
  'choose',
  'clamp',
  'clean',
  'clear',
  'close',
  'collect',
  'complete',
  'compute',
  'consume',
  'continue',
  'convert',
  'copy',
  'create',
  'deactivate',
  'decrease',
  'decrement',
  'decrypt',
  'define',
  'delete',
  'dequeue',
  'destroy',
  'determine',
  'discard',
  'dismiss',
  'dispatch',
  'display',
  'down-mix',
  'do',
  'dump',
  'emit',
  'empty',
  'end',
  'enqueue',
  'ensure',
  'error',
  'establish',
  'execute',
  'extend',
  'extract',
  'fail',
  'fetch',
  'finalize',
  'find',
  'finish',
  'fire',
  'gather',
  'generate',
  'give',
  'handle',
  'hand-off',
  'increase',
  'increment',
  'initialize',
  'insert',
  'interpret',
  'invoke',
  'issue',
  'jump',
  'let',
  'load',
  'make',
  'mark',
  'match',
  'move',
  'multiply',
  'navigate',
  'paint',
  'parse',
  'perform',
  'place',
  'pop',
  'populate',
  'prepare',
  'prepend',
  'process',
  'prompt',
  'push',
  'query',
  'queue',
  'recalculate',
  'rectify',
  'reference',
  'register',
  'reinitialize',
  'reject',
  'release',
  'remove',
  'replace',
  'reset',
  'resolve',
  'restore',
  'render',
  'remap',
  'report',
  'return',
  'run',
  'score',
  'scroll',
  'send',
  'serialize',
  'set',
  'shuffle',
  'skip',
  'sort',
  'split',
  'spin',
  'start',
  'stop',
  'store',
  'strip',
  'suspend',
  'switch',
  'take',
  'terminate',
  'throw',
  'trap',
  'try',
  'undisplay',
  'unset',
  'up-mix',
  'update',
  'upgrade',
  'use',
  'validate',
  'verify',
  'visit',
  'wait',

  // Branching operations that are not verbs
  'for',
  'if',
  'while'
];
279
+
280
+
281
/**
 * Constructs that reveal the operation performed by a step when the step does
 * not start with a known verb (or when that verb is not followed by a white
 * space).
 *
 * String entries are searched for anywhere in the lowercased text of the
 * step. Regular expression entries are matched against that same text.
 */
const stepInlineOperations = [
  // Early exits
  'abort all these steps', 'abort these steps',
  // Events
  'fire a simple event', 'fire an event',
  // Concurrency
  'in parallel',
  // Promises
  'reject', 'resolve',
  // Nested steps
  'run the following steps', 'run these steps',
  'terminate these steps',
  // Task queuing, with an optional qualifier as in "queue a global task"
  /queue a( \w+)? task/i
];
298
+
299
+
300
/**
 * Last-resort hints that a list item is an algorithm step: these anchors are
 * only considered when neither a leading verb nor an inline operation could
 * be found in the step.
 *
 * String entries are searched for anywhere in the lowercased text of the
 * step. Regular expression entries are matched against that same text.
 */
const stepAnchors = [
  /^⌛/, 'in parallel', /^otherwise(\,| )/i,
];
308
+
309
+
310
/**
 * Compute the text of a DOM element for serialization purpose.
 *
 * The element is first passed through `cloneAndClean` (the imported helper
 * that is expected to strip annotations from a copy of the element), then the
 * text content of the result gets normalized.
 *
 * @param {Element} el Element to read
 * @return {string} Normalized text content
 */
function getTextContent(el) {
  return normalize(cloneAndClean(el).textContent);
}
318
+
319
+
320
/**
 * Return the HTML content for the given DOM element, without annotations and
 * without ordered lists, and with relative `href`/`src` URLs turned into
 * absolute ones.
 *
 * Relative URLs are resolved against the `data-reffy-page` attribute of the
 * closest ancestor that has one, or against the URL of the current document
 * when there is no such ancestor.
 *
 * @param {Element} el Element to serialize
 * @return {string} Trimmed inner HTML of the cleaned copy of the element
 */
function getHTMLContent(el) {
  const relativeUrlSelector = '[href]:not([href^="http"]),[src]:not([src^="http"])';
  const urlAttributes = ['href', 'src'];

  // Prepare mapping table to turn relative links to absolute ones
  // (we cannot do that once the element has been cloned because cloning
  // removes the element from the DOM tree)
  // A Map is used so that attribute values such as "constructor" or
  // "__proto__" cannot collide with members inherited by a plain object.
  const relativeToAbsolute = new Map();
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  for (const linkEl of el.querySelectorAll(relativeUrlSelector)) {
    for (const attr of urlAttributes) {
      // Test the presence of the attribute, not the truthiness of its value:
      // an empty `href` is a valid relative URL that targets the page itself.
      const value = linkEl.getAttribute(attr);
      if (value === null || value.startsWith('http') || relativeToAbsolute.has(value)) {
        continue;
      }
      const url = new URL(value, page ?? window.location.href);
      relativeToAbsolute.set(value, url.toString());
    }
  }

  const clone = cloneAndClean(el);

  // Nested steps get serialized separately, drop them from the prose
  for (const ol of clone.querySelectorAll('ol')) {
    ol.remove();
  }

  for (const linkEl of clone.querySelectorAll(relativeUrlSelector)) {
    for (const attr of urlAttributes) {
      // Only touch attributes for which an absolute URL was computed, to
      // avoid writing "undefined" when the value is not in the table.
      const absolute = relativeToAbsolute.get(linkEl.getAttribute(attr));
      if (absolute !== undefined) {
        linkEl.setAttribute(attr, absolute);
      }
    }
  }
  return clone.innerHTML.trim();
}
348
+
349
/**
 * Normalize a text for serialization purpose: runs of white space (including
 * line breaks) are collapsed to a single space, and leading and trailing white
 * space is dropped.
 *
 * @param {string} str Text to normalize
 * @return {string} Normalized text
 */
function normalize(str) {
  // Splitting on white space yields an empty token when the text starts or
  // ends with white space, hence the filter.
  const words = str.split(/\s+/).filter(word => word !== '');
  return words.join(' ');
}
355
+
356
/**
 * Return the name and href of the first definition found in the given
 * element (or of the element itself if it is a `<dfn>`).
 *
 * A definition is a `<dfn>` or a heading flagged with `data-dfn-type`. When
 * there is no definition, the function falls back to the first heading that
 * has an ID. It returns an empty object when nothing suitable is found.
 *
 * @param {Element} el Element to look into
 * @return {Object} Object with a `name` and, when known, an `href`
 */
function getDefinedNameIn(el) {
  const definitionSelector =
    'dfn,h2[data-dfn-type],h3[data-dfn-type],h4[data-dfn-type],h5[data-dfn-type],h6[data-dfn-type]';
  const dfn = (el.nodeName === 'DFN') ? el : el.querySelector(definitionSelector);

  if (!dfn) {
    // No definition, look for a heading that can be linked to
    const heading = el.querySelector('h2[id],h3[id],h4[id],h5[id],h6[id]');
    if (!heading) {
      return {};
    }
    return { name: getTextContent(heading), href: getAbsoluteUrl(heading) };
  }

  // The name is scoped to the first namespace listed in `data-dfn-for`.
  // Commas within parentheses (method arguments) are not separators.
  const namespaces = dfn.getAttribute('data-dfn-for');
  const prefix = namespaces ?
    normalize(namespaces.split(/,(?![^\(]*\))/)[0]) + '/' :
    '';

  // The first linking text takes precedence over the actual text
  const linkingTexts = dfn.getAttribute('data-lt');
  const label = linkingTexts ?
    normalize(linkingTexts.split('|')[0]) :
    getTextContent(dfn);

  const name = prefix + label;
  if (!dfn.id) {
    // Two known exceptions to the rule:
    // - one due to CSS 2.1 not following the definitions data model:
    // https://www.w3.org/TR/CSS21/visudet.html#containing-block-details
    // - the other due to HTML still containing dfns without IDs as well,
    // including one for an algorithm:
    // https://html.spec.whatwg.org/multipage/server-sent-events.html#processField
    // It's possible to find an ID in both cases. But it's not clear that
    // CSS 2.1 algorithms are real algorithms; and it seems doable to fix the
    // HTML spec. Let's just return the name without href, not to end up
    // with a null `href` that the JSON schema forbids.
    return { name };
  }
  return { name, href: getAbsoluteUrl(dfn) };
}
399
+
400
+
401
/**
 * Retrieve a pointer to the introductory paragraph for the algorithm, if
 * there's one.
 *
 * The function first looks for a `<dfn>` that sits outside of the algorithm
 * steps, in the closest list item or `.algorithm` container (walking up
 * `.algorithm` containers, but never past a list item). Failing that, it
 * returns the closest preceding sibling paragraph that is not informative.
 *
 * @param {Object} algo Algorithm under construction, with a `root` element
 * @return {Element|null} The introductory element, null when there is none
 */
function findIntroParagraph(algo) {
  const { root } = algo;
  const containerSelector = 'li,.algorithm';

  let definitionBlock = null;
  for (let scope = root.closest(containerSelector);
      scope;
      scope = scope.parentElement.closest(containerSelector)) {
    const dfn = scope.querySelector('dfn');
    if (dfn && !root.contains(dfn)) {
      definitionBlock = dfn.closest('p,div,li');
      break;
    }
    if (scope.nodeName === 'LI') {
      // A list item is a step of a parent algorithm: a definition found
      // further up would describe that parent algorithm.
      break;
    }
  }
  if (definitionBlock) {
    return definitionBlock;
  }

  // Consider that the introductory paragraph is the previous paragraph.
  // That's not going to be 100% correct. For example, we will incorrectly
  // capture an intermediary paragraph as in:
  // https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-serialized-csp
  // TODO: improve!
  let candidate = root;
  while (candidate &&
      !(candidate.nodeName === 'P' && !candidate.matches(informativeSelector))) {
    candidate = candidate.previousElementSibling;
  }
  return candidate;
}
434
+
435
+
436
/**
 * Find information about an algorithm (name, href and introductory HTML).
 *
 * For top-level algorithms, the name and href are looked for, in order:
 * 1. in the enclosing `.algorithm` containers (`data-algorithm` attributes,
 *    or a definition/heading found in the container);
 * 2. in the introductory paragraph, or in the content that precedes the
 *    steps when the steps are in a `<li>`, `<dd>` or `<div>`;
 * 3. in the `<dt>` that precedes the `<dd>` that contains the steps.
 * Information found first takes precedence, later sources only fill gaps.
 *
 * Nested algorithms (`context.nested`) are not named: only their
 * introductory HTML gets reported.
 *
 * @param {Object} algo Algorithm with a `root` element and an optional
 *   `intro` element
 * @param {Object} [context] Set `nested` to true for nested algorithms
 * @return {Object} Object with optional `name`, `href` and `html` keys
 */
function getAlgorithmInfo(algo, context) {
  const nested = !!context?.nested;
  let info = {};

  // Later sources are only needed for top-level algorithms that still lack
  // a name or an href.
  const isIncomplete = () => !nested && !(info.name && info.href);

  // Look for a name in the algorithm container, if there's one.
  // Note some specs add the "algorithm" class to the `<ol>` and to the
  // wrapping container, and define the name in the wrapping container.
  if (!nested) {
    let container = algo.root.closest('.algorithm');
    while (container) {
      if (container.getAttribute('data-algorithm')) {
        info.name = normalize(container.getAttribute('data-algorithm'));
        if (container.getAttribute('data-algorithm-for')) {
          info.name = normalize(container.getAttribute('data-algorithm-for')) +
            '/' + info.name;
        }
        // Use the container ID as anchor. If the container has no ID but
        // there's a dfn in there, that's probably the right anchor.
        const href = container.id ?
          getAbsoluteUrl(container) :
          getDefinedNameIn(container).href;
        if (href) {
          info.href = href;
        }
        else {
          // Do not keep an `href` property set to `undefined`: it would be
          // copied by the `Object.assign` calls below and would erase the
          // href found in the introductory prose.
          delete info.href;
        }
      }
      else {
        // Only replace what we already have when the container actually
        // defines something, not to lose a name found in an inner container.
        const defined = getDefinedNameIn(container);
        if (defined.name || defined.href) {
          info = defined;
          break;
        }
      }
      container = container.parentElement.closest('.algorithm');
    }
  }

  // Get the introductory prose from the previous paragraph
  const paragraph = algo.intro;
  if (paragraph) {
    // Also look for a definition in the paragraph if we don't have a name and
    // href already.
    if (isIncomplete()) {
      info = Object.assign(getDefinedNameIn(paragraph), info);
    }
    info.html = getHTMLContent(paragraph);
  }
  else if (['LI', 'DD', 'DIV'].includes(algo.root.parentElement.nodeName)) {
    // If there's no paragraph, we may be in a list or definition list, the
    // introductory prose is whatever text exists before the algorithm
    const textEl = document.createElement('div');
    let node = algo.root.parentElement.firstChild;
    while (node && node !== algo.root) {
      textEl.appendChild(node.cloneNode(true));
      node = node.nextSibling;
    }
    if (isIncomplete()) {
      info = Object.assign(getDefinedNameIn(textEl), info);
    }
    info.html = getHTMLContent(textEl);
  }

  // Steps in a `<dd>` may be named by the `<dt>` that precedes it
  if (isIncomplete() && algo.root.parentElement.nodeName === 'DD') {
    let dt = algo.root.parentElement.previousElementSibling;
    while (dt && dt.nodeName !== 'DT') {
      dt = dt.previousElementSibling;
    }
    if (dt) {
      info = Object.assign(getDefinedNameIn(dt), info);
    }
  }

  // TODO: look for the closest heading?
  return info;
}
519
+
520
/**
 * Serialize the given algorithm: information about the algorithm (see
 * `getAlgorithmInfo`), followed by the rationale for selecting it, and by
 * its steps when it has some.
 *
 * Context object allows to distinguish between top-level algorithms and
 * nested ones. Nested ones typically don't have names.
 *
 * @param {Object} algo Algorithm with `root` and `rationale` properties
 * @param {Object} [context] Set `nested` to true for nested algorithms
 * @return {Object} Serialized algorithm
 */
function serializeAlgorithm(algo, context) {
  const serialized = getAlgorithmInfo(algo, context);
  serialized.rationale = algo.rationale;

  const steps = serializeSteps(algo.root);
  if (steps.length === 0) {
    // No `steps` key for algorithms without steps
    return serialized;
  }
  serialized.steps = steps;
  return serialized;
}
535
+
536
/**
 * Serialize the steps contained in the given root element.
 *
 * An ordered list yields one step per direct list item. A definition list
 * yields a single "switch" step, whose own steps are the `<dd>` paired with
 * each direct `<dt>`, flagged with a `case` key that contains the text of the
 * `<dt>`. Any other element yields no step.
 *
 * @param {Element} root Root element of the algorithm
 * @return {Array(Object)} Serialized steps
 * @throws {Error} When a switch option is not followed by a `<dd>`
 */
function serializeSteps(root) {
  switch (root.nodeName) {
    case 'OL':
      return Array.from(root.querySelectorAll('& > li'), li => serializeStep(li));

    case 'DL': {
      const options = [];
      for (const option of root.querySelectorAll('& > dt')) {
        // Options may share a `<dd>`, look for the next one
        let dd = option.nextElementSibling;
        while (dd && dd.nodeName !== 'DD') {
          dd = dd.nextElementSibling;
        }
        if (!dd) {
          throw new Error('Switch option without <dd> found: ' + option.textContent);
        }
        options.push({ 'case': getTextContent(option), ...serializeStep(dd) });
      }
      return [{ operation: 'switch', steps: options }];
    }

    default:
      return [];
  }
}
566
+
567
/**
 * Serialize an algorithm step.
 *
 * Lists found in the step are split in two groups: those that look like
 * algorithm steps (they have a rationale) and the other ones. The first
 * list of the first group gives the substeps, remaining ones are reported in
 * `additional`. Lists of the second group are reported as text in `ignored`.
 *
 * @param {Element} li Element that contains the step (`<li>` or `<dd>`)
 * @return {Object} Serialized step
 */
function serializeStep(li) {
  const recognized = [];
  const unrecognized = [];
  for (const candidate of findAlgorithms(li, { includeIgnored: true })) {
    if (candidate.rationale) {
      recognized.push(candidate);
    }
    else {
      unrecognized.push(candidate);
    }
  }

  const [main, ...others] = recognized;
  const res = main ? serializeAlgorithm(main, { nested: true }) : {};
  if (!res.html) {
    // No introductory prose found for substeps, use the step itself
    res.html = getHTMLContent(li);
  }
  if (others.length > 0) {
    res.additional = others.map(algo => serializeAlgorithm(algo, { nested: true }));
  }
  if (unrecognized.length > 0) {
    res.ignored = unrecognized.map(algo => getTextContent(algo.root));
  }
  return res;
}
591
+
592
/**
 * Parse a list element looking for algorithmic operations or other anchors
 * that should allow us to assess that the steps are indeed part of an
 * algorithm. Return a string representation of that rationale.
 *
 * A list flagged with an "algorithm" class is an algorithm by definition.
 * Otherwise, list items (including nested ones) are considered in document
 * order and the first item that matches something gives the rationale. For
 * each item, the function looks for, in order: a verb in `stepOperations` at
 * the beginning of the text, followed by ".", ":" or a space; an entry in
 * `stepInlineOperations`; an entry in `stepAnchors`.
 *
 * @param {Element} ol List element to assess
 * @return {string|undefined} The rationale (regular expressions are
 *   serialized as strings), undefined when the list does not look like a set
 *   of algorithm steps
 */
function findRationale(ol) {
  if (ol.matches('.algorithm')) {
    return '.algorithm';
  }

  // Compile the patterns once per list, not once per list item
  const operationPatterns = stepOperations.map(op => {
    return { op, pattern: new RegExp(`^${op}(\\.|:| )`, 'i') };
  });

  // Entries are either strings (to find anywhere) or regular expressions
  const appearsIn = (text, entry) => {
    if (typeof entry === 'string') {
      return text.includes(entry);
    }
    return text.match(entry) !== null;
  };

  for (const li of ol.querySelectorAll('li')) {
    const text = getTextContent(li).toLowerCase();
    const rationale =
      operationPatterns.find(({ pattern }) => pattern.test(text))?.op ??
      stepInlineOperations.find(op => appearsIn(text, op)) ??
      stepAnchors.find(anchor => appearsIn(text, anchor));
    if (rationale) {
      return rationale.toString();
    }
  }

  return undefined;
}
636
+
637
+
638
/**
 * Find the list of normative algorithms defined in the document's section.
 *
 * Each algorithm is returned as an object with a `root` element (the element
 * that contains the steps), a `rationale` that explains why the algorithm got
 * selected, and an `intro` element (introductory prose) when one exists.
 * Algorithms are returned in document order.
 *
 * @param {Element|Document} section Where to look for algorithms
 * @param {Object} [options] When `includeIgnored` is true, ordered lists
 *   that do not look like algorithm steps are returned as well, without
 *   rationale
 * @return {Array(Object)} The list of algorithms found
 */
function findAlgorithms(section, { includeIgnored } = { includeIgnored: false }) {
  const isNormative = el => !el.closest(informativeSelector);

  // Well-behaved algorithms have an "algorithm" class and start with an <ol>,
  // or they have a "switch" class, à la:
  // https://dom.spec.whatwg.org/#locate-a-namespace
  const actual = [];
  for (const el of section.querySelectorAll('.algorithm,.switch')) {
    if (!isNormative(el)) {
      continue;
    }
    // The class may be set on the list itself or on a wrapping container
    const isList = el.nodeName === 'DL' || el.nodeName === 'OL';
    const root = isList ? el : el.querySelector('ol');
    if (!root) {
      continue;
    }
    actual.push({
      rationale: el.matches('.algorithm') ? '.algorithm' : '.switch',
      root
    });
  }

  // Probable algorithms do not have an "algorithm" class but start with an
  // <ol>. Find an interesting anchor in there to filter out lists that don't
  // look like steps.
  const probable = [];
  for (const ol of section.querySelectorAll('ol')) {
    if (!isNormative(ol) || ol.closest('nav,.toc,#toc')) {
      continue;
    }
    if (actual.some(algo => algo.root.contains(ol))) {
      continue;
    }
    const rationale = findRationale(ol)?.toString();
    if (includeIgnored || rationale) {
      probable.push({ rationale, root: ol });
    }
  }

  // Merge actual and probable algorithms, dropping duplicates and algorithms
  // that are nested under other algorithms.
  const merged = [...actual, ...probable];
  const unique = merged.filter((algo, idx) =>
    merged.findIndex(other => other.root === algo.root) === idx);
  let all = unique.filter(inner =>
    !unique.some(outer => outer !== inner && outer.root.contains(inner.root)));

  // Look for the "intro" paragraph for the algorithms, if there's one.
  // This will be used right after to extract "one-step" algorithms.
  for (const algo of all) {
    algo.intro = findIntroParagraph(algo);
  }

  // Complete the list with probable "one-step" algorithms: those defined in a
  // paragraph, that start with "To " followed by an exported definition of
  // type "dfn" or "abstract-op", and that don't have any steps (in other
  // words, that haven't been captured yet).
  const candidateDfnSelectors = [
    'dfn[data-export][data-dfn-type="dfn"]',
    'dfn[data-export][data-dfn-type="abstract-op"]'
  ];
  const dfnSelector = candidateDfnSelectors.join(',');
  const paragraphSelector = candidateDfnSelectors.map(s => `p:has(${s})`).join(',');
  const probableOneLine = [];
  for (const p of section.querySelectorAll(paragraphSelector)) {
    const opening = 'To ' + p.querySelector(dfnSelector).textContent;
    if (!p.textContent.startsWith(opening)) {
      continue;
    }
    if (all.some(algo => algo.intro === p)) {
      continue;
    }
    probableOneLine.push({ rationale: 'To <dfn>', root: p, intro: p });
  }
  all = all.concat(probableOneLine);

  // Consider algorithms in document order
  // (if we find more than one at the same level, first one will be reported as
  // the actual algorithm, the other ones as "additional" algorithms)
  all.sort((algo1, algo2) => {
    const position = algo1.root.compareDocumentPosition(algo2.root);
    if (position & Node.DOCUMENT_POSITION_PRECEDING) {
      return 1;
    }
    return (algo1.root === algo2.root) ? 0 : -1;
  });
  return all;
}
714
+
715
+
716
/**
 * Extract the algorithms defined in the current document.
 *
 * @param {Object} spec Description of the spec being processed. Only the
 *   `organization` property is read.
 * @param {Object} [idToHeading] Not used by the extraction for now
 * @return {Array(Object)} An Array of algorithms
 */
export default function (spec, idToHeading = {}) {
  // ECMA specs typically use <emu-alg> clauses, not supported for now.
  const isEcmaSpec = spec.organization === 'Ecma International';
  return isEcmaSpec ?
    [] :
    findAlgorithms(document).map(algo => serializeAlgorithm(algo));
}