reffy 20.0.13 → 20.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +151 -151
- package/index.js +29 -29
- package/package.json +5 -5
- package/reffy.js +324 -324
- package/schemas/browserlib/extract-algorithms.json +52 -52
- package/schemas/browserlib/extract-cssdfn.json +108 -108
- package/schemas/browserlib/extract-dfns.json +90 -90
- package/schemas/browserlib/extract-elements.json +17 -17
- package/schemas/browserlib/extract-events.json +31 -31
- package/schemas/browserlib/extract-headings.json +19 -19
- package/schemas/browserlib/extract-ids.json +7 -7
- package/schemas/browserlib/extract-links.json +12 -12
- package/schemas/browserlib/extract-refs.json +12 -12
- package/schemas/common.json +876 -876
- package/schemas/files/extracts/algorithms.json +12 -12
- package/schemas/files/extracts/css.json +16 -16
- package/schemas/files/extracts/dfns.json +12 -12
- package/schemas/files/extracts/elements.json +12 -12
- package/schemas/files/extracts/events.json +12 -12
- package/schemas/files/extracts/headings.json +12 -12
- package/schemas/files/extracts/ids.json +12 -12
- package/schemas/files/extracts/links.json +12 -12
- package/schemas/files/extracts/refs.json +12 -12
- package/schemas/files/index.json +59 -59
- package/schemas/postprocessing/events.json +50 -50
- package/schemas/postprocessing/idlnames-parsed.json +27 -27
- package/schemas/postprocessing/idlnames.json +17 -17
- package/schemas/postprocessing/idlparsed.json +67 -67
- package/src/browserlib/clone-and-clean.mjs +24 -24
- package/src/browserlib/create-outline.mjs +353 -353
- package/src/browserlib/extract-algorithms.mjs +723 -723
- package/src/browserlib/extract-cddl.mjs +125 -125
- package/src/browserlib/extract-dfns.mjs +1093 -1093
- package/src/browserlib/extract-headings.mjs +76 -76
- package/src/browserlib/extract-ids.mjs +28 -28
- package/src/browserlib/extract-links.mjs +45 -45
- package/src/browserlib/extract-references.mjs +308 -308
- package/src/browserlib/extract-webidl.mjs +89 -89
- package/src/browserlib/get-absolute-url.mjs +29 -29
- package/src/browserlib/get-code-elements.mjs +20 -20
- package/src/browserlib/get-generator.mjs +26 -26
- package/src/browserlib/get-lastmodified-date.mjs +13 -13
- package/src/browserlib/get-revision.mjs +12 -12
- package/src/browserlib/get-title.mjs +14 -14
- package/src/browserlib/informative-selector.mjs +24 -24
- package/src/browserlib/map-ids-to-headings.mjs +173 -173
- package/src/browserlib/reffy.json +85 -85
- package/src/browserlib/trim-spaces.mjs +35 -35
- package/src/cli/check-missing-dfns.js +587 -587
- package/src/cli/merge-crawl-results.js +132 -132
- package/src/cli/parse-webidl.js +447 -447
- package/src/lib/css-grammar-parse-tree.schema.json +109 -109
- package/src/lib/css-grammar-parser.js +440 -440
- package/src/lib/fetch.js +51 -51
- package/src/lib/markdown-report.js +360 -360
- package/src/lib/mock-server.js +218 -218
- package/src/lib/post-processor.js +322 -322
- package/src/lib/throttled-queue.js +129 -129
- package/src/postprocessing/annotate-links.js +41 -41
- package/src/postprocessing/csscomplete.js +48 -48
- package/src/postprocessing/idlnames.js +391 -391
- package/src/postprocessing/idlparsed.js +179 -179
- package/src/postprocessing/patch-dfns.js +51 -51
- package/src/specs/missing-css-rules.json +197 -197
- package/src/specs/spec-equivalents.json +149 -149
- package/src/browserlib/extract-editors.mjs~ +0 -14
- package/src/browserlib/extract-events.mjs~ +0 -3
- package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
- package/src/browserlib/get-revision.mjs~ +0 -7
- package/src/cli/csstree-grammar-check.js +0 -28
- package/src/cli/csstree-grammar-check.js~ +0 -10
- package/src/cli/csstree-grammar-parser.js +0 -11
- package/src/cli/csstree-grammar-parser.js~ +0 -1
- package/src/cli/extract-editors.js~ +0 -38
- package/src/cli/process-specs.js~ +0 -28
- package/src/postprocessing/annotate-links.js~ +0 -8
- package/src/postprocessing/events.js~ +0 -245
|
@@ -1,723 +1,723 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Extract normative algorithms defined in specs.
|
|
3
|
-
*
|
|
4
|
-
* An algorithm extract is essentially an object with the following keys:
|
|
5
|
-
* - `name`: The name of the algorithm, when one exists
|
|
6
|
-
* - `href`: The URL with fragment to reach the algorithm, when one exists
|
|
7
|
-
* - `html`: Some introductory prose for the algorithm. That prose may well
|
|
8
|
-
* contain actual algorithmic operations, e.g.: "When invoked, run the following
|
|
9
|
-
* steps in parallel". href/src attributes in the HTML have absolute URLs.
|
|
10
|
-
* - `rationale`: A short string indicating the rationale for selecting the
|
|
11
|
-
* algorithm. This property is mainly intended for helping with debugging.
|
|
12
|
-
* Example values include ".algorithm" when the algorithm comes with an
|
|
13
|
-
* "algorithm" class, "let" when a step was found with a related operation,
|
|
14
|
-
* etc. Any verb in `stepOperations` may appear, as well as a few other regular
|
|
15
|
-
* expressions (serialized as a string).
|
|
16
|
-
* - `steps`: Atomic algorithm steps.
|
|
17
|
-
*
|
|
18
|
-
* Each step is essentially an object that follows the same structure as an
|
|
19
|
-
* algorithm, except that it does not have `name`, `href` and `rationale` keys,
|
|
20
|
-
* and may also have the following keys:
|
|
21
|
-
* - `operation`: Gives the name of the main operation performed by the step,
|
|
22
|
-
* for example "switch", "let", "set", "if", "return", "resolve", "reject",
|
|
23
|
-
* "queue a task", "fire an event", etc.
|
|
24
|
-
* - `case`: Used in switch steps to identify the switch condition that
|
|
25
|
-
* triggers the step.
|
|
26
|
-
* - `ignored`: Ordered lists found at the step level that do not look like
|
|
27
|
-
* algorithm steps. Or maybe they are? The lists should get reviewed: they
|
|
28
|
-
* usually describe inputs/outputs or conditions, but they may signal parts
|
|
29
|
-
* where the extraction logic needs to be improved. The lists are reported as
|
|
30
|
-
* text prose.
|
|
31
|
-
* - `additional`: Each step should contain one and only one algorithm. When
|
|
32
|
-
* other algorithms are found at the same level, they get reported in that
|
|
33
|
-
* property. That usually either signals that the spec could be improved
|
|
34
|
-
* because it fails to use different list items for different steps, and/or
|
|
35
|
-
* that the extraction logic needs to be smarter.
|
|
36
|
-
*
|
|
37
|
-
* TODO: flag step operation when understood (queue a task, fire an event,
|
|
38
|
-
* run in parallel, etc.) to ease analysis.
|
|
39
|
-
* (the property is only set for identified "switch" constructs for now)
|
|
40
|
-
* TODO: handle "read requests"
|
|
41
|
-
* https://fetch.spec.whatwg.org/#incrementally-read-loop
|
|
42
|
-
* https://w3c.github.io/webcodecs/#imagedecoder-fetch-stream-data-loop
|
|
43
|
-
* TODO: handle "fetch" process request/response algorithms
|
|
44
|
-
* https://wicg.github.io/background-fetch/#complete-a-record
|
|
45
|
-
* https://wicg.github.io/nav-speculation/prefetch.html#create-navigation-params-by-fetching
|
|
46
|
-
* TODO: support a switch without a ".switch" class
|
|
47
|
-
* https://w3c.github.io/webcodecs/#dom-videoframe-videoframe
|
|
48
|
-
* https://w3c.github.io/web-nfc/#dfn-map-text-to-ndef
|
|
49
|
-
* TODO: support a switch that is not phrased as a switch
|
|
50
|
-
* https://w3c.github.io/clipboard-apis/#to-os-specific-well-known-format
|
|
51
|
-
* TODO: support a switch where cases don't have <dd>
|
|
52
|
-
* https://fidoalliance.org/specs/fido-v2.1-ps-20210615/fido-client-to-authenticator-protocol-v2.1-ps-errata-20220621.html#sctn-minpinlength-extension
|
|
53
|
-
* TODO: don't get confused by conditions that look like steps
|
|
54
|
-
* (code reports them as "ignored", that's a good start, ignore them fully!)
|
|
55
|
-
* https://w3c.github.io/webcodecs/#imagedecoder-decode-complete-frame
|
|
56
|
-
* https://w3c.github.io/presentation-api/#dom-presentationrequest-start
|
|
57
|
-
* https://w3c.github.io/clipboard-apis/#dom-clipboard-read
|
|
58
|
-
* TODO: don't get confused by informative "algorithms"
|
|
59
|
-
* (noting informative sections are not flagged as such in Bikeshed)
|
|
60
|
-
* https://drafts.csswg.org/css-view-transitions-2/#lifecycle
|
|
61
|
-
* TODO: convert branching operations to substeps when needed ("if")
|
|
62
|
-
* https://drafts.css-houdini.org/css-layout-api-1/#construct-a-fragment-result
|
|
63
|
-
* https://w3c.github.io/webappsec-credential-management/#dom-passwordcredential-store-slot
|
|
64
|
-
* https://dom.spec.whatwg.org/#concept-create-element
|
|
65
|
-
* TODO: don't get confused by intermediary notes that jeopardize steps lists
|
|
66
|
-
* (but then, the specs need fixing!)
|
|
67
|
-
* https://w3c.github.io/secure-payment-confirmation/#sctn-steps-to-check-if-a-payment-can-be-made
|
|
68
|
-
* https://w3c.github.io/ServiceWorker/#on-fetch-request-algorithm
|
|
69
|
-
* https://wicg.github.io/turtledove/#dom-navigator-createauctionnonce
|
|
70
|
-
* TODO: convert inline operations to substeps when needed
|
|
71
|
-
* TODO: filter out CSS algorithms that are not JS algorithms
|
|
72
|
-
* https://drafts.fxtf.org/filter-effects/#interpolation-of-filter-functions
|
|
73
|
-
* TODO: improve the algorithm steps detection mechanism. It's relatively easy
|
|
74
|
-
* to miss steps.
|
|
75
|
-
* TODO: don't skip intermediary <dl> levels and/or support "struct with keys"
|
|
76
|
-
* https://w3c.github.io/webdriver-bidi/#parse-url-pattern
|
|
77
|
-
* TODO: don't get confused by a switch that follows steps
|
|
78
|
-
* https://w3c.github.io/geolocation/#dfn-acquire-a-position
|
|
79
|
-
* TODO: support TC39 specs with <emu-alg> clauses
|
|
80
|
-
* https://tc39.es/ecma402/
|
|
81
|
-
* TODO: skip monkeypatching identified as such?
|
|
82
|
-
* https://wicg.github.io/scroll-to-text-fragment/
|
|
83
|
-
*
|
|
84
|
-
* And then later:
|
|
85
|
-
* TODO: extract algorithm parameters
|
|
86
|
-
*
|
|
87
|
-
* @function
|
|
88
|
-
* @public
|
|
89
|
-
* @return {Array(Object)} An Array of algorithms
|
|
90
|
-
*/
|
|
91
|
-
|
|
92
|
-
import informativeSelector from './informative-selector.mjs';
|
|
93
|
-
import getAbsoluteUrl from './get-absolute-url.mjs';
|
|
94
|
-
import cloneAndClean from './clone-and-clean.mjs';
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
/**
|
|
98
|
-
* Algorithm steps typically start with verbs that define the operation to
|
|
99
|
-
* perform.
|
|
100
|
-
*
|
|
101
|
-
* The following list of verbs is used to assess whether a set of steps "looks
|
|
102
|
-
* like" a set of algorithm steps, so as to avoid extracting lists that are not
|
|
103
|
-
* algorithms.
|
|
104
|
-
*
|
|
105
|
-
* The list is completed with a few branching operations that are not verbs:
|
|
106
|
-
* "for", "if", "while".
|
|
107
|
-
*
|
|
108
|
-
* Using a growing list of verbs may not be a good idea. That said, it is an
|
|
109
|
-
* instructive exercise to analyze the diversity of operations being used,
|
|
110
|
-
* and their meaning (or lack thereof).
|
|
111
|
-
*
|
|
112
|
-
* Note some steps may start with an adverb, e.g., "Additionally",
|
|
113
|
-
* "Optionally", "Asynchronously", or with contextualizations such as
|
|
114
|
-
* "In step 6". These forms are not captured here. They will be captured
|
|
115
|
-
* through the inline operations (see below) or need to be handled separately.
|
|
116
|
-
* They will be reported in the `ignored` property otherwise.
|
|
117
|
-
*
|
|
118
|
-
* Note "Asynchronously", typically used in Service Workers, does not mean much
|
|
119
|
-
* in a browsing context. It should probably rather be re-written using
|
|
120
|
-
* "in parallel"
|
|
121
|
-
* https://w3c.github.io/ServiceWorker/
|
|
122
|
-
*/
|
|
123
|
-
const stepOperations = [
  // Verbs. Each entry must appear only once: the list is scanned in order and
  // the first verb found at the start of a step is reported as the rationale.
  'abort', 'acknowledge', 'activate', 'add', 'adopt', 'advance', 'append',
  'apply', 'ask', 'assert', 'assign', 'attach', 'attempt',
  'batch', 'block', 'branch',
  'call', 'check', 'cancel', 'cause', 'change', 'choose', 'clamp', 'clean',
  'clear', 'close', 'collect', 'complete', 'compute', 'consume', 'continue',
  'convert', 'copy', 'create',
  'deactivate', 'decrease', 'decrement', 'decrypt', 'define', 'delete',
  'dequeue', 'destroy', 'determine', 'discard', 'dismiss', 'dispatch',
  'display', 'down-mix', 'do', 'dump',
  'emit', 'empty', 'end', 'enqueue', 'ensure', 'error', 'establish',
  'execute', 'extend', 'extract',
  'fail', 'fetch', 'finalize', 'find', 'finish', 'fire',
  'gather', 'generate', 'give',
  'handle', 'hand-off',
  'increase', 'increment', 'initialize', 'insert', 'interpret', 'invoke',
  'issue',
  'jump',
  'let', 'load',
  'make', 'mark', 'match', 'move', 'multiply',
  'navigate',
  'paint', 'parse', 'perform', 'place', 'pop', 'populate', 'prepare',
  'prepend', 'process', 'prompt', 'push',
  'query', 'queue',
  'recalculate', 'rectify', 'reference', 'register', 'reinitialize', 'reject',
  'release', 'remove', 'replace', 'reset', 'resolve', 'restore', 'render',
  'remap', 'report', 'return', 'run',
  'score', 'scroll', 'send', 'serialize', 'set', 'shuffle', 'skip', 'sort',
  'split', 'spin', 'start', 'stop', 'store', 'strip', 'suspend', 'switch',
  'take', 'terminate', 'throw', 'trap', 'try',
  'undisplay', 'unset', 'up-mix', 'update', 'upgrade', 'use',
  'validate', 'verify', 'visit',
  'wait',

  // Branching operations that are not verbs
  'for',
  'if',
  'while'
];
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
/**
 * Constructs that reveal the operation of a step when the step does not start
 * with a known verb, or when that verb is not followed by a white space.
 *
 * Entries are either plain strings or regular expressions. The list is
 * scanned in order and the first entry that matches is reported.
 */
const stepInlineOperations = [
  // Ending the algorithm
  'abort all these steps',
  'abort these steps',

  // Events
  'fire a simple event',
  'fire an event',

  // Concurrency and promises
  'in parallel',
  'reject',
  'resolve',

  // Running or terminating sub-steps
  'run the following steps',
  'run these steps',
  'terminate these steps',

  // Tasks, with an optional qualifier between "a" and "task"
  /queue a( \w+)? task/i
];
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
/**
 * Extra hints that a list item is an algorithm step, used when no operation
 * could be identified. Entries are plain strings or regular expressions.
 *
 * Regular expressions are kept exactly as written: the matching entry gets
 * serialized as a string to explain why a list was selected.
 */
const stepAnchors = [
  // Step text starts with an hourglass character
  /^⌛/,

  // Step mentions parallel execution
  'in parallel',

  // Step starts with "otherwise" followed by a comma or a space
  /^otherwise(\,| )/i,
];
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
/**
 * Compute the text of a DOM element for serialization purpose.
 *
 * The element is first passed through `cloneAndClean`, and the text content
 * of the resulting copy is then normalized.
 *
 * @param {Element} el The element to read
 * @return {String} The normalized text content
 */
function getTextContent(el) {
  return normalize(cloneAndClean(el).textContent);
}
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
/**
 * Return the HTML content of the given DOM element for serialization purpose.
 *
 * The element is passed through `cloneAndClean`, ordered lists are dropped
 * from the copy, and `href`/`src` attributes that do not start with "http"
 * are resolved to absolute URLs.
 *
 * @param {Element} el The element to serialize
 * @return {String} The trimmed inner HTML of the cleaned copy
 */
function getHTMLContent(el) {
  // The base URL must be computed from the original element: the copy is not
  // attached to the DOM tree, so it has no `[data-reffy-page]` ancestor.
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  const baseUrl = page ?? window.location.href;

  const clone = cloneAndClean(el);
  for (const ol of clone.querySelectorAll('ol')) {
    ol.remove();
  }

  // Handle each URL attribute on its own. Choosing between `href` and `src`
  // based on the truthiness of the `href` value mishandles `href=""` (the
  // element would get a bogus `src` attribute) and ignores one of the two
  // attributes when an element carries both.
  for (const attr of ['href', 'src']) {
    const relativeUrlSelector = `[${attr}]:not([${attr}^="http"])`;
    for (const linkEl of clone.querySelectorAll(relativeUrlSelector)) {
      const url = new URL(linkEl.getAttribute(attr), baseUrl);
      linkEl.setAttribute(attr, url.toString());
    }
  }

  return clone.innerHTML.trim();
}
|
|
348
|
-
|
|
349
|
-
/**
 * Prepare a text for serialization: line breaks become spaces, runs of white
 * space are collapsed to a single space, and leading/trailing white space is
 * removed.
 *
 * @param {String} str The text to normalize
 * @return {String} The normalized text
 */
function normalize(str) {
  const singleLine = str.replace(/\r|\n/g, ' ');
  const collapsed = singleLine.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
|
355
|
-
|
|
356
|
-
/**
 * Look for the first definition in the given element (or the element itself
 * if it is a `dfn`) and report its name and URL.
 *
 * The name is the first `data-dfn-for` value (when present) followed by "/",
 * followed by the first `data-lt` value or, failing that, the text of the
 * definition. When there is no definition, the first heading with an ID is
 * used instead.
 *
 * @param {Element} el The element to search
 * @return {Object} An object with `name` and `href` keys. The `href` key is
 *   absent when the definition has no ID. The object is empty when nothing
 *   was found.
 */
function getDefinedNameIn(el) {
  const dfnSelector = 'dfn,h2[data-dfn-type],h3[data-dfn-type],h4[data-dfn-type],h5[data-dfn-type],h6[data-dfn-type]';
  const dfn = el.nodeName === 'DFN' ? el : el.querySelector(dfnSelector);

  if (!dfn) {
    // No definition: fall back to the first heading that can be targeted
    const heading = el.querySelector('h2[id],h3[id],h4[id],h5[id],h6[id]');
    return heading ?
      { name: getTextContent(heading), href: getAbsoluteUrl(heading) } :
      {};
  }

  // Namespace prefix: first entry of the comma-separated list, where commas
  // that are followed by a closing parenthesis without an opening one in
  // between do not count as separators
  const dfnFor = dfn.getAttribute('data-dfn-for');
  const prefix = dfnFor ?
    normalize(dfnFor.split(/,(?![^\(]*\))/)[0]) + '/' :
    '';

  // Local name: first linking text if there is one, text content otherwise
  const linkingText = dfn.getAttribute('data-lt');
  const localName = linkingText ?
    normalize(linkingText.split('|')[0]) :
    getTextContent(dfn);

  const name = prefix + localName;

  // Definitions are expected to have an ID. Two known exceptions:
  // - CSS 2.1 does not follow the definitions data model:
  //   https://www.w3.org/TR/CSS21/visudet.html#containing-block-details
  // - HTML still contains dfns without IDs, including one for an algorithm:
  //   https://html.spec.whatwg.org/multipage/server-sent-events.html#processField
  // An ID could be found in both cases, but it is not clear that CSS 2.1
  // algorithms are real algorithms, and the HTML spec could be fixed. The
  // name is returned without `href` so as not to end up with a null `href`,
  // which the JSON schema forbids.
  return dfn.id ? { name, href: getAbsoluteUrl(dfn) } : { name };
}
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
/**
 * Locate the element that introduces the given algorithm, if any.
 *
 * The search first walks up the `li` / `.algorithm` ancestors of the
 * algorithm's root, looking for a `dfn` that sits outside of the root. It
 * stops at the first `li`. When that fails, the closest preceding sibling
 * paragraph that is not informative is used.
 *
 * @param {Object} algo The algorithm, with a `root` element
 * @return {Element|null} The introductory element, null when there is none
 */
function findIntroParagraph(algo) {
  const { root } = algo;
  const scopeSelector = 'li,.algorithm';

  for (let scope = root.closest(scopeSelector);
      scope;
      scope = scope.parentElement.closest(scopeSelector)) {
    const dfn = scope.querySelector('dfn');
    if (dfn && !root.contains(dfn)) {
      const holder = dfn.closest('p,div,li');
      if (holder) {
        return holder;
      }
      break;
    }
    if (scope.nodeName === 'LI') {
      break;
    }
  }

  // Fall back to the previous paragraph. That is not going to be 100%
  // correct: an intermediary paragraph gets incorrectly captured in:
  // https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-serialized-csp
  // TODO: improve!
  let candidate = root;
  while (candidate &&
      !(candidate.nodeName === 'P' && !candidate.matches(informativeSelector))) {
    candidate = candidate.previousElementSibling;
  }
  return candidate;
}
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
/**
 * Find information about an algorithm (name, href and introductory prose).
 *
 * The name and href come from the `.algorithm` containers that wrap the
 * algorithm, from a definition in the introductory prose, or from the `<dt>`
 * that precedes the algorithm in a definition list. They are only looked for
 * when the algorithm is not nested.
 *
 * @param {Object} algo The algorithm, with `root` and `intro` properties
 * @param {Object} [context] Set `nested` to true for algorithms nested in a
 *   step
 * @return {Object} An object that may have `name`, `href` and `html` keys
 */
function getAlgorithmInfo(algo, context) {
  const nested = !!context?.nested;
  let info = {};

  // True when a name or href is still missing and worth looking for
  const needsNaming = () => !nested && !(info.name && info.href);

  // Look for a name in the algorithm container, if there's one.
  // Note some specs add the "algorithm" class to the `<ol>` and to the
  // wrapping container, and define the name in the wrapping container.
  // NOTE(review): the walk does not stop after a container with a
  // `data-algorithm` attribute, so an outer container may replace what was
  // found in an inner one. Kept as-is, confirm that this is intended.
  if (!nested) {
    let container = algo.root.closest('.algorithm');
    while (container) {
      const algorithmName = container.getAttribute('data-algorithm');
      if (algorithmName) {
        info.name = normalize(algorithmName);
        const algorithmFor = container.getAttribute('data-algorithm-for');
        if (algorithmFor) {
          info.name = normalize(algorithmFor) + '/' + info.name;
        }
        if (container.id) {
          // Use the container ID as anchor
          info.href = getAbsoluteUrl(container);
        }
        else {
          // Container has no ID but if there's a dfn in there, that's probably
          // the right anchor. The lookup always returns an object, so the href
          // itself must be checked: an `href` key set to `undefined` would
          // later win over an href found in the introductory prose.
          const { href } = getDefinedNameIn(container);
          if (href) {
            info.href = href;
          }
          else {
            delete info.href;
          }
        }
      }
      else {
        info = getDefinedNameIn(container);
        if (info.name || info.href) {
          break;
        }
      }
      // The topmost element has no parent element
      container = container.parentElement?.closest('.algorithm');
    }
  }

  const paragraph = algo.intro;
  if (paragraph) {
    // Also look for a definition in the paragraph if we don't have a name and
    // href already. Values already found take precedence.
    if (needsNaming()) {
      info = Object.assign(getDefinedNameIn(paragraph), info);
    }
    info.html = getHTMLContent(paragraph);
  }
  else if (['LI', 'DD', 'DIV'].includes(algo.root.parentElement.nodeName)) {
    // If there's no paragraph, we may be in a list or definition list, the
    // introductory prose is whatever exists before the algorithm
    const textEl = document.createElement('div');
    let node = algo.root.parentElement.firstChild;
    while (node !== algo.root) {
      textEl.appendChild(node.cloneNode(true));
      node = node.nextSibling;
    }
    if (needsNaming()) {
      info = Object.assign(getDefinedNameIn(textEl), info);
    }
    info.html = getHTMLContent(textEl);
  }

  // In a definition list, the name may be defined in the preceding <dt>
  if (needsNaming() && algo.root.parentElement.nodeName === 'DD') {
    let dt = algo.root.parentElement.previousElementSibling;
    while (dt && dt.nodeName !== 'DT') {
      dt = dt.previousElementSibling;
    }
    if (dt) {
      info = Object.assign(getDefinedNameIn(dt), info);
    }
  }

  // TODO: look for the closest heading?
  return info;
}
|
|
519
|
-
|
|
520
|
-
/**
 * Turn the given algorithm into its serialized form: the information
 * returned by `getAlgorithmInfo`, completed with the algorithm's rationale
 * and with its steps when there are some.
 *
 * @param {Object} algo The algorithm, with `root` and `rationale` properties
 * @param {Object} [context] Passed on to `getAlgorithmInfo`, which uses it to
 *   tell nested algorithms from top-level ones
 * @return {Object} The serialized algorithm
 */
function serializeAlgorithm(algo, context) {
  const serialized = getAlgorithmInfo(algo, context);
  serialized.rationale = algo.rationale;

  const steps = serializeSteps(algo.root);
  return steps.length > 0 ?
    Object.assign(serialized, { steps }) :
    serialized;
}
|
|
535
|
-
|
|
536
|
-
/**
 * Serialize the steps directly contained in the given root element.
 *
 * An ordered list yields one step per list item. A definition list yields a
 * single "switch" step, with one sub-step per `<dt>`. Any other element
 * yields no step.
 *
 * @param {Element} root The element that contains the steps
 * @return {Array(Object)} The serialized steps
 * @throws {Error} When a `<dt>` of a definition list is not followed by a
 *   `<dd>`
 */
function serializeSteps(root) {
  switch (root.nodeName) {
    case 'OL':
      return Array.from(
        root.querySelectorAll('& > li'),
        li => serializeStep(li));

    case 'DL': {
      const cases = Array.from(root.querySelectorAll('& > dt'), option => {
        // The steps of the case are in the first <dd> that follows
        let sibling = option.nextElementSibling;
        while (sibling && sibling.nodeName !== 'DD') {
          sibling = sibling.nextElementSibling;
        }
        if (!sibling) {
          throw new Error('Switch option without <dd> found: ' + option.textContent);
        }
        return {
          'case': getTextContent(option),
          ...serializeStep(sibling)
        };
      });
      return [{ operation: 'switch', steps: cases }];
    }

    default:
      return [];
  }
}
|
|
566
|
-
|
|
567
|
-
/**
 * Serialize an algorithm step.
 *
 * The first algorithm found in the step provides the serialization. Other
 * algorithms are reported under `additional`, and lists that do not look
 * like algorithms are reported as text under `ignored`.
 *
 * @param {Element} li The element that contains the step
 * @return {Object} The serialized step
 */
function serializeStep(li) {
  const candidates = findAlgorithms(li, { includeIgnored: true });
  const [main, ...others] = candidates.filter(algo => !!algo.rationale);
  const rejected = candidates.filter(algo => !algo.rationale);

  const step = main ? serializeAlgorithm(main, { nested: true }) : {};
  if (!step.html) {
    // No introductory prose, the prose is the content of the step itself
    step.html = getHTMLContent(li);
  }
  if (others.length > 0) {
    step.additional = others.map(algo =>
      serializeAlgorithm(algo, { nested: true }));
  }
  if (rejected.length > 0) {
    step.ignored = rejected.map(algo => getTextContent(algo.root));
  }
  return step;
}
|
|
591
|
-
|
|
592
|
-
/**
 * Explain why the given list looks like a set of algorithm steps.
 *
 * A list with an "algorithm" class is accepted right away. Otherwise, list
 * items are checked in order, nested ones included. For each item, the
 * lowercased text is tested against the step operations (at the start of the
 * text, followed by ".", ":" or a space), then against the inline operations,
 * then against the additional anchors. The first hit wins.
 *
 * @param {Element} ol The list to analyze
 * @return {String|undefined} The serialization of the entry that matched,
 *   ".algorithm" for a list with that class, undefined when nothing matched
 */
function findRationale(ol) {
  if (ol.matches('.algorithm')) {
    return '.algorithm';
  }

  // Entries are either strings, looked for anywhere in the text, or regular
  // expressions
  const isFoundIn = text => pattern =>
    (typeof pattern === 'string') ? text.includes(pattern) : pattern.test(text);

  for (const li of ol.querySelectorAll('li')) {
    const text = getTextContent(li).toLowerCase();
    const startsWith = op => new RegExp(`^${op}(\\.|:| )`, 'i').test(text);

    const hit =
      stepOperations.find(startsWith) ||
      stepInlineOperations.find(isFoundIn(text)) ||
      stepAnchors.find(isFoundIn(text));
    if (hit) {
      return hit.toString();
    }
  }

  return undefined;
}
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
/**
 * Find the list of algorithms defined in the given section.
 *
 * Each algorithm is an object with a `root` element (the list of steps, or
 * the paragraph for "one-step" algorithms), a `rationale` that explains why
 * it was selected, and an `intro` element when one could be found.
 *
 * @param {Element|Document} section Where to look for algorithms
 * @param {Object} [options] Set `includeIgnored` to true to also return
 *   ordered lists that do not look like algorithms. Their `rationale` is
 *   undefined.
 * @return {Array(Object)} The algorithms, in document order
 */
function findAlgorithms(section, { includeIgnored } = { includeIgnored: false }) {
  // Well-behaved algorithms have an "algorithm" class and start with an <ol>,
  // or they have a "switch" class, à la:
  // https://dom.spec.whatwg.org/#locate-a-namespace
  const actual = [...section.querySelectorAll('.algorithm,.switch')]
    .filter(el => !el.closest(informativeSelector))
    .map(el => {
      // When the class is set on a wrapper, steps are in the first <ol>
      const isList = el.nodeName === 'DL' || el.nodeName === 'OL';
      return {
        rationale: el.matches('.algorithm') ? '.algorithm' : '.switch',
        root: isList ? el : el.querySelector('ol')
      };
    })
    .filter(algo => !!algo.root);

  // Probable algorithms do not have an "algorithm" class but start with an <ol>
  const probable = [...section.querySelectorAll('ol')]
    .filter(ol => !ol.closest(informativeSelector))
    .filter(ol => !ol.closest('nav,.toc,#toc'))
    .filter(ol => !actual.find(algo => algo.root.contains(ol)))
    // Find an interesting anchor in there to filter out
    // lists that don't look like steps
    .map(ol => ({ rationale: findRationale(ol), root: ol }))
    .filter(algo => includeIgnored || !!algo.rationale);

  // Merge actual and probable algorithms, dropping duplicates and algorithms
  // that are nested under other algorithms.
  const merged = [...actual, ...probable];
  const unique = merged.filter((algo, idx) =>
    merged.findIndex(al => al.root === algo.root) === idx);
  let all = unique.filter(algo1 =>
    !unique.some(algo2 => algo1 !== algo2 && algo2.root.contains(algo1.root)));

  // Look for the "intro" paragraph for the algorithms, if there's one.
  // This will be used right after to extract "one-step" algorithms.
  for (const algo of all) {
    algo.intro = findIntroParagraph(algo);
  }

  // Complete the list with probable "one-step" algorithms: those defined in a
  // paragraph, that start with "To " followed by an exported definition of
  // type "dfn" or "abstract-op", and that don't have any steps (in other
  // words, that haven't been captured yet).
  // NOTE(review): unlike the lists above, these paragraphs are not checked
  // against the informative selector. Confirm whether that is intended.
  const candidateDfnSelectors = [
    'dfn[data-export][data-dfn-type="dfn"]',
    'dfn[data-export][data-dfn-type="abstract-op"]'
  ];
  const paragraphSelector = candidateDfnSelectors.map(s => `p:has(${s})`).join(',');
  const dfnSelector = candidateDfnSelectors.join(',');
  const probableOneLine = [...section.querySelectorAll(paragraphSelector)]
    .filter(p => p.textContent.startsWith('To ' + p.querySelector(dfnSelector).textContent))
    .filter(p => !all.find(algo => algo.intro === p))
    .map(p => ({ rationale: 'To <dfn>', root: p, intro: p }));
  all = all.concat(probableOneLine);

  // Consider algorithms in document order
  // (if we find more than one at the same level, first one will be reported as
  // the actual algorithm, the other ones as "additional" algorithms)
  all.sort((algo1, algo2) => {
    if (algo1.root === algo2.root) {
      return 0;
    }
    const cmp = algo1.root.compareDocumentPosition(algo2.root);
    return (cmp & Node.DOCUMENT_POSITION_PRECEDING) ? 1 : -1;
  });
  return all;
}
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
/**
 * Extract the algorithms defined in the current document.
 *
 * @param {Object} spec Description of the spec being processed. Only its
 *   `organization` property is read here.
 * @param {Object} idToHeading Not used by this function.
 * @return {Array<Object>} The serialized algorithms; an empty array for specs
 *   published by Ecma International.
 */
export default function (spec, idToHeading = {}) {
  // ECMA specs typically use <emu-alg> clauses, not supported for now.
  const isEcmaSpec = spec.organization === 'Ecma International';
  return isEcmaSpec ?
    [] :
    findAlgorithms(document).map(algo => serializeAlgorithm(algo));
}
|
|
1
|
+
/**
|
|
2
|
+
* Extract normative algorithms defined in specs.
|
|
3
|
+
*
|
|
4
|
+
* An algorithm extract is essentially an object with the following keys:
|
|
5
|
+
* - `name`: The name of the algorithm, when one exists
|
|
6
|
+
* - `href`: The URL with fragment to reach the algorithm, when one exists
|
|
7
|
+
* - `html`: Some introductory prose for the algorithm. That prose may well
|
|
8
|
+
* contain actual algorithmic operations, e.g.: "When invoked, run the following
|
|
9
|
+
* steps in parallel". href/src attributes in the HTML have absolute URLs.
|
|
10
|
+
* - `rationale`: A short string indicating the rationale for selecting the
|
|
11
|
+
* algorithm. This property is mainly intended for helping with debugging.
|
|
12
|
+
* Example values include ".algorithm" when the algorithm comes with an
|
|
13
|
+
* "algorithm" class, "let" when a step was found with a related operation,
|
|
14
|
+
* etc. Any verb in `stepOperations` may appear, as well as a few other regular
|
|
15
|
+
* expressions (serialized as a string).
|
|
16
|
+
* - `steps`: Atomic algorithm steps.
|
|
17
|
+
*
|
|
18
|
+
* Each step is essentially an object that follows the same structure as an
|
|
19
|
+
* algorithm, except that it does not have `name`, `href` and `rationale` keys,
|
|
20
|
+
* and may also have the following keys:
|
|
21
|
+
* - `operation`: Gives the name of the main operation performed by the step,
|
|
22
|
+
* for example "switch", "let", "set", "if", "return", "resolve", "reject",
|
|
23
|
+
* "queue a task", "fire an event", etc.
|
|
24
|
+
* - `case`: Used in switch steps to identify the switch condition that
|
|
25
|
+
* triggers the step.
|
|
26
|
+
* - `ignored`: Ordered lists found at the step level that do not look like
|
|
27
|
+
* algorithm steps. Or maybe they are? The lists should get reviewed: they
|
|
28
|
+
* usually describe inputs/outputs or conditions, but they may signal parts
|
|
29
|
+
* where the extraction logic needs to be improved. The lists are reported as
|
|
30
|
+
* text prose.
|
|
31
|
+
* - `additional`: Each step should contain one and only one algorithm. When
|
|
32
|
+
* other algorithms are found at the same level, they get reported in that
|
|
33
|
+
* property. That usually either signals that the spec could be improved
|
|
34
|
+
* because it fails to use different list items for different steps, and/or
|
|
35
|
+
* that the extraction logic needs to be smarter.
|
|
36
|
+
*
|
|
37
|
+
* TODO: flag step operation when understood (queue a task, fire an event,
|
|
38
|
+
* run in parallel, etc.) to ease analysis.
|
|
39
|
+
* (the property is only set for identified "switch" constructs for now)
|
|
40
|
+
* TODO: handle "read requests"
|
|
41
|
+
* https://fetch.spec.whatwg.org/#incrementally-read-loop
|
|
42
|
+
* https://w3c.github.io/webcodecs/#imagedecoder-fetch-stream-data-loop
|
|
43
|
+
* TODO: handle "fetch" process request/response algorithms
|
|
44
|
+
* https://wicg.github.io/background-fetch/#complete-a-record
|
|
45
|
+
* https://wicg.github.io/nav-speculation/prefetch.html#create-navigation-params-by-fetching
|
|
46
|
+
* TODO: support a switch without a ".switch" class
|
|
47
|
+
* https://w3c.github.io/webcodecs/#dom-videoframe-videoframe
|
|
48
|
+
* https://w3c.github.io/web-nfc/#dfn-map-text-to-ndef
|
|
49
|
+
* TODO: support a switch that is not phrased as a switch
|
|
50
|
+
* https://w3c.github.io/clipboard-apis/#to-os-specific-well-known-format
|
|
51
|
+
* TODO: support a switch where cases don't have <dd>
|
|
52
|
+
* https://fidoalliance.org/specs/fido-v2.1-ps-20210615/fido-client-to-authenticator-protocol-v2.1-ps-errata-20220621.html#sctn-minpinlength-extension
|
|
53
|
+
* TODO: don't get confused by conditions that look like steps
|
|
54
|
+
* (code reports them as "ignored", that's a good start, ignore them fully!)
|
|
55
|
+
* https://w3c.github.io/webcodecs/#imagedecoder-decode-complete-frame
|
|
56
|
+
* https://w3c.github.io/presentation-api/#dom-presentationrequest-start
|
|
57
|
+
* https://w3c.github.io/clipboard-apis/#dom-clipboard-read
|
|
58
|
+
* TODO: don't get confused by informative "algorithms"
|
|
59
|
+
* (noting informative sections are not flagged as such in Bikeshed)
|
|
60
|
+
* https://drafts.csswg.org/css-view-transitions-2/#lifecycle
|
|
61
|
+
* TODO: convert branching operations to substeps when needed ("if")
|
|
62
|
+
* https://drafts.css-houdini.org/css-layout-api-1/#construct-a-fragment-result
|
|
63
|
+
* https://w3c.github.io/webappsec-credential-management/#dom-passwordcredential-store-slot
|
|
64
|
+
* https://dom.spec.whatwg.org/#concept-create-element
|
|
65
|
+
* TODO: don't get confused by intermediary notes that jeopardize steps lists
|
|
66
|
+
* (but then, the specs need fixing!)
|
|
67
|
+
* https://w3c.github.io/secure-payment-confirmation/#sctn-steps-to-check-if-a-payment-can-be-made
|
|
68
|
+
* https://w3c.github.io/ServiceWorker/#on-fetch-request-algorithm
|
|
69
|
+
* https://wicg.github.io/turtledove/#dom-navigator-createauctionnonce
|
|
70
|
+
* TODO: convert inline operations to substeps when needed
|
|
71
|
+
* TODO: filter out CSS algorithms that are not JS algorithms
|
|
72
|
+
* https://drafts.fxtf.org/filter-effects/#interpolation-of-filter-functions
|
|
73
|
+
* TODO: improve the algorithm steps detection mechanism. It's relatively easy
|
|
74
|
+
* to miss steps.
|
|
75
|
+
* TODO: don't skip intermediary <dl> levels and/or support "struct with keys"
|
|
76
|
+
* https://w3c.github.io/webdriver-bidi/#parse-url-pattern
|
|
77
|
+
* TODO: don't get confused by a switch that follows steps
|
|
78
|
+
* https://w3c.github.io/geolocation/#dfn-acquire-a-position
|
|
79
|
+
* TODO: support TC39 specs with <emu-alg> clauses
|
|
80
|
+
* https://tc39.es/ecma402/
|
|
81
|
+
* TODO: skip monkeypatching identified as such?
|
|
82
|
+
* https://wicg.github.io/scroll-to-text-fragment/
|
|
83
|
+
*
|
|
84
|
+
* And then later:
|
|
85
|
+
* TODO: extract algorithm parameters
|
|
86
|
+
*
|
|
87
|
+
* @function
|
|
88
|
+
* @public
|
|
89
|
+
* @return {Array<Object>} An Array of algorithms
|
|
90
|
+
*/
|
|
91
|
+
|
|
92
|
+
import informativeSelector from './informative-selector.mjs';
|
|
93
|
+
import getAbsoluteUrl from './get-absolute-url.mjs';
|
|
94
|
+
import cloneAndClean from './clone-and-clean.mjs';
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
/**
 * Algorithm steps typically start with verbs that define the operation to
 * perform.
 *
 * The following list of verbs is used to assess whether a set of steps "looks
 * like" a set of algorithm steps, so as to avoid extracting lists that are not
 * algorithms. A step matches an entry when its text starts with that entry
 * (case-insensitively), immediately followed by ".", ":" or a space.
 *
 * The list is completed with a few branching operations that are not verbs:
 * "for", "if", "while".
 *
 * Each entry must appear only once: duplicates do not change the outcome of
 * the lookup but make the list harder to maintain.
 *
 * Using a growing list of verbs may not be a good idea. That said, it is an
 * instructive exercise to analyze the diversity of operations being used,
 * and their meaning (or lack thereof).
 *
 * Note some steps may start with an adverb, e.g., "Additionally",
 * "Optionally", "Asynchronously", or with contextualizations such as
 * "In step 6". These forms are not captured here. They will be captured
 * through the inline operations (see below) or need to be handled separately.
 * They will be reported in the `ignored` property otherwise.
 *
 * Note "Asynchronously", typically used in Service Workers, does not mean much
 * in a browsing context. It should probably rather be re-written using
 * "in parallel"
 * https://w3c.github.io/ServiceWorker/
 */
const stepOperations = [
  'abort',
  'acknowledge',
  'activate',
  'add',
  'adopt',
  'advance',
  'append',
  'apply',
  'ask',
  'assert',
  'assign',
  'attach',
  'attempt',
  'batch',
  'block',
  'branch',
  'call',
  'check',
  'cancel',
  'cause',
  'change',
  'choose',
  'clamp',
  'clean',
  'clear',
  'close',
  'collect',
  'complete',
  'compute',
  'consume',
  'continue',
  'convert',
  'copy',
  'create',
  'deactivate',
  'decrease',
  'decrement',
  'decrypt',
  'define',
  'delete',
  'dequeue',
  'destroy',
  'determine',
  'discard',
  'dismiss',
  'dispatch',
  'display',
  'down-mix',
  'do',
  'dump',
  'emit',
  'empty',
  'end',
  'enqueue',
  'ensure',
  'error',
  'establish',
  'execute',
  'extend',
  'extract',
  'fail',
  'fetch',
  'finalize',
  'find',
  'finish',
  'fire',
  'gather',
  'generate',
  'give',
  'handle',
  'hand-off',
  'increase',
  'increment',
  'initialize',
  'insert',
  'interpret',
  'invoke',
  'issue',
  'jump',
  'let',
  'load',
  'make',
  'mark',
  'match',
  'move',
  'multiply',
  'navigate',
  'paint',
  'parse',
  'perform',
  'place',
  'pop',
  'populate',
  'prepare',
  'prepend',
  'process',
  'prompt',
  'push',
  'query',
  'queue',
  'recalculate',
  'rectify',
  'reference',
  'register',
  'reinitialize',
  'reject',
  'release',
  'remove',
  'replace',
  'reset',
  'resolve',
  'restore',
  'render',
  'remap',
  'report',
  'return',
  'run',
  'score',
  'scroll',
  'send',
  'serialize',
  'set',
  'shuffle',
  'skip',
  'sort',
  'split',
  'spin',
  'start',
  'stop',
  'store',
  'strip',
  'suspend',
  'switch',
  'take',
  'terminate',
  'throw',
  'trap',
  'try',
  'undisplay',
  'unset',
  'up-mix',
  'update',
  'upgrade',
  'use',
  'validate',
  'verify',
  'visit',
  'wait',

  'for',
  'if',
  'while'
];
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
/**
 * When the step does not start with a verb, or when that verb is not followed
 * by a white space, the following constructs help detect the actual operation.
 *
 * Entries are looked up anywhere in the lowercased text of a step: strings
 * are searched as substrings, regular expressions are matched against the
 * text. String entries must therefore be written in lowercase.
 */
const stepInlineOperations = [
  'abort all these steps',
  'abort these steps',
  'fire a simple event',
  'fire an event',
  'in parallel',
  'reject',
  'resolve',
  'run the following steps',
  'run these steps',
  'terminate these steps',
  // Matches "queue a task" as well as "queue a <word> task"
  /queue a( \w+)? task/i
];
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
/**
 * Additional anchors that suggest algorithm steps
 *
 * Anchors are only considered when no step operation and no inline operation
 * was found in the step. As for inline operations, strings are searched as
 * substrings of the lowercased step text, and regular expressions are matched
 * against that text.
 */
const stepAnchors = [
  // Step text that starts with an hourglass character
  /^⌛/,
  'in parallel',
  // Step text that starts with "otherwise" followed by a comma or a space
  /^otherwise(\,| )/i,
];
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
/**
 * Compute the text of a DOM element once annotations have been removed.
 *
 * The element is cleaned up through `cloneAndClean` and the text content of
 * the resulting clone is normalized (white space collapsed and trimmed).
 */
function getTextContent(el) {
  const { textContent } = cloneAndClean(el);
  return normalize(textContent);
}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
/**
 * Return the normalized HTML content for the given DOM element, removing all
 * annotations.
 *
 * Ordered lists are dropped from the returned markup, and `href`/`src`
 * attributes that do not start with "http" are replaced with absolute URLs.
 * URLs get resolved against the `data-reffy-page` attribute of the closest
 * ancestor that has one, or against the URL of the current page otherwise.
 *
 * The `href` and `src` attributes are handled independently and detected
 * through their presence, not through the truthiness of their value. This
 * way, an empty `href` is resolved as such instead of being mistaken for a
 * missing attribute, and an element that has both attributes gets each of
 * them converted as needed.
 *
 * @param {Element} el The element to serialize
 * @return {String} The trimmed inner HTML of the cleaned up element
 */
function getHTMLContent(el) {
  const urlAttributes = ['href', 'src'];
  const relativeUrlSelector = '[href]:not([href^="http"]),[src]:not([src^="http"])';
  const hasRelativeUrl = (linkEl, attr) =>
    linkEl.hasAttribute(attr) && !linkEl.getAttribute(attr).startsWith('http');

  // Prepare mapping table to turn relative links to absolute ones
  // (we cannot do that once the element has been cloned because cloning
  // removes the element from the DOM tree)
  // A Map is used so that attribute values cannot clash with properties
  // inherited from Object.prototype.
  const relativeToAbsolute = new Map();
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  const base = page ?? window.location.href;
  for (const linkEl of el.querySelectorAll(relativeUrlSelector)) {
    for (const attr of urlAttributes) {
      if (hasRelativeUrl(linkEl, attr)) {
        const value = linkEl.getAttribute(attr);
        relativeToAbsolute.set(value, new URL(value, base).toString());
      }
    }
  }

  const clone = cloneAndClean(el);

  // Steps get serialized separately, only keep the prose
  for (const ol of clone.querySelectorAll('ol')) {
    ol.remove();
  }

  for (const linkEl of clone.querySelectorAll(relativeUrlSelector)) {
    for (const attr of urlAttributes) {
      if (hasRelativeUrl(linkEl, attr)) {
        // Leave the attribute untouched when no absolute URL is known for
        // it, rather than writing "undefined" in the markup.
        const absolute = relativeToAbsolute.get(linkEl.getAttribute(attr));
        if (absolute !== undefined) {
          linkEl.setAttribute(attr, absolute);
        }
      }
    }
  }
  return clone.innerHTML.trim();
}
|
|
348
|
+
|
|
349
|
+
/**
 * Normalize a text for serialization purpose: leading and trailing white
 * space is dropped, and any other run of white space characters (including
 * line breaks) is replaced by a single space.
 */
function normalize(str) {
  return str.trim().split(/\s+/).join(' ');
}
|
|
355
|
+
|
|
356
|
+
/**
 * Return the name and href of the first definition contained in the given
 * element (or of the element itself if it is a `<dfn>`).
 *
 * Headings that carry a `data-dfn-type` attribute are considered to be
 * definitions too. When no definition can be found, the function falls back
 * to the first heading with an ID. An empty object is returned when that
 * fails as well.
 */
function getDefinedNameIn(el) {
  const definitionSelector = 'dfn,h2[data-dfn-type],h3[data-dfn-type],h4[data-dfn-type],h5[data-dfn-type],h6[data-dfn-type]';
  const dfn = (el.nodeName === 'DFN') ? el : el.querySelector(definitionSelector);

  if (!dfn) {
    const heading = el.querySelector('h2[id],h3[id],h4[id],h5[id],h6[id]');
    return heading ?
      { name: getTextContent(heading), href: getAbsoluteUrl(heading) } :
      {};
  }

  // Scope the name to the first entry in `data-dfn-for`, if any. Commas that
  // appear within parentheses do not separate entries.
  const dfnFor = dfn.getAttribute('data-dfn-for');
  const scope = dfnFor ?
    normalize(dfnFor.split(/,(?![^\(]*\))/)[0]) + '/' :
    '';

  // The first linking text wins over the text of the definition
  const linkingText = dfn.getAttribute('data-lt');
  const name = scope + (linkingText ?
    normalize(linkingText.split('|')[0]) :
    getTextContent(dfn));

  // Two known exceptions to the rule that definitions have an ID:
  // - one due to CSS 2.1 not following the definitions data model:
  // https://www.w3.org/TR/CSS21/visudet.html#containing-block-details
  // - the other due to HTML still containing dfns without IDs as well,
  // including one for an algorithm:
  // https://html.spec.whatwg.org/multipage/server-sent-events.html#processField
  // It's possible to find an ID in both cases. But it's not clear that
  // CSS 2.1 algorithms are real algorithms; and it seems doable to fix the
  // HTML spec. Let's just return the name without href, not to end up
  // with a null `href` that the JSON schema forbids.
  return dfn.id ?
    { name, href: getAbsoluteUrl(dfn) } :
    { name };
}
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
/**
 * Retrieve a pointer to the introductory paragraph for the algorithm, if
 * there's one.
 *
 * The function first looks for a `<dfn>` that is not part of the algorithm
 * steps in the closest list item or ".algorithm" container, walking up
 * ".algorithm" containers until a list item gets reached. It falls back to
 * the closest non-informative `<p>` found among the algorithm's root and its
 * previous siblings. The result is `null` when there is no such paragraph.
 */
function findIntroParagraph(algo) {
  const containerSelector = 'li,.algorithm';

  for (let container = algo.root.closest(containerSelector);
      container;
      container = container.parentElement.closest(containerSelector)) {
    const dfn = container.querySelector('dfn');
    if (dfn && !algo.root.contains(dfn)) {
      const enclosing = dfn.closest('p,div,li');
      if (enclosing) {
        return enclosing;
      }
      break;
    }
    if (container.nodeName === 'LI') {
      break;
    }
  }

  // Consider that the introductory paragraph is the previous paragraph.
  // That's not going to be 100% correct. For example, we will incorrectly
  // capture an intermediary paragraph as in:
  // https://w3c.github.io/webappsec-csp/#abstract-opdef-parse-a-serialized-csp
  // TODO: improve!
  let candidate = algo.root;
  while (candidate &&
      !(candidate.nodeName === 'P' && !candidate.matches(informativeSelector))) {
    candidate = candidate.previousElementSibling;
  }
  return candidate;
}
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
/**
 * Find information about an algorithm (name, href and introductory prose).
 *
 * The name is given by a nearby `dfn`. If there's no nearby `dfn`, the
 * name is the content of the preceding paragraph.
 *
 * The name and href are looked for, in order, in the ".algorithm" containers
 * that wrap the algorithm, in the introductory prose, and in the `<dt>` that
 * precedes the algorithm when it is defined in a `<dd>`. Information found
 * first takes priority. No name and href are looked for when
 * `context.nested` is set.
 *
 * The `href` property is only ever set to an actual URL. Setting it to
 * `undefined` would make it an own property of the info object, and that
 * property would then mask the href found in the introductory prose or `<dt>`
 * when information gets merged through `Object.assign`.
 *
 * @param {Object} algo The algorithm, with a `root` element and possibly an
 *   `intro` element
 * @param {Object} context Optional context. A truthy `nested` property means
 *   that the algorithm is nested in a step.
 * @return {Object} An object that may have `name`, `href` and `html` keys
 */
function getAlgorithmInfo(algo, context) {
  const lookForName = !context?.nested;
  let info = {};

  // Complete `info` with the definition found in the given element, unless
  // both the name and href are already known. Known values take priority.
  const completeInfoFrom = el => {
    if (lookForName && !(info.name && info.href)) {
      info = Object.assign(getDefinedNameIn(el), info);
    }
  };

  // Look for a name in the algorithm container, if there's one.
  // Note some specs add the "algorithm" class to the `<ol>` and to the
  // wrapping container, and define the name in the wrapping container.
  // NOTE(review): the loop keeps walking up the ancestor containers after a
  // `data-algorithm` attribute was found, so an outer container may still
  // override the name - confirm that this is intended.
  if (lookForName) {
    let container = algo.root.closest('.algorithm');
    while (container) {
      if (container.getAttribute('data-algorithm')) {
        info.name = normalize(container.getAttribute('data-algorithm'));
        if (container.getAttribute('data-algorithm-for')) {
          info.name = normalize(container.getAttribute('data-algorithm-for')) +
            '/' + info.name;
        }
        if (container.id) {
          // Use the container ID as anchor
          info.href = getAbsoluteUrl(container);
        }
        else {
          // Container has no ID but if there's a dfn in there, that's probably
          // the right anchor
          const dfn = getDefinedNameIn(container);
          if (dfn.href) {
            info.href = dfn.href;
          }
        }
      }
      else {
        info = getDefinedNameIn(container);
        if (info.name || info.href) {
          break;
        }
      }
      container = container.parentElement.closest('.algorithm');
    }
  }

  // Get the introductory prose from the previous paragraph
  const paragraph = algo.intro;
  if (paragraph) {
    // Also look for a definition in the paragraph if we don't have a name and
    // href already.
    completeInfoFrom(paragraph);
    info.html = getHTMLContent(paragraph);
  }
  else if (['LI', 'DD', 'DIV'].includes(algo.root.parentElement.nodeName)) {
    // If there's no paragraph, we may be in a list or definition list, the
    // introductory prose is whatever text exists before the algorithm
    const textEl = document.createElement('div');
    let node = algo.root.parentElement.firstChild;
    while (node !== algo.root) {
      textEl.appendChild(node.cloneNode(true));
      node = node.nextSibling;
    }
    completeInfoFrom(textEl);
    info.html = getHTMLContent(textEl);
  }

  // Last resort for algorithms defined in a `<dd>`: the name may be defined
  // in the `<dt>` that precedes it
  if (lookForName && !(info.name && info.href) &&
      algo.root.parentElement.nodeName === 'DD') {
    let dt = algo.root.parentElement.previousElementSibling;
    while (dt && dt.nodeName !== 'DT') {
      dt = dt.previousElementSibling;
    }
    if (dt) {
      completeInfoFrom(dt);
    }
  }

  // TODO: look for the closest heading?
  return info;
}
|
|
519
|
+
|
|
520
|
+
/**
 * Serialize the given algorithm
 *
 * The serialization contains the information returned by `getAlgorithmInfo`,
 * the rationale for selecting the algorithm, and the serialized steps when
 * there are some.
 *
 * The context object makes it possible to tell top-level algorithms apart
 * from nested ones. Nested ones typically don't have names.
 */
function serializeAlgorithm(algo, context) {
  const info = getAlgorithmInfo(algo, context);
  const steps = serializeSteps(algo.root);
  return Object.assign(
    info,
    { rationale: algo.rationale },
    (steps.length > 0) ? { steps } : null);
}
|
|
535
|
+
|
|
536
|
+
/**
 * Serialize the given steps contained in the given root element.
 *
 * A `<dl>` root gets serialized as a single "switch" step whose substeps are
 * the `<dt>`/`<dd>` pairs of the list. An `<ol>` root gets serialized as one
 * step per list item. Any other root yields an empty list of steps.
 *
 * Direct children are selected with `:scope >`, which is equivalent to the
 * CSS nesting selector (`&`) in `querySelectorAll`, but does not require
 * support for CSS nesting.
 *
 * @param {Element} root The root element of the algorithm
 * @return {Array<Object>} The serialized steps
 * @throws {Error} When a `<dt>` of a switch is not followed by a `<dd>`
 */
function serializeSteps(root) {
  if (root.nodeName === 'DL') {
    const cases = [...root.querySelectorAll(':scope > dt')].map(option => {
      // The steps of the case are in the next `<dd>`
      let dd = option.nextElementSibling;
      while (dd && dd.nodeName !== 'DD') {
        dd = dd.nextElementSibling;
      }
      if (!dd) {
        throw new Error('Switch option without <dd> found: ' + option.textContent);
      }
      return Object.assign(
        { 'case': getTextContent(option) },
        serializeStep(dd));
    });
    return [
      {
        operation: 'switch',
        steps: cases
      }
    ];
  }
  else if (root.nodeName === 'OL') {
    return [...root.querySelectorAll(':scope > li')].map(li => serializeStep(li));
  }
  else {
    return [];
  }
}
|
|
566
|
+
|
|
567
|
+
/**
 * Serialize an algorithm step
 *
 * The first algorithm found in the step that has a rationale gives the
 * serialization of the step. Other algorithms that have a rationale are
 * reported in `additional`. Lists without rationale are reported as text in
 * `ignored`. The `html` property defaults to the content of the step itself.
 */
function serializeStep(li) {
  const candidates = findAlgorithms(li, { includeIgnored: true });
  const [main, ...others] = candidates.filter(algo => Boolean(algo.rationale));
  const ignored = candidates.filter(algo => !algo.rationale);

  const step = main ? serializeAlgorithm(main, { nested: true }) : {};
  if (!step.html) {
    step.html = getHTMLContent(li);
  }
  if (others.length > 0) {
    step.additional = others.map(algo => serializeAlgorithm(algo, { nested: true }));
  }
  if (ignored.length > 0) {
    step.ignored = ignored.map(algo => getTextContent(algo.root));
  }
  return step;
}
|
|
591
|
+
|
|
592
|
+
/**
 * Parse a list element looking for algorithmic operations or other anchors
 * that should allow us to assess that the steps are indeed part of an
 * algorithm. Return a string representation of that rationale.
 *
 * A list that has an "algorithm" class is an algorithm by definition.
 * Otherwise, list items (nested ones included) are considered in turn and
 * the first match in the first item that matches gives the rationale. For each
 * item, step operations are tested first, then inline operations, then
 * anchors. The function returns `undefined` when no rationale can be found.
 */
function findRationale(ol) {
  if (ol.matches('.algorithm')) {
    return '.algorithm';
  }

  // Patterns are either strings, looked for anywhere in the text, or regular
  // expressions
  const foundIn = text => pattern =>
    (typeof pattern === 'string') ? text.includes(pattern) : text.match(pattern);

  for (const li of ol.querySelectorAll('li')) {
    const text = getTextContent(li).toLowerCase();
    const startsWithOperation = op =>
      text.match(new RegExp(`^${op}(\\.|:| )`, 'i'));
    const rationale =
      stepOperations.find(startsWithOperation) ||
      stepInlineOperations.find(foundIn(text)) ||
      stepAnchors.find(foundIn(text));
    if (rationale) {
      return rationale.toString();
    }
  }

  return undefined;
}
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
/**
 * Find the list of normative algorithms defined in the document's section
 *
 * Each algorithm is returned as an object with a `root` element, the
 * `rationale` for selecting it, and an `intro` element (or `null`) that
 * points to its introductory paragraph. Algorithms are returned in document
 * order. Lists that do not look like algorithm steps are dropped unless the
 * `includeIgnored` option is set, in which case they are returned without
 * rationale.
 */
function findAlgorithms(section, { includeIgnored } = { includeIgnored: false }) {
  const isNormative = el => !el.closest(informativeSelector);

  // Well-behaved algorithms have an "algorithm" class and start with an <ol>,
  // or they have a "switch" class, à la:
  // https://dom.spec.whatwg.org/#locate-a-namespace
  const actual = [];
  for (const el of section.querySelectorAll('.algorithm,.switch')) {
    if (!isNormative(el)) {
      continue;
    }
    const rationale = el.matches('.algorithm') ? '.algorithm' : '.switch';
    const isList = (el.nodeName === 'DL' || el.nodeName === 'OL');
    const root = isList ? el : el.querySelector('ol');
    if (root) {
      actual.push({ rationale, root });
    }
  }

  // Probable algorithms do not have an "algorithm" class but start with an <ol>
  const probable = [];
  for (const ol of section.querySelectorAll('ol')) {
    const skip = !isNormative(ol) ||
      ol.closest('nav,.toc,#toc') ||
      actual.some(algo => algo.root.contains(ol));
    if (skip) {
      continue;
    }
    // Find an interesting anchor in there to filter out
    // lists that don't look like steps
    const rationale = findRationale(ol)?.toString();
    if (includeIgnored || rationale) {
      probable.push({ rationale, root: ol });
    }
  }

  // Merge actual and probable algorithms, dropping duplicates (first
  // occurrence wins) and algorithms that are nested under other algorithms.
  const knownRoots = new Set();
  const unique = [...actual, ...probable].filter(algo => {
    if (knownRoots.has(algo.root)) {
      return false;
    }
    knownRoots.add(algo.root);
    return true;
  });
  const topLevel = unique.filter(inner =>
    !unique.some(outer => outer !== inner && outer.root.contains(inner.root)));

  // Look for the "intro" paragraph for the algorithms, if there's one.
  // This will be used right after to extract "one-step" algorithms.
  topLevel.forEach(algo => {
    algo.intro = findIntroParagraph(algo);
  });

  // Complete the list with probable "one-step" algorithms: those defined in a
  // paragraph, that start with "To " followed by an exported definition of
  // type "dfn" or "abstract-op", and that don't have any steps (in other
  // words, that haven't been captured yet).
  const candidateDfnSelectors = [
    'dfn[data-export][data-dfn-type="dfn"]',
    'dfn[data-export][data-dfn-type="abstract-op"]'
  ];
  const dfnSelector = candidateDfnSelectors.join(',');
  const paragraphSelector = candidateDfnSelectors.map(s => `p:has(${s})`).join(',');
  const oneLiners = [];
  for (const p of section.querySelectorAll(paragraphSelector)) {
    const dfnText = p.querySelector(dfnSelector).textContent;
    const alreadyCaptured = () => topLevel.some(algo => algo.intro === p);
    if (p.textContent.startsWith(`To ${dfnText}`) && !alreadyCaptured()) {
      oneLiners.push({ rationale: 'To <dfn>', root: p, intro: p });
    }
  }

  // Consider algorithms in document order
  // (if we find more than one at the same level, first one will be reported as
  // the actual algorithm, the other ones as "additional" algorithms)
  const all = [...topLevel, ...oneLiners];
  all.sort((algo1, algo2) => {
    if (algo1.root === algo2.root) {
      return 0;
    }
    const position = algo1.root.compareDocumentPosition(algo2.root);
    return (position & Node.DOCUMENT_POSITION_PRECEDING) ? 1 : -1;
  });
  return all;
}
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
/**
 * Extract the algorithms defined in the current document.
 *
 * @param {Object} spec Description of the spec being processed. Only its
 *   `organization` property is read here.
 * @param {Object} idToHeading Not used by this function.
 * @return {Array<Object>} The serialized algorithms; an empty array for specs
 *   published by Ecma International.
 */
export default function (spec, idToHeading = {}) {
  // ECMA specs typically use <emu-alg> clauses, not supported for now.
  const isEcmaSpec = spec.organization === 'Ecma International';
  return isEcmaSpec ?
    [] :
    findAlgorithms(document).map(algo => serializeAlgorithm(algo));
}
|