reffy 20.0.13 → 20.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +151 -151
- package/index.js +29 -29
- package/package.json +3 -3
- package/reffy.js +324 -324
- package/schemas/browserlib/extract-algorithms.json +52 -52
- package/schemas/browserlib/extract-cssdfn.json +108 -108
- package/schemas/browserlib/extract-dfns.json +90 -90
- package/schemas/browserlib/extract-elements.json +17 -17
- package/schemas/browserlib/extract-events.json +31 -31
- package/schemas/browserlib/extract-headings.json +19 -19
- package/schemas/browserlib/extract-ids.json +7 -7
- package/schemas/browserlib/extract-links.json +12 -12
- package/schemas/browserlib/extract-refs.json +12 -12
- package/schemas/common.json +876 -876
- package/schemas/files/extracts/algorithms.json +12 -12
- package/schemas/files/extracts/css.json +16 -16
- package/schemas/files/extracts/dfns.json +12 -12
- package/schemas/files/extracts/elements.json +12 -12
- package/schemas/files/extracts/events.json +12 -12
- package/schemas/files/extracts/headings.json +12 -12
- package/schemas/files/extracts/ids.json +12 -12
- package/schemas/files/extracts/links.json +12 -12
- package/schemas/files/extracts/refs.json +12 -12
- package/schemas/files/index.json +59 -59
- package/schemas/postprocessing/events.json +50 -50
- package/schemas/postprocessing/idlnames-parsed.json +27 -27
- package/schemas/postprocessing/idlnames.json +17 -17
- package/schemas/postprocessing/idlparsed.json +67 -67
- package/src/browserlib/clone-and-clean.mjs +24 -24
- package/src/browserlib/create-outline.mjs +353 -353
- package/src/browserlib/extract-algorithms.mjs +723 -723
- package/src/browserlib/extract-cddl.mjs +125 -125
- package/src/browserlib/extract-dfns.mjs +1093 -1093
- package/src/browserlib/extract-headings.mjs +76 -76
- package/src/browserlib/extract-ids.mjs +28 -28
- package/src/browserlib/extract-links.mjs +45 -45
- package/src/browserlib/extract-references.mjs +308 -308
- package/src/browserlib/extract-webidl.mjs +89 -89
- package/src/browserlib/get-absolute-url.mjs +29 -29
- package/src/browserlib/get-code-elements.mjs +20 -20
- package/src/browserlib/get-generator.mjs +26 -26
- package/src/browserlib/get-lastmodified-date.mjs +13 -13
- package/src/browserlib/get-revision.mjs +12 -12
- package/src/browserlib/get-title.mjs +14 -14
- package/src/browserlib/informative-selector.mjs +24 -24
- package/src/browserlib/map-ids-to-headings.mjs +173 -173
- package/src/browserlib/reffy.json +85 -85
- package/src/browserlib/trim-spaces.mjs +35 -35
- package/src/cli/check-missing-dfns.js +587 -587
- package/src/cli/merge-crawl-results.js +132 -132
- package/src/cli/parse-webidl.js +447 -447
- package/src/lib/css-grammar-parse-tree.schema.json +109 -109
- package/src/lib/css-grammar-parser.js +440 -440
- package/src/lib/fetch.js +51 -51
- package/src/lib/markdown-report.js +360 -360
- package/src/lib/mock-server.js +218 -218
- package/src/lib/post-processor.js +322 -322
- package/src/lib/throttled-queue.js +129 -129
- package/src/postprocessing/annotate-links.js +41 -41
- package/src/postprocessing/csscomplete.js +48 -48
- package/src/postprocessing/idlnames.js +391 -391
- package/src/postprocessing/idlparsed.js +179 -179
- package/src/postprocessing/patch-dfns.js +51 -51
- package/src/specs/missing-css-rules.json +197 -197
- package/src/specs/spec-equivalents.json +149 -149
- package/src/browserlib/extract-editors.mjs~ +0 -14
- package/src/browserlib/extract-events.mjs~ +0 -3
- package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
- package/src/browserlib/get-revision.mjs~ +0 -7
- package/src/cli/csstree-grammar-check.js +0 -28
- package/src/cli/csstree-grammar-check.js~ +0 -10
- package/src/cli/csstree-grammar-parser.js +0 -11
- package/src/cli/csstree-grammar-parser.js~ +0 -1
- package/src/cli/extract-editors.js~ +0 -38
- package/src/cli/process-specs.js~ +0 -28
- package/src/postprocessing/annotate-links.js~ +0 -8
- package/src/postprocessing/events.js~ +0 -245
|
@@ -1,308 +1,308 @@
|
|
|
1
|
-
import getGenerator from './get-generator.mjs';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
/**
 * Extract the list of references from the "References" appendix of the
 * current document.
 *
 * Notes:
 * - By definition, this function does not return the specifications that
 *   the current document references in the prose but failed to add to the
 *   "References" appendix.
 * - The function returns null (it does not throw) when no references could
 *   be found.
 *
 * @function
 * @public
 * @return {Object} An object with "normative" and "informative" arrays that
 *   list references as they appear in the "References" appendix, or null
 *   when both lists are empty.
 */
export default function () {
  const generator = getGenerator();
  const extractionRules = getExtractionRules(generator);
  const references = extractReferences(extractionRules);

  // Report "nothing found" as null rather than as an object with empty lists
  const hasReferences = Boolean(
    references?.normative?.length || references?.informative?.length);
  return hasReferences ? references : null;
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
/**
 * Given the name of the generator used to create the document,
 * return the rules to use to extract references.
 *
 * @function
 * @private
 * @param {String} generator The well-known generator used to create the doc,
 *   null if unknown
 * @return {Object} Relevant extraction rules, or null if no rules apply
 *   (missing generator, or generator without dedicated rules).
 */
function getExtractionRules(generator) {
  const extractionRules = {
    bikeshed: {
      generator: "Bikeshed",
      listSelector: {
        normative: "#normative + dl",
        informative: "#informative + dl"
      }
    },
    respec: {
      generator: "ReSpec",
      listSelector: {
        normative: "#normative-references > dl",
        informative: "#informative-references > dl"
      }
    }
  };

  // Only own keys count: a generator named "constructor" or "toString" must
  // not resolve to a property inherited from Object.prototype.
  if (!generator || !Object.hasOwn(extractionRules, generator)) {
    return null;
  }
  return extractionRules[generator];
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
/**
 * Walk through the siblings that follow a node and return the first one that
 * matches the requested name.
 *
 * @function
 * @private
 * @param {Node} node The DOM node to use as starting point
 * @param {String} name The sibling name to find (used as a selector),
 *   "heading" to match any heading
 * @param {Node} until The optional DOM sibling at which to stop no matter what
 * @return {Node} The next sibling with the given name, null if the walk
 *   reaches "until" or runs out of siblings first
 */
function nextTag(node, name, until) {
  const selector = (name === "heading") ? "h1,h2,h3,h4,h5,h6,hgroup" : name;
  for (let sibling = node.nextElementSibling;
       sibling;
       sibling = sibling.nextElementSibling) {
    // The boundary wins over a match: never return the "until" node itself
    if (sibling === until) {
      return null;
    }
    if (sibling.matches(selector)) {
      return sibling;
    }
  }
  return null;
}
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
/**
 * Given a reference list in the markup, parse and return the list of
 * references.
 *
 * Two list shapes are supported:
 * - a "dl", where each "dt" holds the reference name and the next "dd"
 *   sibling holds its description;
 * - a "ul", where each "li" holds one reference.
 * Any other value (including a missing list) yields two empty lists.
 *
 * Entries for which no name could be extracted are skipped.
 *
 * @function
 * @param {Node} referenceList The "dl" or "ul" to parse
 * @param {Object} options Parsing options, set "filterInformative" to put
 *   references flagged as "non-normative" to a separate returned list
 *   (only applies to "dl" lists)
 * @return {Array} An array whose first item is the list of references and the
 *   second item the list of "non-normative" references (the second item is
 *   only populated when "filterInformative" is set)
 */
function parseReferences(referenceList, options) {
  const defaultRef = [];
  const informativeRef = [];
  const filterInformative = Boolean(options?.filterInformative);
  const children = [...(referenceList?.children ?? [])];

  if (referenceList?.tagName === "DL") {
    for (const dt of children) {
      if (dt.tagName !== "DT") {
        continue;
      }
      // Drop the brackets and spacing around names such as "[HTML]"
      const name = dt.textContent.replace(/[\[\] \n]/g, '');
      const desc = nextTag(dt, "dd");
      if (!desc || !name) {
        continue;
      }
      const ref = { name };
      const url = desc.querySelector('a[href*="://"]')?.href;
      if (url) {
        ref.url = url;
      }
      if (filterInformative && /non-normative/i.test(desc.textContent)) {
        informativeRef.push(ref);
      }
      else {
        defaultRef.push(ref);
      }
    }
  }
  else if (referenceList?.tagName === "UL") {
    for (const item of children) {
      if (item.tagName !== "LI") {
        continue;
      }
      // The ECMA-402 spec lists nests another list for more atomic
      // references with "URLs in your face":
      // https://tc39.es/ecma402/#normative-references
      // Let's drop nested lists for now to avoid extracting noise
      // (TODO: consider smarter code or creating an exception to the rule
      // for ECMA-402)
      // Work on a copy so that the document itself is left untouched.
      const li = item.cloneNode(true);
      for (const nested of [...li.querySelectorAll("ul")]) {
        nested.remove();
      }
      const anchor = li.querySelector("a[href]");
      const ref = {};
      if (anchor) {
        ref.name = anchor.innerText.trim();
        ref.url = anchor.getAttribute("href");
      }
      else {
        ref.name = li.innerText.trim();
      }
      // Same rule as for "dl" lists: a reference without a name is noise
      if (ref.name) {
        defaultRef.push(ref);
      }
    }
  }
  return [defaultRef, informativeRef];
}
|
|
149
|
-
|
|
150
|
-
/**
 * Create a predicate that matches the text content of a node against the
 * given regular expression.
 *
 * @function
 * @private
 * @param {RegExp} re The pattern to look for
 * @return {Function} A function that takes a node and returns the result of
 *   matching its text content against the pattern (null when there is no
 *   match)
 */
const textMatch = function (re) {
  return function (n) {
    return n.textContent.match(re);
  };
};
|
|
151
|
-
|
|
152
|
-
/**
 * Extract references from generic documents that we could not associate with
 * any particular set of extraction rules.
 *
 * The lookup goes through the following steps:
 * 1. "emu-clause" reference sections (ECMA-426 style);
 * 2. dedicated "Normative references" / "Informative references" headings
 *    (the function returns right after this step if either heading exists);
 * 3. a generic "References" heading, with normative/informative subheadings
 *    or, failing that, a flat list in which entries flagged as
 *    "non-normative" are reported as informative.
 *
 * @function
 * @private
 * @return {Object} An object with "normative" and "informative" arrays of
 *   references (both may be empty).
 */
function extractReferencesWithoutRules() {
  const references = {
    normative: [],
    informative: []
  };
  const anchors = [...document.querySelectorAll("h1, h2, h3")];
  console.log('[reffy]', 'extract refs without rules');

  // References are preferably in a "dl", otherwise in a "ul"
  const findList = (start, until) =>
    nextTag(start, "dl", until) ?? nextTag(start, "ul", until);

  // Custom logic for Source map format specification (ECMA-426)
  // Looks for <emu-clause id="sec-references"> and its child clauses
  for (const refType of ['normative', 'informative']) {
    const clause = document.querySelector([
      `emu-clause#sec-references-${refType}`,
      `emu-clause#sec-${refType}-references`
    ].join(','));
    if (!clause) {
      continue;
    }
    const refs = [];
    for (const p of clause.querySelectorAll('p')) {
      // The name is whatever precedes the first comma or period
      const name = p.innerText.match(/(.+?)(,|\.)/m)?.[1].trim();
      const hasFullTitle = !!p.querySelector('i');
      const anchor = p.querySelector('a[href]');
      if (!name || !(anchor || hasFullTitle)) {
        continue;
      }
      const ref = { name };
      const url = anchor?.getAttribute('href');
      if (url?.match(/^https?:\/\//)) {
        ref.url = url;
      }
      refs.push(ref);
    }
    references[refType] = refs;
  }

  // Look for a "Normative references" heading
  const normative = anchors.findLast(
    textMatch(/^\s*((\w|\d+)(\.\d+)*\.?)?\s*normative\s+references\s*$/i));
  if (normative) {
    console.log('[reffy]', 'normative references section found', normative.textContent);
    const nList = findList(normative, nextTag(normative, "heading"));
    if (nList) {
      references.normative = parseReferences(nList)[0];
    }
  }

  // Look for an "Informative references" heading
  const informative = anchors.findLast(
    textMatch(/^\s*((\w|\d+)(\.\d+)*\.?)?\s*(informative|non-normative)\s+references\s*$/i));
  if (informative) {
    const iList = findList(informative, nextTag(informative, "heading"));
    if (iList) {
      references.informative = parseReferences(iList)[0];
    }
  }

  if (informative || normative) {
    return references;
  }

  // Look for a generic "references" heading
  const refHeading = anchors.findLast(textMatch(/references/i));
  if (!refHeading) {
    return references;
  }

  const nextSection = nextTag(refHeading, refHeading.tagName);
  const subHeadingLevel = "h" + (parseInt(refHeading.tagName.substring(1), 10) + 1);
  let subHeading = nextTag(refHeading, subHeadingLevel, nextSection);
  while (subHeading) {
    const following = nextTag(subHeading, subHeadingLevel, nextSection);
    const title = subHeading.textContent;

    // "Non-normative" contains "normative": test the informative flavors
    // first so that such a subheading is not reported as normative.
    let type = null;
    if (/informative|non-normative/i.test(title)) {
      type = "informative";
    }
    else if (/normative/i.test(title)) {
      type = "normative";
    }

    if (type) {
      // Stop at the next subheading so that a subsection without a list
      // does not pick up the list of the subsection that follows it.
      const list = findList(subHeading, following ?? nextSection);
      if (list) {
        references[type] = parseReferences(list)[0];
      }
    }
    subHeading = following;
  }

  if (references.normative.length === 0 &&
      references.informative.length === 0) {
    // No subheading, flat list of references
    const list = findList(refHeading, nextSection);
    if (list) {
      const refs = parseReferences(list, { filterInformative: true });
      references.normative = refs[0];
      references.informative = refs[1];
    }
  }
  return references;
}
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
/**
 * Extract references from the current document
 *
 * @function
 * @private
 * @param {Object} rules Extraction rules to use. When not set, the function
 *   falls back to the generic extraction logic. When set, rules must have a
 *   "listSelector.normative" selector; "listSelector.informative" is optional.
 * @return {Object} An object with "normative" and "informative" arrays of
 *   references (both may be empty).
 * @throws {Error} When rules are provided but lack a normative list selector
 */
function extractReferences(rules) {
  if (!rules) {
    return extractReferencesWithoutRules();
  }
  if (!rules.listSelector?.normative) {
    throw new Error("Extraction rules for the list of references are incorrect");
  }

  const references = {
    normative: [],
    informative: []
  };
  for (const referenceType of ["normative", "informative"]) {
    const selector = rules.listSelector[referenceType];
    const referenceList = selector ? document.querySelector(selector) : null;
    if (!referenceList) {
      continue;
    }
    // Entries of the normative list that are flagged as "non-normative" are
    // moved to the informative list. The second list is always empty when
    // parsing the informative list since filtering is off in that case.
    const [listed, flagged] = parseReferences(referenceList, {
      filterInformative: (referenceType === "normative")
    });
    references[referenceType].push(...listed);
    references.informative.push(...flagged);
  }

  return references;
}
|
|
1
|
+
import getGenerator from './get-generator.mjs';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
 * Extract the list of references from the "References" appendix of the
 * current document.
 *
 * Notes:
 * - By definition, this function does not return the specifications that
 *   the current document references in the prose but failed to add to the
 *   "References" appendix.
 * - The function returns null (it does not throw) when no references could
 *   be found.
 *
 * @function
 * @public
 * @return {Object} An object with "normative" and "informative" arrays that
 *   list references as they appear in the "References" appendix, or null
 *   when both lists are empty.
 */
export default function () {
  const generator = getGenerator();
  const extractionRules = getExtractionRules(generator);
  const references = extractReferences(extractionRules);

  // Report "nothing found" as null rather than as an object with empty lists
  const hasReferences = Boolean(
    references?.normative?.length || references?.informative?.length);
  return hasReferences ? references : null;
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
/**
 * Given the name of the generator used to create the document,
 * return the rules to use to extract references.
 *
 * @function
 * @private
 * @param {String} generator The well-known generator used to create the doc,
 *   null if unknown
 * @return {Object} Relevant extraction rules, or null if no rules apply
 *   (missing generator, or generator without dedicated rules).
 */
function getExtractionRules(generator) {
  const extractionRules = {
    bikeshed: {
      generator: "Bikeshed",
      listSelector: {
        normative: "#normative + dl",
        informative: "#informative + dl"
      }
    },
    respec: {
      generator: "ReSpec",
      listSelector: {
        normative: "#normative-references > dl",
        informative: "#informative-references > dl"
      }
    }
  };

  // Only own keys count: a generator named "constructor" or "toString" must
  // not resolve to a property inherited from Object.prototype.
  if (!generator || !Object.hasOwn(extractionRules, generator)) {
    return null;
  }
  return extractionRules[generator];
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/**
 * Walk through the siblings that follow a node and return the first one that
 * matches the requested name.
 *
 * @function
 * @private
 * @param {Node} node The DOM node to use as starting point
 * @param {String} name The sibling name to find (used as a selector),
 *   "heading" to match any heading
 * @param {Node} until The optional DOM sibling at which to stop no matter what
 * @return {Node} The next sibling with the given name, null if the walk
 *   reaches "until" or runs out of siblings first
 */
function nextTag(node, name, until) {
  const selector = (name === "heading") ? "h1,h2,h3,h4,h5,h6,hgroup" : name;
  for (let sibling = node.nextElementSibling;
       sibling;
       sibling = sibling.nextElementSibling) {
    // The boundary wins over a match: never return the "until" node itself
    if (sibling === until) {
      return null;
    }
    if (sibling.matches(selector)) {
      return sibling;
    }
  }
  return null;
}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
/**
 * Given a reference list in the markup, parse and return the list of
 * references.
 *
 * Two list shapes are supported:
 * - a "dl", where each "dt" holds the reference name and the next "dd"
 *   sibling holds its description;
 * - a "ul", where each "li" holds one reference.
 * Any other value (including a missing list) yields two empty lists.
 *
 * Entries for which no name could be extracted are skipped.
 *
 * @function
 * @param {Node} referenceList The "dl" or "ul" to parse
 * @param {Object} options Parsing options, set "filterInformative" to put
 *   references flagged as "non-normative" to a separate returned list
 *   (only applies to "dl" lists)
 * @return {Array} An array whose first item is the list of references and the
 *   second item the list of "non-normative" references (the second item is
 *   only populated when "filterInformative" is set)
 */
function parseReferences(referenceList, options) {
  const defaultRef = [];
  const informativeRef = [];
  const filterInformative = Boolean(options?.filterInformative);
  const children = [...(referenceList?.children ?? [])];

  if (referenceList?.tagName === "DL") {
    for (const dt of children) {
      if (dt.tagName !== "DT") {
        continue;
      }
      // Drop the brackets and spacing around names such as "[HTML]"
      const name = dt.textContent.replace(/[\[\] \n]/g, '');
      const desc = nextTag(dt, "dd");
      if (!desc || !name) {
        continue;
      }
      const ref = { name };
      const url = desc.querySelector('a[href*="://"]')?.href;
      if (url) {
        ref.url = url;
      }
      if (filterInformative && /non-normative/i.test(desc.textContent)) {
        informativeRef.push(ref);
      }
      else {
        defaultRef.push(ref);
      }
    }
  }
  else if (referenceList?.tagName === "UL") {
    for (const item of children) {
      if (item.tagName !== "LI") {
        continue;
      }
      // The ECMA-402 spec lists nests another list for more atomic
      // references with "URLs in your face":
      // https://tc39.es/ecma402/#normative-references
      // Let's drop nested lists for now to avoid extracting noise
      // (TODO: consider smarter code or creating an exception to the rule
      // for ECMA-402)
      // Work on a copy so that the document itself is left untouched.
      const li = item.cloneNode(true);
      for (const nested of [...li.querySelectorAll("ul")]) {
        nested.remove();
      }
      const anchor = li.querySelector("a[href]");
      const ref = {};
      if (anchor) {
        ref.name = anchor.innerText.trim();
        ref.url = anchor.getAttribute("href");
      }
      else {
        ref.name = li.innerText.trim();
      }
      // Same rule as for "dl" lists: a reference without a name is noise
      if (ref.name) {
        defaultRef.push(ref);
      }
    }
  }
  return [defaultRef, informativeRef];
}
|
|
149
|
+
|
|
150
|
+
/**
 * Create a predicate that matches the text content of a node against the
 * given regular expression.
 *
 * @function
 * @private
 * @param {RegExp} re The pattern to look for
 * @return {Function} A function that takes a node and returns the result of
 *   matching its text content against the pattern (null when there is no
 *   match)
 */
const textMatch = function (re) {
  return function (n) {
    return n.textContent.match(re);
  };
};
|
|
151
|
+
|
|
152
|
+
/**
 * Extract references from generic documents that we could not associate with
 * any particular set of extraction rules.
 *
 * The lookup goes through the following steps:
 * 1. "emu-clause" reference sections (ECMA-426 style);
 * 2. dedicated "Normative references" / "Informative references" headings
 *    (the function returns right after this step if either heading exists);
 * 3. a generic "References" heading, with normative/informative subheadings
 *    or, failing that, a flat list in which entries flagged as
 *    "non-normative" are reported as informative.
 *
 * @function
 * @private
 * @return {Object} An object with "normative" and "informative" arrays of
 *   references (both may be empty).
 */
function extractReferencesWithoutRules() {
  const references = {
    normative: [],
    informative: []
  };
  const anchors = [...document.querySelectorAll("h1, h2, h3")];
  console.log('[reffy]', 'extract refs without rules');

  // References are preferably in a "dl", otherwise in a "ul"
  const findList = (start, until) =>
    nextTag(start, "dl", until) ?? nextTag(start, "ul", until);

  // Custom logic for Source map format specification (ECMA-426)
  // Looks for <emu-clause id="sec-references"> and its child clauses
  for (const refType of ['normative', 'informative']) {
    const clause = document.querySelector([
      `emu-clause#sec-references-${refType}`,
      `emu-clause#sec-${refType}-references`
    ].join(','));
    if (!clause) {
      continue;
    }
    const refs = [];
    for (const p of clause.querySelectorAll('p')) {
      // The name is whatever precedes the first comma or period
      const name = p.innerText.match(/(.+?)(,|\.)/m)?.[1].trim();
      const hasFullTitle = !!p.querySelector('i');
      const anchor = p.querySelector('a[href]');
      if (!name || !(anchor || hasFullTitle)) {
        continue;
      }
      const ref = { name };
      const url = anchor?.getAttribute('href');
      if (url?.match(/^https?:\/\//)) {
        ref.url = url;
      }
      refs.push(ref);
    }
    references[refType] = refs;
  }

  // Look for a "Normative references" heading
  const normative = anchors.findLast(
    textMatch(/^\s*((\w|\d+)(\.\d+)*\.?)?\s*normative\s+references\s*$/i));
  if (normative) {
    console.log('[reffy]', 'normative references section found', normative.textContent);
    const nList = findList(normative, nextTag(normative, "heading"));
    if (nList) {
      references.normative = parseReferences(nList)[0];
    }
  }

  // Look for an "Informative references" heading
  const informative = anchors.findLast(
    textMatch(/^\s*((\w|\d+)(\.\d+)*\.?)?\s*(informative|non-normative)\s+references\s*$/i));
  if (informative) {
    const iList = findList(informative, nextTag(informative, "heading"));
    if (iList) {
      references.informative = parseReferences(iList)[0];
    }
  }

  if (informative || normative) {
    return references;
  }

  // Look for a generic "references" heading
  const refHeading = anchors.findLast(textMatch(/references/i));
  if (!refHeading) {
    return references;
  }

  const nextSection = nextTag(refHeading, refHeading.tagName);
  const subHeadingLevel = "h" + (parseInt(refHeading.tagName.substring(1), 10) + 1);
  let subHeading = nextTag(refHeading, subHeadingLevel, nextSection);
  while (subHeading) {
    const following = nextTag(subHeading, subHeadingLevel, nextSection);
    const title = subHeading.textContent;

    // "Non-normative" contains "normative": test the informative flavors
    // first so that such a subheading is not reported as normative.
    let type = null;
    if (/informative|non-normative/i.test(title)) {
      type = "informative";
    }
    else if (/normative/i.test(title)) {
      type = "normative";
    }

    if (type) {
      // Stop at the next subheading so that a subsection without a list
      // does not pick up the list of the subsection that follows it.
      const list = findList(subHeading, following ?? nextSection);
      if (list) {
        references[type] = parseReferences(list)[0];
      }
    }
    subHeading = following;
  }

  if (references.normative.length === 0 &&
      references.informative.length === 0) {
    // No subheading, flat list of references
    const list = findList(refHeading, nextSection);
    if (list) {
      const refs = parseReferences(list, { filterInformative: true });
      references.normative = refs[0];
      references.informative = refs[1];
    }
  }
  return references;
}
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
/**
 * Extract references from the current document
 *
 * @function
 * @private
 * @param {Object} rules Extraction rules to use. When not set, the function
 *   falls back to the generic extraction logic. When set, rules must have a
 *   "listSelector.normative" selector; "listSelector.informative" is optional.
 * @return {Object} An object with "normative" and "informative" arrays of
 *   references (both may be empty).
 * @throws {Error} When rules are provided but lack a normative list selector
 */
function extractReferences(rules) {
  if (!rules) {
    return extractReferencesWithoutRules();
  }
  if (!rules.listSelector?.normative) {
    throw new Error("Extraction rules for the list of references are incorrect");
  }

  const references = {
    normative: [],
    informative: []
  };
  for (const referenceType of ["normative", "informative"]) {
    const selector = rules.listSelector[referenceType];
    const referenceList = selector ? document.querySelector(selector) : null;
    if (!referenceList) {
      continue;
    }
    // Entries of the normative list that are flagged as "non-normative" are
    // moved to the informative list. The second list is always empty when
    // parsing the informative list since filtering is off in that case.
    const [listed, flagged] = parseReferences(referenceList, {
      filterInformative: (referenceType === "normative")
    });
    references[referenceType].push(...listed);
    references.informative.push(...flagged);
  }

  return references;
}
|