reffy 12.0.0 → 12.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "12.0.0",
3
+ "version": "12.1.0",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -37,9 +37,9 @@
37
37
  "ajv-formats": "2.1.1",
38
38
  "commander": "10.0.0",
39
39
  "fetch-filecache-for-crawling": "4.1.0",
40
- "puppeteer": "19.7.2",
40
+ "puppeteer": "19.7.3",
41
41
  "semver": "^7.3.5",
42
- "web-specs": "2.50.0",
42
+ "web-specs": "2.51.0",
43
43
  "webidl2": "24.2.2"
44
44
  },
45
45
  "devDependencies": {
@@ -48,7 +48,7 @@
48
48
  "nock": "13.3.0",
49
49
  "respec": "32.7.1",
50
50
  "respec-hljs": "2.1.1",
51
- "rollup": "3.17.2"
51
+ "rollup": "3.18.0"
52
52
  },
53
53
  "scripts": {
54
54
  "test": "mocha --recursive tests/"
@@ -8,7 +8,7 @@
8
8
  * As a by-product of generating the outline, the function also generates a
9
9
  * mapping between elements and the (conceptual) section that contains them in
10
10
  * the outline. To save memory, this mapping is only done for elements that have
11
- * an ID.
11
+ * an ID (or a "name" attribute).
12
12
  *
13
13
  * Both the outline and the mapping are returned.
14
14
  */
@@ -307,8 +307,9 @@ export default function (root) {
307
307
  // In addition, whenever the walk exits a node, after doing the steps above,
308
308
  // if the node is not associated with a section yet, associate the node with
309
309
  // the current section.
310
- // (we will only do that for elements that have an ID)
311
- if (node.getAttribute('id') && !nodeToSection.has(node)) {
310
+ // (we will only do that for elements that have an ID or a "name" attribute)
311
+ if ((node.getAttribute('id') || node.getAttribute('name')) &&
312
+ !nodeToSection.has(node)) {
312
313
  nodeToSection.set(node, currentSection);
313
314
  }
314
315
  }
@@ -333,6 +333,14 @@ const dfnLabel2Property = label => label.trim()
333
333
  .replace(/^newValue$/, 'newValues');
334
334
 
335
335
 
336
+ /**
337
+ * Selector to use to exclude inner blocks that list tests, references and/or
338
+ * link to implementation statuses, which would provide too much detailed info
339
+ * in prose content.
340
+ */
341
+ const asideSelector = 'aside, .mdn-anno, .wpt-tests-block';
342
+
343
+
336
344
  /**
337
345
  * Extract a CSS definition from a table
338
346
  *
@@ -343,7 +351,7 @@ const extractTableDfn = table => {
343
351
  const lines = [...table.querySelectorAll('tr')]
344
352
  .map(line => {
345
353
  const cleanedLine = line.cloneNode(true);
346
- const annotations = cleanedLine.querySelectorAll("aside, .mdn-anno");
354
+ const annotations = cleanedLine.querySelectorAll(asideSelector);
347
355
  annotations.forEach(n => n.remove());
348
356
  return {
349
357
  name: dfnLabel2Property(cleanedLine.querySelector(':first-child').textContent),
@@ -580,7 +588,7 @@ const extractTypedDfn = dfn => {
580
588
  // and remove MDN annotations as well
581
589
  [...parent.querySelectorAll('sup')]
582
590
  .map(sup => sup.parentNode.removeChild(sup));
583
- [...parent.querySelectorAll('aside, .mdn-anno')]
591
+ [...parent.querySelectorAll(asideSelector)]
584
592
  .map(annotation => annotation.parentNode.removeChild(annotation));
585
593
 
586
594
  const text = parent.textContent.trim();
@@ -655,7 +663,7 @@ const extractTypedDfn = dfn => {
655
663
  });
656
664
  [...dd.querySelectorAll('sup')]
657
665
  .map(sup => sup.parentNode.removeChild(sup));
658
- [...dd.querySelectorAll('aside, .mdn-anno')]
666
+ [...dd.querySelectorAll(asideSelector)]
659
667
  .map(annotation => annotation.parentNode.removeChild(annotation));
660
668
 
661
669
  res = {
@@ -711,7 +719,7 @@ const extractProductionRules = root => {
711
719
  .filter(el => !el.closest(informativeSelector))
712
720
  .map(el => el.cloneNode(true))
713
721
  .map(el => {
714
- [...el.querySelectorAll('aside, .mdn-anno')]
722
+ [...el.querySelectorAll(asideSelector)]
715
723
  .map(aside => aside.parentNode.removeChild(aside));
716
724
  return el;
717
725
  })
@@ -226,6 +226,9 @@ export default function (spec, idToHeading = {}) {
226
226
 
227
227
  const shortname = (typeof spec === 'string') ? spec : spec.shortname;
228
228
  switch (shortname) {
229
+ case "CSS2":
230
+ preProcessCSS21();
231
+ break;
229
232
  case "html":
230
233
  preProcessHTML();
231
234
  break;
@@ -667,6 +670,52 @@ function preProcessHTML() {
667
670
  });
668
671
  }
669
672
 
673
+ function preProcessCSS21() {
674
+ document.querySelectorAll('span.index-def')
675
+ .forEach(span => {
676
+ // Definition ID is to be found in a nearby anchor
677
+ const anchor = span.querySelector('a[name]') ?? span.closest('a[name]');
678
+ if (!anchor) {
679
+ return;
680
+ }
681
+
682
+ // Once in a while, definition has a "<dfn>", and once in a while, that
683
+ // "<dfn>" already follows the dfn data model.
684
+ let dfn = span.querySelector('dfn') ?? span.closest('dfn');
685
+ if (dfn?.id) {
686
+ return;
687
+ }
688
+
689
+ // No "<dfn>"? Let's create it
690
+ if (!dfn) {
691
+ dfn = document.createElement('dfn');
692
+ for (let child of [...span.childNodes]) {
693
+ dfn.appendChild(child);
694
+ }
695
+ span.appendChild(dfn);
696
+ }
697
+
698
+ // Complete the "<dfn>" with expected attributes
699
+ dfn.id = anchor.getAttribute('name');
700
+ dfn.dataset.export = '';
701
+ if (span.getAttribute('title')) {
702
+ dfn.dataset.lt = span.getAttribute('title');
703
+ }
704
+ let dfnType = null;
705
+ switch (anchor.getAttribute('class') ?? '') {
706
+ case 'propdef-title':
707
+ dfnType = 'property';
708
+ break;
709
+ case 'value-def':
710
+ dfnType = 'value';
711
+ break;
712
+ }
713
+ if (dfnType) {
714
+ dfn.dataset.dfnType = dfnType;
715
+ }
716
+ });
717
+ }
718
+
670
719
  function preProcessSVG2() {
671
720
  const idl = extractWebIdl();
672
721
  const idlTree = parse(idl);
@@ -19,7 +19,13 @@ export default function (spec, idToHeading) {
19
19
  number: headingNumber
20
20
  };
21
21
  });
22
- return esHeadings.concat([...document.querySelectorAll('h1[id], h2[id], h3[id], h4[id], h5[id] ,h6[id]')].map(n => {
22
+
23
+ const headingsSelector = [
24
+ ':is(h1,h2,h3,h4,h5,h6)[id]', // Regular headings
25
+ ':is(h1,h2,h3,h4,h5,h6):not([id]) > a[name]' // CSS 2.1 headings
26
+ ].join(',');
27
+
28
+ return esHeadings.concat([...document.querySelectorAll(headingsSelector)].map(n => {
23
29
  // Note: In theory, all <hX> heading elements that have an ID are associated
24
30
  // with a heading in idToHeading. One exception to the rule: when the
25
31
  // heading element appears in a <hgroup> element, the mapping is not
@@ -27,9 +33,11 @@ export default function (spec, idToHeading) {
27
33
  // headings not to create a mess in the outline). In practice, this only
28
34
  // really happens so far for WHATWG spec titles that (correctly) group the
29
35
  // title and subtitle headings in a <hgroup>.
30
- const href = getAbsoluteUrl(n, { singlePage });
36
+ const idAttr = n.hasAttribute('id') ? 'id' : 'name';
37
+ const headingEl = n.hasAttribute('id') ? n : n.parentNode;
38
+ const href = getAbsoluteUrl(n, { singlePage, attribute: idAttr });
31
39
  const heading = idToHeading[href] || {
32
- id: n.id,
40
+ id: n.getAttribute(idAttr),
33
41
  href,
34
42
  title: n.textContent.trim()
35
43
  };
@@ -37,7 +45,7 @@ export default function (spec, idToHeading) {
37
45
  const res = {
38
46
  id: heading.id,
39
47
  href: heading.href,
40
- level: parseInt(n.tagName.slice(1), 10),
48
+ level: parseInt(headingEl.tagName.slice(1), 10),
41
49
  title: heading.title
42
50
  };
43
51
  if (heading.number) {
@@ -2,14 +2,33 @@ import createOutline from './create-outline.mjs';
2
2
  import getAbsoluteUrl from './get-absolute-url.mjs';
3
3
 
4
4
  // Regular expression to capture the numbering of a heading. The expression
5
- // extracts numbers such as "1.", "A.", "A.3", "13.3.4.". Note: a top-level
6
- // number always ends with a ".", but there may be no final "." in sublevels
7
- // (Bikeshed adds one, ReSpec does not).
8
- const reNumber = /^([A-Z0-9]\.|[A-Z](\.[0-9]+)+\.?|[0-9]+(\.[0-9]+)+\.?)\s/;
5
+ // extracts numbers such as "1.", "A.", "A.3", "13.3.4.". Notes:
6
+ // - A top-level number always ends with a ".", except in CSS 2.1, some IETF RFCs
7
+ // and WebGL specs.
8
+ // - There may be no final "." in sublevels (Bikeshed adds one, not ReSpec)
9
+ // - Top level appendices (e.g. in CSS 2.1, IETF RFCs and Bikeshed specs) start
10
+ // with "Appendix", sometimes followed by ":"
11
+ const reNumber = /^([A-Z\d]\.|[A-Z](\.\d+)+\.?|\d+(\.\d+)+\.?|\d|Appendix [A-Z][\.:])\s/;
9
12
 
10
13
  /**
11
- * Generate a mapping between elements that have an ID and the closest heading
12
- * (that also has an ID) under which these elements appear in the DOM tree.
14
+ * Retrieve a "cleaned" version of the node's text content, without aside notes
15
+ * such as links to tests, MDN or references.
16
+ *
17
+ * Note that this is mainly intended for CSS Color 3, which has test annotations
18
+ * within headings.
19
+ */
20
+ function getCleanTextContent(node) {
21
+ const asideSelector = 'aside, .mdn-anno, .wpt-tests-block, .annotation';
22
+ const cleanedNode = node.cloneNode(true);
23
+ const annotations = cleanedNode.querySelectorAll(asideSelector);
24
+ annotations.forEach(n => n.remove());
25
+ return cleanedNode.textContent.trim().replace(/\s+/g, ' ');
26
+ }
27
+
28
+ /**
29
+ * Generate a mapping between elements that have an ID (or a "name") and the
30
+ * closest heading (that has an ID or wraps a named anchor) under which these elements appear in
31
+ * the DOM tree.
13
32
  *
14
33
  * The main difficulty is that the structure of a DOM tree does not necessarily
15
34
  * follow the structure of the outline of the document, which means that there
@@ -48,7 +67,7 @@ export default function () {
48
67
  const singlePage = !document.querySelector('[data-reffy-page]');
49
68
 
50
69
  const mappingTable = {};
51
- [...document.querySelectorAll('[id]')].forEach(node => {
70
+ [...document.querySelectorAll('[id],[name]')].forEach(node => {
52
71
  let parentSection = nodeToSection.get(node);
53
72
  while (parentSection) {
54
73
  if (parentSection.heading !== '__implied') {
@@ -62,20 +81,32 @@ export default function () {
62
81
  // Compute the absolute URL with fragment
63
82
  // (Note the crawler merges pages of a multi-page spec in the first page
64
83
  // to ease parsing logic, and we want to get back to the URL of the page)
65
- const nodeid = getAbsoluteUrl(node, { singlePage });
84
+ const idAttr = node.hasAttribute('id') ? 'id' : 'name';
85
+ const nodeid = getAbsoluteUrl(node, { singlePage, attribute: idAttr });
66
86
  let href = nodeid;
67
87
 
68
88
  if (parentSection) {
89
+ let id;
90
+
69
91
  const heading = parentSection.heading;
70
- let id = heading.id;
71
- href = getAbsoluteUrl(heading, { singlePage });
92
+ if (heading.hasAttribute('id')) {
93
+ id = heading.id;
94
+ href = getAbsoluteUrl(heading, { singlePage });
95
+ }
96
+ else {
97
+ const anchor = heading.querySelector('a[name]');
98
+ if (anchor) {
99
+ id = anchor.getAttribute('name');
100
+ href = getAbsoluteUrl(anchor, { singlePage, attribute: 'name' });
101
+ }
102
+ }
72
103
 
73
104
  if (parentSection.root && parentSection.root.hasAttribute('id')) {
74
105
  id = parentSection.root.id;
75
106
  href = getAbsoluteUrl(parentSection.root, { singlePage });
76
107
  }
77
108
 
78
- const trimmedText = heading.textContent.trim();
109
+ const trimmedText = getCleanTextContent(heading);
79
110
  const match = trimmedText.match(reNumber);
80
111
  const number = match ? match[1] : null;
81
112
 
@@ -84,12 +115,13 @@ export default function () {
84
115
  mapping.id = id;
85
116
  }
86
117
  mapping.href = href;
87
- mapping.title = trimmedText.replace(reNumber, '').trim().replace(/\s+/g, ' ');
118
+ mapping.title = trimmedText.replace(reNumber, '');
88
119
  mappingTable[nodeid] = mapping;
89
120
 
90
121
  if (number) {
91
- // Store the number without the final "."
92
- mappingTable[nodeid].number = number.replace(/\.$/, '');
122
+ // Store the number without the final "." or ":"
123
+ // (and without the "Appendix" prefix in the CSS 2.1 case)
124
+ mappingTable[nodeid].number = number.replace(/[\.:]$/, '').replace(/^Appendix /, '');
93
125
  }
94
126
  }
95
127
  });
@@ -115,7 +147,7 @@ function esMapIdToHeadings() {
115
147
  if (!section) return;
116
148
 
117
149
  const heading = section.querySelector("h1");
118
- const trimmedText = heading.textContent.trim();
150
+ const trimmedText = getCleanTextContent(heading);
119
151
  const nodeid = getAbsoluteUrl(el, { singlePage });
120
152
  const href = getAbsoluteUrl(section, { singlePage });
121
153
 
@@ -127,7 +159,7 @@ function esMapIdToHeadings() {
127
159
  mapping.id = section.id;
128
160
  }
129
161
  mapping.href = href;
130
- mapping.title = trimmedText.replace(reNumber, '').trim().replace(/\s+/g, ' ');
162
+ mapping.title = trimmedText.replace(reNumber, '');
131
163
  mappingTable[nodeid] = mapping;
132
164
 
133
165
  if (number) {