reffy 18.0.1 → 18.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "18.0.1",
3
+ "version": "18.1.1",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -37,14 +37,14 @@
37
37
  "ajv-formats": "3.0.1",
38
38
  "commander": "12.1.0",
39
39
  "fetch-filecache-for-crawling": "5.1.1",
40
- "puppeteer": "23.10.1",
40
+ "puppeteer": "23.10.4",
41
41
  "semver": "^7.3.5",
42
- "web-specs": "3.29.0",
42
+ "web-specs": "3.30.0",
43
43
  "webidl2": "24.4.1"
44
44
  },
45
45
  "devDependencies": {
46
46
  "mocha": "11.0.1",
47
- "respec": "35.2.0",
47
+ "respec": "35.2.1",
48
48
  "respec-hljs": "2.1.1",
49
49
  "rollup": "4.28.1",
50
50
  "undici": "^7.0.0"
@@ -0,0 +1,125 @@
1
+ import getCodeElements from './get-code-elements.mjs';
2
+ import trimSpaces from './trim-spaces.mjs';
3
+
/**
 * Extract the list of CDDL definitions in the current spec.
 *
 * A spec may define more than one CDDL module. For example, the WebDriver BiDi
 * spec has CDDL definitions that apply to the local end, to the remote end, or
 * to both. The function returns an array that lists all CDDL modules.
 *
 * Each CDDL module is represented as an object with the following keys whose
 * values are strings:
 * - name: the CDDL module shortname. The name is "" if the spec does not
 * define any module, and "all" for the dump of all CDDL definitions.
 * - cddl: A dump of the CDDL definitions.
 *
 * If the spec defines at least one module, the first item in the array is the
 * "all" module that contains a dump of all CDDL definitions, regardless of the
 * module they are actually defined for (the assumption is that looking at the
 * union of all CDDL modules defined in a spec will always make sense, and that
 * a spec will never reuse the same rule name with a different definition for
 * different CDDL modules).
 *
 * @function
 * @public
 * @return {Array} A dump of the CDDL definitions per CDDL module, or an empty
 * array if the spec does not contain any CDDL.
 */
export default function () {
  // Specs with CDDL are either recent enough that they all use the same
  // `<pre class="cddl">` convention, or they don't flag CDDL blocks in any
  // way, making it impossible to extract them. The CDDL index, if any, is
  // excluded.
  const cddlEls = getCodeElements(
    ['pre.cddl:not(.exclude):not(.extract)'],
    { excludeSelectors: ['#cddl-index'] }
  );

  // First pass: register every module that some block references. "all" is
  // not a module on its own, it names the dump of all CDDL definitions.
  const modules = {};
  for (const el of cddlEls) {
    getModules(el)
      .filter(name => name !== 'all')
      .forEach(name => {
        modules[name] = [];
      });
  }

  // Second pass: dispatch the content of each block to its modules
  const mergedCddl = [];
  for (const el of cddlEls) {
    const cddl = trimSpaces(el.textContent);
    if (!cddl) {
      continue;
    }

    // Every non-empty block ends up in the merged ("all") dump
    mergedCddl.push(cddl);

    // A block that references no module applies to all known modules. A block
    // flagged for "all" only contributes to the merged dump.
    const declared = getModules(el);
    const targets = (declared.length === 0) ? Object.keys(modules) : declared;
    for (const name of targets) {
      if (name === 'all') {
        continue;
      }
      modules[name] = modules[name] || [];
      modules[name].push(cddl);
    }
  }

  if (mergedCddl.length === 0) {
    return [];
  }

  // Whitespace runs that end a line are replaced by a newline, tabs by a
  // space, and the resulting text is trimmed.
  const normalize = text => text
    .replace(/\s+$/gm, '\n')
    .replace(/\t/g, ' ')
    .trim();

  const hasModules = Object.keys(modules).length > 0;
  const dumps = [
    [hasModules ? 'all' : '', mergedCddl],
    ...Object.entries(modules)
  ];
  return dumps.map(([name, blocks]) => ({
    name,
    cddl: normalize(blocks.join('\n\n'))
  }));
}
98
+
99
+
/**
 * Retrieve the list of CDDL module shortnames that the element references.
 *
 * This list of modules is either specified in a comma-separated
 * `data-cddl-module` attribute or, when that attribute is absent or empty,
 * directly within the class attribute through class names prefixed by `cddl-`
 * or suffixed by `-cddl`.
 *
 * Shortnames are reported once, in the order in which they first appear.
 * Empty shortnames (e.g. from a trailing comma in the attribute, or from a
 * bare `cddl-` class) are dropped: an empty name would be indistinguishable
 * from the name used when a spec defines no module at all, and a duplicated
 * name would make callers register the same CDDL block twice.
 *
 * @param {Element} el The element that contains the CDDL
 * @return {Array<string>} The list of module shortnames, possibly empty
 */
function getModules(el) {
  const shortnames = new Set();

  const moduleAttr = el.getAttribute('data-cddl-module');
  if (moduleAttr) {
    // The attribute takes precedence over class names
    for (const part of moduleAttr.split(',')) {
      shortnames.add(part.trim());
    }
  }
  else {
    for (const className of el.classList.values()) {
      const match = className.match(/^(.*)-cddl$|^cddl-(.*)$/);
      if (match) {
        // Only one of the two capture groups is set
        shortnames.add(match[1] ?? match[2]);
      }
    }
  }

  shortnames.delete('');
  return [...shortnames];
}
@@ -1,14 +1,14 @@
1
1
  import getGenerator from './get-generator.mjs';
2
- import informativeSelector from './informative-selector.mjs';
3
- import cloneAndClean from './clone-and-clean.mjs';
2
+ import getCodeElements from './get-code-elements.mjs';
3
+ import trimSpaces from './trim-spaces.mjs';
4
4
 
5
5
  /**
6
6
  * Extract the list of WebIDL definitions in the current spec
7
7
  *
8
8
  * @function
9
9
  * @public
10
- * @return {Promise} The promise to get a dump of the IDL definitions, or
11
- * an empty string if the spec does not contain any IDL.
10
+ * @return {String} A dump of the IDL definitions, or an empty string if the
11
+ * spec does not contain any IDL.
12
12
  */
13
13
  export default function () {
14
14
  const generator = getGenerator();
@@ -70,56 +70,21 @@ function extractBikeshedIdl() {
70
70
  * sure that it only extracts elements once.
71
71
  */
72
72
  function extractRespecIdl() {
73
- // Helper function that trims individual lines in an IDL block,
74
- // removing as much space as possible from the beginning of the page
75
- // while preserving indentation. Rules followed:
76
- // - Always trim the first line
77
- // - Remove whitespaces from the end of each line
78
- // - Replace lines that contain spaces with empty lines
79
- // - Drop same number of leading whitespaces from all other lines
80
- const trimIdlSpaces = idl => {
81
- const lines = idl.trim().split('\n');
82
- const toRemove = lines
83
- .slice(1)
84
- .filter(line => line.search(/\S/) > -1)
85
- .reduce(
86
- (min, line) => Math.min(min, line.search(/\S/)),
87
- Number.MAX_VALUE);
88
- return lines
89
- .map(line => {
90
- let firstRealChat = line.search(/\S/);
91
- if (firstRealChat === -1) {
92
- return '';
93
- }
94
- else if (firstRealChat === 0) {
95
- return line.replace(/\s+$/, '');
96
- }
97
- else {
98
- return line.substring(toRemove).replace(/\s+$/, '');
99
- }
100
- })
101
- .join('\n');
102
- };
103
-
104
- // Detect the IDL index appendix if there's one (to exclude it)
105
- const idlEl = document.querySelector('#idl-index pre') ||
106
- document.querySelector('.chapter-idl pre'); // SVG 2 draft
107
-
108
- let idl = [
73
+ const idlSelectors = [
109
74
  'pre.idl:not(.exclude):not(.extract):not(#actual-idl-index)',
110
75
  'pre:not(.exclude):not(.extract) > code.idl-code:not(.exclude):not(.extract)',
111
76
  'pre:not(.exclude):not(.extract) > code.idl:not(.exclude):not(.extract)',
112
77
  'div.idl-code:not(.exclude):not(.extract) > pre:not(.exclude):not(.extract)',
113
78
  'pre.widl:not(.exclude):not(.extract)'
114
- ]
115
- .map(sel => [...document.querySelectorAll(sel)])
116
- .reduce((res, elements) => res.concat(elements), [])
117
- .filter(el => el !== idlEl)
118
- .filter((el, idx, self) => self.indexOf(el) === idx)
119
- .filter(el => !el.closest(informativeSelector))
120
- .map(cloneAndClean)
121
- .map(el => trimIdlSpaces(el.textContent))
122
- .join('\n\n');
79
+ ];
123
80
 
124
- return idl;
81
+ const excludeSelectors = [
82
+ '#idl-index',
83
+ '.chapter-idl'
84
+ ];
85
+
86
+ const idlElements = getCodeElements(idlSelectors, { excludeSelectors });
87
+ return idlElements
88
+ .map(el => trimSpaces(el.textContent))
89
+ .join('\n\n');
125
90
  }
@@ -0,0 +1,21 @@
1
+ import informativeSelector from './informative-selector.mjs';
2
+ import cloneAndClean from './clone-and-clean.mjs';
3
+
/**
 * Helper function that returns a list of code elements in document order
 * based on a given set of selectors, excluding elements that are within an
 * index.
 *
 * The function also excludes elements defined in informative sections.
 *
 * The code elements are cloned and cleaned before they are returned to strip
 * annotations and other asides.
 *
 * @param {Array<string>} codeSelectors CSS selectors that match the code
 * elements to return
 * @param {Object} [options] Optional settings
 * @param {Array<string>} [options.excludeSelectors] CSS selectors of
 * containers (typically indexes) whose code elements must be skipped.
 * Defaults to an empty list, in which case nothing is excluded on that basis.
 * @return {Array<Element>} Cloned and cleaned copies of the matching elements
 */
export default function getCodeElements(codeSelectors, { excludeSelectors = [] } = {}) {
  // `closest('')` throws a SyntaxError, so the exclusion filter only applies
  // when there is at least one selector to exclude.
  const excludeSelector = excludeSelectors.join(', ');

  return [...document.querySelectorAll(codeSelectors.join(', '))]
    // Skip excluded elements and those in informative content
    .filter(el => !excludeSelector || !el.closest(excludeSelector))
    .filter(el => !el.closest(informativeSelector))

    // Clone and clean the elements
    .map(cloneAndClean);
}
@@ -62,5 +62,9 @@
62
62
  "href": "./extract-ids.mjs",
63
63
  "property": "ids",
64
64
  "needsIdToHeadingMap": true
65
+ },
66
+ {
67
+ "href": "./extract-cddl.mjs",
68
+ "property": "cddl"
65
69
  }
66
70
  ]
@@ -0,0 +1,36 @@
/**
 * Helper function that trims individual lines in a code block, removing as
 * much space as possible from the beginning of each line while preserving
 * relative indentation.
 *
 * Typically useful for CDDL and IDL extracts.
 *
 * Rules followed:
 * - The whole block is trimmed first, so the first line never starts with
 *   whitespace
 * - Whitespace is removed from the end of each line
 * - Lines that only contain whitespace become empty lines
 * - The same number of leading characters is dropped from all other indented
 *   lines, namely the smallest indentation found among the non-blank lines
 *   that follow the first one
 *
 * @param {string} code The code block to trim
 * @return {string} The trimmed code block, lines joined with "\n"
 */
export default function trimSpaces(code) {
  // Index of the first non-whitespace character, -1 for a blank line
  const indentOf = line => line.search(/\S/);
  const lines = code.trim().split('\n');

  // Common indentation, computed on all non-blank lines but the first one
  let toRemove = Number.MAX_VALUE;
  for (const line of lines.slice(1)) {
    const indent = indentOf(line);
    if (indent > -1) {
      toRemove = Math.min(toRemove, indent);
    }
  }

  const trimmed = [];
  for (const line of lines) {
    const indent = indentOf(line);
    if (indent === -1) {
      trimmed.push('');
      continue;
    }
    // Lines that are not indented are kept as-is (but for trailing spaces)
    const text = (indent === 0) ? line : line.substring(toRemove);
    trimmed.push(text.replace(/\s+$/, ''));
  }
  return trimmed.join('\n');
}
@@ -251,6 +251,29 @@ async function saveSpecResults(spec, settings) {
251
251
  return `css/${spec.shortname}.json`;
252
252
  };
253
253
 
/**
 * Save the CDDL modules extracted from the spec to individual text files in
 * the CDDL folder, each prefixed with a "generated content" header.
 *
 * Files are named after the spec's shortname, followed by `-` and the module
 * name when the module has a non-empty name, with a `.cddl` extension. Files
 * are written one after the other.
 *
 * @param {Object} spec The spec crawl result. The function reads its `title`,
 * `crawled`, `shortname` and `cddl` properties.
 * @return {Promise<Array>} One `{ name, file }` object per CDDL module, where
 * `file` is the `cddl/`-prefixed name of the file that was written.
 */
async function saveCddl(spec) {
  // Leading whitespace is dropped from each header line, and the header is
  // separated from the CDDL by a blank line.
  const cddlHeader = [
    '; GENERATED CONTENT - DO NOT EDIT',
    '; Content was automatically extracted by Reffy into webref',
    '; (https://github.com/w3c/webref)',
    `; Source: ${spec.title} (${spec.crawled})`
  ].join('\n').replace(/^\s+/gm, '').trim() + '\n\n';

  const res = [];
  for (const { name, cddl } of spec.cddl) {
    const suffix = name ? `-${name}` : '';
    const filename = `${spec.shortname}${suffix}.cddl`;
    await fs.promises.writeFile(
      path.join(folders.cddl, filename),
      `${cddlHeader}${cddl}\n`);
    res.push({ name, file: `cddl/${filename}` });
  }
  return res;
};
276
+
254
277
  // Save IDL dumps
255
278
  if (spec.idl) {
256
279
  spec.idl = await saveIdl(spec);
@@ -283,9 +306,14 @@ async function saveSpecResults(spec, settings) {
283
306
  (typeof thing == 'object') && (Object.keys(thing).length === 0);
284
307
  }
285
308
 
309
+ // Save CDDL extracts (text files, multiple modules possible)
310
+ if (!isEmpty(spec.cddl)) {
311
+ spec.cddl = await saveCddl(spec);
312
+ }
313
+
286
314
  // Save all other extracts from crawling modules
287
315
  const remainingModules = modules.filter(mod =>
288
- !mod.metadata && mod.property !== 'css' && mod.property !== 'idl');
316
+ !mod.metadata && !['cddl', 'css', 'idl'].includes(mod.property));
289
317
  for (const mod of remainingModules) {
290
318
  await saveExtract(spec, mod.property, spec => !isEmpty(spec[mod.property]));
291
319
  }
package/src/lib/util.js CHANGED
@@ -796,6 +796,37 @@ async function expandSpecResult(spec, baseFolder, properties) {
796
796
  return;
797
797
  }
798
798
 
799
+ // Treat CDDL extracts separately, one spec may have multiple CDDL
800
+ // extracts (actual treatment is similar to IDL extracts otherwise)
801
+ let contents = null;
802
+ if (property === 'cddl') {
803
+ if (!spec[property]) {
804
+ return;
805
+ }
806
+ for (const cddlModule of spec[property]) {
807
+ if (!cddlModule.file) {
808
+ continue;
809
+ }
810
+ if (baseFolder.startsWith('https:')) {
811
+ const url = (new URL(cddlModule.file, baseFolder)).toString();
812
+ const response = await fetch(url, { nolog: true });
813
+ contents = await response.text();
814
+ }
815
+ else {
816
+ const filename = path.join(baseFolder, cddlModule.file);
817
+ contents = await fs.readFile(filename, 'utf8');
818
+ }
819
+ if (contents.startsWith('; GENERATED CONTENT - DO NOT EDIT')) {
820
+ // Normalize newlines to avoid off-by-one slices when we remove
821
+ // the trailing newline that was added by saveCddl
822
+ contents = contents.replace(/\r/g, '');
823
+ const endOfHeader = contents.indexOf('\n\n');
824
+ contents = contents.substring(endOfHeader + 2).slice(0, -1);
825
+ }
826
+ cddlModule.cddl = contents;
827
+ }
828
+ }
829
+
799
830
  // Only consider properties that link to an extract, i.e. an IDL
800
831
  // or JSON file in subfolder.
801
832
  if (!spec[property] ||
@@ -803,7 +834,6 @@ async function expandSpecResult(spec, baseFolder, properties) {
803
834
  !spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
804
835
  return;
805
836
  }
806
- let contents = null;
807
837
  if (baseFolder.startsWith('https:')) {
808
838
  const url = (new URL(spec[property], baseFolder)).toString();
809
839
  const response = await fetch(url, { nolog: true });