reffy 18.4.7 → 18.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reffy",
3
- "version": "18.4.7",
3
+ "version": "18.6.0",
4
4
  "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -37,7 +37,7 @@
37
37
  "ajv-formats": "3.0.1",
38
38
  "commander": "13.1.0",
39
39
  "fetch-filecache-for-crawling": "5.1.1",
40
- "puppeteer": "24.6.0",
40
+ "puppeteer": "24.6.1",
41
41
  "semver": "^7.3.5",
42
42
  "web-specs": "3.46.0",
43
43
  "webidl2": "24.4.1"
@@ -46,7 +46,7 @@
46
46
  "mocha": "11.1.0",
47
47
  "respec": "35.3.0",
48
48
  "respec-hljs": "2.1.1",
49
- "rollup": "4.39.0",
49
+ "rollup": "4.40.0",
50
50
  "undici": "^7.0.0"
51
51
  },
52
52
  "overrides": {
package/reffy.js CHANGED
@@ -84,12 +84,14 @@ program
84
84
  .description('Crawls and processes a list of Web specifications')
85
85
  .option('-d, --debug', 'debug mode, crawl one spec at a time')
86
86
  .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
87
+ .option('--md, --markdown', 'output a Markdown report')
87
88
  .option('-m, --module <modules...>', 'spec processing modules')
88
89
  .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
89
90
  .option('-p, --post <modules...>', 'post-processing modules')
90
91
  .option('-q, --quiet', 'do not report progress and other warnings to the console')
91
92
  .option('-r, --release', 'crawl release (TR) version of specs')
92
93
  .option('-s, --spec <specs...>', 'specs to crawl')
94
+ .option('--summary', 'include a crawl summary in Markdown for each spec')
93
95
  .option('-t, --terse', 'output crawl results without metadata')
94
96
  .option('-u, --use-crawl <folder>', 'use given crawl result folder as input for post-processing')
95
97
  .action(async options => {
@@ -109,9 +111,11 @@ will dump ~100MB of data to the console:
109
111
  const crawlOptions = {
110
112
  debug: options.debug,
111
113
  fallback: options.fallback,
114
+ markdown: options.markdown,
112
115
  output: options.output,
113
116
  publishedVersion: options.release,
114
117
  quiet: options.quiet,
118
+ summary: options.summary,
115
119
  terse: options.terse,
116
120
  useCrawl: options.useCrawl
117
121
  };
@@ -177,6 +181,10 @@ Usage notes for some of the options:
177
181
 
178
182
  The "error" property is set on specs for which fallback data was used.
179
183
 
184
+ --md, --markdown
185
+ Output a crawl summary in Markdown instead of a JSON report. The option takes
186
+ precedence over the \`--output\` option.
187
+
180
188
  -m, --module <modules...>
181
189
  If processing modules are not specified, the crawler runs all core processing
182
190
  modules defined in:
@@ -286,6 +294,13 @@ Usage notes for some of the options:
286
294
  spec, run:
287
295
  $ reffy -o reports/test -s all DOM-Level-2-Style
288
296
 
297
+ --summary
298
+ Tells Reffy to attach a Markdown summary of the crawl per spec to the JSON
299
+ report, in a \`crawlSummary\` property. The Markdown report is suitable for
300
+ inclusion in a GitHub issue or similar. It starts with a summary, and then
301
+ details a few noteworthy extracts (CSS, dfns, Web IDL) in expandable
302
+ sections, with links to the online xref database search where appropriate.
303
+
289
304
  -t, --terse
290
305
  This flag cannot be combined with the --output option and cannot be set if
291
306
  more than one processing module gets run. When set, the crawler writes the
@@ -35,6 +35,25 @@
35
35
  "items": {
36
36
  "type": "object"
37
37
  }
38
+ },
39
+
40
+ "post": {
41
+ "type": "array",
42
+ "items": {
43
+ "type": "object",
44
+ "properties": {
45
+ "mod": {
46
+ "type": "string"
47
+ },
48
+ "result": {
49
+ "oneOf": [
50
+ { "type": "object" },
51
+ { "type": "array" }
52
+ ]
53
+ },
54
+ "additionalProperties": false
55
+ }
56
+ }
38
57
  }
39
58
  }
40
59
  }
@@ -1,69 +1,84 @@
1
1
  [
2
2
  {
3
+ "label": "Title",
3
4
  "href": "./get-title.mjs",
4
5
  "property": "title",
5
6
  "metadata": true
6
7
  },
7
8
  {
9
+ "label": "Authoring tool",
8
10
  "href": "./get-generator.mjs",
9
11
  "property": "generator",
10
12
  "metadata": true
11
13
  },
12
14
  {
15
+ "label": "Date",
13
16
  "href": "./get-lastmodified-date.mjs",
14
17
  "property": "date",
15
18
  "metadata": true
16
19
  },
17
20
  {
21
+ "label": "Revision ID",
18
22
  "href": "./get-revision.mjs",
19
23
  "property": "revision",
20
24
  "metadata": true
21
25
  },
22
26
  {
27
+ "label": "Algorithms",
23
28
  "href": "./extract-algorithms.mjs",
24
29
  "property": "algorithms"
25
30
  },
26
31
  {
32
+ "label": "Links",
27
33
  "href": "./extract-links.mjs",
28
34
  "property": "links"
29
35
  },
30
36
  {
37
+ "label": "References",
31
38
  "href": "./extract-references.mjs",
32
39
  "property": "refs"
33
40
  },
34
41
  {
42
+ "label": "Events",
35
43
  "href": "./extract-events.mjs",
36
44
  "property": "events"
37
45
  },
38
46
  {
47
+ "label": "Web IDL",
39
48
  "href": "./extract-webidl.mjs",
40
49
  "property": "idl",
41
50
  "extractsPerSeries": true
42
51
  },
43
52
  {
53
+ "label": "CSS",
44
54
  "href": "./extract-cssdfn.mjs",
45
55
  "property": "css",
46
56
  "extractsPerSeries": true
47
57
  },
48
58
  {
59
+ "label": "Terms",
49
60
  "href": "./extract-dfns.mjs",
50
61
  "property": "dfns",
51
62
  "needsIdToHeadingMap": true
52
63
  },
53
64
  {
65
+ "label": "Elements",
54
66
  "href": "./extract-elements.mjs",
55
67
  "property": "elements"
56
68
  },
57
69
  {
70
+ "label": "Headings",
58
71
  "href": "./extract-headings.mjs",
59
72
  "property": "headings"
60
73
  },
61
74
  {
75
+ "label": "IDs",
62
76
  "href": "./extract-ids.mjs",
63
77
  "property": "ids",
64
78
  "needsIdToHeadingMap": true
65
79
  },
66
80
  {
81
+ "label": "CDDL",
67
82
  "href": "./extract-cddl.mjs",
68
83
  "property": "cddl"
69
84
  }
@@ -0,0 +1,361 @@
1
+ /**
2
+ * Helper function to generate a short report of a crawl in GitHub Markdown
3
+ * for a spec that features a summary of the crawl result, and details in
4
+ * expandable details sections about elements worthy of interest (such as CSS
5
+ * properties, exported definitions, Web IDL interfaces, etc.).
6
+ *
7
+ * The markdown does not contain titles on purpose so that it can be embedded
8
+ * as is in a larger Markdown context (e.g., a GitHub issue that looks at a
9
+ * spec from various perspectives).
10
+ */
11
+
12
+ import reffyModules from '../browserlib/reffy.json' with { type: 'json' };
13
+ import idlparsed from '../postprocessing/idlparsed.js';
14
+
15
+
16
/**
 * Per-module reporting logic, indexed by the name of the property that the
 * module sets in the crawl result of a spec. Each entry may define:
 * - `isPresent(value)`: tells whether Reffy actually extracted something
 * - `summary(value, spec)`: returns a one-line summary of the extract, only
 * called when `isPresent` returned a truthy value
 * - `details(value, spec)`: optional, returns Markdown that highlights
 * specific details in expandable sections, or a falsy value when there is
 * nothing worth reporting
 *
 * TODO: reffy.json, browserlib code, and schemas could be refactored to bind
 * all the logic linked to a module together: how to extract, whether something
 * was extracted, how to summarize, etc. (but note the extraction logic actually
 * runs in a browser page, while the rest runs in a Node.js context and that,
 * for IDL, interesting info is returned by the idlparsed post-processing
 * module)
 */
const moduleFunctions = {
  algorithms: {
    isPresent: isArrayPresent,
    summary: arrayInfo
  },
  cddl: {
    isPresent: isArrayPresent,
    summary: value => 'found'
  },
  css: {
    // `some` (rather than `find`) so that the function returns a boolean
    isPresent: value => ['properties', 'atrules', 'selectors', 'values']
      .some(prop => isArrayPresent(value?.[prop])),
    summary: value => ['properties', 'atrules', 'selectors', 'values']
      .map(prop => value[prop]?.length > 0 ?
        value[prop].length + ' ' + getCSSLabel(prop, value[prop].length) :
        null)
      .filter(found => found)
      .join(', '),
    details: value => {
      // Types to restrict the xref search to. The list is the same for all
      // CSS terms, so compute it once.
      const types = [
        'css-at-rule',
        'css-descriptor',
        'css-function',
        'css-property',
        'css-selector',
        'css-type',
        'css-value'
      ].join(',');
      return ['properties', 'atrules', 'selectors']
        .map(prop => {
          if (!isArrayPresent(value[prop])) {
            return null;
          }
          const details = value[prop]
            .map(val => '- ' + wrapTerm(val.name, 'css type', val.href) +
              ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(val.name)}&types=${encodeURIComponent(types)}))`
            );
          const report = ['<details>'];
          report.push(`<summary>${details.length} CSS ${getCSSLabel(prop, details.length)}</summary>`);
          report.push('');
          report.push(...details);
          report.push('</details>');
          return report.join('\n');
        })
        .filter(details => !!details)
        .join('\n');
    }
  },
  dfns: {
    // For dfns, note we make a distinction between terms that are exported by
    // default (such as CSS and Web IDL terms) and terms that editors choose to
    // export explicitly. The former get reported in other details, the latter
    // are the ones most likely to cause duplication issues.
    isPresent: isArrayPresent,
    summary: value => [
      {
        access: 'explicitly exported',
        dfns: value
          .filter(dfn => dfn.access === 'public')
          .filter(dfn => dfn.type === 'dfn' || dfn.type === 'cddl')
      },
      {
        access: 'exported by default',
        dfns: value
          .filter(dfn => dfn.access === 'public')
          .filter(dfn => dfn.type !== 'dfn' && dfn.type !== 'cddl')
      },
      {
        access: 'private',
        dfns: value
          .filter(dfn => dfn.access !== 'public')
      }
    ]
      .map(t => t.dfns.length > 0 ? t.dfns.length + ' ' + t.access : null)
      .filter(found => found)
      .join(', '),
    details: value => {
      const details = value
        .filter(dfn => dfn.access === 'public')
        .filter(dfn => dfn.type === 'dfn' || dfn.type === 'cddl')
        .map(dfn => '- ' + wrapTerm(dfn.linkingText[0], dfn.type, dfn.href) +
          (dfn.for?.length > 0 ? ' for ' + wrapTerm(dfn.for[0], dfn.type): '') +
          `, type ${dfn.type}` +
          ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(dfn.linkingText[0])}))`
        );
      if (details.length === 0) {
        return null;
      }
      const s = details.length > 1 ? 's' : '';
      const report = ['<details>'];
      report.push(`<summary>${details.length} explicitly exported term${s}</summary>`);
      report.push('');
      report.push(...details);
      report.push('</details>');
      return report.join('\n');
    }
  },
  events: {
    isPresent: isArrayPresent,
    summary: arrayInfo
  },
  headings: {
    isPresent: isArrayPresent,
    summary: arrayInfo
  },
  idl: {
    // Note: For IDL, we're more interested in the info that gets produced by the
    // idlparsed post-processing module (which gets run automatically if it
    // did not run during crawl)
    // For extended names, exclude names that the spec itself defines
    // (they will be reported as names defined by the spec already)
    isPresent: value => (typeof value === 'string') && value.length > 0,
    summary: (value, spec) => {
      const parsedIdl = spec.idlparsed;
      // A string in place of the parsed structure is reported as invalid IDL
      if (typeof parsedIdl === 'string') {
        return 'invalid Web IDL found';
      }
      const res = [];
      const idlNames = Object.keys(parsedIdl.idlNames)
        .concat(Object.keys(parsedIdl.idlExtendedNames)
          .filter(name => !parsedIdl.idlNames[name]));
      if (idlNames.length > 0) {
        const s = idlNames.length > 1 ? 's' : '';
        res.push(`${idlNames.length} name${s} (or partial${s})`);
      }
      const globals = Object.keys(parsedIdl.globals);
      if (globals.length > 0) {
        const s = globals.length > 1 ? 's' : '';
        res.push(`${globals.length} global${s}`);
      }
      return res.join(', ');
    },
    details: (value, spec) => {
      const parsedIdl = spec.idlparsed;
      if (typeof parsedIdl === 'string') {
        return null;
      }

      const report = [];

      const idlNames = Object.keys(parsedIdl.idlNames);
      if (idlNames.length > 0) {
        const s = idlNames.length > 1 ? 's' : '';
        report.push('<details>');
        report.push(`<summary>${idlNames.length} Web IDL name${s}</summary>`);
        report.push('');
        for (const name of idlNames) {
          const type = parsedIdl.idlNames[name].type;
          report.push('- ' + type + ' ' +
            wrapTerm(name, type, parsedIdl.idlNames[name].href) +
            ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(name)}&types=_IDL_))`);
        }
        report.push('</details>');
      }

      const idlExtendedNames = Object.keys(parsedIdl.idlExtendedNames)
        .filter(name => !parsedIdl.idlNames[name]);
      if (idlExtendedNames.length > 0) {
        const s = idlExtendedNames.length > 1 ? 's' : '';
        report.push('<details>');
        report.push(`<summary>${idlExtendedNames.length} extended Web IDL name${s}</summary>`);
        report.push('');
        for (const name of idlExtendedNames) {
          // Only the first entry is used to describe the extended name
          const type = parsedIdl.idlExtendedNames[name][0].type;
          report.push('- ' + type + ' ' +
            wrapTerm(name, type, parsedIdl.idlExtendedNames[name][0].href) +
            ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(name)}&types=_IDL_))`);
        }
        report.push('</details>');
      }

      const globals = Object.keys(parsedIdl.globals);
      if (globals.length > 0) {
        const s = globals.length > 1 ? 's' : '';
        report.push('<details>');
        report.push(`<summary>${globals.length} Web IDL global${s}</summary>`);
        report.push('');
        for (const glob of globals) {
          report.push(`- \`${glob}\``);
        }
        report.push('</details>');
      }

      return report.join('\n');
    }
  },
  ids: {
    isPresent: isArrayPresent,
    summary: arrayInfo
  },
  links: {
    isPresent: value =>
      isArrayPresent(Object.keys(value?.rawlinks ?? {})) ||
      isArrayPresent(Object.keys(value?.autolinks ?? {})),
    // Same tolerance as isPresent: only one of the two properties may be set
    summary: value => ['rawlinks', 'autolinks']
      .map(prop => {
        const nb = Object.keys(value[prop] ?? {}).length;
        return nb > 0 ? nb + ' ' + prop : null;
      })
      .filter(found => found)
      .join(', ')
  },
  refs: {
    isPresent: value =>
      isArrayPresent(value?.normative) ||
      isArrayPresent(value?.informative),
    // Same tolerance as isPresent: only one of the two lists may be set
    summary: value => ['normative', 'informative']
      .map(prop => value[prop]?.length > 0 ?
        value[prop].length + ' ' + prop :
        null)
      .filter(found => found)
      .join(', ')
  }
};
242
+
243
+
244
/**
 * Tell whether the given value is a non-empty array.
 *
 * @param {any} value The value to check
 * @return {Boolean} true when the value is an array with at least one item,
 * false for empty arrays and for anything that is not an array.
 */
function isArrayPresent(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.length > 0;
}
250
+
251
+
252
/**
 * Summarize an extract by reporting the number of items that it contains.
 *
 * @param {Array} value The list of extracted items
 * @return {String} The number of items followed by " found"
 */
function arrayInfo(value) {
  const nb = value.length;
  return `${nb} found`;
}
258
+
259
/**
 * Format a term for inclusion in the Markdown report.
 *
 * Terms of type "abstract-op" or "dfn" are rendered as regular text, between
 * double quotes when there is no URL to link to. Terms of any other type are
 * rendered as inline code. The term gets wrapped in a Markdown link when a
 * URL is provided.
 *
 * @param {String} term The term to format
 * @param {String} type The type of the term
 * @param {String} href URL to link the term to (optional)
 * @return {String} The Markdown that represents the term
 */
function wrapTerm(term, type, href) {
  const isProseTerm = type === 'abstract-op' || type === 'dfn';
  const label = isProseTerm ? term : '`' + term + '`';
  if (href) {
    return `[${label}](${href})`;
  }
  return isProseTerm ? `"${term}"` : label;
}
276
+
277
/**
 * Return the label to use in the report for a category of CSS extracts,
 * in its plural form when there is more than one item.
 *
 * @param {String} prop Name of the category in the CSS extract, one of
 * "atrules", "properties", "selectors" or "values"
 * @param {Number} nb Number of items in the category
 * @return {String} The label, or undefined when the category is not known
 */
function getCSSLabel(prop, nb) {
  // [singular, plural] forms per category
  const forms = new Map([
    ['atrules', ['at-rule', 'at-rules']],
    ['properties', ['property', 'properties']],
    ['selectors', ['selector', 'selectors']],
    ['values', ['value', 'values']]
  ]);
  const labels = forms.get(prop);
  if (!labels) {
    return undefined;
  }
  const [singular, plural] = labels;
  return nb > 1 ? plural : singular;
}
289
+
290
+
291
/**
 * Return a Markdown string that summarizes the given spec crawl results.
 *
 * The report starts with a "Crawl summary:" list (spec metadata, URLs,
 * extracts found, extracts missing), followed by the details that modules
 * choose to highlight, if any.
 *
 * Note the function calls the idlparsed post-processing module on the crawl
 * result when the spec has IDL that was not parsed yet. The IDL reporting
 * logic then reads the `idlparsed` property of the crawl result (presumably
 * set by that call).
 *
 * @function
 * @param {Object} specResult The crawl result for the spec
 * @return {Promise<String>} The promise to get the report in Markdown
 */
export async function generateSpecReport(specResult) {
  // Start report with a summary on spec metadata, adding URLs as needed
  const summary = [];
  for (const mod of reffyModules) {
    if (mod.metadata && specResult[mod.property]) {
      summary.push(`- ${mod.label}: ${specResult[mod.property]}`);
    }
  }
  summary.push(`- Canonical URL: [${specResult.url}](${specResult.url})`);
  if (specResult.crawled && specResult.crawled !== specResult.url) {
    summary.push(`- Crawled URL: [${specResult.crawled}](${specResult.crawled})`);
  }

  // If the spec defines IDL, run the idlparsed post-processing module
  if (specResult.idl && !specResult.idlparsed) {
    await idlparsed.run(specResult);
  }

  // Bind reporting functions to the modules that have some. The merge creates
  // new objects on purpose: module descriptions come from a shared JSON import
  // and must not be altered (they would end up with functions attached).
  const extractModules = reffyModules
    .filter(mod => !mod.metadata && moduleFunctions[mod.property])
    .map(mod => ({ ...mod, ...moduleFunctions[mod.property] }));

  // Add summary of extracts found and not found
  const extractsSummary = [];
  const missingSummary = [];
  for (const mod of extractModules) {
    const value = specResult[mod.property];
    if (mod.isPresent(value)) {
      extractsSummary.push(` - ${mod.label}: ${mod.summary(value, specResult)}`);
    }
    else {
      missingSummary.push(mod.label);
    }
  }
  if (extractsSummary.length > 0) {
    extractsSummary.sort();
    summary.push(`- Spec defines:`);
    summary.push(...extractsSummary);
  }
  if (missingSummary.length > 0) {
    missingSummary.sort();
    summary.push(`- No ${missingSummary.join(', ')} definitions found`);
  }

  // End of summary, look at possible details of interest
  const details = [];
  for (const mod of extractModules) {
    const value = specResult[mod.property];
    if (!mod.details || !mod.isPresent(value)) {
      continue;
    }
    const modDetails = mod.details(value, specResult);
    if (modDetails) {
      details.push(modDetails);
    }
  }

  const report = [];
  report.push('Crawl summary:');
  report.push(...summary);
  if (details.length > 0) {
    report.push('');
    report.push(...details);
  }
  return report.join('\n');
}
@@ -50,7 +50,7 @@
50
50
  import fs from 'node:fs';
51
51
  import path from 'node:path';
52
52
  import { pathToFileURL } from 'node:url';
53
- import { createFolderIfNeeded } from './util.js';
53
+ import { createFolderIfNeeded, shouldSaveToFile } from './util.js';
54
54
  import csscomplete from '../postprocessing/csscomplete.js';
55
55
  import events from '../postprocessing/events.js';
56
56
  import idlnames from '../postprocessing/idlnames.js';
@@ -220,7 +220,7 @@ async function save(mod, processResult, options) {
220
220
  }
221
221
  }
222
222
 
223
- if (!options.output) {
223
+ if (!shouldSaveToFile(options)) {
224
224
  // Nothing to do if no output folder was given
225
225
  return;
226
226
  }
@@ -16,6 +16,7 @@ import { inspect } from 'node:util';
16
16
  import specs from 'web-specs' with { type: 'json' };
17
17
  import postProcessor from './post-processor.js';
18
18
  import ThrottledQueue from './throttled-queue.js';
19
+ import { generateSpecReport } from './markdown-report.js';
19
20
  import {
20
21
  completeWithAlternativeUrls,
21
22
  expandBrowserModules,
@@ -26,7 +27,8 @@ import {
26
27
  setupBrowser,
27
28
  teardownBrowser,
28
29
  createFolderIfNeeded,
29
- loadJSON
30
+ loadJSON,
31
+ shouldSaveToFile
30
32
  } from './util.js';
31
33
 
32
34
  import packageConfig from '../../package.json' with { type: 'json' };
@@ -187,7 +189,7 @@ async function crawlSpec(spec, crawlOptions) {
187
189
  */
188
190
  async function saveSpecResults(spec, settings) {
189
191
  settings = settings || {};
190
- if (!settings.output) {
192
+ if (!shouldSaveToFile(settings)) {
191
193
  return spec;
192
194
  }
193
195
 
@@ -336,16 +338,78 @@ async function saveSpecResults(spec, settings) {
336
338
 
337
339
 
338
340
  /**
339
- * Main method that crawls the list of specification URLs and return a structure
340
- * that full describes its title, URLs, references, and IDL definitions.
341
+ * Helper function that takes a list of specs as inputs and expands them to an
342
+ * object suitable for crawling, with as much information as possible.
341
343
  *
342
344
  * @function
343
- * @param {Array(String)} speclist List of URLs to parse
345
+ * @param {Array(String|Object)} list A list of "specs", where each spec can be
346
+ * a string that represents a spec's shortname, series shortname or URL, or an
347
+ * object that already contains appropriate information.
348
+ * @return {Array(Object)} An array of spec objects. Note: When a spec was
349
+ * already described through an object, the function returns the object as-is
350
+ * and makes no attempt at validating it.
351
+ */
352
function prepareListOfSpecs(list) {
  // Look for a known spec whose URL or shortname matches the given ID, and
  // fall back to the current specification of the series with that shortname.
  const findKnownSpec = id =>
    specs.find(s => s.url === id || s.shortname === id) ??
    specs.find(s => s.series &&
      s.series.shortname === id &&
      s.series.currentSpecification === s.shortname);

  // Interpret the ID as an absolute URL, or as a path to an HTML file
  // relative to the current working directory.
  const toUrl = id => {
    try {
      return (new URL(id)).href;
    }
    catch {
      if (!id.endsWith('.html')) {
        throw new Error(`Spec ID "${id}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`);
      }
      return (new URL(id, `file://${process.cwd()}/`)).href;
    }
  };

  const expanded = [];
  for (const spec of list) {
    // Spec objects are returned as-is
    if (typeof spec !== 'string') {
      expanded.push(spec);
      continue;
    }

    const known = findKnownSpec(spec);
    if (known) {
      expanded.push(known);
      continue;
    }

    // Unknown spec: create a minimal description, deriving a shortname
    // from the ID by dropping characters that are not suitable for it.
    const url = toUrl(spec);
    const shortname = spec.replace(/[:\/\\\.]/g, '');
    expanded.push({
      url,
      nightly: { url },
      shortname,
      series: { shortname }
    });
  }
  return expanded;
}
390
+
391
+
392
+ /**
393
+ * Crawl the provided list of specifications and return an array with the crawl
394
+ * results.
395
+ *
396
+ * Crawl options may be specified as a second parameter. The function ignores
397
+ * options that affect the output such as `output`, `markdown` or `terse`. The
398
+ * function also does not run post-processing modules that apply at the "crawl"
399
+ * level.
400
+ *
401
+ * @function
402
+ * @param {Array(String|Object)} speclist List of specs to crawl, where each
403
+ * spec can be a string that represents a spec's shortname, series shortname or
404
+ * URL, or an object that already contains appropriate information.
344
405
  * @param {Object} crawlOptions Crawl options
345
- * @return {Promise<Array(Object)} The promise to get an array of complete
346
- * specification descriptions
406
+ * @return {Promise<Array(Object)>} The promise to get an array with crawl
407
+ * results.
347
408
  */
348
409
  async function crawlList(speclist, crawlOptions) {
410
+ // Expand the list of specs to spec objects suitable for crawling
411
+ speclist = prepareListOfSpecs(speclist);
412
+
349
413
  // Make a shallow copy of crawl options object since we're going
350
414
  // to modify properties in place
351
415
  crawlOptions = Object.assign({speclist}, crawlOptions);
@@ -419,6 +483,25 @@ async function crawlList(speclist, crawlOptions) {
419
483
  await teardownBrowser();
420
484
  }
421
485
 
486
+ // Merge extracts per series when necessary (CSS/IDL extracts)
487
+ for (const mod of crawlOptions.modules) {
488
+ if (mod.extractsPerSeries) {
489
+ await adjustExtractsPerSeries(results, mod.property, crawlOptions);
490
+ }
491
+ }
492
+ for (const mod of crawlOptions.post ?? []) {
493
+ if (postProcessor.extractsPerSeries(mod)) {
494
+ await adjustExtractsPerSeries(results, mod.property, crawlOptions);
495
+ }
496
+ }
497
+
498
+ // Attach a crawl summary in Markdown if so requested
499
+ if (crawlOptions.markdown || crawlOptions.summary) {
500
+ for (const res of results) {
501
+ res.crawlSummary = await generateSpecReport(res);
502
+ }
503
+ }
504
+
422
505
  return results;
423
506
  }
424
507
 
@@ -434,7 +517,7 @@ async function crawlList(speclist, crawlOptions) {
434
517
  * @return {Promise(Array)} The promise to get an updated crawl results array
435
518
  */
436
519
  async function adjustExtractsPerSeries(data, property, settings) {
437
- if (!settings.output) {
520
+ if (!shouldSaveToFile(settings)) {
438
521
  return data;
439
522
  }
440
523
 
@@ -486,7 +569,7 @@ async function adjustExtractsPerSeries(data, property, settings) {
486
569
  * @return {Promise<void>} The promise to have saved the data
487
570
  */
488
571
  async function saveResults(contents, settings) {
489
- if (!settings.output) {
572
+ if (!shouldSaveToFile(settings)) {
490
573
  return;
491
574
  }
492
575
  const indexFilename = path.join(settings.output, 'index.json');
@@ -495,62 +578,38 @@ async function saveResults(contents, settings) {
495
578
 
496
579
 
497
580
  /**
498
- * Crawls the specifications listed in the given JSON file and generates a
499
- * crawl report in the given folder.
581
+ * Run a crawl given a set of options.
582
+ *
583
+ * The set of options matches those defined in the CLI. The function crawls all
584
+ * specs by default in particular.
585
+ *
586
+ * If the `output` option is not set, the function outputs a JSON dump of the
587
+ * crawl results to the console (or a report in Markdown if the `markdown`
588
+ * option is set) and does not return anything to the caller.
589
+ *
590
+ * If the `output` option is set to the magic value `{return}`, the function
591
+ * outputs nothing but returns an object that represents the crawl results,
592
+ * with the actual results per spec stored in a `results` property.
593
+ *
594
+ * If the `output` option is set to any other value, the function interprets it
595
+ * as a folder, creates subfolders and files with crawl results in that folder,
596
+ * with a root `index.json` entry point, and does not return anything.
500
597
  *
501
598
  * @function
502
- * @param {Object} options Crawl options. Possible options are:
599
+ * @param {Object} options Crawl options. Possible options include:
503
600
  * publishedVersion, debug, output, terse, modules and specs.
504
601
  * See CLI help (node reffy.js --help) for details.
505
- * @return {Promise<void>} The promise that the crawl will have been made
602
+ * @return {Promise<void|Object>} The promise that the crawl will have been
603
+ * made along with the index of crawl results if the `output` option was set
604
+ * to the specific value `{return}`.
506
605
  */
507
606
  async function crawlSpecs(options) {
508
- function prepareListOfSpecs(list) {
509
- return list.map(spec => {
510
- if (typeof spec !== 'string') {
511
- return spec;
512
- }
513
- let match = specs.find(s => s.url === spec || s.shortname === spec);
514
- if (!match) {
515
- match = specs.find(s => s.series &&
516
- s.series.shortname === spec &&
517
- s.series.currentSpecification === s.shortname);
518
- }
519
- if (match) {
520
- return match;
521
- }
522
-
523
- let url = null;
524
- try {
525
- url = (new URL(spec)).href;
526
- }
527
- catch {
528
- if (spec.endsWith('.html')) {
529
- url = (new URL(spec, `file://${process.cwd()}/`)).href;
530
- }
531
- else {
532
- const msg = `Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`;
533
- throw new Error(msg);
534
- }
535
- }
536
- return {
537
- url,
538
- nightly: { url },
539
- shortname: spec.replace(/[:\/\\\.]/g, ''),
540
- series: {
541
- shortname: spec.replace(/[:\/\\\.]/g, ''),
542
- }
543
- };
544
- });
545
- }
546
-
547
607
  const crawlIndex = options?.useCrawl ?
548
608
  await loadJSON(path.join(options.useCrawl, 'index.json')) :
549
609
  null;
550
-
551
- const requestedList = crawlIndex ? crawlIndex.results :
552
- options?.specs ? prepareListOfSpecs(options.specs) :
553
- specs;
610
+ const requestedList = crawlIndex ?
611
+ crawlIndex.results :
612
+ (options?.specs ?? specs);
554
613
 
555
614
  // Make a shallow copy of passed options parameter and expand modules
556
615
  // in place.
@@ -558,20 +617,6 @@ async function crawlSpecs(options) {
558
617
  options.modules = expandBrowserModules(options.modules);
559
618
 
560
619
  return crawlList(requestedList, options)
561
- .then(async results => {
562
- // Merge extracts per series when necessary (CSS/IDL extracts)
563
- for (const mod of options.modules) {
564
- if (mod.extractsPerSeries) {
565
- await adjustExtractsPerSeries(results, mod.property, options);
566
- }
567
- }
568
- for (const mod of options.post ?? []) {
569
- if (postProcessor.extractsPerSeries(mod)) {
570
- await adjustExtractsPerSeries(results, mod.property, options);
571
- }
572
- }
573
- return results;
574
- })
575
620
  .then(async results => {
576
621
  // Create and return a crawl index out of the results, to allow
577
622
  // post-processing modules to run.
@@ -605,10 +650,19 @@ async function crawlSpecs(options) {
605
650
  console.log(typeof results === 'string' ?
606
651
  results : JSON.stringify(results, null, 2));
607
652
  }
653
+ else if (options.markdown) {
654
+ console.log('# Crawl results');
655
+ console.log();
656
+ for (const res of results) {
657
+ console.log(`## ${res.title}`);
658
+ console.log(res.crawlSummary);
659
+ console.log();
660
+ }
661
+ }
608
662
  else if (!options.output) {
609
663
  console.log(JSON.stringify(results, null, 2));
610
664
  }
611
- else {
665
+ else if (shouldSaveToFile(options)) {
612
666
  await saveResults(index, options);
613
667
  }
614
668
  return index;
@@ -619,7 +673,7 @@ async function crawlSpecs(options) {
619
673
  if (!postProcessor.appliesAtLevel(mod, 'crawl')) {
620
674
  continue;
621
675
  }
622
- const crawlResults = options.output ?
676
+ const crawlResults = shouldSaveToFile(options) ?
623
677
  await expandCrawlResult(
624
678
  crawlIndex, options.output, postProcessor.dependsOn(mod)) :
625
679
  crawlIndex;
@@ -630,25 +684,66 @@ async function crawlSpecs(options) {
630
684
  console.log();
631
685
  console.log(JSON.stringify(result, null, 2));
632
686
  }
687
+ else if (!shouldSaveToFile(options)) {
688
+ // Attach the post-processing result to the index of the
689
+ // crawl results.
690
+ crawlIndex.post = crawlIndex.post ?? [];
691
+ crawlIndex.post.push({
692
+ mod: postProcessor.getProperty(mod),
693
+ result
694
+ });
695
+ }
633
696
  }
697
+
698
+ // Function does not return anything if it already reported the
699
+ // results to the console or files. It returns the index of the
700
+ // crawl results otherwise.
701
+ if (!options.output || shouldSaveToFile(options)) {
702
+ return;
703
+ }
704
+ return crawlIndex;
634
705
  });
635
706
  }
636
707
 
637
708
 
638
- /**************************************************
639
- Export methods for use as module
640
- **************************************************/
641
- // TODO: consider more alignment between the two crawl functions or
642
- // find more explicit names to distinguish between them:
643
- // - "crawlList" takes an explicit list of specs as input, does not run the
644
- // post-processor, and returns the results without saving them to files.
645
- // - "crawlSpecs" takes options as input, runs all steps and saves results
646
- // to files (or outputs the results to the console). It does not return
647
- // anything.
709
+ /**
710
+ * Crawl a set of specs according to the given set of crawl options.
711
+ *
712
+ * The function behaves differently depending on the parameters it receives.
713
+ *
714
+ * If it receives no parameter, the function behaves as if it were called with a
715
+ * single empty object as parameter.
716
+ *
717
+ * If it receives a single object as parameter, this object sets crawl options
718
+ * (essentially matching CLI options). What the function outputs or returns
719
+ * depends on the `output` option. If `output` is not set, the function outputs
720
+ * a JSON dump of the index of the crawl results to the console and returns
721
+ * nothing to the caller. If `output` is set to the "magic" value `{return}`,
722
+ * the function does not output anything but returns the index of the crawl
723
+ * results which a caller may then process in any way they wish. If `output` is
724
+ * set to any other value, it defines a folder, the function saves crawl
725
+ * results as folders and files in that folder and returns nothing.
726
+ *
727
+ * If it receives an array as first parameter, the array defines the set of
728
+ * specs that are to be crawled (each spec may be a string representing the
729
+ * spec's shortname, series shortname, or URL; or a spec object). The second
730
+ * parameter, if present, defines additional crawl options (same as above,
731
+ * except the `specs` option should not be set). The function returns an
732
+ * array of crawl results to the caller.
733
+ *
734
+ * Note the function does not apply post-processing modules that run at the
735
+ * "crawl" level when it receives an array as first parameter. It will also
736
+ * ignore crawl options that control the output such as `output`, `markdown`
737
+ * and `terse`.
738
+ */
648
739
  function crawl(...args) {
649
740
  return Array.isArray(args[0]) ?
650
741
  crawlList.apply(this, args) :
651
742
  crawlSpecs.apply(this, args);
652
743
  }
653
744
 
745
+
746
+ /**************************************************
747
+ Export crawl method for use as module
748
+ **************************************************/
654
749
  export { crawl as crawlSpecs };
package/src/lib/util.js CHANGED
@@ -1137,6 +1137,21 @@ async function getSchemaValidationFunction(schemaName) {
1137
1137
  };
1138
1138
  }
1139
1139
 
1140
+
1141
/**
 * Return true if the crawler should save results to files given the crawl
 * options.
 *
 * Results get saved to files when the `output` option is set to anything but
 * the magic `{return}` value.
 *
 * @function
 * @param {Object} crawlOptions Crawl options (optional)
 * @return {Boolean} true when the crawler should save the results to files,
 * false otherwise.
 */
function shouldSaveToFile(crawlOptions) {
  const output = crawlOptions?.output;
  // Explicit conversion so that the function always returns an actual
  // boolean, including when options or the `output` option are not set.
  return Boolean(output) && output !== '{return}';
}
1153
+
1154
+
1140
1155
  export {
1141
1156
  fetch,
1142
1157
  expandBrowserModules,
@@ -1151,5 +1166,6 @@ export {
1151
1166
  createFolderIfNeeded,
1152
1167
  getInterfaceTreeInfo,
1153
1168
  getSchemaValidationFunction,
1154
- loadJSON
1169
+ loadJSON,
1170
+ shouldSaveToFile
1155
1171
  };
@@ -14,7 +14,8 @@ import {
14
14
  getExpectedDfnFromIdlDesc } from '../cli/check-missing-dfns.js';
15
15
  import {
16
16
  isLatestLevelThatPasses,
17
- createFolderIfNeeded } from '../lib/util.js';
17
+ createFolderIfNeeded,
18
+ shouldSaveToFile } from '../lib/util.js';
18
19
 
19
20
 
20
21
  /**
@@ -379,7 +380,7 @@ async function generateIdlNames(crawl, options) {
379
380
  * @param {Object} options Crawl options ("output" will be used)
380
381
  */
381
382
  async function saveIdlNames(names, options) {
382
- if (!options?.output) {
383
+ if (!shouldSaveToFile(options)) {
383
384
  return;
384
385
  }
385
386