reffy 20.0.13 → 20.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. package/LICENSE +21 -21
  2. package/README.md +151 -151
  3. package/index.js +29 -29
  4. package/package.json +5 -5
  5. package/reffy.js +324 -324
  6. package/schemas/browserlib/extract-algorithms.json +52 -52
  7. package/schemas/browserlib/extract-cssdfn.json +108 -108
  8. package/schemas/browserlib/extract-dfns.json +90 -90
  9. package/schemas/browserlib/extract-elements.json +17 -17
  10. package/schemas/browserlib/extract-events.json +31 -31
  11. package/schemas/browserlib/extract-headings.json +19 -19
  12. package/schemas/browserlib/extract-ids.json +7 -7
  13. package/schemas/browserlib/extract-links.json +12 -12
  14. package/schemas/browserlib/extract-refs.json +12 -12
  15. package/schemas/common.json +876 -876
  16. package/schemas/files/extracts/algorithms.json +12 -12
  17. package/schemas/files/extracts/css.json +16 -16
  18. package/schemas/files/extracts/dfns.json +12 -12
  19. package/schemas/files/extracts/elements.json +12 -12
  20. package/schemas/files/extracts/events.json +12 -12
  21. package/schemas/files/extracts/headings.json +12 -12
  22. package/schemas/files/extracts/ids.json +12 -12
  23. package/schemas/files/extracts/links.json +12 -12
  24. package/schemas/files/extracts/refs.json +12 -12
  25. package/schemas/files/index.json +59 -59
  26. package/schemas/postprocessing/events.json +50 -50
  27. package/schemas/postprocessing/idlnames-parsed.json +27 -27
  28. package/schemas/postprocessing/idlnames.json +17 -17
  29. package/schemas/postprocessing/idlparsed.json +67 -67
  30. package/src/browserlib/clone-and-clean.mjs +24 -24
  31. package/src/browserlib/create-outline.mjs +353 -353
  32. package/src/browserlib/extract-algorithms.mjs +723 -723
  33. package/src/browserlib/extract-cddl.mjs +125 -125
  34. package/src/browserlib/extract-dfns.mjs +1093 -1093
  35. package/src/browserlib/extract-headings.mjs +76 -76
  36. package/src/browserlib/extract-ids.mjs +28 -28
  37. package/src/browserlib/extract-links.mjs +45 -45
  38. package/src/browserlib/extract-references.mjs +308 -308
  39. package/src/browserlib/extract-webidl.mjs +89 -89
  40. package/src/browserlib/get-absolute-url.mjs +29 -29
  41. package/src/browserlib/get-code-elements.mjs +20 -20
  42. package/src/browserlib/get-generator.mjs +26 -26
  43. package/src/browserlib/get-lastmodified-date.mjs +13 -13
  44. package/src/browserlib/get-revision.mjs +12 -12
  45. package/src/browserlib/get-title.mjs +14 -14
  46. package/src/browserlib/informative-selector.mjs +24 -24
  47. package/src/browserlib/map-ids-to-headings.mjs +173 -173
  48. package/src/browserlib/reffy.json +85 -85
  49. package/src/browserlib/trim-spaces.mjs +35 -35
  50. package/src/cli/check-missing-dfns.js +587 -587
  51. package/src/cli/merge-crawl-results.js +132 -132
  52. package/src/cli/parse-webidl.js +447 -447
  53. package/src/lib/css-grammar-parse-tree.schema.json +109 -109
  54. package/src/lib/css-grammar-parser.js +440 -440
  55. package/src/lib/fetch.js +51 -51
  56. package/src/lib/markdown-report.js +360 -360
  57. package/src/lib/mock-server.js +218 -218
  58. package/src/lib/post-processor.js +322 -322
  59. package/src/lib/throttled-queue.js +129 -129
  60. package/src/postprocessing/annotate-links.js +41 -41
  61. package/src/postprocessing/csscomplete.js +48 -48
  62. package/src/postprocessing/idlnames.js +391 -391
  63. package/src/postprocessing/idlparsed.js +179 -179
  64. package/src/postprocessing/patch-dfns.js +51 -51
  65. package/src/specs/missing-css-rules.json +197 -197
  66. package/src/specs/spec-equivalents.json +149 -149
  67. package/src/browserlib/extract-editors.mjs~ +0 -14
  68. package/src/browserlib/extract-events.mjs~ +0 -3
  69. package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
  70. package/src/browserlib/get-revision.mjs~ +0 -7
  71. package/src/cli/csstree-grammar-check.js +0 -28
  72. package/src/cli/csstree-grammar-check.js~ +0 -10
  73. package/src/cli/csstree-grammar-parser.js +0 -11
  74. package/src/cli/csstree-grammar-parser.js~ +0 -1
  75. package/src/cli/extract-editors.js~ +0 -38
  76. package/src/cli/process-specs.js~ +0 -28
  77. package/src/postprocessing/annotate-links.js~ +0 -8
  78. package/src/postprocessing/events.js~ +0 -245
package/reffy.js CHANGED
@@ -1,324 +1,324 @@
#!/usr/bin/env node
/**
 * The spec crawler takes a list of spec URLs as input, gathers some knowledge
 * about these specs (published versions, URL of the Editor's Draft, etc.),
 * fetches these specs, parses them, extracts relevant information that they
 * contain (such as the WebIDL they define, the list of specifications that they
 * reference, and links to external specs), and produces a crawl report with the
 * results of these investigations.
 *
 * Provided Reffy was installed as a global package, the spec crawler can be
 * called directly through:
 *
 * `reffy [options]`
 *
 * Use the `--help` option for usage instructions.
 *
 * If Reffy was not installed as a global package, call:
 *
 * `node reffy.js [options]`
 *
 * @module crawler
 */

import { Command } from 'commander';
import satisfies from 'semver/functions/satisfies.js';
import specs from 'web-specs' with { type: 'json' };
import packageConfig from './package.json' with { type: 'json' };
import { crawlSpecs } from './src/lib/specs-crawler.js';
import postProcessor from './src/lib/post-processor.js';
import { loadJSON } from './src/lib/util.js';

// Warn if version of Node.js does not satisfy requirements
if (packageConfig.engines && packageConfig.engines.node &&
    !satisfies(process.version, packageConfig.engines.node)) {
    console.warn(`
[WARNING] Node.js ${process.version} detected but Reffy needs Node.js ${packageConfig.engines.node}.
Please consider upgrading Node.js if the program crashes!`);
}


function parseModuleOption(input) {
    const parts = input.split(':');
    if (parts.length > 2) {
        console.error('Module input cannot have more than one ":" character');
        process.exit(2);
    }
    if (parts.length === 2) {
        return {
            href: parts[1],
            property: parts[0]
        };
    }
    else {
        return parts[0];
    }
}

async function parseSpecOption(input) {
    if (input === 'all') {
        return specs
            .filter(s => s.standing !== 'discontinued')
            .map(s => s.shortname);
    }
    else {
        const list = await loadJSON(input);
        return list ?? input;
    }
}

function parsePostOption(input) {
    if (input === 'core') {
        return postProcessor.modules;
    }
    else {
        return input;
    }
}


const program = new Command();
program
    .version(packageConfig.version)
    .usage('[options]')
    .description('Crawls and processes a list of Web specifications')
    .option('-d, --debug', 'debug mode, crawl one spec at a time')
    .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
    .option('--md, --markdown', 'output a Markdown report')
    .option('-m, --module <modules...>', 'spec processing modules')
    .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
    .option('-p, --post <modules...>', 'post-processing modules')
    .option('-q, --quiet', 'do not report progress and other warnings to the console')
    .option('-r, --release', 'crawl release (TR) version of specs')
    .option('-s, --spec <specs...>', 'specs to crawl')
    .option('--summary', 'include a crawl summary in Markdown for each spec')
    .option('-t, --terse', 'output crawl results without metadata')
    .option('-u, --use-crawl <folder>', 'use given crawl result folder as input for post-processing')
    .action(async options => {
        if (!(options.output || options.module || options.spec || options.useCrawl)) {
            console.error(`
At least one of the --output, --module, --spec or --use-crawl options needs to be
specified. For usage notes, run:
  reffy --help

If you really want to crawl all specs, run all processing modules, and report the
JSON outcome to the console, you may run the following command, but note that it
will dump ~100MB of data to the console:
  reffy --spec all
`);
            process.exit(2);
        }
        const crawlOptions = {
            debug: options.debug,
            fallback: options.fallback,
            markdown: options.markdown,
            output: options.output,
            publishedVersion: options.release,
            quiet: options.quiet,
            summary: options.summary,
            terse: options.terse,
            useCrawl: options.useCrawl
        };
        if (options.module) {
            crawlOptions.modules = options.module.map(parseModuleOption);
        }
        if (options.spec) {
            crawlOptions.specs = (await Promise.all(options.spec.map(parseSpecOption))).flat();
        }
        else {
            crawlOptions.specs = await parseSpecOption('all');
        }
        if (options.post) {
            crawlOptions.post = options.post.map(parsePostOption).flat();
        }

        if (crawlOptions.terse && crawlOptions.output) {
            console.error('The --terse option cannot be combined with the --output option');
            process.exit(2);
        }
        if (crawlOptions.terse && (!crawlOptions.modules || crawlOptions.modules.length === 0 || crawlOptions.modules.length > 1)) {
            console.error('The --terse option can only be set when only one core processing module runs');
            process.exit(2);
        }
        crawlSpecs(crawlOptions)
            .then(_ => {
                process.exit(0);
            })
            .catch(err => {
                console.error(err);
                process.exit(1);
            });
    })
    .showHelpAfterError('(run with --help for usage information)')
    .addHelpText('after', `
Minimal usage example:
  To crawl all known specs, run all processing modules, and save generated
  extracts to the current folder, run:
    $ reffy -o .

Description:
  Crawls a set of specifications and runs processing modules against each of
  them to generate extracts.

  Crawl results are written to the console as a serialized JSON array with one
  entry per spec by default. The order of the specs in the array matches the
  order of the specs provided as input (or the order of the specs in
  browser-specs if no explicit spec was provided).

  The resulting array may be large. Crawling all specs with the core processing
  modules produces ~100MB of serialized JSON, for instance. To avoid janking
  the console or running into possible memory issues, setting the --output
  option is strongly recommended.

Usage notes for some of the options:
-f, --fallback <jsondata>
  Provides an existing JSON crawl data file to use as a source of fallback data
  for specs that fail to be crawled.

  The fallback data gets copied as-is. It is the responsibility of the caller
  to make sure that extracts it may link to actually exist and match the ones
  that the crawl would produce in the absence of errors (e.g. same modules).

  The "error" property is set on specs for which fallback data was used.

--md, --markdown
  Output a crawl summary in Markdown instead of a JSON report. The option takes
  precedence over the \`--output\` option.

-m, --module <modules...>
  If processing modules are not specified, the crawler runs all core processing
  modules defined in:
    https://github.com/w3c/reffy/blob/main/src/browserlib/reffy.json

  Modules must be specified using a relative path to an ".mjs" file that defines
  the processing logic to run on the spec's page in a browser context. For
  instance:
    $ reffy --output reports/test --module extract-editors.mjs

  Absolute paths to modules are not properly handled and will likely result in a
  crawling error.

  Multiple modules can be specified, repeating the option name or not:
    $ reffy -o reports/test -m extract-words.mjs extract-editors.mjs
    $ reffy -o reports/test -m extract-words.mjs -m extract-editors.mjs

  Core processing modules may be referenced using the name of the extract folder
  or property that they would create:
    $ reffy --output reports/test --module dfns

  To run all core processing modules, use "core". For instance, to apply a
  processing module on top of core processing modules, use:
    $ reffy --output reports/test --module core extract-editors.mjs

  Each module must export a function that takes a spec object as input and
  returns a result that can be serialized as JSON. Typical module code looks
  like:
    https://github.com/w3c/reffy/blob/main/src/browserlib/extract-ids.mjs

  Individual extracts will be created under "<folder>/[camelCaseModule]" where
  "[camelCaseModule]" is derived from the module's filename. For instance:
  "extract-editors.mjs" creates extracts under "<folder>/extractEditors".

  The name of the folder where extracts get created may be specified for custom
  modules by prefixing the path to the module with the folder name followed by
  ":". For instance, to save extracts to "reports/test/editors", use:
    $ reffy --output reports/test --module editors:extract-editors.mjs

-o, --output <folder>
  By default, crawl results are written to the console as a serialized JSON
  array with one entry per spec, and module processing results attached as
  property values in each of these entries.

  If an output <folder> is specified, crawl results are instead saved to that
  folder, with module processing results created under subfolders (see the
  --module option) and linked from an index.json file created under <folder>.

  Additionally, if an output <folder> is specified and the IDL processing
  module is run, the crawler will also create an index of IDL names named
  "idlnames.json" that links to relevant extracts in subfolders.

  The folder targeted by <folder> must exist.

-p, --post <modules...>
  Post-processing modules run either after a spec is done crawling or after the
  entire crawl is over. They allow developers to complete the data based on
  other extracts that were not available when extraction ran.

  To run all core post-processing modules, use "core". Core post-processing
  modules are defined in:
    https://github.com/w3c/reffy/blob/main/src/postprocessing.js

  The crawler does not run any post-processing modules by default.

  Custom post-processing modules may be specified using a relative path to a
  ".js" file that defines the post-processing logic. For instance:
    $ reffy --output reports/test --post mypostprocessing.js

  Each module must export a "run" function. See the post-processor's code for
  details:
    https://github.com/w3c/reffy/blob/main/src/lib/post-processor.js

  Absolute paths to modules are not properly handled and will likely result in a
  processing error.

  Multiple post-processing modules can be specified, repeating the option name
  or not:
    $ reffy -o reports/test -p cssdfns cssidl events
    $ reffy -o reports/test -p events -p idlparsed -p idlnames

-r, --release
  The crawler defaults to crawling the nightly version of requested specs.
  Set this flag to tell the crawler to crawl the published version of the specs
  instead. When the flag is set, the crawler will ignore specs that do not have
  a published version.

-s, --spec <specs...>
  If specs to crawl are not specified, all specs in browser-specs that are not
  identified as being discontinued get crawled:
    https://github.com/w3c/browser-specs/

  Valid spec values may be a shortname, a URL, or a relative path to a file that
  contains a list of spec URLs and/or shortnames. All shortnames must exist in
  browser-specs. A shortname may be the shortname of a spec series, in which
  case the spec identified as the current specification in the series is used.
  For instance, as of September 2021, "pointerlock" will map to "pointerlock-2"
  because Pointer Lock 2.0 is the current level in the series.

  Use "all" to include all specs in browser-specs in the crawl. For instance, to
  crawl all specs plus one custom spec that does not exist in browser-specs:
    $ reffy -o reports/test -s all https://example.org/myspec

  When "all" is used, to force a crawl on some of the discontinued specs too,
  include their shortname explicitly (or point to a JSON file that lists their
  shortnames). For instance, to also crawl the discontinued DOM Level 2 Style
  spec, run:
    $ reffy -o reports/test -s all DOM-Level-2-Style

--summary
  Tells Reffy to attach a Markdown summary of the crawl per spec to the JSON
  report, in a \`crawlSummary\` property. The Markdown report is suitable for
  inclusion in a GitHub issue or similar. It starts with a summary, and then
  details a few noteworthy extracts (CSS, dfns, Web IDL) in expandable
  sections, with links to the online xref database search where appropriate.

-t, --terse
  This flag cannot be combined with the --output option and cannot be set if
  more than one processing module gets run. When set, the crawler writes the
  processing module results to the console directly, without wrapping them in
  spec metadata. In other words, the spec entry in the crawl results directly
  contains the outcome of the processing module when the flag is set.

  Additionally, if the crawl runs on a single specification, the array is
  omitted and the processing module results are thus written to the console
  directly. For instance:
    $ reffy --spec fetch --module idl --terse

-u, --use-crawl <folder>
  Tells Reffy to skip the crawl part and only run requested post-processing
  modules on the crawl results present in the specified folder.

  If post-processing modules are not specified, Reffy will merely copy the crawl
  results to the output folder (or to the console).
`);

program.parse(process.argv);
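
The help text above says each processing module must be an ".mjs" file whose default export runs in the browser context of the crawled page, takes a spec object, and returns a JSON-serializable result. A minimal sketch of such a custom module, modeled on the extract-ids.mjs example linked in the help text; the module name and the use of the spec argument are hypothetical:

    // extract-heading-count.mjs (hypothetical custom module)
    // Runs in the browser context of the crawled page, so `document` is available.
    export default function (spec) {
        // Return something JSON-serializable: a trivial per-spec extract
        // combining crawl metadata with data read from the live page.
        return {
            url: spec.url,
            headings: document.querySelectorAll('h1, h2, h3').length
        };
    }

Following the naming rule described in the help text, running "$ reffy -o reports/test -m extract-heading-count.mjs" would save extracts under "reports/test/extractHeadingCount".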
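
Likewise, the --post usage notes state that a custom post-processing module is a ".js" file that exports a "run" function. A minimal sketch, assuming a CommonJS module whose "run" function receives a crawled spec entry and returns it enriched; the "dependsOn" and "input" metadata properties and the exact signature are assumptions here, so check post-processor.js (linked in the help text) for the actual contract:

    // count-dfns.js (hypothetical post-processing module)
    module.exports = {
        // Assumed metadata: run once per spec, after the "dfns" extract exists.
        dependsOn: ['dfns'],
        input: 'spec',
        run: async function (spec, options) {
            // Derive a new property from an extract produced during the crawl.
            spec.dfnCount = (spec.dfns || []).length;
            return spec;
        }
    };

Per the --use-crawl notes, it could then run against a finished crawl with "$ reffy -u reports/test -o reports/post -p count-dfns.js".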