reffy 3.1.0 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/reffy.js CHANGED
@@ -1,211 +1,227 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * Reffy's main command line interface that you can use to crawl and study spec
4
- * references.
3
+ * The spec crawler takes a list of spec URLs as input, gathers some knowledge
4
+ * about these specs (published versions, URL of the Editor's Draft, etc.),
5
+ * fetches these specs, parses them, extracts relevant information that they
6
+ * contain (such as the WebIDL they define, the list of specifications that they
7
+ * reference, and links to external specs), and produces a crawl report with the
8
+ * results of these investigations.
5
9
  *
6
- * Reffy can be called directly through:
10
+ * Provided Reffy was installed as a global package, the spec crawler can be
11
+ * called directly through:
7
12
  *
8
- * `node reffy.js [command]`
13
+ * `reffy [options]`
9
14
  *
10
- * Run `node reffy.js -h` for help
15
+ * Use the `--help` option for usage instructions.
16
+ *
17
+ * If Reffy was not installed as a global package, call:
18
+ *
19
+ * `node reffy.js [options]`
11
20
  *
12
21
  * @module crawler
13
22
  */
14
23
 
15
- const program = require('commander');
24
+ const commander = require('commander');
16
25
  const version = require('./package.json').version;
17
- const fs = require('fs');
18
- const path = require('path');
19
- const crawlSpecs = require('./src/cli/crawl-specs.js').crawlSpecs;
20
- const studyCrawl = require('./src/cli/study-crawl.js').studyCrawl;
21
- const generateReport = require('./src/cli/generate-report.js').generateReport;
22
- const pandoc = require('node-pandoc');
23
-
24
-
25
- // List of possible perspectives and associated parameters
26
- // Note the "ed" perspective produces reports under "whatwg" for backward
27
- // compatibility reason.
28
- const perspectives = {
29
- 'ed': {
30
- description: 'Crawls the latest Editor\'s Drafts',
31
- refStudy: 'https://w3c.github.io/webref/ed/study.json'
32
- },
33
- 'tr': {
34
- description: 'Crawls the latest published versions of specifications in /TR/ space instead of the latest Editor\'s Drafts',
35
- publishedVersion: true,
36
- refStudy: 'https://w3c.github.io/webref/tr/study.json'
37
- }
38
- };
39
-
40
- // List of possible actions for each perspective
41
- const possibleActions = {
42
- 'all': 'crawl specs, study report and generate markdown, HTML and diff reports. Default action',
43
- 'crawl': 'crawl specs and generate a machine-readable report with facts about each spec',
44
- 'study': 'parse the machine-readable report generated by the crawler, and create a study report of potential anomalies found in the report',
45
- 'markdown': 'produce a human-readable report in Markdown format out of the anomalies report returned by the study action',
46
- 'html': 'produce an HTML report out of the Markdown report generated by the markdown action',
47
- 'diff': 'compare the anomalies report with the latest published anomalies report and generate diff report',
48
- 'diffnew': 'compare the anomalies report with the latest published anomalies report and generate diff report that only contains new anomalies'
49
- };
50
-
51
- let command = null;
52
- program
53
- .version(version)
54
- .option('-d, --debug', 'run crawl in debug mode (single process, one spec at a time)');
26
+ const specs = require('browser-specs');
27
+ const { requireFromWorkingDirectory } = require('./src/lib/util');
28
+ const { crawlSpecs } = require('./src/lib/specs-crawler');
55
29
 
56
- program
57
- .command('run <perspective> [action]')
58
- .description('run a new crawl and study from the given perspective')
59
- .option('-d, --debug', 'run crawl in debug mode (single process, one spec at a time)')
60
- .action(async (perspective, action, cmdObj) => {
61
- command = 'run';
62
- if (!(perspective in perspectives)) {
63
- return program.help();
30
+
31
+ function parseModuleOption(input) {
32
+ const parts = input.split(':');
33
+ if (parts.length > 2) {
34
+ console.error('Module input cannot have more than one ":" character');
35
+ process.exit(2);
36
+ }
37
+ if (parts.length === 2) {
38
+ return {
39
+ href: parts[1],
40
+ property: parts[0]
41
+ };
64
42
  }
65
- if (action && !(action in possibleActions)) {
66
- return program.help();
43
+ else {
44
+ return parts[0];
67
45
  }
46
+ }
68
47
 
69
- let debug = cmdObj.debug || program.debug;
70
- let publishedVersion = perspectives[perspective].publishedVersion;
71
- let refStudy = perspectives[perspective].refStudy;
72
- let reportFolder = perspectives[perspective].reportFolder ||
73
- 'reports/' + perspective;
74
- let crawlReport = path.join(reportFolder, 'index.json');
75
- let studyReport = path.join(reportFolder, 'study.json');
76
-
77
- let promise = Promise.resolve();
78
- let actions = (!action || (action === 'all')) ?
79
- ['crawl', 'study', 'markdown', 'html', 'diff', 'diffnew'] :
80
- [action];
81
-
82
- actions.forEach(action => {
83
- switch (action) {
84
- case 'crawl':
85
- promise = promise
86
- .then(_ => crawlSpecs(
87
- { publishedVersion, debug, output: reportFolder }));
88
- break;
89
-
90
- case 'study':
91
- const options = {};
92
- if (perspective === 'ed') {
93
- const trFolder = perspectives.tr.reportFolder || 'reports/tr';
94
- const trReport = path.join(trFolder, 'index.json');
95
- if (fs.existsSync(trReport)) {
96
- options.trResults = trReport;
97
- }
98
- }
99
- promise = promise
100
- .then(_ => studyCrawl(crawlReport, options))
101
- .then(results => {
102
- fs.writeFileSync(path.join(reportFolder, 'study.json'),
103
- JSON.stringify(results, null, 2));
104
- });
105
- break;
106
-
107
- case 'markdown':
108
- promise = promise
109
- .then(_ => generateReport(studyReport, { perSpec: true }))
110
- .then(report => fs.writeFileSync(path.join(reportFolder, 'index.md'), report))
111
- .then(_ => generateReport(studyReport, { perSpec: false }))
112
- .then(report => fs.writeFileSync(path.join(reportFolder, 'perissue.md'), report));
113
- break;
114
-
115
- case 'html':
116
- promise = promise
117
- .then(_ => new Promise((resolve, reject) => {
118
- let args = [
119
- '-f', 'markdown', '-t', 'html5', '--section-divs', '-s',
120
- '--template', path.join(__dirname, 'src', 'templates', 'report-template.html'),
121
- '-o', path.join(reportFolder, 'index.html')
122
- ];
123
- pandoc(path.join(reportFolder, 'index.md'), args,
124
- (err => {
125
- if (err) {
126
- return reject(err);
127
- }
128
- args = [
129
- '-f', 'markdown', '-t', 'html5', '--section-divs', '-s',
130
- '--template', path.join(__dirname, 'src', 'templates', 'report-perissue-template.html'),
131
- '-o', path.join(reportFolder, 'perissue.html')];
132
- pandoc(path.join(reportFolder, 'perissue.md'), args,
133
- (err => {
134
- if (err) {
135
- return reject(err);
136
- }
137
- return resolve();
138
- }));
139
- }));
140
- }));
141
- break;
142
-
143
- case 'diff':
144
- promise = promise
145
- .then(_ => generateReport(studyReport, {
146
- diffReport: true,
147
- refStudyFile: refStudy
148
- }))
149
- .then(report => fs.writeFileSync(path.join(reportFolder, 'diff.md'), report));
150
- break;
151
-
152
- case 'diffnew':
153
- promise = promise
154
- .then(_ => generateReport(studyReport, {
155
- diffReport: true,
156
- refStudyFile: refStudy,
157
- onlyNew: true
158
- }))
159
- .then(report => fs.writeFileSync(path.join(reportFolder, 'diffnew.md'), report));
160
- break;
161
- }
162
- });
163
-
164
- return promise;
165
- });
166
-
167
- program.on('--help', function() {
168
- console.log('');
169
- console.log(' Possible perspectives:');
170
- console.log('');
171
- Object.keys(perspectives).forEach(perspective => {
172
- console.log(' ' + perspective + ': ' + perspectives[perspective].description);
173
- });
174
- console.log('');
175
-
176
- console.log(' Possible actions:');
177
- console.log('');
178
- Object.keys(possibleActions).forEach(action => {
179
- console.log(' ' + action + ': ' + possibleActions[action]);
180
- });
181
- console.log('');
182
-
183
- console.log(' Possible options:');
184
- console.log('');
185
- console.log(' -d, --debug: run crawl in debug mode (single process, one spec at a time)');
186
- console.log('');
187
- });
188
-
189
- program.on('command:*', function () {
190
- console.error('Invalid command: %s.\n', program.args.join(' '));
191
- program.outputHelp();
192
- process.exit(1);
193
- });
194
-
195
- if (!process.argv.slice(2).length) {
196
- console.error('Cannot run program without arguments.\n');
197
- program.outputHelp();
198
- process.exit(1);
48
+ function parseSpecOption(input) {
49
+ if (input === 'all') {
50
+ return specs.map(s => s.shortname);
51
+ }
52
+ else {
53
+ const list = requireFromWorkingDirectory(input);
54
+ return list ?? input;
55
+ }
199
56
  }
200
57
 
58
+ const program = new commander.Command();
201
59
  program
202
- .parseAsync(process.argv)
203
- .then(_ => {
204
- console.log('-- THE END -- ');
205
- process.exit(0);
206
- })
207
- .catch(err => {
208
- console.error('-- ERROR CAUGHT --');
209
- console.error(err);
210
- process.exit(1);
211
- });
60
+ .version(version)
61
+ .usage('[options]')
62
+ .description('Crawls and processes a list of Web specifications')
63
+ .option('-d, --debug', 'debug mode, crawl one spec at a time')
64
+ .option('-m, --module <modules...>', 'spec processing modules')
65
+ .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
66
+ .option('-q, --quiet', 'do not report progress and other warnings to the console')
67
+ .option('-r, --release', 'crawl release (TR) version of specs')
68
+ .option('-s, --spec <specs...>', 'specs to crawl')
69
+ .option('-t, --terse', 'output crawl results without metadata')
70
+ .action(options => {
71
+ if (!(options.output || options.module || options.spec)) {
72
+ console.error(`
73
+ At least one of the --output, --module or --spec options needs to be specified.
74
+ For usage notes, run:
75
+ reffy --help
76
+
77
+ If you really want to crawl all specs, run all processing modules and report the
78
+ JSON outcome to the console, you may run the following command but note that it
79
+ will dump ~100MB of data to the console:
80
+ reffy --spec all
81
+ `);
82
+ process.exit(2);
83
+ }
84
+ const crawlOptions = {
85
+ debug: options.debug,
86
+ output: options.output,
87
+ publishedVersion: options.release,
88
+ quiet: options.quiet,
89
+ terse: options.terse
90
+ };
91
+ if (options.module) {
92
+ crawlOptions.modules = options.module.map(parseModuleOption);
93
+ }
94
+ if (options.spec) {
95
+ crawlOptions.specs = options.spec.map(parseSpecOption).flat();
96
+ }
97
+
98
+ if (crawlOptions.terse && crawlOptions.output) {
99
+ console.error('The --terse option cannot be combined with the --output option');
100
+ process.exit(2);
101
+ }
102
+ if (crawlOptions.terse && (!crawlOptions.modules || crawlOptions.modules.length === 0 || crawlOptions.modules.length > 1)) {
103
+ console.error('The --terse option can only be set when only one core processing module runs');
104
+ process.exit(2);
105
+ }
106
+ crawlSpecs(crawlOptions)
107
+ .then(_ => {
108
+ process.exit(0);
109
+ })
110
+ .catch(err => {
111
+ console.error(err);
112
+ process.exit(1);
113
+ });
114
+ })
115
+ .showHelpAfterError('(run with --help for usage information)')
116
+ .addHelpText('after', `
117
+ Minimal usage example:
118
+ To crawl all known specs, run all processing modules, and save generated
119
+ extracts to the current folder, run:
120
+ $ reffy -o .
121
+
122
+ Description:
123
+ Crawls a set of specifications and runs processing modules against each of
124
+ them to generate extracts.
125
+
126
+ Crawl results are written to the console as a serialized JSON array with one
127
+ entry per spec by default. The order of the specs in the array matches the
128
+ order of the specs provided as input (or the order of the specs in
129
+ browser-specs if no explicit spec was provided).
130
+
131
+ Resulting array may be large. Crawling all specs with core processing module
132
+ produces ~100MB of serialized JSON for instance. To avoid janking the console
133
+ or running into possible memory issues, setting the --output option is
134
+ strongly recommended.
135
+
136
+ Usage notes for some of the options:
137
+ -m, --module <modules...>
138
+ If processing modules are not specified, the crawler runs all core processing
139
+ modules defined in:
140
+ https://github.com/w3c/reffy/tree/main/src/reffy.json
141
+
142
+ Modules must be specified using a relative path to an ".mjs" file that defines
143
+ the processing logic to run on the spec's page in a browser context. For
144
+ instance:
145
+ $ reffy --output reports/test --module extract-editors.mjs
146
+
147
+ Absolute paths to modules are not properly handled and will likely result in a
148
+ crawling error.
149
+
150
+ Multiple modules can be specified, repeating the option name or not:
151
+ $ reffy --output reports/test -m extract-words.mjs extract-editors.mjs
152
+ $ reffy --output reports/test -m extract-words.mjs -m extract-editors.mjs
153
+
154
+ The list of modules ends at the next option, so --module may appear before
155
+ other options such as --output:
156
+ $ reffy --module extract-editors.mjs --output reports/test
157
+
158
+ Core processing modules may be referenced using the name of the extract folder
159
+ or property that they would create:
160
+ $ reffy --output reports/test --module dfns
161
+
162
+ To run all core processing modules, use "core". For instance, to apply a
163
+ processing module on top of core processing modules, use:
164
+ $ reffy --output reports/test --module core extract-editors.mjs
165
+
166
+ Each module must export a function that takes a spec object as input and
167
+ returns a result that can be serialized as JSON. A typical module code looks
168
+ like:
169
+ https://github.com/w3c/reffy/blob/main/src/browserlib/extract-ids.mjs
170
+
171
+ Individual extracts will be created under "<folder>/[camelCaseModule]" where
172
+ "[camelCaseModule]" is derived from the module's filename. For instance:
173
+ "extract-editors.mjs" creates extracts under "<folder>/extractEditors"
174
+
175
+ The name of the folder where extracts get created may be specified for custom
176
+ modules by prefixing the path to the module with the folder name followed by
177
+ ":". For instance, to save extracts to "reports/test/editors", use:
178
+ $ reffy --output reports/test --module editors:extract-editors.mjs
179
+
180
+ -o, --output <folder>
181
+ By default, crawl results are written to the console as a serialized JSON
182
+ array with one entry per spec, and module processing results attached as
183
+ property values in each of these entries.
184
+
185
+ If an output <folder> is specified, crawl results are rather saved to that
186
+ folder, with module processing results created under subfolders (see the
187
+ --module option) and linked from an index.json file created under <folder>.
188
+
189
+ Additionally, if an output <folder> is specified and if the IDL processing
190
+ module is run, the crawler will also create an index of IDL names named
191
+ "idlnames.json" that links to relevant extracts in subfolders.
192
+
193
+ The folder targeted by <folder> must exist.
194
+
195
+ -r, --release
196
+ If the flag is not set, the crawler defaults to crawling nightly versions of the
197
+ specs.
198
+
199
+ -s, --spec <specs...>
200
+ If specs to crawl are not specified, all specs in browser-specs get crawled:
201
+ https://github.com/w3c/browser-specs/
202
+
203
+ Valid spec values may be a shortname, a URL, or a relative path to a file that
204
+ contains a list of spec URLs and/or shortnames. All shortnames must exist in
205
+ browser-specs. Shortname may be the shortname of the spec series, in which
206
+ case the spec identified as the current specification in the series is used.
207
+ For instance, as of September 2021, "pointerlock" will map to "pointerlock-2"
208
+ because Pointer Lock 2.0 is the current level in the series.
209
+
210
+ Use "all" to include all specs in browser-specs in the crawl. For instance, to
211
+ crawl all specs plus one custom spec that does not exist in browser-specs:
212
+ $ reffy --output reports/test -s all https://example.org/myspec
213
+
214
+ -t, --terse
215
+ This flag cannot be combined with the --output option and cannot be set if
216
+ more than one processing module gets run. When set, the crawler writes the
217
+ processing module results to the console directly without wrapping them with
218
+ spec metadata. In other words, the spec entry in the crawl results directly
219
+ contains the outcome of the processing module when the flag is set.
220
+
221
+ Additionally, if crawl runs on a single specification, the array is omitted
222
+ and the processing module results are thus written to the console directly.
223
+ For instance:
224
+ $ reffy --spec fetch --module idl --terse
225
+ `);
226
+
227
+ program.parse(process.argv);