reffy 3.1.0 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Reffy's command line interface that you can use to crawl and study spec
4
+ * references. The tool runs the crawler, then the study tools to create the
5
+ * full reports that typically show up under w3c/webref.
6
+ *
7
+ * Tool can be called directly through:
8
+ *
9
+ * `node crawl-and-study.js [command]`
10
+ *
11
+ * Run `node crawl-and-study.js -h` for help
12
+ *
13
+ * @module crawler
14
+ */
15
+
16
+ const program = require('commander');
17
+ const version = require('../../package.json').version;
18
+ const fs = require('fs');
19
+ const path = require('path');
20
+ const crawlSpecs = require('../lib/specs-crawler').crawlSpecs;
21
+ const studyCrawl = require('./study-crawl').studyCrawl;
22
+ const generateReport = require('./generate-report').generateReport;
23
+ const pandoc = require('node-pandoc');
24
+
25
+
26
+ // List of possible perspectives and associated parameters
27
+ // Note the "ed" perspective produces reports under "whatwg" for backward
28
+ // compatibility reasons.
29
+ const perspectives = {
30
+ 'ed': {
31
+ description: 'Crawls the latest Editor\'s Drafts',
32
+ refStudy: 'https://w3c.github.io/webref/ed/study.json'
33
+ },
34
+ 'tr': {
35
+ description: 'Crawls the latest published versions of specifications in /TR/ space instead of the latest Editor\'s Drafts',
36
+ publishedVersion: true,
37
+ refStudy: 'https://w3c.github.io/webref/tr/study.json'
38
+ }
39
+ };
40
+
41
+ // List of possible actions for each perspective
42
+ const possibleActions = {
43
+ 'all': 'crawl specs, study report and generate markdown, HTML and diff reports. Default action',
44
+ 'crawl': 'crawl specs and generate a machine-readable report with facts about each spec',
45
+ 'study': 'parse the machine-readable report generated by the crawler, and create a study report of potential anomalies found in the report',
46
+ 'markdown': 'produce a human-readable report in Markdown format out of the anomalies report returned by the study action',
47
+ 'html': 'produce an HTML report out of the Markdown report generated by the markdown action',
48
+ 'diff': 'compare the anomalies report with the latest published anomalies report and generate diff report',
49
+ 'diffnew': 'compare the anomalies report with the latest published anomalies report and generate diff report that only contains new anomalies'
50
+ };
51
+
52
+ let command = null;
53
+ program
54
+ .version(version)
55
+ .option('-d, --debug', 'run crawl in debug mode (single process, one spec at a time)');
56
+
57
+ program
58
+ .command('run <perspective> [action]')
59
+ .description('run a new crawl and study from the given perspective')
60
+ .option('-d, --debug', 'run crawl in debug mode (single process, one spec at a time)')
61
+ .action(async (perspective, action, cmdObj) => {
62
+ command = 'run';
63
+ if (!(perspective in perspectives)) {
64
+ return program.help();
65
+ }
66
+ if (action && !(action in possibleActions)) {
67
+ return program.help();
68
+ }
69
+
70
+ let debug = cmdObj.debug || program.debug;
71
+ let publishedVersion = perspectives[perspective].publishedVersion;
72
+ let refStudy = perspectives[perspective].refStudy;
73
+ let reportFolder = perspectives[perspective].reportFolder ||
74
+ 'reports/' + perspective;
75
+ let crawlReport = path.join(reportFolder, 'index.json');
76
+ let studyReport = path.join(reportFolder, 'study.json');
77
+
78
+ let promise = Promise.resolve();
79
+ let actions = (!action || (action === 'all')) ?
80
+ ['crawl', 'study', 'markdown', 'html', 'diff', 'diffnew'] :
81
+ [action];
82
+
83
+ actions.forEach(action => {
84
+ switch (action) {
85
+ case 'crawl':
86
+ promise = promise
87
+ .then(_ => crawlSpecs(
88
+ { publishedVersion, debug, output: reportFolder }));
89
+ break;
90
+
91
+ case 'study':
92
+ const options = {};
93
+ if (perspective === 'ed') {
94
+ const trFolder = perspectives.tr.reportFolder || 'reports/tr';
95
+ const trReport = path.join(trFolder, 'index.json');
96
+ if (fs.existsSync(trReport)) {
97
+ options.trResults = trReport;
98
+ }
99
+ }
100
+ promise = promise
101
+ .then(_ => studyCrawl(crawlReport, options))
102
+ .then(results => {
103
+ fs.writeFileSync(path.join(reportFolder, 'study.json'),
104
+ JSON.stringify(results, null, 2));
105
+ });
106
+ break;
107
+
108
+ case 'markdown':
109
+ promise = promise
110
+ .then(_ => generateReport(studyReport, { perSpec: true }))
111
+ .then(report => fs.writeFileSync(path.join(reportFolder, 'index.md'), report))
112
+ .then(_ => generateReport(studyReport, { perSpec: false }))
113
+ .then(report => fs.writeFileSync(path.join(reportFolder, 'perissue.md'), report));
114
+ break;
115
+
116
+ case 'html':
117
+ promise = promise
118
+ .then(_ => new Promise((resolve, reject) => {
119
+ let args = [
120
+ '-f', 'markdown', '-t', 'html5', '--section-divs', '-s',
121
+ '--template', path.join(__dirname, '..', 'templates', 'report-template.html'),
122
+ '-o', path.join(reportFolder, 'index.html')
123
+ ];
124
+ pandoc(path.join(reportFolder, 'index.md'), args,
125
+ (err => {
126
+ if (err) {
127
+ return reject(err);
128
+ }
129
+ args = [
130
+ '-f', 'markdown', '-t', 'html5', '--section-divs', '-s',
131
+ '--template', path.join(__dirname, '..', 'templates', 'report-perissue-template.html'),
132
+ '-o', path.join(reportFolder, 'perissue.html')];
133
+ pandoc(path.join(reportFolder, 'perissue.md'), args,
134
+ (err => {
135
+ if (err) {
136
+ return reject(err);
137
+ }
138
+ return resolve();
139
+ }));
140
+ }));
141
+ }));
142
+ break;
143
+
144
+ case 'diff':
145
+ promise = promise
146
+ .then(_ => generateReport(studyReport, {
147
+ diffReport: true,
148
+ refStudyFile: refStudy
149
+ }))
150
+ .then(report => fs.writeFileSync(path.join(reportFolder, 'diff.md'), report));
151
+ break;
152
+
153
+ case 'diffnew':
154
+ promise = promise
155
+ .then(_ => generateReport(studyReport, {
156
+ diffReport: true,
157
+ refStudyFile: refStudy,
158
+ onlyNew: true
159
+ }))
160
+ .then(report => fs.writeFileSync(path.join(reportFolder, 'diffnew.md'), report));
161
+ break;
162
+ }
163
+ });
164
+
165
+ return promise;
166
+ });
167
+
168
+ program.on('--help', function() {
169
+ console.log('');
170
+ console.log(' Possible perspectives:');
171
+ console.log('');
172
+ Object.keys(perspectives).forEach(perspective => {
173
+ console.log(' ' + perspective + ': ' + perspectives[perspective].description);
174
+ });
175
+ console.log('');
176
+
177
+ console.log(' Possible actions:');
178
+ console.log('');
179
+ Object.keys(possibleActions).forEach(action => {
180
+ console.log(' ' + action + ': ' + possibleActions[action]);
181
+ });
182
+ console.log('');
183
+
184
+ console.log(' Possible options:');
185
+ console.log('');
186
+ console.log(' -d, --debug: run crawl in debug mode (single process, one spec at a time)');
187
+ console.log('');
188
+ });
189
+
190
+ program.on('command:*', function () {
191
+ console.error('Invalid command: %s.\n', program.args.join(' '));
192
+ program.outputHelp();
193
+ process.exit(1);
194
+ });
195
+
196
+ if (!process.argv.slice(2).length) {
197
+ console.error('Cannot run program without arguments.\n');
198
+ program.outputHelp();
199
+ process.exit(1);
200
+ }
201
+
202
+ program
203
+ .parseAsync(process.argv)
204
+ .then(_ => {
205
+ console.log('-- THE END -- ');
206
+ process.exit(0);
207
+ })
208
+ .catch(err => {
209
+ console.error('-- ERROR CAUGHT --');
210
+ console.error(err);
211
+ process.exit(1);
212
+ });
@@ -9,7 +9,7 @@
9
9
  * `node generate-idlnames.js [crawl report] [dfns] [save folder]`
10
10
  *
11
11
  * where `crawl report` is the path to the folder that contains the
12
- * `index.json` file and all other crawl results produced by crawl-specs.js,
12
+ * `index.json` file and all other crawl results produced by specs-crawler.js,
13
13
  * `dfns` a param to set to "true" or "dfns" to embed dfns in the generated
14
14
  * report, and `save folder` is an optional folder (which must exist) where IDL
15
15
  * name extracts are to be saved. In the absence of this parameter, the report
@@ -146,6 +146,7 @@ function generateIdlNames(results, options = {}) {
146
146
  }
147
147
  names[name] = {
148
148
  name: name,
149
+ type: idl.type,
149
150
  defined: desc,
150
151
  extended: [],
151
152
  inheritance: idl.inheritance,
@@ -437,4 +438,4 @@ if (require.main === module) {
437
438
  console.log(JSON.stringify(report, null, 2));
438
439
  }
439
440
  });
440
- }
441
+ }
package/src/lib/fetch.js CHANGED
@@ -5,6 +5,7 @@
5
5
  * @module finder
6
6
  */
7
7
 
8
+ const os = require('os');
8
9
  const path = require('path');
9
10
  const baseFetch = require('fetch-filecache-for-crawling');
10
11
 
@@ -43,6 +44,11 @@ async function fetch(url, options) {
43
44
  options.refresh = 'once';
44
45
  }
45
46
 
47
+ // Use cache folder in tmp folder by default
48
+ if (!options.cacheFolder) {
49
+ options.cacheFolder = path.resolve(os.tmpdir(), 'reffy-cache');
50
+ }
51
+
46
52
  return baseFetch(url, options);
47
53
  }
48
54
 
@@ -36,7 +36,8 @@ const mockSpecs = {
36
36
  }
37
37
  },
38
38
  "/mediacapture-output/": `<script>respecConfig = { shortName: 'test' };</script><script src='https://www.w3.org/Tools/respec/respec-w3c'></script><div id=abstract></div><pre class='idl'>[Exposed=Window] interface Foo { attribute DOMString bar; };</pre>`,
39
- "/accelerometer/": `<html><h2>Normative references</h2><dl><dt>FOO</dt><dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd></dl>`
39
+ "/accelerometer/": `<html><h2>Normative references</h2><dl><dt>FOO</dt><dd><a href='https://www.w3.org/TR/Foo'>Foo</a></dd></dl>`,
40
+ "/pointerlock/": `<html><h1>Pointer Lock 2.0`
40
41
  };
41
42
 
42
43
  nock.disableNetConnect();
@@ -7,42 +7,25 @@
7
7
  * reference, and links to external specs), and produces a crawl report with the
8
8
  * results of these investigations.
9
9
  *
10
- * The spec crawler can be called directly through:
11
- *
12
- * `node crawl-specs.js [options]`
13
- *
14
- * Use `--help` option for usage instructions.
15
- *
16
- * The JSON file that contains the list of specs to crawl must be an array whose
17
- * individual items are either:
18
- * 1. a string that gets interpreted as the URL or the shortname of the spec to
19
- * crawl. The spec must exist in w3c/browser-specs
20
- * 2. an object that follows the w3c/browser-specs model:
21
- * https://github.com/w3c/browser-specs#spec-object
22
- *
23
10
  * @module crawler
24
11
  */
25
12
 
26
- const commander = require('commander');
27
- const version = require('../../package.json').version;
28
13
  const fs = require('fs');
29
14
  const path = require('path');
30
15
  const specs = require('browser-specs');
31
- const webidlParser = require('./parse-webidl');
32
- const cssDfnParser = require('../lib/css-grammar-parser');
33
- const { generateIdlNames, saveIdlNames } = require('./generate-idlnames');
16
+ const webidlParser = require('../cli/parse-webidl');
17
+ const cssDfnParser = require('./css-grammar-parser');
18
+ const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
34
19
  const {
35
20
  completeWithAlternativeUrls,
36
- fetch,
37
21
  expandBrowserModules,
38
22
  expandCrawlResult,
39
23
  getGeneratedIDLNamesByCSSProperty,
40
24
  isLatestLevelThatPasses,
41
25
  processSpecification,
42
- requireFromWorkingDirectory,
43
26
  setupBrowser,
44
27
  teardownBrowser
45
- } = require('../lib/util');
28
+ } = require('./util');
46
29
 
47
30
 
48
31
  /**
@@ -189,9 +172,9 @@ async function crawlSpec(spec, crawlOptions) {
189
172
  * metadata about the spec and the crawl processing results in appropriate
190
173
  * properties.
191
174
  * @param {Object} settings Crawl settings. Recognized settings: "modules",
192
- * "output" and "quiet". See CLI help (node crawl-specs.js --help) for
193
- * details. The "modules" setting is mandatory and note that the function
194
- * will not do anything if "output" is not set.
175
+ * "output" and "quiet". See CLI help (node reffy.js --help) for details.
176
+ * The "modules" setting is mandatory and note that the function will not do
177
+ * anything if "output" is not set.
195
178
  * @return {Promise<Object>} The promise to get an updated spec object that
196
179
  * contains links to created extracts.
197
180
  */
@@ -490,7 +473,7 @@ async function saveResults(data, settings) {
490
473
  * @function
491
474
  * @param {Object} options Crawl options. Possible options are:
492
475
  * publishedVersion, debug, output, terse, modules and specs.
493
- * See CLI help (node crawl-specs.js --help) for details.
476
+ * See CLI help (node reffy.js --help) for details.
494
477
  * @return {Promise<void>} The promise that the crawl will have been made
495
478
  */
496
479
  function crawlSpecs(options) {
@@ -499,7 +482,12 @@ function crawlSpecs(options) {
499
482
  if (typeof spec !== 'string') {
500
483
  return spec;
501
484
  }
502
- const match = specs.find(s => s.url === spec || s.shortname === spec);
485
+ let match = specs.find(s => s.url === spec || s.shortname === spec);
486
+ if (!match) {
487
+ match = specs.find(s => s.series &&
488
+ s.series.shortname === spec &&
489
+ s.series.currentSpecification === s.shortname);
490
+ }
503
491
  if (match) {
504
492
  return match;
505
493
  }
@@ -513,7 +501,8 @@ function crawlSpecs(options) {
513
501
  url = (new URL(spec, `file://${process.cwd()}/`)).href;
514
502
  }
515
503
  else {
516
- throw new Error(`Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`);
504
+ const msg = `Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`;
505
+ throw new Error(msg);
517
506
  }
518
507
  }
519
508
  return {
@@ -584,188 +573,3 @@ Export methods for use as module
584
573
  **************************************************/
585
574
  module.exports.crawlList = crawlList;
586
575
  module.exports.crawlSpecs = crawlSpecs;
587
-
588
-
589
- /**************************************************
590
- Code run if the code is run as a stand-alone module
591
- **************************************************/
592
- if (require.main === module) {
593
-
594
- function parseModuleOption(input) {
595
- const parts = input.split(':');
596
- if (parts.length > 2) {
597
- console.error('Module input cannot have more than one ":" character');
598
- process.exit(2);
599
- }
600
- if (parts.length === 2) {
601
- return {
602
- href: parts[1],
603
- property: parts[0]
604
- };
605
- }
606
- else {
607
- return parts[0];
608
- }
609
- }
610
-
611
- function parseSpecOption(input) {
612
- if (input === 'all') {
613
- return specs.map(s => s.shortname);
614
- }
615
- else {
616
- const list = requireFromWorkingDirectory(input);
617
- return list ?? input;
618
- }
619
- }
620
-
621
- const program = new commander.Command();
622
- program
623
- .version(version)
624
- .usage('[options]')
625
- .description('Crawls and processes a list of Web specifications')
626
- .option('-d, --debug', 'debug mode, crawl one spec at a time')
627
- .option('-m, --module <modules...>', 'spec processing modules')
628
- .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
629
- .option('-q, --quiet', 'do not report progress and other warnings to the console')
630
- .option('-r, --release', 'crawl release (TR) version of specs')
631
- .option('-s, --spec <specs...>', 'specs to crawl')
632
- .option('-t, --terse', 'output crawl results without metadata')
633
- .action(options => {
634
- const crawlOptions = {
635
- debug: options.debug,
636
- output: options.output,
637
- publishedVersion: options.release,
638
- quiet: options.quiet,
639
- terse: options.terse
640
- };
641
- if (options.module) {
642
- crawlOptions.modules = options.module.map(parseModuleOption);
643
- }
644
- if (options.spec) {
645
- crawlOptions.specs = options.spec.map(parseSpecOption).flat();
646
- }
647
-
648
- if (crawlOptions.terse && crawlOptions.output) {
649
- console.error('The --terse option cannot be combined with the --output option');
650
- process.exit(2);
651
- }
652
- if (crawlOptions.terse && (!crawlOptions.modules || crawlOptions.modules.length === 0 || crawlOptions.modules.length > 1)) {
653
- console.error('The --terse option can be only be set when only one core processing module runs');
654
- process.exit(2);
655
- }
656
- crawlSpecs(crawlOptions)
657
- .then(_ => {
658
- process.exit(0);
659
- })
660
- .catch(err => {
661
- console.error(err);
662
- process.exit(1);
663
- });
664
- })
665
- .showHelpAfterError('(run with --help for usage information)')
666
- .addHelpText('after', `
667
- Minimal usage example:
668
- $ node crawl-specs.js reports/test
669
-
670
- Description:
671
- Crawls a set of specifications and runs processing modules against each of
672
- them to generate extracts.
673
-
674
- Crawl results are written to the console as a serialized JSON array with one
675
- entry per spec by default. The order of the specs in the array matches the
676
- order of the specs provided as input (or the order of the specs in
677
- browser-specs if no explicit spec was provided).
678
-
679
- Resulting array may be large. Crawling all specs with core processing module
680
- produces ~100MB of serialized JSON for instance. To avoid janking the console
681
- or running into possible memory issues, setting the --output option is
682
- strongly recommended.
683
-
684
- Usage notes for some of the options:
685
- -m, --module <modules...>
686
- If processing modules are not specified, the crawler runs all core processing
687
- modules defined in:
688
- https://github.com/w3c/reffy/tree/main/src/reffy.json
689
-
690
- Modules must be specified using a relative path to an ".mjs" file that defines
691
- the processing logic to run on the spec's page in a browser context. For
692
- instance:
693
- $ node crawl-specs.js reports/test --module extract-editors.mjs
694
-
695
- Absolute paths to modules are not properly handled and will likely result in a
696
- crawling error.
697
-
698
- Multiple modules can be specified, repeating the option name or not:
699
- $ node crawl-specs.js reports/test -m extract-words.mjs extract-editors.mjs
700
- $ node crawl-specs.js reports/test -m extract-words.mjs -m extract-editors.mjs
701
-
702
- The option cannot appear before <folder>, unless you use "--" to flag the end
703
- of the list:
704
- $ node crawl-specs.js --module extract-editors.mjs -- reports/test
705
-
706
- Core processing modules may be referenced using the name of the extract folder
707
- or property that they would create:
708
- $ node crawl-specs.js reports/test --module dfns
709
-
710
- To run all core processing modules, use "core". For instance, to apply a
711
- processing module on top of core processing modules, use:
712
- $ node crawl-specs.js reports/test --module core extract-editors.mjs
713
-
714
- Each module must export a function that takes a spec object as input and
715
- return a result that can be serialized as JSON. A typical module code looks
716
- like:
717
- https://github.com/w3c/reffy/blob/main/src/browserlib/extract-ids.mjs
718
-
719
- Individual extracts will be created under "<folder>/[camelCaseModule]" where
720
- "[camelCaseModule]" is derived from the module's filename. For instance:
721
- "extract-editors.mjs" creates extracts under "<folder>/extractEditors"
722
-
723
- The name of the folder where extracts get created may be specified for custom
724
- modules by prefixing the path to the module with the folder name followed by
725
- ":". For instance, to save extracts to "reports/test/editors", use:
726
- $ node crawl-specs.js reports/test --module editors:extract-editors.mjs
727
-
728
- -o, --output <folder>
729
- By default, crawl results are written to the console as a serialized JSON
730
- array with one entry per spec, and module processing results attached as
731
- property values in each of these entries.
732
-
733
- If an output <folder> is specified, crawl results are rather saved to that
734
- folder, with module processing results created under subfolders (see the
735
- --module option) and linked from an index.json file created under <folder>.
736
-
737
- Additionally, if an output <folder> is specified and if the IDL processing
738
- module is run, the crawler will also creates an index of IDL names named
739
- "idlnames.json" that links to relevant extracts in subfolders.
740
-
741
- -r, --release
742
- If the flag is not set, the crawler defaults to crawl nightly versions of the
743
- specs.
744
-
745
- -s, --spec <specs...>
746
- If specs to crawl are not specified, all specs in browser-specs get crawled:
747
- https://github.com/w3c/browser-specs/
748
-
749
- Valid spec values may be a shortname, a URL, or a relative path to a file that
750
- contains a list of spec URLs and/or shortnames. All shortnames must exist in
751
- browser-specs.
752
-
753
- Use "all" to include all specs in browser-specs in the crawl. For instance, to
754
- crawl all specs plus one custom spec that does not exist in browser-specs:
755
- $ node crawl-specs.js reports/test -s all https://example.org/myspec
756
-
757
- -t, --terse
758
- This flag cannot be combined with the --output option and cannot be set if
759
- more than one processing module gets run. When set, the crawler writes the
760
- processing module results to the console directly without wrapping them with
761
- spec metadata. In other words, the spec entry in the crawl results directly
762
- contains the outcome of the processing module when the flag is set.
763
-
764
- Additionally, if crawl runs on a single specification, the array is omitted
765
- and the processing module results are thus written to the console directly.
766
- For instance:
767
- $ node crawl-specs.js --spec fetch --module idl --terse
768
- `);
769
-
770
- program.parse(process.argv);
771
- }
package/src/lib/util.js CHANGED
@@ -14,6 +14,21 @@ const specEquivalents = require('../specs/spec-equivalents.json');
14
14
  const reffyModules = require('../browserlib/reffy.json');
15
15
 
16
16
 
17
+ /**
18
+ * Maximum depth difference supported between Reffy's install path and custom
19
+ * modules that may be provided on the command-line
20
+ *
21
+ * TODO: Find a way to get rid of that, there should be no limit
22
+ */
23
+ const maxPathDepth = 20;
24
+
25
+
26
+ /**
27
+ * Returns a range array from 0 to the number provided (not included)
28
+ */
29
+ const range = n => Array.from(Array(n).keys());
30
+
31
+
17
32
  /**
18
33
  * Shortcut that returns a property extractor iterator
19
34
  */
@@ -356,7 +371,7 @@ async function processSpecification(spec, processFunction, args, options) {
356
371
  let depth = requestPath.lastIndexOf('__/') / 3;
357
372
  const filename = requestPath.substring(requestPath.lastIndexOf('__/') + 3);
358
373
  let filePath = path.resolve(__dirname, '..', 'browserlib');
359
- while (depth < 7) {
374
+ while (depth < maxPathDepth - 1) {
360
375
  filePath = path.resolve(filePath, '..');
361
376
  depth += 1;
362
377
  }
@@ -547,7 +562,7 @@ async function processSpecification(spec, processFunction, args, options) {
547
562
  // "../../node_modules/[...]" and may import other scripts that are
548
563
  // higher in the folder tree.
549
564
  await page.addScriptTag({
550
- url: 'reffy/scripts/__/__/__/__/__/__/__/__/reffy.mjs',
565
+ url: `reffy/scripts/${range(maxPathDepth).map(n => '__').join('/')}/reffy.mjs`,
551
566
  type: 'module'
552
567
  });
553
568