reffy 7.2.10 → 8.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -9
- package/index.js +3 -5
- package/package.json +4 -4
- package/reffy.js +61 -15
- package/src/browserlib/extract-cssdfn.mjs +15 -3
- package/src/browserlib/extract-events.mjs +20 -20
- package/src/browserlib/reffy.json +1 -1
- package/src/lib/post-processor.js +269 -0
- package/src/lib/specs-crawler.js +93 -127
- package/src/lib/util.js +76 -6
- package/src/postprocessing/csscomplete.js +50 -0
- package/src/postprocessing/events.js +197 -0
- package/src/{cli/generate-idlnames.js → postprocessing/idlnames.js} +102 -143
- package/src/postprocessing/idlparsed.js +31 -0
- package/src/cli/generate-idlparsed.js +0 -139
package/README.md
CHANGED
|
@@ -32,6 +32,8 @@ npm update -g reffy
|
|
|
32
32
|
|
|
33
33
|
Reffy crawls requested specifications and runs a set of processing modules on the content fetched to create relevant extracts from each spec. Which specs get crawled, and which processing modules get run depend on how the crawler gets called. By default, the crawler crawls all specs defined in [browser-specs](https://github.com/w3c/browser-specs/) and runs all core processing modules defined in the [`browserlib`](https://github.com/w3c/reffy/tree/main/src/browserlib) folder.
|
|
34
34
|
|
|
35
|
+
Reffy can also run post-processing modules on the results of the crawl to create additional views of the data extracted from the spec during the crawl.
|
|
36
|
+
|
|
35
37
|
Crawl results will either be returned to the console or saved in individual files in a report folder when the `--output` parameter is set.
|
|
36
38
|
|
|
37
39
|
Examples of information that can be extracted from the specs:
|
|
@@ -93,15 +95,6 @@ To create the WebIDL extract in the first place, you will need to run the `idl`
|
|
|
93
95
|
reffy --spec fetch --module idl > fetch.idl
|
|
94
96
|
```
|
|
95
97
|
|
|
96
|
-
### Parsed WebIDL generator
|
|
97
|
-
|
|
98
|
-
The **Parsed WebIDL generator** takes the results of a crawl as input and applies the WebIDL parser to all specs it contains to create JSON extracts in an `idlparsed` folder. To run the generator: `node src/cli/generate-idlparsed.js [crawl folder] [save folder]`
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
### WebIDL names generator
|
|
102
|
-
|
|
103
|
-
The **WebIDL names generator** takes the results of a crawl as input and creates a report per referenceable IDL name, that details the complete parsed IDL structure that defines the name across all specs. To run the generator: `node src/cli/generate-idlnames.js [crawl folder] [save folder]`
|
|
104
|
-
|
|
105
98
|
|
|
106
99
|
### Crawl results merger
|
|
107
100
|
|
package/index.js
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
parseIdl: require("./src/cli/parse-webidl").parse,
|
|
3
|
-
crawlSpecs: require("./src/lib/specs-crawler").
|
|
3
|
+
crawlSpecs: require("./src/lib/specs-crawler").crawlSpecs,
|
|
4
4
|
expandCrawlResult: require("./src/lib/util").expandCrawlResult,
|
|
5
5
|
mergeCrawlResults: require("./src/lib/util").mergeCrawlResults,
|
|
6
6
|
isLatestLevelThatPasses: require("./src/lib/util").isLatestLevelThatPasses,
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
generateIdlParsed: require("./src/cli/generate-idlparsed").generateIdlParsed,
|
|
10
|
-
saveIdlParsed: require("./src/cli/generate-idlparsed").saveIdlParsed
|
|
7
|
+
getInterfaceTreeInfo: require("./src/lib/util").getInterfaceTreeInfo,
|
|
8
|
+
postProcessor: require("./src/lib/post-processor")
|
|
11
9
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "reffy",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "8.0.2",
|
|
4
4
|
"description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"bin": "./reffy.js",
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"abortcontroller-polyfill": "1.7.3",
|
|
35
|
-
"commander": "9.
|
|
35
|
+
"commander": "9.4.0",
|
|
36
36
|
"fetch-filecache-for-crawling": "4.1.0",
|
|
37
37
|
"puppeteer": "15.4.0",
|
|
38
38
|
"semver": "^7.3.5",
|
|
@@ -42,10 +42,10 @@
|
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"chai": "4.3.6",
|
|
44
44
|
"mocha": "10.0.0",
|
|
45
|
-
"nock": "13.2.
|
|
45
|
+
"nock": "13.2.9",
|
|
46
46
|
"respec": "32.1.10",
|
|
47
47
|
"respec-hljs": "2.1.1",
|
|
48
|
-
"rollup": "2.
|
|
48
|
+
"rollup": "2.77.0"
|
|
49
49
|
},
|
|
50
50
|
"scripts": {
|
|
51
51
|
"test": "mocha --recursive tests/"
|
package/reffy.js
CHANGED
|
@@ -27,6 +27,7 @@ const specs = require('web-specs');
|
|
|
27
27
|
const { version, engines } = require('./package.json');
|
|
28
28
|
const { requireFromWorkingDirectory } = require('./src/lib/util');
|
|
29
29
|
const { crawlSpecs } = require('./src/lib/specs-crawler');
|
|
30
|
+
const { modules } = require('./src/lib/post-processor');
|
|
30
31
|
|
|
31
32
|
// Warn if version of Node.js does not satisfy requirements
|
|
32
33
|
if (engines && engines.node && !satisfies(process.version, engines.node)) {
|
|
@@ -63,6 +64,15 @@ function parseSpecOption(input) {
|
|
|
63
64
|
}
|
|
64
65
|
}
|
|
65
66
|
|
|
67
|
+
function parsePostOption(input) {
|
|
68
|
+
if (input === 'core') {
|
|
69
|
+
return modules;
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
return input;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
66
76
|
|
|
67
77
|
const program = new commander.Command();
|
|
68
78
|
program
|
|
@@ -73,15 +83,17 @@ program
|
|
|
73
83
|
.option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
|
|
74
84
|
.option('-m, --module <modules...>', 'spec processing modules')
|
|
75
85
|
.option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
|
|
86
|
+
.option('-p, --post <modules...>', 'post-processing modules')
|
|
76
87
|
.option('-q, --quiet', 'do not report progress and other warnings to the console')
|
|
77
88
|
.option('-r, --release', 'crawl release (TR) version of specs')
|
|
78
89
|
.option('-s, --spec <specs...>', 'specs to crawl')
|
|
79
90
|
.option('-t, --terse', 'output crawl results without metadata')
|
|
91
|
+
.option('-u, --use-crawl <folder>', 'use given crawl result folder as input for post-processing')
|
|
80
92
|
.action(options => {
|
|
81
|
-
if (!(options.output || options.module || options.spec)) {
|
|
93
|
+
if (!(options.output || options.module || options.spec || options.useCrawl)) {
|
|
82
94
|
console.error(`
|
|
83
|
-
At least one of the --output, --module or --
|
|
84
|
-
For usage notes, run:
|
|
95
|
+
At least one of the --output, --module, --spec or --use-crawl options needs to be
|
|
96
|
+
specified. For usage notes, run:
|
|
85
97
|
reffy --help
|
|
86
98
|
|
|
87
99
|
If you really want to crawl all specs, run all processing modules and report the
|
|
@@ -97,7 +109,8 @@ will dump ~100MB of data to the console:
|
|
|
97
109
|
output: options.output,
|
|
98
110
|
publishedVersion: options.release,
|
|
99
111
|
quiet: options.quiet,
|
|
100
|
-
terse: options.terse
|
|
112
|
+
terse: options.terse,
|
|
113
|
+
useCrawl: options.useCrawl
|
|
101
114
|
};
|
|
102
115
|
if (options.module) {
|
|
103
116
|
crawlOptions.modules = options.module.map(parseModuleOption);
|
|
@@ -105,6 +118,9 @@ will dump ~100MB of data to the console:
|
|
|
105
118
|
if (options.spec) {
|
|
106
119
|
crawlOptions.specs = options.spec.map(parseSpecOption).flat();
|
|
107
120
|
}
|
|
121
|
+
if (options.post) {
|
|
122
|
+
crawlOptions.post = options.post.map(parsePostOption).flat();
|
|
123
|
+
}
|
|
108
124
|
|
|
109
125
|
if (crawlOptions.terse && crawlOptions.output) {
|
|
110
126
|
console.error('The --terse option cannot be combined with the --output option');
|
|
@@ -163,26 +179,22 @@ Usage notes for some of the options:
|
|
|
163
179
|
Modules must be specified using a relative path to an ".mjs" file that defines
|
|
164
180
|
the processing logic to run on the spec's page in a browser context. For
|
|
165
181
|
instance:
|
|
166
|
-
$ reffy reports/test --module extract-editors.mjs
|
|
182
|
+
$ reffy --output reports/test --module extract-editors.mjs
|
|
167
183
|
|
|
168
184
|
Absolute paths to modules are not properly handled and will likely result in a
|
|
169
185
|
crawling error.
|
|
170
186
|
|
|
171
187
|
Multiple modules can be specified, repeating the option name or not:
|
|
172
|
-
$ reffy reports/test -m extract-words.mjs extract-editors.mjs
|
|
173
|
-
$ reffy reports/test -m extract-words.mjs -m extract-editors.mjs
|
|
174
|
-
|
|
175
|
-
The option cannot appear before <folder>, unless you use "--" to flag the end
|
|
176
|
-
of the list:
|
|
177
|
-
$ reffy --module extract-editors.mjs -- reports/test
|
|
188
|
+
$ reffy -o reports/test -m extract-words.mjs extract-editors.mjs
|
|
189
|
+
$ reffy -o reports/test -m extract-words.mjs -m extract-editors.mjs
|
|
178
190
|
|
|
179
191
|
Core processing modules may be referenced using the name of the extract folder
|
|
180
192
|
or property that they would create:
|
|
181
|
-
$ reffy reports/test --module dfns
|
|
193
|
+
$ reffy --output reports/test --module dfns
|
|
182
194
|
|
|
183
195
|
To run all core processing modules, use "core". For instance, to apply a
|
|
184
196
|
processing module on top of core processing modules, use:
|
|
185
|
-
$ reffy reports/test --module core extract-editors.mjs
|
|
197
|
+
$ reffy --output reports/test --module core extract-editors.mjs
|
|
186
198
|
|
|
187
199
|
Each module must export a function that takes a spec object as input and
|
|
188
200
|
return a result that can be serialized as JSON. A typical module code looks
|
|
@@ -196,7 +208,7 @@ Usage notes for some of the options:
|
|
|
196
208
|
The name of the folder where extracts get created may be specified for custom
|
|
197
209
|
modules by prefixing the path to the module with the folder name followed by
|
|
198
210
|
":". For instance, to save extracts to "reports/test/editors", use:
|
|
199
|
-
$ reffy reports/test --module editors:extract-editors.mjs
|
|
211
|
+
$ reffy --output reports/test --module editors:extract-editors.mjs
|
|
200
212
|
|
|
201
213
|
-o, --output <folder>
|
|
202
214
|
By default, crawl results are written to the console as a serialized JSON
|
|
@@ -213,6 +225,33 @@ Usage notes for some of the options:
|
|
|
213
225
|
|
|
214
226
|
The folder targeted by <folder> must exist.
|
|
215
227
|
|
|
228
|
+
-p, --post <modules...>
|
|
229
|
+
Post-processing modules either run after a spec is done crawling or after the
|
|
230
|
+
entire crawl is over. They allow developers to complete data based on other
|
|
231
|
+
extracts that were not available when extraction ran.
|
|
232
|
+
|
|
233
|
+
To run all core post-processing modules, use "core". Core post-processing
|
|
234
|
+
modules are defined in:
|
|
235
|
+
https://github.com/w3c/reffy/blob/main/src/postprocessing.js
|
|
236
|
+
|
|
237
|
+
The crawler does not run any post-processing modules by default.
|
|
238
|
+
|
|
239
|
+
Custom post-processing modules may be specified using a relative path to a
|
|
240
|
+
".js" file that defines the post-processing logic. For instance:
|
|
241
|
+
$ reffy --output reports/test --post mypostprocessing.js
|
|
242
|
+
|
|
243
|
+
Each module must export a "run" function. See the post-processor's code for
|
|
244
|
+
details:
|
|
245
|
+
https://github.com/w3c/reffy/blob/main/src/lib/post-processor.js
|
|
246
|
+
|
|
247
|
+
Absolute paths to modules are not properly handled and will likely result in a
|
|
248
|
+
processing error.
|
|
249
|
+
|
|
250
|
+
Multiple post-processing modules can be specified, repeating the option name
|
|
251
|
+
or not:
|
|
252
|
+
$ reffy -o reports/test -p cssdfns cssidl events
|
|
253
|
+
$ reffy -o reports/test -p events -p idlparsed -p idlnames
|
|
254
|
+
|
|
216
255
|
-r, --release
|
|
217
256
|
If the flag is not set, the crawler defaults to crawl nightly versions of the
|
|
218
257
|
specs.
|
|
@@ -230,7 +269,7 @@ Usage notes for some of the options:
|
|
|
230
269
|
|
|
231
270
|
Use "all" to include all specs in browser-specs in the crawl. For instance, to
|
|
232
271
|
crawl all specs plus one custom spec that does not exist in browser-specs:
|
|
233
|
-
$ reffy reports/test -s all https://example.org/myspec
|
|
272
|
+
$ reffy -o reports/test -s all https://example.org/myspec
|
|
234
273
|
|
|
235
274
|
-t, --terse
|
|
236
275
|
This flag cannot be combined with the --output option and cannot be set if
|
|
@@ -243,6 +282,13 @@ Usage notes for some of the options:
|
|
|
243
282
|
and the processing module results are thus written to the console directly.
|
|
244
283
|
For instance:
|
|
245
284
|
$ reffy --spec fetch --module idl --terse
|
|
285
|
+
|
|
286
|
+
-u, --use-crawl <folder>
|
|
287
|
+
Tells Reffy to skip the crawl part and only run requested post-processing
|
|
288
|
+
modules on the crawl results present in the specified folder.
|
|
289
|
+
|
|
290
|
+
If post-processing modules are not specified, Reffy will merely copy the crawl
|
|
291
|
+
results to the output folder (or to the console).
|
|
246
292
|
`);
|
|
247
293
|
|
|
248
294
|
program.parse(process.argv);
|
|
@@ -257,10 +257,22 @@ const extractValueSpaces = doc => {
|
|
|
257
257
|
// https://drafts.csswg.org/css-easing-2/#typedef-step-easing-function
|
|
258
258
|
const prod = text.split(reSplitRules)
|
|
259
259
|
.find(p => p.trim().startsWith(dfn.textContent.trim()));
|
|
260
|
-
if (
|
|
261
|
-
|
|
260
|
+
if (prod) {
|
|
261
|
+
parseProductionRule(prod, { pureSyntax: true });
|
|
262
|
+
}
|
|
263
|
+
else {
|
|
264
|
+
// "=" may appear in another formula in the body of the text, as in:
|
|
265
|
+
// https://drafts.csswg.org/css-speech-1/#typedef-voice-volume-decibel
|
|
266
|
+
// It may be worth checking but not an error per se.
|
|
267
|
+
console.warn('[reffy]', `Found "=" next to definition of ${dfn.textContent.trim()} but no production rule. Did I miss something?`);
|
|
268
|
+
const name = (dfn.getAttribute('data-lt') ?? dfn.textContent)
|
|
269
|
+
.trim().replace(/^<?(.*?)>?$/, '<$1>');
|
|
270
|
+
if (!(name in res)) {
|
|
271
|
+
res[name] = {
|
|
272
|
+
prose: parent.textContent.trim().replace(/\s+/g, ' ')
|
|
273
|
+
};
|
|
274
|
+
}
|
|
262
275
|
}
|
|
263
|
-
parseProductionRule(prod, { pureSyntax: true });
|
|
264
276
|
}
|
|
265
277
|
else if (dfn.textContent.trim().match(/^[a-zA-Z_][a-zA-Z0-9_\-]+\([^\)]+\)$/)) {
|
|
266
278
|
// Definition is "prod(foo bar)", create a "prod() = prod(foo bar)" entry
|
|
@@ -102,12 +102,12 @@ export default function (spec) {
|
|
|
102
102
|
if (el.tagName === "DFN" && el.id) {
|
|
103
103
|
event.href = href(el);
|
|
104
104
|
} else if (el.tagName === "A") {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
105
|
+
if (!el.getAttribute("href").startsWith("https://")) {
|
|
106
|
+
const url = new URL(el.href);
|
|
107
|
+
event.href = href(document.getElementById(url.hash.slice(1)));
|
|
108
|
+
} else {
|
|
109
|
+
event.href = el.href;
|
|
110
|
+
}
|
|
111
111
|
}
|
|
112
112
|
event.src = { format: "summary table", href: href(el.closest('*[id]')) };
|
|
113
113
|
event.type = eventEl.textContent.trim();
|
|
@@ -120,9 +120,9 @@ export default function (spec) {
|
|
|
120
120
|
tr.querySelector(`td:nth-child(${interfaceColumn + 1}) a`)?.textContent ??
|
|
121
121
|
tr.querySelector(`td:nth-child(${interfaceColumn + 1}) code`)?.textContent;
|
|
122
122
|
}
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
123
|
+
if (targetsColumn >= 0 && !event.targets) {
|
|
124
|
+
event.targets = tr.querySelector(`td:nth-child(${targetsColumn + 1})`)?.textContent?.split(',').map(t => t.trim());
|
|
125
|
+
}
|
|
126
126
|
events.push(event);
|
|
127
127
|
eventEl.replaceWith(origEventEl);
|
|
128
128
|
});
|
|
@@ -205,8 +205,8 @@ export default function (spec) {
|
|
|
205
205
|
} else {
|
|
206
206
|
event.type = name;
|
|
207
207
|
// looking at the element following the link
|
|
208
|
-
|
|
209
|
-
|
|
208
|
+
// if its content matches the name of the event
|
|
209
|
+
const eventEl = a.nextElementSibling?.textContent?.trim() === event.type ? a.nextElementSibling.querySelector("a,dfn") || a.nextElementSibling : null;
|
|
210
210
|
if (eventEl) {
|
|
211
211
|
if (eventEl.tagName === "A" && eventEl.getAttribute("href")) {
|
|
212
212
|
// use the target of the link as our href
|
|
@@ -227,7 +227,7 @@ export default function (spec) {
|
|
|
227
227
|
while ((curEl = curEl.nextElementSibling)) {
|
|
228
228
|
if (curEl.textContent.match(/^([A-Z]+[a-z0-9]*)+Event$/)) {
|
|
229
229
|
iface = curEl.textContent.trim();
|
|
230
|
-
|
|
230
|
+
break;
|
|
231
231
|
}
|
|
232
232
|
}
|
|
233
233
|
if (iface) {
|
|
@@ -322,20 +322,20 @@ export default function (spec) {
|
|
|
322
322
|
// of the section where the definitions are located
|
|
323
323
|
let currentEl = container.parentNode;
|
|
324
324
|
while(currentEl) {
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
325
|
+
if (currentEl.tagName.match(/^H[1-6]$/)) {
|
|
326
|
+
break;
|
|
327
|
+
}
|
|
328
|
+
currentEl = currentEl.previousElementSibling;
|
|
329
329
|
}
|
|
330
330
|
const interfaceEl = currentEl?.querySelector("code");
|
|
331
331
|
if (interfaceEl?.textContent?.match(/^[A-Z][a-z]+Event$/)) {
|
|
332
|
-
|
|
332
|
+
iface = interfaceEl.textContent;
|
|
333
333
|
}
|
|
334
334
|
}
|
|
335
335
|
const ev = events.find(e => isSameEvent(event, e));
|
|
336
336
|
if (!ev) {
|
|
337
337
|
if (iface) {
|
|
338
|
-
|
|
338
|
+
event.interface = iface;
|
|
339
339
|
}
|
|
340
340
|
event.bubbles = bubbles;
|
|
341
341
|
events.push(event);
|
|
@@ -347,12 +347,12 @@ export default function (spec) {
|
|
|
347
347
|
ev.interface = iface;
|
|
348
348
|
}
|
|
349
349
|
if (!ev.href && event.href) {
|
|
350
|
-
|
|
350
|
+
ev.href = event.href;
|
|
351
351
|
}
|
|
352
352
|
if (bubbles !== undefined) {
|
|
353
353
|
ev.bubbles = bubbles;
|
|
354
354
|
}
|
|
355
355
|
}
|
|
356
356
|
});
|
|
357
|
-
return events.map(e => e.href && !e.href.startsWith(
|
|
357
|
+
return events.map(e => e.href && !e.href.startsWith(window.location.toString()) ? Object.assign(e, {isExtension: true}) : e) ;
|
|
358
358
|
}
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* The post-processor runs post-processing modules against crawl results.
|
|
4
|
+
*
|
|
5
|
+
* There are two types of post-processing modules:
|
|
6
|
+
* 1. Modules that run against the result of crawling an individual spec. Such
|
|
7
|
+
* modules take the spec crawl result as input and typically update it in place
|
|
8
|
+
* 2. Modules that run against an entire crawl result. Such modules take the
|
|
9
|
+
* entire crawl result as input and return whatever structure they would like
|
|
10
|
+
* to return.
|
|
11
|
+
*
|
|
12
|
+
* The post-processor exposes two main functions:
|
|
13
|
+
* - run() to run a post-processing module against crawl results or against a
|
|
14
|
+
* spec crawl result (depending on the module)
|
|
15
|
+
* - save() to save processing results to files
|
|
16
|
+
*
|
|
17
|
+
* A post-processing module needs to expose the following properties and
|
|
18
|
+
* functions:
|
|
19
|
+
* - dependsOn: list of crawl result info that the module depends on. Values
|
|
20
|
+
* include "css", "dfns", "idl", as well as info that other post-processing
|
|
21
|
+
* modules may generate such as "idlparsed".
|
|
22
|
+
* - input: either "crawl" or "spec". Default is "spec". Tells whether the
|
|
23
|
+
* module operates on a spec crawl result or on the entire crawl result
|
|
24
|
+
* - property: When "input" is "spec", gives the name of the property that
|
|
25
|
+
* will be set in the spec crawl result when the post-processing module runs
|
|
26
|
+
* and of the folder that will contain the spec extracts (unless module has its own
|
|
27
|
+
* "save" logic). For modules that run at the crawl level, gives the name of
|
|
28
|
+
* the final extract file that gets created (unless module has its own "save"
|
|
29
|
+
* logic).
|
|
30
|
+
* - run: Async function to call to apply the post-processing module. The
|
|
31
|
+
* function is called with either a spec crawl result or the entire crawl result
|
|
32
|
+
* depending on "input". Second parameter is the crawl options object. The
|
|
33
|
+
* function should return the created structure when "input" is "crawl" and
|
|
34
|
+
* the updated spec crawl result when "input" is "spec". Note the function
|
|
35
|
+
* may update the spec crawl result in place.
|
|
36
|
+
* - save: Function to call to save the results of the post-processing module.
|
|
37
|
+
* The function is called with the returned result of running the
|
|
38
|
+
* post-processing module. Second parameter is the crawl options object. The
|
|
39
|
+
* function is only needed if "save" needs to do specific things that the
|
|
40
|
+
* post-processor cannot do on its own. Function must return the relative path
|
|
41
|
+
* to the file that was saved
|
|
42
|
+
* - extractsPerSeries: A boolean flag that tells the crawler that it should
|
|
43
|
+
* clean up extracts afterwards to produce extracts per series instead of
|
|
44
|
+
* extracts per spec. The flag is only meaningful if module runs at the spec
|
|
45
|
+
* level and if "property" is set.
|
|
46
|
+
*
|
|
47
|
+
* @module
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
const fs = require('fs');
|
|
51
|
+
const path = require('path');
|
|
52
|
+
const { createFolderIfNeeded, requireFromWorkingDirectory } = require('./util');
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Core post-processing modules
|
|
57
|
+
*/
|
|
58
|
+
const modules = {
|
|
59
|
+
csscomplete: require('../postprocessing/csscomplete'),
|
|
60
|
+
events: require('../postprocessing/events'),
|
|
61
|
+
idlnames: require('../postprocessing/idlnames'),
|
|
62
|
+
idlparsed: require('../postprocessing/idlparsed')
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Returns the post-processing module that matches the requested name, or the
|
|
68
|
+
* given parameter if it is a post-processing module already
|
|
69
|
+
*
|
|
70
|
+
* @function
|
|
71
|
+
* @param {String|Object} mod Module name of known post-processing module, or
|
|
72
|
+
* actual post-processing module.
|
|
73
|
+
* @return {Object} Post-processing module
|
|
74
|
+
*/
|
|
75
|
+
function getModule(mod) {
|
|
76
|
+
if (typeof mod === 'string') {
|
|
77
|
+
if (modules[mod]) {
|
|
78
|
+
return Object.assign({ name: mod }, modules[mod]);
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
const fmod = requireFromWorkingDirectory(mod);
|
|
82
|
+
if (!fmod) {
|
|
83
|
+
throw new Error(`Unknown post-processing module "${mod}"`);
|
|
84
|
+
}
|
|
85
|
+
if (!isModuleValid(fmod)) {
|
|
86
|
+
throw new Error(`"${mod}" is not a valid post-processing module`);
|
|
87
|
+
}
|
|
88
|
+
return Object.assign({ name: mod }, fmod);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
else if (!isModuleValid(mod)) {
|
|
92
|
+
throw new Error(`Post-processing module given as parameter does not have a "run" function`);
|
|
93
|
+
}
|
|
94
|
+
return mod;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Returns true if given module object looks like a valid module, false
|
|
100
|
+
* otherwise.
|
|
101
|
+
*
|
|
102
|
+
* @function
|
|
103
|
+
* @param {Object} mod Post-processing module object
|
|
104
|
+
* @return {boolean} True when module looks valid, false otherwise
|
|
105
|
+
*/
|
|
106
|
+
function isModuleValid(mod) {
|
|
107
|
+
return !!mod && mod.run && (typeof mod.run === 'function');
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Run a post-processing module against some crawl result
|
|
113
|
+
*
|
|
114
|
+
* @function
|
|
115
|
+
* @param {String|Object} mod Module name for known module or the actual
|
|
116
|
+
* module implementation.
|
|
117
|
+
* @param {Object} crawlResult The entire crawl results if module runs at the
|
|
118
|
+
* "crawl" input level, the result of crawling a spec if module runs at the
|
|
119
|
+
* "spec" input level.
|
|
120
|
+
* @param {Object} options Crawl options. See spec crawler for details.
|
|
121
|
+
* @return {Object} Post-processing structure
|
|
122
|
+
*/
|
|
123
|
+
async function run(mod, crawlResult, options) {
|
|
124
|
+
mod = getModule(mod);
|
|
125
|
+
|
|
126
|
+
if (mod.input === 'crawl') {
|
|
127
|
+
if (crawlResult.crawled) {
|
|
128
|
+
// Post-processing module runs at the crawl level and we received
|
|
129
|
+
// a spec crawl result
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// TODO: make sure that there is at least one spec for which properties
|
|
134
|
+
// listed in "dependsOn" are set. If not, the module cannot run, which
|
|
135
|
+
// typically signals that the crawler was called with incompatible settings.
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
if (!crawlResult.crawled) {
|
|
139
|
+
// Post-processing module runs at the spec level and we received
|
|
140
|
+
// a full crawl result
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// TODO: check properties listed in "dependsOn". If none is set, no need to
|
|
145
|
+
// run the module (but not an error per se, it may just be that this
|
|
146
|
+
// particular spec does not define relevant info)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
return await mod.run(crawlResult, options);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Save post-processing results
|
|
155
|
+
*
|
|
156
|
+
* @function
|
|
157
|
+
* @param {String|Object} mod Module name for known module or the actual
|
|
158
|
+
* module implementation.
|
|
159
|
+
* @param {Object} processResult The post-processing results
|
|
160
|
+
* @param {Object} options Crawl options. See spec crawler for details.
|
|
161
|
+
* @return {String} Relative path to the file created
|
|
162
|
+
*/
|
|
163
|
+
async function save(mod, processResult, options) {
|
|
164
|
+
mod = getModule(mod);
|
|
165
|
+
processResult = processResult || {};
|
|
166
|
+
options = options || {};
|
|
167
|
+
|
|
168
|
+
if (mod.input === 'crawl') {
|
|
169
|
+
if (processResult.shortname) {
|
|
170
|
+
// Post-processing module runs at the crawl level and we received
|
|
171
|
+
// a spec crawl result
|
|
172
|
+
return;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
if (!processResult.shortname) {
|
|
177
|
+
// Post-processing module runs at the spec level and we received
|
|
178
|
+
// a full crawl result
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if (!options.output) {
|
|
184
|
+
// Nothing to do if no output folder was given
|
|
185
|
+
return;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
if (mod.save) {
|
|
189
|
+
// For post-processing modules that have some save logic, we'll just let
|
|
190
|
+
// them do whatever they want
|
|
191
|
+
return mod.save(processResult, options);
|
|
192
|
+
}
|
|
193
|
+
else if (!mod.property) {
|
|
194
|
+
// For post-processing modules that don't touch any single property, default
|
|
195
|
+
// save operation is to do nothing.
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
else if (mod.input === 'crawl') {
|
|
199
|
+
// For post-processing modules that apply at the crawl level, default save
|
|
200
|
+
// operation is to create a JSON file in the output folder named after the
|
|
201
|
+
// post-processing module
|
|
202
|
+
const filename = path.join(options.output, `${mod.property}.json`);
|
|
203
|
+
await createFolderIfNeeded(options.output);
|
|
204
|
+
await fs.promises.writeFile(filename, JSON.stringify(processResult, null, 2), 'utf8');
|
|
205
|
+
return `${mod.property}.json`;
|
|
206
|
+
}
|
|
207
|
+
else {
|
|
208
|
+
// For post-processing modules that apply at the spec level, default save
|
|
209
|
+
// operation is to create a JSON extract file named after the spec's
|
|
210
|
+
// shortname under a subfolder named after the post-processing module in the
|
|
211
|
+
// output folder. Contents of the extract are the contents of the property
|
|
212
|
+
// that has the same name as the module (or the name of the module's
|
|
213
|
+
// "property" parameter if defined) in the post-processing result.
|
|
214
|
+
if (!processResult[mod.property]) {
|
|
215
|
+
return;
|
|
216
|
+
}
|
|
217
|
+
const folder = path.join(options.output, mod.property);
|
|
218
|
+
const filename = path.join(folder, `${processResult.shortname}.json`);
|
|
219
|
+
const contents = {
|
|
220
|
+
spec: {
|
|
221
|
+
title: processResult.title,
|
|
222
|
+
url: processResult.crawled
|
|
223
|
+
}
|
|
224
|
+
};
|
|
225
|
+
contents[mod.property] = processResult[mod.property];
|
|
226
|
+
await createFolderIfNeeded(folder);
|
|
227
|
+
await fs.promises.writeFile(filename, JSON.stringify(contents, null, 2), 'utf8');
|
|
228
|
+
processResult[mod.property] = `${mod.property}/${processResult.shortname}.json`;
|
|
229
|
+
return processResult[mod.property];
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Return true if post-processing module generates extracts per spec series
|
|
236
|
+
*/
|
|
237
|
+
function extractsPerSeries(mod) {
|
|
238
|
+
mod = getModule(mod);
|
|
239
|
+
return (mod.input !== 'crawl') && !!mod.property && !!mod.extractsPerSeries;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Return the list of crawl result info that the post-processing module depends on
|
|
245
|
+
*/
|
|
246
|
+
function dependsOn(mod) {
|
|
247
|
+
mod = getModule(mod);
|
|
248
|
+
return mod.dependsOn;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
function appliesAtLevel(mod, level) {
|
|
253
|
+
mod = getModule(mod);
|
|
254
|
+
const crawlLevel = mod.input === 'crawl';
|
|
255
|
+
return level === 'crawl' ? crawlLevel : !crawlLevel;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
/**************************************************
|
|
261
|
+
Export post-processing functions
|
|
262
|
+
**************************************************/
|
|
263
|
+
module.exports = {
|
|
264
|
+
modules: Object.keys(modules),
|
|
265
|
+
run, save,
|
|
266
|
+
extractsPerSeries,
|
|
267
|
+
dependsOn,
|
|
268
|
+
appliesAtLevel
|
|
269
|
+
};
|