reffy 6.2.0 → 6.2.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +158 -158
- package/index.js +11 -11
- package/package.json +53 -53
- package/reffy.js +248 -248
- package/src/browserlib/canonicalize-url.mjs +50 -50
- package/src/browserlib/create-outline.mjs +352 -352
- package/src/browserlib/extract-cssdfn.mjs +319 -319
- package/src/browserlib/extract-dfns.mjs +686 -686
- package/src/browserlib/extract-elements.mjs +205 -205
- package/src/browserlib/extract-headings.mjs +48 -48
- package/src/browserlib/extract-ids.mjs +28 -28
- package/src/browserlib/extract-links.mjs +28 -28
- package/src/browserlib/extract-references.mjs +203 -203
- package/src/browserlib/extract-webidl.mjs +134 -134
- package/src/browserlib/get-absolute-url.mjs +21 -21
- package/src/browserlib/get-generator.mjs +26 -26
- package/src/browserlib/get-lastmodified-date.mjs +13 -13
- package/src/browserlib/get-title.mjs +11 -11
- package/src/browserlib/informative-selector.mjs +16 -16
- package/src/browserlib/map-ids-to-headings.mjs +136 -136
- package/src/browserlib/reffy.json +53 -53
- package/src/cli/check-missing-dfns.js +609 -609
- package/src/cli/generate-idlnames.js +430 -430
- package/src/cli/generate-idlparsed.js +139 -139
- package/src/cli/merge-crawl-results.js +128 -128
- package/src/cli/parse-webidl.js +430 -430
- package/src/lib/css-grammar-parse-tree.schema.json +109 -109
- package/src/lib/css-grammar-parser.js +440 -440
- package/src/lib/fetch.js +55 -55
- package/src/lib/nock-server.js +119 -119
- package/src/lib/specs-crawler.js +605 -603
- package/src/lib/util.js +898 -898
- package/src/specs/missing-css-rules.json +197 -197
- package/src/specs/spec-equivalents.json +149 -149
- package/src/browserlib/extract-editors.mjs~ +0 -14
- package/src/browserlib/generate-es-dfn-report.sh~ +0 -4
- package/src/cli/csstree-grammar-check.js +0 -28
- package/src/cli/csstree-grammar-check.js~ +0 -10
- package/src/cli/csstree-grammar-parser.js +0 -11
- package/src/cli/csstree-grammar-parser.js~ +0 -1
- package/src/cli/extract-editors.js~ +0 -38
- package/src/cli/process-specs.js~ +0 -28
package/src/lib/specs-crawler.js
CHANGED
@@ -1,603 +1,605 @@
+#!/usr/bin/env node
+/**
+ * The spec crawler takes a list of spec URLs as input, gathers some knowledge
+ * about these specs (published versions, URL of the Editor's Draft, etc.),
+ * fetches these specs, parses them, extracts relevant information that they
+ * contain (such as the WebIDL they define, the list of specifications that they
+ * reference, and links to external specs), and produces a crawl report with the
+ * results of these investigations.
+ *
+ * @module crawler
+ */
+
+const fs = require('fs');
+const path = require('path');
+const specs = require('browser-specs');
+const cssDfnParser = require('./css-grammar-parser');
+const { generateIdlParsed, saveIdlParsed } = require('../cli/generate-idlparsed');
+const { generateIdlNames, saveIdlNames } = require('../cli/generate-idlnames');
+const {
+    completeWithAlternativeUrls,
+    expandBrowserModules,
+    expandCrawlResult,
+    expandSpecResult,
+    getGeneratedIDLNamesByCSSProperty,
+    isLatestLevelThatPasses,
+    processSpecification,
+    setupBrowser,
+    teardownBrowser,
+    createFolderIfNeeded
+} = require('./util');
+
+
+/**
+ * Return the spec if crawl succeeded or crawl result from given fallback list
+ * if crawl yielded an error (and fallback does exist).
+ *
+ * The function keeps the "error" property on the crawl result it returns so
+ * that the error does not get entirely lost.
+ *
+ * @function
+ * @param {Object} spec Actual spec crawl result
+ * @param {String} fallbackFolder The folder that contains fallback extracts
+ * @param {Array<Object>} fallbackData A list of crawl results to use as
+ *   fallback when needed
+ * @return {Object} The given crawl result or a new one that reuses fallback
+ *   content if needed
+ */
+async function specOrFallback(spec, fallbackFolder, fallbackData) {
+    if (spec.error && fallbackData) {
+        const fallback = fallbackData.find(s => s.url === spec.url);
+        if (fallback) {
+            const copy = Object.assign({}, fallback);
+            const result = await expandSpecResult(copy, fallbackFolder);
+            result.error = spec.error;
+            return result;
+        }
+    }
+    return spec;
+}
+
+
+/**
+ * Load and parse the given spec.
+ *
+ * @function
+ * @param {Object} spec The spec to load (must already have been completed with
+ *   useful info, as returned by "createInitialSpecDescriptions")
+ * @param {Object} crawlOptions Crawl options
+ * @return {Promise<Object>} The promise to get a spec object with crawl info
+ */
+async function crawlSpec(spec, crawlOptions) {
+    crawlOptions = crawlOptions || {};
+    spec.crawled = crawlOptions.publishedVersion ?
+        (spec.release ? spec.release : spec.nightly) :
+        spec.nightly;
+    const fallbackFolder = crawlOptions.fallback ?
+        path.dirname(crawlOptions.fallback) : '';
+
+    if (spec.error) {
+        return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
+    }
+
+    try {
+        const result = await processSpecification(
+            spec.crawled,
+            (spec, modules) => {
+                const idToHeading = modules.find(m => m.needsIdToHeadingMap) ?
+                    window.reffy.mapIdsToHeadings() : null;
+                const res = {
+                    crawled: window.location.toString()
+                };
+                modules.forEach(mod => {
+                    res[mod.property] = window.reffy[mod.name](spec, idToHeading);
+                });
+                return res;
+            },
+            [spec, crawlOptions.modules],
+            { quiet: crawlOptions.quiet,
+              forceLocalFetch: crawlOptions.forceLocalFetch }
+        );
+
+        // Specific rule for IDL extracts:
+        // parse the extracted WebIdl content
+        await generateIdlParsed(result);
+
+        if (result.css) {
+            // Specific rule for CSS properties:
+            // Add CSS property definitions that weren't in a table
+            if (result.dfns) {
+                result.dfns
+                    .filter(dfn => dfn.type == "property" && !dfn.informative)
+                    .forEach(propDfn => {
+                        propDfn.linkingText.forEach(lt => {
+                            if (!result.css.properties.hasOwnProperty(lt)) {
+                                result.css.properties[lt] = {
+                                    name: lt
+                                };
+                            }
+                        });
+                    });
+            }
+
+            // Specific rule for CSS properties:
+            // Ideally, the sample definition (property-name) in CSS2 and the custom
+            // property definition (--*) in CSS Variables would not be flagged as
+            // real CSS properties. In practice, they are. Let's remove them from
+            // the extract.
+            ['property-name', '--*'].forEach(prop => {
+                if ((result.css.properties || {})[prop]) {
+                    delete result.css.properties[prop];
+                }
+            });
+
+            // Specific rule for CSS extracts:
+            // Parse extracted CSS definitions and add generated IDL attribute names
+            Object.entries(result.css.properties || {}).forEach(([prop, dfn]) => {
+                if (dfn.value || dfn.newValues) {
+                    try {
+                        dfn.parsedValue = cssDfnParser.parsePropDefValue(
+                            dfn.value || dfn.newValues);
+                    } catch (e) {
+                        dfn.valueParseError = e.message;
+                    }
+                }
+                dfn.styleDeclaration = getGeneratedIDLNamesByCSSProperty(prop);
+            });
+            Object.entries(result.css.descriptors || {}).forEach(([desc, dfn]) => {
+                if (dfn.value) {
+                    try {
+                        dfn.parsedValue = cssDfnParser.parsePropDefValue(
+                            dfn.value);
+                    } catch (e) {
+                        dfn.valueParseError = e.message;
+                    }
+                }
+            });
+            Object.entries(result.css.valuespaces || {}).forEach(([vs, dfn]) => {
+                if (dfn.value) {
+                    try {
+                        dfn.parsedValue = cssDfnParser.parsePropDefValue(
+                            dfn.value);
+                    } catch (e) {
+                        dfn.valueParseError = e.message;
+                    }
+                }
+            });
+        }
+
+        // Copy results back into initial spec object
+        spec.crawled = result.crawled;
+        crawlOptions.modules.forEach(mod => {
+            if (result[mod.property]) {
+                spec[mod.property] = result[mod.property];
+                if (mod.property === 'idl') {
+                    spec.idlparsed = result.idlparsed;
+                }
+            }
+        });
+    }
+    catch (err) {
+        spec.title = spec.title || '[Could not be determined, see error]';
+        spec.error = err.toString() + (err.stack ? ' ' + err.stack : '');
+    }
+
+    return specOrFallback(spec, fallbackFolder, crawlOptions.fallbackData);
+}
+
+
+/**
+ * Saves spec results to extract files as needed and replaces the results with
+ * links accordingly.
+ *
+ * @function
+ * @param {Object} spec The results of crawling the spec. Object should contain
+ *   metadata about the spec and the crawl processing results in appropriate
+ *   properties.
+ * @param {Object} settings Crawl settings. Recognized settings: "modules",
+ *   "output" and "quiet". See CLI help (node reffy.js --help) for details.
+ *   The "modules" setting is mandatory and note that the function will not do
+ *   anything if "output" is not set.
+ * @return {Promise<Object>} The promise to get an updated spec object that
+ *   contains links to created extracts.
+ */
+async function saveSpecResults(spec, settings) {
+    settings = settings || {};
+    if (!settings.output) {
+        return spec;
+    }
+
+    async function getSubfolder(name) {
+        let subfolder = path.join(settings.output, name);
+        await createFolderIfNeeded(subfolder);
+        return subfolder;
+    }
+
+    const modules = settings.modules;
+    const folders = {};
+    for (const mod of modules) {
+        if (mod.metadata) {
+            continue;
+        }
+        folders[mod.property] = await getSubfolder(mod.property);
+
+        // Specific rule for IDL:
+        // Raw IDL goes to "idl" subfolder, parsed IDL goes to "idlparsed"
+        if (mod.property === 'idl') {
+            folders.idlparsed = await getSubfolder('idlparsed');
+        }
+    }
+
+    function getBaseJSON(spec) {
+        return {
+            spec: {
+                title: spec.title,
+                url: spec.crawled
+            }
+        };
+    }
+
+    async function saveExtract(spec, property, filter) {
+        if (filter(spec)) {
+            const contents = getBaseJSON(spec);
+            contents[property] = spec[property];
+            const json = JSON.stringify(contents, null, 2);
+            const filename = path.join(folders[property], spec.shortname + '.json');
+            await fs.promises.writeFile(filename, json);
+            spec[property] = `${property}/${spec.shortname}.json`;
+        }
+        else {
+            delete spec[property];
+        }
+    }
+
+    async function saveIdl(spec) {
+        let idlHeader = `
+            // GENERATED CONTENT - DO NOT EDIT
+            // Content was automatically extracted by Reffy into webref
+            // (https://github.com/w3c/webref)
+            // Source: ${spec.title} (${spec.crawled})`;
+        idlHeader = idlHeader.replace(/^\s+/gm, '').trim() + '\n\n';
+        const idl = idlHeader + spec.idl + '\n';
+        await fs.promises.writeFile(
+            path.join(folders.idl, spec.shortname + '.idl'), idl);
+        return `idl/${spec.shortname}.idl`;
+    };
+
+    async function saveCss(spec) {
+        // There are no comments in JSON, so include the spec title+URL as the
+        // first property instead.
+        const css = Object.assign(getBaseJSON(spec), spec.css);
+        const json = JSON.stringify(css, (key, val) => {
+            if ((key === 'parsedValue') || (key === 'valueParseError')) {
+                return undefined;
+            }
+            else {
+                return val;
+            }
+        }, 2) + '\n';
+        const pathname = path.join(folders.css, spec.shortname + '.json');
+        await fs.promises.writeFile(pathname, json);
+        return `css/${spec.shortname}.json`;
+    };
+
+    // Save IDL dumps
+    if (spec.idl) {
+        spec.idl = await saveIdl(spec);
+    }
+    if (spec.idlparsed) {
+        spec.idlparsed = await saveIdlParsed(spec, settings.output);
+    }
+
+    // Save CSS dumps
+    function defineCSSContent(spec) {
+        return spec.css && (
+            (Object.keys(spec.css.properties || {}).length > 0) ||
+            (Object.keys(spec.css.descriptors || {}).length > 0) ||
+            (Object.keys(spec.css.valuespaces || {}).length > 0));
+    }
+    if (defineCSSContent(spec)) {
+        spec.css = await saveCss(spec);
+    }
+
+    // Specs that define CSS now have a "css" key that points to the CSS extract.
+    // Specs that don't define CSS still have a "css" key that points to an
+    // empty object structure. Let's get rid of it.
+    if (spec.css && typeof spec.css !== 'string') {
+        delete spec.css;
+    }
+
+    // Quick and dirty function to determine whether a variable is "empty"
+    // (it returns true for falsy values, which is good enough for what we need)
+    function isEmpty(thing) {
+        return !thing ||
+            Array.isArray(thing) && (thing.length === 0) ||
+            (typeof thing == 'object') && (Object.keys(thing).length === 0);
+    }
+
+    // Save all other extracts
+    const remainingModules = modules.filter(mod =>
+        !mod.metadata && mod.property !== 'css' && mod.property !== 'idl');
+    for (const mod of remainingModules) {
+        await saveExtract(spec, mod.property, spec => !isEmpty(spec[mod.property]));
+        if (spec[mod.property] && typeof spec[mod.property] !== 'string') {
+            delete spec[mod.property];
+        }
+    }
+
+    return spec;
+}
+
+
+/**
+ * Main method that crawls the list of specification URLs and returns a structure
+ * that fully describes their titles, URLs, references, and IDL definitions.
+ *
+ * @function
+ * @param {Array(String)} speclist List of URLs to parse
+ * @param {Object} crawlOptions Crawl options
+ * @return {Promise<Array(Object)>} The promise to get an array of complete
+ *   specification descriptions
+ */
+async function crawlList(speclist, crawlOptions) {
+    // Make a shallow copy of crawl options object since we're going
+    // to modify properties in place
+    crawlOptions = Object.assign({}, crawlOptions);
+
+    // Expand list of processing modules to use if not already done
+    crawlOptions.modules = expandBrowserModules(crawlOptions.modules);
+
+    // Load fallback data if necessary
+    if (crawlOptions.fallback) {
+        try {
+            crawlOptions.fallbackData = JSON.parse(await fs.promises.readFile(crawlOptions.fallback)).results;
+        } catch (e) {
+            throw new Error(`Could not parse fallback data file ${crawlOptions.fallback}`);
+        }
+    }
+
+    // Prepare Puppeteer instance
+    await setupBrowser(crawlOptions.modules);
+
+    const list = speclist.map(completeWithAlternativeUrls);
+    const listAndPromise = list.map(spec => {
+        let resolve = null;
+        let reject = null;
+        let readyToCrawl = new Promise((resolveFunction, rejectFunction) => {
+            resolve = resolveFunction;
+            reject = rejectFunction;
+        });
+        return { spec, readyToCrawl, resolve, reject };
+    });
+
+    // In debug mode, specs are processed one by one. In normal mode,
+    // specs are processed in chunks
+    const chunkSize = Math.min((crawlOptions.debug ? 1 : 4), list.length);
+
+    let pos = 0;
+    function flagNextSpecAsReadyToCrawl() {
+        if (pos < listAndPromise.length) {
+            listAndPromise[pos].resolve();
+            pos += 1;
+        }
+    }
+    for (let i = 0; i < chunkSize; i++) {
+        flagNextSpecAsReadyToCrawl();
+    }
+
+    const nbStr = '' + listAndPromise.length;
+    async function crawlSpecAndPromise(specAndPromise, idx) {
+        await specAndPromise.readyToCrawl;
+        const spec = specAndPromise.spec;
+        const logCounter = ('' + (idx + 1)).padStart(nbStr.length, ' ') + '/' + nbStr;
+        crawlOptions.quiet ?? console.warn(`${logCounter} - ${spec.url} - crawling`);
+        let result = await crawlSpec(spec, crawlOptions);
+        result = await saveSpecResults(result, crawlOptions);
+        crawlOptions.quiet ?? console.warn(`${logCounter} - ${spec.url} - done`);
+        flagNextSpecAsReadyToCrawl();
+
+        return result;
+    }
+
+    const results = await Promise.all(listAndPromise.map(crawlSpecAndPromise));
+
+    // Close Puppeteer instance
+    teardownBrowser();
+
+    return results;
+}
+
+
+/**
+ * Merges extracts per series for the given property and adjusts links
+ *
+ * @function
+ * @param {Array(object)} data Crawl results
+ * @param {string} property The extract property to process
+ * @param {Object} settings Crawl settings. The function looks at the "output"
+ *   setting to determine where to look for extracts
+ * @return {Promise(Array)} The promise to get an updated crawl results array
+ */
+async function adjustExtractsPerSeries(data, property, settings) {
+    if (!settings.output) {
+        return data;
+    }
+
+    const fullLevels = data.filter(spec =>
+        (spec.seriesComposition !== 'delta') &&
+        isLatestLevelThatPasses(spec, data, spec => spec[property]));
+    const deltaLevels = data.filter(spec =>
+        (spec.seriesComposition === 'delta') && spec[property]);
+
+    data.forEach(spec => {
+        if (fullLevels.includes(spec)) {
+            // Full level, rename the extract after the series' shortname
+            const pathname = path.resolve(settings.output, spec[property]);
+            spec[property] = `${property}/${spec.series.shortname}${path.extname(spec[property])}`;
+            const newpathname = path.resolve(settings.output, spec[property]);
+            fs.renameSync(pathname, newpathname);
+        }
+        else if (deltaLevels.includes(spec)) {
+            // Delta level, need to keep the extract as-is
+        }
+        else if (spec[property]) {
+            // Not the right full level in the series, drop created extract
+            const pathname = path.resolve(settings.output, spec[property]);
+            fs.unlinkSync(pathname);
+            delete spec[property];
+        }
+    });
+
+    return data;
+}
+
+
+/**
+ * Saves the crawl results to an index.json file.
+ *
+ * @function
+ * @param {Array(Object)} data The list of specification structures to save
+ * @param {Object} settings Crawl settings. The function does not create any
+ *   save file if the "output" setting is not set.
+ * @return {Promise<void>} The promise to have saved the data
+ */
+async function saveResults(data, settings) {
+    if (!settings.output) {
+        return data;
+    }
+
+    // Save all results to an index.json file
+    const indexFilename = path.join(settings.output, 'index.json');
+    const contents = {
+        type: 'crawl',
+        title: 'Reffy crawl',
+        date: (new Date()).toJSON(),
+        options: settings,
+        stats: {},
+        results: data
+    };
+    contents.options.modules = contents.options.modules.map(mod => mod.property);
+    contents.stats = {
+        crawled: contents.results.length,
+        errors: contents.results.filter(spec => !!spec.error).length
+    };
+
+    await fs.promises.writeFile(indexFilename, JSON.stringify(contents, null, 2));
+    return contents;
+}
+
+
+/**
+ * Crawls the specifications listed in the given JSON file and generates a
+ * crawl report in the given folder.
+ *
+ * @function
+ * @param {Object} options Crawl options. Possible options are:
+ *   publishedVersion, debug, output, terse, modules and specs.
+ *   See CLI help (node reffy.js --help) for details.
+ * @return {Promise<void>} The promise that the crawl will have been made
+ */
+function crawlSpecs(options) {
+    function prepareListOfSpecs(list) {
+        return list.map(spec => {
+            if (typeof spec !== 'string') {
+                return spec;
+            }
+            let match = specs.find(s => s.url === spec || s.shortname === spec);
+            if (!match) {
+                match = specs.find(s => s.series &&
+                    s.series.shortname === spec &&
+                    s.series.currentSpecification === s.shortname);
+            }
+            if (match) {
+                return match;
+            }
+
+            let url = null;
+            try {
+                url = (new URL(spec)).href;
+            }
+            catch {
+                if (spec.endsWith('.html')) {
+                    url = (new URL(spec, `file://${process.cwd()}/`)).href;
+                }
+                else {
+                    const msg = `Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`;
+                    throw new Error(msg);
+                }
+            }
+            return {
+                url,
+                nightly: { url },
+                shortname: spec.replace(/[:\/\\\.]/g, ''),
+                series: {
+                    shortname: spec.replace(/[:\/\\\.]/g, ''),
+                }
+            };
+        });
+    }
+
+    const requestedList = options?.specs ?
+        prepareListOfSpecs(options.specs) :
+        specs;
+
+    // Make a shallow copy of passed options parameter and expand modules
+    // in place.
+    options = Object.assign({}, options);
+    options.modules = expandBrowserModules(options.modules);
+
+    return crawlList(requestedList, options)
+        .then(async results => {
+            // Merge extracts per series when necessary (CSS/IDL extracts)
+            for (const mod of options.modules) {
+                if (mod.extractsPerSeries) {
+                    await adjustExtractsPerSeries(results, mod.property, options);
+                    if (mod.property === 'idl') {
+                        await adjustExtractsPerSeries(results, 'idlparsed', options);
+                    }
+                }
+            }
+            return results;
+        })
+        .then(results => {
+            // Return results to the console or save crawl results to an
+            // index.json file
+            if (options.terse) {
+                const property = options.modules[0].property;
+                results = results.map(result => {
+                    let res = result[property];
+                    if (property === 'idl') {
+                        res = res?.idl;
+                    }
+                    return res;
+                });
+                if (results.length === 1) {
+                    results = results[0];
+                }
+                console.log(typeof results === 'string' ?
+                    results : JSON.stringify(results, null, 2));
+            }
+            else if (!options.output) {
+                console.log(JSON.stringify(results, null, 2));
+            }
+            else {
+                return saveResults(results, options);
+            }
+        })
+        .then(async crawlIndex => {
+            // Generate IDL names extracts from IDL extracts
+            // (and dfns extracts to create links to definitions)
+            if (!options.output || !crawlIndex?.options?.modules?.find(mod => mod === 'idl')) {
+                return;
+            }
+            const crawlResults = await expandCrawlResult(crawlIndex, options.output, ['idlparsed', 'dfns']);
+            const idlNames = generateIdlNames(crawlResults.results, options);
+            await saveIdlNames(idlNames, options.output);
+        });
+}
+
+
+/**************************************************
+Export methods for use as module
+**************************************************/
+module.exports.crawlList = crawlList;
+module.exports.crawlSpecs = crawlSpecs;
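
For readers evaluating the update, a minimal sketch of how the two exported entry points above can be driven from a script. The option names (specs, modules, output, publishedVersion, quiet) are the ones read by crawlSpecs and crawlList in the source shown in this diff; the require path, the spec shortnames and the reports/test-crawl output folder are illustrative assumptions, not documented usage.

// Hypothetical driver script: assumes reffy 6.2.1 is installed as a
// dependency and that the crawler module resolves at this path.
const { crawlSpecs } = require('reffy/src/lib/specs-crawler');

crawlSpecs({
    // URLs or browser-specs shortnames; strings are resolved against
    // browser-specs by prepareListOfSpecs (shortnames here are examples)
    specs: ['fetch', 'css-grid-2'],
    // Processing modules to run in the browser context; expanded by
    // expandBrowserModules before the crawl starts
    modules: ['idl', 'css', 'dfns'],
    // Extracts land in per-module subfolders here, plus an index.json summary
    output: 'reports/test-crawl',
    // Leave false to crawl nightly drafts; true prefers released versions
    publishedVersion: false,
    // Any non-null value suppresses the per-spec "crawling"/"done" logs
    quiet: true
})
    .then(() => console.log('Crawl complete'))
    .catch(err => {
        console.error(err);
        process.exit(1);
    });

Note how crawlList throttles work: each spec gets a readyToCrawl promise up front, the first chunk (four specs, or one in debug mode) is resolved immediately, and every finished crawl resolves the next promise, so at most chunkSize specs are being processed at any time.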