npm - reffy - Versions diffs - 18.4.7 → 18.6.0 - Mend

reffy 18.4.7 → 18.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +3 -3
package/reffy.js +15 -0
package/schemas/files/index.json +19 -0
package/src/browserlib/reffy.json +15 -0
package/src/lib/markdown-report.js +361 -0
package/src/lib/post-processor.js +2 -2
package/src/lib/specs-crawler.js +177 -82
package/src/lib/util.js +17 -1
package/src/postprocessing/idlnames.js +3 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "reffy",
-  "version": "18.4.7",
+  "version": "18.6.0",
   "description": "W3C/WHATWG spec dependencies exploration companion. Features a short set of tools to study spec references as well as WebIDL term definitions and references found in W3C specifications.",
   "repository": {
     "type": "git",
@@ -37,7 +37,7 @@
     "ajv-formats": "3.0.1",
     "commander": "13.1.0",
     "fetch-filecache-for-crawling": "5.1.1",
-    "puppeteer": "24.6.0",
+    "puppeteer": "24.6.1",
     "semver": "^7.3.5",
     "web-specs": "3.46.0",
     "webidl2": "24.4.1"
@@ -46,7 +46,7 @@
     "mocha": "11.1.0",
     "respec": "35.3.0",
     "respec-hljs": "2.1.1",
-    "rollup": "4.39.0",
+    "rollup": "4.40.0",
     "undici": "^7.0.0"
   },
   "overrides": {

package/reffy.js CHANGED Viewed

@@ -84,12 +84,14 @@ program
     .description('Crawls and processes a list of Web specifications')
     .option('-d, --debug', 'debug mode, crawl one spec at a time')
     .option('-f, --fallback <json>', 'fallback data to use when a spec crawl fails')
+    .option('--md, --markdown', 'output a Markdown report')
     .option('-m, --module <modules...>', 'spec processing modules')
     .option('-o, --output <folder>', 'existing folder/file where crawl results are to be saved')
     .option('-p, --post <modules...>', 'post-processing modules')
     .option('-q, --quiet', 'do not report progress and other warnings to the console')
     .option('-r, --release', 'crawl release (TR) version of specs')
     .option('-s, --spec <specs...>', 'specs to crawl')
+    .option('--summary', 'include a crawl summary in Markdown for each spec')
     .option('-t, --terse', 'output crawl results without metadata')
     .option('-u, --use-crawl <folder>', 'use given crawl result folder as input for post-processing')
     .action(async options => {
@@ -109,9 +111,11 @@ will dump ~100MB of data to the console:
         const crawlOptions = {
             debug: options.debug,
             fallback: options.fallback,
+            markdown: options.markdown,
             output: options.output,
             publishedVersion: options.release,
             quiet: options.quiet,
+            summary: options.summary,
             terse: options.terse,
             useCrawl: options.useCrawl
         };
@@ -177,6 +181,10 @@ Usage notes for some of the options:
   The "error" property is set on specs for which fallback data was used.
+--md, --markdown
+  Output a crawl summary in Markdown instead of a JSON report. The option takes
+  precedence over the \`--output\` option.
 -m, --module <modules...>
   If processing modules are not specified, the crawler runs all core processing
   modules defined in:
@@ -286,6 +294,13 @@ Usage notes for some of the options:
   spec, run:
     $ reffy -o reports/test -s all DOM-Level-2-Style
+--summary
+  Tells Reffy to attach a Markdown summary of the crawl per spec to the JSON
+  report, in a \`crawlSummary\` property. The Markdown report is suitable for
+  inclusion in a GitHub issue or similar. It starts with a summary, and then
+  details a few noteworthy extracts (CSS, dfns, Web IDL) in expandable
+  sections, with links to the online xref database search where appropriate.
 -t, --terse
   This flag cannot be combined with the --output option and cannot be set if
   more than one processing module gets run. When set, the crawler writes the

package/schemas/files/index.json CHANGED Viewed

@@ -35,6 +35,25 @@
       "items": {
         "type": "object"
       }
+    },
+    "post": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "mod": {
+            "type": "string"
+          },
+          "result": {
+            "oneOf": [
+              { "type": "object" },
+              { "type": "array" }
+            ]
+          },
+          "additionalProperties": false
+        }
+      }
     }
   }
 }

package/src/browserlib/reffy.json CHANGED Viewed

@@ -1,69 +1,84 @@
 [
   {
+    "label": "Title",
     "href": "./get-title.mjs",
     "property": "title",
     "metadata": true
   },
   {
+    "label": "Authoring tool",
     "href": "./get-generator.mjs",
     "property": "generator",
     "metadata": true
   },
   {
+    "label": "Date",
     "href": "./get-lastmodified-date.mjs",
     "property": "date",
     "metadata": true
   },
   {
+    "label": "Revision ID",
     "href": "./get-revision.mjs",
     "property": "revision",
     "metadata": true
   },
   {
+    "label": "Algorithms",
     "href": "./extract-algorithms.mjs",
     "property": "algorithms"
   },
   {
+    "label": "Links",
     "href": "./extract-links.mjs",
     "property": "links"
   },
   {
+    "label": "References",
     "href": "./extract-references.mjs",
     "property": "refs"
   },
   {
+    "label": "Events",
     "href": "./extract-events.mjs",
     "property": "events"
   },
   {
+    "label": "Web IDL",
     "href": "./extract-webidl.mjs",
     "property": "idl",
     "extractsPerSeries": true
   },
   {
+    "label": "CSS",
     "href": "./extract-cssdfn.mjs",
     "property": "css",
     "extractsPerSeries": true
   },
   {
+    "label": "Terms",
     "href": "./extract-dfns.mjs",
     "property": "dfns",
     "needsIdToHeadingMap": true
   },
   {
+    "label": "Elements",
     "href": "./extract-elements.mjs",
     "property": "elements"
   },
   {
+    "label": "Headings",
     "href": "./extract-headings.mjs",
     "property": "headings"
   },
   {
+    "label": "IDs",
     "href": "./extract-ids.mjs",
     "property": "ids",
     "needsIdToHeadingMap": true
   },
   {
+    "label": "CDDL",
     "href": "./extract-cddl.mjs",
     "property": "cddl"
   }

package/src/lib/markdown-report.js ADDED Viewed

@@ -0,0 +1,361 @@
+/**
+ * Helper function to generate a short report of a crawl in GitHub Markdown
+ * for a spec that features a summary of the crawl result, and details in
+ * expandable details sections about elements worthy of interest (such as CSS
+ * properties, exported definitions, Web IDL interfaces, etc.).
+ *
+ * The markdown does not contain titles on purpose so that it can be embedded
+ * as is in a larger Markdown context (e.g., a GitHub issue that looks at a
+ * spec from various perspectives).
+ */
+import reffyModules from '../browserlib/reffy.json' with { type: 'json' };
+import idlparsed from '../postprocessing/idlparsed.js';
+/**
+ * For each module, we need to know how to detect whether Reffy actually
+ * extracted something from the spec, how to summarize the results when it
+ * did, and whether/how to highlight specific details.
+ *
+ * Entries are keyed by the module's `property` name in reffy.json. Each entry
+ * defines:
+ * - `isPresent(value)`: truthy when the extract contains something to report.
+ * - `summary(value, spec)`: short text for the summary list.
+ * - `details(value, spec)` (optional): Markdown made of expandable
+ *   `<details>` sections, or a falsy value (null or an empty string) when
+ *   there is nothing to highlight.
+ *
+ * NOTE(review): `links.summary` and `refs.summary` read both sub-properties
+ * unconditionally, whereas the matching `isPresent` only requires one of them
+ * to be non-empty and tolerates a missing one. A value that carries only one
+ * of the two would pass `isPresent` and then throw in `summary` - confirm
+ * that extracts always contain both.
+ *
+ * TODO: reffy.json, browserlib code, and schemas could be refactored to bind
+ * all the logic linked to a module together: how to extract, whether something
+ * was extracted, how to summarize, etc. (but note the extraction logic actually
+ * runs in a browser page, while the rest runs in a Node.js context and that,
+ * for IDL, interesting info is returned by the idlparsed post-processing
+ * module)
+ */
+const moduleFunctions = {
+  algorithms: {
+    isPresent: isArrayPresent,
+    summary: arrayInfo
+  },
+  cddl: {
+    isPresent: isArrayPresent,
+    summary: value => 'found'
+  },
+  css: {
+    isPresent: value => ['properties', 'atrules', 'selectors', 'values']
+      .find(prop => isArrayPresent(value?.[prop])),
+    summary: value => ['properties', 'atrules', 'selectors', 'values']
+      .map(prop => value[prop]?.length > 0 ?
+        value[prop].length + ' ' + getCSSLabel(prop, value[prop].length) :
+        null)
+      .filter(found => found)
+      .join(', '),
+    details: value => ['properties', 'atrules', 'selectors']
+      .map(prop => {
+        if (!isArrayPresent(value[prop])) {
+          return null;
+        }
+        const types = [
+          'css-at-rule',
+          'css-descriptor',
+          'css-function',
+          'css-property',
+          'css-selector',
+          'css-type',
+          'css-value'
+        ].join(',')
+        const details = value[prop]
+          .map(val => '- ' + wrapTerm(val.name, 'css type', val.href) +
+            ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(val.name)}&types=${encodeURIComponent(types)}))`
+          );
+        if (details.length === 0) {
+          return null;
+        }
+        const report = ['<details>'];
+        report.push(`<summary>${details.length} CSS ${getCSSLabel(prop, details.length)}</summary>`);
+        report.push('');
+        report.push(...details);
+        report.push('</details>');
+        return report.join('\n');
+      })
+      .filter(details => !!details)
+      .join('\n')
+  },
+  dfns: {
+    // For dfns, note we make a distinction between terms that are exported by
+    // default (such as CSS and Web IDL terms) and terms that editors choose to
+    // export explicitly. The former get reported in other details, the latter
+    // are the ones most likely to cause duplication issues.
+    isPresent: isArrayPresent,
+    summary: value => [
+      {
+        access: 'explicitly exported',
+        dfns: value
+          .filter(dfn => dfn.access === 'public')
+          .filter(dfn => dfn.type === 'dfn' || dfn.type === 'cddl')
+      },
+      {
+        access: 'exported by default',
+        dfns: value
+          .filter(dfn => dfn.access === 'public')
+          .filter(dfn => dfn.type !== 'dfn' && dfn.type !== 'cddl')
+      },
+      {
+        access: 'private',
+        dfns: value
+          .filter(dfn => dfn.access !== 'public')
+      }
+    ]
+      .map(t => t.dfns.length > 0 ? t.dfns.length + ' ' + t.access : null)
+      .filter(found => found)
+      .join(', '),
+    details: value => {
+      const details = value
+        .filter(dfn => dfn.access === 'public')
+        .filter(dfn => dfn.type === 'dfn' || dfn.type === 'cddl')
+        .map(dfn => '- ' + wrapTerm(dfn.linkingText[0], dfn.type, dfn.href) +
+          (dfn.for?.length > 0 ? ' for ' + wrapTerm(dfn.for[0], dfn.type): '') +
+          `, type ${dfn.type}` +
+          ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(dfn.linkingText[0])}))`
+        );
+      if (details.length === 0) {
+        return null;
+      }
+      const s = details.length > 1 ? 's' : '';
+      const report = ['<details>'];
+      report.push(`<summary>${details.length} explicitly exported term${s}</summary>`);
+      report.push('');
+      report.push(...details);
+      report.push('</details>');
+      return report.join('\n');
+    }
+  },
+  events: {
+    isPresent: isArrayPresent,
+    summary: arrayInfo
+  },
+  headings: {
+    isPresent: isArrayPresent,
+    summary: arrayInfo
+  },
+  idl: {
+    // Note: For IDL, we're more interested in the info that gets produced by the
+    // idlparsed post-processing module (which gets run automatically if it
+    // did not run during crawl)
+    // For extended names, exclude names that the spec itself defines
+    // (they will be reported as names defined by the spec already)
+    isPresent: value => (typeof value === 'string') && value.length > 0,
+    summary: (value, spec) => {
+      const parsedIdl = spec.idlparsed;
+      if (typeof parsedIdl === 'string') {
+        return 'invalid Web IDL found';
+      }
+      const res = [];
+      const idlNames = Object.keys(parsedIdl.idlNames)
+        .concat(Object.keys(parsedIdl.idlExtendedNames)
+          .filter(name => !parsedIdl.idlNames[name]));
+      if (idlNames.length > 0) {
+        const s = idlNames.length > 1 ? 's' : '';
+        res.push(`${idlNames.length} name${s} (or partial${s})`);
+      }
+      const globals = Object.keys(parsedIdl.globals);
+      if (globals.length > 0) {
+        const s = globals.length > 1 ? 's' : '';
+        res.push(`${globals.length} global${s}`);
+      }
+      return res.join(', ');
+    },
+    details: (value, spec) => {
+      const parsedIdl = spec.idlparsed;
+      if (typeof parsedIdl === 'string') {
+        return null;
+      }
+      const report = [];
+      const idlNames = Object.keys(parsedIdl.idlNames);
+      if (idlNames.length > 0) {
+        const s = idlNames.length > 1 ? 's' : '';
+        report.push('<details>');
+        report.push(`<summary>${idlNames.length} Web IDL name${s}</summary>`);
+        report.push('');
+        for (const name of idlNames) {
+          const type = parsedIdl.idlNames[name].type;
+          report.push('- ' + type + ' ' +
+            wrapTerm(name, type, parsedIdl.idlNames[name].href) +
+            ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(name)}&types=_IDL_))`);
+        }
+        report.push('</details>');
+      }
+      const idlExtendedNames = Object.keys(parsedIdl.idlExtendedNames)
+        .filter(name => !parsedIdl.idlNames[name]);
+      if (idlExtendedNames.length > 0) {
+        const s = idlExtendedNames.length > 1 ? 's' : '';
+        report.push('<details>');
+        report.push(`<summary>${idlExtendedNames.length} extended Web IDL name${s}</summary>`);
+        report.push('');
+        for (const name of idlExtendedNames) {
+          const type = parsedIdl.idlExtendedNames[name][0].type;
+          report.push('- ' + type + ' ' +
+            wrapTerm(name, type, parsedIdl.idlExtendedNames[name][0].href) +
+            ` ([xref search](https://respec.org/xref/?term=${encodeURIComponent(name)}&types=_IDL_))`);
+        }
+        report.push('</details>');
+      }
+      const globals = Object.keys(parsedIdl.globals);
+      if (globals.length > 0) {
+        const s = globals.length > 1 ? 's' : '';
+        report.push('<details>');
+        report.push(`<summary>${globals.length} Web IDL global${s}</summary>`);
+        report.push('');
+        for (const glob of globals) {
+          report.push(`- \`${glob}\``);
+        }
+        report.push('</details>');
+      }
+      return report.join('\n');
+    }
+  },
+  ids: {
+    isPresent: isArrayPresent,
+    summary: arrayInfo
+  },
+  links: {
+    isPresent: value =>
+      isArrayPresent(Object.keys(value?.rawlinks ?? {})) ||
+      isArrayPresent(Object.keys(value?.autolinks ?? {})),
+    summary: value => ['rawlinks', 'autolinks']
+      .map(prop => Object.keys(value[prop]).length > 0 ?
+        Object.keys(value[prop]).length + ' ' + prop :
+        null)
+      .filter(found => found)
+      .join(', ')
+  },
+  refs: {
+    isPresent: value =>
+      isArrayPresent(value?.normative) ||
+      isArrayPresent(value?.informative),
+    summary: value => ['normative', 'informative']
+      .map(prop => value[prop].length > 0 ?
+        value[prop].length + ' ' + prop :
+        null)
+      .filter(found => found)
+      .join(', ')
+  }
+};
+/**
+ * Return true if the given value is an array that contains at least one item.
+ *
+ * Any non-array value (including undefined, null, strings and array-like
+ * objects) yields false, so the function is safe to call on a missing extract.
+ *
+ * @function
+ * @param {*} value The value to test
+ * @return {Boolean} true for a non-empty array, false otherwise
+ */
+function isArrayPresent(value) {
+  return Array.isArray(value) && value.length > 0;
+}
+/**
+ * Return a short summary string of the form "<n> found", where <n> is the
+ * `length` of the given value.
+ *
+ * The function does not check the value itself: it reads `value.length`
+ * directly and is paired with `isArrayPresent` in the module table above.
+ *
+ * @function
+ * @param {Array} value The list of extracted items
+ * @return {String} The summary string, e.g., "3 found"
+ */
+function arrayInfo(value) {
+  return value.length + ' found';
+}
+function wrapTerm(term, type, href) { // Format a term as Markdown, as a link when href is set
+  if (type === 'abstract-op' || type === 'dfn') { // these two types render as plain text, not as code
+    if (href) {
+      return `[${term}](${href})`;
+    }
+    else {
+      return `"${term}"`; // no link target: quote the term instead
+    }
+  }
+  const res = '`' + term + '`'; // any other type renders as inline code
+  if (href) {
+    return `[${res}](${href})`;
+  }
+  else {
+    return res;
+  }
+}
+function getCSSLabel(prop, nb) { // Label for a CSS extract category, plural form when nb > 1
+  switch (prop) { // no default case: any other prop value yields undefined
+  case 'atrules':
+    return nb > 1 ? 'at-rules' : 'at-rule';
+  case 'properties':
+    return nb > 1 ? 'properties' : 'property';
+  case 'selectors':
+    return nb > 1 ? 'selectors' : 'selector';
+  case 'values':
+    return nb > 1 ? 'values': 'value';
+  }
+}
+/**
+ * Return a Markdown string that summarizes the given spec crawl results
+ *
+ * The report starts with a "Crawl summary:" line followed by a list: metadata
+ * values (for reffy.json modules flagged as `metadata`), the canonical URL,
+ * the crawled URL when it differs, the extracts that were found (with a short
+ * summary each), and the labels of the extracts that were not found. When
+ * modules have details to highlight, they follow after a blank line.
+ *
+ * Only non-metadata modules that have an entry in `moduleFunctions` are
+ * reported as extracts.
+ *
+ * Side effects: when `specResult.idl` is set and `specResult.idlparsed` is
+ * not, the function runs the idlparsed post-processing module on the given
+ * object (presumably this sets `specResult.idlparsed`, which the IDL summary
+ * reads - confirm against idlparsed.js).
+ *
+ * @function
+ * @param {Object} specResult Crawl result for a spec. The function reads
+ *   `url`, `crawled`, and one property per reffy.json module.
+ * @return {Promise<String>} The promise to get the Markdown report
+ */
+export async function generateSpecReport(specResult) {
+  // Start report with a summary on spec metadata, adding URLs as needed
+  const summary = [];
+  for (const mod of reffyModules) {
+    if (!mod.metadata) {
+      continue;
+    }
+    if (specResult[mod.property]) {
+      summary.push(`- ${mod.label}: ${specResult[mod.property]}`);
+    }
+  }
+  summary.push(`- Canonical URL: [${specResult.url}](${specResult.url})`);
+  if (specResult.crawled && specResult.crawled !== specResult.url) {
+    summary.push(`- Crawled URL: [${specResult.crawled}](${specResult.crawled})`);
+  }
+  // If the spec defines IDL, run the idlparsed post-processing module (the IDL summary and details read spec.idlparsed)
+  if (specResult.idl && !specResult.idlparsed) {
+    await idlparsed.run(specResult);
+  }
+  // Add summary of extracts found and not found
+  const extractModules = reffyModules
+    .filter(mod => !mod.metadata && moduleFunctions[mod.property])
+    .map(mod => Object.assign(mod, moduleFunctions[mod.property])); // note: copies the functions onto the imported reffy.json entry itself
+  const extractsSummary = [];
+  const missingSummary = [];
+  for (const mod of extractModules) {
+    const value = specResult[mod.property];
+    if (mod.isPresent(value)) {
+      extractsSummary.push(`  - ${mod.label}: ${mod.summary(value, specResult)}`);
+    }
+    else {
+      missingSummary.push(mod.label);
+    }
+  }
+  if (extractsSummary.length > 0) {
+    extractsSummary.sort(); // default string sort, so entries end up ordered by label
+    summary.push(`- Spec defines:`);
+    summary.push(...extractsSummary);
+  }
+  if (missingSummary.length > 0) {
+    missingSummary.sort();
+    summary.push(`- No ${missingSummary.join(', ')} definitions found`);
+  }
+  // End of summary, look at possible details of interest
+  const details = [];
+  for (const mod of extractModules) {
+    const value = specResult[mod.property];
+    if (!mod.details || !mod.isPresent(value)) {
+      continue;
+    }
+    const modDetails = mod.details(value, specResult);
+    if (modDetails) {
+      details.push(modDetails);
+    }
+  }
+  const report = [];
+  report.push('Crawl summary:');
+  report.push(...summary);
+  if (details.length > 0) {
+    report.push('');
+    report.push(...details);
+  }
+  return report.join('\n');
+}

package/src/lib/post-processor.js CHANGED Viewed

@@ -50,7 +50,7 @@
 import fs from 'node:fs';
 import path from 'node:path';
 import { pathToFileURL } from 'node:url';
-import { createFolderIfNeeded } from './util.js';
+import { createFolderIfNeeded, shouldSaveToFile } from './util.js';
 import csscomplete from '../postprocessing/csscomplete.js';
 import events from '../postprocessing/events.js';
 import idlnames from '../postprocessing/idlnames.js';
@@ -220,7 +220,7 @@ async function save(mod, processResult, options) {
     }
   }
-  if (!options.output) {
+  if (!shouldSaveToFile(options)) {
     // Nothing to do if no output folder was given
     return;
   }

package/src/lib/specs-crawler.js CHANGED Viewed

@@ -16,6 +16,7 @@ import { inspect } from 'node:util';
 import specs from 'web-specs' with { type: 'json' };
 import postProcessor from './post-processor.js';
 import ThrottledQueue from './throttled-queue.js';
+import { generateSpecReport } from './markdown-report.js';
 import {
     completeWithAlternativeUrls,
     expandBrowserModules,
@@ -26,7 +27,8 @@ import {
     setupBrowser,
     teardownBrowser,
     createFolderIfNeeded,
-    loadJSON
+    loadJSON,
+    shouldSaveToFile
 } from './util.js';
 import packageConfig from '../../package.json' with { type: 'json' };
@@ -187,7 +189,7 @@ async function crawlSpec(spec, crawlOptions) {
  */
 async function saveSpecResults(spec, settings) {
     settings = settings || {};
-    if (!settings.output) {
+    if (!shouldSaveToFile(settings)) {
         return spec;
     }
@@ -336,16 +338,78 @@ async function saveSpecResults(spec, settings) {
 /**
- * Main method that crawls the list of specification URLs and return a structure
- * that full describes its title, URLs, references, and IDL definitions.
+ * Helper function that takes a list of specs as inputs and expands them to an
+ * object suitable for crawling, with as much information as possible.
  *
  * @function
- * @param {Array(String)} speclist List of URLs to parse
+ * @param {Array(String|Object)} list A list of "specs", where each spec can be
+ * a string that represents a spec's shortname, series shortname or URL, or an
+ * object that already contains appropriate information.
+ * @return {Array(Object)} An array of spec objects. Note: When a spec was
+ * already described through an object, the function returns the object as-is
+ * and makes no attempt at validating it.
+ */
+function prepareListOfSpecs(list) {
+    return list.map(spec => {
+        if (typeof spec !== 'string') { // anything that is not a string is returned as-is
+            return spec;
+        }
+        let match = specs.find(s => s.url === spec || s.shortname === spec); // exact URL or shortname in web-specs
+        if (!match) {
+            match = specs.find(s => s.series && // otherwise, the current spec of the series with that shortname
+                s.series.shortname === spec &&
+                s.series.currentSpecification === s.shortname);
+        }
+        if (match) {
+            return match;
+        }
+        let url = null;
+        try {
+            url = (new URL(spec)).href; // throws unless the string parses as an absolute URL
+        }
+        catch {
+            if (spec.endsWith('.html')) { // treat as a path relative to the current working directory
+                url = (new URL(spec, `file://${process.cwd()}/`)).href;
+            }
+            else {
+                const msg = `Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`;
+                throw new Error(msg);
+            }
+        }
+        return { // minimal spec object for a spec that is not in web-specs
+            url,
+            nightly: { url },
+            shortname: spec.replace(/[:\/\\\.]/g, ''), // shortname derived from the input, minus ":", "/", "\" and "."
+            series: {
+                shortname: spec.replace(/[:\/\\\.]/g, ''),
+            }
+        };
+    });
+}
+/**
+ * Crawl the provided list of specifications and return an array with the crawl
+ * results.
+ *
+ * Crawl options may be specified as a second parameter. The function ignores
+ * options that affect the output such as `output`, `markdown` or `terse`. The
+ * function also does not run post-processing modules that apply at the "crawl"
+ * level.
+ *
+ * @function
+ * @param {Array(String|Object)} speclist List of specs to crawl, where each
+ * spec can be a string that represents a spec's shortname, series shortname or
+ * URL, or an object that already contains appropriate information.
  * @param {Object} crawlOptions Crawl options
- * @return {Promise<Array(Object)} The promise to get an array of complete
- *   specification descriptions
+ * @return {Promise<Array(Object)>} The promise to get an array with crawl
+ *   results.
  */
 async function crawlList(speclist, crawlOptions) {
+    // Expand the list of specs to spec objects suitable for crawling
+    speclist = prepareListOfSpecs(speclist);
     // Make a shallow copy of crawl options object since we're going
     // to modify properties in place
     crawlOptions = Object.assign({speclist}, crawlOptions);
@@ -419,6 +483,25 @@ async function crawlList(speclist, crawlOptions) {
         await teardownBrowser();
     }
+    // Merge extracts per series when necessary (CSS/IDL extracts)
+    for (const mod of crawlOptions.modules) {
+        if (mod.extractsPerSeries) {
+            await adjustExtractsPerSeries(results, mod.property, crawlOptions);
+        }
+    }
+    for (const mod of crawlOptions.post ?? []) {
+        if (postProcessor.extractsPerSeries(mod)) {
+            await adjustExtractsPerSeries(results, mod.property, crawlOptions);
+        }
+    }
+    // Attach a crawl summary in Markdown if so requested
+    if (crawlOptions.markdown || crawlOptions.summary) {
+        for (const res of results) {
+            res.crawlSummary = await generateSpecReport(res);
+        }
+    }
     return results;
 }
@@ -434,7 +517,7 @@ async function crawlList(speclist, crawlOptions) {
  * @return {Promise(Array)} The promise to get an updated crawl results array
  */
 async function adjustExtractsPerSeries(data, property, settings) {
-    if (!settings.output) {
+    if (!shouldSaveToFile(settings)) {
         return data;
     }
@@ -486,7 +569,7 @@ async function adjustExtractsPerSeries(data, property, settings) {
  * @return {Promise<void>} The promise to have saved the data
  */
 async function saveResults(contents, settings) {
-    if (!settings.output) {
+    if (!shouldSaveToFile(settings)) {
         return;
     }
     const indexFilename = path.join(settings.output, 'index.json');
@@ -495,62 +578,38 @@ async function saveResults(contents, settings) {
 /**
- * Crawls the specifications listed in the given JSON file and generates a
- * crawl report in the given folder.
+ * Run a crawl given a set of options.
+ *
+ * The set of options matches those defined in the CLI. In particular, the
+ * function crawls all specs by default.
+ *
+ * If the `output` option is not set, the function outputs a JSON dump of the
+ * crawl results to the console (or a report in Markdown if the `markdown`
+ * option is set) and does not return anything to the caller.
+ *
+ * If the `output` option is set to the magic value `{return}`, the function
+ * outputs nothing but returns an object that represents the crawl results,
+ * with the actual results per spec stored in a `results` property.
+ *
+ * If the `output` option is set to any other value, the function interprets it
+ * as a folder, creates subfolders and files with crawl results in that folder,
+ * with a root `index.json` entry point, and does not return anything.
  *
  * @function
- * @param {Object} options Crawl options. Possible options are:
+ * @param {Object} options Crawl options. Possible options include:
  *   publishedVersion, debug, output, terse, modules and specs.
  *   See CLI help (node reffy.js --help) for details.
- * @return {Promise<void>} The promise that the crawl will have been made
+ * @return {Promise<void|Object>} The promise that the crawl will have been
+ *   made along with the index of crawl results if the `output` option was set
+ *   to the specific value `{return}`.
  */
 async function crawlSpecs(options) {
-    function prepareListOfSpecs(list) {
-        return list.map(spec => {
-            if (typeof spec !== 'string') {
-                return spec;
-            }
-            let match = specs.find(s => s.url === spec || s.shortname === spec);
-            if (!match) {
-                match = specs.find(s => s.series &&
-                    s.series.shortname === spec &&
-                    s.series.currentSpecification === s.shortname);
-            }
-            if (match) {
-                return match;
-            }
-            let url = null;
-            try {
-                url = (new URL(spec)).href;
-            }
-            catch {
-                if (spec.endsWith('.html')) {
-                    url = (new URL(spec, `file://${process.cwd()}/`)).href;
-                }
-                else {
-                    const msg = `Spec ID "${spec}" can neither be interpreted as a URL, a valid shortname or a relative path to an HTML file`;
-                    throw new Error(msg);
-                }
-            }
-            return {
-                url,
-                nightly: { url },
-                shortname: spec.replace(/[:\/\\\.]/g, ''),
-                series: {
-                    shortname: spec.replace(/[:\/\\\.]/g, ''),
-                }
-            };
-        });
-    }
     const crawlIndex = options?.useCrawl ?
         await loadJSON(path.join(options.useCrawl, 'index.json')) :
         null;
-    const requestedList = crawlIndex ? crawlIndex.results :
-        options?.specs ? prepareListOfSpecs(options.specs) :
-        specs;
+    const requestedList = crawlIndex ?
+        crawlIndex.results :
+        (options?.specs ?? specs);
     // Make a shallow copy of passed options parameter and expand modules
     // in place.
@@ -558,20 +617,6 @@ async function crawlSpecs(options) {
     options.modules = expandBrowserModules(options.modules);
     return crawlList(requestedList, options)
-        .then(async results => {
-            // Merge extracts per series when necessary (CSS/IDL extracts)
-            for (const mod of options.modules) {
-                if (mod.extractsPerSeries) {
-                    await adjustExtractsPerSeries(results, mod.property, options);
-                }
-            }
-            for (const mod of options.post ?? []) {
-                if (postProcessor.extractsPerSeries(mod)) {
-                    await adjustExtractsPerSeries(results, mod.property, options);
-                }
-            }
-            return results;
-        })
         .then(async results => {
             // Create and return a crawl index out of the results, to allow
             // post-processing modules to run.
@@ -605,10 +650,19 @@ async function crawlSpecs(options) {
                 console.log(typeof results === 'string' ?
                     results : JSON.stringify(results, null, 2));
             }
+            else if (options.markdown) {
+                console.log('# Crawl results');
+                console.log();
+                for (const res of results) {
+                    console.log(`## ${res.title}`);
+                    console.log(res.crawlSummary);
+                    console.log();
+                }
+            }
             else if (!options.output) {
                 console.log(JSON.stringify(results, null, 2));
             }
-            else {
+            else if (shouldSaveToFile(options)) {
                 await saveResults(index, options);
             }
             return index;
@@ -619,7 +673,7 @@ async function crawlSpecs(options) {
                 if (!postProcessor.appliesAtLevel(mod, 'crawl')) {
                     continue;
                 }
-                const crawlResults = options.output ?
+                const crawlResults = shouldSaveToFile(options) ?
                     await expandCrawlResult(
                         crawlIndex, options.output, postProcessor.dependsOn(mod)) :
                     crawlIndex;
@@ -630,25 +684,66 @@ async function crawlSpecs(options) {
                     console.log();
                     console.log(JSON.stringify(result, null, 2));
                 }
+                else if (!shouldSaveToFile(options)) {
+                    // Attach the post-processing result to the index of the
+                    // crawl results.
+                    crawlIndex.post = crawlIndex.post ?? [];
+                    crawlIndex.post.push({
+                        mod: postProcessor.getProperty(mod),
+                        result
+                    });
+                }
             }
+            // Function does not return anything if it already reported the
+            // results to the console or files. It returns the index of the
+            // crawl results otherwise.
+            if (!options.output || shouldSaveToFile(options)) {
+                return;
+            }
+            return crawlIndex;
         });
 }
-/**************************************************
-Export methods for use as module
-**************************************************/
-// TODO: consider more alignment between the two crawl functions or
-// find more explicit names to distinguish between them:
-// - "crawlList" takes an explicit list of specs as input, does not run the
-// post-processor, and returns the results without saving them to files.
-// - "crawlSpecs" takes options as input, runs all steps and saves results
-// to files (or outputs the results to the console). It does not return
-// anything.
+/**
+ * Crawl a set of specs according to the given set of crawl options.
+ *
+ * The function behaves differently depending on the parameters it receives.
+ *
+ * If it receives no parameter, the function behaves as if it were called with
+ * a single empty object as parameter.
+ *
+ * If it receives a single object as parameter, this object sets crawl options
+ * (essentially matching CLI options). What the function outputs or returns
+ * depends on the `output` option. If `output` is not set, the function outputs
+ * a JSON dump of the index of the crawl results to the console and returns
+ * nothing to the caller. If `output` is set to the "magic" value `{return}`,
+ * the function does not output anything but returns the index of the crawl
+ * results which a caller may then process in any way they wish. If `output` is
+ * set to any other value, it defines a folder, the function saves crawl
+ * results as folders and files in that folder and returns nothing.
+ *
+ * If it receives an array as first parameter, the array defines the set of
+ * specs that are to be crawled (each spec may be a string representing the
+ * spec's shortname, series shortname, or URL; or a spec object). The second
+ * parameter, if present, defines additional crawl options (same as above,
+ * except the `specs` option should not be set). The function returns an
+ * array of crawl results to the caller.
+ *
+ * Note the function does not apply post-processing modules that run at the
+ * "crawl" level when it receives an array as first parameter. It will also
+ * ignore crawl options that control the output such as `output`, `markdown`
+ * and `terse`.
+ */
 function crawl(...args) {
     return Array.isArray(args[0]) ?
         crawlList.apply(this, args) :
         crawlSpecs.apply(this, args);
 }
+/**************************************************
+Export crawl method for use as module
+**************************************************/
 export { crawl as crawlSpecs };

package/src/lib/util.js CHANGED Viewed

@@ -1137,6 +1137,21 @@ async function getSchemaValidationFunction(schemaName) {
     };
 }
+/**
+ * Return true if the crawler should save results to files given the crawl
+ * options.
+ *
+ * Results are saved to files when the `output` option is set to anything
+ * other than the magic value `{return}`.
+ *
+ * NOTE(review): the function returns the result of a `&&` expression, so the
+ * "false" case may actually be `undefined` or an empty string when `output`
+ * is missing or empty. This is fine in a condition, but do not compare the
+ * result with `=== false`.
+ *
+ * @function
+ * @param {Object} crawlOptions Crawl options (optional)
+ * @return {Boolean} true when the crawler should save the results to files,
+ *   false (or another falsy value, see note) otherwise.
+ */
+function shouldSaveToFile(crawlOptions) {
+    return crawlOptions?.output && crawlOptions.output !== '{return}';
+}
 export {
     fetch,
     expandBrowserModules,
@@ -1151,5 +1166,6 @@ export {
     createFolderIfNeeded,
     getInterfaceTreeInfo,
     getSchemaValidationFunction,
-    loadJSON
+    loadJSON,
+    shouldSaveToFile
 };

package/src/postprocessing/idlnames.js CHANGED Viewed

@@ -14,7 +14,8 @@ import {
   getExpectedDfnFromIdlDesc } from '../cli/check-missing-dfns.js';
 import {
   isLatestLevelThatPasses,
-  createFolderIfNeeded } from '../lib/util.js';
+  createFolderIfNeeded,
+  shouldSaveToFile } from '../lib/util.js';
 /**
@@ -379,7 +380,7 @@ async function generateIdlNames(crawl, options) {
  * @param {Object} options Crawl options ("output" will be used)
  */
 async function saveIdlNames(names, options) {
-  if (!options?.output) {
+  if (!shouldSaveToFile(options)) {
     return;
   }