@markuplint/spec-generator 4.6.19 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -3,6 +3,32 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
  See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
5
5
 
6
+ # [4.8.0](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.7.0...@markuplint/spec-generator@4.8.0) (2025-08-24)
7
+
8
+
9
+ ### Bug Fixes
10
+
11
+ * **spec-generator:** remove debug log ([a00691b](https://github.com/markuplint/markuplint/commit/a00691bc9874ba6a8adb5bf6217e7d6c79959660))
12
+
13
+
14
+ ### Features
15
+
16
+ * **spec-generator:** update MDN scraping logic ([45889a9](https://github.com/markuplint/markuplint/commit/45889a9a308c48ccd43df6837463d0ecabd547a8))
17
+
18
+
19
+
20
+
21
+
22
+ # [4.7.0](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.6.19...@markuplint/spec-generator@4.7.0) (2025-08-13)
23
+
24
+ ### Bug Fixes
25
+
26
+ - ensure that each `clean` command correctly removes build files ([110b78e](https://github.com/markuplint/markuplint/commit/110b78e85379d29a84ca68325127344a87a570b6))
27
+
28
+ ### Features
29
+
30
+ - **spec-generator:** update ARIA spec scraping for ARIA 1.3 page structure ([f614b54](https://github.com/markuplint/markuplint/commit/f614b54b6e495fd52783ac3fa5e5ca199f5632a3))
31
+
6
32
  ## [4.6.19](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.6.18...@markuplint/spec-generator@4.6.19) (2025-04-13)
7
33
 
8
34
  **Note:** Version bump only for package @markuplint/spec-generator
package/README.md CHANGED
@@ -1,3 +1,82 @@
1
1
  # @markuplint/spec-generator
2
2
 
3
- This is private package for generating `@markuplint/html-spec`
3
+ Private builder used to generate `@markuplint/html-spec`.
4
+
5
+ It assembles an Extended Spec JSON from the HTML element source files and external references
6
+ (MDN, WAI‑ARIA, HTML‑ARIA), then writes `index.json` in `@markuplint/html-spec`.
7
+
8
+ ## How it is invoked
9
+
10
+ Called from `packages/@markuplint/html-spec/build.mjs`:
11
+
12
+ ```ts
13
+ await main({
14
+ outputFilePath: 'index.json',
15
+ htmlFilePattern: 'src/spec.*.json',
16
+ commonAttrsFilePath: 'src/spec-common.attributes.json',
17
+ commonContentsFilePath: 'src/spec-common.contents.json',
18
+ });
19
+ ```
20
+
21
+ You normally don't run this directly; use:
22
+
23
+ - From repo root: `yarn up:gen`
24
+ - Only html-spec: `yarn workspace @markuplint/html-spec run gen`
25
+
26
+ ## What it does
27
+
28
+ 1. Read element sources
29
+
30
+ - Load every `src/spec.*.json` and infer the element name from the filename (e.g. `spec.a.json` → `a`).
31
+
32
+ 2. Enrich from MDN
33
+
34
+ - Fetch the MDN element page and populate missing metadata:
35
+ - `cite`, `description`, `categories`, `omission`, attribute flags
36
+ - Existing fields in `src/spec.*.json` take precedence over scraped values
37
+ - Attributes are merged name-by-name; manual entries win
38
+
39
+ 3. Add obsolete elements
40
+
41
+ - Inject HTML obsolete elements (WHATWG list) and some deprecated SVG elements if not present.
42
+
43
+ 4. Load shared data
44
+
45
+ - `def['#globalAttrs']` from `src/spec-common.attributes.json`
46
+ - `def['#contentModels']` from `src/spec-common.contents.json` (`models` key)
47
+
48
+ 5. Build ARIA definitions
49
+
50
+ - Scrape WAI‑ARIA (1.1/1.2/1.3) and Graphics‑ARIA, plus HTML‑ARIA cross‑refs, to produce
51
+ `def['#aria']` (roles, properties, synonyms, defaults, and equivalent HTML attrs).
52
+
53
+ 6. Emit Extended Spec JSON
54
+
55
+ - `{ cites, def: { #globalAttrs, #aria, #contentModels }, specs: [...] }` → `index.json`
56
+ (Pretty‑printed by the caller)
57
+
58
+ ## Source of truth vs. generated data
59
+
60
+ - The source of truth for element specs lives in `@markuplint/html-spec/src/`.
61
+ - This generator is purely a build step; do not edit the output `index.json` by hand.
62
+
63
+ ## Precedence rules (important)
64
+
65
+ - Manual data in `src/spec.*.json` overrides MDN‑scraped values on conflict.
66
+ - Attribute objects are merged per name; manual keys win, MDN may fill missing flags.
67
+ - Shared files under `src/spec-common.*.json` are imported as‑is.
68
+
69
+ ## Network and caching
70
+
71
+ - Uses live HTTP fetches against the MDN/W3C specs. Responses are cached in‑process for the current run only.
72
+ - If a fetch fails, the entry may be left empty; re‑run later, or add the missing data to the manual source (`src/spec.*.json`) to cover it.
73
+
74
+ ## When to change this package
75
+
76
+ - Only when the scraping targets change (DOM structure/URLs), or when the Extended Spec shape evolves
77
+ in `@markuplint/ml-spec`.
78
+
79
+ ## See also
80
+
81
+ - `@markuplint/html-spec` README — how to edit the element sources.
82
+ - `@markuplint/ml-spec` README — schema shapes, generation, and spec merging.
package/lib/aria.js CHANGED
@@ -1,4 +1,3 @@
1
- /* global cheerio */
2
1
  import { fetch } from './fetch.js';
3
2
  import { arrayUnique, nameCompare } from './utils.js';
4
3
  export async function getAria() {
@@ -80,7 +79,7 @@ async function getRoles(version, graphicsAria = false) {
80
79
  /\[deprecated in aria 1\.\d]/i) === -1
81
80
  ? undefined
82
81
  : true;
83
- const $features = $el.find('.role-features tr');
82
+ const $features = $el.find('.role-features tr, table.def');
84
83
  const generalization = $features
85
84
  .find('.role-parent a')
86
85
  .toArray()
@@ -193,8 +192,8 @@ async function getProps(version, roles) {
193
192
  .toArray()
194
193
  .map(el => {
195
194
  const href = $(el).prop('href');
196
- const hashIndex = href.indexOf('#');
197
- const hash = hashIndex === -1 ? undefined : href.slice(hashIndex);
195
+ const hashIndex = href?.indexOf('#');
196
+ const hash = hashIndex === -1 ? undefined : href?.slice(hashIndex);
198
197
  return hash?.slice(1);
199
198
  })
200
199
  .filter((s) => !!s));
@@ -203,9 +202,9 @@ async function getProps(version, roles) {
203
202
  const className = $section.attr('class');
204
203
  const type = className && /property/i.test(className) ? 'property' : 'state';
205
204
  const deprecated = (className && /deprecated/i.test(className)) || undefined;
206
- const $value = $section.find(`table.${type}-features .${type}-value, .state-features .property-value`);
205
+ const $value = $section.find(`table .${type}-value, table .property-value, .state-features .property-value`);
207
206
  const value = $value.text().trim();
208
- const $valueDescriptions = $section.find('table.value-descriptions tbody tr');
207
+ const $valueDescriptions = $section.find('table:is(.value-descriptions, .def:has(.value-description)) tbody tr');
209
208
  const valueDescriptions = {};
210
209
  $valueDescriptions.each((_, $tr) => {
211
210
  const name = $($tr)
@@ -213,7 +212,7 @@ async function getProps(version, roles) {
213
212
  .text()
214
213
  .replaceAll(/\(default\)\s*:?/gi, '')
215
214
  .trim();
216
- const desc = $($tr).find('.value-description').text().trim();
215
+ const desc = $($tr).find('.value-description').text().trim().replaceAll(/\s+/g, ' ');
217
216
  valueDescriptions[name] = desc;
218
217
  });
219
218
  const enumValues = [];
@@ -227,7 +226,7 @@ async function getProps(version, roles) {
227
226
  .trim());
228
227
  enumValues.push(...values);
229
228
  }
230
- const $defaultValue = $section.find('table.value-descriptions .value-name .default');
229
+ const $defaultValue = $section.find('table:is(.value-descriptions, .def:has(.value-description)) .value-name .default');
231
230
  const defaultValue = $defaultValue
232
231
  .text()
233
232
  .replaceAll(/\(default\)/gi, '')
package/lib/fetch.d.ts CHANGED
@@ -1,3 +1,4 @@
1
- export declare function fetch(url: string): Promise<cheerio.Root>;
1
+ import * as cheerio from 'cheerio';
2
+ export declare function fetch(url: string): Promise<cheerio.CheerioAPI>;
2
3
  export declare function fetchText(url: string): Promise<string>;
3
4
  export declare function getReferences(): string[];
@@ -50,13 +50,16 @@ export async function getElements(filePattern) {
50
50
  specs.push(...obsoleteElements);
51
51
  specs = await Promise.all(specs.map(async (el) => {
52
52
  const { localName, namespace, ml } = getName(el.name);
53
- const cite = `https://developer.mozilla.org/en-US/docs/Web/${ml}/Element/${localName}`;
53
+ const urlTagName = /^h[1-6]$/i.test(localName) ? 'Heading_Elements' : localName;
54
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/a
55
+ // https://developer.mozilla.org/en-US/docs/Web/SVG/Reference/Element/a
56
+ const cite = `https://developer.mozilla.org/en-US/docs/Web/${ml}/Reference/Element${ml === 'HTML' ? 's' : ''}/${urlTagName}`;
54
57
  const mdnData = await fetchHTMLElement(cite);
55
58
  // @ts-ignore
56
59
  delete el.name;
57
60
  // @ts-ignore
58
61
  delete el.namespace;
59
- return {
62
+ const spec = {
60
63
  // @ts-ignore
61
64
  name: namespace === 'http://www.w3.org/2000/svg' ? `svg:${localName}` : localName,
62
65
  namespace,
@@ -99,9 +102,9 @@ export async function getElements(filePattern) {
99
102
  return attrs;
100
103
  })()),
101
104
  };
105
+ return spec;
102
106
  }));
103
107
  return specs
104
108
  .sort(nameCompare)
105
- .sort((a, b) => (a.namespace == b.namespace ? 0 : a.namespace === 'http://www.w3.org/2000/svg' ? 1 : -1))
106
- .filter(spec => spec.name !== 'h1-h6');
109
+ .sort((a, b) => (a.namespace == b.namespace ? 0 : a.namespace === 'http://www.w3.org/2000/svg' ? 1 : -1));
107
110
  }
package/lib/scraping.d.ts CHANGED
@@ -1,4 +1,3 @@
1
1
  import type { ExtendedElementSpec } from '@markuplint/ml-spec';
2
- export declare function fetchHTMLElementLinks(): Promise<string[]>;
3
2
  export declare function fetchObsoleteElements(obsoleteList: readonly string[], specs: readonly ExtendedElementSpec[]): ExtendedElementSpec[];
4
3
  export declare function fetchHTMLElement(link: string): Promise<ExtendedElementSpec>;
package/lib/scraping.js CHANGED
@@ -1,19 +1,6 @@
1
- /* global cheerio */
2
1
  import { fetch } from './fetch.js';
3
- import { getThisOutline, sortObjectByKey } from './utils.js';
4
- const MAIN_ARTICLE_SELECTOR = 'article.main-page-content, article.article';
5
- export async function fetchHTMLElementLinks() {
6
- const $ = await fetch('https://developer.mozilla.org/en-US/docs/Web/HTML/Element');
7
- const $listHeading = $($('#sidebar-quicklinks summary')
8
- .toArray()
9
- .find(el => /html elements/i.test($(el).text())));
10
- const $list = $listHeading.siblings('ol,ul');
11
- const lists = $list
12
- .find('li a')
13
- .toArray()
14
- .map(el => `https://developer.mozilla.org${$(el).attr('href')}`);
15
- return lists;
16
- }
2
+ import { sortObjectByKey } from './utils.js';
3
+ const MAIN_ARTICLE_SELECTOR = 'main#content';
17
4
  export function fetchObsoleteElements(obsoleteList, specs) {
18
5
  return obsoleteList
19
6
  .map(name => {
@@ -42,16 +29,13 @@ export function fetchObsoleteElements(obsoleteList, specs) {
42
29
  }
43
30
  export async function fetchHTMLElement(link) {
44
31
  const $ = await fetch(link);
45
- let name = link.replace(/.+\/([\w-]+)$/, '$1').toLowerCase();
46
- if (name === 'heading_elements') {
47
- name = 'h1-h6';
48
- }
32
+ const name = link.replace(/.+\/([\w-]+)$/, '$1').toLowerCase();
49
33
  const $article = $(MAIN_ARTICLE_SELECTOR);
50
- $article.find('p:empty').remove();
51
- const description = $article.find('h2#summary').next('div').find('> p:first-of-type').text().trim().replaceAll(/\s+/g, ' ') ||
52
- $article.find('.seoSummary').closest('p').text().trim().replaceAll(/\s+/g, ' ') ||
53
- $article.find('h1').next('div').find('> p:first-of-type').text().trim().replaceAll(/\s+/g, ' ') ||
54
- $article.find('.section-content:eq(0)').find('> p:eq(0)').text().trim().replaceAll(/\s+/g, ' ');
34
+ const description = $article
35
+ .find('.reference-layout__header .content-section')
36
+ .text()
37
+ .trim()
38
+ .replaceAll(/\s+/g, ' ');
55
39
  const $bcTable = $article.find('.bc-table');
56
40
  const $bcTableFirstRow = $bcTable.find('tbody tr:first-child th');
57
41
  const isBcTableIsAvailable = $bcTableFirstRow.find('code').text().trim() === name;
@@ -99,11 +83,11 @@ export async function fetchHTMLElement(link) {
99
83
  categories.push('#palpable');
100
84
  if (/script-supporting/i.test(cat))
101
85
  categories.push('#script-supporting');
102
- let { attributes } = getAttributes($, '#attributes', name);
103
- const { attributes: deprecatedAttributes } = getAttributes($, '#deprecated_attributes', name);
104
- const { attributes: individualAttributes } = getAttributes($, '#individual_attributes', name);
105
- const { attributes: nonStandardAttributes } = getAttributes($, '#non-standard_attributes', name);
106
- const { attributes: obsoleteAttributes } = getAttributes($, '#obsolete_attributes', name);
86
+ let { attributes } = getAttributes($, 'attributes');
87
+ const { attributes: deprecatedAttributes } = getAttributes($, 'deprecated_attributes');
88
+ const { attributes: individualAttributes } = getAttributes($, 'individual_attributes');
89
+ const { attributes: nonStandardAttributes } = getAttributes($, 'non-standard_attributes');
90
+ const { attributes: obsoleteAttributes } = getAttributes($, 'obsolete_attributes');
107
91
  attributes = sortObjectByKey({
108
92
  ...attributes,
109
93
  ...deprecatedAttributes,
@@ -135,7 +119,7 @@ export async function fetchHTMLElement(link) {
135
119
  function getProperty(
136
120
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
137
121
  $, prop) {
138
- const $tr = $(MAIN_ARTICLE_SELECTOR).find('table.properties tr') ?? $('#Technical_summary').next('table tr');
122
+ const $tr = $('#technical_summary ~ figure.table-container > table tr');
139
123
  const $th = $($tr
140
124
  .find('th')
141
125
  .toArray()
@@ -144,11 +128,10 @@ $, prop) {
144
128
  }
145
129
  function getAttributes(
146
130
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
147
- $, heading, tagName) {
148
- const $heading = $(heading);
149
- const $outline = getThisOutline($, $heading);
131
+ $, id) {
132
+ const $section = $(`.content-section[aria-labelledby="${id}"]`);
150
133
  const attributes = {};
151
- for (const dt of $outline.find('> div > dl > dt').toArray()) {
134
+ for (const dt of $section.find('> dl > dt').toArray()) {
152
135
  const $dt = $(dt);
153
136
  const name = $dt.find('code').text().trim();
154
137
  if (!name) {
package/lib/utils.d.ts CHANGED
@@ -1,10 +1,12 @@
1
+ import type * as cheerio from 'cheerio';
2
+ import type { AnyNode } from 'domhandler';
1
3
  type HasName = {
2
4
  readonly name: string;
3
5
  };
4
6
  export declare function nameCompare(a: HasName | string, b: HasName | string): 1 | 0 | -1;
5
7
  export declare function sortObjectByKey<T>(o: T): T;
6
8
  export declare function arrayUnique<T extends HasName>(array: readonly T[]): T[];
7
- export declare function getThisOutline($: cheerio.Root, $start: cheerio.Cheerio): cheerio.Cheerio;
9
+ export declare function getThisOutline($: cheerio.CheerioAPI, $start: cheerio.Cheerio<AnyNode>): cheerio.Cheerio<AnyNode>;
8
10
  export declare function mergeAttributes<T>(fromDocs: T, fromJSON: T): T;
9
11
  export declare function keys<T, K = keyof T>(object: T): K[];
10
12
  export declare function getName(origin: string): {
package/lib/utils.js CHANGED
@@ -1,4 +1,3 @@
1
- /* global cheerio */
2
1
  export function nameCompare(a, b) {
3
2
  const nameA = typeof a === 'string' ? a : (a.name?.toUpperCase() ?? String(a));
4
3
  const nameB = typeof b === 'string' ? b : (b.name?.toUpperCase() ?? String(b));
package/package.json CHANGED
@@ -1,11 +1,10 @@
1
1
  {
2
2
  "name": "@markuplint/spec-generator",
3
- "version": "4.6.19",
3
+ "version": "4.8.0",
4
4
  "description": "Generates @markuplint/html-spec",
5
5
  "repository": "git@github.com:markuplint/markuplint.git",
6
6
  "author": "Yusuke Hirao <yusukehirao@me.com>",
7
7
  "license": "MIT",
8
- "private": false,
9
8
  "type": "module",
10
9
  "exports": {
11
10
  ".": {
@@ -19,22 +18,22 @@
19
18
  "scripts": {
20
19
  "build": "tsc --project tsconfig.build.json",
21
20
  "dev": "tsc --watch --project tsconfig.build.json",
22
- "clean": "tsc --build --clean"
21
+ "clean": "tsc --build --clean tsconfig.build.json"
23
22
  },
24
23
  "dependencies": {
25
- "@types/cheerio": "0.22.35",
24
+ "@types/cheerio": "1.0.0",
26
25
  "ajv": "8.17.1",
27
- "cheerio": "1.0.0",
26
+ "cheerio": "1.1.2",
28
27
  "cli-progress": "3.12.0",
29
- "fast-xml-parser": "5.2.0",
30
- "glob": "11.0.1",
31
- "strip-json-comments": "5.0.1"
28
+ "fast-xml-parser": "5.2.5",
29
+ "glob": "11.0.3",
30
+ "strip-json-comments": "5.0.3"
32
31
  },
33
32
  "devDependencies": {
34
- "@markuplint/ml-spec": "4.9.6",
35
- "@markuplint/test-tools": "4.5.19",
33
+ "@markuplint/ml-spec": "4.10.0",
34
+ "@markuplint/test-tools": "4.5.21",
36
35
  "@types/cli-progress": "3.11.6",
37
- "type-fest": "4.39.1"
36
+ "type-fest": "4.41.0"
38
37
  },
39
- "gitHead": "eb36d59f7e13d4e59ff3f3c4eabb5ec06c070eb0"
38
+ "gitHead": "ae97eb2d31ecedf4f0800fbbf18588aad4ebca04"
40
39
  }