@markuplint/spec-generator 4.8.0 → 4.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.ja.md +178 -0
- package/ARCHITECTURE.md +178 -0
- package/CHANGELOG.md +9 -5
- package/README.md +29 -41
- package/SKILL.md +134 -0
- package/docs/maintenance.ja.md +212 -0
- package/docs/maintenance.md +212 -0
- package/docs/modules.ja.md +252 -0
- package/docs/modules.md +252 -0
- package/docs/scraping.ja.md +320 -0
- package/docs/scraping.md +320 -0
- package/lib/aria.d.ts +6 -0
- package/lib/aria.js +45 -0
- package/lib/fetch.d.ts +21 -0
- package/lib/fetch.js +28 -1
- package/lib/global-attrs.d.ts +6 -0
- package/lib/global-attrs.js +6 -0
- package/lib/html-elements.d.ts +8 -0
- package/lib/html-elements.js +31 -9
- package/lib/index.d.ts +22 -0
- package/lib/index.js +15 -0
- package/lib/read-json.d.ts +18 -0
- package/lib/read-json.js +18 -0
- package/lib/scraping.d.ts +15 -0
- package/lib/scraping.js +52 -0
- package/lib/svg.d.ts +7 -0
- package/lib/svg.js +7 -0
- package/lib/utils.d.ts +59 -0
- package/lib/utils.js +56 -0
- package/package.json +7 -7
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# @markuplint/spec-generator
|
|
2
|
+
|
|
3
|
+
## 概要
|
|
4
|
+
|
|
5
|
+
`@markuplint/spec-generator` は、markuplint の拡張仕様 JSON を生成するビルドツールです。W3C および MDN のウェブ標準ドキュメントをスクレイピングし、HTML/SVG 要素仕様、グローバル属性、ARIA ロール・プロパティ、コンテンツモデル定義を集約して、`@markuplint/html-spec` が消費する単一の `index.json` ファイルに出力します。
|
|
6
|
+
|
|
7
|
+
このパッケージは直接利用することを想定していません。`@markuplint/html-spec/build.mjs` からのみ呼び出されます。
|
|
8
|
+
|
|
9
|
+
## ディレクトリ構成
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
src/
|
|
13
|
+
├── index.ts — main() オーケストレータおよび公開 API
|
|
14
|
+
├── html-elements.ts — 要素仕様のアセンブリ、非推奨要素リスト
|
|
15
|
+
├── scraping.ts — MDN 要素ページのスクレイピング(説明、カテゴリ、属性)
|
|
16
|
+
├── aria.ts — W3C ARIA 仕様のスクレイピング(ロール、プロパティ、ステート)
|
|
17
|
+
├── global-attrs.ts — グローバル属性定義の読み込み
|
|
18
|
+
├── svg.ts — MDN から SVG 非推奨要素名を取得
|
|
19
|
+
├── fetch.ts — HTTP フェッチ(プロセス内キャッシュ+プログレスバー付き)
|
|
20
|
+
├── read-json.ts — コメント除去付き JSON ファイル読み込み+ glob 対応
|
|
21
|
+
└── utils.ts — 共有ヘルパー関数(ソート、重複排除、名前解析)
|
|
22
|
+
|
|
23
|
+
lib/ — コンパイル出力(`yarn build` で生成)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## アーキテクチャ図
|
|
27
|
+
|
|
28
|
+
```mermaid
|
|
29
|
+
flowchart TD
|
|
30
|
+
subgraph entry ["エントリーポイント"]
|
|
31
|
+
main["main(options)"]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
subgraph parallel ["並列データ収集"]
|
|
35
|
+
getElements["getElements()\n(html-elements.ts)"]
|
|
36
|
+
getGlobalAttrs["getGlobalAttrs()\n(global-attrs.ts)"]
|
|
37
|
+
getAria["getAria()\n(aria.ts)"]
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
subgraph scraping ["ウェブスクレイピング"]
|
|
41
|
+
mdnHTML["MDN HTML 要素ページ\n(scraping.ts)"]
|
|
42
|
+
mdnSVG["MDN SVG 要素インデックス\n(svg.ts)"]
|
|
43
|
+
ariaSpecs["W3C ARIA 1.1 / 1.2 / 1.3\n(aria.ts)"]
|
|
44
|
+
graphicsAria["Graphics ARIA\n(aria.ts)"]
|
|
45
|
+
htmlAria["HTML-ARIA マッピング\n(aria.ts)"]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
subgraph local ["ローカルデータ"]
|
|
49
|
+
specFiles["spec.*.json ファイル\n(read-json.ts)"]
|
|
50
|
+
commonAttrs["spec-common.attributes.json\n(read-json.ts)"]
|
|
51
|
+
commonContents["spec-common.contents.json\n(read-json.ts)"]
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
subgraph output ["出力"]
|
|
55
|
+
extendedSpec["ExtendedSpec JSON\n{ cites, def, specs }"]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
main --> |"Promise.all"| getElements
|
|
59
|
+
main --> |"Promise.all"| getGlobalAttrs
|
|
60
|
+
main --> |"Promise.all"| getAria
|
|
61
|
+
|
|
62
|
+
getElements --> specFiles
|
|
63
|
+
getElements --> mdnHTML
|
|
64
|
+
getElements --> mdnSVG
|
|
65
|
+
getGlobalAttrs --> commonAttrs
|
|
66
|
+
getAria --> ariaSpecs
|
|
67
|
+
getAria --> graphicsAria
|
|
68
|
+
getAria --> htmlAria
|
|
69
|
+
|
|
70
|
+
getElements --> extendedSpec
|
|
71
|
+
getGlobalAttrs --> extendedSpec
|
|
72
|
+
getAria --> extendedSpec
|
|
73
|
+
main --> commonContents
|
|
74
|
+
main --> extendedSpec
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## モジュール一覧
|
|
78
|
+
|
|
79
|
+
| モジュール | 主要エクスポート | 役割 |
|
|
80
|
+
| ------------------ | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- |
|
|
81
|
+
| `index.ts` | `main()`, `Options` | 全データ収集のオーケストレーション、出力の組み立て、ファイル書き込み |
|
|
82
|
+
| `html-elements.ts` | `getElements()` | 要素仕様ファイルの読み込み、MDN データでの補完、非推奨要素の追加 |
|
|
83
|
+
| `scraping.ts` | `fetchHTMLElement()`, `fetchObsoleteElements()` | MDN 要素ページからメタデータと属性をスクレイピング |
|
|
84
|
+
| `aria.ts` | `getAria()` | W3C ARIA 仕様からロール、プロパティ、ステートをスクレイピング |
|
|
85
|
+
| `global-attrs.ts` | `getGlobalAttrs()` | JSON からグローバル属性定義を読み込み |
|
|
86
|
+
| `svg.ts` | `getSVGElementList()` | MDN から非推奨 SVG 要素名を取得 |
|
|
87
|
+
| `fetch.ts` | `fetch()`, `fetchText()`, `getReferences()` | 2層キャッシュとプログレスバー付き HTTP フェッチ |
|
|
88
|
+
| `read-json.ts` | `readJson()`, `readJsons()` | コメント除去と glob マッチング付き JSON 読み込み |
|
|
89
|
+
| `utils.ts` | `nameCompare()`, `sortObjectByKey()`, `arrayUnique()`, `getName()`, `getThisOutline()`, `mergeAttributes()`, `keys()` | 共有ユーティリティ |
|
|
90
|
+
|
|
91
|
+
## 公開 API
|
|
92
|
+
|
|
93
|
+
パッケージは単一のエントリーポイントをエクスポートします:
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
export type Options = {
|
|
97
|
+
readonly outputFilePath: string; // 生成 JSON の出力先パス
|
|
98
|
+
readonly htmlFilePattern: string; // 要素仕様ファイルの glob パターン
|
|
99
|
+
readonly commonAttrsFilePath: string; // グローバル属性 JSON のパス
|
|
100
|
+
readonly commonContentsFilePath: string; // コンテンツモデル JSON のパス
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
export async function main(options: Options): Promise<void>;
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## データフロー
|
|
107
|
+
|
|
108
|
+
1. **並列データ収集** -- `main()` は `Promise.all` で3つのタスクを同時実行:
|
|
109
|
+
- `getElements(htmlFilePattern)` -- ローカル仕様ファイルを読み込み、各要素について MDN をスクレイピングし、非推奨要素を追加
|
|
110
|
+
- `getGlobalAttrs(commonAttrsFilePath)` -- グローバル属性定義を読み込み
|
|
111
|
+
- `getAria()` -- W3C ARIA 仕様(1.1, 1.2, 1.3)および Graphics ARIA、HTML-ARIA をスクレイピング
|
|
112
|
+
|
|
113
|
+
2. **組み立て** -- 結果を `ExtendedSpec` オブジェクトに統合:
|
|
114
|
+
- `cites` -- フェッチした全 URL のソート済みリスト(`getReferences()` から)
|
|
115
|
+
- `def["#globalAttrs"]` -- グローバル属性カテゴリ
|
|
116
|
+
- `def["#aria"]` -- バージョンごとの ARIA ロール、プロパティ、グラフィックスロール
|
|
117
|
+
- `def["#contentModels"]` -- コンテンツモデルカテゴリ(`readJson()` から)
|
|
118
|
+
- `specs` -- 要素仕様の配列
|
|
119
|
+
|
|
120
|
+
3. **出力** -- 組み立てた JSON を `outputFilePath` に書き込み
|
|
121
|
+
|
|
122
|
+
## 外部依存
|
|
123
|
+
|
|
124
|
+
| パッケージ | 用途 |
|
|
125
|
+
| --------------------- | --------------------------------------------------------- |
|
|
126
|
+
| `cheerio` | スクレイピングしたウェブページの HTML 解析と DOM クエリ |
|
|
127
|
+
| `cli-progress` | フェッチ操作のターミナルプログレスバー |
|
|
128
|
+
| `ajv` | JSON スキーマバリデーション(利用可能だが実行時は未使用) |
|
|
129
|
+
| `fast-xml-parser` | XML パース(利用可能だが現在のモジュールでは未使用) |
|
|
130
|
+
| `glob` | `readJsons()` 用のファイル glob パターンマッチング |
|
|
131
|
+
| `strip-json-comments` | パース前に JSON ファイルからコメントを除去 |
|
|
132
|
+
|
|
133
|
+
**開発依存:**
|
|
134
|
+
|
|
135
|
+
| パッケージ | 用途 |
|
|
136
|
+
| --------------------- | ------------------------------------------------------------------------ |
|
|
137
|
+
| `@markuplint/ml-spec` | 型定義(`ExtendedSpec`, `ExtendedElementSpec`, `ARIARoleInSchema` など) |
|
|
138
|
+
| `type-fest` | `WritableDeep` ユーティリティ型 |
|
|
139
|
+
|
|
140
|
+
## 統合ポイント
|
|
141
|
+
|
|
142
|
+
```mermaid
|
|
143
|
+
flowchart LR
|
|
144
|
+
subgraph upstream ["型プロバイダ"]
|
|
145
|
+
mlSpec["@markuplint/ml-spec\n(型: ExtendedSpec,\nExtendedElementSpec など)"]
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
subgraph pkg ["@markuplint/spec-generator"]
|
|
149
|
+
main["main()"]
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
subgraph consumer ["消費者"]
|
|
153
|
+
htmlSpec["@markuplint/html-spec\n(build.mjs が main() を呼び出し)"]
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
subgraph external ["外部ソース"]
|
|
157
|
+
mdn["MDN Web Docs"]
|
|
158
|
+
w3c["W3C ARIA / HTML-ARIA"]
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
mlSpec -->|"型"| pkg
|
|
162
|
+
external -->|"HTTP スクレイピング"| pkg
|
|
163
|
+
pkg -->|"index.json を生成"| consumer
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### 上流
|
|
167
|
+
|
|
168
|
+
- **`@markuplint/ml-spec`** はこのパッケージで使用される全 TypeScript 型を提供: `ExtendedSpec`, `ExtendedElementSpec`, `ARIARoleInSchema`, `ARIAProperty`, `Category`, `Attribute` など
|
|
169
|
+
|
|
170
|
+
### 下流
|
|
171
|
+
|
|
172
|
+
- **`@markuplint/html-spec`** が唯一の消費者。`build.mjs` がこのパッケージから `main()` をインポートし、仕様ソースと出力先のファイルパスを渡す
|
|
173
|
+
|
|
174
|
+
## ドキュメントマップ
|
|
175
|
+
|
|
176
|
+
- [モジュールリファレンス](docs/modules.ja.md) -- 各ソースモジュールの詳細ドキュメント
|
|
177
|
+
- [スクレイピング詳細](docs/scraping.ja.md) -- スクレイピング対象、セレクタ、エラー処理
|
|
178
|
+
- [メンテナンスガイド](docs/maintenance.ja.md) -- トラブルシューティング、レシピ、デバッグ
|
package/ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# @markuplint/spec-generator
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
`@markuplint/spec-generator` is a build tool that generates the markuplint extended specification JSON. It scrapes W3C and MDN web standards documentation, aggregates HTML/SVG element specs, global attributes, ARIA roles and properties, and content model definitions into a single `index.json` file consumed by `@markuplint/html-spec`.
|
|
6
|
+
|
|
7
|
+
This package is not intended for direct use. It is invoked exclusively from `@markuplint/html-spec/build.mjs`.
|
|
8
|
+
|
|
9
|
+
## Directory Structure
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
src/
|
|
13
|
+
├── index.ts — main() orchestrator and public API
|
|
14
|
+
├── html-elements.ts — Element specification assembly, obsolete element list
|
|
15
|
+
├── scraping.ts — MDN element page scraping (descriptions, categories, attributes)
|
|
16
|
+
├── aria.ts — W3C ARIA specification scraping (roles, properties, states)
|
|
17
|
+
├── global-attrs.ts — Global attribute definition loader
|
|
18
|
+
├── svg.ts — Deprecated SVG element list fetcher (from MDN)
|
|
19
|
+
├── fetch.ts — HTTP fetch with in-process caching and progress bar
|
|
20
|
+
├── read-json.ts — JSON file reader with comment stripping and glob support
|
|
21
|
+
└── utils.ts — Shared helper functions (sorting, deduplication, name parsing)
|
|
22
|
+
|
|
23
|
+
lib/ — Compiled output (generated by `yarn build`)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Architecture Diagram
|
|
27
|
+
|
|
28
|
+
```mermaid
|
|
29
|
+
flowchart TD
|
|
30
|
+
subgraph entry ["Entry Point"]
|
|
31
|
+
main["main(options)"]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
subgraph parallel ["Parallel Data Gathering"]
|
|
35
|
+
getElements["getElements()\n(html-elements.ts)"]
|
|
36
|
+
getGlobalAttrs["getGlobalAttrs()\n(global-attrs.ts)"]
|
|
37
|
+
getAria["getAria()\n(aria.ts)"]
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
subgraph scraping ["Web Scraping"]
|
|
41
|
+
mdnHTML["MDN HTML Element Pages\n(scraping.ts)"]
|
|
42
|
+
mdnSVG["MDN SVG Element Index\n(svg.ts)"]
|
|
43
|
+
ariaSpecs["W3C ARIA 1.1 / 1.2 / 1.3\n(aria.ts)"]
|
|
44
|
+
graphicsAria["Graphics ARIA\n(aria.ts)"]
|
|
45
|
+
htmlAria["HTML-ARIA Mapping\n(aria.ts)"]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
subgraph local ["Local Data"]
|
|
49
|
+
specFiles["spec.*.json files\n(read-json.ts)"]
|
|
50
|
+
commonAttrs["spec-common.attributes.json\n(read-json.ts)"]
|
|
51
|
+
commonContents["spec-common.contents.json\n(read-json.ts)"]
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
subgraph output ["Output"]
|
|
55
|
+
extendedSpec["ExtendedSpec JSON\n{ cites, def, specs }"]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
main --> |"Promise.all"| getElements
|
|
59
|
+
main --> |"Promise.all"| getGlobalAttrs
|
|
60
|
+
main --> |"Promise.all"| getAria
|
|
61
|
+
|
|
62
|
+
getElements --> specFiles
|
|
63
|
+
getElements --> mdnHTML
|
|
64
|
+
getElements --> mdnSVG
|
|
65
|
+
getGlobalAttrs --> commonAttrs
|
|
66
|
+
getAria --> ariaSpecs
|
|
67
|
+
getAria --> graphicsAria
|
|
68
|
+
getAria --> htmlAria
|
|
69
|
+
|
|
70
|
+
getElements --> extendedSpec
|
|
71
|
+
getGlobalAttrs --> extendedSpec
|
|
72
|
+
getAria --> extendedSpec
|
|
73
|
+
main --> commonContents
|
|
74
|
+
main --> extendedSpec
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Module Overview
|
|
78
|
+
|
|
79
|
+
| Module | Primary Export(s) | Role |
|
|
80
|
+
| ------------------ | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------- |
|
|
81
|
+
| `index.ts` | `main()`, `Options` | Orchestrates all data gathering, assembles output, writes file |
|
|
82
|
+
| `html-elements.ts` | `getElements()` | Reads element spec files, enriches with MDN data, adds obsoletes |
|
|
83
|
+
| `scraping.ts` | `fetchHTMLElement()`, `fetchObsoleteElements()` | Scrapes MDN element pages for metadata and attributes |
|
|
84
|
+
| `aria.ts` | `getAria()` | Scrapes W3C ARIA specs for roles, properties, states |
|
|
85
|
+
| `global-attrs.ts` | `getGlobalAttrs()` | Reads global attribute definitions from JSON |
|
|
86
|
+
| `svg.ts` | `getSVGElementList()` | Fetches deprecated SVG element names from MDN |
|
|
87
|
+
| `fetch.ts` | `fetch()`, `fetchText()`, `getReferences()` | HTTP fetching with dual-layer cache and progress bar |
|
|
88
|
+
| `read-json.ts` | `readJson()`, `readJsons()` | JSON reading with comment stripping and glob matching |
|
|
89
|
+
| `utils.ts` | `nameCompare()`, `sortObjectByKey()`, `arrayUnique()`, `getName()`, `getThisOutline()`, `mergeAttributes()`, `keys()` | Shared utilities |
|
|
90
|
+
|
|
91
|
+
## Public API
|
|
92
|
+
|
|
93
|
+
The package exports a single entry point:
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
export type Options = {
|
|
97
|
+
readonly outputFilePath: string; // Where to write the generated JSON
|
|
98
|
+
readonly htmlFilePattern: string; // Glob pattern for element spec files
|
|
99
|
+
readonly commonAttrsFilePath: string; // Path to global attributes JSON
|
|
100
|
+
readonly commonContentsFilePath: string; // Path to content models JSON
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
export async function main(options: Options): Promise<void>;
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Data Flow
|
|
107
|
+
|
|
108
|
+
1. **Parallel data gathering** -- `main()` launches three tasks concurrently via `Promise.all`:
|
|
109
|
+
- `getElements(htmlFilePattern)` -- reads local spec files, scrapes MDN for each element, appends obsolete elements
|
|
110
|
+
- `getGlobalAttrs(commonAttrsFilePath)` -- reads global attribute definitions
|
|
111
|
+
- `getAria()` -- scrapes W3C ARIA specs (1.1, 1.2, 1.3) plus Graphics ARIA and HTML-ARIA
|
|
112
|
+
|
|
113
|
+
2. **Assembly** -- The results are combined into an `ExtendedSpec` object:
|
|
114
|
+
- `cites` -- sorted list of all fetched URLs (from `getReferences()`)
|
|
115
|
+
- `def["#globalAttrs"]` -- global attribute categories
|
|
116
|
+
- `def["#aria"]` -- ARIA roles, properties, and graphics roles per version
|
|
117
|
+
- `def["#contentModels"]` -- content model categories (from `readJson()`)
|
|
118
|
+
- `specs` -- element specification array
|
|
119
|
+
|
|
120
|
+
3. **Output** -- The assembled JSON is written to `outputFilePath`
|
|
121
|
+
|
|
122
|
+
## External Dependencies
|
|
123
|
+
|
|
124
|
+
| Package | Purpose |
|
|
125
|
+
| --------------------- | -------------------------------------------------------- |
|
|
126
|
+
| `cheerio` | HTML parsing and DOM querying of scraped web pages |
|
|
127
|
+
| `cli-progress` | Terminal progress bar for fetch operations |
|
|
128
|
+
| `ajv` | JSON Schema validation (available but unused at runtime) |
|
|
129
|
+
| `fast-xml-parser` | XML parsing (available but unused in current modules) |
|
|
130
|
+
| `glob` | File glob pattern matching for `readJsons()` |
|
|
131
|
+
| `strip-json-comments` | Remove comments from JSON files before parsing |
|
|
132
|
+
|
|
133
|
+
**Dev dependencies:**
|
|
134
|
+
|
|
135
|
+
| Package | Purpose |
|
|
136
|
+
| --------------------- | ---------------------------------------------------------------------------------- |
|
|
137
|
+
| `@markuplint/ml-spec` | Type definitions (`ExtendedSpec`, `ExtendedElementSpec`, `ARIARoleInSchema`, etc.) |
|
|
138
|
+
| `type-fest` | `WritableDeep` utility type |
|
|
139
|
+
|
|
140
|
+
## Integration Points
|
|
141
|
+
|
|
142
|
+
```mermaid
|
|
143
|
+
flowchart LR
|
|
144
|
+
subgraph upstream ["Type Provider"]
|
|
145
|
+
mlSpec["@markuplint/ml-spec\n(types: ExtendedSpec,\nExtendedElementSpec, etc.)"]
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
subgraph pkg ["@markuplint/spec-generator"]
|
|
149
|
+
main["main()"]
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
subgraph consumer ["Consumer"]
|
|
153
|
+
htmlSpec["@markuplint/html-spec\n(build.mjs calls main())"]
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
subgraph external ["External Sources"]
|
|
157
|
+
mdn["MDN Web Docs"]
|
|
158
|
+
w3c["W3C ARIA / HTML-ARIA"]
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
mlSpec -->|"types"| pkg
|
|
162
|
+
external -->|"HTTP scraping"| pkg
|
|
163
|
+
pkg -->|"generates index.json"| consumer
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Upstream
|
|
167
|
+
|
|
168
|
+
- **`@markuplint/ml-spec`** provides all TypeScript types used in this package: `ExtendedSpec`, `ExtendedElementSpec`, `ARIARoleInSchema`, `ARIAProperty`, `Category`, `Attribute`, etc.
|
|
169
|
+
|
|
170
|
+
### Downstream
|
|
171
|
+
|
|
172
|
+
- **`@markuplint/html-spec`** is the sole consumer. Its `build.mjs` imports `main()` from this package and passes file paths for the spec sources and output destination.
|
|
173
|
+
|
|
174
|
+
## Documentation Map
|
|
175
|
+
|
|
176
|
+
- [Module Reference](docs/modules.md) -- Detailed documentation for each source module
|
|
177
|
+
- [Scraping Details](docs/scraping.md) -- Web scraping targets, selectors, and error handling
|
|
178
|
+
- [Maintenance Guide](docs/maintenance.md) -- Troubleshooting, common recipes, and debugging
|
package/CHANGELOG.md
CHANGED
|
@@ -3,21 +3,25 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
## [4.8.2](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.8.1...@markuplint/spec-generator@4.8.2) (2026-02-10)
|
|
8
7
|
|
|
9
8
|
### Bug Fixes
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
- **spec-generator:** merge MDN data into spec-defined attributes ([ae4db37](https://github.com/markuplint/markuplint/commit/ae4db37b109bac3daed22d8ba0a147acf2d71787))
|
|
12
11
|
|
|
12
|
+
## [4.8.1](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.8.0...@markuplint/spec-generator@4.8.1) (2025-11-05)
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
**Note:** Version bump only for package @markuplint/spec-generator
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
# [4.8.0](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.7.0...@markuplint/spec-generator@4.8.0) (2025-08-24)
|
|
17
17
|
|
|
18
|
+
### Bug Fixes
|
|
18
19
|
|
|
20
|
+
- **spec-generator:** remove debug log ([a00691b](https://github.com/markuplint/markuplint/commit/a00691bc9874ba6a8adb5bf6217e7d6c79959660))
|
|
19
21
|
|
|
22
|
+
### Features
|
|
20
23
|
|
|
24
|
+
- **spec-generator:** update MDN scraping logic ([45889a9](https://github.com/markuplint/markuplint/commit/45889a9a308c48ccd43df6837463d0ecabd547a8))
|
|
21
25
|
|
|
22
26
|
# [4.7.0](https://github.com/markuplint/markuplint/compare/@markuplint/spec-generator@4.6.19...@markuplint/spec-generator@4.7.0) (2025-08-13)
|
|
23
27
|
|
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Private builder used to generate `@markuplint/html-spec`.
|
|
4
4
|
|
|
5
5
|
It assembles an Extended Spec JSON from the HTML element source files and external references
|
|
6
|
-
(MDN, WAI
|
|
6
|
+
(MDN, WAI-ARIA, HTML-ARIA), then writes `index.json` in `@markuplint/html-spec`.
|
|
7
7
|
|
|
8
8
|
## How it is invoked
|
|
9
9
|
|
|
@@ -25,58 +25,46 @@ You normally don't run this directly; use:
|
|
|
25
25
|
|
|
26
26
|
## What it does
|
|
27
27
|
|
|
28
|
-
1. Read element sources
|
|
28
|
+
1. **Read element sources** -- Load every `src/spec.*.json` and infer the element name from the filename
|
|
29
|
+
2. **Enrich from MDN** -- Fetch MDN element pages for descriptions, categories, and attribute metadata (manual specs take precedence)
|
|
30
|
+
3. **Add obsolete elements** -- Inject HTML obsolete elements and deprecated SVG elements
|
|
31
|
+
4. **Load shared data** -- Read global attributes and content model definitions
|
|
32
|
+
5. **Build ARIA definitions** -- Scrape WAI-ARIA (1.1/1.2/1.3), Graphics-ARIA, and HTML-ARIA
|
|
33
|
+
6. **Emit Extended Spec JSON** -- Write `{ cites, def, specs }` to `index.json`
|
|
29
34
|
|
|
30
|
-
|
|
35
|
+
For detailed architecture and data flow, see [ARCHITECTURE.md](ARCHITECTURE.md).
|
|
31
36
|
|
|
32
|
-
|
|
37
|
+
## Precedence rules
|
|
33
38
|
|
|
34
|
-
-
|
|
35
|
-
- `cite`, `description`, `categories`, `omission`, attribute flags
|
|
36
|
-
- Existing fields in `src/spec.*.json` take precedence over scraped values
|
|
37
|
-
- Attributes are merged name-by-name; manual entries win
|
|
38
|
-
|
|
39
|
-
3. Add obsolete elements
|
|
40
|
-
|
|
41
|
-
- Inject HTML obsolete elements (WHATWG list) and some deprecated SVG elements if not present.
|
|
42
|
-
|
|
43
|
-
4. Load shared data
|
|
44
|
-
|
|
45
|
-
- `def['#globalAttrs']` from `src/spec-common.attributes.json`
|
|
46
|
-
- `def['#contentModels']` from `src/spec-common.contents.json` (`models` key)
|
|
47
|
-
|
|
48
|
-
5. Build ARIA definitions
|
|
49
|
-
|
|
50
|
-
- Scrape WAI‑ARIA (1.1/1.2/1.3) and Graphics‑ARIA, plus HTML‑ARIA cross‑refs, to produce
|
|
51
|
-
`def['#aria']` (roles, properties, synonyms, defaults, and equivalent HTML attrs).
|
|
52
|
-
|
|
53
|
-
6. Emit Extended Spec JSON
|
|
54
|
-
|
|
55
|
-
- `{ cites, def: { #globalAttrs, #aria, #contentModels }, specs: [...] }` → `index.json`
|
|
56
|
-
(Pretty‑printed by the caller)
|
|
57
|
-
|
|
58
|
-
## Source of truth vs. generated data
|
|
59
|
-
|
|
60
|
-
- Source of truth for element specs is in `@markuplint/html-spec/src/`.
|
|
61
|
-
- This generator is purely a build step; do not edit the output `index.json` by hand.
|
|
62
|
-
|
|
63
|
-
## Precedence rules (important)
|
|
64
|
-
|
|
65
|
-
- Manual data in `src/spec.*.json` overrides MDN‑scraped values on conflict.
|
|
39
|
+
- Manual data in `src/spec.*.json` overrides MDN-scraped values on conflict.
|
|
66
40
|
- Attribute objects are merged per name; manual keys win, MDN may fill missing flags.
|
|
67
|
-
- Shared files under `src/spec-common.*.json` are imported as
|
|
41
|
+
- Shared files under `src/spec-common.*.json` are imported as-is.
|
|
68
42
|
|
|
69
43
|
## Network and caching
|
|
70
44
|
|
|
71
|
-
- Uses live HTTP fetch against MDN/W3C specs. There is an in
|
|
72
|
-
- If a fetch fails, the entry may be left empty; re
|
|
45
|
+
- Uses live HTTP fetch against MDN/W3C specs. There is an in-process cache for the current run only.
|
|
46
|
+
- If a fetch fails, the entry may be left empty; re-run later or edit your manual source to cover it.
|
|
73
47
|
|
|
74
48
|
## When to change this package
|
|
75
49
|
|
|
76
50
|
- Only when the scraping targets change (DOM structure/URLs), or when the Extended Spec shape evolves
|
|
77
51
|
in `@markuplint/ml-spec`.
|
|
78
52
|
|
|
53
|
+
## Documentation
|
|
54
|
+
|
|
55
|
+
- [ARCHITECTURE.md](ARCHITECTURE.md) -- Package overview, module structure, data flow
|
|
56
|
+
- [docs/modules.md](docs/modules.md) -- Detailed reference for each source module
|
|
57
|
+
- [docs/scraping.md](docs/scraping.md) -- Web scraping targets, CSS selectors, and error handling
|
|
58
|
+
- [docs/maintenance.md](docs/maintenance.md) -- Troubleshooting, common recipes, and debugging
|
|
59
|
+
|
|
60
|
+
Japanese versions are also available:
|
|
61
|
+
|
|
62
|
+
- [ARCHITECTURE.ja.md](ARCHITECTURE.ja.md)
|
|
63
|
+
- [docs/modules.ja.md](docs/modules.ja.md)
|
|
64
|
+
- [docs/scraping.ja.md](docs/scraping.ja.md)
|
|
65
|
+
- [docs/maintenance.ja.md](docs/maintenance.ja.md)
|
|
66
|
+
|
|
79
67
|
## See also
|
|
80
68
|
|
|
81
|
-
- `@markuplint/html-spec` README
|
|
82
|
-
- `@markuplint/ml-spec` README
|
|
69
|
+
- `@markuplint/html-spec` README -- how to edit the element sources.
|
|
70
|
+
- `@markuplint/ml-spec` README -- schema shapes, generation, and spec merging.
|
package/SKILL.md
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Perform maintenance tasks for @markuplint/spec-generator
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# spec-generator-maintenance
|
|
6
|
+
|
|
7
|
+
Perform maintenance tasks for `@markuplint/spec-generator`: fix scraping breakages,
|
|
8
|
+
add ARIA versions, update the obsolete element list, and debug scraping issues.
|
|
9
|
+
|
|
10
|
+
## Input
|
|
11
|
+
|
|
12
|
+
`$ARGUMENTS` specifies the task. Supported tasks:
|
|
13
|
+
|
|
14
|
+
| Task | Description |
|
|
15
|
+
| ------------------------ | ----------------------------------------------------- |
|
|
16
|
+
| `fix-scraping <source>` | Fix scraping breakage caused by upstream page changes |
|
|
17
|
+
| `add-aria-version <ver>` | Add support for a new ARIA specification version |
|
|
18
|
+
| `add-obsolete <element>` | Add an element to the obsolete list |
|
|
19
|
+
| `debug-element <name>` | Debug scraping results for a specific element |
|
|
20
|
+
| `update-deps` | Update dependencies and verify compatibility |
|
|
21
|
+
|
|
22
|
+
If `$ARGUMENTS` is omitted, the task defaults to `fix-scraping`.
|
|
23
|
+
|
|
24
|
+
## Reference
|
|
25
|
+
|
|
26
|
+
Before executing any task, read `docs/maintenance.md` (or `docs/maintenance.ja.md`)
|
|
27
|
+
for the full guide. The recipes there are the source of truth for procedures.
|
|
28
|
+
|
|
29
|
+
Also read:
|
|
30
|
+
|
|
31
|
+
- `docs/scraping.md` -- Scraping targets, CSS selectors, and fragile points
|
|
32
|
+
- `docs/modules.md` -- Module reference for all source files
|
|
33
|
+
- `ARCHITECTURE.md` -- Package overview and data flow
|
|
34
|
+
|
|
35
|
+
## Task: fix-scraping
|
|
36
|
+
|
|
37
|
+
Fix CSS selectors that have broken due to upstream page structure changes.
|
|
38
|
+
|
|
39
|
+
### Step 1: Identify the scope
|
|
40
|
+
|
|
41
|
+
The `<source>` argument specifies which scraping target is affected:
|
|
42
|
+
|
|
43
|
+
| Source | Module | Upstream Site |
|
|
44
|
+
| ----------- | ------------- | ------------------------------------ |
|
|
45
|
+
| `mdn` | `scraping.ts` | MDN Web Docs element reference pages |
|
|
46
|
+
| `aria` | `aria.ts` | W3C ARIA specification pages |
|
|
47
|
+
| `svg` | `svg.ts` | MDN SVG element index page |
|
|
48
|
+
| `html-aria` | `aria.ts` | W3C HTML-ARIA mapping page |
|
|
49
|
+
|
|
50
|
+
### Step 2: Diagnose
|
|
51
|
+
|
|
52
|
+
1. Run `yarn up:gen` and check the `index.json` diff:
|
|
53
|
+
```bash
|
|
54
|
+
git diff packages/@markuplint/html-spec/index.json
|
|
55
|
+
```
|
|
56
|
+
2. Identify which data is missing or incorrect
|
|
57
|
+
3. Open the affected upstream page in a browser
|
|
58
|
+
4. Compare the actual HTML structure with the CSS selectors in the module
|
|
59
|
+
5. Refer to `docs/scraping.md` for the complete selector reference
|
|
60
|
+
|
|
61
|
+
### Step 3: Fix
|
|
62
|
+
|
|
63
|
+
1. Update the CSS selectors in the identified module to match the new page structure
|
|
64
|
+
2. Build: `yarn build --scope @markuplint/spec-generator`
|
|
65
|
+
3. Regenerate: `yarn up:gen`
|
|
66
|
+
4. Verify the `index.json` diff shows correct data restoration
|
|
67
|
+
|
|
68
|
+
### Step 4: Test and commit
|
|
69
|
+
|
|
70
|
+
1. Run `yarn workspace @markuplint/html-spec run test`
|
|
71
|
+
2. Stage and commit both the selector fix and the regenerated `index.json`
|
|
72
|
+
|
|
73
|
+
## Task: add-aria-version
|
|
74
|
+
|
|
75
|
+
Add support for a new ARIA specification version. Follow recipe #2 in `docs/maintenance.md`.
|
|
76
|
+
|
|
77
|
+
1. Read `src/aria.ts` to understand the current version handling
|
|
78
|
+
2. Add the new version URL in `getARIASpecURLByVersion()`
|
|
79
|
+
3. Add the new version data fetching in `getAria()`
|
|
80
|
+
4. **Cross-package:** Update `ARIAVersion` type in `@markuplint/ml-spec`
|
|
81
|
+
5. Build: `yarn build --scope @markuplint/spec-generator`
|
|
82
|
+
6. Regenerate: `yarn up:gen`
|
|
83
|
+
7. Verify the new version's data appears in `index.json`
|
|
84
|
+
8. Run tests: `yarn workspace @markuplint/html-spec run test`
|
|
85
|
+
|
|
86
|
+
## Task: add-obsolete
|
|
87
|
+
|
|
88
|
+
Add an element to the hardcoded obsolete list. Follow recipe #3 in `docs/maintenance.md`.
|
|
89
|
+
|
|
90
|
+
1. Read `src/html-elements.ts`
|
|
91
|
+
2. Add the element name to the `obsoleteList` array
|
|
92
|
+
3. Build: `yarn build --scope @markuplint/spec-generator`
|
|
93
|
+
4. Regenerate: `yarn up:gen`
|
|
94
|
+
5. Verify the element appears in `index.json` with `"obsolete": true`
|
|
95
|
+
6. Run tests: `yarn workspace @markuplint/html-spec run test`
|
|
96
|
+
|
|
97
|
+
## Task: debug-element
|
|
98
|
+
|
|
99
|
+
Debug the scraping results for a specific element to understand what data is being extracted.
|
|
100
|
+
|
|
101
|
+
1. Read `src/scraping.ts` and `src/html-elements.ts`
|
|
102
|
+
2. Construct the MDN URL for the element:
|
|
103
|
+
- HTML: `https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/<name>`
|
|
104
|
+
- SVG: `https://developer.mozilla.org/en-US/docs/Web/SVG/Reference/Element/<name>`
|
|
105
|
+
- Heading elements (`h1`-`h6`): use `Heading_Elements` as the name
|
|
106
|
+
3. Use WebFetch to inspect the MDN page structure
|
|
107
|
+
4. Compare the page structure with the CSS selectors in `scraping.ts`
|
|
108
|
+
5. Report findings: which selectors match, which data is extracted, and any discrepancies
|
|
109
|
+
|
|
110
|
+
## Task: update-deps
|
|
111
|
+
|
|
112
|
+
Update package dependencies and verify compatibility.
|
|
113
|
+
|
|
114
|
+
1. Read `package.json` for current dependency versions
|
|
115
|
+
2. Check for available updates
|
|
116
|
+
3. For `cheerio` updates:
|
|
117
|
+
- Review the changelog for breaking API changes
|
|
118
|
+
- Refer to recipe #5 in `docs/maintenance.md` for the API surface used
|
|
119
|
+
4. For `@markuplint/ml-spec` updates:
|
|
120
|
+
- Check for type changes that affect this package
|
|
121
|
+
- Refer to recipe #4 in `docs/maintenance.md`
|
|
122
|
+
5. Update dependencies
|
|
123
|
+
6. Build: `yarn build --scope @markuplint/spec-generator`
|
|
124
|
+
7. Regenerate: `yarn up:gen`
|
|
125
|
+
8. Run tests: `yarn workspace @markuplint/html-spec run test`
|
|
126
|
+
9. Review `index.json` diff for unexpected changes
|
|
127
|
+
|
|
128
|
+
## Rules
|
|
129
|
+
|
|
130
|
+
1. **Always build after source changes.** Run `yarn build --scope @markuplint/spec-generator` before `yarn up:gen`.
|
|
131
|
+
2. **Always check the index.json diff.** The diff is the primary way to verify scraping correctness.
|
|
132
|
+
3. **Use the actual page structure as the source of truth.** When selectors break, inspect the live page -- do not guess.
|
|
133
|
+
4. **Cross-package changes may be required.** ARIA version additions and type changes often affect `@markuplint/ml-spec`.
|
|
134
|
+
5. **Failed fetches are cached as empty strings.** If a URL fails during `yarn up:gen`, the affected data stays empty for the rest of that run. The cache is in-process only, so re-running `yarn up:gen` fetches the URL again.
|