@markuplint/mdx-parser 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +212 -0
- package/CHANGELOG.md +10 -0
- package/LICENSE +21 -0
- package/README.ja.md +43 -0
- package/README.md +43 -0
- package/lib/index.d.ts +4 -0
- package/lib/index.js +4 -0
- package/lib/parser.d.ts +64 -0
- package/lib/parser.js +260 -0
- package/package.json +40 -0
- package/src/index.spec.ts +923 -0
- package/src/index.ts +4 -0
- package/src/parser.ts +308 -0
- package/tsconfig.build.json +9 -0
- package/tsconfig.build.tsbuildinfo +1 -0
- package/tsconfig.json +14 -0
package/ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# @markuplint/mdx-parser
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
`@markuplint/mdx-parser` parses MDX (`.mdx`) files for markuplint. MDX combines Markdown with JSX, allowing React components, JavaScript expressions, and ES module imports/exports alongside Markdown content. This parser converts both JSX elements and Markdown constructs into markuplint's AST, enabling lint rules to work on MDX documents.
|
|
6
|
+
|
|
7
|
+
## Design Decisions
|
|
8
|
+
|
|
9
|
+
### Why extend `MarkdownAwareParser`?
|
|
10
|
+
|
|
11
|
+
Both plain Markdown and MDX share the same Markdown constructs (headings, links, images, lists, emphasis, etc.). The `MarkdownAwareParser` base class (from `@markuplint/markdown-parser`) provides shared conversion logic for these constructs. The MDX parser adds:
|
|
12
|
+
|
|
13
|
+
1. **JSX element handling** -- `mdxJsxFlowElement`/`mdxJsxTextElement` mapped to `MLASTElement`
|
|
14
|
+
2. **Expression handling** -- `{expr}` mapped to psblock nodes
|
|
15
|
+
3. **ESM handling** -- `import`/`export` mapped to psblock nodes
|
|
16
|
+
4. **JSX-specific options** -- XML-style end tags, booleanish attributes, case-sensitive tag names
|
|
17
|
+
|
|
18
|
+
### MDX JSX vs HTML
|
|
19
|
+
|
|
20
|
+
| Aspect | Markdown HTML | MDX JSX |
|
|
21
|
+
| ----------------- | ------------------------- | ----------------------------------- |
|
|
22
|
+
| Self-closing tags | Optional (`<br>`) | Required (`<br />`) |
|
|
23
|
+
| Comments | `<!-- comment -->` | `{/* comment */}` |
|
|
24
|
+
| Attribute names | HTML (`class`, `for`) | JSX (`className`, `htmlFor`) |
|
|
25
|
+
| Attribute values | Strings only (`data="x"`) | Strings or expressions (`data={x}`) |
|
|
26
|
+
| Components | Not supported | `<MyComponent />` |
|
|
27
|
+
|
|
28
|
+
### Why `remark-parse` + `remark-mdx` (not `@mdx-js/mdx`)?
|
|
29
|
+
|
|
30
|
+
`@mdx-js/mdx` internally wraps `remark-parse` + `remark-mdx` and adds ~24 unnecessary dependencies for JS compilation that markuplint doesn't need. The parse-stage mdast is identical either way.
|
|
31
|
+
|
|
32
|
+
### MDX v2/v3 only (not v1)
|
|
33
|
+
|
|
34
|
+
MDX v1 uses a fundamentally different parser architecture with no shared code. Since v1 is deprecated and the ecosystem has migrated, we target v2/v3 only.
|
|
35
|
+
|
|
36
|
+
### No `@markuplint/mdx-spec` package needed
|
|
37
|
+
|
|
38
|
+
MDX uses React JSX syntax. The existing `@markuplint/react-spec` provides JSX attribute mappings:
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"parser": { "\\.mdx$": "@markuplint/mdx-parser" },
|
|
43
|
+
"specs": { "\\.mdx$": "@markuplint/react-spec" }
|
|
44
|
+
}
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## How It Works
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
.mdx source file
|
|
51
|
+
|
|
|
52
|
+
v
|
|
53
|
+
[remark-parse + remark-gfm + remark-mdx + remark-frontmatter]
|
|
54
|
+
|
|
|
55
|
+
v
|
|
56
|
+
[collectDefinitions()] Extract [id]: url definitions
|
|
57
|
+
|
|
|
58
|
+
v
|
|
59
|
+
[flattenMdastChildren()] Unwrap JSX elements from paragraph wrappers
|
|
60
|
+
|
|
|
61
|
+
v
|
|
62
|
+
[nodeize() per mdast node]
|
|
63
|
+
|
|
|
64
|
+
+-- mdxJsxFlowElement --> MLASTElement via parseCodeFragment
|
|
65
|
+
+-- mdxJsxTextElement --> MLASTElement via parseCodeFragment
|
|
66
|
+
+-- mdxFlowExpression --> psblock
|
|
67
|
+
+-- mdxTextExpression --> psblock
|
|
68
|
+
+-- mdxjsEsm --> psblock (import/export)
|
|
69
|
+
+-- heading --> h1-h6 element (via MarkdownAwareParser)
|
|
70
|
+
+-- paragraph --> p element
|
|
71
|
+
+-- emphasis/strong --> em/strong elements
|
|
72
|
+
+-- link/image --> a/img elements with attributes
|
|
73
|
+
+-- list/listItem --> ul/ol > li elements
|
|
74
|
+
+-- blockquote --> blockquote element
|
|
75
|
+
+-- inlineCode/code --> code / pre>code elements
|
|
76
|
+
+-- table/row/cell --> table > tr > th/td elements
|
|
77
|
+
+-- delete (GFM) --> del element
|
|
78
|
+
+-- linkReference --> a element (resolved) or psblock
|
|
79
|
+
+-- imageReference --> img element (resolved) or psblock
|
|
80
|
+
+-- text --> MLASTText
|
|
81
|
+
+-- yaml --> psblock (#ps:yaml)
|
|
82
|
+
+-- html (rare in MDX) --> parseCodeFragment
|
|
83
|
+
v
|
|
84
|
+
MLASTDocument
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Class Hierarchy
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
Parser<MdastNode> (from @markuplint/parser-utils)
|
|
91
|
+
|
|
|
92
|
+
MarkdownAwareParser (from @markuplint/markdown-parser)
|
|
93
|
+
| - Shared Markdown-to-HTML conversion
|
|
94
|
+
|
|
|
95
|
+
MDXParser (this package)
|
|
96
|
+
| - tokenize(): remark-parse + remark-gfm + remark-mdx
|
|
97
|
+
| - nodeize(): MDX types first, then Markdown delegation
|
|
98
|
+
| - #visitJsxElement(): JSX -> MLASTElement
|
|
99
|
+
| - visitAttr(): JSX attribute handling (quoteSet, dynamic value)
|
|
100
|
+
| - detectElementType(): uppercase/dot = authored
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Paragraph Flattening
|
|
104
|
+
|
|
105
|
+
MDX's remark-mdx wraps inline JSX elements in `paragraph` nodes:
|
|
106
|
+
|
|
107
|
+
```mdx
|
|
108
|
+
Text with <Badge>inline</Badge> component.
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
produces a `paragraph` containing `text`, `mdxJsxTextElement`, `text`. The `flattenMdastChildren()` function detects root-level paragraphs containing inline JSX elements (`mdxJsxTextElement`) or inline expressions (`mdxTextExpression`) and unwraps their children to the top level, ensuring JSX elements are directly accessible.
|
|
112
|
+
|
|
113
|
+
Pure Markdown paragraphs (no inline JSX elements or expressions) are kept as-is and converted to `<p>` elements.
|
|
114
|
+
|
|
115
|
+
### JSX Element Handling
|
|
116
|
+
|
|
117
|
+
JSX elements are parsed via `parseCodeFragment()` with `namelessFragment: true`:
|
|
118
|
+
|
|
119
|
+
1. **Self-closing** (`<Component prop="val" />`): entire token is the start tag
|
|
120
|
+
2. **With children** (`<Card>...</Card>`): split into start tag token + children + end tag token
|
|
121
|
+
3. **Fragments** (`<>...</>`): produce `#jsx-fragment` nodes
|
|
122
|
+
|
|
123
|
+
The start tag is parsed by `parseCodeFragment` to extract attributes, then `visitElement()` wires up children.
|
|
124
|
+
|
|
125
|
+
### JSX Attribute Handling
|
|
126
|
+
|
|
127
|
+
| Attribute Form | Example | Mapping |
|
|
128
|
+
| ------------------ | -------------------------- | ------------------------ |
|
|
129
|
+
| String value | `name="email"` | Standard attribute |
|
|
130
|
+
| Expression value | `data={value}` | `isDynamicValue: true` |
|
|
131
|
+
| Object expression | `style={{ color: "red" }}` | `isDynamicValue: true` |
|
|
132
|
+
| Boolean (no value) | `disabled` | Empty value (booleanish) |
|
|
133
|
+
| Spread | `{...props}` | `type: 'spread'` |
|
|
134
|
+
|
|
135
|
+
IDL attribute mapping (`className` -> `class`, `htmlFor` -> `for`) is handled at the core level by `MLAttr` when paired with `@markuplint/react-spec` (which sets `useIDLAttributeNames: true`).
|
|
136
|
+
|
|
137
|
+
### Component Detection
|
|
138
|
+
|
|
139
|
+
| Tag | `detectElementType()` | Example |
|
|
140
|
+
| ----------------- | --------------------- | --------------------------- |
|
|
141
|
+
| `<div>` | `html` | Native HTML element |
|
|
142
|
+
| `<MyComponent>` | `authored` | React component |
|
|
143
|
+
| `<Layout.Header>` | `authored` | Member expression component |
|
|
144
|
+
| `<x-widget>` | `web-component` | Custom element |
|
|
145
|
+
|
|
146
|
+
## Architecture Diagram
|
|
147
|
+
|
|
148
|
+
```mermaid
|
|
149
|
+
flowchart TD
|
|
150
|
+
subgraph upstream ["Upstream"]
|
|
151
|
+
parserUtils["@markuplint/parser-utils\n(Parser)"]
|
|
152
|
+
mdParser["@markuplint/markdown-parser\n(MarkdownAwareParser)"]
|
|
153
|
+
remark["remark-parse + remark-gfm\n(Markdown + GFM)"]
|
|
154
|
+
remarkMdx["remark-mdx\n(MDX syntax extensions)"]
|
|
155
|
+
remarkFm["remark-frontmatter\n(Front matter support)"]
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
subgraph pkg ["@markuplint/mdx-parser"]
|
|
159
|
+
mdxParser["MDXParser\nextends MarkdownAwareParser"]
|
|
160
|
+
flatten["flattenMdastChildren()\nparagraph unwrapping"]
|
|
161
|
+
jsxVisit["#visitJsxElement()\nJSX -> MLASTElement"]
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
subgraph downstream ["Downstream"]
|
|
165
|
+
mlCore["@markuplint/ml-core\n(MLASTDocument -> MLDOM)"]
|
|
166
|
+
reactSpec["@markuplint/react-spec\n(JSX attribute specs)"]
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
parserUtils -->|"base utilities"| mdParser
|
|
170
|
+
mdParser -->|"MarkdownAwareParser"| mdxParser
|
|
171
|
+
remark -->|"mdast AST"| mdxParser
|
|
172
|
+
remarkMdx -->|"MDX node types"| mdxParser
|
|
173
|
+
remarkFm -->|"front matter"| mdxParser
|
|
174
|
+
mdxParser --> flatten --> jsxVisit
|
|
175
|
+
mdxParser -->|"MLASTDocument"| mlCore
|
|
176
|
+
reactSpec -->|"JSX attribute mapping"| mlCore
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## External Dependencies
|
|
180
|
+
|
|
181
|
+
| Dependency | Purpose |
|
|
182
|
+
| ----------------------------- | --------------------------------------------- |
|
|
183
|
+
| `@markuplint/markdown-parser` | MarkdownAwareParser base class |
|
|
184
|
+
| `@markuplint/ml-ast` | AST type definitions |
|
|
185
|
+
| `@markuplint/parser-utils` | Parser utilities |
|
|
186
|
+
| `unified` | Processor pipeline for remark |
|
|
187
|
+
| `remark-parse` | Markdown -> mdast parser |
|
|
188
|
+
| `remark-gfm` | GFM extensions (tables, strikethrough) |
|
|
189
|
+
| `remark-mdx` | MDX syntax extensions (JSX, expressions, ESM) |
|
|
190
|
+
| `remark-frontmatter` | Front matter (YAML) support |
|
|
191
|
+
|
|
192
|
+
## Directory Structure
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
src/
|
|
196
|
+
+-- index.ts -- Re-exports parser instance
|
|
197
|
+
+-- parser.ts -- MDXParser class, flattenMdastChildren helper
|
|
198
|
+
+-- index.spec.ts -- Unit and integration tests
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Relationship to @markuplint/markdown-parser
|
|
202
|
+
|
|
203
|
+
Both parsers share `MarkdownAwareParser` as a common base class:
|
|
204
|
+
|
|
205
|
+
| Aspect | markdown-parser (.md) | mdx-parser (.mdx) |
|
|
206
|
+
| --------------- | -------------------------- | ---------------------------------- |
|
|
207
|
+
| Base class | MarkdownAwareParser | MarkdownAwareParser |
|
|
208
|
+
| HTML regions | HtmlParser re-parse | parseCodeFragment (XML-style) |
|
|
209
|
+
| JSX support | No | Yes (components, expressions, ESM) |
|
|
210
|
+
| Attribute style | HTML (`class`, `for`) | JSX (`className`, `htmlFor`) |
|
|
211
|
+
| Tag closing | HTML rules (void elements) | XML rules (self-closing required) |
|
|
212
|
+
| Spec package | None needed | `@markuplint/react-spec` |
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Change Log
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
|
|
5
|
+
|
|
6
|
+
# [5.0.0-alpha.0](https://github.com/markuplint/markuplint/compare/v4.14.1...v5.0.0-alpha.0) (2026-02-20)
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
- **mdx-parser:** add MDX parser for markuplint ([a097250](https://github.com/markuplint/markuplint/commit/a0972504aac1317cda5b2e6f0b2f3a1d7bc578fd))
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2017-2024 Yusuke Hirao
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.ja.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# @markuplint/mdx-parser
|
|
2
|
+
|
|
3
|
+
[npm パッケージ](https://www.npmjs.com/package/@markuplint/mdx-parser)
|
|
4
|
+
|
|
5
|
+
[**MDX**](https://mdxjs.com/) ファイルで **markuplint** を使用するためのパーサーです。
|
|
6
|
+
|
|
7
|
+
JSX 要素、式、import/export を標準的な Markdown コンテンツと併せてパースします。Markdown 構文は対応する HTML 要素に変換され、markuplint のルールで解析できるようになります。[GFM](https://github.github.com/gfm/)(テーブル、取り消し線、オートリンク)および YAML フロントマターをサポートしています。
|
|
8
|
+
|
|
9
|
+
## インストール
|
|
10
|
+
|
|
11
|
+
```shell
|
|
12
|
+
$ npm install -D @markuplint/mdx-parser
|
|
13
|
+
|
|
14
|
+
$ yarn add -D @markuplint/mdx-parser
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## 使い方
|
|
18
|
+
|
|
19
|
+
[設定ファイル](https://markuplint.dev/configuration/#properties/parser)に `parser` と `specs` オプションを追加してください。
|
|
20
|
+
|
|
21
|
+
```json
|
|
22
|
+
{
|
|
23
|
+
"parser": {
|
|
24
|
+
".mdx$": "@markuplint/mdx-parser"
|
|
25
|
+
},
|
|
26
|
+
"specs": {
|
|
27
|
+
".mdx$": "@markuplint/react-spec"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 機能
|
|
33
|
+
|
|
34
|
+
- **JSX 要素**: 自己閉じ(`<Badge />`)とコンテナ(`<Card>...</Card>`)の両方のコンポーネントに対応
|
|
35
|
+
- **IDL 属性変換**: `@markuplint/react-spec` と併用することで、React スタイルの属性(例: `className`、`htmlFor`)が対応する HTML 属性にマッピングされます
|
|
36
|
+
- **式**: `{variable}` や `{condition ? a : b}` を動的な値として扱う
|
|
37
|
+
- **ESM import/export**: `import` 文と `export` 文をブロックとして認識
|
|
38
|
+
- **JSX 内の Markdown**: JSX コンテナ内の Markdown コンテンツを再帰的にパース
|
|
39
|
+
|
|
40
|
+
## 既知の制限事項
|
|
41
|
+
|
|
42
|
+
- **MDX v2/v3 のみ**: MDX v1 構文はサポートされていません。
|
|
43
|
+
- **合成された属性位置**: Markdown 構文から導出された属性(例: `[テキスト](url)` の `href`)は、属性値の正確な文字位置ではなく、Markdown 構文全体のソース位置を共有します。
|
package/README.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# @markuplint/mdx-parser
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@markuplint/mdx-parser)
|
|
4
|
+
|
|
5
|
+
Use **markuplint** with [**MDX**](https://mdxjs.com/) files.
|
|
6
|
+
|
|
7
|
+
Parses JSX elements, expressions, and ESM imports/exports alongside standard Markdown content. Markdown constructs are converted to their corresponding HTML elements so that markuplint rules can analyze them. Supports [GFM](https://github.github.com/gfm/) (tables, strikethrough, autolinks) and YAML frontmatter.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```shell
|
|
12
|
+
$ npm install -D @markuplint/mdx-parser
|
|
13
|
+
|
|
14
|
+
$ yarn add -D @markuplint/mdx-parser
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
Add `parser` and `specs` options to your [configuration](https://markuplint.dev/configuration/#properties/parser).
|
|
20
|
+
|
|
21
|
+
```json
|
|
22
|
+
{
|
|
23
|
+
"parser": {
|
|
24
|
+
".mdx$": "@markuplint/mdx-parser"
|
|
25
|
+
},
|
|
26
|
+
"specs": {
|
|
27
|
+
".mdx$": "@markuplint/react-spec"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
- **JSX elements**: Both self-closing (`<Badge />`) and container (`<Card>...</Card>`) components
|
|
35
|
+
- **IDL attribute conversion**: When paired with `@markuplint/react-spec`, React-style attributes (e.g., `className`, `htmlFor`) are mapped to their HTML equivalents
|
|
36
|
+
- **Expressions**: `{variable}` and `{condition ? a : b}` are treated as dynamic values
|
|
37
|
+
- **ESM imports/exports**: `import` and `export` statements are recognized as blocks
|
|
38
|
+
- **Markdown inside JSX**: Markdown content within JSX containers is recursively parsed
|
|
39
|
+
|
|
40
|
+
## Known Limitations
|
|
41
|
+
|
|
42
|
+
- **MDX v2/v3 only**: MDX v1 syntax is not supported.
|
|
43
|
+
- **Synthesized attribute positions**: Attributes derived from Markdown syntax (e.g., `href` from `[text](url)`) share the source position of the enclosing Markdown construct rather than pointing to the exact character range of the attribute value.
|
package/lib/index.d.ts
ADDED
package/lib/index.js
ADDED
package/lib/parser.d.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { MLASTNodeTreeItem, MLASTParentNode } from '@markuplint/ml-ast';
|
|
2
|
+
import type { Token } from '@markuplint/parser-utils';
|
|
3
|
+
import type { RootContent } from 'mdast';
|
|
4
|
+
import { MarkdownAwareParser } from '@markuplint/markdown-parser';
|
|
5
|
+
type MdastNode = RootContent;
|
|
6
|
+
/**
 * Parser for MDX files that maps JSX elements to markuplint's AST.
 *
 * Uses remark-parse + remark-mdx to produce an MDX-extended mdast,
 * then converts JSX elements to MLASTElement nodes and delegates
 * Markdown content to the shared MarkdownAwareParser base class.
 */
declare class MDXParser extends MarkdownAwareParser {
    #private;
    constructor();
    /**
     * Tokenizes the raw MDX source into an mdast tree.
     *
     * Resets parser state, parses via remark (with GFM, MDX, and frontmatter
     * plugins), collects link/image reference definitions, and flattens
     * inline JSX elements and expressions out of root-level paragraph wrappers.
     *
     * @returns The flattened mdast children and fragment flag.
     */
    tokenize(): {
        ast: RootContent[];
        isFragment: boolean;
    };
    /**
     * Converts a single mdast node into markuplint AST nodes.
     *
     * Handles MDX-specific nodes (JSX elements, expressions, ESM imports/exports)
     * first, then delegates to the shared `nodeizeMarkdownNode()` for common
     * Markdown constructs.
     *
     * @param originNode - The mdast node to convert.
     * @param parentNode - Parent AST node, or `null` for top-level.
     * @param depth - Current nesting depth.
     * @returns An array of markuplint AST nodes.
     */
    nodeize(originNode: MdastNode, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
    /**
     * Processes a JSX attribute token with dynamic value detection.
     * IDL attribute name mapping is handled declaratively by the spec's
     * useIDLAttributeNames and ml-core's attr resolution, not by this method.
     *
     * @param token - The raw attribute token from the source.
     * @returns The processed attribute node (regular or spread attribute).
     */
    visitAttr(token: Token): (import("@markuplint/ml-ast").MLASTHTMLAttr & {
        __rightText?: string;
    }) | (import("@markuplint/ml-ast").MLASTSpreadAttr & {
        __rightText?: string;
    });
    /**
     * MDX components use uppercase or dot notation following React's convention.
     *
     * @param nodeName - The element name to classify.
     * @returns The element type: `'authored'` for component names
     *   (leading uppercase letter or containing a dot); for other names the
     *   package documentation lists `'html'` for native elements and
     *   `'web-component'` for custom elements such as `x-widget`.
     */
    detectElementType(nodeName: string): import("@markuplint/ml-ast").ElementType;
}
|
|
63
|
+
export declare const parser: MDXParser;
|
|
64
|
+
export {};
|
package/lib/parser.js
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import { MarkdownAwareParser } from '@markuplint/markdown-parser';
|
|
2
|
+
import remarkFrontmatter from 'remark-frontmatter';
|
|
3
|
+
import remarkGfm from 'remark-gfm';
|
|
4
|
+
import remarkMdx from 'remark-mdx';
|
|
5
|
+
import remarkParse from 'remark-parse';
|
|
6
|
+
import { unified } from 'unified';
|
|
7
|
+
/**
 * Parser for MDX files that maps JSX elements to markuplint's AST.
 *
 * Uses remark-parse + remark-mdx to produce an MDX-extended mdast,
 * then converts JSX elements to MLASTElement nodes and delegates
 * Markdown content to the shared MarkdownAwareParser base class.
 */
class MDXParser extends MarkdownAwareParser {
    constructor() {
        // JSX semantics: XML-style end tags, booleanish attributes,
        // and case-sensitive tag names (so `Card` and `card` differ).
        super({
            endTagType: 'xml',
            booleanish: true,
            tagNameCaseSensitive: true,
        });
    }
    /**
     * Tokenizes the raw MDX source into an mdast tree.
     *
     * Resets parser state, parses via remark (with GFM, MDX, and frontmatter
     * plugins), collects link/image reference definitions, and flattens
     * inline JSX elements and expressions out of root-level paragraph wrappers.
     *
     * @returns The flattened mdast children and fragment flag.
     */
    tokenize() {
        this.resetMarkdownState();
        const processor = unified().use(remarkParse).use(remarkGfm).use(remarkMdx).use(remarkFrontmatter, ['yaml']);
        const mdast = processor.parse(this.rawCode);
        // Definitions are collected from the unflattened tree, before any
        // paragraph is unwrapped.
        this.collectDefinitions(mdast.children);
        return {
            ast: flattenMdastChildren(mdast.children),
            isFragment: true,
        };
    }
    /**
     * Converts a single mdast node into markuplint AST nodes.
     *
     * Handles MDX-specific nodes (JSX elements, expressions, ESM imports/exports)
     * first, then delegates to the shared `nodeizeMarkdownNode()` for common
     * Markdown constructs. Remaining `text` and `html` nodes are handled here;
     * anything else falls back to a psblock named after the mdast node type.
     *
     * @param originNode - The mdast node to convert.
     * @param parentNode - Parent AST node, or `null` for top-level.
     * @param depth - Current nesting depth.
     * @returns An array of markuplint AST nodes (empty when the node has no position).
     */
    nodeize(
    // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
    originNode, parentNode, depth) {
        const position = originNode.position;
        if (!position) {
            // Without source positions the node cannot be mapped back to raw code.
            return [];
        }
        const offset = position.start.offset ?? 0;
        const endOffset = position.end.offset ?? offset;
        const token = this.sliceFragment(offset, endOffset);
        // MDX-specific node types first
        switch (originNode.type) {
            case 'mdxJsxFlowElement':
            case 'mdxJsxTextElement': {
                return this.#visitJsxElement(originNode, token, depth, parentNode);
            }
            case 'mdxFlowExpression':
            case 'mdxTextExpression':
            case 'mdxjsEsm': {
                return this.#visitAsPsBlock(originNode, token, depth, parentNode);
            }
        }
        // Try common Markdown node handling
        const result = this.nodeizeMarkdownNode(originNode, token, offset, endOffset, depth, parentNode);
        if (result !== null) {
            return result;
        }
        switch (originNode.type) {
            case 'text': {
                return this.visitText({
                    ...token,
                    depth,
                    parentNode,
                });
            }
            case 'html': {
                return this.parseCodeFragment({
                    ...token,
                    depth,
                    parentNode,
                });
            }
            default: {
                // Unhandled node types fallback to psblock
                return this.#visitAsPsBlock(originNode, token, depth, parentNode);
            }
        }
    }
    /**
     * Processes a JSX attribute token with dynamic value detection.
     * IDL attribute name mapping is handled declaratively by the spec's
     * useIDLAttributeNames and ml-core's attr resolution, not by this method.
     *
     * Besides single and double quotes, `{`...`}` is registered as a
     * `script` quote pair so that expression values are tokenized as one value.
     *
     * @param token - The raw attribute token from the source.
     * @returns The processed attribute node. Spread attributes are returned
     *   unchanged; attributes whose value is wrapped in braces are flagged
     *   with `isDynamicValue: true`.
     */
    visitAttr(token) {
        const attr = super.visitAttr(token, {
            quoteSet: [
                { start: '"', end: '"', type: 'string' },
                { start: "'", end: "'", type: 'string' },
                { start: '{', end: '}', type: 'script' },
            ],
        });
        if (attr.type === 'spread') {
            return attr;
        }
        if (attr.startQuote.raw === '{' && attr.endQuote.raw === '}') {
            this.updateAttr(attr, {
                isDynamicValue: true,
            });
        }
        return attr;
    }
    /**
     * MDX components use uppercase or dot notation following React's convention.
     *
     * Passes the component-name pattern (leading uppercase letter, or a dot
     * anywhere in the name) to the base implementation, which performs the
     * actual classification.
     *
     * @param nodeName - The element name to classify.
     * @returns The element type: `'authored'` for component names; for other
     *   names the package documentation lists `'html'` for native elements
     *   and `'web-component'` for custom elements such as `x-widget`.
     */
    detectElementType(nodeName) {
        return super.detectElementType(nodeName, /^[A-Z]|\./);
    }
    /**
     * Wraps the whole token in a psblock named after the mdast node type.
     * Used for expressions, ESM blocks, unhandled node types, and JSX whose
     * start tag could not be parsed.
     */
    #visitAsPsBlock(
    // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
    originNode, token, depth, parentNode) {
        return this.visitPsBlock({
            ...token,
            depth,
            parentNode,
            nodeName: originNode.type,
            isFragment: false,
        });
    }
    /**
     * Visits a JSX element from the mdast, computing start tag and end tag
     * positions from the element's children and delegating to visitElement.
     *
     * Falls back to a psblock when the start tag cannot be parsed.
     */
    #visitJsxElement(
    // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
    originNode, token, depth, parentNode) {
        const children = originNode.children;
        const trimmedRaw = token.raw.trimEnd();
        // A fragment closer `</>` also ends with `/>`; it must not be mistaken
        // for a self-closing tag, otherwise the children of `<>...</>` and its
        // end tag would be dropped.
        const isSelfClosing = trimmedRaw.endsWith('/>') && !trimmedRaw.endsWith('</>');
        if (isSelfClosing || children.length === 0) {
            // Self-closing or empty element: the whole token is the start tag
            const parsedNodes = this.parseCodeFragment({
                ...token,
                depth,
                parentNode,
            }, { namelessFragment: true });
            const startTag = parsedNodes.at(0);
            if (!startTag || startTag.type !== 'starttag') {
                return this.#visitAsPsBlock(originNode, token, depth, parentNode);
            }
            return super.visitElement(startTag, [], {
                namelessFragment: true,
                createEndTagToken: () => null,
            });
        }
        // Element with children: split into start tag + children + end tag
        const firstChild = children[0];
        const lastChild = children.at(-1);
        // The start tag spans from the element start to the first child; the
        // end tag spans from the last child to the element end.
        const firstChildOffset = firstChild?.position?.start.offset ?? token.offset + token.raw.length;
        const lastChildEndOffset = lastChild?.position?.end.offset ?? firstChildOffset;
        const elementEndOffset = originNode.position?.end.offset ?? 0;
        const startTagToken = this.sliceFragment(token.offset, firstChildOffset);
        const parsedNodes = this.parseCodeFragment({
            ...startTagToken,
            depth,
            parentNode,
        }, { namelessFragment: true });
        const startTag = parsedNodes.at(0);
        if (!startTag || startTag.type !== 'starttag') {
            return this.#visitAsPsBlock(originNode, token, depth, parentNode);
        }
        return super.visitElement(startTag, [...children], {
            namelessFragment: true,
            createEndTagToken: () => {
                if (lastChildEndOffset >= elementEndOffset) {
                    // Nothing left after the last child: no end tag in source.
                    return null;
                }
                const endTagToken = this.sliceFragment(lastChildEndOffset, elementEndOffset);
                if (!endTagToken.raw.trim()) {
                    // Only whitespace remains: treat as no end tag.
                    return null;
                }
                return {
                    ...endTagToken,
                    depth,
                    parentNode,
                };
            },
        });
    }
}
|
|
228
|
+
/**
 * Flattens top-level mdast children by unwrapping paragraphs that contain
 * inline JSX elements (`mdxJsxTextElement`) or inline expressions
 * (`mdxTextExpression`), so that those nodes and their sibling inline nodes
 * appear at the top level.
 *
 * Only inspects direct children of root; does not recurse into nested nodes.
 * Unwrapped children are the original node objects, so their position
 * information is preserved. The input array is not modified.
 *
 * @param children - The root-level mdast children to process.
 * @returns A new array with JSX- or expression-containing paragraphs unwrapped;
 *   all other nodes (including pure Markdown paragraphs) are kept as-is.
 */
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
function flattenMdastChildren(children) {
    const result = [];
    for (const child of children) {
        const isMdxParagraph = child.type === 'paragraph' &&
            'children' in child &&
            child.children.some(c => c.type === 'mdxJsxTextElement' || c.type === 'mdxTextExpression');
        if (!isMdxParagraph) {
            // Non-paragraph node or pure Markdown paragraph: keep as-is
            result.push(child);
            continue;
        }
        // Push one by one rather than `push(...children)`: spreading passes
        // every element as a call argument, which can exceed the engine's
        // argument limit on very large paragraphs.
        for (const inlineNode of child.children) {
            result.push(inlineNode);
        }
    }
    return result;
}
|
|
260
|
+
export const parser = new MDXParser();
|