@markuplint/parser-utils 4.8.9 → 4.8.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.ja.md +208 -0
- package/ARCHITECTURE.md +251 -0
- package/CHANGELOG.md +5 -1
- package/README.md +6 -0
- package/SKILL.md +126 -0
- package/docs/maintenance.ja.md +176 -0
- package/docs/maintenance.md +176 -0
- package/docs/parser-class.ja.md +655 -0
- package/docs/parser-class.md +655 -0
- package/lib/debugger.d.ts +25 -0
- package/lib/debugger.js +25 -0
- package/lib/enums.d.ts +10 -0
- package/lib/enums.js +10 -0
- package/lib/get-namespace.d.ts +2 -0
- package/lib/get-namespace.js +29 -0
- package/lib/idl-attributes.d.ts +9 -0
- package/lib/idl-attributes.js +9 -0
- package/lib/parser-error.d.ts +16 -0
- package/lib/parser-error.js +12 -0
- package/lib/parser.d.ts +282 -0
- package/lib/parser.js +265 -3
- package/lib/script-parser.d.ts +21 -0
- package/lib/script-parser.js +17 -0
- package/lib/types.d.ts +57 -0
- package/package.json +10 -10
|
@@ -0,0 +1,655 @@
|
|
|
1
|
+
# Parser Class Reference
|
|
2
|
+
|
|
3
|
+
The `Parser<Node, State>` abstract class is the foundation of every markuplint parser. It defines the complete parsing pipeline — from raw source code to a flat `MLASTNodeTreeItem[]` — and provides a rich set of visitor and utility methods that subclasses override to support specific markup languages.
|
|
4
|
+
|
|
5
|
+
## Design Pattern
|
|
6
|
+
|
|
7
|
+
The Parser uses the **Template Method** pattern. The `parse()` method orchestrates an 11-step pipeline, calling protected hook methods at each stage. Subclasses override specific hooks (primarily `tokenize` and `nodeize`) to inject language-specific behavior while inheriting the common pipeline logic.
|
|
8
|
+
|
|
9
|
+
```mermaid
|
|
10
|
+
classDiagram
|
|
11
|
+
class Parser~Node State~ {
|
|
12
|
+
<<abstract>>
|
|
13
|
+
+parse(rawCode, options) MLASTDocument
|
|
14
|
+
+tokenize(options) Tokenized
|
|
15
|
+
+nodeize(originNode, parentNode, depth) MLASTNodeTreeItem[]
|
|
16
|
+
+beforeParse(rawCode, options) string
|
|
17
|
+
+afterParse(nodeList, options) MLASTNodeTreeItem[]
|
|
18
|
+
+visitElement(token, childNodes, options) MLASTNodeTreeItem[]
|
|
19
|
+
+visitText(token, options) MLASTNodeTreeItem[]
|
|
20
|
+
+visitComment(token, options) MLASTNodeTreeItem[]
|
|
21
|
+
+visitAttr(token, options) MLASTAttr
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
class HtmlParser {
|
|
25
|
+
+tokenize() Tokenized
|
|
26
|
+
+nodeize() MLASTNodeTreeItem[]
|
|
27
|
+
+beforeParse() string
|
|
28
|
+
+afterParse() MLASTNodeTreeItem[]
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
class JSXParser {
|
|
32
|
+
+tokenize() Tokenized
|
|
33
|
+
+nodeize() MLASTNodeTreeItem[]
|
|
34
|
+
+afterTraverse() MLASTNodeTreeItem[]
|
|
35
|
+
+visitAttr() MLASTAttr
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
class VueParser {
|
|
39
|
+
+tokenize() Tokenized
|
|
40
|
+
+nodeize() MLASTNodeTreeItem[]
|
|
41
|
+
+flattenNodes() MLASTNodeTreeItem[]
|
|
42
|
+
+visitAttr() MLASTAttr
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
class SvelteParser {
|
|
46
|
+
+tokenize() Tokenized
|
|
47
|
+
+nodeize() MLASTNodeTreeItem[]
|
|
48
|
+
+visitText() MLASTNodeTreeItem[]
|
|
49
|
+
+visitPsBlock() MLASTNodeTreeItem[]
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
Parser <|-- HtmlParser
|
|
53
|
+
Parser <|-- JSXParser
|
|
54
|
+
Parser <|-- VueParser
|
|
55
|
+
Parser <|-- SvelteParser
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Type Parameters
|
|
59
|
+
|
|
60
|
+
| Parameter | Constraint | Default | Description |
|
|
61
|
+
| --------- | ----------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
|
62
|
+
| `Node` | `extends {}` | `{}` | The language-specific AST node type produced by the tokenizer (e.g., parse5's `Node`, Svelte's `SvelteNode`) |
|
|
63
|
+
| `State` | `extends unknown` | `null` | An optional parser state type that persists across a single `parse()` call. Cloned from `defaultState` at the start and reset at the end |
|
|
64
|
+
|
|
65
|
+
## Constructor / ParserOptions
|
|
66
|
+
|
|
67
|
+
```ts
|
|
68
|
+
constructor(options?: ParserOptions, defaultState?: State)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The constructor accepts a `ParserOptions` object and an optional default state value:
|
|
72
|
+
|
|
73
|
+
| Option | Type | Default | Description |
|
|
74
|
+
| ---------------------- | ---------------------- | ------------------------------- | ------------------------------------------------------------------------------------------ |
|
|
75
|
+
| `booleanish` | `boolean` | `false` | Treat omitted attribute values as `true` (e.g., JSX `<Component aria-hidden />`) |
|
|
76
|
+
| `endTagType` | `EndTagType` | `'omittable'` | `'xml'`: end tag required or self-close; `'omittable'`: may omit; `'never'`: never needed |
|
|
77
|
+
| `ignoreTags` | `readonly IgnoreTag[]` | `[]` | Patterns for code blocks to mask before parsing (e.g., template expressions) |
|
|
78
|
+
| `maskChar` | `string` | `'\uE000'` (MASK_CHAR) | Character used to replace masked code blocks |
|
|
79
|
+
| `tagNameCaseSensitive` | `boolean` | `false` | Whether tag name comparisons are case-sensitive (e.g., JSX, Svelte) |
|
|
80
|
+
| `selfCloseType` | `SelfCloseType` | `'html'` | `'html'`: only void elements self-close; `'xml'`: solidus determines; `'html+xml'`: either |
|
|
81
|
+
| `spaceChars` | `readonly string[]` | `['\t', '\n', '\f', '\r', ' ']` | Characters treated as whitespace in tag parsing |
|
|
82
|
+
| `rawTextElements` | `readonly string[]` | `['style', 'script']` | Elements whose children are not traversed (raw text content) |
|
|
83
|
+
|
|
84
|
+
## Parse Pipeline
|
|
85
|
+
|
|
86
|
+
The `parse()` method drives the full pipeline:
|
|
87
|
+
|
|
88
|
+
```mermaid
|
|
89
|
+
flowchart TD
|
|
90
|
+
A["1. beforeParse()"] --> B["2. ignoreFrontMatter()"]
|
|
91
|
+
B --> C["3. ignoreBlock()"]
|
|
92
|
+
C --> D["4. tokenize()"]
|
|
93
|
+
D --> E["5. traverse() → nodeize()"]
|
|
94
|
+
E --> F["6. afterTraverse()"]
|
|
95
|
+
F --> G["7. flattenNodes()"]
|
|
96
|
+
G --> H["8. afterFlattenNodes()"]
|
|
97
|
+
H --> I["9. restoreNode()"]
|
|
98
|
+
I --> J["10. afterParse()"]
|
|
99
|
+
J --> K["11. Return MLASTDocument"]
|
|
100
|
+
|
|
101
|
+
style D fill:#e1f5fe
|
|
102
|
+
style E fill:#e1f5fe
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Steps highlighted in blue are the primary override points.
|
|
106
|
+
|
|
107
|
+
### Step 1: beforeParse()
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
beforeParse(rawCode: string, options?: ParseOptions): string
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Prepends offset spaces based on `ParseOptions` (`offsetOffset`, `offsetLine`, `offsetColumn`). This adjusts the coordinate system for embedded code fragments (e.g., a `<template>` block inside a `.vue` file).
|
|
114
|
+
|
|
115
|
+
### Step 2: Front Matter Removal
|
|
116
|
+
|
|
117
|
+
If `options.ignoreFrontMatter` is true, `ignoreFrontMatter()` detects YAML front matter (`---\n...\n---\n`) and replaces it with spaces while preserving line breaks. The front matter is restored as a `#ps:front-matter` psblock node at the end of the pipeline.
|
|
118
|
+
|
|
119
|
+
### Step 3: Ignore Block Masking
|
|
120
|
+
|
|
121
|
+
`ignoreBlock()` scans the source for patterns defined in `ignoreTags` and replaces matching blocks with mask characters wrapped in `<!...>` bogus comment syntax. This prevents template expressions (e.g., `{{ expr }}`, `{#if}`) from interfering with HTML parsing.
|
|
122
|
+
|
|
123
|
+
### Step 4: tokenize()
|
|
124
|
+
|
|
125
|
+
```ts
|
|
126
|
+
tokenize(options?: ParseOptions): Tokenized<Node, State>
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Primary override point.** The default implementation returns an empty result (a `Tokenized` object whose `ast` contains no nodes). Each parser overrides this to invoke its language-specific tokenizer (parse5, vue-eslint-parser, svelte/compiler, etc.) and return the resulting AST.
|
|
130
|
+
|
|
131
|
+
### Step 5: traverse() → nodeize()
|
|
132
|
+
|
|
133
|
+
```ts
|
|
134
|
+
traverse(originNodes: readonly Node[], parentNode: MLASTParentNode | null, depth: number)
|
|
135
|
+
nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[]
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
`traverse()` iterates over tokenized nodes and calls `nodeize()` for each one. **`nodeize()` is the second primary override point** — subclasses convert language-specific AST nodes into markuplint AST nodes using visitor methods.
|
|
139
|
+
|
|
140
|
+
After `nodeize()`, `afterNodeize()` separates the resulting nodes into siblings at the current depth and ancestors at shallower depths.
|
|
141
|
+
|
|
142
|
+
### Step 6: afterTraverse()
|
|
143
|
+
|
|
144
|
+
```ts
|
|
145
|
+
afterTraverse(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[]
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Sorts the node tree by source position. Subclasses may override for post-traversal restructuring (e.g., JSX remaps parentId references for expression containers).
|
|
149
|
+
|
|
150
|
+
### Step 7: flattenNodes()
|
|
151
|
+
|
|
152
|
+
```ts
|
|
153
|
+
flattenNodes(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[]
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Walks the hierarchical node tree depth-first and produces a flat, sorted list. Removes duplicated nodes.
|
|
157
|
+
|
|
158
|
+
### Step 8: afterFlattenNodes()
|
|
159
|
+
|
|
160
|
+
```ts
|
|
161
|
+
afterFlattenNodes(
|
|
162
|
+
nodeList: readonly MLASTNodeTreeItem[],
|
|
163
|
+
options?: {
|
|
164
|
+
readonly exposeInvalidNode?: boolean; // default: true
|
|
165
|
+
readonly exposeWhiteSpace?: boolean; // default: true
|
|
166
|
+
readonly concatText?: boolean; // default: true
|
|
167
|
+
}
|
|
168
|
+
): readonly MLASTNodeTreeItem[]
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Performs four cleanup passes:
|
|
172
|
+
|
|
173
|
+
1. **Expose remnant nodes** — discovers whitespace and invalid markup between known nodes
|
|
174
|
+
2. **Orphan end tags → bogus** — converts unmatched end tags to `invalid` nodes
|
|
175
|
+
3. **Concatenate text** — merges adjacent `#text` nodes at the same offset
|
|
176
|
+
4. **Trim text** — trims overlapping text node boundaries
|
|
177
|
+
|
|
178
|
+
### Step 9: restoreNode()
|
|
179
|
+
|
|
180
|
+
`restoreNode()` walks the flat node list and replaces mask characters with the original code. Each restored block becomes a `#ps:<type>` psblock node. Masked content inside attribute values is also restored and marked as `isDynamicValue`.
|
|
181
|
+
|
|
182
|
+
### Step 10: afterParse()
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[]
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Removes the offset spaces prepended in step 1. Subclasses may add further post-processing.
|
|
189
|
+
|
|
190
|
+
### Step 11: Return
|
|
191
|
+
|
|
192
|
+
Returns an `MLASTDocument` containing `{ raw, nodeList, isFragment }`.
|
|
193
|
+
|
|
194
|
+
## Visitor Methods
|
|
195
|
+
|
|
196
|
+
### visitElement()
|
|
197
|
+
|
|
198
|
+
```ts
|
|
199
|
+
visitElement(
|
|
200
|
+
token: ChildToken & { nodeName: string; namespace: string },
|
|
201
|
+
childNodes?: readonly Node[],
|
|
202
|
+
options?: {
|
|
203
|
+
createEndTagToken?: (startTag: MLASTElement) => ChildToken | null;
|
|
204
|
+
namelessFragment?: boolean;
|
|
205
|
+
overwriteProps?: Partial<MLASTElement>;
|
|
206
|
+
}
|
|
207
|
+
): readonly MLASTNodeTreeItem[]
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Creates an element start tag node. Handles:
|
|
211
|
+
|
|
212
|
+
- **Ghost elements** — if `token.raw === ''`, creates an `isGhost: true` element (used for omitted tags like implicit `<head>`, `<body>` in HTML)
|
|
213
|
+
- **Self-closing detection** — based on `selfCloseType` setting and void element status
|
|
214
|
+
- **End tag pairing** — if `createEndTagToken` returns a token, creates and pairs the end tag
|
|
215
|
+
- **Nameless fragments** — JSX `<>...</>` fragments with empty tag name
|
|
216
|
+
|
|
217
|
+
### visitText()
|
|
218
|
+
|
|
219
|
+
```ts
|
|
220
|
+
visitText(
|
|
221
|
+
token: ChildToken,
|
|
222
|
+
options?: {
|
|
223
|
+
researchTags?: boolean;
|
|
224
|
+
invalidTagAsText?: boolean;
|
|
225
|
+
}
|
|
226
|
+
): readonly MLASTNodeTreeItem[]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Creates a text node. When `researchTags` is true, re-parses the text via `parseCodeFragment()` to discover embedded HTML tags. If `invalidTagAsText` is also true, any discovered start tags cause the entire content to be treated as a single text node.
|
|
230
|
+
|
|
231
|
+
### visitComment()
|
|
232
|
+
|
|
233
|
+
```ts
|
|
234
|
+
visitComment(
|
|
235
|
+
token: ChildToken,
|
|
236
|
+
options?: { isBogus?: boolean }
|
|
237
|
+
): readonly MLASTNodeTreeItem[]
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Creates a comment node. Automatically detects bogus comments (those not starting with `<!--`). The `isBogus` option can override this detection.
|
|
241
|
+
|
|
242
|
+
### visitDoctype()
|
|
243
|
+
|
|
244
|
+
```ts
|
|
245
|
+
visitDoctype(
|
|
246
|
+
token: ChildToken & { name: string; publicId: string; systemId: string }
|
|
247
|
+
): readonly MLASTNodeTreeItem[]
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Creates a doctype node from a token containing the doctype name, public ID, and system ID.
|
|
251
|
+
|
|
252
|
+
### visitPsBlock()
|
|
253
|
+
|
|
254
|
+
```ts
|
|
255
|
+
visitPsBlock(
|
|
256
|
+
token: ChildToken & { nodeName: string; isFragment: boolean },
|
|
257
|
+
childNodes?: readonly Node[],
|
|
258
|
+
conditionalType?: MLASTPreprocessorSpecificBlockConditionalType,
|
|
259
|
+
originBlockNode?: Node
|
|
260
|
+
): readonly MLASTNodeTreeItem[]
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
Creates a preprocessor-specific block node. The `nodeName` is automatically prefixed with `#ps:` (e.g., `#ps:if`, `#ps:each`, `#ps:front-matter`). Recursively traverses child nodes via `visitChildren()`.
|
|
264
|
+
|
|
265
|
+
### visitAttr()
|
|
266
|
+
|
|
267
|
+
```ts
|
|
268
|
+
visitAttr(
|
|
269
|
+
token: Token,
|
|
270
|
+
options?: {
|
|
271
|
+
quoteSet?: readonly QuoteSet[];
|
|
272
|
+
noQuoteValueType?: ValueType;
|
|
273
|
+
endOfUnquotedValueChars?: readonly string[];
|
|
274
|
+
startState?: AttrState;
|
|
275
|
+
}
|
|
276
|
+
): MLASTAttr & { __rightText?: string }
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Parses a raw attribute string into a fully decomposed `MLASTAttr` with individual tokens for spaces, name, equal sign, quotes, and value. Uses the `AttrState` state machine internally via `attrTokenizer()`.
|
|
280
|
+
|
|
281
|
+
If the raw string contains multiple attributes, only the first is parsed and the remainder is returned in `__rightText` for iterative processing.
|
|
282
|
+
|
|
283
|
+
Also attempts to detect spread attributes via `visitSpreadAttr()`.
|
|
284
|
+
|
|
285
|
+
### visitSpreadAttr()
|
|
286
|
+
|
|
287
|
+
```ts
|
|
288
|
+
visitSpreadAttr(token: Token): MLASTSpreadAttr | null
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Detects JSX spread attributes matching the pattern `{...expr}`. Returns `null` if the token doesn't match. The HTML parser overrides this to always return `null`.
|
|
292
|
+
|
|
293
|
+
### visitChildren()
|
|
294
|
+
|
|
295
|
+
```ts
|
|
296
|
+
visitChildren(
|
|
297
|
+
children: readonly Node[],
|
|
298
|
+
parentNode: MLASTParentNode | null
|
|
299
|
+
): readonly MLASTNodeTreeItem[]
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
Traverses child nodes under a parent. Skips traversal for `rawTextElements` (e.g., `<script>`, `<style>`). Returns sibling nodes that belong to ancestor depth levels.
|
|
303
|
+
|
|
304
|
+
## State Machines
|
|
305
|
+
|
|
306
|
+
### TagState
|
|
307
|
+
|
|
308
|
+
Used during tag parsing in `#parseTag()`:
|
|
309
|
+
|
|
310
|
+
```mermaid
|
|
311
|
+
stateDiagram-v2
|
|
312
|
+
[*] --> BeforeOpenTag
|
|
313
|
+
BeforeOpenTag --> FirstCharOfTagName : "<"
|
|
314
|
+
FirstCharOfTagName --> TagName : /[a-z]/i
|
|
315
|
+
FirstCharOfTagName --> FirstCharOfTagName : "/" (close tag)
|
|
316
|
+
FirstCharOfTagName --> AfterOpenTag : ">" (nameless)
|
|
317
|
+
TagName --> Attrs : whitespace
|
|
318
|
+
TagName --> AfterAttrs : "/"
|
|
319
|
+
TagName --> AfterOpenTag : ">"
|
|
320
|
+
Attrs --> AfterAttrs : "/" or ">"
|
|
321
|
+
AfterAttrs --> AfterOpenTag : ">"
|
|
322
|
+
AfterOpenTag --> [*]
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### AttrState
|
|
326
|
+
|
|
327
|
+
Used during attribute parsing in `attrTokenizer()`:
|
|
328
|
+
|
|
329
|
+
```mermaid
|
|
330
|
+
stateDiagram-v2
|
|
331
|
+
[*] --> BeforeName
|
|
332
|
+
BeforeName --> Name : non-space, non-">"
|
|
333
|
+
BeforeName --> AfterValue : ">" or "/"
|
|
334
|
+
Name --> Equal : whitespace
|
|
335
|
+
Name --> BeforeValue : "="
|
|
336
|
+
Name --> AfterValue : ">" or "/"
|
|
337
|
+
Equal --> BeforeValue : "="
|
|
338
|
+
Equal --> AfterValue : other
|
|
339
|
+
BeforeValue --> Value : quote or char
|
|
340
|
+
Value --> AfterValue : end quote or unquoted end
|
|
341
|
+
AfterValue --> [*]
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
## Token Creation Utilities
|
|
345
|
+
|
|
346
|
+
### createToken()
|
|
347
|
+
|
|
348
|
+
```ts
|
|
349
|
+
createToken(token: Token): MLASTToken;
|
|
350
|
+
createToken(token: string, startOffset: number, startLine: number, startCol: number): MLASTToken;
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
Creates a new `MLASTToken` with a generated UUID (8 chars) and computed end position. Accepts either a `Token` object or a raw string with explicit coordinates.
|
|
354
|
+
|
|
355
|
+
### sliceFragment()
|
|
356
|
+
|
|
357
|
+
```ts
|
|
358
|
+
sliceFragment(start: number, end?: number): Token
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
Extracts a `Token` from the current `rawCode` at the given offset range (string indices into `rawCode`), computing line and column from the source position.
|
|
362
|
+
|
|
363
|
+
### getOffsetsFromCode()
|
|
364
|
+
|
|
365
|
+
```ts
|
|
366
|
+
getOffsetsFromCode(
|
|
367
|
+
startLine: number, startCol: number,
|
|
368
|
+
endLine: number, endCol: number
|
|
369
|
+
): { offset: number; endOffset: number }
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
Converts line/column positions to offsets (string indices) within the current raw source code.
|
|
373
|
+
|
|
374
|
+
## Tree Manipulation
|
|
375
|
+
|
|
376
|
+
### appendChild()
|
|
377
|
+
|
|
378
|
+
```ts
|
|
379
|
+
appendChild(parentNode: MLASTParentNode | null, ...childNodes: readonly MLASTChildNode[]): void
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
Appends child nodes to a parent, maintaining sorted order by source position. If a child already exists (by UUID), it is replaced in place.
|
|
383
|
+
|
|
384
|
+
### replaceChild()
|
|
385
|
+
|
|
386
|
+
```ts
|
|
387
|
+
replaceChild(
|
|
388
|
+
parentNode: MLASTParentNode,
|
|
389
|
+
oldChildNode: MLASTChildNode,
|
|
390
|
+
...replacementChildNodes: readonly MLASTChildNode[]
|
|
391
|
+
): void
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
Replaces a child node within a parent's child list with one or more replacement nodes.
|
|
395
|
+
|
|
396
|
+
### walk()
|
|
397
|
+
|
|
398
|
+
```ts
|
|
399
|
+
walk<Node extends MLASTNodeTreeItem>(
|
|
400
|
+
nodeList: readonly Node[],
|
|
401
|
+
walker: Walker<Node>,
|
|
402
|
+
depth?: number
|
|
403
|
+
): void
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
Walks a node list depth-first, invoking the walker callback for each node. The walker receives the current node, the sequentially previous node, and the depth. Automatically recurses into child nodes.
|
|
407
|
+
|
|
408
|
+
## Update Methods
|
|
409
|
+
|
|
410
|
+
### updateLocation()
|
|
411
|
+
|
|
412
|
+
```ts
|
|
413
|
+
updateLocation(
|
|
414
|
+
node: MLASTNodeTreeItem,
|
|
415
|
+
props: Partial<Pick<MLASTNodeTreeItem, 'startOffset' | 'startLine' | 'startCol' | 'depth'>>
|
|
416
|
+
): void
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
Updates position and depth properties of an AST node, recalculating end offsets/lines/columns from the new start values.
|
|
420
|
+
|
|
421
|
+
### updateRaw()
|
|
422
|
+
|
|
423
|
+
```ts
|
|
424
|
+
updateRaw(node: MLASTToken, raw: string): void
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
Replaces the raw code of a node and updates all positional properties accordingly.
|
|
428
|
+
|
|
429
|
+
### updateElement()
|
|
430
|
+
|
|
431
|
+
```ts
|
|
432
|
+
updateElement(el: MLASTElement, props: Partial<Pick<MLASTElement, 'nodeName' | 'elementType'>>): void
|
|
433
|
+
updateElement(el: MLASTElementCloseTag, props: Partial<Pick<MLASTElementCloseTag, 'nodeName'>>): void
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
Updates the node name and/or element type of an element or close tag node.
|
|
437
|
+
|
|
438
|
+
### updateAttr()
|
|
439
|
+
|
|
440
|
+
```ts
|
|
441
|
+
updateAttr(
|
|
442
|
+
attr: MLASTHTMLAttr,
|
|
443
|
+
props: Partial<Pick<MLASTHTMLAttr,
|
|
444
|
+
'isDynamicValue' | 'isDirective' | 'potentialName' | 'potentialValue' |
|
|
445
|
+
'valueType' | 'candidate' | 'isDuplicatable'
|
|
446
|
+
>>
|
|
447
|
+
): void
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
Updates metadata properties on an attribute node, such as marking it as a directive or dynamic value.
|
|
451
|
+
|
|
452
|
+
## Ignore Block System
|
|
453
|
+
|
|
454
|
+
The ignore block system masks template expressions and preprocessor directives before HTML parsing, then restores them afterward.
|
|
455
|
+
|
|
456
|
+
### Lifecycle
|
|
457
|
+
|
|
458
|
+
1. **Define** — `IgnoreTag` patterns in `ParserOptions.ignoreTags`:
|
|
459
|
+
|
|
460
|
+
```ts
|
|
461
|
+
{ type: 'mustache', start: '{{', end: '}}' }
|
|
462
|
+
{ type: 'Style', start: '<style', end: '</style>' }
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
2. **Mask** — `ignoreBlock()` replaces matches with mask characters inside bogus comment syntax (`<!...>`), preserving line breaks for position tracking
|
|
466
|
+
|
|
467
|
+
3. **Parse** — the masked code is safe for HTML tokenization
|
|
468
|
+
|
|
469
|
+
4. **Restore** — `restoreNode()` walks the flat node list and replaces masked regions with `#ps:<type>` psblock nodes. Masked content in attribute values is restored and marked `isDynamicValue: true`
|
|
470
|
+
|
|
471
|
+
### IgnoreTag Definition
|
|
472
|
+
|
|
473
|
+
```ts
|
|
474
|
+
type IgnoreTag = {
|
|
475
|
+
readonly type: string; // Name used for #ps: prefix
|
|
476
|
+
readonly start: RegExp | string; // Start pattern
|
|
477
|
+
readonly end: RegExp | string; // End pattern
|
|
478
|
+
};
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
## Element Type Detection
|
|
482
|
+
|
|
483
|
+
```ts
|
|
484
|
+
detectElementType(nodeName: string, defaultPattern?: ParserAuthoredElementNameDistinguishing): ElementType
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
Classifies elements into three types:
|
|
488
|
+
|
|
489
|
+
| Type | Description | Example |
|
|
490
|
+
| ----------------- | ---------------------------------------------- | --------------------------- |
|
|
491
|
+
| `'html'` | Standard HTML element | `div`, `span`, `input` |
|
|
492
|
+
| `'web-component'` | Custom element (contains hyphen, per spec) | `my-component`, `x-button` |
|
|
493
|
+
| `'authored'` | Framework component (matches authored pattern) | `MyComponent`, `App.Header` |
|
|
494
|
+
|
|
495
|
+
The `authoredElementName` pattern is set from `ParseOptions` and can be a string, RegExp, function, or array of these. Each parser provides a framework-specific default pattern (e.g., `/^[A-Z]/` for JSX/Svelte, PascalCase + built-in list for Vue).
|
|
496
|
+
|
|
497
|
+
## Accessor Properties
|
|
498
|
+
|
|
499
|
+
| Property | Type | Description |
|
|
500
|
+
| ---------------------- | ------------------------------------------------------ | -------------------------------------------------------------- |
|
|
501
|
+
| `rawCode` | `string` | The current raw source code being parsed (may be preprocessed) |
|
|
502
|
+
| `booleanish` | `boolean` | Whether omitted attribute values are treated as `true` |
|
|
503
|
+
| `endTag` | `EndTagType` | The end tag handling strategy |
|
|
504
|
+
| `tagNameCaseSensitive` | `boolean` | Whether tag name comparisons are case-sensitive |
|
|
505
|
+
| `authoredElementName` | `ParserAuthoredElementNameDistinguishing \| undefined` | The pattern for distinguishing authored elements |
|
|
506
|
+
| `state` | `State` | The mutable parser state (reset after each `parse()` call) |
|
|
507
|
+
|
|
508
|
+
## Implementing a Parser
|
|
509
|
+
|
|
510
|
+
### Basic Structure
|
|
511
|
+
|
|
512
|
+
```ts
|
|
513
|
+
import { Parser } from '@markuplint/parser-utils';
|
|
514
|
+
import type { ParserOptions, ParseOptions, Tokenized, ChildToken } from '@markuplint/parser-utils';
|
|
515
|
+
import type { MLASTParentNode, MLASTNodeTreeItem } from '@markuplint/ml-ast';
|
|
516
|
+
|
|
517
|
+
// Your language-specific AST node type
|
|
518
|
+
type MyNode = {
|
|
519
|
+
/* ... */
|
|
520
|
+
};
|
|
521
|
+
|
|
522
|
+
class MyParser extends Parser<MyNode> {
|
|
523
|
+
constructor() {
|
|
524
|
+
super({
|
|
525
|
+
endTagType: 'xml',
|
|
526
|
+
tagNameCaseSensitive: true,
|
|
527
|
+
// ... other options
|
|
528
|
+
});
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
tokenize(options?: ParseOptions): Tokenized<MyNode> {
|
|
532
|
+
// Parse this.rawCode with your language's parser
|
|
533
|
+
const ast = myLanguageParser(this.rawCode);
|
|
534
|
+
return { ast: ast.children, isFragment: true };
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
nodeize(originNode: MyNode, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[] {
|
|
538
|
+
// Convert each language-specific node to markuplint AST nodes
|
|
539
|
+
// using visitor methods
|
|
540
|
+
switch (originNode.type) {
|
|
541
|
+
case 'element':
|
|
542
|
+
return this.visitElement(/* ... */);
|
|
543
|
+
case 'text':
|
|
544
|
+
return this.visitText(/* ... */);
|
|
545
|
+
case 'comment':
|
|
546
|
+
return this.visitComment(/* ... */);
|
|
547
|
+
default:
|
|
548
|
+
return [];
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
```
|
|
553
|
+
|
|
554
|
+
### Override Pattern Reference
|
|
555
|
+
|
|
556
|
+
| Method | super call | Pattern | Reason |
|
|
557
|
+
| --------------------- | -------------- | -------------------- | ------------------------------------------------------------------------------ |
|
|
558
|
+
| `tokenize()` | **Not needed** | Full replacement | Default returns an empty AST. Each parser provides its own tokenizer |
|
|
559
|
+
| `nodeize()` | **Not needed** | Full replacement | Default returns empty array. Each parser provides its own node conversion |
|
|
560
|
+
| `beforeParse()` | **Required** | super-first | `super.beforeParse()` handles offset space prepending. Add processing after |
|
|
561
|
+
| `afterParse()` | **Required** | super-first | `super.afterParse()` handles offset space removal. Add processing after |
|
|
562
|
+
| `afterTraverse()` | Recommended | super-first | `super` sorts by position. JSX adds parentId remapping after |
|
|
563
|
+
| `afterFlattenNodes()` | Recommended | wrapper | Pass options to `super` to control cleanup steps |
|
|
564
|
+
| `flattenNodes()` | Recommended | super-first | Vue calls super then injects template comments |
|
|
565
|
+
| `visitText()` | Recommended | wrapper | Pass options to `super`. Svelte post-processes script→psblock |
|
|
566
|
+
| `visitComment()` | Recommended | super-first | JSX overrides `isBogus` to `false` after super |
|
|
567
|
+
| `visitPsBlock()` | Recommended | wrapper + validation | Svelte validates return count after super |
|
|
568
|
+
| `visitChildren()` | Recommended | wrapper + validation | Svelte validates no siblings after super |
|
|
569
|
+
| `visitAttr()` | **Required** | super-first | `super.visitAttr()` performs token decomposition. Add directive handling after |
|
|
570
|
+
| `visitSpreadAttr()` | Not needed | Full replacement | HTML overrides to return `null` (no spread support) |
|
|
571
|
+
| `detectElementType()` | **Required** | wrapper | Pass framework-specific default pattern to `super` |
|
|
572
|
+
| `parseError()` | Recommended | conditional chain | Handle framework-specific errors first, fallback to `super` |
|
|
573
|
+
| `parse()` | Recommended | wrapper | Svelte modifies options then delegates to super |
|
|
574
|
+
|
|
575
|
+
### Pattern 1: Full Replacement (tokenize, nodeize)
|
|
576
|
+
|
|
577
|
+
No `super` call needed — the base implementation returns an empty array.
|
|
578
|
+
|
|
579
|
+
```ts
|
|
580
|
+
// From HtmlParser
|
|
581
|
+
tokenize(): Tokenized<Node, State> {
|
|
582
|
+
const doc = parse5.parse(this.rawCode);
|
|
583
|
+
return {
|
|
584
|
+
ast: doc.childNodes,
|
|
585
|
+
isFragment: false,
|
|
586
|
+
};
|
|
587
|
+
}
|
|
588
|
+
```
|
|
589
|
+
|
|
590
|
+
### Pattern 2: super-first + Post-processing (beforeParse, afterParse, visitAttr)
|
|
591
|
+
|
|
592
|
+
Call `super` first, then add processing.
|
|
593
|
+
|
|
594
|
+
```ts
|
|
595
|
+
// From HtmlParser
|
|
596
|
+
beforeParse(rawCode: string, options?: ParseOptions) {
|
|
597
|
+
const code = super.beforeParse(rawCode, options);
|
|
598
|
+
// Additional preprocessing...
|
|
599
|
+
return code;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// From VueParser
|
|
603
|
+
visitAttr(token: Token) {
|
|
604
|
+
const attr = super.visitAttr(token);
|
|
605
|
+
// Resolve Vue directive shorthands
|
|
606
|
+
if (attr.type === 'attr' && attr.name.raw.startsWith(':')) {
|
|
607
|
+
this.updateAttr(attr, {
|
|
608
|
+
potentialName: `v-bind:${attr.name.raw.slice(1)}`,
|
|
609
|
+
isDirective: true,
|
|
610
|
+
isDynamicValue: true,
|
|
611
|
+
});
|
|
612
|
+
}
|
|
613
|
+
return attr;
|
|
614
|
+
}
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
### Pattern 3: wrapper + Options Delegation (afterFlattenNodes, visitText)
|
|
618
|
+
|
|
619
|
+
Pass control options to `super`.
|
|
620
|
+
|
|
621
|
+
```ts
|
|
622
|
+
// From JSXParser
|
|
623
|
+
afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[]) {
|
|
624
|
+
return super.afterFlattenNodes(nodeList, {
|
|
625
|
+
exposeWhiteSpace: false,
|
|
626
|
+
exposeInvalidNode: false,
|
|
627
|
+
});
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
// From HtmlParser
|
|
631
|
+
visitText(token: ChildToken) {
|
|
632
|
+
return super.visitText(token, {
|
|
633
|
+
researchTags: true,
|
|
634
|
+
invalidTagAsText: true,
|
|
635
|
+
});
|
|
636
|
+
}
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
### Pattern 4: Conditional Chain (parseError)
|
|
640
|
+
|
|
641
|
+
Handle known error formats first, delegate unknown errors to `super`.
|
|
642
|
+
|
|
643
|
+
```ts
|
|
644
|
+
// From JSXParser
|
|
645
|
+
parseError(error: any) {
|
|
646
|
+
if (error.lineNumber != null && error.column != null) {
|
|
647
|
+
return new ParserError(error.message, {
|
|
648
|
+
line: error.lineNumber,
|
|
649
|
+
col: error.column,
|
|
650
|
+
raw: this.rawCode,
|
|
651
|
+
});
|
|
652
|
+
}
|
|
653
|
+
return super.parseError(error);
|
|
654
|
+
}
|
|
655
|
+
```
|
package/lib/debugger.d.ts
CHANGED
|
@@ -1,4 +1,29 @@
|
|
|
1
1
|
import type { MLASTAttr, MLASTNode } from '@markuplint/ml-ast';
|
|
2
|
+
/**
|
|
3
|
+
* Converts a list of AST nodes into human-readable debug strings showing
|
|
4
|
+
* each node's position, type, and raw content. Useful for snapshot testing.
|
|
5
|
+
*
|
|
6
|
+
* @param nodeList - The flat list of AST nodes to convert
|
|
7
|
+
* @param withAttr - Whether to include detailed attribute debug info for start tags
|
|
8
|
+
* @returns An array of formatted debug strings, one per node (plus attribute lines when enabled)
|
|
9
|
+
*/
|
|
2
10
|
export declare function nodeListToDebugMaps(nodeList: readonly (MLASTNode | null)[], withAttr?: boolean): string[];
|
|
11
|
+
/**
|
|
12
|
+
* Converts a list of AST attributes into detailed debug strings showing
|
|
13
|
+
* each attribute's components (name, equal sign, value, quotes) with
|
|
14
|
+
* their positions and additional metadata like directives and dynamic values.
|
|
15
|
+
*
|
|
16
|
+
* @param attributes - The list of attributes to convert into debug representations
|
|
17
|
+
* @returns An array of string arrays, one inner array per attribute containing its debug lines
|
|
18
|
+
*/
|
|
3
19
|
export declare function attributesToDebugMaps(attributes: readonly MLASTAttr[]): string[][];
|
|
20
|
+
/**
|
|
21
|
+
* Produces a tree-style debug view of AST nodes, showing indentation
|
|
22
|
+
* based on depth, parent-child relationships, pair node links,
|
|
23
|
+
* and ghost/bogus markers. Useful for visualizing the parsed DOM structure.
|
|
24
|
+
*
|
|
25
|
+
* @param nodeTree - The flat list of AST nodes to visualize as a tree
|
|
26
|
+
* @param idFilter - Whether to replace UUIDs with short sequential hex IDs for readability
|
|
27
|
+
* @returns An array of formatted strings representing the tree view
|
|
28
|
+
*/
|
|
4
29
|
export declare function nodeTreeDebugView(nodeTree: readonly MLASTNode[], idFilter?: boolean): (string | undefined)[];
|