@markuplint/parser-utils 4.8.10 → 5.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,655 @@
1
+ # Parser Class Reference
2
+
3
+ The `Parser<Node, State>` abstract class is the foundation of every markuplint parser. It defines the complete parsing pipeline — from raw source code to a flat `MLASTNodeTreeItem[]` — and provides a rich set of visitor and utility methods that subclasses override to support specific markup languages.
4
+
5
+ ## Design Pattern
6
+
7
+ The Parser uses the **Template Method** pattern. The `parse()` method orchestrates an 11-step pipeline, calling protected hook methods at each stage. Subclasses override specific hooks (primarily `tokenize` and `nodeize`) to inject language-specific behavior while inheriting the common pipeline logic.
8
+
9
+ ```mermaid
10
+ classDiagram
11
+ class Parser~Node State~ {
12
+ <<abstract>>
13
+ +parse(rawCode, options) MLASTDocument
14
+ +tokenize(options) Tokenized
15
+ +nodeize(originNode, parentNode, depth) MLASTNodeTreeItem[]
16
+ +beforeParse(rawCode, options) string
17
+ +afterParse(nodeList, options) MLASTNodeTreeItem[]
18
+ +visitElement(token, childNodes, options) MLASTNodeTreeItem[]
19
+ +visitText(token, options) MLASTNodeTreeItem[]
20
+ +visitComment(token, options) MLASTNodeTreeItem[]
21
+ +visitAttr(token, options) MLASTAttr
22
+ }
23
+
24
+ class HtmlParser {
25
+ +tokenize() Tokenized
26
+ +nodeize() MLASTNodeTreeItem[]
27
+ +beforeParse() string
28
+ +afterParse() MLASTNodeTreeItem[]
29
+ }
30
+
31
+ class JSXParser {
32
+ +tokenize() Tokenized
33
+ +nodeize() MLASTNodeTreeItem[]
34
+ +afterTraverse() MLASTNodeTreeItem[]
35
+ +visitAttr() MLASTAttr
36
+ }
37
+
38
+ class VueParser {
39
+ +tokenize() Tokenized
40
+ +nodeize() MLASTNodeTreeItem[]
41
+ +flattenNodes() MLASTNodeTreeItem[]
42
+ +visitAttr() MLASTAttr
43
+ }
44
+
45
+ class SvelteParser {
46
+ +tokenize() Tokenized
47
+ +nodeize() MLASTNodeTreeItem[]
48
+ +visitText() MLASTNodeTreeItem[]
49
+ +visitPsBlock() MLASTNodeTreeItem[]
50
+ }
51
+
52
+ Parser <|-- HtmlParser
53
+ Parser <|-- JSXParser
54
+ Parser <|-- VueParser
55
+ Parser <|-- SvelteParser
56
+ ```
57
+
58
+ ## Type Parameters
59
+
60
+ | Parameter | Constraint | Default | Description |
61
+ | --------- | ----------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
62
+ | `Node` | `extends {}` | `{}` | The language-specific AST node type produced by the tokenizer (e.g., parse5's `Node`, Svelte's `SvelteNode`) |
63
+ | `State` | `extends unknown` | `null` | An optional parser state type that persists across a single `parse()` call. Cloned from `defaultState` at the start and reset at the end |
64
+
65
+ ## Constructor / ParserOptions
66
+
67
+ ```ts
68
+ constructor(options?: ParserOptions, defaultState?: State)
69
+ ```
70
+
71
+ The constructor accepts an optional `ParserOptions` object and an optional default state value. The available options are:
72
+
73
+ | Option | Type | Default | Description |
74
+ | ---------------------- | ---------------------- | ------------------------------- | ------------------------------------------------------------------------------------------ |
75
+ | `booleanish` | `boolean` | `false` | Treat omitted attribute values as `true` (e.g., JSX `<Component aria-hidden />`) |
76
+ | `endTagType`           | `EndTagType`           | `'omittable'`                   | `'xml'`: end tag required or self-close; `'omittable'`: may omit; `'never'`: never needed  |
77
+ | `ignoreTags` | `readonly IgnoreTag[]` | `[]` | Patterns for code blocks to mask before parsing (e.g., template expressions) |
78
+ | `maskChar` | `string` | `'\uE000'` (MASK_CHAR) | Character used to replace masked code blocks |
79
+ | `tagNameCaseSensitive` | `boolean` | `false` | Whether tag name comparisons are case-sensitive (e.g., JSX, Svelte) |
80
+ | `selfCloseType` | `SelfCloseType` | `'html'` | `'html'`: only void elements self-close; `'xml'`: solidus determines; `'html+xml'`: either |
81
+ | `spaceChars` | `readonly string[]` | `['\t', '\n', '\f', '\r', ' ']` | Characters treated as whitespace in tag parsing |
82
+ | `rawTextElements` | `readonly string[]` | `['style', 'script']` | Elements whose children are not traversed (raw text content) |
83
+
84
+ ## Parse Pipeline
85
+
86
+ The `parse()` method drives the full pipeline:
87
+
88
+ ```mermaid
89
+ flowchart TD
90
+ A["1. beforeParse()"] --> B["2. ignoreFrontMatter()"]
91
+ B --> C["3. ignoreBlock()"]
92
+ C --> D["4. tokenize()"]
93
+ D --> E["5. traverse() → nodeize()"]
94
+ E --> F["6. afterTraverse()"]
95
+ F --> G["7. flattenNodes()"]
96
+ G --> H["8. afterFlattenNodes()"]
97
+ H --> I["9. restoreNode()"]
98
+ I --> J["10. afterParse()"]
99
+ J --> K["11. Return MLASTDocument"]
100
+
101
+ style D fill:#e1f5fe
102
+ style E fill:#e1f5fe
103
+ ```
104
+
105
+ Steps highlighted in blue are the primary override points.
106
+
107
+ ### Step 1: beforeParse()
108
+
109
+ ```ts
110
+ beforeParse(rawCode: string, options?: ParseOptions): string
111
+ ```
112
+
113
+ Prepends offset spaces based on `ParseOptions` (`offsetOffset`, `offsetLine`, `offsetColumn`). This adjusts the coordinate system for embedded code fragments (e.g., a `<template>` block inside a `.vue` file).
114
+
115
+ ### Step 2: Front Matter Removal
116
+
117
+ If `options.ignoreFrontMatter` is true, `ignoreFrontMatter()` detects YAML front matter (`---\n...\n---\n`) and replaces it with spaces while preserving line breaks. The front matter is restored as a `#ps:front-matter` psblock node at the end of the pipeline.
118
+
119
+ ### Step 3: Ignore Block Masking
120
+
121
+ `ignoreBlock()` scans the source for patterns defined in `ignoreTags` and replaces matching blocks with mask characters wrapped in `<!...>` bogus comment syntax. This prevents template expressions (e.g., `{{ expr }}`, `{#if}`) from interfering with HTML parsing.
122
+
123
+ ### Step 4: tokenize()
124
+
125
+ ```ts
126
+ tokenize(options?: ParseOptions): Tokenized<Node, State>
127
+ ```
128
+
129
+ **Primary override point.** The default implementation returns a `Tokenized` result with an empty `ast` array. Each parser overrides this to invoke its language-specific tokenizer (parse5, vue-eslint-parser, svelte/compiler, etc.) and return the resulting AST.
130
+
131
+ ### Step 5: traverse() → nodeize()
132
+
133
+ ```ts
134
+ traverse(originNodes: readonly Node[], parentNode: MLASTParentNode | null, depth: number)
135
+ nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[]
136
+ ```
137
+
138
+ `traverse()` iterates over tokenized nodes and calls `nodeize()` for each one. **`nodeize()` is the second primary override point** — subclasses convert language-specific AST nodes into markuplint AST nodes using visitor methods.
139
+
140
+ After `nodeize()`, `afterNodeize()` separates the resulting nodes into siblings at the current depth and ancestors at shallower depths.
141
+
142
+ ### Step 6: afterTraverse()
143
+
144
+ ```ts
145
+ afterTraverse(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[]
146
+ ```
147
+
148
+ Sorts the node tree by source position. Subclasses may override for post-traversal restructuring (e.g., JSX remaps parentId references for expression containers).
149
+
150
+ ### Step 7: flattenNodes()
151
+
152
+ ```ts
153
+ flattenNodes(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[]
154
+ ```
155
+
156
+ Walks the hierarchical node tree depth-first and produces a flat, sorted list. Removes duplicated nodes.
157
+
158
+ ### Step 8: afterFlattenNodes()
159
+
160
+ ```ts
161
+ afterFlattenNodes(
162
+ nodeList: readonly MLASTNodeTreeItem[],
163
+ options?: {
164
+ readonly exposeInvalidNode?: boolean; // default: true
165
+ readonly exposeWhiteSpace?: boolean; // default: true
166
+ readonly concatText?: boolean; // default: true
167
+ }
168
+ ): readonly MLASTNodeTreeItem[]
169
+ ```
170
+
171
+ Performs four cleanup passes:
172
+
173
+ 1. **Expose remnant nodes** — discovers whitespace and invalid markup between known nodes
174
+ 2. **Orphan end tags → bogus** — converts unmatched end tags to `invalid` nodes
175
+ 3. **Concatenate text** — merges adjacent `#text` nodes at the same offset
176
+ 4. **Trim text** — trims overlapping text node boundaries
177
+
178
+ ### Step 9: restoreNode()
179
+
180
+ `restoreNode()` walks the flat node list and replaces mask characters with the original code. Each restored block becomes a `#ps:<type>` psblock node. Masked content inside attribute values is also restored and marked as `isDynamicValue`.
181
+
182
+ ### Step 10: afterParse()
183
+
184
+ ```ts
185
+ afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[]
186
+ ```
187
+
188
+ Removes the offset spaces prepended in step 1. Subclasses may add further post-processing.
189
+
190
+ ### Step 11: Return
191
+
192
+ Returns an `MLASTDocument` containing `{ raw, nodeList, isFragment }`.
193
+
194
+ ## Visitor Methods
195
+
196
+ ### visitElement()
197
+
198
+ ```ts
199
+ visitElement(
200
+ token: ChildToken & { nodeName: string; namespace: string },
201
+ childNodes?: readonly Node[],
202
+ options?: {
203
+ createEndTagToken?: (startTag: MLASTElement) => ChildToken | null;
204
+ namelessFragment?: boolean;
205
+ overwriteProps?: Partial<MLASTElement>;
206
+ }
207
+ ): readonly MLASTNodeTreeItem[]
208
+ ```
209
+
210
+ Creates an element start tag node. Handles:
211
+
212
+ - **Ghost elements** — if `token.raw === ''`, creates an `isGhost: true` element (used for omitted tags like implicit `<head>`, `<body>` in HTML)
213
+ - **Self-closing detection** — based on `selfCloseType` setting and void element status
214
+ - **End tag pairing** — if `createEndTagToken` returns a token, creates and pairs the end tag
215
+ - **Nameless fragments** — JSX `<>...</>` fragments with empty tag name
216
+
217
+ ### visitText()
218
+
219
+ ```ts
220
+ visitText(
221
+ token: ChildToken,
222
+ options?: {
223
+ researchTags?: boolean;
224
+ invalidTagAsText?: boolean;
225
+ }
226
+ ): readonly MLASTNodeTreeItem[]
227
+ ```
228
+
229
+ Creates a text node. When `researchTags` is true, re-parses the text via `parseCodeFragment()` to discover embedded HTML tags. If `invalidTagAsText` is also true, any discovered start tags cause the entire content to be treated as a single text node.
230
+
231
+ ### visitComment()
232
+
233
+ ```ts
234
+ visitComment(
235
+ token: ChildToken,
236
+ options?: { isBogus?: boolean }
237
+ ): readonly MLASTNodeTreeItem[]
238
+ ```
239
+
240
+ Creates a comment node. Automatically detects bogus comments (those not starting with `<!--`). The `isBogus` option can override this detection.
241
+
242
+ ### visitDoctype()
243
+
244
+ ```ts
245
+ visitDoctype(
246
+ token: ChildToken & { name: string; publicId: string; systemId: string }
247
+ ): readonly MLASTNodeTreeItem[]
248
+ ```
249
+
250
+ Creates a doctype node from a token containing the doctype name, public ID, and system ID.
251
+
252
+ ### visitPsBlock()
253
+
254
+ ```ts
255
+ visitPsBlock(
256
+ token: ChildToken & { nodeName: string; isFragment: boolean },
257
+ childNodes?: readonly Node[],
258
+ blockBehavior?: MLASTBlockBehavior | null,
259
+ originBlockNode?: Node
260
+ ): readonly MLASTNodeTreeItem[]
261
+ ```
262
+
263
+ Creates a preprocessor-specific block node. The `nodeName` is automatically prefixed with `#ps:` (e.g., `#ps:if`, `#ps:each`, `#ps:front-matter`). The `blockBehavior` parameter describes the control-flow semantics (type and expression) of the block. Recursively traverses child nodes via `visitChildren()`.
264
+
265
+ ### visitAttr()
266
+
267
+ ```ts
268
+ visitAttr(
269
+ token: Token,
270
+ options?: {
271
+ quoteSet?: readonly QuoteSet[];
272
+ noQuoteValueType?: ValueType;
273
+ endOfUnquotedValueChars?: readonly string[];
274
+ startState?: AttrState;
275
+ }
276
+ ): MLASTAttr & { __rightText?: string }
277
+ ```
278
+
279
+ Parses a raw attribute string into a fully decomposed `MLASTAttr` with individual tokens for spaces, name, equal sign, quotes, and value. Uses the `AttrState` state machine internally via `attrTokenizer()`.
280
+
281
+ If the raw string contains multiple attributes, only the first is parsed and the remainder is returned in `__rightText` for iterative processing.
282
+
283
+ Also attempts to detect spread attributes via `visitSpreadAttr()`.
284
+
285
+ ### visitSpreadAttr()
286
+
287
+ ```ts
288
+ visitSpreadAttr(token: Token): MLASTSpreadAttr | null
289
+ ```
290
+
291
+ Detects JSX spread attributes matching the pattern `{...expr}`. Returns `null` if the token doesn't match. The HTML parser overrides this to always return `null`.
292
+
293
+ ### visitChildren()
294
+
295
+ ```ts
296
+ visitChildren(
297
+ children: readonly Node[],
298
+ parentNode: MLASTParentNode | null
299
+ ): readonly MLASTNodeTreeItem[]
300
+ ```
301
+
302
+ Traverses child nodes under a parent. Skips traversal for `rawTextElements` (e.g., `<script>`, `<style>`). Returns sibling nodes that belong to ancestor depth levels.
303
+
304
+ ## State Machines
305
+
306
+ ### TagState
307
+
308
+ Used during tag parsing in `#parseTag()`:
309
+
310
+ ```mermaid
311
+ stateDiagram-v2
312
+ [*] --> BeforeOpenTag
313
+ BeforeOpenTag --> FirstCharOfTagName : "<"
314
+ FirstCharOfTagName --> TagName : /[a-z]/i
315
+ FirstCharOfTagName --> FirstCharOfTagName : "/" (close tag)
316
+ FirstCharOfTagName --> AfterOpenTag : ">" (nameless)
317
+ TagName --> Attrs : whitespace
318
+ TagName --> AfterAttrs : "/"
319
+ TagName --> AfterOpenTag : ">"
320
+ Attrs --> AfterAttrs : "/" or ">"
321
+ AfterAttrs --> AfterOpenTag : ">"
322
+ AfterOpenTag --> [*]
323
+ ```
324
+
325
+ ### AttrState
326
+
327
+ Used during attribute parsing in `attrTokenizer()`:
328
+
329
+ ```mermaid
330
+ stateDiagram-v2
331
+ [*] --> BeforeName
332
+ BeforeName --> Name : non-space, non-">"
333
+ BeforeName --> AfterValue : ">" or "/"
334
+ Name --> Equal : whitespace
335
+ Name --> BeforeValue : "="
336
+ Name --> AfterValue : ">" or "/"
337
+ Equal --> BeforeValue : "="
338
+ Equal --> AfterValue : other
339
+ BeforeValue --> Value : quote or char
340
+ Value --> AfterValue : end quote or unquoted end
341
+ AfterValue --> [*]
342
+ ```
343
+
344
+ ## Token Creation Utilities
345
+
346
+ ### createToken()
347
+
348
+ ```ts
349
+ createToken(token: Token): MLASTToken;
350
+ createToken(token: string, offset: number, line: number, col: number): MLASTToken;
351
+ ```
352
+
353
+ Creates a new `MLASTToken` with a generated UUID (8 chars). Accepts either a `Token` object or a raw string with explicit coordinates.
354
+
355
+ ### sliceFragment()
356
+
357
+ ```ts
358
+ sliceFragment(start: number, end?: number): Token
359
+ ```
360
+
361
+ Extracts a `Token` from the current `rawCode` at the given character offset range, computing line and column from the source position.
362
+
363
+ ### getOffsetsFromCode()
364
+
365
+ ```ts
366
+ getOffsetsFromCode(
367
+ startLine: number, startCol: number,
368
+ endLine: number, endCol: number
369
+ ): { offset: number; endOffset: number }
370
+ ```
371
+
372
+ Converts line/column positions to character offsets within the current raw source code.
373
+
374
+ ## Tree Manipulation
375
+
376
+ ### appendChild()
377
+
378
+ ```ts
379
+ appendChild(parentNode: MLASTParentNode | null, ...childNodes: readonly MLASTChildNode[]): void
380
+ ```
381
+
382
+ Appends child nodes to a parent, maintaining sorted order by source position. If a child already exists (by UUID), it is replaced in place.
383
+
384
+ ### replaceChild()
385
+
386
+ ```ts
387
+ replaceChild(
388
+ parentNode: MLASTParentNode,
389
+ oldChildNode: MLASTChildNode,
390
+ ...replacementChildNodes: readonly MLASTChildNode[]
391
+ ): void
392
+ ```
393
+
394
+ Replaces a child node within a parent's child list with one or more replacement nodes.
395
+
396
+ ### walk()
397
+
398
+ ```ts
399
+ walk<Node extends MLASTNodeTreeItem>(
400
+ nodeList: readonly Node[],
401
+ walker: Walker<Node>,
402
+ depth?: number
403
+ ): void
404
+ ```
405
+
406
+ Walks a node list depth-first, invoking the walker callback for each node. The walker receives the current node, the sequentially previous node, and the depth. Automatically recurses into child nodes.
407
+
408
+ ## Update Methods
409
+
410
+ ### updateLocation()
411
+
412
+ ```ts
413
+ updateLocation(
414
+ node: MLASTNodeTreeItem,
415
+ props: Partial<Pick<MLASTNodeTreeItem, 'offset' | 'line' | 'col' | 'depth'>>
416
+ ): void
417
+ ```
418
+
419
+ Updates position and depth properties of an AST node.
420
+
421
+ ### updateRaw()
422
+
423
+ ```ts
424
+ updateRaw(node: MLASTToken, raw: string): void
425
+ ```
426
+
427
+ Replaces the raw code of a node and updates all positional properties accordingly.
428
+
429
+ ### updateElement()
430
+
431
+ ```ts
432
+ updateElement(el: MLASTElement, props: Partial<Pick<MLASTElement, 'nodeName' | 'elementType'>>): void
433
+ updateElement(el: MLASTElementCloseTag, props: Partial<Pick<MLASTElementCloseTag, 'nodeName'>>): void
434
+ ```
435
+
436
+ Updates the node name and/or element type of an element or close tag node.
437
+
438
+ ### updateAttr()
439
+
440
+ ```ts
441
+ updateAttr(
442
+ attr: MLASTHTMLAttr,
443
+ props: Partial<Pick<MLASTHTMLAttr,
444
+ 'isDynamicValue' | 'isDirective' | 'potentialName' | 'potentialValue' |
445
+ 'valueType' | 'candidate' | 'isDuplicatable'
446
+ >>
447
+ ): void
448
+ ```
449
+
450
+ Updates metadata properties on an attribute node, such as marking it as a directive or dynamic value.
451
+
452
+ ## Ignore Block System
453
+
454
+ The ignore block system masks template expressions and preprocessor directives before HTML parsing, then restores them afterward.
455
+
456
+ ### Lifecycle
457
+
458
+ 1. **Define** — `IgnoreTag` patterns in `ParserOptions.ignoreTags`:
459
+
460
+ ```ts
461
+ { type: 'mustache', start: '{{', end: '}}' }
462
+ { type: 'Style', start: '<style', end: '</style>' }
463
+ ```
464
+
465
+ 2. **Mask** — `ignoreBlock()` replaces matches with mask characters inside bogus comment syntax (`<!...>`), preserving line breaks for position tracking
466
+
467
+ 3. **Parse** — the masked code is safe for HTML tokenization
468
+
469
+ 4. **Restore** — `restoreNode()` walks the flat node list and replaces masked regions with `#ps:<type>` psblock nodes. Masked content in attribute values is restored and marked `isDynamicValue: true`
470
+
471
+ ### IgnoreTag Definition
472
+
473
+ ```ts
474
+ type IgnoreTag = {
475
+ readonly type: string; // Name used for #ps: prefix
476
+ readonly start: RegExp | string; // Start pattern
477
+ readonly end: RegExp | string; // End pattern
478
+ };
479
+ ```
480
+
481
+ ## Element Type Detection
482
+
483
+ ```ts
484
+ detectElementType(nodeName: string, defaultPattern?: ParserAuthoredElementNameDistinguishing): ElementType
485
+ ```
486
+
487
+ Classifies elements into three types:
488
+
489
+ | Type | Description | Example |
490
+ | ----------------- | ---------------------------------------------- | --------------------------- |
491
+ | `'html'` | Standard HTML element | `div`, `span`, `input` |
492
+ | `'web-component'` | Custom element (contains hyphen, per spec) | `my-component`, `x-button` |
493
+ | `'authored'` | Framework component (matches authored pattern) | `MyComponent`, `App.Header` |
494
+
495
+ The `authoredElementName` pattern is set from `ParseOptions` and can be a string, RegExp, function, or array of these. Each parser provides a framework-specific default pattern (e.g., `/^[A-Z]/` for JSX/Svelte, PascalCase + built-in list for Vue).
496
+
497
+ ## Accessor Properties
498
+
499
+ | Property | Type | Description |
500
+ | ---------------------- | ------------------------------------------------------ | -------------------------------------------------------------- |
501
+ | `rawCode` | `string` | The current raw source code being parsed (may be preprocessed) |
502
+ | `booleanish` | `boolean` | Whether omitted attribute values are treated as `true` |
503
+ | `endTag` | `EndTagType` | The end tag handling strategy |
504
+ | `tagNameCaseSensitive` | `boolean` | Whether tag name comparisons are case-sensitive |
505
+ | `authoredElementName` | `ParserAuthoredElementNameDistinguishing \| undefined` | The pattern for distinguishing authored elements |
506
+ | `state` | `State` | The mutable parser state (reset after each `parse()` call) |
507
+
508
+ ## Implementing a Parser
509
+
510
+ ### Basic Structure
511
+
512
+ ```ts
513
+ import { Parser } from '@markuplint/parser-utils';
514
+ import type { ParserOptions, ParseOptions, Tokenized, ChildToken } from '@markuplint/parser-utils';
515
+ import type { MLASTParentNode, MLASTNodeTreeItem } from '@markuplint/ml-ast';
516
+
517
+ // Your language-specific AST node type
518
+ type MyNode = {
519
+ /* ... */
520
+ };
521
+
522
+ class MyParser extends Parser<MyNode> {
523
+ constructor() {
524
+ super({
525
+ endTagType: 'xml',
526
+ tagNameCaseSensitive: true,
527
+ // ... other options
528
+ });
529
+ }
530
+
531
+ tokenize(options?: ParseOptions): Tokenized<MyNode> {
532
+ // Parse this.rawCode with your language's parser
533
+ const ast = myLanguageParser(this.rawCode);
534
+ return { ast: ast.children, isFragment: true };
535
+ }
536
+
537
+ nodeize(originNode: MyNode, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[] {
538
+ // Convert each language-specific node to markuplint AST nodes
539
+ // using visitor methods
540
+ switch (originNode.type) {
541
+ case 'element':
542
+ return this.visitElement(/* ... */);
543
+ case 'text':
544
+ return this.visitText(/* ... */);
545
+ case 'comment':
546
+ return this.visitComment(/* ... */);
547
+ default:
548
+ return [];
549
+ }
550
+ }
551
+ }
552
+ ```
553
+
554
+ ### Override Pattern Reference
555
+
556
+ | Method | super call | Pattern | Reason |
557
+ | --------------------- | -------------- | -------------------- | ------------------------------------------------------------------------------ |
558
+ | `tokenize()` | **Not needed** | Full replacement | Default returns empty array. Each parser provides its own tokenizer |
559
+ | `nodeize()` | **Not needed** | Full replacement | Default returns empty array. Each parser provides its own node conversion |
560
+ | `beforeParse()` | **Required** | super-first | `super.beforeParse()` handles offset space prepending. Add processing after |
561
+ | `afterParse()` | **Required** | super-first | `super.afterParse()` handles offset space removal. Add processing after |
562
+ | `afterTraverse()` | Recommended | super-first | `super` sorts by position. JSX adds parentId remapping after |
563
+ | `afterFlattenNodes()` | Recommended | wrapper | Pass options to `super` to control cleanup steps |
564
+ | `flattenNodes()` | Recommended | super-first | Vue calls super then injects template comments |
565
+ | `visitText()` | Recommended | wrapper | Pass options to `super`. Svelte post-processes script→psblock |
566
+ | `visitComment()` | Recommended | super-first | JSX overrides `isBogus` to `false` after super |
567
+ | `visitPsBlock()` | Recommended | wrapper + validation | Svelte validates return count after super |
568
+ | `visitChildren()` | Recommended | wrapper + validation | Svelte validates no siblings after super |
569
+ | `visitAttr()` | **Required** | super-first | `super.visitAttr()` performs token decomposition. Add directive handling after |
570
+ | `visitSpreadAttr()` | Not needed | Full replacement | HTML overrides to return `null` (no spread support) |
571
+ | `detectElementType()` | **Required** | wrapper | Pass framework-specific default pattern to `super` |
572
+ | `parseError()` | Recommended | conditional chain | Handle framework-specific errors first, fallback to `super` |
573
+ | `parse()` | Recommended | wrapper | Svelte modifies options then delegates to super |
574
+
575
+ ### Pattern 1: Full Replacement (tokenize, nodeize)
576
+
577
+ No `super` call needed — the base implementation returns an empty array.
578
+
579
+ ```ts
580
+ // From HtmlParser
581
+ tokenize(): Tokenized<Node, State> {
582
+ const doc = parse5.parse(this.rawCode);
583
+ return {
584
+ ast: doc.childNodes,
585
+ isFragment: false,
586
+ };
587
+ }
588
+ ```
589
+
590
+ ### Pattern 2: super-first + Post-processing (beforeParse, afterParse, visitAttr)
591
+
592
+ Call `super` first, then add processing.
593
+
594
+ ```ts
595
+ // From HtmlParser
596
+ beforeParse(rawCode: string, options?: ParseOptions) {
597
+ const code = super.beforeParse(rawCode, options);
598
+ // Additional preprocessing...
599
+ return code;
600
+ }
601
+
602
+ // From VueParser
603
+ visitAttr(token: Token) {
604
+ const attr = super.visitAttr(token);
605
+ // Resolve Vue directive shorthands
606
+ if (attr.type === 'attr' && attr.name.raw.startsWith(':')) {
607
+ this.updateAttr(attr, {
608
+ potentialName: `v-bind:${attr.name.raw.slice(1)}`,
609
+ isDirective: true,
610
+ isDynamicValue: true,
611
+ });
612
+ }
613
+ return attr;
614
+ }
615
+ ```
616
+
617
+ ### Pattern 3: wrapper + Options Delegation (afterFlattenNodes, visitText)
618
+
619
+ Pass control options to `super`.
620
+
621
+ ```ts
622
+ // From JSXParser
623
+ afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[]) {
624
+ return super.afterFlattenNodes(nodeList, {
625
+ exposeWhiteSpace: false,
626
+ exposeInvalidNode: false,
627
+ });
628
+ }
629
+
630
+ // From HtmlParser
631
+ visitText(token: ChildToken) {
632
+ return super.visitText(token, {
633
+ researchTags: true,
634
+ invalidTagAsText: true,
635
+ });
636
+ }
637
+ ```
638
+
639
+ ### Pattern 4: Conditional Chain (parseError)
640
+
641
+ Handle known error formats first, delegate unknown errors to `super`.
642
+
643
+ ```ts
644
+ // From JSXParser
645
+ parseError(error: any) {
646
+ if (error.lineNumber != null && error.column != null) {
647
+ return new ParserError(error.message, {
648
+ line: error.lineNumber,
649
+ col: error.column,
650
+ raw: this.rawCode,
651
+ });
652
+ }
653
+ return super.parseError(error);
654
+ }
655
+ ```