@markuplint/parser-utils 4.8.10 → 5.0.0-alpha.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/lib/parser.d.ts CHANGED
@@ -1,11 +1,30 @@
1
1
  import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized, ValueType } from './types.js';
2
- import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr, MLASTPreprocessorSpecificBlockConditionalType } from '@markuplint/ml-ast';
2
+ import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr, MLASTBlockBehavior } from '@markuplint/ml-ast';
3
3
  import { AttrState } from './enums.js';
4
4
  import { ParserError } from './parser-error.js';
5
+ /**
6
+ * Abstract base class for all markuplint parsers. Provides the core parsing pipeline
7
+ * including tokenization, tree traversal, node flattening, and error handling.
8
+ * Subclasses must implement `nodeize` to convert language-specific AST nodes
9
+ * into the markuplint AST format.
10
+ *
11
+ * @template Node - The language-specific AST node type produced by the tokenizer
12
+ * @template State - An optional parser state type that persists across tokenization
13
+ */
5
14
  export declare abstract class Parser<Node extends {} = {}, State extends unknown = null> implements MLParser {
6
15
  #private;
7
16
  state: State;
17
+ /**
18
+ * Creates a new Parser instance with the given options and initial state.
19
+ *
20
+ * @param options - Configuration options controlling tag handling, whitespace, and quoting behavior
21
+ * @param defaultState - The initial parser state, cloned and restored after each parse call
22
+ */
8
23
  constructor(options?: ParserOptions, defaultState?: State);
24
+ /**
25
+ * The pattern used to distinguish authored (component) element names
26
+ * from native HTML elements, as specified by the parse options.
27
+ */
9
28
  get authoredElementName(): ParserAuthoredElementNameDistinguishing | undefined;
10
29
  /**
11
30
  * Detect value as a true if its attribute is booleanish value and omitted.
@@ -26,55 +45,224 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
26
45
  * - `"never"`: Never need
27
46
  */
28
47
  get endTag(): EndTagType;
48
+ /**
49
+ * The current raw source code being parsed, which may have been
50
+ * preprocessed (e.g., ignore blocks masked, front matter removed).
51
+ */
29
52
  get rawCode(): string;
53
+ /**
54
+ * Whether tag names should be compared in a case-sensitive manner.
55
+ * When false (the default), tag name comparisons are case-insensitive (HTML behavior).
56
+ */
30
57
  get tagNameCaseSensitive(): boolean;
58
+ /**
59
+ * Tokenizes the raw source code into language-specific AST nodes.
60
+ * Subclasses should override this method to provide actual tokenization logic.
61
+ *
62
+ * @param options - Parse options controlling offset, depth, and other parse-time settings
63
+ * @returns The tokenized result containing the AST node array and fragment flag
64
+ */
31
65
  tokenize(options?: ParseOptions): Tokenized<Node, State>;
66
+ /**
67
+ * Hook called before parsing begins, allowing subclasses to preprocess
68
+ * the raw source code. The default implementation prepends offset spaces
69
+ * based on the parse options.
70
+ *
71
+ * @param rawCode - The raw source code about to be parsed
72
+ * @param options - Parse options that may specify offset positioning
73
+ * @returns The preprocessed source code to be used for tokenization
74
+ */
32
75
  beforeParse(rawCode: string, options?: ParseOptions): string;
76
+ /**
77
+ * Parses raw source code through the full pipeline: preprocessing, tokenization,
78
+ * traversal, flattening, ignore-block restoration, and post-processing.
79
+ * Returns the complete markuplint AST document.
80
+ *
81
+ * @param rawCode - The raw source code to parse
82
+ * @param options - Parse options controlling offsets, depth, front matter, and authored element names
83
+ * @returns The parsed AST document containing the node list and fragment flag
84
+ */
33
85
  parse(rawCode: string, options?: ParseOptions): MLASTDocument;
86
+ /**
87
+ * Hook called after the main parse pipeline completes, allowing subclasses
88
+ * to perform final transformations on the node list. The default implementation
89
+ * removes any offset spaces that were prepended during preprocessing.
90
+ *
91
+ * @param nodeList - The fully parsed and flattened node list
92
+ * @param options - The parse options used for this parse invocation
93
+ * @returns The post-processed node list
94
+ */
34
95
  afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[];
96
+ /**
97
+ * Wraps an arbitrary error into a ParserError with source location information.
98
+ * Extracts line and column numbers from common error formats.
99
+ *
100
+ * @param error - The original error to wrap
101
+ * @returns A ParserError containing the original error's message and location data
102
+ */
35
103
  parseError(error: any): ParserError;
104
+ /**
105
+ * Recursively traverses language-specific AST nodes by calling `nodeize` on each,
106
+ * filtering duplicates, and separating child nodes from ancestor-level siblings.
107
+ *
108
+ * @param originNodes - The language-specific AST nodes to traverse
109
+ * @param parentNode - The parent markuplint AST node, or null for top-level nodes
110
+ * @param depth - The current nesting depth in the tree
111
+ * @returns An object containing `childNodes` at the current depth and `siblings` that belong to ancestor levels
112
+ */
36
113
  traverse(originNodes: readonly Node[], parentNode: (MLASTParentNode | null) | undefined, depth: number): {
37
114
  childNodes: readonly MLASTChildNode[];
38
115
  siblings: readonly MLASTNodeTreeItem[];
39
116
  };
117
+ /**
118
+ * Hook called after traversal completes, used to sort the resulting node tree
119
+ * by source position. Subclasses may override for custom post-traversal logic.
120
+ *
121
+ * @param nodeTree - The unsorted node tree produced by traversal
122
+ * @returns The node tree sorted by source position
123
+ */
40
124
  afterTraverse(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[];
125
+ /**
126
+ * Converts a single language-specific AST node into one or more markuplint AST nodes.
127
+ * Subclasses must override this method to provide actual node conversion logic
128
+ * using visitor methods like `visitElement`, `visitText`, `visitComment`, etc.
129
+ *
130
+ * @param originNode - The language-specific AST node to convert
131
+ * @param parentNode - The parent markuplint AST node, or null for top-level nodes
132
+ * @param depth - The current nesting depth in the tree
133
+ * @returns An array of markuplint AST nodes produced from the origin node
134
+ */
41
135
  nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
136
+ /**
137
+ * Post-processes the nodes produced by `nodeize`, separating them into siblings
138
+ * at the current depth and ancestors that belong to a shallower depth level.
139
+ * Doctype nodes at depth 0 are promoted to ancestors.
140
+ *
141
+ * @param siblings - The nodes produced by `nodeize` for a single origin node
142
+ * @param parentNode - The parent markuplint AST node, or null for top-level nodes
143
+ * @param depth - The current nesting depth
144
+ * @returns An object with `siblings` at the current depth and `ancestors` at shallower depths
145
+ */
42
146
  afterNodeize(siblings: readonly MLASTNodeTreeItem[], parentNode: MLASTParentNode | null, depth: number): {
43
147
  siblings: MLASTChildNode[];
44
148
  ancestors: MLASTNodeTreeItem[];
45
149
  };
150
+ /**
151
+ * Flattens a hierarchical node tree into a flat, sorted list by walking
152
+ * the tree depth-first and removing duplicated nodes.
153
+ *
154
+ * @param nodeTree - The hierarchical node tree to flatten
155
+ * @returns A flat array of all nodes in source order
156
+ */
46
157
  flattenNodes(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[];
158
+ /**
159
+ * Post-processes the flattened node list by exposing remnant whitespace and
160
+ * invalid nodes between known nodes, converting orphan end tags to bogus markers,
161
+ * concatenating adjacent text nodes, and trimming overlapping text.
162
+ *
163
+ * @param nodeList - The flat node list to post-process
164
+ * @param options - Controls which post-processing steps are applied
165
+ * @returns The cleaned-up flat node list
166
+ */
47
167
  afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[], options?: {
48
168
  readonly exposeInvalidNode?: boolean;
49
169
  readonly exposeWhiteSpace?: boolean;
50
170
  readonly concatText?: boolean;
51
171
  }): readonly MLASTNodeTreeItem[];
172
+ /**
173
+ * Creates an AST doctype node from a token containing the doctype
174
+ * name, public ID, and system ID.
175
+ *
176
+ * @param token - The child token with doctype-specific properties
177
+ * @returns An array containing the single doctype AST node
178
+ */
52
179
  visitDoctype(token: ChildToken & {
53
180
  readonly name: string;
54
181
  readonly publicId: string;
55
182
  readonly systemId: string;
56
183
  }): readonly MLASTNodeTreeItem[];
184
+ /**
185
+ * Creates an AST comment node from a token. Automatically detects whether
186
+ * the comment is a bogus comment (not starting with `<!--`).
187
+ *
188
+ * @param token - The child token containing the comment's raw text and position
189
+ * @param options - Optional settings to override the bogus detection
190
+ * @returns An array containing the single comment AST node
191
+ */
57
192
  visitComment(token: ChildToken, options?: {
58
193
  readonly isBogus?: boolean;
59
194
  }): readonly MLASTNodeTreeItem[];
195
+ /**
196
+ * Creates AST text node(s) from a token. Optionally re-parses the text content
197
+ * to discover embedded HTML tags within it.
198
+ *
199
+ * @param token - The child token containing the text content and position
200
+ * @param options - Controls whether to search for embedded tags and how to handle invalid ones
201
+ * @returns An array of AST nodes; a single text node or multiple tag/text nodes if tags were found
202
+ */
60
203
  visitText(token: ChildToken, options?: {
61
204
  readonly researchTags?: boolean;
62
205
  readonly invalidTagAsText?: boolean;
63
206
  }): readonly MLASTNodeTreeItem[];
207
+ /**
208
+ * Creates AST element node(s) from a token, including the start tag, optional end tag,
209
+ * and recursively traversed child nodes. Handles ghost elements (empty raw),
210
+ * self-closing tags, and nameless fragments (e.g., JSX `<>`).
211
+ *
212
+ * @param token - The child token with the element's node name; namespace is auto-detected from tag name and parent node
213
+ * @param childNodes - The language-specific child AST nodes to traverse
214
+ * @param options - Controls end tag creation, fragment handling, and property overrides
215
+ * @returns An array of AST nodes including the start tag, optional end tag, and any sibling nodes
216
+ */
64
217
  visitElement(token: ChildToken & {
65
218
  readonly nodeName: string;
66
- readonly namespace: string;
67
219
  }, childNodes?: readonly Node[], options?: {
68
220
  readonly createEndTagToken?: (startTag: MLASTElement) => ChildToken | null;
69
221
  readonly namelessFragment?: boolean;
70
222
  readonly overwriteProps?: Partial<MLASTElement>;
71
223
  }): readonly MLASTNodeTreeItem[];
224
+ /**
225
+ * Creates an AST preprocessor-specific block node (e.g., for template directives
226
+ * like `{#if}`, `{#each}`, or front matter). Recursively traverses child nodes.
227
+ *
228
+ * @param token - The child token with the block's node name and fragment flag
229
+ * @param childNodes - The language-specific child AST nodes to traverse
230
+ * @param blockBehavior - The block behavior if this is a control-flow block (e.g., "if", "each")
231
+ * @param originBlockNode - The original language-specific block node for reference
232
+ * @returns An array of AST nodes including the block node and any sibling nodes
233
+ */
72
234
  visitPsBlock(token: ChildToken & {
73
235
  readonly nodeName: string;
74
236
  readonly isFragment: boolean;
75
- }, childNodes?: readonly Node[], conditionalType?: MLASTPreprocessorSpecificBlockConditionalType, originBlockNode?: Node): readonly MLASTNodeTreeItem[];
237
+ }, childNodes?: readonly Node[], blockBehavior?: MLASTBlockBehavior | null, originBlockNode?: Node): readonly MLASTNodeTreeItem[];
238
+ /**
239
+ * Traverses a list of child nodes under the given parent, appending the resulting
240
+ * child AST nodes to the parent and returning any sibling nodes that belong
241
+ * to ancestor levels. Skips traversal for raw text elements (e.g., `<script>`, `<style>`).
242
+ *
243
+ * @param children - The language-specific child AST nodes to traverse
244
+ * @param parentNode - The parent markuplint AST node to which children will be appended
245
+ * @returns An array of sibling nodes that belong to ancestor depth levels
246
+ */
76
247
  visitChildren(children: readonly Node[], parentNode: MLASTParentNode | null): readonly MLASTNodeTreeItem[];
248
+ /**
249
+ * Attempts to parse a token as a JSX spread attribute (e.g., `{...props}`).
250
+ * Returns null if the token does not match the spread attribute pattern.
251
+ *
252
+ * @param token - The token to inspect for spread attribute syntax
253
+ * @returns A spread attribute AST node, or null if the token is not a spread attribute
254
+ */
77
255
  visitSpreadAttr(token: Token): MLASTSpreadAttr | null;
256
+ /**
257
+ * Parses a token into a fully structured attribute AST node, breaking it down
258
+ * into its constituent parts: spaces, name, equal sign, quotes, and value.
259
+ * Also detects spread attributes. If there is leftover text after the attribute,
260
+ * it is returned in the `__rightText` property for further processing.
261
+ *
262
+ * @param token - The token containing the raw attribute text and position
263
+ * @param options - Controls quoting behavior, value types, and the initial parser state
264
+ * @returns The parsed attribute AST node with an optional `__rightText` for remaining unparsed content
265
+ */
78
266
  visitAttr(token: Token, options?: {
79
267
  readonly quoteSet?: readonly QuoteSet[];
80
268
  readonly noQuoteValueType?: ValueType;
@@ -83,31 +271,121 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
83
271
  }): MLASTAttr & {
84
272
  __rightText?: string;
85
273
  };
274
+ /**
275
+ * Re-parses a text token to discover embedded HTML/XML tags within it,
276
+ * splitting the content into a sequence of tag and text AST nodes.
277
+ * Handles self-closing detection, depth tracking, and void element recognition.
278
+ *
279
+ * @param token - The child token containing the code fragment to re-parse
280
+ * @param options - Controls whether nameless fragments (JSX `<>`) are recognized
281
+ * @returns An array of tag and text AST nodes discovered in the code fragment
282
+ */
86
283
  parseCodeFragment(token: ChildToken, options?: {
87
284
  readonly namelessFragment?: boolean;
88
285
  }): (MLASTTag | MLASTText)[];
89
- updateLocation(node: MLASTNodeTreeItem, props: Partial<Pick<MLASTNodeTreeItem, 'startOffset' | 'startLine' | 'startCol' | 'depth'>>): void;
90
286
  /**
91
- * Set new raw code to target node.
287
+ * Updates the position and depth properties of an AST node.
92
288
  *
93
- * Replace the raw code and update the start/end offset/line/column.
289
+ * @param node - The AST node whose location should be updated
290
+ * @param props - The new position and depth values to apply (only provided values are changed)
291
+ */
292
+ updateLocation(node: MLASTNodeTreeItem, props: Partial<Pick<MLASTNodeTreeItem, 'offset' | 'line' | 'col' | 'depth'>>): void;
293
+ /**
294
+ * Set new raw code to target node.
94
295
  *
95
296
  * @param node target node
96
297
  * @param raw new raw code
97
298
  */
98
299
  updateRaw(node: MLASTToken, raw: string): void;
300
+ /**
301
+ * Updates the node name and/or element type of an element or close tag AST node.
302
+ * Useful for renaming elements or changing their classification after initial parsing.
303
+ *
304
+ * @param el - The element or close tag AST node to update
305
+ * @param props - The properties to overwrite on the element
306
+ */
99
307
  updateElement(el: MLASTElement, props: Partial<Pick<MLASTElement, 'nodeName' | 'elementType'>>): void;
100
308
  updateElement(el: MLASTElementCloseTag, props: Partial<Pick<MLASTElementCloseTag, 'nodeName'>>): void;
309
+ /**
310
+ * Updates metadata properties on an HTML attribute AST node, such as marking
311
+ * it as a directive, dynamic value, or setting its potential name/value
312
+ * for preprocessor-specific attribute transformations.
313
+ *
314
+ * @param attr - The HTML attribute AST node to update
315
+ * @param props - The metadata properties to overwrite on the attribute
316
+ */
101
317
  updateAttr(attr: MLASTHTMLAttr, props: Partial<Pick<MLASTHTMLAttr, 'isDynamicValue' | 'isDirective' | 'potentialName' | 'potentialValue' | 'valueType' | 'candidate' | 'isDuplicatable'>>): void;
318
+ /**
319
+ * Determines the element type (e.g., "html", "web-component", "authored") for a
320
+ * given tag name, using the parser's authored element name distinguishing pattern.
321
+ *
322
+ * @param nodeName - The tag name to classify
323
+ * @param defaultPattern - A fallback pattern if no authored element name pattern is set
324
+ * @returns The element type classification
325
+ */
102
326
  detectElementType(nodeName: string, defaultPattern?: ParserAuthoredElementNameDistinguishing): ElementType;
327
+ /**
328
+ * Creates a new MLASTToken with a generated UUID.
329
+ * Accepts either a Token object or a raw string with explicit start coordinates.
330
+ *
331
+ * @param token - A Token object or raw string to create the AST token from
332
+ * @param offset - The zero-based byte offset where the token starts (required when token is a string)
333
+ * @param line - The one-based line number where the token starts (required when token is a string)
334
+ * @param col - The one-based column number where the token starts (required when token is a string)
335
+ * @returns An AST token with UUID, start position, and raw content
336
+ */
103
337
  createToken(token: Token): MLASTToken;
104
- createToken(token: string, startOffset: number, startLine: number, startCol: number): MLASTToken;
338
+ createToken(token: string, offset: number, line: number, col: number): MLASTToken;
339
+ /**
340
+ * Extracts a Token from the current raw code at the given byte offset range,
341
+ * computing the line and column from the source position.
342
+ *
343
+ * @param start - The starting byte offset (inclusive) in the raw code
344
+ * @param end - The ending byte offset (exclusive) in the raw code; if omitted, slices to the end
345
+ * @returns A Token containing the sliced raw content and its start position
346
+ */
105
347
  sliceFragment(start: number, end?: number): Token;
348
+ /**
349
+ * Calculates start and end byte offsets from line/column positions
350
+ * within the current raw source code.
351
+ *
352
+ * @param startLine - The starting line number (1-based)
353
+ * @param startCol - The starting column number (1-based)
354
+ * @param endLine - The ending line number (1-based)
355
+ * @param endCol - The ending column number (1-based)
356
+ * @returns The computed start and end byte offsets
357
+ */
106
358
  getOffsetsFromCode(startLine: number, startCol: number, endLine: number, endCol: number): {
107
359
  offset: number;
108
360
  endOffset: number;
109
361
  };
362
+ /**
363
+ * Walks through a node list depth-first, invoking the walker callback for each node.
364
+ * The walker receives the current node, the sequentially previous node, and the depth.
365
+ * Automatically recurses into child nodes of parent elements and preprocessor blocks.
366
+ *
367
+ * @template Node - The specific AST node type being walked
368
+ * @param nodeList - The list of nodes to walk
369
+ * @param walker - The callback invoked for each node during the walk
370
+ * @param depth - The current depth (starts at 0 for top-level calls)
371
+ */
110
372
  walk<Node extends MLASTNodeTreeItem>(nodeList: readonly Node[], walker: Walker<Node>, depth?: number): void;
373
+ /**
374
+ * Appends child nodes to a parent node, updating parent references and
375
+ * maintaining sorted order by source position. If a child already exists
376
+ * in the parent (by UUID), it is replaced in place rather than duplicated.
377
+ *
378
+ * @param parentNode - The parent node to append children to, or null (no-op)
379
+ * @param childNodes - The child nodes to append
380
+ */
111
381
  appendChild(parentNode: MLASTParentNode | null, ...childNodes: readonly MLASTChildNode[]): void;
382
+ /**
383
+ * Replaces a child node within a parent's child list with one or more replacement nodes.
384
+ * If the old child is not found in the parent, the operation is a no-op.
385
+ *
386
+ * @param parentNode - The parent node containing the child to replace
387
+ * @param oldChildNode - The existing child node to be replaced
388
+ * @param replacementChildNodes - The replacement nodes to insert at the old child's position
389
+ */
112
390
  replaceChild(parentNode: MLASTParentNode, oldChildNode: MLASTChildNode, ...replacementChildNodes: readonly MLASTChildNode[]): void;
113
391
  }