@markuplint/parser-utils 4.8.9 → 4.8.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.ja.md +208 -0
- package/ARCHITECTURE.md +251 -0
- package/CHANGELOG.md +5 -1
- package/README.md +6 -0
- package/SKILL.md +126 -0
- package/docs/maintenance.ja.md +176 -0
- package/docs/maintenance.md +176 -0
- package/docs/parser-class.ja.md +655 -0
- package/docs/parser-class.md +655 -0
- package/lib/debugger.d.ts +25 -0
- package/lib/debugger.js +25 -0
- package/lib/enums.d.ts +10 -0
- package/lib/enums.js +10 -0
- package/lib/get-namespace.d.ts +2 -0
- package/lib/get-namespace.js +29 -0
- package/lib/idl-attributes.d.ts +9 -0
- package/lib/idl-attributes.js +9 -0
- package/lib/parser-error.d.ts +16 -0
- package/lib/parser-error.js +12 -0
- package/lib/parser.d.ts +282 -0
- package/lib/parser.js +265 -3
- package/lib/script-parser.d.ts +21 -0
- package/lib/script-parser.js +17 -0
- package/lib/types.d.ts +57 -0
- package/package.json +10 -10
package/lib/parser.js
CHANGED
|
@@ -23,7 +23,22 @@ import { ignoreFrontMatter } from './ignore-front-matter.js';
|
|
|
23
23
|
import { ParserError } from './parser-error.js';
|
|
24
24
|
import { sortNodes } from './sort-nodes.js';
|
|
25
25
|
const timer = new PerformanceTimer();
|
|
26
|
+
/**
|
|
27
|
+
* Abstract base class for all markuplint parsers. Provides the core parsing pipeline
|
|
28
|
+
* including tokenization, tree traversal, node flattening, and error handling.
|
|
29
|
+
* Subclasses must implement `nodeize` to convert language-specific AST nodes
|
|
30
|
+
* into the markuplint AST format.
|
|
31
|
+
*
|
|
32
|
+
* @template Node - The language-specific AST node type produced by the tokenizer
|
|
33
|
+
* @template State - An optional parser state type that persists across tokenization
|
|
34
|
+
*/
|
|
26
35
|
export class Parser {
|
|
36
|
+
/**
|
|
37
|
+
* Creates a new Parser instance with the given options and initial state.
|
|
38
|
+
*
|
|
39
|
+
* @param options - Configuration options controlling tag handling, whitespace, and quoting behavior
|
|
40
|
+
* @param defaultState - The initial parser state, cloned and restored after each parse call
|
|
41
|
+
*/
|
|
27
42
|
constructor(options, defaultState) {
|
|
28
43
|
_Parser_instances.add(this);
|
|
29
44
|
_Parser_booleanish.set(this, false);
|
|
@@ -51,6 +66,10 @@ export class Parser {
|
|
|
51
66
|
__classPrivateFieldSet(this, _Parser_defaultState, defaultState ?? null, "f");
|
|
52
67
|
this.state = structuredClone(__classPrivateFieldGet(this, _Parser_defaultState, "f"));
|
|
53
68
|
}
|
|
69
|
+
/**
|
|
70
|
+
* The pattern used to distinguish authored (component) element names
|
|
71
|
+
* from native HTML elements, as specified by the parse options.
|
|
72
|
+
*/
|
|
54
73
|
get authoredElementName() {
|
|
55
74
|
return __classPrivateFieldGet(this, _Parser_authoredElementName, "f");
|
|
56
75
|
}
|
|
@@ -77,22 +96,55 @@ export class Parser {
|
|
|
77
96
|
get endTag() {
|
|
78
97
|
return __classPrivateFieldGet(this, _Parser_endTagType, "f");
|
|
79
98
|
}
|
|
99
|
+
/**
|
|
100
|
+
* The current raw source code being parsed, which may have been
|
|
101
|
+
* preprocessed (e.g., ignore blocks masked, front matter removed).
|
|
102
|
+
*/
|
|
80
103
|
get rawCode() {
|
|
81
104
|
return __classPrivateFieldGet(this, _Parser_rawCode, "f");
|
|
82
105
|
}
|
|
106
|
+
/**
|
|
107
|
+
* Whether tag names should be compared in a case-sensitive manner.
|
|
108
|
+
* When false (the default), tag name comparisons are case-insensitive (HTML behavior).
|
|
109
|
+
*/
|
|
83
110
|
get tagNameCaseSensitive() {
|
|
84
111
|
return __classPrivateFieldGet(this, _Parser_tagNameCaseSensitive, "f");
|
|
85
112
|
}
|
|
113
|
+
/**
|
|
114
|
+
* Tokenizes the raw source code into language-specific AST nodes.
|
|
115
|
+
* Subclasses should override this method to provide actual tokenization logic.
|
|
116
|
+
*
|
|
117
|
+
* @param options - Parse options controlling offset, depth, and other parse-time settings
|
|
118
|
+
* @returns The tokenized result containing the AST node array and fragment flag
|
|
119
|
+
*/
|
|
86
120
|
tokenize(options) {
|
|
87
121
|
return {
|
|
88
122
|
ast: [],
|
|
89
123
|
isFragment: false,
|
|
90
124
|
};
|
|
91
125
|
}
|
|
126
|
+
/**
|
|
127
|
+
* Hook called before parsing begins, allowing subclasses to preprocess
|
|
128
|
+
* the raw source code. The default implementation prepends offset spaces
|
|
129
|
+
* based on the parse options.
|
|
130
|
+
*
|
|
131
|
+
* @param rawCode - The raw source code about to be parsed
|
|
132
|
+
* @param options - Parse options that may specify offset positioning
|
|
133
|
+
* @returns The preprocessed source code to be used for tokenization
|
|
134
|
+
*/
|
|
92
135
|
beforeParse(rawCode, options) {
|
|
93
136
|
const spaces = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_createOffsetSpaces).call(this, options);
|
|
94
137
|
return spaces + rawCode;
|
|
95
138
|
}
|
|
139
|
+
/**
|
|
140
|
+
* Parses raw source code through the full pipeline: preprocessing, tokenization,
|
|
141
|
+
* traversal, flattening, ignore-block restoration, and post-processing.
|
|
142
|
+
* Returns the complete markuplint AST document.
|
|
143
|
+
*
|
|
144
|
+
* @param rawCode - The raw source code to parse
|
|
145
|
+
* @param options - Parse options controlling offsets, depth, front matter, and authored element names
|
|
146
|
+
* @returns The parsed AST document containing the node list and fragment flag
|
|
147
|
+
*/
|
|
96
148
|
parse(rawCode, options) {
|
|
97
149
|
try {
|
|
98
150
|
// Initialize raw code
|
|
@@ -168,9 +220,25 @@ export class Parser {
|
|
|
168
220
|
throw this.parseError(error);
|
|
169
221
|
}
|
|
170
222
|
}
|
|
223
|
+
/**
|
|
224
|
+
* Hook called after the main parse pipeline completes, allowing subclasses
|
|
225
|
+
* to perform final transformations on the node list. The default implementation
|
|
226
|
+
* removes any offset spaces that were prepended during preprocessing.
|
|
227
|
+
*
|
|
228
|
+
* @param nodeList - The fully parsed and flattened node list
|
|
229
|
+
* @param options - The parse options used for this parse invocation
|
|
230
|
+
* @returns The post-processed node list
|
|
231
|
+
*/
|
|
171
232
|
afterParse(nodeList, options) {
|
|
172
233
|
return __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_removeOffsetSpaces).call(this, nodeList, options);
|
|
173
234
|
}
|
|
235
|
+
/**
|
|
236
|
+
* Wraps an arbitrary error into a ParserError with source location information.
|
|
237
|
+
* Extracts line and column numbers from common error formats.
|
|
238
|
+
*
|
|
239
|
+
* @param error - The original error to wrap
|
|
240
|
+
* @returns A ParserError containing the original error's message and location data
|
|
241
|
+
*/
|
|
174
242
|
parseError(error) {
|
|
175
243
|
return new ParserError(error, {
|
|
176
244
|
line: error.line ?? error.lineNumber ?? 0,
|
|
@@ -179,6 +247,15 @@ export class Parser {
|
|
|
179
247
|
stack: error.stack,
|
|
180
248
|
});
|
|
181
249
|
}
|
|
250
|
+
/**
|
|
251
|
+
* Recursively traverses language-specific AST nodes by calling `nodeize` on each,
|
|
252
|
+
* filtering duplicates, and separating child nodes from ancestor-level siblings.
|
|
253
|
+
*
|
|
254
|
+
* @param originNodes - The language-specific AST nodes to traverse
|
|
255
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
256
|
+
* @param depth - The current nesting depth in the tree
|
|
257
|
+
* @returns An object containing `childNodes` at the current depth and `siblings` that belong to ancestor levels
|
|
258
|
+
*/
|
|
182
259
|
traverse(originNodes, parentNode = null, depth) {
|
|
183
260
|
if (originNodes.length === 0) {
|
|
184
261
|
return {
|
|
@@ -212,15 +289,42 @@ export class Parser {
|
|
|
212
289
|
siblings,
|
|
213
290
|
};
|
|
214
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Hook called after traversal completes, used to sort the resulting node tree
|
|
294
|
+
* by source position. Subclasses may override for custom post-traversal logic.
|
|
295
|
+
*
|
|
296
|
+
* @param nodeTree - The unsorted node tree produced by traversal
|
|
297
|
+
* @returns The node tree sorted by source position
|
|
298
|
+
*/
|
|
215
299
|
afterTraverse(nodeTree) {
|
|
216
300
|
return Array.prototype.toSorted == null
|
|
217
301
|
? // TODO: Use sort instead of toSorted until we end support for Node 18
|
|
218
302
|
[...nodeTree].sort(sortNodes)
|
|
219
303
|
: nodeTree.toSorted(sortNodes);
|
|
220
304
|
}
|
|
305
|
+
/**
|
|
306
|
+
* Converts a single language-specific AST node into one or more markuplint AST nodes.
|
|
307
|
+
* Subclasses must override this method to provide actual node conversion logic
|
|
308
|
+
* using visitor methods like `visitElement`, `visitText`, `visitComment`, etc.
|
|
309
|
+
*
|
|
310
|
+
* @param originNode - The language-specific AST node to convert
|
|
311
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
312
|
+
* @param depth - The current nesting depth in the tree
|
|
313
|
+
* @returns An array of markuplint AST nodes produced from the origin node
|
|
314
|
+
*/
|
|
221
315
|
nodeize(originNode, parentNode, depth) {
|
|
222
316
|
return [];
|
|
223
317
|
}
|
|
318
|
+
/**
|
|
319
|
+
* Post-processes the nodes produced by `nodeize`, separating them into siblings
|
|
320
|
+
* at the current depth and ancestors that belong to a shallower depth level.
|
|
321
|
+
* Doctype nodes at depth 0 are promoted to ancestors.
|
|
322
|
+
*
|
|
323
|
+
* @param siblings - The nodes produced by `nodeize` for a single origin node
|
|
324
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
325
|
+
* @param depth - The current nesting depth
|
|
326
|
+
* @returns An object with `siblings` at the current depth and `ancestors` at shallower depths
|
|
327
|
+
*/
|
|
224
328
|
afterNodeize(siblings, parentNode, depth) {
|
|
225
329
|
const newSiblings = [];
|
|
226
330
|
const ancestors = [];
|
|
@@ -246,9 +350,25 @@ export class Parser {
|
|
|
246
350
|
ancestors,
|
|
247
351
|
};
|
|
248
352
|
}
|
|
353
|
+
/**
|
|
354
|
+
* Flattens a hierarchical node tree into a flat, sorted list by walking
|
|
355
|
+
* the tree depth-first and removing duplicated nodes.
|
|
356
|
+
*
|
|
357
|
+
* @param nodeTree - The hierarchical node tree to flatten
|
|
358
|
+
* @returns A flat array of all nodes in source order
|
|
359
|
+
*/
|
|
249
360
|
flattenNodes(nodeTree) {
|
|
250
361
|
return __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_arrayize).call(this, nodeTree);
|
|
251
362
|
}
|
|
363
|
+
/**
|
|
364
|
+
* Post-processes the flattened node list by exposing remnant whitespace and
|
|
365
|
+
* invalid nodes between known nodes, converting orphan end tags to bogus markers,
|
|
366
|
+
* concatenating adjacent text nodes, and trimming overlapping text.
|
|
367
|
+
*
|
|
368
|
+
* @param nodeList - The flat node list to post-process
|
|
369
|
+
* @param options - Controls which post-processing steps are applied
|
|
370
|
+
* @returns The cleaned-up flat node list
|
|
371
|
+
*/
|
|
252
372
|
afterFlattenNodes(nodeList, options) {
|
|
253
373
|
const exposeInvalidNode = options?.exposeInvalidNode ?? true;
|
|
254
374
|
const exposeWhiteSpace = options?.exposeWhiteSpace ?? true;
|
|
@@ -261,6 +381,13 @@ export class Parser {
|
|
|
261
381
|
nodeList = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_trimText).call(this, nodeList);
|
|
262
382
|
return nodeList;
|
|
263
383
|
}
|
|
384
|
+
/**
|
|
385
|
+
* Creates an AST doctype node from a token containing the doctype
|
|
386
|
+
* name, public ID, and system ID.
|
|
387
|
+
*
|
|
388
|
+
* @param token - The child token with doctype-specific properties
|
|
389
|
+
* @returns An array containing the single doctype AST node
|
|
390
|
+
*/
|
|
264
391
|
visitDoctype(token) {
|
|
265
392
|
timer.push('visitDoctype');
|
|
266
393
|
const node = {
|
|
@@ -271,6 +398,14 @@ export class Parser {
|
|
|
271
398
|
};
|
|
272
399
|
return [node];
|
|
273
400
|
}
|
|
401
|
+
/**
|
|
402
|
+
* Creates an AST comment node from a token. Automatically detects whether
|
|
403
|
+
* the comment is a bogus comment (not starting with `<!--`).
|
|
404
|
+
*
|
|
405
|
+
* @param token - The child token containing the comment's raw text and position
|
|
406
|
+
* @param options - Optional settings to override the bogus detection
|
|
407
|
+
* @returns An array containing the single comment AST node
|
|
408
|
+
*/
|
|
274
409
|
visitComment(token, options) {
|
|
275
410
|
timer.push('visitComment');
|
|
276
411
|
const isBogus = options?.isBogus ?? !token.raw.startsWith('<!--');
|
|
@@ -283,6 +418,14 @@ export class Parser {
|
|
|
283
418
|
};
|
|
284
419
|
return [node];
|
|
285
420
|
}
|
|
421
|
+
/**
|
|
422
|
+
* Creates AST text node(s) from a token. Optionally re-parses the text content
|
|
423
|
+
* to discover embedded HTML tags within it.
|
|
424
|
+
*
|
|
425
|
+
* @param token - The child token containing the text content and position
|
|
426
|
+
* @param options - Controls whether to search for embedded tags and how to handle invalid ones
|
|
427
|
+
* @returns An array of AST nodes; a single text node or multiple tag/text nodes if tags were found
|
|
428
|
+
*/
|
|
286
429
|
visitText(token, options) {
|
|
287
430
|
timer.push('visitText');
|
|
288
431
|
const node = {
|
|
@@ -302,6 +445,16 @@ export class Parser {
|
|
|
302
445
|
}
|
|
303
446
|
return [node];
|
|
304
447
|
}
|
|
448
|
+
/**
|
|
449
|
+
* Creates AST element node(s) from a token, including the start tag, optional end tag,
|
|
450
|
+
* and recursively traversed child nodes. Handles ghost elements (empty raw),
|
|
451
|
+
* self-closing tags, and nameless fragments (e.g., JSX `<>`).
|
|
452
|
+
*
|
|
453
|
+
* @param token - The child token with the element's node name and namespace
|
|
454
|
+
* @param childNodes - The language-specific child AST nodes to traverse
|
|
455
|
+
* @param options - Controls end tag creation, fragment handling, and property overrides
|
|
456
|
+
* @returns An array of AST nodes including the start tag, optional end tag, and any sibling nodes
|
|
457
|
+
*/
|
|
305
458
|
visitElement(token, childNodes = [], options) {
|
|
306
459
|
timer.push('visitElement');
|
|
307
460
|
const createEndTagToken = options?.createEndTagToken;
|
|
@@ -344,6 +497,16 @@ export class Parser {
|
|
|
344
497
|
}
|
|
345
498
|
return [startTag, ...siblings];
|
|
346
499
|
}
|
|
500
|
+
/**
|
|
501
|
+
* Creates an AST preprocessor-specific block node (e.g., for template directives
|
|
502
|
+
* like `{#if}`, `{#each}`, or front matter). Recursively traverses child nodes.
|
|
503
|
+
*
|
|
504
|
+
* @param token - The child token with the block's node name and fragment flag
|
|
505
|
+
* @param childNodes - The language-specific child AST nodes to traverse
|
|
506
|
+
* @param conditionalType - The conditional type if this is a conditional block (e.g., "if", "else")
|
|
507
|
+
* @param originBlockNode - The original language-specific block node for reference
|
|
508
|
+
* @returns An array of AST nodes including the block node and any sibling nodes
|
|
509
|
+
*/
|
|
347
510
|
visitPsBlock(token, childNodes = [], conditionalType = null, originBlockNode) {
|
|
348
511
|
timer.push('visitPsBlock');
|
|
349
512
|
const block = {
|
|
@@ -358,6 +521,15 @@ export class Parser {
|
|
|
358
521
|
const siblings = this.visitChildren(childNodes, block);
|
|
359
522
|
return [block, ...siblings];
|
|
360
523
|
}
|
|
524
|
+
/**
|
|
525
|
+
* Traverses a list of child nodes under the given parent, appending the resulting
|
|
526
|
+
* child AST nodes to the parent and returning any sibling nodes that belong
|
|
527
|
+
* to ancestor levels. Skips traversal for raw text elements (e.g., `<script>`, `<style>`).
|
|
528
|
+
*
|
|
529
|
+
* @param children - The language-specific child AST nodes to traverse
|
|
530
|
+
* @param parentNode - The parent markuplint AST node to which children will be appended
|
|
531
|
+
* @returns An array of sibling nodes that belong to ancestor depth levels
|
|
532
|
+
*/
|
|
361
533
|
visitChildren(children, parentNode) {
|
|
362
534
|
if (children.length === 0) {
|
|
363
535
|
return [];
|
|
@@ -369,6 +541,13 @@ export class Parser {
|
|
|
369
541
|
this.appendChild(parentNode, ...traversed.childNodes);
|
|
370
542
|
return traversed.siblings;
|
|
371
543
|
}
|
|
544
|
+
/**
|
|
545
|
+
* Attempts to parse a token as a JSX spread attribute (e.g., `{...props}`).
|
|
546
|
+
* Returns null if the token does not match the spread attribute pattern.
|
|
547
|
+
*
|
|
548
|
+
* @param token - The token to inspect for spread attribute syntax
|
|
549
|
+
* @returns A spread attribute AST node, or null if the token is not a spread attribute
|
|
550
|
+
*/
|
|
372
551
|
visitSpreadAttr(token) {
|
|
373
552
|
timer.push('visitSpreadAttr');
|
|
374
553
|
const raw = token.raw.trim();
|
|
@@ -390,6 +569,16 @@ export class Parser {
|
|
|
390
569
|
nodeName: '#spread',
|
|
391
570
|
};
|
|
392
571
|
}
|
|
572
|
+
/**
|
|
573
|
+
* Parses a token into a fully structured attribute AST node, breaking it down
|
|
574
|
+
* into its constituent parts: spaces, name, equal sign, quotes, and value.
|
|
575
|
+
* Also detects spread attributes. If there is leftover text after the attribute,
|
|
576
|
+
* it is returned in the `__rightText` property for further processing.
|
|
577
|
+
*
|
|
578
|
+
* @param token - The token containing the raw attribute text and position
|
|
579
|
+
* @param options - Controls quoting behavior, value types, and the initial parser state
|
|
580
|
+
* @returns The parsed attribute AST node with an optional `__rightText` for remaining unparsed content
|
|
581
|
+
*/
|
|
393
582
|
visitAttr(token, options) {
|
|
394
583
|
timer.push('visitAttr');
|
|
395
584
|
const raw = token.raw;
|
|
@@ -469,6 +658,15 @@ export class Parser {
|
|
|
469
658
|
}
|
|
470
659
|
return spread ?? htmlAttr;
|
|
471
660
|
}
|
|
661
|
+
/**
|
|
662
|
+
* Re-parses a text token to discover embedded HTML/XML tags within it,
|
|
663
|
+
* splitting the content into a sequence of tag and text AST nodes.
|
|
664
|
+
* Handles self-closing detection, depth tracking, and void element recognition.
|
|
665
|
+
*
|
|
666
|
+
* @param token - The child token containing the code fragment to re-parse
|
|
667
|
+
* @param options - Controls whether nameless fragments (JSX `<>`) are recognized
|
|
668
|
+
* @returns An array of tag and text AST nodes discovered in the code fragment
|
|
669
|
+
*/
|
|
472
670
|
parseCodeFragment(token, options) {
|
|
473
671
|
const nodes = [];
|
|
474
672
|
let raw = token.raw;
|
|
@@ -553,6 +751,13 @@ export class Parser {
|
|
|
553
751
|
}
|
|
554
752
|
return nodes;
|
|
555
753
|
}
|
|
754
|
+
/**
|
|
755
|
+
* Updates the position and depth properties of an AST node, recalculating
|
|
756
|
+
* end offsets, lines, and columns based on the new start values.
|
|
757
|
+
*
|
|
758
|
+
* @param node - The AST node whose location should be updated
|
|
759
|
+
* @param props - The new position and depth values to apply (only provided values are changed)
|
|
760
|
+
*/
|
|
556
761
|
updateLocation(node, props) {
|
|
557
762
|
Object.assign(node, {
|
|
558
763
|
startOffset: props.startOffset ?? node.startOffset,
|
|
@@ -592,9 +797,25 @@ export class Parser {
|
|
|
592
797
|
updateElement(el, props) {
|
|
593
798
|
Object.assign(el, props);
|
|
594
799
|
}
|
|
800
|
+
/**
|
|
801
|
+
* Updates metadata properties on an HTML attribute AST node, such as marking
|
|
802
|
+
* it as a directive, dynamic value, or setting its potential name/value
|
|
803
|
+
* for preprocessor-specific attribute transformations.
|
|
804
|
+
*
|
|
805
|
+
* @param attr - The HTML attribute AST node to update
|
|
806
|
+
* @param props - The metadata properties to overwrite on the attribute
|
|
807
|
+
*/
|
|
595
808
|
updateAttr(attr, props) {
|
|
596
809
|
Object.assign(attr, props);
|
|
597
810
|
}
|
|
811
|
+
/**
|
|
812
|
+
* Determines the element type (e.g., "html", "web-component", "authored") for a
|
|
813
|
+
* given tag name, using the parser's authored element name distinguishing pattern.
|
|
814
|
+
*
|
|
815
|
+
* @param nodeName - The tag name to classify
|
|
816
|
+
* @param defaultPattern - A fallback pattern if no authored element name pattern is set
|
|
817
|
+
* @returns The element type classification
|
|
818
|
+
*/
|
|
598
819
|
detectElementType(nodeName, defaultPattern) {
|
|
599
820
|
return detectElementType(nodeName, __classPrivateFieldGet(this, _Parser_authoredElementName, "f"), defaultPattern);
|
|
600
821
|
}
|
|
@@ -613,6 +834,14 @@ export class Parser {
|
|
|
613
834
|
...__classPrivateFieldGet(this, _Parser_instances, "m", _Parser_getEndLocation).call(this, props),
|
|
614
835
|
};
|
|
615
836
|
}
|
|
837
|
+
/**
|
|
838
|
+
* Extracts a Token from the current raw code at the given string-index range,
|
|
839
|
+
* computing the line and column from the source position.
|
|
840
|
+
*
|
|
841
|
+
* @param start - The starting string index (UTF-16 code units, inclusive) in the raw code
|
|
842
|
+
* @param end - The ending string index (UTF-16 code units, exclusive) in the raw code; if omitted, slices to the end
|
|
843
|
+
* @returns A Token containing the sliced raw content and its start position
|
|
844
|
+
*/
|
|
616
845
|
sliceFragment(start, end) {
|
|
617
846
|
const raw = this.rawCode.slice(start, end);
|
|
618
847
|
const { line, column } = getPosition(this.rawCode, start);
|
|
@@ -623,9 +852,29 @@ export class Parser {
|
|
|
623
852
|
startCol: column,
|
|
624
853
|
};
|
|
625
854
|
}
|
|
855
|
+
/**
|
|
856
|
+
* Calculates start and end byte offsets from line/column positions
|
|
857
|
+
* within the current raw source code.
|
|
858
|
+
*
|
|
859
|
+
* @param startLine - The starting line number (1-based)
|
|
860
|
+
* @param startCol - The starting column number (1-based)
|
|
861
|
+
* @param endLine - The ending line number (1-based)
|
|
862
|
+
* @param endCol - The ending column number (1-based)
|
|
863
|
+
* @returns The computed start and end byte offsets
|
|
864
|
+
*/
|
|
626
865
|
getOffsetsFromCode(startLine, startCol, endLine, endCol) {
|
|
627
866
|
return getOffsetsFromCode(this.rawCode, startLine, startCol, endLine, endCol);
|
|
628
867
|
}
|
|
868
|
+
/**
|
|
869
|
+
* Walks through a node list depth-first, invoking the walker callback for each node.
|
|
870
|
+
* The walker receives the current node, the sequentially previous node, and the depth.
|
|
871
|
+
* Automatically recurses into child nodes of parent elements and preprocessor blocks.
|
|
872
|
+
*
|
|
873
|
+
* @template Node - The specific AST node type being walked
|
|
874
|
+
* @param nodeList - The list of nodes to walk
|
|
875
|
+
* @param walker - The callback invoked for each node during the walk
|
|
876
|
+
* @param depth - The current depth (starts at 0 for top-level calls)
|
|
877
|
+
*/
|
|
629
878
|
walk(nodeList, walker, depth = 0) {
|
|
630
879
|
for (const node of nodeList) {
|
|
631
880
|
walker(node, __classPrivateFieldGet(this, _Parser_walkMethodSequentailPrevNode, "f"), depth);
|
|
@@ -638,6 +887,14 @@ export class Parser {
|
|
|
638
887
|
__classPrivateFieldSet(this, _Parser_walkMethodSequentailPrevNode, null, "f");
|
|
639
888
|
}
|
|
640
889
|
}
|
|
890
|
+
/**
|
|
891
|
+
* Appends child nodes to a parent node, updating parent references and
|
|
892
|
+
* maintaining sorted order by source position. If a child already exists
|
|
893
|
+
* in the parent (by UUID), it is replaced in place rather than duplicated.
|
|
894
|
+
*
|
|
895
|
+
* @param parentNode - The parent node to append children to, or null (no-op)
|
|
896
|
+
* @param childNodes - The child nodes to append
|
|
897
|
+
*/
|
|
641
898
|
appendChild(parentNode, ...childNodes) {
|
|
642
899
|
if (!parentNode || childNodes.length === 0) {
|
|
643
900
|
return;
|
|
@@ -659,6 +916,14 @@ export class Parser {
|
|
|
659
916
|
: newChildNodes.toSorted(sortNodes),
|
|
660
917
|
});
|
|
661
918
|
}
|
|
919
|
+
/**
|
|
920
|
+
* Replaces a child node within a parent's child list with one or more replacement nodes.
|
|
921
|
+
* If the old child is not found in the parent, the operation is a no-op.
|
|
922
|
+
*
|
|
923
|
+
* @param parentNode - The parent node containing the child to replace
|
|
924
|
+
* @param oldChildNode - The existing child node to be replaced
|
|
925
|
+
* @param replacementChildNodes - The replacement nodes to insert at the old child's position
|
|
926
|
+
*/
|
|
662
927
|
replaceChild(parentNode, oldChildNode, ...replacementChildNodes) {
|
|
663
928
|
const index = parentNode.childNodes.findIndex(childNode => childNode.uuid === oldChildNode.uuid);
|
|
664
929
|
if (index === -1) {
|
|
@@ -984,9 +1249,6 @@ _Parser_booleanish = new WeakMap(), _Parser_defaultState = new WeakMap(), _Parse
|
|
|
984
1249
|
col = endSpace.endCol;
|
|
985
1250
|
offset = endSpace.endOffset;
|
|
986
1251
|
const selfClosingSolidus = this.createToken(selfClosingSolidusChar, offset, line, col);
|
|
987
|
-
line = selfClosingSolidus.endLine;
|
|
988
|
-
col = selfClosingSolidus.endCol;
|
|
989
|
-
offset = selfClosingSolidus.endOffset;
|
|
990
1252
|
const rawCodeFragment = raw.slice(beforeOpenTagChars.length, raw.length - leftover.length);
|
|
991
1253
|
if (!rawCodeFragment) {
|
|
992
1254
|
return {
|
package/lib/script-parser.d.ts
CHANGED
|
@@ -1,9 +1,30 @@
|
|
|
1
1
|
import type { CustomParser } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Tokenizes a JavaScript code string into an array of typed tokens using espree.
|
|
4
|
+
* Each token contains its type (e.g., Identifier, Punctuator) and raw value.
|
|
5
|
+
*
|
|
6
|
+
* @param script - The JavaScript source code to tokenize
|
|
7
|
+
* @returns An array of tokens with their type and string value
|
|
8
|
+
*/
|
|
2
9
|
export declare function scriptParser(script: string): ScriptTokenType[];
|
|
10
|
+
/**
|
|
11
|
+
* Attempts to extract the longest valid JavaScript prefix from a script string
|
|
12
|
+
* that may contain trailing non-JS content (e.g., HTML after an inline expression).
|
|
13
|
+
* Falls back to wrapping the script as an object literal or spread operator
|
|
14
|
+
* if the initial parse fails.
|
|
15
|
+
*
|
|
16
|
+
* @param script - The potentially mixed script/markup string to parse
|
|
17
|
+
* @param parse - A custom parse function to validate the script; defaults to espree with JSX support
|
|
18
|
+
* @returns An object containing the `validScript` prefix and the remaining `leftover` string
|
|
19
|
+
*/
|
|
3
20
|
export declare function safeScriptParser(script: string, parse?: CustomParser): {
|
|
4
21
|
validScript: string;
|
|
5
22
|
leftover: string;
|
|
6
23
|
};
|
|
24
|
+
/**
|
|
25
|
+
* A token produced by the script tokenizer, representing a single
|
|
26
|
+
* lexical unit of JavaScript source code.
|
|
27
|
+
*/
|
|
7
28
|
export type ScriptTokenType = {
|
|
8
29
|
type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
|
|
9
30
|
value: string;
|
package/lib/script-parser.js
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
// @ts-ignore
|
|
2
2
|
import { tokenize, parse } from 'espree';
|
|
3
|
+
/**
|
|
4
|
+
* Tokenizes a JavaScript code string into an array of typed tokens using espree.
|
|
5
|
+
* Each token contains its type (e.g., Identifier, Punctuator) and raw value.
|
|
6
|
+
*
|
|
7
|
+
* @param script - The JavaScript source code to tokenize
|
|
8
|
+
* @returns An array of tokens with their type and string value
|
|
9
|
+
*/
|
|
3
10
|
export function scriptParser(script) {
|
|
4
11
|
const tokens = tokenize(script, {
|
|
5
12
|
ecmaVersion: 'latest',
|
|
@@ -10,6 +17,16 @@ export function scriptParser(script) {
|
|
|
10
17
|
value: token.value,
|
|
11
18
|
}));
|
|
12
19
|
}
|
|
20
|
+
/**
|
|
21
|
+
* Attempts to extract the longest valid JavaScript prefix from a script string
|
|
22
|
+
* that may contain trailing non-JS content (e.g., HTML after an inline expression).
|
|
23
|
+
* Falls back to wrapping the script as an object literal or spread operator
|
|
24
|
+
* if the initial parse fails.
|
|
25
|
+
*
|
|
26
|
+
* @param script - The potentially mixed script/markup string to parse
|
|
27
|
+
* @param parse - A custom parse function to validate the script; defaults to espree with JSX support
|
|
28
|
+
* @returns An object containing the `validScript` prefix and the remaining `leftover` string
|
|
29
|
+
*/
|
|
13
30
|
export function safeScriptParser(script, parse = defaultParse) {
|
|
14
31
|
let { validScript, leftover } = safeParse(script, parse);
|
|
15
32
|
// Support for object literal
|
package/lib/types.d.ts
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
import type { EndTagType, MLASTParentNode, ParserOptions as ConfigParserOptions } from '@markuplint/ml-ast';
|
|
2
|
+
/**
|
|
3
|
+
* Configuration options for initializing a Parser instance,
|
|
4
|
+
* controlling how the parser handles tags, attributes, and whitespace.
|
|
5
|
+
*/
|
|
2
6
|
export type ParserOptions = {
|
|
3
7
|
readonly booleanish?: boolean;
|
|
4
8
|
readonly endTagType?: EndTagType;
|
|
@@ -9,28 +13,58 @@ export type ParserOptions = {
|
|
|
9
13
|
readonly spaceChars?: readonly string[];
|
|
10
14
|
readonly rawTextElements?: readonly string[];
|
|
11
15
|
};
|
|
16
|
+
/**
|
|
17
|
+
* Options passed to a single parse invocation, extending the base config parser options
|
|
18
|
+
* with offset positioning and depth control for embedded code fragments.
|
|
19
|
+
*/
|
|
12
20
|
export type ParseOptions = ConfigParserOptions & {
|
|
13
21
|
readonly offsetOffset?: number;
|
|
14
22
|
readonly offsetLine?: number;
|
|
15
23
|
readonly offsetColumn?: number;
|
|
16
24
|
readonly depth?: number;
|
|
17
25
|
};
|
|
26
|
+
/**
|
|
27
|
+
* The result of tokenizing raw source code, containing the AST nodes
|
|
28
|
+
* and metadata about whether the parsed content is a document fragment.
|
|
29
|
+
*
|
|
30
|
+
* @template N - The AST node type produced by the tokenizer
|
|
31
|
+
* @template State - The parser state type carried through tokenization
|
|
32
|
+
*/
|
|
18
33
|
export type Tokenized<N extends {} = {}, State extends unknown = null> = {
|
|
19
34
|
readonly ast: N[];
|
|
20
35
|
readonly isFragment: boolean;
|
|
21
36
|
readonly state?: State;
|
|
22
37
|
};
|
|
38
|
+
/**
|
|
39
|
+
* A minimal source token representing a raw string fragment
|
|
40
|
+
* along with its starting position in the source code.
|
|
41
|
+
*/
|
|
23
42
|
export type Token = {
|
|
24
43
|
readonly raw: string;
|
|
25
44
|
readonly startOffset: number;
|
|
26
45
|
readonly startLine: number;
|
|
27
46
|
readonly startCol: number;
|
|
28
47
|
};
|
|
48
|
+
/**
|
|
49
|
+
* A token that belongs to a parent node in the AST, extending the base Token
|
|
50
|
+
* with nesting depth and a reference to the enclosing parent node.
|
|
51
|
+
*/
|
|
29
52
|
export type ChildToken = Token & {
|
|
30
53
|
readonly depth: number;
|
|
31
54
|
readonly parentNode: MLASTParentNode | null;
|
|
32
55
|
};
|
|
56
|
+
/**
|
|
57
|
+
* Determines how self-closing tags (e.g., `<br />`) are interpreted.
|
|
58
|
+
*
|
|
59
|
+
* - `"html"`: Only void elements are treated as self-closing (HTML spec behavior)
|
|
60
|
+
* - `"xml"`: The self-closing solidus (`/`) determines self-closing behavior
|
|
61
|
+
* - `"html+xml"`: Either void elements or the self-closing solidus cause self-closing
|
|
62
|
+
*/
|
|
33
63
|
export type SelfCloseType = 'html' | 'xml' | 'html+xml';
|
|
64
|
+
/**
|
|
65
|
+
* Represents a tagged code block (e.g., template expressions or preprocessor directives)
|
|
66
|
+
* that was extracted from the source during the ignore-block phase.
|
|
67
|
+
*/
|
|
34
68
|
export type Code = {
|
|
35
69
|
readonly type: string;
|
|
36
70
|
readonly index: number;
|
|
@@ -39,22 +73,45 @@ export type Code = {
|
|
|
39
73
|
readonly endTag: string | null;
|
|
40
74
|
resolved: boolean;
|
|
41
75
|
};
|
|
76
|
+
/**
|
|
77
|
+
* Defines a pattern for identifying blocks of code that should be masked
|
|
78
|
+
* (replaced with placeholder characters) before parsing, such as template
|
|
79
|
+
* language expressions or preprocessor directives.
|
|
80
|
+
*/
|
|
42
81
|
export type IgnoreTag = {
|
|
43
82
|
readonly type: string;
|
|
44
83
|
readonly start: Readonly<RegExp> | string;
|
|
45
84
|
readonly end: Readonly<RegExp> | string;
|
|
46
85
|
};
|
|
86
|
+
/**
|
|
87
|
+
* The result of masking ignore-tagged code blocks in the source, preserving
|
|
88
|
+
* the original source alongside the replaced version and a stack of extracted codes.
|
|
89
|
+
*/
|
|
47
90
|
export type IgnoreBlock = {
|
|
48
91
|
readonly source: string;
|
|
49
92
|
readonly replaced: string;
|
|
50
93
|
readonly stack: readonly Code[];
|
|
51
94
|
readonly maskChar: string;
|
|
52
95
|
};
|
|
96
|
+
/**
|
|
97
|
+
* Defines a pair of quote delimiters and the value type they enclose,
|
|
98
|
+
* used when parsing attribute values with non-standard quoting
|
|
99
|
+
* (e.g., JSX expression braces or template literals).
|
|
100
|
+
*/
|
|
53
101
|
export type QuoteSet = {
|
|
54
102
|
readonly start: string;
|
|
55
103
|
readonly end: string;
|
|
56
104
|
readonly type: ValueType;
|
|
57
105
|
readonly parser?: CustomParser;
|
|
58
106
|
};
|
|
107
|
+
/**
|
|
108
|
+
* A function that attempts to parse a code string, throwing a SyntaxError
|
|
109
|
+
* if the code is invalid. Used by the safe script parser to determine
|
|
110
|
+
* the boundary of valid script content.
|
|
111
|
+
*/
|
|
59
112
|
export type CustomParser = (code: string) => void;
|
|
113
|
+
/**
|
|
114
|
+
* The semantic type of an attribute value, distinguishing between
|
|
115
|
+
* plain string values and script/expression values.
|
|
116
|
+
*/
|
|
60
117
|
export type ValueType = 'string' | 'script';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/parser-utils",
|
|
3
|
-
"version": "4.8.9",
|
|
3
|
+
"version": "4.8.11",
|
|
4
4
|
"description": "Utility module for markuplint parser plugin",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -28,17 +28,17 @@
|
|
|
28
28
|
"clean": "tsc --build --clean tsconfig.build.json"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@markuplint/ml-ast": "4.4.
|
|
32
|
-
"@markuplint/ml-spec": "4.10.
|
|
33
|
-
"@markuplint/types": "4.8.
|
|
34
|
-
"@types/uuid": "
|
|
35
|
-
"debug": "4.4.
|
|
36
|
-
"espree": "
|
|
31
|
+
"@markuplint/ml-ast": "4.4.11",
|
|
32
|
+
"@markuplint/ml-spec": "4.10.2",
|
|
33
|
+
"@markuplint/types": "4.8.2",
|
|
34
|
+
"@types/uuid": "11.0.0",
|
|
35
|
+
"debug": "4.4.3",
|
|
36
|
+
"espree": "11.1.0",
|
|
37
37
|
"type-fest": "4.41.0",
|
|
38
|
-
"uuid": "
|
|
38
|
+
"uuid": "13.0.0"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
-
"@typescript-eslint/typescript-estree": "8.
|
|
41
|
+
"@typescript-eslint/typescript-estree": "8.54.0"
|
|
42
42
|
},
|
|
43
|
-
"gitHead": "
|
|
43
|
+
"gitHead": "193ee7c1262bbed95424e38efdf1a8e56ff049f4"
|
|
44
44
|
}
|