@markuplint/parser-utils 4.8.10 → 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.ja.md +208 -0
- package/ARCHITECTURE.md +251 -0
- package/CHANGELOG.md +33 -2
- package/README.md +6 -0
- package/SKILL.md +126 -0
- package/docs/maintenance.ja.md +176 -0
- package/docs/maintenance.md +176 -0
- package/docs/parser-class.ja.md +655 -0
- package/docs/parser-class.md +655 -0
- package/lib/debug.js +8 -24
- package/lib/debugger.d.ts +25 -0
- package/lib/debugger.js +34 -4
- package/lib/enums.d.ts +10 -0
- package/lib/enums.js +10 -0
- package/lib/get-location.d.ts +31 -0
- package/lib/get-location.js +33 -0
- package/lib/get-namespace.d.ts +11 -0
- package/lib/get-namespace.js +38 -0
- package/lib/idl-attributes.d.ts +9 -0
- package/lib/idl-attributes.js +9 -0
- package/lib/ignore-block.js +15 -14
- package/lib/index.d.ts +2 -1
- package/lib/index.js +1 -1
- package/lib/parser-error.d.ts +16 -0
- package/lib/parser-error.js +20 -3
- package/lib/parser.d.ts +285 -7
- package/lib/parser.js +763 -551
- package/lib/script-parser.d.ts +21 -0
- package/lib/script-parser.js +17 -0
- package/lib/sort-nodes.d.ts +8 -0
- package/lib/sort-nodes.js +11 -3
- package/lib/types.d.ts +60 -3
- package/package.json +11 -10
package/lib/parser.js
CHANGED
|
@@ -1,17 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a getter");
|
|
3
|
-
if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
|
|
4
|
-
return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
|
|
5
|
-
};
|
|
6
|
-
var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
|
|
7
|
-
if (kind === "m") throw new TypeError("Private method is not writable");
|
|
8
|
-
if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter");
|
|
9
|
-
if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot write private member to an object whose class did not declare it");
|
|
10
|
-
return (kind === "a" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;
|
|
11
|
-
};
|
|
12
|
-
var _Parser_instances, _Parser_booleanish, _Parser_defaultState, _Parser_endTagType, _Parser_ignoreTags, _Parser_maskChar, _Parser_tagNameCaseSensitive, _Parser_selfCloseType, _Parser_spaceChars, _Parser_rawTextElements, _Parser_authoredElementName, _Parser_originalRawCode, _Parser_rawCode, _Parser_defaultDepth, _Parser_walkMethodSequentailPrevNode, _Parser_arrayize, _Parser_concatText, _Parser_concatTextNodes, _Parser_convertIntoInvalidNode, _Parser_createOffsetSpaces, _Parser_createRemnantNode, _Parser_exposeRemnantNodes, _Parser_getEndLocation, _Parser_orphanEndTagToBogusMark, _Parser_pairing, _Parser_parseEndTag, _Parser_parseStartTag, _Parser_parseTag, _Parser_removeChild, _Parser_removeDeprecatedNode, _Parser_removeOffsetSpaces, _Parser_reset, _Parser_setRawCode, _Parser_trimText;
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
13
2
|
import { isVoidElement as detectVoidElement } from '@markuplint/ml-spec';
|
|
14
|
-
import { v4 as uuid } from 'uuid';
|
|
15
3
|
import { attrTokenizer } from './attr-tokenizer.js';
|
|
16
4
|
import { defaultSpaces } from './const.js';
|
|
17
5
|
import { domLog, PerformanceTimer } from './debug.js';
|
|
@@ -22,37 +10,57 @@ import { ignoreBlock, restoreNode } from './ignore-block.js';
|
|
|
22
10
|
import { ignoreFrontMatter } from './ignore-front-matter.js';
|
|
23
11
|
import { ParserError } from './parser-error.js';
|
|
24
12
|
import { sortNodes } from './sort-nodes.js';
|
|
13
|
+
import { getNamespace } from './get-namespace.js';
|
|
25
14
|
const timer = new PerformanceTimer();
|
|
15
|
+
/**
|
|
16
|
+
* Abstract base class for all markuplint parsers. Provides the core parsing pipeline
|
|
17
|
+
* including tokenization, tree traversal, node flattening, and error handling.
|
|
18
|
+
* Subclasses must implement `nodeize` to convert language-specific AST nodes
|
|
19
|
+
* into the markuplint AST format.
|
|
20
|
+
*
|
|
21
|
+
* @template Node - The language-specific AST node type produced by the tokenizer
|
|
22
|
+
* @template State - An optional parser state type that persists across tokenization
|
|
23
|
+
*/
|
|
26
24
|
export class Parser {
|
|
25
|
+
#booleanish = false;
|
|
26
|
+
#defaultState;
|
|
27
|
+
#endTagType = 'omittable';
|
|
28
|
+
#ignoreTags = [];
|
|
29
|
+
#maskChar;
|
|
30
|
+
#tagNameCaseSensitive = false;
|
|
31
|
+
#selfCloseType = 'html';
|
|
32
|
+
#spaceChars = defaultSpaces;
|
|
33
|
+
#rawTextElements = ['style', 'script'];
|
|
34
|
+
#authoredElementName;
|
|
35
|
+
#originalRawCode = '';
|
|
36
|
+
#rawCode = '';
|
|
37
|
+
#defaultDepth = 0;
|
|
38
|
+
#walkMethodSequentailPrevNode = null;
|
|
39
|
+
state;
|
|
40
|
+
/**
|
|
41
|
+
* Creates a new Parser instance with the given options and initial state.
|
|
42
|
+
*
|
|
43
|
+
* @param options - Configuration options controlling tag handling, whitespace, and quoting behavior
|
|
44
|
+
* @param defaultState - The initial parser state, cloned and restored after each parse call
|
|
45
|
+
*/
|
|
27
46
|
constructor(options, defaultState) {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
_Parser_authoredElementName.set(this, void 0);
|
|
39
|
-
_Parser_originalRawCode.set(this, '');
|
|
40
|
-
_Parser_rawCode.set(this, '');
|
|
41
|
-
_Parser_defaultDepth.set(this, 0);
|
|
42
|
-
_Parser_walkMethodSequentailPrevNode.set(this, null);
|
|
43
|
-
__classPrivateFieldSet(this, _Parser_booleanish, options?.booleanish ?? __classPrivateFieldGet(this, _Parser_booleanish, "f"), "f");
|
|
44
|
-
__classPrivateFieldSet(this, _Parser_endTagType, options?.endTagType ?? __classPrivateFieldGet(this, _Parser_endTagType, "f"), "f");
|
|
45
|
-
__classPrivateFieldSet(this, _Parser_ignoreTags, options?.ignoreTags ?? __classPrivateFieldGet(this, _Parser_ignoreTags, "f"), "f");
|
|
46
|
-
__classPrivateFieldSet(this, _Parser_maskChar, options?.maskChar ?? __classPrivateFieldGet(this, _Parser_maskChar, "f"), "f");
|
|
47
|
-
__classPrivateFieldSet(this, _Parser_tagNameCaseSensitive, options?.tagNameCaseSensitive ?? __classPrivateFieldGet(this, _Parser_tagNameCaseSensitive, "f"), "f");
|
|
48
|
-
__classPrivateFieldSet(this, _Parser_selfCloseType, options?.selfCloseType ?? __classPrivateFieldGet(this, _Parser_selfCloseType, "f"), "f");
|
|
49
|
-
__classPrivateFieldSet(this, _Parser_spaceChars, options?.spaceChars ?? __classPrivateFieldGet(this, _Parser_spaceChars, "f"), "f");
|
|
50
|
-
__classPrivateFieldSet(this, _Parser_rawTextElements, options?.rawTextElements ?? __classPrivateFieldGet(this, _Parser_rawTextElements, "f"), "f");
|
|
51
|
-
__classPrivateFieldSet(this, _Parser_defaultState, defaultState ?? null, "f");
|
|
52
|
-
this.state = structuredClone(__classPrivateFieldGet(this, _Parser_defaultState, "f"));
|
|
47
|
+
this.#booleanish = options?.booleanish ?? this.#booleanish;
|
|
48
|
+
this.#endTagType = options?.endTagType ?? this.#endTagType;
|
|
49
|
+
this.#ignoreTags = options?.ignoreTags ?? this.#ignoreTags;
|
|
50
|
+
this.#maskChar = options?.maskChar ?? this.#maskChar;
|
|
51
|
+
this.#tagNameCaseSensitive = options?.tagNameCaseSensitive ?? this.#tagNameCaseSensitive;
|
|
52
|
+
this.#selfCloseType = options?.selfCloseType ?? this.#selfCloseType;
|
|
53
|
+
this.#spaceChars = options?.spaceChars ?? this.#spaceChars;
|
|
54
|
+
this.#rawTextElements = options?.rawTextElements ?? this.#rawTextElements;
|
|
55
|
+
this.#defaultState = defaultState ?? null;
|
|
56
|
+
this.state = structuredClone(this.#defaultState);
|
|
53
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* The pattern used to distinguish authored (component) element names
|
|
60
|
+
* from native HTML elements, as specified by the parse options.
|
|
61
|
+
*/
|
|
54
62
|
get authoredElementName() {
|
|
55
|
-
return
|
|
63
|
+
return this.#authoredElementName;
|
|
56
64
|
}
|
|
57
65
|
/**
|
|
58
66
|
* Treats the attribute value as `true` when a booleanish attribute is written with its value omitted.
|
|
@@ -65,7 +73,7 @@ export class Parser {
|
|
|
65
73
|
* In the above, the `aria-hidden` is `true`.
|
|
66
74
|
*/
|
|
67
75
|
get booleanish() {
|
|
68
|
-
return
|
|
76
|
+
return this.#booleanish;
|
|
69
77
|
}
|
|
70
78
|
/**
|
|
71
79
|
* The end tag omittable type.
|
|
@@ -75,50 +83,83 @@ export class Parser {
|
|
|
75
83
|
* - `"never"`: Never needed
|
|
76
84
|
*/
|
|
77
85
|
get endTag() {
|
|
78
|
-
return
|
|
86
|
+
return this.#endTagType;
|
|
79
87
|
}
|
|
88
|
+
/**
|
|
89
|
+
* The current raw source code being parsed, which may have been
|
|
90
|
+
* preprocessed (e.g., ignore blocks masked, front matter removed).
|
|
91
|
+
*/
|
|
80
92
|
get rawCode() {
|
|
81
|
-
return
|
|
93
|
+
return this.#rawCode;
|
|
82
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Whether tag names should be compared in a case-sensitive manner.
|
|
97
|
+
* When false (the default), tag name comparisons are case-insensitive (HTML behavior).
|
|
98
|
+
*/
|
|
83
99
|
get tagNameCaseSensitive() {
|
|
84
|
-
return
|
|
100
|
+
return this.#tagNameCaseSensitive;
|
|
85
101
|
}
|
|
102
|
+
/**
|
|
103
|
+
* Tokenizes the raw source code into language-specific AST nodes.
|
|
104
|
+
* Subclasses should override this method to provide actual tokenization logic.
|
|
105
|
+
*
|
|
106
|
+
* @param options - Parse options controlling offset, depth, and other parse-time settings
|
|
107
|
+
* @returns The tokenized result containing the AST node array and fragment flag
|
|
108
|
+
*/
|
|
86
109
|
tokenize(options) {
|
|
87
110
|
return {
|
|
88
111
|
ast: [],
|
|
89
112
|
isFragment: false,
|
|
90
113
|
};
|
|
91
114
|
}
|
|
115
|
+
/**
|
|
116
|
+
* Hook called before parsing begins, allowing subclasses to preprocess
|
|
117
|
+
* the raw source code. The default implementation prepends offset spaces
|
|
118
|
+
* based on the parse options.
|
|
119
|
+
*
|
|
120
|
+
* @param rawCode - The raw source code about to be parsed
|
|
121
|
+
* @param options - Parse options that may specify offset positioning
|
|
122
|
+
* @returns The preprocessed source code to be used for tokenization
|
|
123
|
+
*/
|
|
92
124
|
beforeParse(rawCode, options) {
|
|
93
|
-
const spaces =
|
|
125
|
+
const spaces = this.#createOffsetSpaces(options);
|
|
94
126
|
return spaces + rawCode;
|
|
95
127
|
}
|
|
128
|
+
/**
|
|
129
|
+
* Parses raw source code through the full pipeline: preprocessing, tokenization,
|
|
130
|
+
* traversal, flattening, ignore-block restoration, and post-processing.
|
|
131
|
+
* Returns the complete markuplint AST document.
|
|
132
|
+
*
|
|
133
|
+
* @param rawCode - The raw source code to parse
|
|
134
|
+
* @param options - Parse options controlling offsets, depth, front matter, and authored element names
|
|
135
|
+
* @returns The parsed AST document containing the node list and fragment flag
|
|
136
|
+
*/
|
|
96
137
|
parse(rawCode, options) {
|
|
97
138
|
try {
|
|
98
139
|
// Initialize raw code
|
|
99
|
-
|
|
140
|
+
this.#setRawCode(rawCode, rawCode);
|
|
100
141
|
timer.push('beforeParse');
|
|
101
142
|
const beforeParsedCode = this.beforeParse(this.rawCode, options);
|
|
102
143
|
// Override raw code
|
|
103
|
-
|
|
104
|
-
|
|
144
|
+
this.#setRawCode(beforeParsedCode);
|
|
145
|
+
this.#authoredElementName = options?.authoredElementName;
|
|
105
146
|
let frontMatter = null;
|
|
106
147
|
if (options?.ignoreFrontMatter) {
|
|
107
148
|
timer.push('ignoreFrontMatter');
|
|
108
149
|
const fm = ignoreFrontMatter(this.rawCode);
|
|
109
|
-
|
|
150
|
+
this.#setRawCode(fm.code);
|
|
110
151
|
frontMatter = fm.frontMatter;
|
|
111
152
|
}
|
|
112
153
|
timer.push('ignoreBlock');
|
|
113
|
-
const blocks = ignoreBlock(this.rawCode,
|
|
114
|
-
|
|
154
|
+
const blocks = ignoreBlock(this.rawCode, this.#ignoreTags, this.#maskChar);
|
|
155
|
+
this.#setRawCode(blocks.replaced);
|
|
115
156
|
timer.push('tokenize');
|
|
116
157
|
const tokenized = this.tokenize(options);
|
|
117
158
|
const ast = tokenized.ast;
|
|
118
159
|
const isFragment = tokenized.isFragment;
|
|
119
|
-
|
|
160
|
+
this.#defaultDepth = options?.depth ?? this.#defaultDepth;
|
|
120
161
|
timer.push('traverse');
|
|
121
|
-
const traversed = this.traverse(ast, null,
|
|
162
|
+
const traversed = this.traverse(ast, null, this.#defaultDepth);
|
|
122
163
|
timer.push('afterTraverse');
|
|
123
164
|
const nodeTree = this.afterTraverse([...traversed.childNodes, ...traversed.siblings]);
|
|
124
165
|
timer.push('flattenNodes');
|
|
@@ -157,7 +198,7 @@ export class Parser {
|
|
|
157
198
|
}
|
|
158
199
|
timer.log();
|
|
159
200
|
domLog(nodeList);
|
|
160
|
-
|
|
201
|
+
this.#reset();
|
|
161
202
|
return {
|
|
162
203
|
raw: rawCode,
|
|
163
204
|
nodeList,
|
|
@@ -168,9 +209,25 @@ export class Parser {
|
|
|
168
209
|
throw this.parseError(error);
|
|
169
210
|
}
|
|
170
211
|
}
|
|
212
|
+
/**
|
|
213
|
+
* Hook called after the main parse pipeline completes, allowing subclasses
|
|
214
|
+
* to perform final transformations on the node list. The default implementation
|
|
215
|
+
* removes any offset spaces that were prepended during preprocessing.
|
|
216
|
+
*
|
|
217
|
+
* @param nodeList - The fully parsed and flattened node list
|
|
218
|
+
* @param options - The parse options used for this parse invocation
|
|
219
|
+
* @returns The post-processed node list
|
|
220
|
+
*/
|
|
171
221
|
afterParse(nodeList, options) {
|
|
172
|
-
return
|
|
222
|
+
return this.#removeOffsetSpaces(nodeList, options);
|
|
173
223
|
}
|
|
224
|
+
/**
|
|
225
|
+
* Wraps an arbitrary error into a ParserError with source location information.
|
|
226
|
+
* Extracts line and column numbers from common error formats.
|
|
227
|
+
*
|
|
228
|
+
* @param error - The original error to wrap
|
|
229
|
+
* @returns A ParserError containing the original error's message and location data
|
|
230
|
+
*/
|
|
174
231
|
parseError(error) {
|
|
175
232
|
return new ParserError(error, {
|
|
176
233
|
line: error.line ?? error.lineNumber ?? 0,
|
|
@@ -179,6 +236,15 @@ export class Parser {
|
|
|
179
236
|
stack: error.stack,
|
|
180
237
|
});
|
|
181
238
|
}
|
|
239
|
+
/**
|
|
240
|
+
* Recursively traverses language-specific AST nodes by calling `nodeize` on each,
|
|
241
|
+
* filtering duplicates, and separating child nodes from ancestor-level siblings.
|
|
242
|
+
*
|
|
243
|
+
* @param originNodes - The language-specific AST nodes to traverse
|
|
244
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
245
|
+
* @param depth - The current nesting depth in the tree
|
|
246
|
+
* @returns An object containing `childNodes` at the current depth and `siblings` that belong to ancestor levels
|
|
247
|
+
*/
|
|
182
248
|
traverse(originNodes, parentNode = null, depth) {
|
|
183
249
|
if (originNodes.length === 0) {
|
|
184
250
|
return {
|
|
@@ -195,7 +261,7 @@ export class Parser {
|
|
|
195
261
|
const filteredNodes = [];
|
|
196
262
|
for (const node of nodes) {
|
|
197
263
|
// Remove duplicated nodes
|
|
198
|
-
const id = `${node.
|
|
264
|
+
const id = `${node.offset}:${node.offset + node.raw.length}:${node.nodeName}:${node.type}:${node.raw}`;
|
|
199
265
|
if (existence.has(id)) {
|
|
200
266
|
continue;
|
|
201
267
|
}
|
|
@@ -212,15 +278,39 @@ export class Parser {
|
|
|
212
278
|
siblings,
|
|
213
279
|
};
|
|
214
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Hook called after traversal completes, used to sort the resulting node tree
|
|
283
|
+
* by source position. Subclasses may override for custom post-traversal logic.
|
|
284
|
+
*
|
|
285
|
+
* @param nodeTree - The unsorted node tree produced by traversal
|
|
286
|
+
* @returns The node tree sorted by source position
|
|
287
|
+
*/
|
|
215
288
|
afterTraverse(nodeTree) {
|
|
216
|
-
return
|
|
217
|
-
? // TODO: Use sort instead of toSorted until we end support for Node 18
|
|
218
|
-
[...nodeTree].sort(sortNodes)
|
|
219
|
-
: nodeTree.toSorted(sortNodes);
|
|
289
|
+
return nodeTree.toSorted(sortNodes);
|
|
220
290
|
}
|
|
291
|
+
/**
|
|
292
|
+
* Converts a single language-specific AST node into one or more markuplint AST nodes.
|
|
293
|
+
* Subclasses must override this method to provide actual node conversion logic
|
|
294
|
+
* using visitor methods like `visitElement`, `visitText`, `visitComment`, etc.
|
|
295
|
+
*
|
|
296
|
+
* @param originNode - The language-specific AST node to convert
|
|
297
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
298
|
+
* @param depth - The current nesting depth in the tree
|
|
299
|
+
* @returns An array of markuplint AST nodes produced from the origin node
|
|
300
|
+
*/
|
|
221
301
|
nodeize(originNode, parentNode, depth) {
|
|
222
302
|
return [];
|
|
223
303
|
}
|
|
304
|
+
/**
|
|
305
|
+
* Post-processes the nodes produced by `nodeize`, separating them into siblings
|
|
306
|
+
* at the current depth and ancestors that belong to a shallower depth level.
|
|
307
|
+
* Doctype nodes at depth 0 are promoted to ancestors.
|
|
308
|
+
*
|
|
309
|
+
* @param siblings - The nodes produced by `nodeize` for a single origin node
|
|
310
|
+
* @param parentNode - The parent markuplint AST node, or null for top-level nodes
|
|
311
|
+
* @param depth - The current nesting depth
|
|
312
|
+
* @returns An object with `siblings` at the current depth and `ancestors` at shallower depths
|
|
313
|
+
*/
|
|
224
314
|
afterNodeize(siblings, parentNode, depth) {
|
|
225
315
|
const newSiblings = [];
|
|
226
316
|
const ancestors = [];
|
|
@@ -246,21 +336,44 @@ export class Parser {
|
|
|
246
336
|
ancestors,
|
|
247
337
|
};
|
|
248
338
|
}
|
|
339
|
+
/**
|
|
340
|
+
* Flattens a hierarchical node tree into a flat, sorted list by walking
|
|
341
|
+
* the tree depth-first and removing duplicated nodes.
|
|
342
|
+
*
|
|
343
|
+
* @param nodeTree - The hierarchical node tree to flatten
|
|
344
|
+
* @returns A flat array of all nodes in source order
|
|
345
|
+
*/
|
|
249
346
|
flattenNodes(nodeTree) {
|
|
250
|
-
return
|
|
347
|
+
return this.#arrayize(nodeTree);
|
|
251
348
|
}
|
|
349
|
+
/**
|
|
350
|
+
* Post-processes the flattened node list by exposing remnant whitespace and
|
|
351
|
+
* invalid nodes between known nodes, converting orphan end tags to bogus markers,
|
|
352
|
+
* concatenating adjacent text nodes, and trimming overlapping text.
|
|
353
|
+
*
|
|
354
|
+
* @param nodeList - The flat node list to post-process
|
|
355
|
+
* @param options - Controls which post-processing steps are applied
|
|
356
|
+
* @returns The cleaned-up flat node list
|
|
357
|
+
*/
|
|
252
358
|
afterFlattenNodes(nodeList, options) {
|
|
253
359
|
const exposeInvalidNode = options?.exposeInvalidNode ?? true;
|
|
254
360
|
const exposeWhiteSpace = options?.exposeWhiteSpace ?? true;
|
|
255
361
|
const concatText = options?.concatText ?? true;
|
|
256
|
-
nodeList =
|
|
257
|
-
nodeList =
|
|
362
|
+
nodeList = this.#exposeRemnantNodes(nodeList, exposeInvalidNode, exposeWhiteSpace);
|
|
363
|
+
nodeList = this.#orphanEndTagToBogusMark(nodeList);
|
|
258
364
|
if (concatText) {
|
|
259
|
-
nodeList =
|
|
365
|
+
nodeList = this.#concatText(nodeList);
|
|
260
366
|
}
|
|
261
|
-
nodeList =
|
|
367
|
+
nodeList = this.#trimText(nodeList);
|
|
262
368
|
return nodeList;
|
|
263
369
|
}
|
|
370
|
+
/**
|
|
371
|
+
* Creates an AST doctype node from a token containing the doctype
|
|
372
|
+
* name, public ID, and system ID.
|
|
373
|
+
*
|
|
374
|
+
* @param token - The child token with doctype-specific properties
|
|
375
|
+
* @returns An array containing the single doctype AST node
|
|
376
|
+
*/
|
|
264
377
|
visitDoctype(token) {
|
|
265
378
|
timer.push('visitDoctype');
|
|
266
379
|
const node = {
|
|
@@ -271,6 +384,14 @@ export class Parser {
|
|
|
271
384
|
};
|
|
272
385
|
return [node];
|
|
273
386
|
}
|
|
387
|
+
/**
|
|
388
|
+
* Creates an AST comment node from a token. Automatically detects whether
|
|
389
|
+
* the comment is a bogus comment (not starting with `<!--`).
|
|
390
|
+
*
|
|
391
|
+
* @param token - The child token containing the comment's raw text and position
|
|
392
|
+
* @param options - Optional settings to override the bogus detection
|
|
393
|
+
* @returns An array containing the single comment AST node
|
|
394
|
+
*/
|
|
274
395
|
visitComment(token, options) {
|
|
275
396
|
timer.push('visitComment');
|
|
276
397
|
const isBogus = options?.isBogus ?? !token.raw.startsWith('<!--');
|
|
@@ -283,6 +404,14 @@ export class Parser {
|
|
|
283
404
|
};
|
|
284
405
|
return [node];
|
|
285
406
|
}
|
|
407
|
+
/**
|
|
408
|
+
* Creates AST text node(s) from a token. Optionally re-parses the text content
|
|
409
|
+
* to discover embedded HTML tags within it.
|
|
410
|
+
*
|
|
411
|
+
* @param token - The child token containing the text content and position
|
|
412
|
+
* @param options - Controls whether to search for embedded tags and how to handle invalid ones
|
|
413
|
+
* @returns An array of AST nodes; a single text node or multiple tag/text nodes if tags were found
|
|
414
|
+
*/
|
|
286
415
|
visitText(token, options) {
|
|
287
416
|
timer.push('visitText');
|
|
288
417
|
const node = {
|
|
@@ -302,6 +431,16 @@ export class Parser {
|
|
|
302
431
|
}
|
|
303
432
|
return [node];
|
|
304
433
|
}
|
|
434
|
+
/**
|
|
435
|
+
* Creates AST element node(s) from a token, including the start tag, optional end tag,
|
|
436
|
+
* and recursively traversed child nodes. Handles ghost elements (empty raw),
|
|
437
|
+
* self-closing tags, and nameless fragments (e.g., JSX `<>`).
|
|
438
|
+
*
|
|
439
|
+
* @param token - The child token with the element's node name; namespace is auto-detected from tag name and parent node
|
|
440
|
+
* @param childNodes - The language-specific child AST nodes to traverse
|
|
441
|
+
* @param options - Controls end tag creation, fragment handling, and property overrides
|
|
442
|
+
* @returns An array of AST nodes including the start tag, optional end tag, and any sibling nodes
|
|
443
|
+
*/
|
|
305
444
|
visitElement(token, childNodes = [], options) {
|
|
306
445
|
timer.push('visitElement');
|
|
307
446
|
const createEndTagToken = options?.createEndTagToken;
|
|
@@ -316,20 +455,21 @@ export class Parser {
|
|
|
316
455
|
elementType: 'html',
|
|
317
456
|
attributes: [],
|
|
318
457
|
childNodes: [],
|
|
458
|
+
blockBehavior: null,
|
|
319
459
|
parentNode: token.parentNode,
|
|
320
460
|
pairNode: null,
|
|
321
461
|
tagCloseChar: '',
|
|
322
462
|
tagOpenChar: '',
|
|
323
463
|
isGhost: true,
|
|
324
464
|
isFragment: false,
|
|
465
|
+
namespace: getNamespace(null, token.parentNode),
|
|
325
466
|
...overwriteProps,
|
|
326
467
|
};
|
|
327
468
|
const siblings = this.visitChildren(childNodes, startTag);
|
|
328
469
|
return [startTag, ...siblings];
|
|
329
470
|
}
|
|
330
471
|
const startTag = {
|
|
331
|
-
...
|
|
332
|
-
namespace: token.namespace,
|
|
472
|
+
...this.#parseStartTag(token, {
|
|
333
473
|
...overwriteProps,
|
|
334
474
|
}, namelessFragment),
|
|
335
475
|
};
|
|
@@ -337,20 +477,30 @@ export class Parser {
|
|
|
337
477
|
if (createEndTagToken) {
|
|
338
478
|
const endTagToken = createEndTagToken(startTag);
|
|
339
479
|
if (endTagToken) {
|
|
340
|
-
const endTag =
|
|
341
|
-
|
|
480
|
+
const endTag = this.#parseEndTag(endTagToken, namelessFragment);
|
|
481
|
+
this.#pairing(startTag, endTag);
|
|
342
482
|
return [startTag, endTag, ...siblings];
|
|
343
483
|
}
|
|
344
484
|
}
|
|
345
485
|
return [startTag, ...siblings];
|
|
346
486
|
}
|
|
347
|
-
|
|
487
|
+
/**
|
|
488
|
+
* Creates an AST preprocessor-specific block node (e.g., for template directives
|
|
489
|
+
* like `{#if}`, `{#each}`, or front matter). Recursively traverses child nodes.
|
|
490
|
+
*
|
|
491
|
+
* @param token - The child token with the block's node name and fragment flag
|
|
492
|
+
* @param childNodes - The language-specific child AST nodes to traverse
|
|
493
|
+
* @param blockBehavior - The block behavior if this is a control-flow block (e.g., "if", "each")
|
|
494
|
+
* @param originBlockNode - The original language-specific block node for reference
|
|
495
|
+
* @returns An array of AST nodes including the block node and any sibling nodes
|
|
496
|
+
*/
|
|
497
|
+
visitPsBlock(token, childNodes = [], blockBehavior = null, originBlockNode) {
|
|
348
498
|
timer.push('visitPsBlock');
|
|
349
499
|
const block = {
|
|
350
500
|
...token,
|
|
351
501
|
...this.createToken(token),
|
|
352
502
|
type: 'psblock',
|
|
353
|
-
|
|
503
|
+
blockBehavior,
|
|
354
504
|
nodeName: `#ps:${token.nodeName}`,
|
|
355
505
|
childNodes: [],
|
|
356
506
|
isBogus: false,
|
|
@@ -358,17 +508,33 @@ export class Parser {
|
|
|
358
508
|
const siblings = this.visitChildren(childNodes, block);
|
|
359
509
|
return [block, ...siblings];
|
|
360
510
|
}
|
|
511
|
+
/**
|
|
512
|
+
* Traverses a list of child nodes under the given parent, appending the resulting
|
|
513
|
+
* child AST nodes to the parent and returning any sibling nodes that belong
|
|
514
|
+
* to ancestor levels. Skips traversal for raw text elements (e.g., `<script>`, `<style>`).
|
|
515
|
+
*
|
|
516
|
+
* @param children - The language-specific child AST nodes to traverse
|
|
517
|
+
* @param parentNode - The parent markuplint AST node to which children will be appended
|
|
518
|
+
* @returns An array of sibling nodes that belong to ancestor depth levels
|
|
519
|
+
*/
|
|
361
520
|
visitChildren(children, parentNode) {
|
|
362
521
|
if (children.length === 0) {
|
|
363
522
|
return [];
|
|
364
523
|
}
|
|
365
|
-
if (parentNode &&
|
|
524
|
+
if (parentNode && this.#rawTextElements.includes(parentNode.nodeName.toLowerCase())) {
|
|
366
525
|
return [];
|
|
367
526
|
}
|
|
368
527
|
const traversed = this.traverse(children, parentNode, parentNode ? parentNode.depth + 1 : 0);
|
|
369
528
|
this.appendChild(parentNode, ...traversed.childNodes);
|
|
370
529
|
return traversed.siblings;
|
|
371
530
|
}
|
|
531
|
+
/**
|
|
532
|
+
* Attempts to parse a token as a JSX spread attribute (e.g., `{...props}`).
|
|
533
|
+
* Returns null if the token does not match the spread attribute pattern.
|
|
534
|
+
*
|
|
535
|
+
* @param token - The token to inspect for spread attribute syntax
|
|
536
|
+
* @returns A spread attribute AST node, or null if the token is not a spread attribute
|
|
537
|
+
*/
|
|
372
538
|
visitSpreadAttr(token) {
|
|
373
539
|
timer.push('visitSpreadAttr');
|
|
374
540
|
const raw = token.raw.trim();
|
|
@@ -382,14 +548,23 @@ export class Parser {
|
|
|
382
548
|
if (!raw.endsWith('}')) {
|
|
383
549
|
return null;
|
|
384
550
|
}
|
|
385
|
-
const node = this.createToken(raw, token.
|
|
551
|
+
const node = this.createToken(raw, token.offset, token.line, token.col);
|
|
386
552
|
return {
|
|
387
553
|
...node,
|
|
388
|
-
...__classPrivateFieldGet(this, _Parser_instances, "m", _Parser_getEndLocation).call(this, node),
|
|
389
554
|
type: 'spread',
|
|
390
555
|
nodeName: '#spread',
|
|
391
556
|
};
|
|
392
557
|
}
|
|
558
|
+
/**
|
|
559
|
+
* Parses a token into a fully structured attribute AST node, breaking it down
|
|
560
|
+
* into its constituent parts: spaces, name, equal sign, quotes, and value.
|
|
561
|
+
* Also detects spread attributes. If there is leftover text after the attribute,
|
|
562
|
+
* it is returned in the `__rightText` property for further processing.
|
|
563
|
+
*
|
|
564
|
+
* @param token - The token containing the raw attribute text and position
|
|
565
|
+
* @param options - Controls quoting behavior, value types, and the initial parser state
|
|
566
|
+
* @returns The parsed attribute AST node with an optional `__rightText` for remaining unparsed content
|
|
567
|
+
*/
|
|
393
568
|
visitAttr(token, options) {
|
|
394
569
|
timer.push('visitAttr');
|
|
395
570
|
const raw = token.raw;
|
|
@@ -397,9 +572,9 @@ export class Parser {
|
|
|
397
572
|
const startState = options?.startState ?? AttrState.BeforeName;
|
|
398
573
|
const noQuoteValueType = options?.noQuoteValueType;
|
|
399
574
|
const endOfUnquotedValueChars = options?.endOfUnquotedValueChars;
|
|
400
|
-
let
|
|
401
|
-
let
|
|
402
|
-
let
|
|
575
|
+
let curOffset = token.offset;
|
|
576
|
+
let curLine = token.line;
|
|
577
|
+
let curCol = token.col;
|
|
403
578
|
let tokens;
|
|
404
579
|
try {
|
|
405
580
|
tokens = attrTokenizer(raw, quoteSet, startState, noQuoteValueType, endOfUnquotedValueChars);
|
|
@@ -410,42 +585,28 @@ export class Parser {
|
|
|
410
585
|
}
|
|
411
586
|
throw error;
|
|
412
587
|
}
|
|
413
|
-
const spacesBeforeName = this.createToken(tokens.spacesBeforeAttrName,
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
const
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
const
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
startOffset = equal.endOffset;
|
|
429
|
-
const spacesAfterEqual = this.createToken(tokens.spacesAfterEqual, startOffset, startLine, startCol);
|
|
430
|
-
startLine = spacesAfterEqual.endLine;
|
|
431
|
-
startCol = spacesAfterEqual.endCol;
|
|
432
|
-
startOffset = spacesAfterEqual.endOffset;
|
|
433
|
-
const startQuote = this.createToken(tokens.quoteStart, startOffset, startLine, startCol);
|
|
434
|
-
startLine = startQuote.endLine;
|
|
435
|
-
startCol = startQuote.endCol;
|
|
436
|
-
startOffset = startQuote.endOffset;
|
|
437
|
-
const value = this.createToken(tokens.attrValue, startOffset, startLine, startCol);
|
|
438
|
-
startLine = value.endLine;
|
|
439
|
-
startCol = value.endCol;
|
|
440
|
-
startOffset = value.endOffset;
|
|
441
|
-
const endQuote = this.createToken(tokens.quoteEnd, startOffset, startLine, startCol);
|
|
588
|
+
const spacesBeforeName = this.createToken(tokens.spacesBeforeAttrName, curOffset, curLine, curCol);
|
|
589
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(spacesBeforeName));
|
|
590
|
+
const name = this.createToken(tokens.attrName, curOffset, curLine, curCol);
|
|
591
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(name));
|
|
592
|
+
const spacesBeforeEqual = this.createToken(tokens.spacesBeforeEqual, curOffset, curLine, curCol);
|
|
593
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(spacesBeforeEqual));
|
|
594
|
+
const equal = this.createToken(tokens.equal, curOffset, curLine, curCol);
|
|
595
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(equal));
|
|
596
|
+
const spacesAfterEqual = this.createToken(tokens.spacesAfterEqual, curOffset, curLine, curCol);
|
|
597
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(spacesAfterEqual));
|
|
598
|
+
const startQuote = this.createToken(tokens.quoteStart, curOffset, curLine, curCol);
|
|
599
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(startQuote));
|
|
600
|
+
const value = this.createToken(tokens.attrValue, curOffset, curLine, curCol);
|
|
601
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(value));
|
|
602
|
+
const endQuote = this.createToken(tokens.quoteEnd, curOffset, curLine, curCol);
|
|
442
603
|
const attrToken = this.createToken(tokens.attrName +
|
|
443
604
|
tokens.spacesBeforeEqual +
|
|
444
605
|
tokens.equal +
|
|
445
606
|
tokens.spacesAfterEqual +
|
|
446
607
|
tokens.quoteStart +
|
|
447
608
|
tokens.attrValue +
|
|
448
|
-
tokens.quoteEnd, name.
|
|
609
|
+
tokens.quoteEnd, name.offset, name.line, name.col);
|
|
449
610
|
const htmlAttr = {
|
|
450
611
|
...attrToken,
|
|
451
612
|
type: 'attr',
|
|
@@ -469,25 +630,34 @@ export class Parser {
|
|
|
469
630
|
}
|
|
470
631
|
return spread ?? htmlAttr;
|
|
471
632
|
}
|
|
633
|
+
/**
|
|
634
|
+
* Re-parses a text token to discover embedded HTML/XML tags within it,
|
|
635
|
+
* splitting the content into a sequence of tag and text AST nodes.
|
|
636
|
+
* Handles self-closing detection, depth tracking, and void element recognition.
|
|
637
|
+
*
|
|
638
|
+
* @param token - The child token containing the code fragment to re-parse
|
|
639
|
+
* @param options - Controls whether nameless fragments (JSX `<>`) are recognized
|
|
640
|
+
* @returns An array of tag and text AST nodes discovered in the code fragment
|
|
641
|
+
*/
|
|
472
642
|
parseCodeFragment(token, options) {
|
|
473
643
|
const nodes = [];
|
|
474
644
|
let raw = token.raw;
|
|
475
|
-
let
|
|
476
|
-
let
|
|
477
|
-
let
|
|
645
|
+
let curOffset = token.offset;
|
|
646
|
+
let curLine = token.line;
|
|
647
|
+
let curCol = token.col;
|
|
478
648
|
let depth = token.depth;
|
|
479
649
|
const depthStack = new Map();
|
|
480
650
|
while (raw) {
|
|
481
|
-
const parsed =
|
|
651
|
+
const parsed = this.#parseTag({
|
|
482
652
|
raw,
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
653
|
+
offset: curOffset,
|
|
654
|
+
line: curLine,
|
|
655
|
+
col: curCol,
|
|
486
656
|
depth,
|
|
487
|
-
parentNode:
|
|
657
|
+
parentNode: token.parentNode,
|
|
488
658
|
}, true, true, options?.namelessFragment ?? false);
|
|
489
659
|
if (parsed.__left) {
|
|
490
|
-
const token = this.createToken(parsed.__left,
|
|
660
|
+
const token = this.createToken(parsed.__left, curOffset, curLine, curCol);
|
|
491
661
|
const textNode = {
|
|
492
662
|
...token,
|
|
493
663
|
type: 'text',
|
|
@@ -502,12 +672,10 @@ export class Parser {
|
|
|
502
672
|
continue;
|
|
503
673
|
}
|
|
504
674
|
const tag = parsed.token;
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
startOffset = tag.endOffset;
|
|
508
|
-
let isSelfClose = tag.type === 'starttag' && tag.selfClosingSolidus?.raw === '/';
|
|
675
|
+
({ offset: curOffset, line: curLine, col: curCol } = this.#getEndLocation(tag));
|
|
676
|
+
let isSelfClose = tag.type === 'starttag' && tag.tagCloseChar.startsWith('/');
|
|
509
677
|
const isVoidElement = detectVoidElement({ localName: tag.nodeName.toLowerCase() });
|
|
510
|
-
switch (
|
|
678
|
+
switch (this.#selfCloseType) {
|
|
511
679
|
case 'html': {
|
|
512
680
|
isSelfClose = isVoidElement;
|
|
513
681
|
break;
|
|
@@ -532,7 +700,7 @@ export class Parser {
|
|
|
532
700
|
depthStack.delete(tag.nodeName);
|
|
533
701
|
}
|
|
534
702
|
else {
|
|
535
|
-
depth = Math.max(depth - 1,
|
|
703
|
+
depth = Math.max(depth - 1, this.#defaultDepth);
|
|
536
704
|
}
|
|
537
705
|
this.updateLocation(tag, {
|
|
538
706
|
depth,
|
|
@@ -553,91 +721,131 @@ export class Parser {
|
|
|
553
721
|
}
|
|
554
722
|
return nodes;
|
|
555
723
|
}
|
|
724
|
+
/**
|
|
725
|
+
* Updates the position and depth properties of an AST node.
|
|
726
|
+
*
|
|
727
|
+
* @param node - The AST node whose location should be updated
|
|
728
|
+
* @param props - The new position and depth values to apply (only provided values are changed)
|
|
729
|
+
*/
|
|
556
730
|
updateLocation(node, props) {
|
|
557
731
|
Object.assign(node, {
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
endOffset: props.startOffset == null ? node.endOffset : props.startOffset + node.raw.length,
|
|
562
|
-
endLine: props.startLine == null ? node.endLine : getEndLine(node.raw, props.startLine),
|
|
563
|
-
endCol: props.startCol == null ? node.endCol : getEndCol(node.raw, props.startCol),
|
|
732
|
+
offset: props.offset ?? node.offset,
|
|
733
|
+
line: props.line ?? node.line,
|
|
734
|
+
col: props.col ?? node.col,
|
|
564
735
|
depth: props.depth ?? node.depth,
|
|
565
736
|
});
|
|
566
737
|
}
|
|
567
738
|
/**
|
|
568
739
|
* Set new raw code to target node.
|
|
569
740
|
*
|
|
570
|
-
* Replace the raw code and update the start/end offset/line/column.
|
|
571
|
-
*
|
|
572
741
|
* @param node target node
|
|
573
742
|
* @param raw new raw code
|
|
574
743
|
*/
|
|
575
744
|
updateRaw(node, raw) {
|
|
576
|
-
const startOffset = node.startOffset;
|
|
577
|
-
const startLine = node.startLine;
|
|
578
|
-
const startCol = node.startCol;
|
|
579
|
-
const endOffset = startOffset + raw.length;
|
|
580
|
-
const endLine = getEndLine(raw, startLine);
|
|
581
|
-
const endCol = getEndCol(raw, startCol);
|
|
582
745
|
Object.assign(node, {
|
|
583
746
|
raw,
|
|
584
|
-
startOffset,
|
|
585
|
-
endOffset,
|
|
586
|
-
startLine,
|
|
587
|
-
endLine,
|
|
588
|
-
startCol,
|
|
589
|
-
endCol,
|
|
590
747
|
});
|
|
591
748
|
}
|
|
592
749
|
updateElement(el, props) {
|
|
593
750
|
Object.assign(el, props);
|
|
594
751
|
}
|
|
752
|
+
/**
|
|
753
|
+
* Updates metadata properties on an HTML attribute AST node, such as marking
|
|
754
|
+
* it as a directive, dynamic value, or setting its potential name/value
|
|
755
|
+
* for preprocessor-specific attribute transformations.
|
|
756
|
+
*
|
|
757
|
+
* @param attr - The HTML attribute AST node to update
|
|
758
|
+
* @param props - The metadata properties to overwrite on the attribute
|
|
759
|
+
*/
|
|
595
760
|
updateAttr(attr, props) {
|
|
596
761
|
Object.assign(attr, props);
|
|
597
762
|
}
|
|
763
|
+
/**
|
|
764
|
+
* Determines the element type (e.g., "html", "web-component", "authored") for a
|
|
765
|
+
* given tag name, using the parser's authored element name distinguishing pattern.
|
|
766
|
+
*
|
|
767
|
+
* @param nodeName - The tag name to classify
|
|
768
|
+
* @param defaultPattern - A fallback pattern if no authored element name pattern is set
|
|
769
|
+
* @returns The element type classification
|
|
770
|
+
*/
|
|
598
771
|
detectElementType(nodeName, defaultPattern) {
|
|
599
|
-
return detectElementType(nodeName,
|
|
772
|
+
return detectElementType(nodeName, this.#authoredElementName, defaultPattern);
|
|
600
773
|
}
|
|
601
|
-
createToken(token,
|
|
774
|
+
createToken(token, offset, line, col) {
|
|
602
775
|
const props = typeof token === 'string'
|
|
603
776
|
? {
|
|
604
777
|
raw: token,
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
778
|
+
offset: offset ?? 0,
|
|
779
|
+
line: line ?? 1,
|
|
780
|
+
col: col ?? 1,
|
|
608
781
|
}
|
|
609
782
|
: token;
|
|
610
783
|
return {
|
|
611
|
-
uuid:
|
|
784
|
+
uuid: randomUUID().slice(0, 8),
|
|
612
785
|
...props,
|
|
613
|
-
...__classPrivateFieldGet(this, _Parser_instances, "m", _Parser_getEndLocation).call(this, props),
|
|
614
786
|
};
|
|
615
787
|
}
|
|
788
|
+
/**
|
|
789
|
+
* Extracts a Token from the current raw code at the given byte offset range,
|
|
790
|
+
* computing the line and column from the source position.
|
|
791
|
+
*
|
|
792
|
+
* @param start - The starting byte offset (inclusive) in the raw code
|
|
793
|
+
* @param end - The ending byte offset (exclusive) in the raw code; if omitted, slices to the end
|
|
794
|
+
* @returns A Token containing the sliced raw content and its start position
|
|
795
|
+
*/
|
|
616
796
|
sliceFragment(start, end) {
|
|
617
797
|
const raw = this.rawCode.slice(start, end);
|
|
618
|
-
const { line, column } = getPosition(this.rawCode, start);
|
|
798
|
+
const { line: l, column } = getPosition(this.rawCode, start);
|
|
619
799
|
return {
|
|
620
800
|
raw,
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
801
|
+
offset: start,
|
|
802
|
+
line: l,
|
|
803
|
+
col: column,
|
|
624
804
|
};
|
|
625
805
|
}
|
|
806
|
+
/**
|
|
807
|
+
* Calculates start and end byte offsets from line/column positions
|
|
808
|
+
* within the current raw source code.
|
|
809
|
+
*
|
|
810
|
+
* @param startLine - The starting line number (1-based)
|
|
811
|
+
* @param startCol - The starting column number (1-based)
|
|
812
|
+
* @param endLine - The ending line number (1-based)
|
|
813
|
+
* @param endCol - The ending column number (1-based)
|
|
814
|
+
* @returns The computed start and end byte offsets
|
|
815
|
+
*/
|
|
626
816
|
getOffsetsFromCode(startLine, startCol, endLine, endCol) {
|
|
627
817
|
return getOffsetsFromCode(this.rawCode, startLine, startCol, endLine, endCol);
|
|
628
818
|
}
|
|
819
|
+
/**
|
|
820
|
+
* Walks through a node list depth-first, invoking the walker callback for each node.
|
|
821
|
+
* The walker receives the current node, the sequentially previous node, and the depth.
|
|
822
|
+
* Automatically recurses into child nodes of parent elements and preprocessor blocks.
|
|
823
|
+
*
|
|
824
|
+
* @template Node - The specific AST node type being walked
|
|
825
|
+
* @param nodeList - The list of nodes to walk
|
|
826
|
+
* @param walker - The callback invoked for each node during the walk
|
|
827
|
+
* @param depth - The current depth (starts at 0 for top-level calls)
|
|
828
|
+
*/
|
|
629
829
|
walk(nodeList, walker, depth = 0) {
|
|
630
830
|
for (const node of nodeList) {
|
|
631
|
-
walker(node,
|
|
632
|
-
|
|
831
|
+
walker(node, this.#walkMethodSequentailPrevNode, depth);
|
|
832
|
+
this.#walkMethodSequentailPrevNode = node;
|
|
633
833
|
if ('childNodes' in node && node.childNodes.length > 0) {
|
|
634
834
|
this.walk(node.childNodes, walker, depth + 1);
|
|
635
835
|
}
|
|
636
836
|
}
|
|
637
837
|
if (depth === 0) {
|
|
638
|
-
|
|
838
|
+
this.#walkMethodSequentailPrevNode = null;
|
|
639
839
|
}
|
|
640
840
|
}
|
|
841
|
+
/**
|
|
842
|
+
* Appends child nodes to a parent node, updating parent references and
|
|
843
|
+
* maintaining sorted order by source position. If a child already exists
|
|
844
|
+
* in the parent (by UUID), it is replaced in place rather than duplicated.
|
|
845
|
+
*
|
|
846
|
+
* @param parentNode - The parent node to append children to, or null (no-op)
|
|
847
|
+
* @param childNodes - The child nodes to append
|
|
848
|
+
*/
|
|
641
849
|
appendChild(parentNode, ...childNodes) {
|
|
642
850
|
if (!parentNode || childNodes.length === 0) {
|
|
643
851
|
return;
|
|
@@ -653,468 +861,472 @@ export class Parser {
|
|
|
653
861
|
newChildNodes.splice(currentIndex, 1, appendingChild);
|
|
654
862
|
}
|
|
655
863
|
Object.assign(parentNode, {
|
|
656
|
-
childNodes:
|
|
657
|
-
? // TODO: Use sort instead of toSorted until we end support for Node 18
|
|
658
|
-
[...newChildNodes].sort(sortNodes)
|
|
659
|
-
: newChildNodes.toSorted(sortNodes),
|
|
864
|
+
childNodes: newChildNodes.toSorted(sortNodes),
|
|
660
865
|
});
|
|
661
866
|
}
|
|
867
|
+
/**
|
|
868
|
+
* Replaces a child node within a parent's child list with one or more replacement nodes.
|
|
869
|
+
* If the old child is not found in the parent, the operation is a no-op.
|
|
870
|
+
*
|
|
871
|
+
* @param parentNode - The parent node containing the child to replace
|
|
872
|
+
* @param oldChildNode - The existing child node to be replaced
|
|
873
|
+
* @param replacementChildNodes - The replacement nodes to insert at the old child's position
|
|
874
|
+
*/
|
|
662
875
|
replaceChild(parentNode, oldChildNode, ...replacementChildNodes) {
|
|
663
876
|
const index = parentNode.childNodes.findIndex(childNode => childNode.uuid === oldChildNode.uuid);
|
|
664
877
|
if (index === -1) {
|
|
665
878
|
return;
|
|
666
879
|
}
|
|
667
|
-
if (Array.prototype.toSpliced == null) {
|
|
668
|
-
const newChildNodes = [...parentNode.childNodes];
|
|
669
|
-
// TODO: Use splice instead of toSpliced until we end support for Node 18
|
|
670
|
-
newChildNodes.splice(index, 1, ...replacementChildNodes);
|
|
671
|
-
Object.assign(parentNode, { childNodes: newChildNodes });
|
|
672
|
-
return;
|
|
673
|
-
}
|
|
674
880
|
const newChildNodes = parentNode.childNodes.toSpliced(index, 1, ...replacementChildNodes);
|
|
675
881
|
Object.assign(parentNode, { childNodes: newChildNodes });
|
|
676
882
|
}
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
return nodeList;
|
|
685
|
-
}, _Parser_concatText = function _Parser_concatText(nodeList) {
|
|
686
|
-
const newNodeList = [];
|
|
687
|
-
for (const node of nodeList) {
|
|
688
|
-
const prevNode = newNodeList.at(-1) ?? null;
|
|
689
|
-
if (prevNode?.type === 'text' &&
|
|
690
|
-
prevNode?.nodeName === '#text' &&
|
|
691
|
-
node.type === 'text' &&
|
|
692
|
-
node.nodeName === '#text' &&
|
|
693
|
-
prevNode?.endOffset === node.startOffset) {
|
|
694
|
-
const newNode = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_concatTextNodes).call(this, prevNode, node);
|
|
695
|
-
newNodeList.pop();
|
|
696
|
-
newNodeList.push(newNode);
|
|
697
|
-
continue;
|
|
698
|
-
}
|
|
699
|
-
newNodeList.push(node);
|
|
700
|
-
}
|
|
701
|
-
return newNodeList;
|
|
702
|
-
}, _Parser_concatTextNodes = function _Parser_concatTextNodes(...nodes) {
|
|
703
|
-
if (nodes.length === 0) {
|
|
704
|
-
throw new Error('Empty node list');
|
|
705
|
-
}
|
|
706
|
-
const firstNode = nodes.at(0);
|
|
707
|
-
const lastNode = nodes.at(-1);
|
|
708
|
-
if (firstNode.uuid === lastNode.uuid) {
|
|
709
|
-
return firstNode;
|
|
710
|
-
}
|
|
711
|
-
const textNode = {
|
|
712
|
-
...firstNode,
|
|
713
|
-
uuid: uuid().slice(0, 8),
|
|
714
|
-
raw: nodes.map(n => n.raw).join(''),
|
|
715
|
-
endOffset: lastNode.endOffset,
|
|
716
|
-
endLine: lastNode.endLine,
|
|
717
|
-
endCol: lastNode.endCol,
|
|
718
|
-
};
|
|
719
|
-
for (const node of nodes) {
|
|
720
|
-
__classPrivateFieldGet(this, _Parser_instances, "m", _Parser_removeChild).call(this, node.parentNode, node);
|
|
883
|
+
#arrayize(nodeTree) {
|
|
884
|
+
let nodeList = [];
|
|
885
|
+
this.walk(nodeTree, node => {
|
|
886
|
+
nodeList.push(node);
|
|
887
|
+
});
|
|
888
|
+
nodeList = this.#removeDeprecatedNode(nodeList);
|
|
889
|
+
return nodeList;
|
|
721
890
|
}
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
891
|
+
#concatText(nodeList) {
|
|
892
|
+
const newNodeList = [];
|
|
893
|
+
for (const node of nodeList) {
|
|
894
|
+
const prevNode = newNodeList.at(-1) ?? null;
|
|
895
|
+
if (prevNode?.type === 'text' &&
|
|
896
|
+
prevNode?.nodeName === '#text' &&
|
|
897
|
+
node.type === 'text' &&
|
|
898
|
+
node.nodeName === '#text' &&
|
|
899
|
+
prevNode.offset + prevNode.raw.length === node.offset) {
|
|
900
|
+
const newNode = this.#concatTextNodes(prevNode, node);
|
|
901
|
+
newNodeList.pop();
|
|
902
|
+
newNodeList.push(newNode);
|
|
903
|
+
continue;
|
|
904
|
+
}
|
|
905
|
+
newNodeList.push(node);
|
|
906
|
+
}
|
|
907
|
+
return newNodeList;
|
|
727
908
|
}
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
nodeName: '#invalid',
|
|
732
|
-
isBogus: true,
|
|
733
|
-
kind: node.type,
|
|
734
|
-
};
|
|
735
|
-
}, _Parser_createOffsetSpaces = function _Parser_createOffsetSpaces(options) {
|
|
736
|
-
const offsetOffset = Math.max(options?.offsetOffset ?? 0, 0);
|
|
737
|
-
const offsetLine = Math.max((options?.offsetLine ?? 0) - 1, 0);
|
|
738
|
-
const offsetColumn = Math.max((options?.offsetColumn ?? 0) - 1, 0);
|
|
739
|
-
const offsetSpaces = ' '.repeat(offsetOffset - offsetLine - offsetColumn);
|
|
740
|
-
const offsetLines = '\n'.repeat(offsetLine);
|
|
741
|
-
const offsetColumns = ' '.repeat(offsetColumn);
|
|
742
|
-
return offsetSpaces + offsetLines + offsetColumns;
|
|
743
|
-
}, _Parser_createRemnantNode = function _Parser_createRemnantNode(start, end, depth, parentNode, exposeInvalidNode, exposeWhitespace) {
|
|
744
|
-
const codeFragment = this.sliceFragment(start, end);
|
|
745
|
-
if (codeFragment.raw) {
|
|
746
|
-
const remnantNodes = this.visitText({
|
|
747
|
-
...codeFragment,
|
|
748
|
-
depth: depth,
|
|
749
|
-
parentNode: parentNode,
|
|
750
|
-
}, { researchTags: true }).filter((node) => 'parentNode' in node);
|
|
751
|
-
if (remnantNodes.length > 1) {
|
|
752
|
-
this.appendChild(parentNode, ...remnantNodes);
|
|
753
|
-
return remnantNodes;
|
|
909
|
+
#concatTextNodes(...nodes) {
|
|
910
|
+
if (nodes.length === 0) {
|
|
911
|
+
throw new Error('Empty node list');
|
|
754
912
|
}
|
|
755
|
-
const
|
|
756
|
-
|
|
757
|
-
|
|
913
|
+
const firstNode = nodes.at(0);
|
|
914
|
+
const lastNode = nodes.at(-1);
|
|
915
|
+
if (firstNode.uuid === lastNode.uuid) {
|
|
916
|
+
return firstNode;
|
|
758
917
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
918
|
+
const textNode = {
|
|
919
|
+
...firstNode,
|
|
920
|
+
uuid: randomUUID().slice(0, 8),
|
|
921
|
+
raw: nodes.map(n => n.raw).join(''),
|
|
922
|
+
};
|
|
923
|
+
for (const node of nodes) {
|
|
924
|
+
this.#removeChild(node.parentNode, node);
|
|
763
925
|
}
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
926
|
+
this.appendChild(textNode.parentNode, textNode);
|
|
927
|
+
return textNode;
|
|
928
|
+
}
|
|
929
|
+
#convertIntoInvalidNode(node) {
|
|
930
|
+
if (node.type === 'invalid') {
|
|
931
|
+
return node;
|
|
767
932
|
}
|
|
933
|
+
return {
|
|
934
|
+
...node,
|
|
935
|
+
type: 'invalid',
|
|
936
|
+
nodeName: '#invalid',
|
|
937
|
+
isBogus: true,
|
|
938
|
+
kind: node.type,
|
|
939
|
+
};
|
|
768
940
|
}
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
941
|
+
#createOffsetSpaces(options) {
|
|
942
|
+
const offsetOffset = Math.max(options?.offsetOffset ?? 0, 0);
|
|
943
|
+
const offsetLine = Math.max((options?.offsetLine ?? 0) - 1, 0);
|
|
944
|
+
const offsetColumn = Math.max((options?.offsetColumn ?? 0) - 1, 0);
|
|
945
|
+
const offsetSpaces = ' '.repeat(offsetOffset - offsetLine - offsetColumn);
|
|
946
|
+
const offsetLines = '\n'.repeat(offsetLine);
|
|
947
|
+
const offsetColumns = ' '.repeat(offsetColumn);
|
|
948
|
+
return offsetSpaces + offsetLines + offsetColumns;
|
|
773
949
|
}
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
950
|
+
#createRemnantNode(start, end, depth, parentNode, exposeInvalidNode, exposeWhitespace) {
|
|
951
|
+
const codeFragment = this.sliceFragment(start, end);
|
|
952
|
+
if (codeFragment.raw) {
|
|
953
|
+
const remnantNodes = this.visitText({
|
|
954
|
+
...codeFragment,
|
|
955
|
+
depth: depth,
|
|
956
|
+
parentNode: parentNode,
|
|
957
|
+
}, { researchTags: true }).filter((node) => 'parentNode' in node);
|
|
958
|
+
if (remnantNodes.length > 1) {
|
|
959
|
+
this.appendChild(parentNode, ...remnantNodes);
|
|
960
|
+
return remnantNodes;
|
|
961
|
+
}
|
|
962
|
+
const remnantNode = remnantNodes[0];
|
|
963
|
+
if (!remnantNode) {
|
|
964
|
+
return null;
|
|
965
|
+
}
|
|
966
|
+
if (exposeInvalidNode && remnantNode.raw.trim() !== '') {
|
|
967
|
+
const invalidNode = this.#convertIntoInvalidNode(remnantNode);
|
|
968
|
+
this.appendChild(parentNode, invalidNode);
|
|
969
|
+
return [remnantNode];
|
|
970
|
+
}
|
|
971
|
+
if (exposeWhitespace && remnantNode.type === 'text' && remnantNode.raw.trim() === '') {
|
|
972
|
+
this.appendChild(parentNode, remnantNode);
|
|
973
|
+
return [remnantNode];
|
|
782
974
|
}
|
|
783
975
|
}
|
|
784
|
-
|
|
976
|
+
return null;
|
|
785
977
|
}
|
|
786
|
-
|
|
787
|
-
|
|
978
|
+
#exposeRemnantNodes(nodeList, invalidNode, whitespace) {
|
|
979
|
+
if (!invalidNode && !whitespace) {
|
|
980
|
+
return nodeList;
|
|
981
|
+
}
|
|
982
|
+
const newNodeList = [];
|
|
983
|
+
for (const [i, node] of nodeList.entries()) {
|
|
984
|
+
const sequentailPrevNode = nodeList[i - 1] ?? null;
|
|
985
|
+
if (!this.#rawTextElements.includes(node.nodeName.toLowerCase())) {
|
|
986
|
+
const prevEndOffset = sequentailPrevNode
|
|
987
|
+
? sequentailPrevNode.offset + sequentailPrevNode.raw.length
|
|
988
|
+
: 0;
|
|
989
|
+
const remnantNodes = this.#createRemnantNode(prevEndOffset, node.offset, node.depth, node.parentNode, invalidNode, whitespace);
|
|
990
|
+
if (remnantNodes) {
|
|
991
|
+
newNodeList.push(...remnantNodes);
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
newNodeList.push(node);
|
|
995
|
+
}
|
|
996
|
+
const lastNode = newNodeList.at(-1);
|
|
997
|
+
if (!lastNode) {
|
|
998
|
+
return newNodeList;
|
|
999
|
+
}
|
|
1000
|
+
const remnantNodes = this.#createRemnantNode(lastNode.offset + lastNode.raw.length, undefined, lastNode.depth, lastNode.parentNode, invalidNode, whitespace);
|
|
1001
|
+
if (!remnantNodes) {
|
|
1002
|
+
return newNodeList;
|
|
1003
|
+
}
|
|
1004
|
+
newNodeList.push(...remnantNodes);
|
|
788
1005
|
return newNodeList;
|
|
789
1006
|
}
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
1007
|
+
#getEndLocation(token) {
|
|
1008
|
+
return {
|
|
1009
|
+
offset: token.offset + token.raw.length,
|
|
1010
|
+
line: getEndLine(token.raw, token.line),
|
|
1011
|
+
col: getEndCol(token.raw, token.col),
|
|
1012
|
+
};
|
|
793
1013
|
}
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
}, _Parser_orphanEndTagToBogusMark = function _Parser_orphanEndTagToBogusMark(nodeList) {
|
|
804
|
-
const newNodeList = [];
|
|
805
|
-
for (let node of nodeList) {
|
|
806
|
-
if (node.type === 'endtag') {
|
|
807
|
-
const endTagUUID = node.uuid;
|
|
808
|
-
const openTag = newNodeList.findLast((n) => n.type === 'starttag' && !n.isGhost ? n.pairNode?.uuid === endTagUUID : false);
|
|
809
|
-
if (!openTag) {
|
|
810
|
-
node = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_convertIntoInvalidNode).call(this, node);
|
|
1014
|
+
#orphanEndTagToBogusMark(nodeList) {
|
|
1015
|
+
const newNodeList = [];
|
|
1016
|
+
for (let node of nodeList) {
|
|
1017
|
+
if (node.type === 'endtag') {
|
|
1018
|
+
const endTagUUID = node.uuid;
|
|
1019
|
+
const openTag = newNodeList.findLast((n) => n.type === 'starttag' && !n.isGhost ? n.pairNode?.uuid === endTagUUID : false);
|
|
1020
|
+
if (!openTag) {
|
|
1021
|
+
node = this.#convertIntoInvalidNode(node);
|
|
1022
|
+
}
|
|
811
1023
|
}
|
|
1024
|
+
newNodeList.push(node);
|
|
812
1025
|
}
|
|
813
|
-
newNodeList
|
|
1026
|
+
return newNodeList;
|
|
814
1027
|
}
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
this.appendChild(startTag.parentNode, endTag);
|
|
820
|
-
}, _Parser_parseEndTag = function _Parser_parseEndTag(token, namelessFragment) {
|
|
821
|
-
const parsed = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_parseTag).call(this, token, true, false, namelessFragment);
|
|
822
|
-
if (!parsed.token || parsed.token.type !== 'endtag') {
|
|
823
|
-
throw new ParserError("Expected end tag but it's not end tag", token);
|
|
1028
|
+
#pairing(startTag, endTag) {
|
|
1029
|
+
Object.assign(startTag, { pairNode: endTag });
|
|
1030
|
+
Object.assign(endTag, { pairNode: startTag });
|
|
1031
|
+
this.appendChild(startTag.parentNode, endTag);
|
|
824
1032
|
}
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
1033
|
+
#parseEndTag(token, namelessFragment) {
|
|
1034
|
+
const parsed = this.#parseTag(token, true, false, namelessFragment);
|
|
1035
|
+
if (!parsed.token || parsed.token.type !== 'endtag') {
|
|
1036
|
+
throw new ParserError("Expected end tag but it's not end tag", token);
|
|
1037
|
+
}
|
|
1038
|
+
return parsed.token;
|
|
830
1039
|
}
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
this.appendChild(token.parentNode, startTag);
|
|
836
|
-
return startTag;
|
|
837
|
-
}, _Parser_parseTag = function _Parser_parseTag(token, praseAttr, failSafe, namelessFragment) {
|
|
838
|
-
const raw = token.raw;
|
|
839
|
-
const depth = token.depth;
|
|
840
|
-
const initialOffset = token.startOffset;
|
|
841
|
-
const initialLine = token.startLine;
|
|
842
|
-
const initialCol = token.startCol;
|
|
843
|
-
let offset = initialOffset;
|
|
844
|
-
let line = initialLine;
|
|
845
|
-
let col = initialCol;
|
|
846
|
-
let tagStartOffset = offset;
|
|
847
|
-
let tagStartLine = line;
|
|
848
|
-
let tagStartCol = col;
|
|
849
|
-
let state = TagState.BeforeOpenTag;
|
|
850
|
-
let beforeOpenTagChars = '';
|
|
851
|
-
let tagName = '';
|
|
852
|
-
let afterAttrsSpaceChars = '';
|
|
853
|
-
let selfClosingSolidusChar = '';
|
|
854
|
-
let isOpenTag = true;
|
|
855
|
-
const attrs = [];
|
|
856
|
-
const chars = [...raw];
|
|
857
|
-
while (chars.length > 0) {
|
|
858
|
-
if (state === TagState.AfterOpenTag) {
|
|
859
|
-
break;
|
|
1040
|
+
#parseStartTag(token, overwriteProps, namelessFragment) {
|
|
1041
|
+
const parsed = this.#parseTag(token, true, false, namelessFragment);
|
|
1042
|
+
if (!parsed.token || parsed.token.type !== 'starttag') {
|
|
1043
|
+
throw new ParserError("Expected start tag but it's not start tag", token);
|
|
860
1044
|
}
|
|
861
|
-
const
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
break;
|
|
890
|
-
}
|
|
891
|
-
if (namelessFragment && char === '>') {
|
|
892
|
-
state = TagState.AfterOpenTag;
|
|
893
|
-
break;
|
|
894
|
-
}
|
|
895
|
-
chars.unshift(char);
|
|
896
|
-
state = TagState.AfterOpenTag;
|
|
1045
|
+
const startTag = {
|
|
1046
|
+
...parsed.token,
|
|
1047
|
+
...overwriteProps,
|
|
1048
|
+
};
|
|
1049
|
+
this.appendChild(token.parentNode, startTag);
|
|
1050
|
+
return startTag;
|
|
1051
|
+
}
|
|
1052
|
+
#parseTag(token, praseAttr, failSafe, namelessFragment) {
|
|
1053
|
+
const raw = token.raw;
|
|
1054
|
+
const depth = token.depth;
|
|
1055
|
+
const initialOffset = token.offset;
|
|
1056
|
+
const initialLine = token.line;
|
|
1057
|
+
const initialCol = token.col;
|
|
1058
|
+
let offset = initialOffset;
|
|
1059
|
+
let line = initialLine;
|
|
1060
|
+
let col = initialCol;
|
|
1061
|
+
let tagStartOffset = offset;
|
|
1062
|
+
let tagStartLine = line;
|
|
1063
|
+
let tagStartCol = col;
|
|
1064
|
+
let state = TagState.BeforeOpenTag;
|
|
1065
|
+
let beforeOpenTagChars = '';
|
|
1066
|
+
let tagName = '';
|
|
1067
|
+
let selfClosingSolidusChar = '';
|
|
1068
|
+
let isOpenTag = true;
|
|
1069
|
+
const attrs = [];
|
|
1070
|
+
const chars = [...raw];
|
|
1071
|
+
while (chars.length > 0) {
|
|
1072
|
+
if (state === TagState.AfterOpenTag) {
|
|
897
1073
|
break;
|
|
898
1074
|
}
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
if (
|
|
903
|
-
|
|
904
|
-
offset
|
|
1075
|
+
const char = chars.shift();
|
|
1076
|
+
stateSwitch: switch (state) {
|
|
1077
|
+
case TagState.BeforeOpenTag: {
|
|
1078
|
+
if (char === '<') {
|
|
1079
|
+
const beforeOpenTag = this.createToken(beforeOpenTagChars, offset, line, col);
|
|
1080
|
+
({ offset, line, col } = this.#getEndLocation(beforeOpenTag));
|
|
1081
|
+
tagStartOffset = offset;
|
|
1082
|
+
tagStartLine = line;
|
|
1083
|
+
tagStartCol = col;
|
|
1084
|
+
// Add `<` length
|
|
905
1085
|
col += 1;
|
|
1086
|
+
offset += 1;
|
|
1087
|
+
state = TagState.FirstCharOfTagName;
|
|
1088
|
+
break;
|
|
906
1089
|
}
|
|
907
|
-
|
|
908
|
-
col += tagName.length;
|
|
909
|
-
state = TagState.Attrs;
|
|
1090
|
+
beforeOpenTagChars += char;
|
|
910
1091
|
break;
|
|
911
1092
|
}
|
|
912
|
-
|
|
1093
|
+
case TagState.FirstCharOfTagName: {
|
|
1094
|
+
if (/[a-z]/i.test(char)) {
|
|
1095
|
+
tagName += char;
|
|
1096
|
+
state = TagState.TagName;
|
|
1097
|
+
break;
|
|
1098
|
+
}
|
|
1099
|
+
if (char === '/') {
|
|
1100
|
+
isOpenTag = false;
|
|
1101
|
+
break;
|
|
1102
|
+
}
|
|
1103
|
+
if (namelessFragment && char === '>') {
|
|
1104
|
+
state = TagState.AfterOpenTag;
|
|
1105
|
+
break;
|
|
1106
|
+
}
|
|
913
1107
|
chars.unshift(char);
|
|
914
|
-
state = TagState.AfterAttrs;
|
|
915
|
-
break;
|
|
916
|
-
}
|
|
917
|
-
if (char === '>') {
|
|
918
1108
|
state = TagState.AfterOpenTag;
|
|
919
1109
|
break;
|
|
920
1110
|
}
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
1111
|
+
case TagState.TagName: {
|
|
1112
|
+
if (this.#spaceChars.includes(char)) {
|
|
1113
|
+
chars.unshift(char);
|
|
1114
|
+
if (!isOpenTag) {
|
|
1115
|
+
// Add `/` of `</`(close tag) length
|
|
1116
|
+
offset += 1;
|
|
1117
|
+
col += 1;
|
|
1118
|
+
}
|
|
1119
|
+
offset += tagName.length;
|
|
1120
|
+
col += tagName.length;
|
|
1121
|
+
state = TagState.Attrs;
|
|
1122
|
+
break;
|
|
1123
|
+
}
|
|
1124
|
+
if (char === '/') {
|
|
1125
|
+
chars.unshift(char);
|
|
934
1126
|
state = TagState.AfterAttrs;
|
|
935
|
-
break
|
|
1127
|
+
break;
|
|
936
1128
|
}
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
startLine: line,
|
|
941
|
-
startCol: col,
|
|
942
|
-
});
|
|
943
|
-
line = attr.endLine;
|
|
944
|
-
col = attr.endCol;
|
|
945
|
-
offset = attr.endOffset;
|
|
946
|
-
if (leftover === attr.__rightText) {
|
|
947
|
-
throw new SyntaxError(`Invalid attribute syntax: ${leftover}`);
|
|
1129
|
+
if (char === '>') {
|
|
1130
|
+
state = TagState.AfterOpenTag;
|
|
1131
|
+
break;
|
|
948
1132
|
}
|
|
949
|
-
|
|
950
|
-
delete attr.__rightText;
|
|
951
|
-
attrs.push(attr);
|
|
952
|
-
}
|
|
953
|
-
break;
|
|
954
|
-
}
|
|
955
|
-
case TagState.AfterAttrs: {
|
|
956
|
-
if (char === '>') {
|
|
957
|
-
state = TagState.AfterOpenTag;
|
|
958
|
-
break;
|
|
959
|
-
}
|
|
960
|
-
if (__classPrivateFieldGet(this, _Parser_spaceChars, "f").includes(char)) {
|
|
961
|
-
afterAttrsSpaceChars += char;
|
|
1133
|
+
tagName += char;
|
|
962
1134
|
break;
|
|
963
1135
|
}
|
|
964
|
-
|
|
965
|
-
|
|
1136
|
+
case TagState.Attrs: {
|
|
1137
|
+
if (!praseAttr) {
|
|
1138
|
+
state = TagState.AfterAttrs;
|
|
1139
|
+
break stateSwitch;
|
|
1140
|
+
}
|
|
1141
|
+
let leftover = char + chars.join('');
|
|
1142
|
+
while (leftover.trim()) {
|
|
1143
|
+
if (leftover.trim().startsWith('/') || leftover.trim().startsWith('>')) {
|
|
1144
|
+
chars.length = 0;
|
|
1145
|
+
chars.push(...leftover);
|
|
1146
|
+
state = TagState.AfterAttrs;
|
|
1147
|
+
break stateSwitch;
|
|
1148
|
+
}
|
|
1149
|
+
const attr = this.visitAttr({
|
|
1150
|
+
raw: leftover,
|
|
1151
|
+
offset,
|
|
1152
|
+
line,
|
|
1153
|
+
col,
|
|
1154
|
+
});
|
|
1155
|
+
({ offset, line, col } = this.#getEndLocation(attr));
|
|
1156
|
+
if (leftover === attr.__rightText) {
|
|
1157
|
+
throw new SyntaxError(`Invalid attribute syntax: ${leftover}`);
|
|
1158
|
+
}
|
|
1159
|
+
leftover = attr.__rightText == null ? '' : `${attr.__rightText}`;
|
|
1160
|
+
delete attr.__rightText;
|
|
1161
|
+
attrs.push(attr);
|
|
1162
|
+
}
|
|
966
1163
|
break;
|
|
967
1164
|
}
|
|
968
|
-
|
|
969
|
-
|
|
1165
|
+
case TagState.AfterAttrs: {
|
|
1166
|
+
if (char === '>') {
|
|
1167
|
+
state = TagState.AfterOpenTag;
|
|
1168
|
+
break;
|
|
1169
|
+
}
|
|
1170
|
+
if (this.#spaceChars.includes(char)) {
|
|
1171
|
+
break;
|
|
1172
|
+
}
|
|
1173
|
+
if (char === '/') {
|
|
1174
|
+
selfClosingSolidusChar = char;
|
|
1175
|
+
break;
|
|
1176
|
+
}
|
|
1177
|
+
if (!praseAttr) {
|
|
1178
|
+
break;
|
|
1179
|
+
}
|
|
1180
|
+
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
970
1181
|
}
|
|
971
|
-
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
972
1182
|
}
|
|
973
1183
|
}
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
1184
|
+
const leftover = chars.join('');
|
|
1185
|
+
if (!failSafe && !leftover && state === TagState.TagName) {
|
|
1186
|
+
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
1187
|
+
}
|
|
1188
|
+
if (!failSafe && !namelessFragment && tagName === '') {
|
|
1189
|
+
throw new SyntaxError(`No tag name: "${raw}"`);
|
|
1190
|
+
}
|
|
1191
|
+
const rawCodeFragment = raw.slice(beforeOpenTagChars.length, raw.length - leftover.length);
|
|
1192
|
+
if (!rawCodeFragment) {
|
|
1193
|
+
return {
|
|
1194
|
+
__left: beforeOpenTagChars,
|
|
1195
|
+
__right: leftover,
|
|
1196
|
+
};
|
|
1197
|
+
}
|
|
1198
|
+
const tagToken = this.createToken(rawCodeFragment, tagStartOffset, tagStartLine, tagStartCol);
|
|
1199
|
+
const isFragment = tagName === '';
|
|
1200
|
+
const commons = {
|
|
1201
|
+
depth,
|
|
1202
|
+
nodeName: isFragment ? '#jsx-fragment' : tagName,
|
|
1203
|
+
parentNode: null,
|
|
1204
|
+
};
|
|
1205
|
+
const tag = isOpenTag
|
|
1206
|
+
? {
|
|
1207
|
+
...tagToken,
|
|
1208
|
+
...commons,
|
|
1209
|
+
type: 'starttag',
|
|
1210
|
+
elementType: this.detectElementType(tagName),
|
|
1211
|
+
namespace: 'namespace' in token
|
|
1212
|
+
? token.namespace
|
|
1213
|
+
: getNamespace(tagName, token.parentNode),
|
|
1214
|
+
attributes: attrs,
|
|
1215
|
+
childNodes: [],
|
|
1216
|
+
pairNode: null,
|
|
1217
|
+
tagOpenChar: '<',
|
|
1218
|
+
tagCloseChar: selfClosingSolidusChar + '>',
|
|
1219
|
+
blockBehavior: null,
|
|
1220
|
+
isGhost: false,
|
|
1221
|
+
isFragment,
|
|
1222
|
+
}
|
|
1223
|
+
: {
|
|
1224
|
+
...tagToken,
|
|
1225
|
+
...commons,
|
|
1226
|
+
type: 'endtag',
|
|
1227
|
+
pairNode: {},
|
|
1228
|
+
tagOpenChar: '</',
|
|
1229
|
+
tagCloseChar: '>',
|
|
1230
|
+
};
|
|
992
1231
|
return {
|
|
1232
|
+
token: tag,
|
|
993
1233
|
__left: beforeOpenTagChars,
|
|
994
1234
|
__right: leftover,
|
|
995
1235
|
};
|
|
996
1236
|
}
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
depth,
|
|
1001
|
-
nodeName: isFragment ? '#jsx-fragment' : tagName,
|
|
1002
|
-
parentNode: null,
|
|
1003
|
-
};
|
|
1004
|
-
const tag = isOpenTag
|
|
1005
|
-
? {
|
|
1006
|
-
...tagToken,
|
|
1007
|
-
...commons,
|
|
1008
|
-
type: 'starttag',
|
|
1009
|
-
elementType: this.detectElementType(tagName),
|
|
1010
|
-
namespace: '',
|
|
1011
|
-
attributes: attrs,
|
|
1012
|
-
childNodes: [],
|
|
1013
|
-
pairNode: null,
|
|
1014
|
-
tagOpenChar: '<',
|
|
1015
|
-
tagCloseChar: selfClosingSolidusChar + '>',
|
|
1016
|
-
selfClosingSolidus,
|
|
1017
|
-
isGhost: false,
|
|
1018
|
-
isFragment,
|
|
1237
|
+
#removeChild(parentNode, ...childNodes) {
|
|
1238
|
+
if (!parentNode || childNodes.length === 0) {
|
|
1239
|
+
return;
|
|
1019
1240
|
}
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
...commons,
|
|
1023
|
-
type: 'endtag',
|
|
1024
|
-
pairNode: {},
|
|
1025
|
-
tagOpenChar: '</',
|
|
1026
|
-
tagCloseChar: '>',
|
|
1027
|
-
};
|
|
1028
|
-
return {
|
|
1029
|
-
token: tag,
|
|
1030
|
-
__left: beforeOpenTagChars,
|
|
1031
|
-
__right: leftover,
|
|
1032
|
-
};
|
|
1033
|
-
}, _Parser_removeChild = function _Parser_removeChild(parentNode, ...childNodes) {
|
|
1034
|
-
if (!parentNode || childNodes.length === 0) {
|
|
1035
|
-
return;
|
|
1241
|
+
const newChildNodes = parentNode.childNodes.filter(n => !childNodes.includes(n));
|
|
1242
|
+
Object.assign(parentNode, { childNodes: newChildNodes });
|
|
1036
1243
|
}
|
|
1037
|
-
const newChildNodes = parentNode.childNodes.filter(n => !childNodes.includes(n));
|
|
1038
|
-
Object.assign(parentNode, { childNodes: newChildNodes });
|
|
1039
|
-
}, _Parser_removeDeprecatedNode = function _Parser_removeDeprecatedNode(nodeOrders) {
|
|
1040
1244
|
/**
|
|
1041
|
-
*
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
? // TODO: Use sort instead of toSorted until we end support for Node 18
|
|
1045
|
-
[...nodeOrders].sort(sortNodes)
|
|
1046
|
-
: nodeOrders.toSorted(sortNodes);
|
|
1047
|
-
/**
|
|
1048
|
-
* remove duplicated node
|
|
1245
|
+
*
|
|
1246
|
+
* @disruptive
|
|
1247
|
+
* @param nodeOrders [Disruptive change]
|
|
1049
1248
|
*/
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1249
|
+
#removeDeprecatedNode(nodeOrders) {
|
|
1250
|
+
/**
|
|
1251
|
+
* sorting
|
|
1252
|
+
*/
|
|
1253
|
+
const sorted = nodeOrders.toSorted(sortNodes);
|
|
1254
|
+
/**
|
|
1255
|
+
* remove duplicated node
|
|
1256
|
+
*/
|
|
1257
|
+
const stack = {};
|
|
1258
|
+
const removeIndexes = [];
|
|
1259
|
+
for (const [i, node] of sorted.entries()) {
|
|
1260
|
+
const id = `${node.offset}::${node.nodeName}`;
|
|
1261
|
+
if (stack[id] != null) {
|
|
1262
|
+
removeIndexes.push(i);
|
|
1263
|
+
}
|
|
1264
|
+
stack[id] = i;
|
|
1056
1265
|
}
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
sorted.splice(r, 1);
|
|
1266
|
+
let r = sorted.length;
|
|
1267
|
+
while (r-- > 0) {
|
|
1268
|
+
if (removeIndexes.includes(r)) {
|
|
1269
|
+
sorted.splice(r, 1);
|
|
1270
|
+
}
|
|
1063
1271
|
}
|
|
1272
|
+
return sorted;
|
|
1064
1273
|
}
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1274
|
+
#removeOffsetSpaces(nodeList, options) {
|
|
1275
|
+
const offsetOffset = options?.offsetOffset ?? 0;
|
|
1276
|
+
const offsetLine = options?.offsetLine ?? 1;
|
|
1277
|
+
const offsetColumn = options?.offsetColumn ?? 1;
|
|
1278
|
+
if (offsetOffset === 0) {
|
|
1279
|
+
return nodeList;
|
|
1280
|
+
}
|
|
1281
|
+
const firstNode = nodeList.at(0);
|
|
1282
|
+
if (!firstNode || firstNode.type !== 'text') {
|
|
1283
|
+
return nodeList;
|
|
1284
|
+
}
|
|
1285
|
+
const raw = firstNode.raw.slice(offsetOffset);
|
|
1286
|
+
if (!raw) {
|
|
1287
|
+
return nodeList.toSpliced(0, 1);
|
|
1288
|
+
}
|
|
1289
|
+
this.updateRaw(firstNode, raw);
|
|
1290
|
+
this.updateLocation(firstNode, {
|
|
1291
|
+
offset: offsetOffset,
|
|
1292
|
+
line: offsetLine,
|
|
1293
|
+
col: offsetColumn,
|
|
1294
|
+
});
|
|
1071
1295
|
return nodeList;
|
|
1072
1296
|
}
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1297
|
+
#reset() {
|
|
1298
|
+
// Reset state
|
|
1299
|
+
this.state = structuredClone(this.#defaultState);
|
|
1300
|
+
this.#defaultDepth = 0;
|
|
1076
1301
|
}
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
const newNodeList = [...nodeList];
|
|
1081
|
-
// TODO: Use splice instead of toSpliced until we end support for Node 18
|
|
1082
|
-
newNodeList.splice(0, 1);
|
|
1083
|
-
return newNodeList;
|
|
1084
|
-
}
|
|
1085
|
-
return nodeList.toSpliced(0, 1);
|
|
1302
|
+
#setRawCode(rawCode, originalRawCode) {
|
|
1303
|
+
this.#rawCode = rawCode;
|
|
1304
|
+
this.#originalRawCode = originalRawCode ?? this.#originalRawCode;
|
|
1086
1305
|
}
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
node.startOffset !== node.endOffset) {
|
|
1108
|
-
const prevNodeEndOffset = prevNode.endOffset;
|
|
1109
|
-
const nodeStartOffset = node.startOffset;
|
|
1110
|
-
if (prevNodeEndOffset > nodeStartOffset) {
|
|
1111
|
-
const prevNodeRaw = prevNode.raw;
|
|
1112
|
-
const prevNodeTrimmedRaw = prevNodeRaw.slice(0, nodeStartOffset - prevNode.startOffset);
|
|
1113
|
-
this.updateRaw(prevNode, prevNodeTrimmedRaw);
|
|
1306
|
+
/**
|
|
1307
|
+
* Trim overlapping sections of text nodes for proper node separation
|
|
1308
|
+
*
|
|
1309
|
+
* @param nodeList
|
|
1310
|
+
* @returns
|
|
1311
|
+
*/
|
|
1312
|
+
#trimText(nodeList) {
|
|
1313
|
+
const newNodeList = [];
|
|
1314
|
+
let prevNode = null;
|
|
1315
|
+
for (const node of nodeList) {
|
|
1316
|
+
if (prevNode?.type === 'text' &&
|
|
1317
|
+
// Empty node
|
|
1318
|
+
node.raw.length > 0) {
|
|
1319
|
+
const prevNodeEndOffset = prevNode.offset + prevNode.raw.length;
|
|
1320
|
+
const nodeStartOffset = node.offset;
|
|
1321
|
+
if (prevNodeEndOffset > nodeStartOffset) {
|
|
1322
|
+
const prevNodeRaw = prevNode.raw;
|
|
1323
|
+
const prevNodeTrimmedRaw = prevNodeRaw.slice(0, nodeStartOffset - prevNode.offset);
|
|
1324
|
+
this.updateRaw(prevNode, prevNodeTrimmedRaw);
|
|
1325
|
+
}
|
|
1114
1326
|
}
|
|
1327
|
+
newNodeList.push(node);
|
|
1328
|
+
prevNode = node;
|
|
1115
1329
|
}
|
|
1116
|
-
newNodeList
|
|
1117
|
-
prevNode = node;
|
|
1330
|
+
return newNodeList;
|
|
1118
1331
|
}
|
|
1119
|
-
|
|
1120
|
-
};
|
|
1332
|
+
}
|