@tkeron/html-parser 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +6 -9
- package/package.json +3 -3
- package/src/tokenizer.ts +5 -49
- package/tests/custom-elements.test.ts +745 -0
package/bun.lock
CHANGED
|
@@ -1,28 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"lockfileVersion": 1,
|
|
3
|
+
"configVersion": 0,
|
|
3
4
|
"workspaces": {
|
|
4
5
|
"": {
|
|
5
6
|
"name": "html-parser",
|
|
6
7
|
"devDependencies": {
|
|
7
|
-
"@types/bun": "
|
|
8
|
+
"@types/bun": "^1.3.4",
|
|
8
9
|
},
|
|
9
10
|
"peerDependencies": {
|
|
10
|
-
"typescript": "^5.
|
|
11
|
+
"typescript": "^5.9.3",
|
|
11
12
|
},
|
|
12
13
|
},
|
|
13
14
|
},
|
|
14
15
|
"packages": {
|
|
15
|
-
"@types/bun": ["@types/bun@1.
|
|
16
|
+
"@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
|
|
16
17
|
|
|
17
18
|
"@types/node": ["@types/node@24.0.4", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ulyqAkrhnuNq9pB76DRBTkcS6YsmDALy6Ua63V8OhrOBgbcYt6IOdzpw5P1+dyRIyMerzLkeYWBeOXPpA9GMAA=="],
|
|
18
19
|
|
|
19
|
-
"
|
|
20
|
+
"bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="],
|
|
20
21
|
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
|
24
|
-
|
|
25
|
-
"typescript": ["typescript@5.8.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="],
|
|
22
|
+
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
|
26
23
|
|
|
27
24
|
"undici-types": ["undici-types@7.8.0", "", {}, "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw=="],
|
|
28
25
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tkeron/html-parser",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "A fast and lightweight HTML parser for Bun",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"module": "index.ts",
|
|
@@ -8,10 +8,10 @@
|
|
|
8
8
|
"author": "tkeron",
|
|
9
9
|
"license": "MIT",
|
|
10
10
|
"devDependencies": {
|
|
11
|
-
"@types/bun": "
|
|
11
|
+
"@types/bun": "^1.3.4"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"typescript": "^5.
|
|
14
|
+
"typescript": "^5.9.3"
|
|
15
15
|
},
|
|
16
16
|
"keywords": [
|
|
17
17
|
"cli",
|
package/src/tokenizer.ts
CHANGED
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* HTML Tokenizer using Bun's HTMLRewriter for efficient HTML parsing
|
|
3
|
-
* This tokenizer provides a stream-based approach to HTML parsing
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
1
|
export enum TokenType {
|
|
7
2
|
TAG_OPEN = 'TAG_OPEN',
|
|
8
3
|
TAG_CLOSE = 'TAG_CLOSE',
|
|
@@ -29,7 +24,6 @@ export interface Token {
|
|
|
29
24
|
isClosing?: boolean;
|
|
30
25
|
}
|
|
31
26
|
|
|
32
|
-
// HTML entities mapping
|
|
33
27
|
const HTML_ENTITIES: Record<string, string> = {
|
|
34
28
|
'&': '&',
|
|
35
29
|
'<': '<',
|
|
@@ -54,10 +48,8 @@ const HTML_ENTITIES: Record<string, string> = {
|
|
|
54
48
|
* Decode HTML entities in a string and handle null characters
|
|
55
49
|
*/
|
|
56
50
|
function decodeEntities(text: string): string {
|
|
57
|
-
// First, replace null characters with the Unicode replacement character
|
|
58
51
|
let result = text.replace(/\u0000/g, '\uFFFD');
|
|
59
52
|
|
|
60
|
-
// Then decode HTML entities
|
|
61
53
|
return result.replace(/&(?:#x([0-9a-fA-F]+);?|#([0-9]+);?|([a-zA-Z][a-zA-Z0-9]*);?)/g, (match, hex, decimal, named) => {
|
|
62
54
|
if (hex) {
|
|
63
55
|
return String.fromCharCode(parseInt(hex, 16));
|
|
@@ -66,12 +58,10 @@ function decodeEntities(text: string): string {
|
|
|
66
58
|
return String.fromCharCode(parseInt(decimal, 10));
|
|
67
59
|
}
|
|
68
60
|
if (named) {
|
|
69
|
-
// First try with semicolon
|
|
70
61
|
if (HTML_ENTITIES[`&${named};`]) {
|
|
71
62
|
return HTML_ENTITIES[`&${named};`];
|
|
72
63
|
}
|
|
73
64
|
|
|
74
|
-
// For entities without semicolon, try to find the longest valid entity prefix
|
|
75
65
|
if (!match.endsWith(';')) {
|
|
76
66
|
for (let i = named.length; i > 0; i--) {
|
|
77
67
|
const prefix = named.substring(0, i);
|
|
@@ -94,7 +84,6 @@ function decodeEntities(text: string): string {
|
|
|
94
84
|
function parseAttributes(attributeString: string): Record<string, string> {
|
|
95
85
|
const attributes: Record<string, string> = {};
|
|
96
86
|
|
|
97
|
-
// Regex to match attributes: name="value", name='value', name=value, or just name
|
|
98
87
|
const attrRegex = /([a-zA-Z][a-zA-Z0-9\-_:]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
|
|
99
88
|
let match;
|
|
100
89
|
|
|
@@ -129,42 +118,34 @@ export function tokenize(html: string): Token[] {
|
|
|
129
118
|
const tokens: Token[] = [];
|
|
130
119
|
let position = 0;
|
|
131
120
|
|
|
132
|
-
// Handle special cases first (DOCTYPE, comments, CDATA, processing instructions)
|
|
133
121
|
const specialCases = [
|
|
134
|
-
// DOCTYPE
|
|
135
122
|
{
|
|
136
123
|
pattern: /<!DOCTYPE\s+[^>]*>/gi,
|
|
137
124
|
type: TokenType.DOCTYPE,
|
|
138
125
|
getValue: (match: string) => {
|
|
139
|
-
// Extract just the doctype name (e.g., "html" from "<!DOCTYPE html>")
|
|
140
126
|
const doctypeMatch = match.match(/<!DOCTYPE\s+([^\s>]+)/i);
|
|
141
127
|
return doctypeMatch && doctypeMatch[1] ? doctypeMatch[1].toLowerCase() : match;
|
|
142
128
|
}
|
|
143
129
|
},
|
|
144
|
-
// Comments (including unclosed ones)
|
|
145
130
|
{
|
|
146
131
|
pattern: /<!--([\s\S]*?)(?:-->|$)/g,
|
|
147
132
|
type: TokenType.COMMENT,
|
|
148
133
|
getValue: (match: string) => match.slice(4, match.endsWith('-->') ? -3 : match.length)
|
|
149
134
|
},
|
|
150
|
-
// CDATA
|
|
151
135
|
{
|
|
152
136
|
pattern: /<!\[CDATA\[([\s\S]*?)\]\]>/g,
|
|
153
137
|
type: TokenType.CDATA,
|
|
154
138
|
getValue: (match: string) => match.slice(9, -3)
|
|
155
139
|
},
|
|
156
|
-
// Processing Instructions
|
|
157
140
|
{
|
|
158
141
|
pattern: /<\?([^?]*(?:\?(?!>)[^?]*)*)\?>/g,
|
|
159
142
|
type: TokenType.PROCESSING_INSTRUCTION,
|
|
160
|
-
getValue: (match: string) => match.slice(0, -2)
|
|
143
|
+
getValue: (match: string) => match.slice(0, -2)
|
|
161
144
|
}
|
|
162
145
|
];
|
|
163
146
|
|
|
164
|
-
// Track processed ranges to avoid double processing
|
|
165
147
|
const processedRanges: Array<[number, number]> = [];
|
|
166
148
|
|
|
167
|
-
// Process special cases first
|
|
168
149
|
for (const { pattern, type, getValue } of specialCases) {
|
|
169
150
|
const regex = new RegExp(pattern);
|
|
170
151
|
let match;
|
|
@@ -183,20 +164,16 @@ export function tokenize(html: string): Token[] {
|
|
|
183
164
|
}
|
|
184
165
|
}
|
|
185
166
|
|
|
186
|
-
// Sort processed ranges by start position
|
|
187
167
|
processedRanges.sort((a, b) => a[0] - b[0]);
|
|
188
168
|
|
|
189
|
-
// Process remaining HTML with manual parsing
|
|
190
169
|
let currentPos = 0;
|
|
191
170
|
|
|
192
171
|
while (currentPos < html.length) {
|
|
193
|
-
// Check if current position is in a processed range
|
|
194
172
|
const inProcessedRange = processedRanges.some(([start, end]) =>
|
|
195
173
|
currentPos >= start && currentPos < end
|
|
196
174
|
);
|
|
197
175
|
|
|
198
176
|
if (inProcessedRange) {
|
|
199
|
-
// Skip to end of processed range
|
|
200
177
|
const range = processedRanges.find(([start, end]) =>
|
|
201
178
|
currentPos >= start && currentPos < end
|
|
202
179
|
);
|
|
@@ -209,8 +186,7 @@ export function tokenize(html: string): Token[] {
|
|
|
209
186
|
const char = html[currentPos];
|
|
210
187
|
|
|
211
188
|
if (char === '<') {
|
|
212
|
-
|
|
213
|
-
const tagMatch = html.slice(currentPos).match(/^<\/?([a-zA-Z][a-zA-Z0-9]*)[^>]*>/);
|
|
189
|
+
const tagMatch = html.slice(currentPos).match(/^<\/?([a-zA-Z][^\s/>]*)([^>]*)>/);
|
|
214
190
|
|
|
215
191
|
if (tagMatch) {
|
|
216
192
|
const fullTag = tagMatch[0];
|
|
@@ -224,10 +200,9 @@ export function tokenize(html: string): Token[] {
|
|
|
224
200
|
const isClosing = fullTag.startsWith('</');
|
|
225
201
|
const isSelfClosing = fullTag.endsWith('/>');
|
|
226
202
|
|
|
227
|
-
// Parse attributes if it's an opening tag
|
|
228
203
|
let attributes: Record<string, string> = {};
|
|
229
204
|
if (!isClosing) {
|
|
230
|
-
const attrMatch = fullTag.match(/^<[a-zA-Z][
|
|
205
|
+
const attrMatch = fullTag.match(/^<[a-zA-Z][^\s/>]*\s+([^>]*?)\/?>$/);
|
|
231
206
|
if (attrMatch && attrMatch[1]) {
|
|
232
207
|
attributes = parseAttributes(attrMatch[1]);
|
|
233
208
|
}
|
|
@@ -245,17 +220,15 @@ export function tokenize(html: string): Token[] {
|
|
|
245
220
|
|
|
246
221
|
currentPos += fullTag.length;
|
|
247
222
|
} else {
|
|
248
|
-
// Not a valid tag, treat as text
|
|
249
223
|
const textStart = currentPos;
|
|
250
224
|
currentPos++;
|
|
251
225
|
|
|
252
|
-
// Find the end of text (next '<' or end of string)
|
|
253
226
|
while (currentPos < html.length && html[currentPos] !== '<') {
|
|
254
227
|
currentPos++;
|
|
255
228
|
}
|
|
256
229
|
|
|
257
230
|
const textContent = html.slice(textStart, currentPos);
|
|
258
|
-
if (textContent) {
|
|
231
|
+
if (textContent) {
|
|
259
232
|
tokens.push({
|
|
260
233
|
type: TokenType.TEXT,
|
|
261
234
|
value: decodeEntities(textContent),
|
|
@@ -264,16 +237,14 @@ export function tokenize(html: string): Token[] {
|
|
|
264
237
|
}
|
|
265
238
|
}
|
|
266
239
|
} else {
|
|
267
|
-
// Text content
|
|
268
240
|
const textStart = currentPos;
|
|
269
241
|
|
|
270
|
-
// Find the end of text (next '<' or end of string)
|
|
271
242
|
while (currentPos < html.length && html[currentPos] !== '<') {
|
|
272
243
|
currentPos++;
|
|
273
244
|
}
|
|
274
245
|
|
|
275
246
|
const textContent = html.slice(textStart, currentPos);
|
|
276
|
-
if (textContent) {
|
|
247
|
+
if (textContent) {
|
|
277
248
|
tokens.push({
|
|
278
249
|
type: TokenType.TEXT,
|
|
279
250
|
value: decodeEntities(textContent),
|
|
@@ -296,22 +267,15 @@ export function tokenize(html: string): Token[] {
|
|
|
296
267
|
return tokens;
|
|
297
268
|
}
|
|
298
269
|
|
|
299
|
-
/**
|
|
300
|
-
* Enhanced tokenizer that uses HTMLRewriter for better performance on large HTML
|
|
301
|
-
* This is more efficient for well-formed HTML documents
|
|
302
|
-
*/
|
|
303
270
|
export function tokenizeWithRewriter(html: string): Token[] {
|
|
304
271
|
const tokens: Token[] = [];
|
|
305
272
|
let textBuffer = '';
|
|
306
273
|
let position = 0;
|
|
307
274
|
|
|
308
|
-
// First pass: collect all tokens using HTMLRewriter
|
|
309
275
|
const rewriter = new HTMLRewriter();
|
|
310
276
|
|
|
311
|
-
// Handle all elements
|
|
312
277
|
rewriter.on('*', {
|
|
313
278
|
element(element) {
|
|
314
|
-
// Flush any accumulated text
|
|
315
279
|
if (textBuffer.trim()) {
|
|
316
280
|
tokens.push({
|
|
317
281
|
type: TokenType.TEXT,
|
|
@@ -335,9 +299,7 @@ export function tokenizeWithRewriter(html: string): Token[] {
|
|
|
335
299
|
isSelfClosing: element.selfClosing
|
|
336
300
|
});
|
|
337
301
|
|
|
338
|
-
// Handle self-closing tags
|
|
339
302
|
if (!element.selfClosing) {
|
|
340
|
-
// We'll add the closing tag in the end handler
|
|
341
303
|
element.onEndTag((endTag) => {
|
|
342
304
|
tokens.push({
|
|
343
305
|
type: TokenType.TAG_CLOSE,
|
|
@@ -396,18 +358,12 @@ export function tokenizeWithRewriter(html: string): Token[] {
|
|
|
396
358
|
return tokens;
|
|
397
359
|
}
|
|
398
360
|
|
|
399
|
-
/**
|
|
400
|
-
* Smart tokenizer that chooses the best method based on HTML content
|
|
401
|
-
*/
|
|
402
361
|
export function smartTokenize(html: string): Token[] {
|
|
403
|
-
// Use HTMLRewriter for well-formed HTML, manual parsing for edge cases
|
|
404
362
|
const hasSpecialContent = /<!DOCTYPE|<!--|\[CDATA\[|<\?/.test(html);
|
|
405
363
|
|
|
406
364
|
if (hasSpecialContent || html.length < 1000) {
|
|
407
|
-
// Use manual parsing for small HTML or HTML with special content
|
|
408
365
|
return tokenize(html);
|
|
409
366
|
} else {
|
|
410
|
-
// Use HTMLRewriter for large, well-formed HTML
|
|
411
367
|
return tokenizeWithRewriter(html);
|
|
412
368
|
}
|
|
413
369
|
}
|
|
package/tests/custom-elements.test.ts
ADDED
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
import { expect, test, describe } from 'bun:test';
|
|
2
|
+
import { tokenize } from '../src/tokenizer';
|
|
3
|
+
import { parse, ASTNodeType, type ASTNode } from '../src/parser';
|
|
4
|
+
|
|
5
|
+
describe('Custom Elements Support', () => {
|
|
6
|
+
|
|
7
|
+
describe('Basic Custom Elements', () => {
|
|
8
|
+
test('should parse simple custom element with single hyphen', () => {
|
|
9
|
+
const tokens = tokenize('<my-component></my-component>');
|
|
10
|
+
const ast = parse(tokens);
|
|
11
|
+
|
|
12
|
+
expect(ast.type).toBe(ASTNodeType.DOCUMENT);
|
|
13
|
+
expect(ast.children).toHaveLength(1);
|
|
14
|
+
|
|
15
|
+
const element = ast.children![0]!;
|
|
16
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
17
|
+
expect(element.tagName).toBe('my-component');
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
test('should parse custom element with numbers', () => {
|
|
21
|
+
const tokens = tokenize('<my-component-123></my-component-123>');
|
|
22
|
+
const ast = parse(tokens);
|
|
23
|
+
|
|
24
|
+
const element = ast.children![0]!;
|
|
25
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
26
|
+
expect(element.tagName).toBe('my-component-123');
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
test('should parse short custom element', () => {
|
|
30
|
+
const tokens = tokenize('<x-button></x-button>');
|
|
31
|
+
const ast = parse(tokens);
|
|
32
|
+
|
|
33
|
+
const element = ast.children![0]!;
|
|
34
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
35
|
+
expect(element.tagName).toBe('x-button');
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('should parse custom element with multiple hyphens', () => {
|
|
39
|
+
const tokens = tokenize('<app-header-nav></app-header-nav>');
|
|
40
|
+
const ast = parse(tokens);
|
|
41
|
+
|
|
42
|
+
const element = ast.children![0]!;
|
|
43
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
44
|
+
expect(element.tagName).toBe('app-header-nav');
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test('should parse custom element with many hyphens', () => {
|
|
48
|
+
const tokens = tokenize('<my-custom-super-component></my-custom-super-component>');
|
|
49
|
+
const ast = parse(tokens);
|
|
50
|
+
|
|
51
|
+
const element = ast.children![0]!;
|
|
52
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
53
|
+
expect(element.tagName).toBe('my-custom-super-component');
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test('should parse custom element with dots', () => {
|
|
57
|
+
const tokens = tokenize('<my-comp.v2></my-comp.v2>');
|
|
58
|
+
const ast = parse(tokens);
|
|
59
|
+
|
|
60
|
+
const element = ast.children![0]!;
|
|
61
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
62
|
+
expect(element.tagName).toBe('my-comp.v2');
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test('should parse custom element with underscores', () => {
|
|
66
|
+
const tokens = tokenize('<my-comp_beta></my-comp_beta>');
|
|
67
|
+
const ast = parse(tokens);
|
|
68
|
+
|
|
69
|
+
const element = ast.children![0]!;
|
|
70
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
71
|
+
expect(element.tagName).toBe('my-comp_beta');
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
describe('Custom Elements with Attributes', () => {
|
|
76
|
+
test('should parse custom element with class attribute', () => {
|
|
77
|
+
const tokens = tokenize('<my-comp class="test"></my-comp>');
|
|
78
|
+
const ast = parse(tokens);
|
|
79
|
+
|
|
80
|
+
const element = ast.children![0]!;
|
|
81
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
82
|
+
expect(element.tagName).toBe('my-comp');
|
|
83
|
+
expect(element.attributes).toEqual({ class: 'test' });
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test('should parse custom element with multiple attributes', () => {
|
|
87
|
+
const tokens = tokenize('<my-comp class="test" id="main" data-value="123"></my-comp>');
|
|
88
|
+
const ast = parse(tokens);
|
|
89
|
+
|
|
90
|
+
const element = ast.children![0]!;
|
|
91
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
92
|
+
expect(element.tagName).toBe('my-comp');
|
|
93
|
+
expect(element.attributes).toEqual({
|
|
94
|
+
class: 'test',
|
|
95
|
+
id: 'main',
|
|
96
|
+
'data-value': '123'
|
|
97
|
+
});
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test('should parse custom element with custom attributes', () => {
|
|
101
|
+
const tokens = tokenize('<user-card name="John" age="30"></user-card>');
|
|
102
|
+
const ast = parse(tokens);
|
|
103
|
+
|
|
104
|
+
const element = ast.children![0]!;
|
|
105
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
106
|
+
expect(element.tagName).toBe('user-card');
|
|
107
|
+
expect(element.attributes).toEqual({
|
|
108
|
+
name: 'John',
|
|
109
|
+
age: '30'
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
describe('Self-Closing Custom Elements', () => {
|
|
115
|
+
test('should parse self-closing custom element with space', () => {
|
|
116
|
+
const tokens = tokenize('<self-closing />');
|
|
117
|
+
const ast = parse(tokens);
|
|
118
|
+
|
|
119
|
+
const element = ast.children![0]!;
|
|
120
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
121
|
+
expect(element.tagName).toBe('self-closing');
|
|
122
|
+
expect(element.isSelfClosing).toBe(true);
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
test('should parse self-closing custom element without space', () => {
|
|
126
|
+
const tokens = tokenize('<my-comp/>');
|
|
127
|
+
const ast = parse(tokens);
|
|
128
|
+
|
|
129
|
+
const element = ast.children![0]!;
|
|
130
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
131
|
+
expect(element.tagName).toBe('my-comp');
|
|
132
|
+
expect(element.isSelfClosing).toBe(true);
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
test('should parse self-closing custom element with attributes', () => {
|
|
136
|
+
const tokens = tokenize('<icon-button type="primary" size="lg" />');
|
|
137
|
+
const ast = parse(tokens);
|
|
138
|
+
|
|
139
|
+
const element = ast.children![0]!;
|
|
140
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
141
|
+
expect(element.tagName).toBe('icon-button');
|
|
142
|
+
expect(element.isSelfClosing).toBe(true);
|
|
143
|
+
expect(element.attributes).toEqual({
|
|
144
|
+
type: 'primary',
|
|
145
|
+
size: 'lg'
|
|
146
|
+
});
|
|
147
|
+
});
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
describe('Nested Custom Elements', () => {
|
|
151
|
+
test('should parse nested custom elements', () => {
|
|
152
|
+
const tokens = tokenize('<outer-comp><inner-comp>text</inner-comp></outer-comp>');
|
|
153
|
+
const ast = parse(tokens);
|
|
154
|
+
|
|
155
|
+
const outer = ast.children![0]!;
|
|
156
|
+
expect(outer.type).toBe(ASTNodeType.ELEMENT);
|
|
157
|
+
expect(outer.tagName).toBe('outer-comp');
|
|
158
|
+
expect(outer.children).toHaveLength(1);
|
|
159
|
+
|
|
160
|
+
const inner = outer.children![0]!;
|
|
161
|
+
expect(inner.type).toBe(ASTNodeType.ELEMENT);
|
|
162
|
+
expect(inner.tagName).toBe('inner-comp');
|
|
163
|
+
expect(inner.children).toHaveLength(1);
|
|
164
|
+
|
|
165
|
+
const text = inner.children![0]!;
|
|
166
|
+
expect(text.type).toBe(ASTNodeType.TEXT);
|
|
167
|
+
expect(text.content).toBe('text');
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test('should parse deeply nested custom elements', () => {
|
|
171
|
+
const tokens = tokenize('<level-1><level-2><level-3>content</level-3></level-2></level-1>');
|
|
172
|
+
const ast = parse(tokens);
|
|
173
|
+
|
|
174
|
+
const level1 = ast.children![0]!;
|
|
175
|
+
expect(level1.tagName).toBe('level-1');
|
|
176
|
+
|
|
177
|
+
const level2 = level1.children![0]!;
|
|
178
|
+
expect(level2.tagName).toBe('level-2');
|
|
179
|
+
|
|
180
|
+
const level3 = level2.children![0]!;
|
|
181
|
+
expect(level3.tagName).toBe('level-3');
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test('should parse custom elements mixed with standard elements', () => {
|
|
185
|
+
const tokens = tokenize('<div><my-comp><span>text</span></my-comp></div>');
|
|
186
|
+
const ast = parse(tokens);
|
|
187
|
+
|
|
188
|
+
const div = ast.children![0]!;
|
|
189
|
+
expect(div.tagName).toBe('div');
|
|
190
|
+
|
|
191
|
+
const myComp = div.children![0]!;
|
|
192
|
+
expect(myComp.tagName).toBe('my-comp');
|
|
193
|
+
|
|
194
|
+
const span = myComp.children![0]!;
|
|
195
|
+
expect(span.tagName).toBe('span');
|
|
196
|
+
});
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
describe('Tag Name Normalization', () => {
|
|
200
|
+
test('should normalize custom element tagName to UPPERCASE', () => {
|
|
201
|
+
const tokens = tokenize('<my-comp></my-comp>');
|
|
202
|
+
const ast = parse(tokens);
|
|
203
|
+
|
|
204
|
+
const element = ast.children![0]!;
|
|
205
|
+
expect(element.tagName.toUpperCase()).toBe('MY-COMP');
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
test('should normalize nodeName to UPPERCASE', () => {
|
|
209
|
+
const tokens = tokenize('<my-comp></my-comp>');
|
|
210
|
+
const ast = parse(tokens);
|
|
211
|
+
|
|
212
|
+
const element = ast.children![0]!;
|
|
213
|
+
// nodeName should also be uppercase
|
|
214
|
+
if (element.nodeName) {
|
|
215
|
+
expect(element.nodeName.toUpperCase()).toBe('MY-COMP');
|
|
216
|
+
}
|
|
217
|
+
});
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
describe('Regression Tests - Standard Elements', () => {
|
|
221
|
+
test('should still parse standard div element', () => {
|
|
222
|
+
const tokens = tokenize('<div></div>');
|
|
223
|
+
const ast = parse(tokens);
|
|
224
|
+
|
|
225
|
+
const element = ast.children![0]!;
|
|
226
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
227
|
+
expect(element.tagName).toBe('div');
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
test('should still parse standard header element', () => {
|
|
231
|
+
const tokens = tokenize('<header></header>');
|
|
232
|
+
const ast = parse(tokens);
|
|
233
|
+
|
|
234
|
+
const element = ast.children![0]!;
|
|
235
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
236
|
+
expect(element.tagName).toBe('header');
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
test('should still parse standard section element', () => {
|
|
240
|
+
const tokens = tokenize('<section></section>');
|
|
241
|
+
const ast = parse(tokens);
|
|
242
|
+
|
|
243
|
+
const element = ast.children![0]!;
|
|
244
|
+
expect(element.type).toBe(ASTNodeType.ELEMENT);
|
|
245
|
+
expect(element.tagName).toBe('section');
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
test('should distinguish between header tag and header-comp custom element', () => {
|
|
249
|
+
const tokens = tokenize('<header></header><header-comp></header-comp>');
|
|
250
|
+
const ast = parse(tokens);
|
|
251
|
+
|
|
252
|
+
expect(ast.children).toHaveLength(2);
|
|
253
|
+
|
|
254
|
+
const header = ast.children![0]!;
|
|
255
|
+
expect(header.tagName).toBe('header');
|
|
256
|
+
|
|
257
|
+
const headerComp = ast.children![1]!;
|
|
258
|
+
expect(headerComp.tagName).toBe('header-comp');
|
|
259
|
+
});
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
describe('Custom Elements with Different Formats', () => {
|
|
263
|
+
test('should parse custom element: my-comp', () => {
|
|
264
|
+
const tokens = tokenize('<my-comp></my-comp>');
|
|
265
|
+
const ast = parse(tokens);
|
|
266
|
+
|
|
267
|
+
const element = ast.children![0]!;
|
|
268
|
+
expect(element.tagName).toBe('my-comp');
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
test('should parse custom element: comp-v2', () => {
|
|
272
|
+
const tokens = tokenize('<comp-v2></comp-v2>');
|
|
273
|
+
const ast = parse(tokens);
|
|
274
|
+
|
|
275
|
+
const element = ast.children![0]!;
|
|
276
|
+
expect(element.tagName).toBe('comp-v2');
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
test('should parse custom element: my-comp-123', () => {
|
|
280
|
+
const tokens = tokenize('<my-comp-123></my-comp-123>');
|
|
281
|
+
const ast = parse(tokens);
|
|
282
|
+
|
|
283
|
+
const element = ast.children![0]!;
|
|
284
|
+
expect(element.tagName).toBe('my-comp-123');
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
test('should parse custom element: x-foo', () => {
|
|
288
|
+
const tokens = tokenize('<x-foo></x-foo>');
|
|
289
|
+
const ast = parse(tokens);
|
|
290
|
+
|
|
291
|
+
const element = ast.children![0]!;
|
|
292
|
+
expect(element.tagName).toBe('x-foo');
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
test('should parse custom element with numbers: comp-123-test', () => {
|
|
296
|
+
const tokens = tokenize('<comp-123-test></comp-123-test>');
|
|
297
|
+
const ast = parse(tokens);
|
|
298
|
+
|
|
299
|
+
const element = ast.children![0]!;
|
|
300
|
+
expect(element.tagName).toBe('comp-123-test');
|
|
301
|
+
});
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
describe('Edge Cases', () => {
|
|
305
|
+
test('should parse custom element with whitespace before closing bracket', () => {
|
|
306
|
+
const tokens = tokenize('<my-comp ></my-comp>');
|
|
307
|
+
const ast = parse(tokens);
|
|
308
|
+
|
|
309
|
+
const element = ast.children![0]!;
|
|
310
|
+
expect(element.tagName).toBe('my-comp');
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
test('should parse multiple custom elements in sequence', () => {
|
|
314
|
+
const tokens = tokenize('<first-comp></first-comp><second-comp></second-comp><third-comp></third-comp>');
|
|
315
|
+
const ast = parse(tokens);
|
|
316
|
+
|
|
317
|
+
expect(ast.children).toHaveLength(3);
|
|
318
|
+
expect(ast.children![0]!.tagName).toBe('first-comp');
|
|
319
|
+
expect(ast.children![1]!.tagName).toBe('second-comp');
|
|
320
|
+
expect(ast.children![2]!.tagName).toBe('third-comp');
|
|
321
|
+
});
|
|
322
|
+
|
|
323
|
+
test('should parse custom element with text content', () => {
|
|
324
|
+
const tokens = tokenize('<user-name>John Doe</user-name>');
|
|
325
|
+
const ast = parse(tokens);
|
|
326
|
+
|
|
327
|
+
const element = ast.children![0]!;
|
|
328
|
+
expect(element.tagName).toBe('user-name');
|
|
329
|
+
expect(element.children).toHaveLength(1);
|
|
330
|
+
expect(element.children![0]!.type).toBe(ASTNodeType.TEXT);
|
|
331
|
+
expect(element.children![0]!.content).toBe('John Doe');
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
test('should parse custom element with child elements and text', () => {
|
|
335
|
+
const tokens = tokenize('<card-header><h1>Title</h1><sub-title>Subtitle</sub-title></card-header>');
|
|
336
|
+
const ast = parse(tokens);
|
|
337
|
+
|
|
338
|
+
const cardHeader = ast.children![0]!;
|
|
339
|
+
expect(cardHeader.tagName).toBe('card-header');
|
|
340
|
+
expect(cardHeader.children).toHaveLength(2);
|
|
341
|
+
|
|
342
|
+
expect(cardHeader.children![0]!.tagName).toBe('h1');
|
|
343
|
+
expect(cardHeader.children![1]!.tagName).toBe('sub-title');
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
test('should handle unclosed custom element gracefully', () => {
|
|
347
|
+
const tokens = tokenize('<my-comp>');
|
|
348
|
+
const ast = parse(tokens);
|
|
349
|
+
|
|
350
|
+
const element = ast.children![0]!;
|
|
351
|
+
expect(element.tagName).toBe('my-comp');
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
test('should parse custom element with trailing slash in opening tag', () => {
|
|
355
|
+
const tokens = tokenize('<my-comp/>');
|
|
356
|
+
const ast = parse(tokens);
|
|
357
|
+
|
|
358
|
+
const element = ast.children![0]!;
|
|
359
|
+
expect(element.tagName).toBe('my-comp');
|
|
360
|
+
expect(element.isSelfClosing).toBe(true);
|
|
361
|
+
});
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
describe('Complex Real-World Scenarios', () => {
|
|
365
|
+
test('should parse web component with shadow DOM structure', () => {
|
|
366
|
+
const html = `
|
|
367
|
+
<user-profile>
|
|
368
|
+
<profile-header>
|
|
369
|
+
<avatar-img src="user.jpg" />
|
|
370
|
+
<user-name>Jane Smith</user-name>
|
|
371
|
+
</profile-header>
|
|
372
|
+
<profile-content>
|
|
373
|
+
<bio-section>Biography text here</bio-section>
|
|
374
|
+
<stats-panel>
|
|
375
|
+
<stat-item label="Posts" value="123" />
|
|
376
|
+
<stat-item label="Followers" value="456" />
|
|
377
|
+
</stats-panel>
|
|
378
|
+
</profile-content>
|
|
379
|
+
</user-profile>
|
|
380
|
+
`;
|
|
381
|
+
|
|
382
|
+
const tokens = tokenize(html);
|
|
383
|
+
const ast = parse(tokens);
|
|
384
|
+
|
|
385
|
+
// Find first element (skip whitespace text nodes)
|
|
386
|
+
const userProfile = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
|
|
387
|
+
expect(userProfile.tagName).toBe('user-profile');
|
|
388
|
+
|
|
389
|
+
// Should have proper nesting
|
|
390
|
+
expect(userProfile.children).toBeDefined();
|
|
391
|
+
expect(userProfile.children!.length).toBeGreaterThan(0);
|
|
392
|
+
});
|
|
393
|
+
|
|
394
|
+
test('should parse framework-style component tree', () => {
|
|
395
|
+
const html = `
|
|
396
|
+
<app-root>
|
|
397
|
+
<app-header>
|
|
398
|
+
<nav-bar>
|
|
399
|
+
<nav-item href="/home">Home</nav-item>
|
|
400
|
+
<nav-item href="/about">About</nav-item>
|
|
401
|
+
</nav-bar>
|
|
402
|
+
</app-header>
|
|
403
|
+
<main-content>
|
|
404
|
+
<article-list>
|
|
405
|
+
<article-card title="Test" />
|
|
406
|
+
</article-list>
|
|
407
|
+
</main-content>
|
|
408
|
+
<app-footer />
|
|
409
|
+
</app-root>
|
|
410
|
+
`;
|
|
411
|
+
|
|
412
|
+
const tokens = tokenize(html);
|
|
413
|
+
const ast = parse(tokens);
|
|
414
|
+
|
|
415
|
+
// Find first element (skip whitespace text nodes)
|
|
416
|
+
const appRoot = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
|
|
417
|
+
expect(appRoot.tagName).toBe('app-root');
|
|
418
|
+
});
|
|
419
|
+
|
|
420
|
+
test('should parse custom elements with data attributes', () => {
|
|
421
|
+
const tokens = tokenize('<my-widget data-id="123" data-type="primary" data-config=\'{"key":"value"}\'></my-widget>');
|
|
422
|
+
const ast = parse(tokens);
|
|
423
|
+
|
|
424
|
+
const element = ast.children![0]!;
|
|
425
|
+
expect(element.tagName).toBe('my-widget');
|
|
426
|
+
expect(element.attributes).toHaveProperty('data-id', '123');
|
|
427
|
+
expect(element.attributes).toHaveProperty('data-type', 'primary');
|
|
428
|
+
expect(element.attributes).toHaveProperty('data-config');
|
|
429
|
+
});
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
describe('Custom Element Name Validation Pattern', () => {
|
|
433
|
+
test('valid: starts with lowercase letter, contains hyphen', () => {
|
|
434
|
+
const validNames = [
|
|
435
|
+
'a-b',
|
|
436
|
+
'my-component',
|
|
437
|
+
'x-button',
|
|
438
|
+
'user-card',
|
|
439
|
+
'app-header',
|
|
440
|
+
'my-comp-123',
|
|
441
|
+
'comp-v2.1',
|
|
442
|
+
'test_element-1'
|
|
443
|
+
];
|
|
444
|
+
|
|
445
|
+
validNames.forEach(name => {
|
|
446
|
+
const tokens = tokenize(`<${name}></${name}>`);
|
|
447
|
+
const ast = parse(tokens);
|
|
448
|
+
const element = ast.children![0]!;
|
|
449
|
+
expect(element.tagName).toBe(name);
|
|
450
|
+
});
|
|
451
|
+
});
|
|
452
|
+
|
|
453
|
+
test('should handle complex hyphenated names', () => {
|
|
454
|
+
const complexNames = [
|
|
455
|
+
'my-super-long-component-name',
|
|
456
|
+
'x-1-2-3-4',
|
|
457
|
+
'component-v2-beta-test',
|
|
458
|
+
'ui-button-primary-large'
|
|
459
|
+
];
|
|
460
|
+
|
|
461
|
+
complexNames.forEach(name => {
|
|
462
|
+
const tokens = tokenize(`<${name}></${name}>`);
|
|
463
|
+
const ast = parse(tokens);
|
|
464
|
+
const element = ast.children![0]!;
|
|
465
|
+
expect(element.tagName).toBe(name);
|
|
466
|
+
});
|
|
467
|
+
});
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
describe('Tokenizer-specific Tests', () => {
  test('tokenizer should capture full custom element name', () => {
    const stream = tokenize('<my-component-123></my-component-123>');

    // The first opening-tag token must carry the complete hyphenated name.
    const opening = stream.find(({ type }) => type === 'TAG_OPEN');
    expect(opening).toBeDefined();
    expect(opening!.value).toBe('my-component-123');

    // The first closing-tag token must carry the same name.
    const closing = stream.find(({ type }) => type === 'TAG_CLOSE');
    expect(closing).toBeDefined();
    expect(closing!.value).toBe('my-component-123');
  });

  test('tokenizer should handle custom element with attributes correctly', () => {
    const opening = tokenize('<my-comp class="test" id="main"></my-comp>')
      .find(({ type }) => type === 'TAG_OPEN');

    expect(opening).toBeDefined();
    expect(opening!.value).toBe('my-comp');

    // Attributes are exposed on the opening-tag token as a plain key/value map.
    const expectedAttributes = { class: 'test', id: 'main' };
    expect(opening!.attributes).toEqual(expectedAttributes);
  });

  test('tokenizer should handle self-closing custom elements', () => {
    const opening = tokenize('<my-comp />')
      .find(({ type }) => type === 'TAG_OPEN');

    expect(opening).toBeDefined();
    expect(opening!.value).toBe('my-comp');
    // `<my-comp />` must be flagged as self-closing on the token itself.
    expect(opening!.isSelfClosing).toBe(true);
  });
});
|
|
506
|
+
|
|
507
|
+
describe('Customized Built-in Elements (is attribute)', () => {
  // Runs the tokenize -> parse pipeline and returns the first top-level node.
  const firstNode = (html: string) => parse(tokenize(html)).children![0]!;

  test('should parse button with is attribute', () => {
    const node = firstNode('<button is="plastic-button">Click Me!</button>');

    // The tag stays a built-in `button`; `is` is carried as a regular attribute.
    expect(node.type).toBe(ASTNodeType.ELEMENT);
    expect(node.tagName).toBe('button');
    expect(node.attributes).toHaveProperty('is', 'plastic-button');
  });

  test('should parse input with is attribute', () => {
    const node = firstNode('<input is="custom-input" type="text" />');

    expect(node.tagName).toBe('input');
    // `is` must coexist with the element's other attributes.
    expect(node.attributes).toHaveProperty('is', 'custom-input');
    expect(node.attributes).toHaveProperty('type', 'text');
  });

  test('should parse div with is attribute', () => {
    const node = firstNode('<div is="fancy-div"></div>');

    expect(node.tagName).toBe('div');
    expect(node.attributes).toHaveProperty('is', 'fancy-div');
  });
});
|
|
537
|
+
|
|
538
|
+
describe('Reserved Custom Element Names', () => {
  // Parses an empty `<name></name>` pair and returns the first node's tag name.
  const parsedTagName = (name: string) =>
    parse(tokenize(`<${name}></${name}>`)).children![0]!.tagName;

  // These hyphenated names are tested as "reserved" names; the parser is still
  // expected to return them as ordinary tag names.
  test('should parse annotation-xml (reserved SVG name)', () => {
    expect(parsedTagName('annotation-xml')).toBe('annotation-xml');
  });

  test('should parse font-face (reserved SVG name)', () => {
    expect(parsedTagName('font-face')).toBe('font-face');
  });

  test('should parse color-profile (reserved SVG name)', () => {
    expect(parsedTagName('color-profile')).toBe('color-profile');
  });
});
|
|
563
|
+
|
|
564
|
+
describe('Unicode Custom Element Names', () => {
  // Tokenizes and parses the markup, returning the tag name of the first node.
  const firstTagName = (html: string) => parse(tokenize(html)).children![0]!.tagName;

  test('should parse custom element with Greek letters', () => {
    const tagName = firstTagName('<math-α></math-α>');
    expect(tagName).toBe('math-α');
  });

  test('should parse custom element with emoji', () => {
    const tagName = firstTagName('<emotion-😍></emotion-😍>');
    expect(tagName).toBe('emotion-😍');
  });

  test('should parse custom element with Chinese characters', () => {
    const tagName = firstTagName('<my-元素></my-元素>');
    expect(tagName).toBe('my-元素');
  });

  test('should parse custom element with Arabic characters', () => {
    const tagName = firstTagName('<my-عنصر></my-عنصر>');
    expect(tagName).toBe('my-عنصر');
  });
});
|
|
597
|
+
|
|
598
|
+
describe('Extreme Edge Cases', () => {
  // Runs the tokenize -> parse pipeline and returns the first top-level node.
  const firstNode = (html: string) => parse(tokenize(html)).children![0]!;

  test('should parse very long custom element name', () => {
    const longName = 'my-super-duper-extra-long-custom-component-name-that-keeps-going-and-going';
    const element = firstNode(`<${longName}></${longName}>`);

    expect(element.tagName).toBe(longName);
  });

  test('should parse custom element with many consecutive hyphens', () => {
    const element = firstNode('<my---component></my---component>');

    expect(element.tagName).toBe('my---component');
  });

  test('should parse custom element starting with x-', () => {
    // Shortest hyphenated form: nothing follows the hyphen.
    const element = firstNode('<x-></x->');

    expect(element.tagName).toBe('x-');
  });

  test('should handle custom element with newlines in attributes', () => {
    // One attribute per line; the closing `>` sits on its own line.
    const html = [
      '<my-comp',
      'class="test"',
      'id="main"',
      'data-value="123"',
      '></my-comp>',
    ].join('\n');

    const element = firstNode(html);
    expect(element.tagName).toBe('my-comp');
    expect(element.attributes).toHaveProperty('class', 'test');
    expect(element.attributes).toHaveProperty('id', 'main');
    // The last attribute (directly before the newline + `>`) was previously
    // unchecked, so a tokenizer that dropped it would have gone unnoticed.
    expect(element.attributes).toHaveProperty('data-value', '123');
  });

  test('should parse custom element mixed with comments', () => {
    const html = '<!-- comment --><my-comp>text</my-comp><!-- another comment -->';
    const ast = parse(tokenize(html));

    // The element sits between two comments, so locate it by node type
    // rather than by position.
    const myComp = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
    expect(myComp.tagName).toBe('my-comp');
  });

  test('should parse empty custom element', () => {
    const element = firstNode('<my-comp></my-comp>');

    expect(element.tagName).toBe('my-comp');
    expect(element.children).toBeDefined();
    expect(element.children!.length).toBe(0);
  });

  test('should parse custom element with only whitespace content', () => {
    const element = firstNode('<my-comp> \n\t </my-comp>');

    expect(element.tagName).toBe('my-comp');
    expect(element.children).toBeDefined();
    // Whitespace-only content is expected to yield at least one child node.
    expect(element.children!.length).toBeGreaterThan(0);
  });
});
|
|
670
|
+
|
|
671
|
+
describe('Malformed Custom Elements', () => {
  // Runs the tokenize -> parse pipeline and returns the first top-level node.
  const firstNode = (html: string) => parse(tokenize(html)).children![0]!;

  test('should handle mismatched closing tag', () => {
    // The closing tag names a different element than the opening tag.
    const opened = firstNode('<my-comp></my-other>');

    expect(opened.tagName).toBe('my-comp');
  });

  test('should handle multiple unclosed custom elements', () => {
    // None of the three elements is ever closed.
    const outermost = firstNode('<my-comp><nested-comp><deep-comp>');

    expect(outermost.tagName).toBe('my-comp');
  });

  test('should handle custom element with malformed attributes', () => {
    // A valueless attribute appears next to a regular key="value" pair.
    const node = firstNode('<my-comp attr-without-value attr="value"></my-comp>');

    expect(node.tagName).toBe('my-comp');
    expect(node.attributes).toBeDefined();
  });
});
|
|
697
|
+
|
|
698
|
+
describe('Custom Elements in Special Contexts', () => {
  type ParsedNode = ReturnType<typeof parse>;

  // Depth-first search for the first node with the given tag name, starting at
  // (and including) `node`. Searching recursively keeps the tests independent
  // of any wrapper nodes the parser may place between the ancestors.
  const findByTagName = (node: ParsedNode, tagName: string): ParsedNode | undefined => {
    if (node.tagName === tagName) {
      return node;
    }
    for (const child of node.children ?? []) {
      const match = findByTagName(child, tagName);
      if (match) {
        return match;
      }
    }
    return undefined;
  };

  test('should parse custom element inside table', () => {
    const tokens = tokenize('<table><tr><td><my-cell>content</my-cell></td></tr></table>');
    const ast = parse(tokens);

    const table = ast.children![0]!;
    expect(table.tagName).toBe('table');

    // Previously only the wrapper was asserted; verify the custom element
    // itself is present somewhere beneath the table.
    expect(findByTagName(table, 'my-cell')).toBeDefined();
  });

  test('should parse custom element inside list', () => {
    const tokens = tokenize('<ul><li><list-item></list-item></li></ul>');
    const ast = parse(tokens);

    const ul = ast.children![0]!;
    expect(ul.tagName).toBe('ul');

    // The custom element must be reachable from the list.
    expect(findByTagName(ul, 'list-item')).toBeDefined();
  });

  test('should parse custom element inside form', () => {
    const tokens = tokenize('<form><form-field name="test"></form-field></form>');
    const ast = parse(tokens);

    const form = ast.children![0]!;
    expect(form.tagName).toBe('form');

    // The custom element must be reachable from the form and keep its attribute.
    const field = findByTagName(form, 'form-field');
    expect(field).toBeDefined();
    expect(field!.attributes).toHaveProperty('name', 'test');
  });
});
|
|
724
|
+
|
|
725
|
+
describe('ARIA and Accessibility', () => {
  // Runs the tokenize -> parse pipeline and returns the first top-level node.
  const firstNode = (html: string) => parse(tokenize(html)).children![0]!;

  test('should parse custom element with ARIA attributes', () => {
    const node = firstNode('<my-button role="button" aria-label="Click me" aria-disabled="true"></my-button>');

    expect(node.tagName).toBe('my-button');

    // `role` and the `aria-*` attributes must come through with exact values,
    // including the space inside `aria-label`.
    const expectedAttributes = [
      ['role', 'button'],
      ['aria-label', 'Click me'],
      ['aria-disabled', 'true'],
    ] as const;
    for (const [attribute, value] of expectedAttributes) {
      expect(node.attributes).toHaveProperty(attribute, value);
    }
  });

  test('should parse custom element with tabindex', () => {
    const node = firstNode('<my-comp tabindex="0"></my-comp>');

    // The value is asserted as the string '0'.
    expect(node.attributes).toHaveProperty('tabindex', '0');
  });
});
|
|
745
|
+
});
|