@tkeron/html-parser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/index.ts +9 -0
  46. package/src/parser/parse.ts +772 -0
  47. package/src/parser/types.ts +56 -0
  48. package/src/selectors/find-elements-descendant.ts +47 -0
  49. package/src/selectors/index.ts +2 -0
  50. package/src/selectors/matches-selector.ts +12 -0
  51. package/src/selectors/matches-token.ts +27 -0
  52. package/src/selectors/parse-selector.ts +48 -0
  53. package/src/selectors/query-selector-all.ts +43 -0
  54. package/src/selectors/query-selector.ts +6 -0
  55. package/src/selectors/types.ts +10 -0
  56. package/src/serializer/attributes.ts +74 -0
  57. package/src/serializer/escape.ts +13 -0
  58. package/src/serializer/index.ts +1 -0
  59. package/src/serializer/serialize-tokens.ts +511 -0
  60. package/src/tokenizer/calculate-position.ts +10 -0
  61. package/src/tokenizer/constants.ts +11 -0
  62. package/src/tokenizer/decode-entities.ts +64 -0
  63. package/src/tokenizer/index.ts +2 -0
  64. package/src/tokenizer/parse-attributes.ts +74 -0
  65. package/src/tokenizer/tokenize.ts +165 -0
  66. package/src/tokenizer/types.ts +25 -0
  67. package/tests/adoption-agency-helpers.test.ts +304 -0
  68. package/tests/advanced.test.ts +242 -221
  69. package/tests/cloneNode.test.ts +19 -66
  70. package/tests/custom-elements-head.test.ts +54 -55
  71. package/tests/dom-extended.test.ts +77 -64
  72. package/tests/dom-manipulation.test.ts +51 -24
  73. package/tests/dom.test.ts +15 -13
  74. package/tests/encoding/detect-encoding.test.ts +33 -0
  75. package/tests/google-dom.test.ts +2 -2
  76. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  77. package/tests/helpers/tokenizer-adapter.ts +36 -33
  78. package/tests/helpers/tree-adapter.test.ts +20 -20
  79. package/tests/helpers/tree-adapter.ts +34 -24
  80. package/tests/html-entities-text.test.ts +6 -2
  81. package/tests/innerhtml-void-elements.test.ts +52 -36
  82. package/tests/outerHTML-replacement.test.ts +37 -65
  83. package/tests/parser/dom-to-ast.test.ts +109 -0
  84. package/tests/parser/parse.test.ts +139 -0
  85. package/tests/parser.test.ts +281 -217
  86. package/tests/selectors/query-selector-all.test.ts +39 -0
  87. package/tests/selectors/query-selector.test.ts +42 -0
  88. package/tests/serializer/attributes.test.ts +132 -0
  89. package/tests/serializer/escape.test.ts +51 -0
  90. package/tests/serializer/serialize-tokens.test.ts +80 -0
  91. package/tests/serializer-core.test.ts +6 -6
  92. package/tests/serializer-injectmeta.test.ts +6 -6
  93. package/tests/serializer-optionaltags.test.ts +9 -6
  94. package/tests/serializer-options.test.ts +6 -6
  95. package/tests/serializer-whitespace.test.ts +6 -6
  96. package/tests/tokenizer/calculate-position.test.ts +34 -0
  97. package/tests/tokenizer/decode-entities.test.ts +31 -0
  98. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  99. package/tests/tokenizer/tokenize.test.ts +757 -0
  100. package/tests/tokenizer-namedEntities.test.ts +10 -7
  101. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  102. package/tests/tokenizer.test.ts +268 -256
  103. package/tests/tree-construction-adoption01.test.ts +25 -16
  104. package/tests/tree-construction-adoption02.test.ts +30 -19
  105. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  106. package/tests/tree-construction-entities02.test.ts +18 -16
  107. package/tests/tree-construction-html5test-com.test.ts +16 -10
  108. package/tests/tree-construction-math.test.ts +11 -9
  109. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  110. package/tests/tree-construction-noscript01.test.ts +11 -9
  111. package/tests/tree-construction-ruby.test.ts +6 -4
  112. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  113. package/tests/tree-construction-svg.test.ts +6 -4
  114. package/tests/tree-construction-template.test.ts +6 -4
  115. package/tests/tree-construction-tests10.test.ts +6 -4
  116. package/tests/tree-construction-tests11.test.ts +6 -4
  117. package/tests/tree-construction-tests20.test.ts +7 -4
  118. package/tests/tree-construction-tests21.test.ts +7 -4
  119. package/tests/tree-construction-tests23.test.ts +7 -4
  120. package/tests/tree-construction-tests24.test.ts +7 -4
  121. package/tests/tree-construction-tests5.test.ts +6 -5
  122. package/tests/tree-construction-tests6.test.ts +6 -5
  123. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  124. package/tests/void-elements.test.ts +85 -40
  125. package/tsconfig.json +1 -1
  126. package/src/css-selector.ts +0 -185
  127. package/src/encoding.ts +0 -39
  128. package/src/parser.ts +0 -682
  129. package/src/serializer.ts +0 -450
  130. package/src/tokenizer.ts +0 -325
  131. package/tests/selectors.test.ts +0 -128
@@ -1,252 +1,251 @@
1
- import { expect, it, describe } from 'bun:test';
2
- import {
3
- tokenize,
4
- TokenType,
5
- type Token
6
- } from '../src/tokenizer';
1
+ import { expect, it, describe } from "bun:test";
2
+ import { tokenize, TokenType } from "../src/tokenizer/index.js";
7
3
 
8
- describe('HTML Tokenizer', () => {
9
-
10
- describe('Basic Tags', () => {
11
- it('should tokenize simple opening tag', () => {
12
- const tokens = tokenize('<div>');
4
+ describe("HTML Tokenizer", () => {
5
+ describe("Basic Tags", () => {
6
+ it("should tokenize simple opening tag", () => {
7
+ const tokens = tokenize("<div>");
13
8
 
14
9
  expect(tokens).toHaveLength(2);
15
10
  expect(tokens[0]!).toEqual({
16
11
  type: TokenType.TAG_OPEN,
17
- value: 'div',
12
+ value: "div",
18
13
  position: expect.any(Object),
19
14
  attributes: {},
20
- isSelfClosing: false
15
+ isSelfClosing: false,
21
16
  });
22
17
  expect(tokens[1]!.type).toBe(TokenType.EOF);
23
18
  });
24
19
 
25
- it('should tokenize simple closing tag', () => {
26
- const tokens = tokenize('</div>');
20
+ it("should tokenize simple closing tag", () => {
21
+ const tokens = tokenize("</div>");
27
22
 
28
23
  expect(tokens).toHaveLength(2);
29
24
  expect(tokens[0]!).toEqual({
30
25
  type: TokenType.TAG_CLOSE,
31
- value: 'div',
26
+ value: "div",
32
27
  position: expect.any(Object),
33
- isClosing: true
28
+ isClosing: true,
34
29
  });
35
30
  });
36
31
 
37
- it('should tokenize self-closing tag', () => {
38
- const tokens = tokenize('<img/>');
32
+ it("should tokenize self-closing tag", () => {
33
+ const tokens = tokenize("<img/>");
39
34
 
40
35
  expect(tokens).toHaveLength(2);
41
36
  expect(tokens[0]!).toEqual({
42
37
  type: TokenType.TAG_OPEN,
43
- value: 'img',
38
+ value: "img",
44
39
  position: expect.any(Object),
45
40
  attributes: {},
46
- isSelfClosing: true
41
+ isSelfClosing: true,
47
42
  });
48
43
  });
49
44
 
50
- it('should handle case insensitive tag names', () => {
51
- const tokens = tokenize('<DIV></DIV>');
45
+ it("should handle case insensitive tag names", () => {
46
+ const tokens = tokenize("<DIV></DIV>");
52
47
 
53
- expect(tokens[0]!.value).toBe('div');
54
- expect(tokens[1]!.value).toBe('div');
48
+ expect(tokens[0]!.value).toBe("div");
49
+ expect(tokens[1]!.value).toBe("div");
55
50
  });
56
51
  });
57
52
 
58
- describe('Attributes', () => {
59
- it('should parse attributes with double quotes', () => {
53
+ describe("Attributes", () => {
54
+ it("should parse attributes with double quotes", () => {
60
55
  const tokens = tokenize('<div class="container" id="main">');
61
56
 
62
57
  expect(tokens[0]?.attributes).toEqual({
63
- class: 'container',
64
- id: 'main'
58
+ class: "container",
59
+ id: "main",
65
60
  });
66
61
  });
67
62
 
68
- it('should parse attributes with single quotes', () => {
63
+ it("should parse attributes with single quotes", () => {
69
64
  const tokens = tokenize(`<div class='container' id='main'>`);
70
65
 
71
66
  expect(tokens[0]?.attributes).toEqual({
72
- class: 'container',
73
- id: 'main'
67
+ class: "container",
68
+ id: "main",
74
69
  });
75
70
  });
76
71
 
77
- it('should parse unquoted attributes', () => {
78
- const tokens = tokenize('<div class=container id=main>');
72
+ it("should parse unquoted attributes", () => {
73
+ const tokens = tokenize("<div class=container id=main>");
79
74
 
80
75
  expect(tokens[0]?.attributes).toEqual({
81
- class: 'container',
82
- id: 'main'
76
+ class: "container",
77
+ id: "main",
83
78
  });
84
79
  });
85
80
 
86
- it('should parse boolean attributes', () => {
87
- const tokens = tokenize('<input disabled checked>');
81
+ it("should parse boolean attributes", () => {
82
+ const tokens = tokenize("<input disabled checked>");
88
83
 
89
84
  expect(tokens[0]?.attributes).toEqual({
90
- disabled: '',
91
- checked: ''
85
+ disabled: "",
86
+ checked: "",
92
87
  });
93
88
  });
94
89
 
95
- it('should handle mixed attribute types', () => {
90
+ it("should handle mixed attribute types", () => {
96
91
  const tokens = tokenize('<input type="text" disabled value=test>');
97
92
 
98
93
  expect(tokens[0]?.attributes).toEqual({
99
- type: 'text',
100
- disabled: '',
101
- value: 'test'
94
+ type: "text",
95
+ disabled: "",
96
+ value: "test",
102
97
  });
103
98
  });
104
99
 
105
- it('should handle attributes with special characters', () => {
100
+ it("should handle attributes with special characters", () => {
106
101
  const tokens = tokenize('<div data-test="value" aria-label="test">');
107
102
 
108
103
  expect(tokens[0]?.attributes).toEqual({
109
- 'data-test': 'value',
110
- 'aria-label': 'test'
104
+ "data-test": "value",
105
+ "aria-label": "test",
111
106
  });
112
107
  });
113
108
  });
114
109
 
115
- describe('Text Content', () => {
116
- it('should tokenize plain text', () => {
117
- const tokens = tokenize('Hello World');
110
+ describe("Text Content", () => {
111
+ it("should tokenize plain text", () => {
112
+ const tokens = tokenize("Hello World");
118
113
 
119
114
  expect(tokens).toHaveLength(2);
120
115
  expect(tokens[0]).toEqual({
121
116
  type: TokenType.TEXT,
122
- value: 'Hello World',
123
- position: expect.any(Object)
117
+ value: "Hello World",
118
+ position: expect.any(Object),
124
119
  });
125
120
  });
126
121
 
127
- it('should handle text with whitespace', () => {
128
- const tokens = tokenize(' Hello World ');
122
+ it("should handle text with whitespace", () => {
123
+ const tokens = tokenize(" Hello World ");
129
124
 
130
- expect(tokens[0]?.value).toBe(' Hello World ');
125
+ expect(tokens[0]?.value).toBe(" Hello World ");
131
126
  });
132
127
 
133
- it('should handle multiline text', () => {
134
- const tokens = tokenize('Line 1\nLine 2\nLine 3');
128
+ it("should handle multiline text", () => {
129
+ const tokens = tokenize("Line 1\nLine 2\nLine 3");
135
130
 
136
- expect(tokens[0]?.value).toBe('Line 1\nLine 2\nLine 3');
131
+ expect(tokens[0]?.value).toBe("Line 1\nLine 2\nLine 3");
137
132
  });
138
133
  });
139
134
 
140
- describe('HTML Entities', () => {
141
- it('should parse named entities', () => {
142
- const tokens = tokenize('&amp; &lt; &gt; &quot; &nbsp;');
135
+ describe("HTML Entities", () => {
136
+ it("should parse named entities", () => {
137
+ const tokens = tokenize("&amp; &lt; &gt; &quot; &nbsp;");
143
138
 
144
139
  expect(tokens[0]?.value).toBe('& < > " \u00A0');
145
140
  });
146
141
 
147
- it('should parse numeric entities', () => {
148
- const tokens = tokenize('&#65; &#66; &#67;');
142
+ it("should parse numeric entities", () => {
143
+ const tokens = tokenize("&#65; &#66; &#67;");
149
144
 
150
- expect(tokens[0]?.value).toBe('A B C');
145
+ expect(tokens[0]?.value).toBe("A B C");
151
146
  });
152
147
 
153
- it('should parse hexadecimal entities', () => {
154
- const tokens = tokenize('&#x41; &#x42; &#x43;');
148
+ it("should parse hexadecimal entities", () => {
149
+ const tokens = tokenize("&#x41; &#x42; &#x43;");
155
150
 
156
- expect(tokens[0]?.value).toBe('A B C');
151
+ expect(tokens[0]?.value).toBe("A B C");
157
152
  });
158
153
 
159
- it('should handle entities in attributes', () => {
154
+ it("should handle entities in attributes", () => {
160
155
  const tokens = tokenize('<div title="&quot;Hello&quot;">');
161
156
 
162
157
  expect(tokens[0]?.attributes!.title).toBe('"Hello"');
163
158
  });
164
159
 
165
- it('should handle unknown entities', () => {
166
- const tokens = tokenize('&unknown;');
160
+ it("should handle unknown entities", () => {
161
+ const tokens = tokenize("&unknown;");
167
162
 
168
- expect(tokens[0]?.value).toBe('&unknown;');
163
+ expect(tokens[0]?.value).toBe("&unknown;");
169
164
  });
170
165
  });
171
166
 
172
- describe('Comments', () => {
173
- it('should parse HTML comments', () => {
174
- const tokens = tokenize('<!-- This is a comment -->');
167
+ describe("Comments", () => {
168
+ it("should parse HTML comments", () => {
169
+ const tokens = tokenize("<!-- This is a comment -->");
175
170
 
176
171
  expect(tokens[0]).toEqual({
177
172
  type: TokenType.COMMENT,
178
- value: ' This is a comment ',
179
- position: expect.any(Object)
173
+ value: " This is a comment ",
174
+ position: expect.any(Object),
180
175
  });
181
176
  });
182
177
 
183
- it('should handle multiline comments', () => {
184
- const tokens = tokenize(`<!-- \n Multi line\n comment\n -->`);
178
+ it("should handle multiline comments", () => {
179
+ const tokens = tokenize(
180
+ `<!-- \n Multi line\n comment\n -->`,
181
+ );
185
182
 
186
183
  expect(tokens[0]?.type).toBe(TokenType.COMMENT);
187
- expect(tokens[0]?.value).toContain('Multi line');
184
+ expect(tokens[0]?.value).toContain("Multi line");
188
185
  });
189
186
 
190
- it('should handle empty comments', () => {
191
- const tokens = tokenize('<!---->');
187
+ it("should handle empty comments", () => {
188
+ const tokens = tokenize("<!---->");
192
189
 
193
190
  expect(tokens[0]).toEqual({
194
191
  type: TokenType.COMMENT,
195
- value: '',
196
- position: expect.any(Object)
192
+ value: "",
193
+ position: expect.any(Object),
197
194
  });
198
195
  });
199
196
  });
200
197
 
201
- describe('CDATA Sections (HTML5: treated as bogus comments)', () => {
202
- it('should parse CDATA sections as bogus comments in HTML5', () => {
203
- const tokens = tokenize('<![CDATA[Some data]]>');
198
+ describe("CDATA Sections (HTML5: treated as bogus comments)", () => {
199
+ it("should parse CDATA sections as bogus comments in HTML5", () => {
200
+ const tokens = tokenize("<![CDATA[Some data]]>");
204
201
 
205
202
  expect(tokens[0]).toEqual({
206
203
  type: TokenType.COMMENT,
207
- value: '[CDATA[Some data]]',
208
- position: expect.any(Object)
204
+ value: "[CDATA[Some data]]",
205
+ position: expect.any(Object),
209
206
  });
210
207
  });
211
208
 
212
- it('should handle CDATA with special characters as bogus comment', () => {
209
+ it("should handle CDATA with special characters as bogus comment", () => {
213
210
  const tokens = tokenize('<![CDATA[<script>alert("test");</script>]]>');
214
211
 
215
212
  expect(tokens[0]?.value).toBe('[CDATA[<script>alert("test");</script>]]');
216
213
  });
217
214
  });
218
215
 
219
- describe('DOCTYPE Declaration', () => {
220
- it('should parse DOCTYPE declaration', () => {
221
- const tokens = tokenize('<!DOCTYPE html>');
216
+ describe("DOCTYPE Declaration", () => {
217
+ it("should parse DOCTYPE declaration", () => {
218
+ const tokens = tokenize("<!DOCTYPE html>");
222
219
 
223
220
  expect(tokens[0]).toEqual({
224
221
  type: TokenType.DOCTYPE,
225
- value: 'html',
226
- position: expect.any(Object)
222
+ value: "html",
223
+ position: expect.any(Object),
227
224
  });
228
225
  });
229
226
 
230
- it('should parse complex DOCTYPE', () => {
231
- const tokens = tokenize('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">');
227
+ it("should parse complex DOCTYPE", () => {
228
+ const tokens = tokenize(
229
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">',
230
+ );
232
231
 
233
232
  expect(tokens[0]?.type).toBe(TokenType.DOCTYPE);
234
- expect(tokens[0]?.value).toBe('html');
233
+ expect(tokens[0]?.value).toBe("html");
235
234
  });
236
235
  });
237
236
 
238
- describe('Processing Instructions (HTML5: treated as bogus comments)', () => {
239
- it('should parse XML processing instruction as bogus comment', () => {
237
+ describe("Processing Instructions (HTML5: treated as bogus comments)", () => {
238
+ it("should parse XML processing instruction as bogus comment", () => {
240
239
  const tokens = tokenize('<?xml version="1.0" encoding="UTF-8"?>');
241
240
 
242
241
  expect(tokens[0]).toEqual({
243
242
  type: TokenType.COMMENT,
244
243
  value: '?xml version="1.0" encoding="UTF-8"?',
245
- position: expect.any(Object)
244
+ position: expect.any(Object),
246
245
  });
247
246
  });
248
247
 
249
- it('should parse PHP-style processing instruction as bogus comment', () => {
248
+ it("should parse PHP-style processing instruction as bogus comment", () => {
250
249
  const tokens = tokenize('<?php echo "Hello"; ?>');
251
250
 
252
251
  expect(tokens[0]?.type).toBe(TokenType.COMMENT);
@@ -254,8 +253,8 @@ describe('HTML Tokenizer', () => {
254
253
  });
255
254
  });
256
255
 
257
- describe('Complex HTML Documents', () => {
258
- it('should tokenize complete HTML document', () => {
256
+ describe("Complex HTML Documents", () => {
257
+ it("should tokenize complete HTML document", () => {
259
258
  const html = `<!DOCTYPE html>
260
259
  <html lang="en">
261
260
  <head>
@@ -273,12 +272,14 @@ describe('HTML Tokenizer', () => {
273
272
  expect(tokens[0]?.type).toBe(TokenType.DOCTYPE);
274
273
  expect(tokens[tokens?.length - 1]?.type).toBe(TokenType.EOF);
275
274
 
276
- const htmlTag = tokens.find(t => t.type === TokenType.TAG_OPEN && t.value === 'html');
275
+ const htmlTag = tokens.find(
276
+ (t) => t.type === TokenType.TAG_OPEN && t.value === "html",
277
+ );
277
278
  expect(htmlTag).toBeDefined();
278
- expect(htmlTag!.attributes!.lang).toBe('en');
279
+ expect(htmlTag!.attributes!.lang).toBe("en");
279
280
  });
280
281
 
281
- it('should handle mixed content', () => {
282
+ it("should handle mixed content", () => {
282
283
  const html = `<div>
283
284
  Text before <!-- comment -->
284
285
  <span>nested</span>
@@ -287,128 +288,134 @@ describe('HTML Tokenizer', () => {
287
288
 
288
289
  const tokens = tokenize(html);
289
290
 
290
- expect(tokens.some(t => t.type === TokenType.TAG_OPEN)).toBe(true);
291
- expect(tokens.some(t => t.type === TokenType.TEXT)).toBe(true);
292
- expect(tokens.some(t => t.type === TokenType.COMMENT)).toBe(true);
291
+ expect(tokens.some((t) => t.type === TokenType.TAG_OPEN)).toBe(true);
292
+ expect(tokens.some((t) => t.type === TokenType.TEXT)).toBe(true);
293
+ expect(tokens.some((t) => t.type === TokenType.COMMENT)).toBe(true);
293
294
  });
294
295
  });
295
296
 
296
- describe('Edge Cases', () => {
297
- it('should handle empty input', () => {
298
- const tokens = tokenize('');
297
+ describe("Edge Cases", () => {
298
+ it("should handle empty input", () => {
299
+ const tokens = tokenize("");
299
300
 
300
301
  expect(tokens).toHaveLength(1);
301
302
  expect(tokens[0]?.type).toBe(TokenType.EOF);
302
303
  });
303
304
 
304
- it('should handle whitespace only', () => {
305
- const tokens = tokenize(' \n\t ');
305
+ it("should handle whitespace only", () => {
306
+ const tokens = tokenize(" \n\t ");
306
307
 
307
308
  expect(tokens).toHaveLength(2);
308
309
  expect(tokens[0]?.type).toBe(TokenType.TEXT);
309
- expect(tokens[0]?.value).toBe(' \n\t ');
310
+ expect(tokens[0]?.value).toBe(" \n\t ");
310
311
  });
311
312
 
312
- it('should handle malformed tags', () => {
313
+ it("should handle malformed tags", () => {
313
314
  const tokens = tokenize('<div class="test>');
314
315
 
315
316
  expect(tokens[0]?.type).toBe(TokenType.TAG_OPEN);
316
- expect(tokens[0]?.value).toBe('div');
317
+ expect(tokens[0]?.value).toBe("div");
317
318
  });
318
319
 
319
- it('should handle unclosed comments', () => {
320
- const tokens = tokenize('<!-- unclosed comment');
320
+ it("should handle unclosed comments", () => {
321
+ const tokens = tokenize("<!-- unclosed comment");
321
322
 
322
323
  expect(tokens[0]?.type).toBe(TokenType.COMMENT);
323
- expect(tokens[0]?.value).toBe(' unclosed comment');
324
+ expect(tokens[0]?.value).toBe(" unclosed comment");
324
325
  });
325
326
  });
326
327
 
327
- describe('Advanced Edge Cases', () => {
328
- it('should handle attributes with no spaces', () => {
328
+ describe("Advanced Edge Cases", () => {
329
+ it("should handle attributes with no spaces", () => {
329
330
  const tokens = tokenize('<div class="test"id="main"data-value="123">');
330
331
  expect(tokens.length).toBeGreaterThan(0);
331
332
  const tag = tokens[0]!;
332
333
 
333
334
  expect(tag.attributes).toEqual({
334
- class: 'test',
335
- id: 'main',
336
- 'data-value': '123'
335
+ class: "test",
336
+ id: "main",
337
+ "data-value": "123",
337
338
  });
338
339
  });
339
340
 
340
- it('should handle attributes with excessive spaces', () => {
341
+ it("should handle attributes with excessive spaces", () => {
341
342
  const tokens = tokenize('<div class = "test" id = "main" >');
342
343
  expect(tokens.length).toBeGreaterThan(0);
343
344
  const tag = tokens[0]!;
344
345
 
345
346
  expect(tag.attributes).toEqual({
346
- class: 'test',
347
- id: 'main'
347
+ class: "test",
348
+ id: "main",
348
349
  });
349
350
  });
350
351
 
351
- it('should handle mixed quote styles in same tag', () => {
352
- const tokens = tokenize(`<div class='single' id="double" data-test='mix "quoted" content'>`);
352
+ it("should handle mixed quote styles in same tag", () => {
353
+ const tokens = tokenize(
354
+ `<div class='single' id="double" data-test='mix "quoted" content'>`,
355
+ );
353
356
  expect(tokens.length).toBeGreaterThan(0);
354
357
  const tag = tokens[0]!;
355
358
 
356
- expect(tag.attributes!.class).toBe('single');
357
- expect(tag.attributes!.id).toBe('double');
358
- expect(tag.attributes!['data-test']).toBe('mix "quoted" content');
359
+ expect(tag.attributes!.class).toBe("single");
360
+ expect(tag.attributes!.id).toBe("double");
361
+ expect(tag.attributes!["data-test"]).toBe('mix "quoted" content');
359
362
  });
360
363
 
361
- it('should handle malformed quotes gracefully', () => {
364
+ it("should handle malformed quotes gracefully", () => {
362
365
  const tokens = tokenize('<div class="unclosed id="test">');
363
366
  expect(tokens.length).toBeGreaterThan(0);
364
367
  const tag = tokens[0]!;
365
368
 
366
369
  expect(tag.type).toBe(TokenType.TAG_OPEN);
367
- expect(tag.value).toBe('div');
370
+ expect(tag.value).toBe("div");
368
371
  expect(tag.attributes).toBeDefined();
369
372
  });
370
373
 
371
- it('should handle empty tag names', () => {
372
- const tokens = tokenize('<>content</>');
374
+ it("should handle empty tag names", () => {
375
+ const tokens = tokenize("<>content</>");
373
376
 
374
377
  expect(tokens.length).toBeGreaterThan(0);
375
378
  });
376
379
 
377
- it('should handle tags with numbers and special characters', () => {
380
+ it("should handle tags with numbers and special characters", () => {
378
381
  const tokens = tokenize('<h1 class="heading-1" data-level="1">');
379
382
  expect(tokens.length).toBeGreaterThan(0);
380
383
  const tag = tokens[0]!;
381
384
 
382
- expect(tag.value).toBe('h1');
385
+ expect(tag.value).toBe("h1");
383
386
  expect(tag.attributes).toEqual({
384
- class: 'heading-1',
385
- 'data-level': '1'
387
+ class: "heading-1",
388
+ "data-level": "1",
386
389
  });
387
390
  });
388
391
 
389
- it('should handle extremely long attribute values', () => {
390
- const longValue = 'a'.repeat(10000);
392
+ it("should handle extremely long attribute values", () => {
393
+ const longValue = "a".repeat(10000);
391
394
  const tokens = tokenize(`<div data-long="${longValue}">`);
392
395
  expect(tokens.length).toBeGreaterThan(0);
393
396
  const tag = tokens[0]!;
394
397
 
395
- expect(tag.attributes!['data-long']).toBe(longValue);
398
+ expect(tag.attributes!["data-long"]).toBe(longValue);
396
399
  });
397
400
 
398
- it('should handle unicode characters in attributes', () => {
399
- const tokens = tokenize('<div title="测试" data-emoji="🚀" class="café">');
401
+ it("should handle unicode characters in attributes", () => {
402
+ const tokens = tokenize(
403
+ '<div title="测试" data-emoji="🚀" class="café">',
404
+ );
400
405
  expect(tokens.length).toBeGreaterThan(0);
401
406
  const tag = tokens[0]!;
402
407
 
403
408
  expect(tag.attributes).toEqual({
404
- title: '测试',
405
- 'data-emoji': '🚀',
406
- class: 'café'
409
+ title: "测试",
410
+ "data-emoji": "🚀",
411
+ class: "café",
407
412
  });
408
413
  });
409
414
 
410
- it('should handle nested quotes in attributes', () => {
411
- const tokens = tokenize(`<div onclick="alert('Hello')" title='She said "hi"'>`);
415
+ it("should handle nested quotes in attributes", () => {
416
+ const tokens = tokenize(
417
+ `<div onclick="alert('Hello')" title='She said "hi"'>`,
418
+ );
412
419
  expect(tokens.length).toBeGreaterThan(0);
413
420
  const tag = tokens[0]!;
414
421
 
@@ -416,20 +423,22 @@ describe('HTML Tokenizer', () => {
416
423
  expect(tag.attributes!.title).toBe('She said "hi"');
417
424
  });
418
425
 
419
- it('should handle attributes without values', () => {
420
- const tokens = tokenize('<input type="checkbox" checked disabled readonly>');
426
+ it("should handle attributes without values", () => {
427
+ const tokens = tokenize(
428
+ '<input type="checkbox" checked disabled readonly>',
429
+ );
421
430
  expect(tokens.length).toBeGreaterThan(0);
422
431
  const tag = tokens[0]!;
423
432
 
424
433
  expect(tag.attributes).toEqual({
425
- type: 'checkbox',
426
- checked: '',
427
- disabled: '',
428
- readonly: ''
434
+ type: "checkbox",
435
+ checked: "",
436
+ disabled: "",
437
+ readonly: "",
429
438
  });
430
439
  });
431
440
 
432
- it('should handle CDATA as bogus comment with complex content', () => {
441
+ it("should handle CDATA as bogus comment with complex content", () => {
433
442
  const complexContent = `
434
443
  function it() {
435
444
  return "<div>HTML inside JS</div>";
@@ -441,18 +450,21 @@ describe('HTML Tokenizer', () => {
441
450
  const cdataToken = tokens[0]!;
442
451
 
443
452
  expect(cdataToken.type).toBe(TokenType.COMMENT);
444
- expect(cdataToken.value).toBe('[CDATA[' + complexContent + ']]');
453
+ expect(cdataToken.value).toBe("[CDATA[" + complexContent + "]]");
445
454
  });
446
455
 
447
- it('should handle processing instructions as bogus comments', () => {
456
+ it("should handle processing instructions as bogus comments", () => {
448
457
  const tests = [
449
- { input: '<?xml version="1.0" encoding="UTF-8"?>', expected: 'xml' },
450
- { input: '<?xml-stylesheet type="text/xsl" href="style.xsl"?>', expected: 'xml' },
451
- { input: '<?php echo "Hello World"; ?>', expected: 'php' },
452
- { input: '<?python print("Hello") ?>', expected: 'python' }
458
+ { input: '<?xml version="1.0" encoding="UTF-8"?>', expected: "xml" },
459
+ {
460
+ input: '<?xml-stylesheet type="text/xsl" href="style.xsl"?>',
461
+ expected: "xml",
462
+ },
463
+ { input: '<?php echo "Hello World"; ?>', expected: "php" },
464
+ { input: '<?python print("Hello") ?>', expected: "python" },
453
465
  ];
454
466
 
455
- tests.forEach(test => {
467
+ tests.forEach((test) => {
456
468
  const tokens = tokenize(test.input);
457
469
  const piToken = tokens[0]!;
458
470
 
@@ -461,16 +473,16 @@ describe('HTML Tokenizer', () => {
461
473
  });
462
474
  });
463
475
 
464
- it('should handle comments with special content', () => {
476
+ it("should handle comments with special content", () => {
465
477
  const specialComments = [
466
- '<!-- TODO: Fix this -->',
478
+ "<!-- TODO: Fix this -->",
467
479
  '<!-- <script>alert("xss")</script> -->',
468
- '<!-- Multi\nline\ncomment -->',
469
- '<!-- Comment with -- inside -->',
470
- '<!--[if IE]><![endif]-->'
480
+ "<!-- Multi\nline\ncomment -->",
481
+ "<!-- Comment with -- inside -->",
482
+ "<!--[if IE]><![endif]-->",
471
483
  ];
472
484
 
473
- specialComments.forEach(comment => {
485
+ specialComments.forEach((comment) => {
474
486
  const tokens = tokenize(comment);
475
487
  const commentToken = tokens[0]!;
476
488
 
@@ -478,7 +490,7 @@ describe('HTML Tokenizer', () => {
478
490
  });
479
491
  });
480
492
 
481
- it('should handle mixed content with all token types (HTML5 mode)', () => {
493
+ it("should handle mixed content with all token types (HTML5 mode)", () => {
482
494
  const html = `
483
495
  <!DOCTYPE html>
484
496
  <!-- Main document -->
@@ -503,10 +515,10 @@ describe('HTML Tokenizer', () => {
503
515
  [TokenType.TAG_OPEN]: 0,
504
516
  [TokenType.TAG_CLOSE]: 0,
505
517
  [TokenType.TEXT]: 0,
506
- [TokenType.EOF]: 0
518
+ [TokenType.EOF]: 0,
507
519
  };
508
520
 
509
- tokens.forEach(token => {
521
+ tokens.forEach((token) => {
510
522
  if (token.type in tokenCounts) {
511
523
  tokenCounts[token.type]++;
512
524
  }
@@ -519,16 +531,15 @@ describe('HTML Tokenizer', () => {
519
531
  expect(tokenCounts[TokenType.TEXT]).toBeGreaterThan(0);
520
532
  expect(tokenCounts[TokenType.EOF]).toBe(1);
521
533
  });
522
- })
523
-
524
- describe('Performance and Stress Tests', () => {
525
- it('should handle very large documents', () => {
534
+ });
526
535
 
527
- let html = '<div>';
536
+ describe("Performance and Stress Tests", () => {
537
+ it("should handle very large documents", () => {
538
+ let html = "<div>";
528
539
  for (let i = 0; i < 1000; i++) {
529
540
  html += `<p id="para-${i}" class="paragraph">Paragraph ${i} content</p>`;
530
541
  }
531
- html += '</div>';
542
+ html += "</div>";
532
543
 
533
544
  const startTime = Date.now();
534
545
  const tokens = tokenize(html);
@@ -538,16 +549,16 @@ describe('HTML Tokenizer', () => {
538
549
  expect(endTime - startTime).toBeLessThan(1000);
539
550
  });
540
551
 
541
- it('should handle deeply nested structures', () => {
542
- let html = '';
552
+ it("should handle deeply nested structures", () => {
553
+ let html = "";
543
554
  const depth = 100;
544
555
 
545
556
  for (let i = 0; i < depth; i++) {
546
557
  html += `<div level="${i}">`;
547
558
  }
548
- html += 'Content';
559
+ html += "Content";
549
560
  for (let i = 0; i < depth; i++) {
550
- html += '</div>';
561
+ html += "</div>";
551
562
  }
552
563
 
553
564
  const tokens = tokenize(html);
@@ -555,23 +566,23 @@ describe('HTML Tokenizer', () => {
555
566
  expect(tokens.length).toBe(depth * 2 + 2);
556
567
  });
557
568
 
558
- it('should handle many attributes per element', () => {
559
- let html = '<div';
569
+ it("should handle many attributes per element", () => {
570
+ let html = "<div";
560
571
  for (let i = 0; i < 100; i++) {
561
572
  html += ` attr-${i}="value-${i}"`;
562
573
  }
563
- html += '>';
574
+ html += ">";
564
575
 
565
576
  const tokens = tokenize(html);
566
577
  const divTag = tokens[0]!;
567
578
 
568
579
  expect(Object.keys(divTag.attributes!).length).toBe(100);
569
- expect(divTag.attributes!['attr-50']).toBe('value-50');
580
+ expect(divTag.attributes!["attr-50"]).toBe("value-50");
570
581
  });
571
- })
582
+ });
572
583
 
573
- describe('Real-world Scenarios', () => {
574
- it('should handle SVG elements', () => {
584
+ describe("Real-world Scenarios", () => {
585
+ it("should handle SVG elements", () => {
575
586
  const svg = `
576
587
  <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">
577
588
  <circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"/>
@@ -581,15 +592,15 @@ describe('HTML Tokenizer', () => {
581
592
 
582
593
  const tokens = tokenize(svg);
583
594
 
584
- const svgTag = tokens.find(token => token.value === 'svg')!;
585
- expect(svgTag.attributes!.xmlns).toBe('http://www.w3.org/2000/svg');
595
+ const svgTag = tokens.find((token) => token.value === "svg")!;
596
+ expect(svgTag.attributes!.xmlns).toBe("http://www.w3.org/2000/svg");
586
597
 
587
- const circleTag = tokens.find(token => token.value === 'circle')!;
598
+ const circleTag = tokens.find((token) => token.value === "circle")!;
588
599
  expect(circleTag.isSelfClosing).toBe(true);
589
- expect(circleTag.attributes!.fill).toBe('red');
600
+ expect(circleTag.attributes!.fill).toBe("red");
590
601
  });
591
602
 
592
- it('should handle script and style tags', () => {
603
+ it("should handle script and style tags", () => {
593
604
  const html = `
594
605
  <script type="text/javascript">
595
606
  function hello() {
@@ -604,14 +615,14 @@ describe('HTML Tokenizer', () => {
604
615
 
605
616
  const tokens = tokenize(html);
606
617
 
607
- const scriptTag = tokens.find(token => token.value === 'script')!;
608
- const styleTag = tokens.find(token => token.value === 'style')!;
618
+ const scriptTag = tokens.find((token) => token.value === "script")!;
619
+ const styleTag = tokens.find((token) => token.value === "style")!;
609
620
 
610
- expect(scriptTag.attributes!.type).toBe('text/javascript');
611
- expect(styleTag.attributes!.type).toBe('text/css');
621
+ expect(scriptTag.attributes!.type).toBe("text/javascript");
622
+ expect(styleTag.attributes!.type).toBe("text/css");
612
623
  });
613
624
 
614
- it('should handle form elements with complex attributes', () => {
625
+ it("should handle form elements with complex attributes", () => {
615
626
  const html = `
616
627
  <form method="POST" action="/submit" enctype="multipart/form-data">
617
628
  <input type="email" name="email" required pattern="[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$" title="Please enter a valid email">
@@ -624,17 +635,17 @@ describe('HTML Tokenizer', () => {
624
635
 
625
636
  const tokens = tokenize(html);
626
637
 
627
- const inputTag = tokens.find(token => token.value === 'input')!;
628
- expect(inputTag.attributes!.pattern).toContain('@');
629
- expect(inputTag.attributes!.required).toBe('');
638
+ const inputTag = tokens.find((token) => token.value === "input")!;
639
+ expect(inputTag.attributes!.pattern).toContain("@");
640
+ expect(inputTag.attributes!.required).toBe("");
630
641
 
631
- const selectTag = tokens.find(token => token.value === 'select')!;
632
- expect(selectTag.attributes!.multiple).toBe('');
642
+ const selectTag = tokens.find((token) => token.value === "select")!;
643
+ expect(selectTag.attributes!.multiple).toBe("");
633
644
  });
634
- })
645
+ });
635
646
 
636
- describe('Error Recovery', () => {
637
- it('should handle incomplete tags gracefully', () => {
647
+ describe("Error Recovery", () => {
648
+ it("should handle incomplete tags gracefully", () => {
638
649
  const malformedHTML = '<div class="test><p>Content</p>';
639
650
  const tokens = tokenize(malformedHTML);
640
651
 
@@ -642,104 +653,105 @@ describe('HTML Tokenizer', () => {
642
653
  expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
643
654
  });
644
655
 
645
- it('should handle unmatched quotes in attributes', () => {
656
+ it("should handle unmatched quotes in attributes", () => {
646
657
  const html = '<div class="test id=\'main">Content</div>';
647
658
  const tokens = tokenize(html);
648
659
 
649
- const divTag = tokens.find(token => token.value === 'div')!;
660
+ const divTag = tokens.find((token) => token.value === "div")!;
650
661
  expect(divTag).toBeDefined();
651
662
  });
652
663
 
653
- it('should continue parsing after errors', () => {
654
- const html = '<div><p>Valid paragraph</p><span>Valid span</span>';
664
+ it("should continue parsing after errors", () => {
665
+ const html = "<div><p>Valid paragraph</p><span>Valid span</span>";
655
666
  const tokens = tokenize(html);
656
667
 
657
- const hasValidElements = tokens.some(token => token.value === 'p') ||
658
- tokens.some(token => token.value === 'span');
668
+ const hasValidElements =
669
+ tokens.some((token) => token.value === "p") ||
670
+ tokens.some((token) => token.value === "span");
659
671
  expect(hasValidElements).toBe(true);
660
672
  });
661
673
 
662
- it('should handle empty angle brackets <>', () => {
663
- const html = '<>text<div>content</div>';
674
+ it("should handle empty angle brackets <>", () => {
675
+ const html = "<>text<div>content</div>";
664
676
  const tokens = tokenize(html);
665
677
 
666
678
  // Should skip the invalid <> and continue parsing
667
679
  expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
668
- const divToken = tokens.find(t => t.value === 'div');
680
+ const divToken = tokens.find((t) => t.value === "div");
669
681
  expect(divToken).toBeDefined();
670
682
  });
671
683
 
672
- it('should handle angle bracket with only space < >', () => {
673
- const html = '< >text<p>paragraph</p>';
684
+ it("should handle angle bracket with only space < >", () => {
685
+ const html = "< >text<p>paragraph</p>";
674
686
  const tokens = tokenize(html);
675
687
 
676
688
  expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
677
- const pToken = tokens.find(t => t.value === 'p');
689
+ const pToken = tokens.find((t) => t.value === "p");
678
690
  expect(pToken).toBeDefined();
679
691
  });
680
692
 
681
- it('should handle tag with no valid name', () => {
682
- const html = '<123>text</123><div>ok</div>';
693
+ it("should handle tag with no valid name", () => {
694
+ const html = "<123>text</123><div>ok</div>";
683
695
  const tokens = tokenize(html);
684
696
 
685
697
  // Tags starting with numbers are invalid, should be treated as text
686
698
  expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
687
- const divToken = tokens.find(t => t.value === 'div');
699
+ const divToken = tokens.find((t) => t.value === "div");
688
700
  expect(divToken).toBeDefined();
689
701
  });
690
702
  });
691
703
 
692
- describe('Entity Edge Cases', () => {
693
- it('should handle entity without semicolon with valid prefix', () => {
704
+ describe("Entity Edge Cases", () => {
705
+ it("should handle entity without semicolon with valid prefix", () => {
694
706
  // &nbsp followed by other text (no semicolon) should decode &nbsp
695
- const tokens = tokenize('<div>&nbsptext</div>');
696
-
697
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
707
+ const tokens = tokenize("<div>&nbsptext</div>");
708
+
709
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
698
710
  expect(textToken).toBeDefined();
699
711
  // Should decode &nbsp (non-breaking space) and keep "text"
700
- expect(textToken!.value).toContain('text');
712
+ expect(textToken!.value).toContain("text");
701
713
  });
702
714
 
703
- it('should handle entity without semicolon - lt prefix', () => {
704
- const tokens = tokenize('<div>&ltvalue</div>');
705
-
706
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
715
+ it("should handle entity without semicolon - lt prefix", () => {
716
+ const tokens = tokenize("<div>&ltvalue</div>");
717
+
718
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
707
719
  expect(textToken).toBeDefined();
708
- expect(textToken!.value).toBe('&ltvalue');
720
+ expect(textToken!.value).toBe("&ltvalue");
709
721
  });
710
722
 
711
- it('should handle entity without semicolon - gt prefix', () => {
712
- const tokens = tokenize('<div>&gtvalue</div>');
713
-
714
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
723
+ it("should handle entity without semicolon - gt prefix", () => {
724
+ const tokens = tokenize("<div>&gtvalue</div>");
725
+
726
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
715
727
  expect(textToken).toBeDefined();
716
- expect(textToken!.value).toBe('&gtvalue');
728
+ expect(textToken!.value).toBe("&gtvalue");
717
729
  });
718
730
 
719
- it('should handle entity without semicolon - amp prefix', () => {
720
- const tokens = tokenize('<div>&ampvalue</div>');
721
-
722
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
731
+ it("should handle entity without semicolon - amp prefix", () => {
732
+ const tokens = tokenize("<div>&ampvalue</div>");
733
+
734
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
723
735
  expect(textToken).toBeDefined();
724
- expect(textToken!.value).toBe('&ampvalue');
736
+ expect(textToken!.value).toBe("&ampvalue");
725
737
  });
726
738
 
727
- it('should handle unknown entity gracefully', () => {
728
- const tokens = tokenize('<div>&unknownentity;</div>');
729
-
730
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
739
+ it("should handle unknown entity gracefully", () => {
740
+ const tokens = tokenize("<div>&unknownentity;</div>");
741
+
742
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
731
743
  expect(textToken).toBeDefined();
732
744
  // Unknown entity should be kept as-is
733
- expect(textToken!.value).toBe('&unknownentity;');
745
+ expect(textToken!.value).toBe("&unknownentity;");
734
746
  });
735
747
 
736
- it('should handle partial entity name with no matching prefix', () => {
737
- const tokens = tokenize('<div>&xyz</div>');
738
-
739
- const textToken = tokens.find(t => t.type === TokenType.TEXT);
748
+ it("should handle partial entity name with no matching prefix", () => {
749
+ const tokens = tokenize("<div>&xyz</div>");
750
+
751
+ const textToken = tokens.find((t) => t.type === TokenType.TEXT);
740
752
  expect(textToken).toBeDefined();
741
753
  // No valid entity prefix, keep as-is
742
- expect(textToken!.value).toBe('&xyz');
754
+ expect(textToken!.value).toBe("&xyz");
743
755
  });
744
- })
756
+ });
745
757
  });