@tkeron/html-parser 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tkeron/html-parser",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "description": "A fast and lightweight HTML parser for Bun",
5
5
  "main": "index.js",
6
6
  "module": "index.ts",
@@ -856,7 +856,7 @@ function updateElementContent(element: any): void {
856
856
  const innerHTML = element.childNodes
857
857
  .map((child: any) => {
858
858
  if (child.nodeType === NodeType.TEXT_NODE) {
859
- return child.textContent;
859
+ return escapeTextContent(child.textContent || "");
860
860
  } else if (child.nodeType === NodeType.ELEMENT_NODE) {
861
861
  return child.outerHTML;
862
862
  } else if (child.nodeType === NodeType.COMMENT_NODE) {
@@ -1153,7 +1153,7 @@ export function getInnerHTML(element: any): string {
1153
1153
  if (child.nodeType === NodeType.ELEMENT_NODE) {
1154
1154
  innerHTML += child.outerHTML;
1155
1155
  } else if (child.nodeType === NodeType.TEXT_NODE) {
1156
- innerHTML += child.textContent || "";
1156
+ innerHTML += escapeTextContent(child.textContent || "");
1157
1157
  } else if (child.nodeType === NodeType.COMMENT_NODE) {
1158
1158
  innerHTML += `<!--${child.data || ""}-->`;
1159
1159
  }
@@ -3,31 +3,35 @@ import { parseHTML } from "../src/index";
3
3
 
4
4
  describe("Edge Cases: Scripts with HTML inside", () => {
5
5
  it("should handle script with less-than and greater-than operators", () => {
6
- const doc = parseHTML('<script>if (a < b && c > d) { console.log("ok"); }</script>');
7
- const script = doc.querySelector('script');
6
+ const doc = parseHTML(
7
+ '<script>if (a < b && c > d) { console.log("ok"); }</script>',
8
+ );
9
+ const script = doc.querySelector("script");
8
10
  expect(script).toBeTruthy();
9
- expect(script!.textContent).toContain('a < b');
10
- expect(script!.textContent).toContain('c > d');
11
+ expect(script!.textContent).toContain("a < b");
12
+ expect(script!.textContent).toContain("c > d");
11
13
  });
12
14
 
13
15
  it("should handle script containing </script> as a string", () => {
14
16
  const doc = parseHTML('<script>var html = "<\\/script>";</script>');
15
- const script = doc.querySelector('script');
17
+ const script = doc.querySelector("script");
16
18
  expect(script).toBeTruthy();
17
19
  });
18
20
 
19
21
  it("should handle script with HTML-like content in strings", () => {
20
- const doc = parseHTML('<script>var html = "<div class=\\"test\\">Hello</div>";</script>');
21
- const script = doc.querySelector('script');
22
+ const doc = parseHTML(
23
+ '<script>var html = "<div class=\\"test\\">Hello</div>";</script>',
24
+ );
25
+ const script = doc.querySelector("script");
22
26
  expect(script).toBeTruthy();
23
- expect(script!.textContent).toContain('<div');
27
+ expect(script!.textContent).toContain("<div");
24
28
  });
25
29
 
26
30
  it("should handle script with template literals containing HTML", () => {
27
- const doc = parseHTML('<script>const tpl = `<div>${name}</div>`;</script>');
28
- const script = doc.querySelector('script');
31
+ const doc = parseHTML("<script>const tpl = `<div>${name}</div>`;</script>");
32
+ const script = doc.querySelector("script");
29
33
  expect(script).toBeTruthy();
30
- expect(script!.textContent).toContain('<div>');
34
+ expect(script!.textContent).toContain("<div>");
31
35
  });
32
36
 
33
37
  it("should handle multiple scripts with complex content", () => {
@@ -36,84 +40,86 @@ describe("Edge Cases: Scripts with HTML inside", () => {
36
40
  <script>var b = 3 > 1;</script>
37
41
  `;
38
42
  const doc = parseHTML(html);
39
- const scripts = doc.querySelectorAll('script');
43
+ const scripts = doc.querySelectorAll("script");
40
44
  expect(scripts.length).toBe(2);
41
45
  });
42
46
  });
43
47
 
44
48
  describe("Edge Cases: Template placeholders", () => {
45
49
  it("should preserve template placeholders", () => {
46
- const doc = parseHTML('<div>{{user.name}}</div>');
47
- const div = doc.querySelector('div');
48
- expect(div!.textContent).toBe('{{user.name}}');
50
+ const doc = parseHTML("<div>{{user.name}}</div>");
51
+ const div = doc.querySelector("div");
52
+ expect(div!.textContent).toBe("{{user.name}}");
49
53
  });
50
54
 
51
55
  it("should preserve placeholders in attributes", () => {
52
56
  const doc = parseHTML('<img src="{{imageUrl}}">');
53
- const img = doc.querySelector('img');
54
- expect(img!.getAttribute('src')).toBe('{{imageUrl}}');
57
+ const img = doc.querySelector("img");
58
+ expect(img!.getAttribute("src")).toBe("{{imageUrl}}");
55
59
  });
56
60
 
57
61
  it("should preserve custom attribute prefixes", () => {
58
- const doc = parseHTML('<div data-bind="value" x-on:click="handler" custom-attr="test"></div>');
59
- const div = doc.querySelector('div');
60
- expect(div!.getAttribute('data-bind')).toBe('value');
61
- expect(div!.getAttribute('x-on:click')).toBe('handler');
62
- expect(div!.getAttribute('custom-attr')).toBe('test');
62
+ const doc = parseHTML(
63
+ '<div data-bind="value" x-on:click="handler" custom-attr="test"></div>',
64
+ );
65
+ const div = doc.querySelector("div");
66
+ expect(div!.getAttribute("data-bind")).toBe("value");
67
+ expect(div!.getAttribute("x-on:click")).toBe("handler");
68
+ expect(div!.getAttribute("custom-attr")).toBe("test");
63
69
  });
64
70
 
65
71
  it("should preserve EJS/ERB style placeholders", () => {
66
- const doc = parseHTML('<div><%= user.name %></div>');
67
- const div = doc.querySelector('div');
68
- expect(div!.textContent).toBe('<%= user.name %>');
72
+ const doc = parseHTML("<div><%= user.name %></div>");
73
+ const div = doc.querySelector("div");
74
+ expect(div!.textContent).toBe("<%= user.name %>");
69
75
  });
70
76
 
71
77
  it("should handle nested template expressions", () => {
72
- const doc = parseHTML('<div>{{#each items}}{{this}}{{/each}}</div>');
73
- const div = doc.querySelector('div');
74
- expect(div!.textContent).toContain('{{#each items}}');
75
- expect(div!.textContent).toContain('{{/each}}');
78
+ const doc = parseHTML("<div>{{#each items}}{{this}}{{/each}}</div>");
79
+ const div = doc.querySelector("div");
80
+ expect(div!.textContent).toContain("{{#each items}}");
81
+ expect(div!.textContent).toContain("{{/each}}");
76
82
  });
77
83
  });
78
84
 
79
85
  describe("Edge Cases: Malformed but common HTML", () => {
80
86
  it("should handle unclosed paragraph tags", () => {
81
- const doc = parseHTML('<p>Párrafo 1<p>Párrafo 2');
82
- const paragraphs = doc.querySelectorAll('p');
87
+ const doc = parseHTML("<p>Párrafo 1<p>Párrafo 2");
88
+ const paragraphs = doc.querySelectorAll("p");
83
89
  expect(paragraphs.length).toBe(2);
84
90
  });
85
91
 
86
92
  it("should handle list items without parent list", () => {
87
- const doc = parseHTML('<li>Item 1</li><li>Item 2</li>');
88
- const items = doc.querySelectorAll('li');
93
+ const doc = parseHTML("<li>Item 1</li><li>Item 2</li>");
94
+ const items = doc.querySelectorAll("li");
89
95
  expect(items.length).toBe(2);
90
96
  });
91
97
 
92
98
  it("should handle nested unclosed tags", () => {
93
- const doc = parseHTML('<div><span>Text<div>Nested</div></span></div>');
94
- const div = doc.querySelector('div');
99
+ const doc = parseHTML("<div><span>Text<div>Nested</div></span></div>");
100
+ const div = doc.querySelector("div");
95
101
  expect(div).toBeTruthy();
96
102
  });
97
103
 
98
104
  it("should handle missing closing tags at end", () => {
99
- const doc = parseHTML('<div><span>Text');
100
- const div = doc.querySelector('div');
101
- const span = doc.querySelector('span');
105
+ const doc = parseHTML("<div><span>Text");
106
+ const div = doc.querySelector("div");
107
+ const span = doc.querySelector("span");
102
108
  expect(div).toBeTruthy();
103
109
  expect(span).toBeTruthy();
104
110
  });
105
111
 
106
112
  it("should handle extra closing tags", () => {
107
- const doc = parseHTML('<div>Text</div></div></span>');
108
- const div = doc.querySelector('div');
113
+ const doc = parseHTML("<div>Text</div></div></span>");
114
+ const div = doc.querySelector("div");
109
115
  expect(div).toBeTruthy();
110
- expect(div!.textContent).toBe('Text');
116
+ expect(div!.textContent).toBe("Text");
111
117
  });
112
118
 
113
119
  it("should handle incorrectly nested tags", () => {
114
- const doc = parseHTML('<b><i>Bold and italic</b></i>');
115
- const b = doc.querySelector('b');
116
- const i = doc.querySelector('i');
120
+ const doc = parseHTML("<b><i>Bold and italic</b></i>");
121
+ const b = doc.querySelector("b");
122
+ const i = doc.querySelector("i");
117
123
  expect(b).toBeTruthy();
118
124
  expect(i).toBeTruthy();
119
125
  });
@@ -121,97 +127,105 @@ describe("Edge Cases: Malformed but common HTML", () => {
121
127
 
122
128
  describe("Edge Cases: Significant whitespace", () => {
123
129
  it("should preserve whitespace in pre tags", () => {
124
- const doc = parseHTML('<pre> multiple spaces here </pre>');
125
- const pre = doc.querySelector('pre');
126
- expect(pre!.textContent).toBe(' multiple spaces here ');
130
+ const doc = parseHTML("<pre> multiple spaces here </pre>");
131
+ const pre = doc.querySelector("pre");
132
+ expect(pre!.textContent).toBe(" multiple spaces here ");
127
133
  });
128
134
 
129
135
  it("should preserve newlines in pre tags", () => {
130
- const doc = parseHTML('<pre>line1\nline2\nline3</pre>');
131
- const pre = doc.querySelector('pre');
132
- expect(pre!.textContent).toContain('\n');
136
+ const doc = parseHTML("<pre>line1\nline2\nline3</pre>");
137
+ const pre = doc.querySelector("pre");
138
+ expect(pre!.textContent).toContain("\n");
133
139
  });
134
140
 
135
141
  it("should preserve whitespace in code tags", () => {
136
- const doc = parseHTML('<code>function() { }</code>');
137
- const code = doc.querySelector('code');
138
- expect(code!.textContent).toBe('function() { }');
142
+ const doc = parseHTML("<code>function() { }</code>");
143
+ const code = doc.querySelector("code");
144
+ expect(code!.textContent).toBe("function() { }");
139
145
  });
140
146
 
141
147
  it("should preserve whitespace in textarea", () => {
142
- const doc = parseHTML('<textarea> indented\n more indented </textarea>');
143
- const textarea = doc.querySelector('textarea');
144
- expect(textarea!.textContent).toContain(' indented');
148
+ const doc = parseHTML(
149
+ "<textarea> indented\n more indented </textarea>",
150
+ );
151
+ const textarea = doc.querySelector("textarea");
152
+ expect(textarea!.textContent).toContain(" indented");
145
153
  });
146
154
 
147
155
  it("should handle tabs in pre", () => {
148
- const doc = parseHTML('<pre>\ttab\t\ttabs</pre>');
149
- const pre = doc.querySelector('pre');
150
- expect(pre!.textContent).toBe('\ttab\t\ttabs');
156
+ const doc = parseHTML("<pre>\ttab\t\ttabs</pre>");
157
+ const pre = doc.querySelector("pre");
158
+ expect(pre!.textContent).toBe("\ttab\t\ttabs");
151
159
  });
152
160
  });
153
161
 
154
162
  describe("Edge Cases: Special characters in attributes", () => {
155
163
  it("should handle JSON in data attributes", () => {
156
- const doc = parseHTML('<div data-json=\'{"key": "value", "num": 123}\'></div>');
157
- const div = doc.querySelector('div');
158
- const json = div!.getAttribute('data-json');
164
+ const doc = parseHTML(
165
+ '<div data-json=\'{"key": "value", "num": 123}\'></div>',
166
+ );
167
+ const div = doc.querySelector("div");
168
+ const json = div!.getAttribute("data-json");
159
169
  expect(json).toBe('{"key": "value", "num": 123}');
160
170
  });
161
171
 
162
172
  it("should handle double quotes inside single-quoted attributes", () => {
163
173
  const doc = parseHTML("<div title='He said \"Hello\"'></div>");
164
- const div = doc.querySelector('div');
165
- expect(div!.getAttribute('title')).toBe('He said "Hello"');
174
+ const div = doc.querySelector("div");
175
+ expect(div!.getAttribute("title")).toBe('He said "Hello"');
166
176
  });
167
177
 
168
178
  it("should handle single quotes inside double-quoted attributes", () => {
169
179
  const doc = parseHTML('<div title="It\'s working"></div>');
170
- const div = doc.querySelector('div');
171
- expect(div!.getAttribute('title')).toBe("It's working");
180
+ const div = doc.querySelector("div");
181
+ expect(div!.getAttribute("title")).toBe("It's working");
172
182
  });
173
183
 
174
184
  it("should handle HTML entities in attributes", () => {
175
185
  const doc = parseHTML('<div title="&lt;html&gt;"></div>');
176
- const div = doc.querySelector('div');
186
+ const div = doc.querySelector("div");
177
187
  // Depending on parser behavior, entities may or may not be decoded
178
- const title = div!.getAttribute('title');
179
- expect(title === '&lt;html&gt;' || title === '<html>').toBe(true);
188
+ const title = div!.getAttribute("title");
189
+ expect(title === "&lt;html&gt;" || title === "<html>").toBe(true);
180
190
  });
181
191
 
182
192
  it("should handle unicode characters in attributes", () => {
183
193
  const doc = parseHTML('<div title="Hello 世界 🌍"></div>');
184
- const div = doc.querySelector('div');
185
- expect(div!.getAttribute('title')).toBe('Hello 世界 🌍');
194
+ const div = doc.querySelector("div");
195
+ expect(div!.getAttribute("title")).toBe("Hello 世界 🌍");
186
196
  });
187
197
 
188
198
  it("should handle newlines in attributes", () => {
189
199
  const doc = parseHTML('<div title="line1\nline2"></div>');
190
- const div = doc.querySelector('div');
191
- expect(div!.getAttribute('title')).toContain('line1');
200
+ const div = doc.querySelector("div");
201
+ expect(div!.getAttribute("title")).toContain("line1");
192
202
  });
193
203
 
194
204
  it("should handle equals signs in attribute values", () => {
195
205
  const doc = parseHTML('<div data-equation="a=b+c"></div>');
196
- const div = doc.querySelector('div');
197
- expect(div!.getAttribute('data-equation')).toBe('a=b+c');
206
+ const div = doc.querySelector("div");
207
+ expect(div!.getAttribute("data-equation")).toBe("a=b+c");
198
208
  });
199
209
  });
200
210
 
201
211
  describe("Edge Cases: SVG and MathML inline", () => {
202
212
  it("should parse inline SVG", () => {
203
- const doc = parseHTML('<svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>');
204
- const svg = doc.querySelector('svg');
213
+ const doc = parseHTML(
214
+ '<svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>',
215
+ );
216
+ const svg = doc.querySelector("svg");
205
217
  expect(svg).toBeTruthy();
206
- expect(svg!.getAttribute('width')).toBe('100');
218
+ expect(svg!.getAttribute("width")).toBe("100");
207
219
  });
208
220
 
209
221
  it("should parse SVG with nested elements", () => {
210
- const doc = parseHTML('<svg><g><rect width="10" height="10"/><text>Hello</text></g></svg>');
211
- const svg = doc.querySelector('svg');
212
- const g = doc.querySelector('g');
213
- const rect = doc.querySelector('rect');
214
- const text = doc.querySelector('text');
222
+ const doc = parseHTML(
223
+ '<svg><g><rect width="10" height="10"/><text>Hello</text></g></svg>',
224
+ );
225
+ const svg = doc.querySelector("svg");
226
+ const g = doc.querySelector("g");
227
+ const rect = doc.querySelector("rect");
228
+ const text = doc.querySelector("text");
215
229
  expect(svg).toBeTruthy();
216
230
  expect(g).toBeTruthy();
217
231
  expect(rect).toBeTruthy();
@@ -219,25 +233,27 @@ describe("Edge Cases: SVG and MathML inline", () => {
219
233
  });
220
234
 
221
235
  it("should handle foreignObject with HTML inside SVG", () => {
222
- const doc = parseHTML('<svg><foreignObject><div>HTML inside SVG</div></foreignObject></svg>');
223
- const svg = doc.querySelector('svg');
224
- const fo = doc.querySelector('foreignObject');
225
- const div = doc.querySelector('div');
236
+ const doc = parseHTML(
237
+ "<svg><foreignObject><div>HTML inside SVG</div></foreignObject></svg>",
238
+ );
239
+ const svg = doc.querySelector("svg");
240
+ const fo = doc.querySelector("foreignObject");
241
+ const div = doc.querySelector("div");
226
242
  expect(svg).toBeTruthy();
227
243
  expect(fo).toBeTruthy();
228
244
  expect(div).toBeTruthy();
229
245
  });
230
246
 
231
247
  it("should parse inline MathML", () => {
232
- const doc = parseHTML('<math><mi>x</mi><mo>=</mo><mn>2</mn></math>');
233
- const math = doc.querySelector('math');
248
+ const doc = parseHTML("<math><mi>x</mi><mo>=</mo><mn>2</mn></math>");
249
+ const math = doc.querySelector("math");
234
250
  expect(math).toBeTruthy();
235
251
  });
236
252
 
237
253
  it("should handle SVG with CDATA-like content in style", () => {
238
- const doc = parseHTML('<svg><style>/* styles */</style></svg>');
239
- const svg = doc.querySelector('svg');
240
- const style = doc.querySelector('style');
254
+ const doc = parseHTML("<svg><style>/* styles */</style></svg>");
255
+ const svg = doc.querySelector("svg");
256
+ const style = doc.querySelector("style");
241
257
  expect(svg).toBeTruthy();
242
258
  expect(style).toBeTruthy();
243
259
  });
@@ -245,20 +261,22 @@ describe("Edge Cases: SVG and MathML inline", () => {
245
261
 
246
262
  describe("Edge Cases: CDATA sections", () => {
247
263
  it("should handle CDATA in script", () => {
248
- const doc = parseHTML('<script>//<![CDATA[\nvar x = 1;\n//]]></script>');
249
- const script = doc.querySelector('script');
264
+ const doc = parseHTML("<script>//<![CDATA[\nvar x = 1;\n//]]></script>");
265
+ const script = doc.querySelector("script");
250
266
  expect(script).toBeTruthy();
251
267
  });
252
268
 
253
269
  it("should handle CDATA in style", () => {
254
- const doc = parseHTML('<style>/*<![CDATA[*/ body { color: red; } /*]]>*/</style>');
255
- const style = doc.querySelector('style');
270
+ const doc = parseHTML(
271
+ "<style>/*<![CDATA[*/ body { color: red; } /*]]>*/</style>",
272
+ );
273
+ const style = doc.querySelector("style");
256
274
  expect(style).toBeTruthy();
257
275
  });
258
276
 
259
277
  it("should handle XML CDATA sections", () => {
260
- const doc = parseHTML('<div><![CDATA[Some <special> content]]></div>');
261
- const div = doc.querySelector('div');
278
+ const doc = parseHTML("<div><![CDATA[Some <special> content]]></div>");
279
+ const div = doc.querySelector("div");
262
280
  expect(div).toBeTruthy();
263
281
  });
264
282
  });
@@ -271,187 +289,295 @@ describe("Edge Cases: IE Conditional Comments", () => {
271
289
  });
272
290
 
273
291
  it("should handle IE conditional with version", () => {
274
- const doc = parseHTML('<!--[if lt IE 9]><script src="html5shiv.js"></script><![endif]-->');
292
+ const doc = parseHTML(
293
+ '<!--[if lt IE 9]><script src="html5shiv.js"></script><![endif]-->',
294
+ );
275
295
  expect(doc).toBeTruthy();
276
296
  });
277
297
 
278
298
  it("should handle downlevel-hidden conditional", () => {
279
- const doc = parseHTML('<!--[if !IE]>--><link href="modern.css"><!--<![endif]-->');
299
+ const doc = parseHTML(
300
+ '<!--[if !IE]>--><link href="modern.css"><!--<![endif]-->',
301
+ );
280
302
  expect(doc).toBeTruthy();
281
303
  });
282
304
 
283
305
  it("should preserve content around conditional comments", () => {
284
- const doc = parseHTML('<div>Before</div><!--[if IE]>IE only<![endif]--><div>After</div>');
285
- const divs = doc.querySelectorAll('div');
306
+ const doc = parseHTML(
307
+ "<div>Before</div><!--[if IE]>IE only<![endif]--><div>After</div>",
308
+ );
309
+ const divs = doc.querySelectorAll("div");
286
310
  expect(divs.length).toBe(2);
287
- expect(divs[0].textContent).toBe('Before');
288
- expect(divs[1].textContent).toBe('After');
311
+ expect(divs[0].textContent).toBe("Before");
312
+ expect(divs[1].textContent).toBe("After");
289
313
  });
290
314
  });
291
315
 
292
316
  describe("Edge Cases: innerHTML on special elements", () => {
293
317
  it("should handle innerHTML on table with tr/td", () => {
294
- const doc = parseHTML('<table></table>');
295
- const table = doc.querySelector('table');
296
- table!.innerHTML = '<tr><td>Cell 1</td><td>Cell 2</td></tr>';
318
+ const doc = parseHTML("<table></table>");
319
+ const table = doc.querySelector("table");
320
+ table!.innerHTML = "<tr><td>Cell 1</td><td>Cell 2</td></tr>";
297
321
  // Browser auto-wraps in tbody, parser behavior may vary
298
- const tr = table!.querySelector('tr');
299
- const tds = table!.querySelectorAll('td');
322
+ const tr = table!.querySelector("tr");
323
+ const tds = table!.querySelectorAll("td");
300
324
  expect(tr).toBeTruthy();
301
325
  expect(tds.length).toBe(2);
302
326
  });
303
327
 
304
328
  it("should handle innerHTML on select with options", () => {
305
- const doc = parseHTML('<select></select>');
306
- const select = doc.querySelector('select');
307
- select!.innerHTML = '<option value="1">One</option><option value="2">Two</option>';
308
- const options = select!.querySelectorAll('option');
329
+ const doc = parseHTML("<select></select>");
330
+ const select = doc.querySelector("select");
331
+ select!.innerHTML =
332
+ '<option value="1">One</option><option value="2">Two</option>';
333
+ const options = select!.querySelectorAll("option");
309
334
  expect(options.length).toBe(2);
310
335
  });
311
336
 
312
337
  it("should handle innerHTML on ul with li", () => {
313
- const doc = parseHTML('<ul></ul>');
314
- const ul = doc.querySelector('ul');
315
- ul!.innerHTML = '<li>Item 1</li><li>Item 2</li><li>Item 3</li>';
316
- const items = ul!.querySelectorAll('li');
338
+ const doc = parseHTML("<ul></ul>");
339
+ const ul = doc.querySelector("ul");
340
+ ul!.innerHTML = "<li>Item 1</li><li>Item 2</li><li>Item 3</li>";
341
+ const items = ul!.querySelectorAll("li");
317
342
  expect(items.length).toBe(3);
318
343
  });
319
344
 
320
345
  it("should handle innerHTML on template element", () => {
321
- const doc = parseHTML('<template></template>');
322
- const template = doc.querySelector('template');
323
- template!.innerHTML = '<div>Template content</div>';
346
+ const doc = parseHTML("<template></template>");
347
+ const template = doc.querySelector("template");
348
+ template!.innerHTML = "<div>Template content</div>";
324
349
  // Template behavior is special in browsers
325
350
  expect(template).toBeTruthy();
326
351
  });
327
352
 
328
353
  it("should handle innerHTML on colgroup", () => {
329
- const doc = parseHTML('<table><colgroup></colgroup></table>');
330
- const colgroup = doc.querySelector('colgroup');
354
+ const doc = parseHTML("<table><colgroup></colgroup></table>");
355
+ const colgroup = doc.querySelector("colgroup");
331
356
  colgroup!.innerHTML = '<col span="2"><col>';
332
- const cols = colgroup!.querySelectorAll('col');
357
+ const cols = colgroup!.querySelectorAll("col");
333
358
  expect(cols.length).toBe(2);
334
359
  });
335
360
 
336
361
  it("should handle innerHTML replacement multiple times on table", () => {
337
- const doc = parseHTML('<table></table>');
338
- const table = doc.querySelector('table');
339
-
340
- table!.innerHTML = '<tr><td>First</td></tr>';
341
- expect(table!.querySelector('td')!.textContent).toBe('First');
342
-
343
- table!.innerHTML = '<tr><td>Second</td></tr>';
344
- expect(table!.querySelector('td')!.textContent).toBe('Second');
362
+ const doc = parseHTML("<table></table>");
363
+ const table = doc.querySelector("table");
364
+
365
+ table!.innerHTML = "<tr><td>First</td></tr>";
366
+ expect(table!.querySelector("td")!.textContent).toBe("First");
367
+
368
+ table!.innerHTML = "<tr><td>Second</td></tr>";
369
+ expect(table!.querySelector("td")!.textContent).toBe("Second");
345
370
  });
346
371
  });
347
372
 
348
373
  describe("Edge Cases: Self-closing tags in HTML5", () => {
349
374
  it("should handle self-closing div (invalid in HTML5)", () => {
350
- const doc = parseHTML('<div/>');
351
- const div = doc.querySelector('div');
375
+ const doc = parseHTML("<div/>");
376
+ const div = doc.querySelector("div");
352
377
  expect(div).toBeTruthy();
353
378
  // In HTML5, <div/> is treated as <div> (not closed)
354
379
  });
355
380
 
356
381
  it("should handle self-closing span", () => {
357
- const doc = parseHTML('<span/>text');
358
- const span = doc.querySelector('span');
382
+ const doc = parseHTML("<span/>text");
383
+ const span = doc.querySelector("span");
359
384
  expect(span).toBeTruthy();
360
385
  });
361
386
 
362
387
  it("should handle valid self-closing void elements", () => {
363
- const doc = parseHTML('<br/><hr/><img/>');
364
- const br = doc.querySelector('br');
365
- const hr = doc.querySelector('hr');
366
- const img = doc.querySelector('img');
388
+ const doc = parseHTML("<br/><hr/><img/>");
389
+ const br = doc.querySelector("br");
390
+ const hr = doc.querySelector("hr");
391
+ const img = doc.querySelector("img");
367
392
  expect(br).toBeTruthy();
368
393
  expect(hr).toBeTruthy();
369
394
  expect(img).toBeTruthy();
370
395
  });
371
396
 
372
397
  it("should handle self-closing with space before slash", () => {
373
- const doc = parseHTML('<br />');
374
- const br = doc.querySelector('br');
398
+ const doc = parseHTML("<br />");
399
+ const br = doc.querySelector("br");
375
400
  expect(br).toBeTruthy();
376
401
  });
377
402
 
378
403
  it("should handle self-closing in XHTML style", () => {
379
404
  const doc = parseHTML('<input type="text" />');
380
- const input = doc.querySelector('input');
405
+ const input = doc.querySelector("input");
381
406
  expect(input).toBeTruthy();
382
- expect(input!.getAttribute('type')).toBe('text');
407
+ expect(input!.getAttribute("type")).toBe("text");
383
408
  });
384
409
 
385
410
  it("should handle mixed self-closing styles", () => {
386
- const doc = parseHTML('<div><br><br/><br /></div>');
387
- const brs = doc.querySelectorAll('br');
411
+ const doc = parseHTML("<div><br><br/><br /></div>");
412
+ const brs = doc.querySelectorAll("br");
388
413
  expect(brs.length).toBe(3);
389
414
  });
390
415
  });
391
416
 
392
417
  describe("Edge Cases: Additional common scenarios", () => {
393
418
  it("should handle empty attributes", () => {
394
- const doc = parseHTML('<input disabled readonly>');
395
- const input = doc.querySelector('input');
396
- expect(input!.hasAttribute('disabled')).toBe(true);
397
- expect(input!.hasAttribute('readonly')).toBe(true);
419
+ const doc = parseHTML("<input disabled readonly>");
420
+ const input = doc.querySelector("input");
421
+ expect(input!.hasAttribute("disabled")).toBe(true);
422
+ expect(input!.hasAttribute("readonly")).toBe(true);
398
423
  });
399
424
 
400
425
  it("should handle unquoted attribute values", () => {
401
- const doc = parseHTML('<div class=myclass id=myid></div>');
402
- const div = doc.querySelector('div');
403
- expect(div!.getAttribute('class')).toBe('myclass');
404
- expect(div!.getAttribute('id')).toBe('myid');
426
+ const doc = parseHTML("<div class=myclass id=myid></div>");
427
+ const div = doc.querySelector("div");
428
+ expect(div!.getAttribute("class")).toBe("myclass");
429
+ expect(div!.getAttribute("id")).toBe("myid");
405
430
  });
406
431
 
407
432
  it("should handle attributes with no value", () => {
408
- const doc = parseHTML('<option selected>Choice</option>');
409
- const option = doc.querySelector('option');
410
- expect(option!.hasAttribute('selected')).toBe(true);
433
+ const doc = parseHTML("<option selected>Choice</option>");
434
+ const option = doc.querySelector("option");
435
+ expect(option!.hasAttribute("selected")).toBe(true);
411
436
  });
412
437
 
413
438
  it("should handle doctype", () => {
414
- const doc = parseHTML('<!DOCTYPE html><html><body>Test</body></html>');
439
+ const doc = parseHTML("<!DOCTYPE html><html><body>Test</body></html>");
415
440
  expect(doc.body).toBeTruthy();
416
- expect(doc.body.textContent).toBe('Test');
441
+ expect(doc.body.textContent).toBe("Test");
417
442
  });
418
443
 
419
444
  it("should handle comments between tags", () => {
420
- const doc = parseHTML('<div><!-- comment --><span>Text</span></div>');
421
- const span = doc.querySelector('span');
445
+ const doc = parseHTML("<div><!-- comment --><span>Text</span></div>");
446
+ const span = doc.querySelector("span");
422
447
  expect(span).toBeTruthy();
423
- expect(span!.textContent).toBe('Text');
448
+ expect(span!.textContent).toBe("Text");
424
449
  });
425
450
 
426
451
  it("should handle deeply nested structure", () => {
427
- const doc = parseHTML('<div><div><div><div><div><span>Deep</span></div></div></div></div></div>');
428
- const span = doc.querySelector('span');
452
+ const doc = parseHTML(
453
+ "<div><div><div><div><div><span>Deep</span></div></div></div></div></div>",
454
+ );
455
+ const span = doc.querySelector("span");
429
456
  expect(span).toBeTruthy();
430
- expect(span!.textContent).toBe('Deep');
457
+ expect(span!.textContent).toBe("Deep");
431
458
  });
432
459
 
433
460
  it("should handle adjacent text nodes conceptually", () => {
434
- const doc = parseHTML('<div>Hello World</div>');
435
- const div = doc.querySelector('div');
436
- expect(div!.textContent).toBe('Hello World');
461
+ const doc = parseHTML("<div>Hello World</div>");
462
+ const div = doc.querySelector("div");
463
+ expect(div!.textContent).toBe("Hello World");
437
464
  });
438
465
 
439
466
  it("should handle style tags with CSS", () => {
440
- const doc = parseHTML('<style>.class { color: red; } #id > div { margin: 0; }</style>');
441
- const style = doc.querySelector('style');
467
+ const doc = parseHTML(
468
+ "<style>.class { color: red; } #id > div { margin: 0; }</style>",
469
+ );
470
+ const style = doc.querySelector("style");
442
471
  expect(style).toBeTruthy();
443
- expect(style!.textContent).toContain('color: red');
472
+ expect(style!.textContent).toContain("color: red");
444
473
  });
445
474
 
446
475
  it("should handle noscript content", () => {
447
- const doc = parseHTML('<noscript><div>Please enable JavaScript</div></noscript>');
448
- const noscript = doc.querySelector('noscript');
476
+ const doc = parseHTML(
477
+ "<noscript><div>Please enable JavaScript</div></noscript>",
478
+ );
479
+ const noscript = doc.querySelector("noscript");
449
480
  expect(noscript).toBeTruthy();
450
481
  });
451
482
 
452
483
  it("should handle data URLs in attributes", () => {
453
484
  const doc = parseHTML('<img src="">');
454
- const img = doc.querySelector('img');
455
- expect(img!.getAttribute('src')).toContain('data:image/png');
485
+ const img = doc.querySelector("img");
486
+ expect(img!.getAttribute("src")).toContain("data:image/png");
487
+ });
488
+ });
489
+
490
+ describe("innerHTML with void elements", () => {
491
+ it("should correctly set innerHTML on void elements like meta tags", () => {
492
+ // Create a document with a custom element
493
+ const html = "<html><body><custom-meta></custom-meta></body></html>";
494
+ const doc = parseHTML(html);
495
+ const customElement = doc.querySelector("custom-meta");
496
+
497
+ // Verify element exists
498
+ expect(customElement).not.toBeNull();
499
+ expect(customElement?.tagName?.toLowerCase()).toBe("custom-meta");
500
+
501
+ // Set innerHTML with meta tags (void elements)
502
+ const metaContent = `<meta name="description" content="Test description">
503
+ <meta name="keywords" content="test, keywords">
504
+ <meta property="og:title" content="Test Title">`;
505
+
506
+ customElement!.innerHTML = metaContent;
507
+
508
+ // The bug: innerHTML should contain the meta tags
509
+ expect(customElement!.innerHTML).toBe(metaContent);
510
+ expect(customElement!.innerHTML.length).toBeGreaterThan(0);
511
+
512
+ // outerHTML should reflect the change
513
+ expect(customElement!.outerHTML).toContain('<meta name="description"');
514
+ expect(customElement!.outerHTML).toContain('<meta name="keywords"');
515
+ expect(customElement!.outerHTML).toContain('<meta property="og:title"');
516
+
517
+ // Should have child nodes
518
+ expect(customElement!.childNodes.length).toBeGreaterThan(0);
519
+ });
520
+
521
+ it("should handle innerHTML assignment in subprocess context", async () => {
522
+ // This test simulates what happens in the component wrapper execution
523
+ const wrapperCode = `
524
+ import { parseHTML } from "${process.cwd()}/src/index.ts";
525
+
526
+ const html = '<html><body><meta-tags></meta-tags></body></html>';
527
+ const doc = parseHTML(html);
528
+ const com = doc.querySelector('meta-tags');
529
+
530
+ if (!com) {
531
+ throw new Error("Element not found");
532
+ }
533
+
534
+ // This is what component code does
535
+ com.innerHTML = \`<meta name="description" content="Test">
536
+ <meta name="keywords" content="test">\`;
537
+
538
+ // Return the result
539
+ console.log(JSON.stringify({
540
+ innerHTML: com.innerHTML,
541
+ outerHTML: com.outerHTML,
542
+ childCount: com.childNodes.length
543
+ }));
544
+ `;
545
+
546
+ // Execute the wrapper code in a subprocess (simulating Bun.spawn)
547
+ const proc = Bun.spawn(["bun", "-e", wrapperCode], {
548
+ stdout: "pipe",
549
+ stderr: "pipe",
550
+ cwd: process.cwd(), // Set working directory to current project directory
551
+ });
552
+
553
+ const exitCode = await proc.exited;
554
+
555
+ if (exitCode !== 0) {
556
+ const stderr = await new Response(proc.stderr).text();
557
+ throw new Error(`Subprocess failed: ${stderr}`);
558
+ }
559
+
560
+ const stdout = await new Response(proc.stdout).text();
561
+ const result = JSON.parse(stdout.trim());
562
+
563
+ // The bug manifests here: innerHTML should not be empty
564
+ expect(result.innerHTML).not.toBe("");
565
+ expect(result.innerHTML).toContain('<meta name="description"');
566
+ expect(result.childCount).toBeGreaterThan(0);
567
+ });
568
+
569
+ it("should work correctly with regular elements for comparison", () => {
570
+ // Test with a regular element to ensure it works as expected
571
+ const html = "<html><body><div></div></body></html>";
572
+ const doc = parseHTML(html);
573
+ const div = doc.querySelector("div");
574
+
575
+ expect(div).not.toBeNull();
576
+
577
+ div!.innerHTML = "<span>Test content</span><p>More content</p>";
578
+
579
+ expect(div!.innerHTML).toBe("<span>Test content</span><p>More content</p>");
580
+ expect(div!.childNodes.length).toBe(2);
581
+ expect(div!.outerHTML).toContain("<span>Test content</span>");
456
582
  });
457
583
  });
@@ -0,0 +1,67 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../src/index";
3
+
4
+ describe("HTML entities in text content", () => {
5
+ it("should preserve &lt; and &gt; entities when serializing innerHTML", () => {
6
+ const doc = parseHTML("<p>&lt;div&gt;</p>");
7
+ const p = doc.querySelector("p");
8
+ expect(p.innerHTML).toBe("&lt;div&gt;");
9
+ });
10
+
11
+ it("should preserve &lt; and &gt; in code elements", () => {
12
+ const doc = parseHTML("<code>&lt;script&gt;alert('xss')&lt;/script&gt;</code>");
13
+ const code = doc.querySelector("code");
14
+ expect(code.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
15
+ });
16
+
17
+ it("should preserve &amp; entity when serializing innerHTML", () => {
18
+ const doc = parseHTML("<span>foo &amp; bar</span>");
19
+ const span = doc.querySelector("span");
20
+ expect(span.innerHTML).toBe("foo &amp; bar");
21
+ });
22
+
23
+ it("should preserve mixed entities in text", () => {
24
+ const doc = parseHTML("<div>&lt;a href=&quot;test&quot;&gt;link&lt;/a&gt;</div>");
25
+ const div = doc.querySelector("div");
26
+ expect(div.innerHTML).toBe('&lt;a href="test"&gt;link&lt;/a&gt;');
27
+ });
28
+
29
+ it("should handle textContent correctly (decoded)", () => {
30
+ const doc = parseHTML("<p>&lt;div&gt;</p>");
31
+ const p = doc.querySelector("p");
32
+ expect(p.textContent).toBe("<div>");
33
+ });
34
+
35
+ it("should preserve entities in outerHTML", () => {
36
+ const doc = parseHTML("<p>&lt;test&gt;</p>");
37
+ const p = doc.querySelector("p");
38
+ expect(p.outerHTML).toBe("<p>&lt;test&gt;</p>");
39
+ });
40
+
41
+ it("should preserve entities in nested elements", () => {
42
+ const doc = parseHTML("<div><span>&lt;nested&gt;</span></div>");
43
+ const div = doc.querySelector("div");
44
+ expect(div.innerHTML).toBe("<span>&lt;nested&gt;</span>");
45
+ });
46
+
47
+ it("should handle multiple text nodes with entities", () => {
48
+ const doc = parseHTML("<p>&lt;first&gt; and &lt;second&gt;</p>");
49
+ const p = doc.querySelector("p");
50
+ expect(p.innerHTML).toBe("&lt;first&gt; and &lt;second&gt;");
51
+ });
52
+
53
+ it("should not double-escape already escaped content", () => {
54
+ const doc = parseHTML("<p>&amp;lt;</p>");
55
+ const p = doc.querySelector("p");
56
+ expect(p.textContent).toBe("&lt;");
57
+ expect(p.innerHTML).toBe("&amp;lt;");
58
+ });
59
+
60
+ it("should preserve entities after DOM manipulation", () => {
61
+ const doc = parseHTML("<div></div>");
62
+ const div = doc.querySelector("div");
63
+ const text = doc.createTextNode("<script>alert('xss')</script>");
64
+ div.appendChild(text);
65
+ expect(div.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
66
+ });
67
+ });
@@ -1,4 +1,4 @@
1
- import { describe, it } from "bun:test";
1
+ import { describe, expect, it } from "bun:test";
2
2
  import { readFileSync } from "fs";
3
3
  import { parse } from "../src/index.ts";
4
4