@tkeron/html-parser 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tkeron/html-parser",
3
- "version": "1.1.0",
3
+ "version": "1.1.2",
4
4
  "description": "A fast and lightweight HTML parser for Bun",
5
5
  "main": "index.js",
6
6
  "module": "index.ts",
@@ -856,7 +856,7 @@ function updateElementContent(element: any): void {
856
856
  const innerHTML = element.childNodes
857
857
  .map((child: any) => {
858
858
  if (child.nodeType === NodeType.TEXT_NODE) {
859
- return child.textContent;
859
+ return escapeTextContent(child.textContent || "");
860
860
  } else if (child.nodeType === NodeType.ELEMENT_NODE) {
861
861
  return child.outerHTML;
862
862
  } else if (child.nodeType === NodeType.COMMENT_NODE) {
@@ -943,11 +943,12 @@ export function setInnerHTML(element: any, html: string): void {
943
943
  element.lastElementChild = null;
944
944
 
945
945
  if (html.trim()) {
946
- const tokens = tokenize(html);
946
+ const wrappedHtml = '<div>' + html + '</div>';
947
+ const tokens = tokenize(wrappedHtml);
947
948
  const doc = parse(tokens);
948
- const body = doc.body;
949
- if (body && body.childNodes) {
950
- const nodesToMove = [...body.childNodes];
949
+ const div = doc.querySelector('div');
950
+ if (div && div.childNodes) {
951
+ const nodesToMove = [...div.childNodes];
951
952
  for (const child of nodesToMove) {
952
953
  child.parentNode = null;
953
954
  appendChild(element, child);
@@ -1152,7 +1153,7 @@ export function getInnerHTML(element: any): string {
1152
1153
  if (child.nodeType === NodeType.ELEMENT_NODE) {
1153
1154
  innerHTML += child.outerHTML;
1154
1155
  } else if (child.nodeType === NodeType.TEXT_NODE) {
1155
- innerHTML += child.textContent || "";
1156
+ innerHTML += escapeTextContent(child.textContent || "");
1156
1157
  } else if (child.nodeType === NodeType.COMMENT_NODE) {
1157
1158
  innerHTML += `<!--${child.data || ""}-->`;
1158
1159
  }
@@ -0,0 +1,583 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../src/index";
3
+
4
+ describe("Edge Cases: Scripts with HTML inside", () => {
5
+ it("should handle script with less-than and greater-than operators", () => {
6
+ const doc = parseHTML(
7
+ '<script>if (a < b && c > d) { console.log("ok"); }</script>',
8
+ );
9
+ const script = doc.querySelector("script");
10
+ expect(script).toBeTruthy();
11
+ expect(script!.textContent).toContain("a < b");
12
+ expect(script!.textContent).toContain("c > d");
13
+ });
14
+
15
+ it("should handle script containing </script> as a string", () => {
16
+ const doc = parseHTML('<script>var html = "<\\/script>";</script>');
17
+ const script = doc.querySelector("script");
18
+ expect(script).toBeTruthy();
19
+ });
20
+
21
+ it("should handle script with HTML-like content in strings", () => {
22
+ const doc = parseHTML(
23
+ '<script>var html = "<div class=\\"test\\">Hello</div>";</script>',
24
+ );
25
+ const script = doc.querySelector("script");
26
+ expect(script).toBeTruthy();
27
+ expect(script!.textContent).toContain("<div");
28
+ });
29
+
30
+ it("should handle script with template literals containing HTML", () => {
31
+ const doc = parseHTML("<script>const tpl = `<div>${name}</div>`;</script>");
32
+ const script = doc.querySelector("script");
33
+ expect(script).toBeTruthy();
34
+ expect(script!.textContent).toContain("<div>");
35
+ });
36
+
37
+ it("should handle multiple scripts with complex content", () => {
38
+ const html = `
39
+ <script>var a = 1 < 2;</script>
40
+ <script>var b = 3 > 1;</script>
41
+ `;
42
+ const doc = parseHTML(html);
43
+ const scripts = doc.querySelectorAll("script");
44
+ expect(scripts.length).toBe(2);
45
+ });
46
+ });
47
+
48
+ describe("Edge Cases: Template placeholders", () => {
49
+ it("should preserve template placeholders", () => {
50
+ const doc = parseHTML("<div>{{user.name}}</div>");
51
+ const div = doc.querySelector("div");
52
+ expect(div!.textContent).toBe("{{user.name}}");
53
+ });
54
+
55
+ it("should preserve placeholders in attributes", () => {
56
+ const doc = parseHTML('<img src="{{imageUrl}}">');
57
+ const img = doc.querySelector("img");
58
+ expect(img!.getAttribute("src")).toBe("{{imageUrl}}");
59
+ });
60
+
61
+ it("should preserve custom attribute prefixes", () => {
62
+ const doc = parseHTML(
63
+ '<div data-bind="value" x-on:click="handler" custom-attr="test"></div>',
64
+ );
65
+ const div = doc.querySelector("div");
66
+ expect(div!.getAttribute("data-bind")).toBe("value");
67
+ expect(div!.getAttribute("x-on:click")).toBe("handler");
68
+ expect(div!.getAttribute("custom-attr")).toBe("test");
69
+ });
70
+
71
+ it("should preserve EJS/ERB style placeholders", () => {
72
+ const doc = parseHTML("<div><%= user.name %></div>");
73
+ const div = doc.querySelector("div");
74
+ expect(div!.textContent).toBe("<%= user.name %>");
75
+ });
76
+
77
+ it("should handle nested template expressions", () => {
78
+ const doc = parseHTML("<div>{{#each items}}{{this}}{{/each}}</div>");
79
+ const div = doc.querySelector("div");
80
+ expect(div!.textContent).toContain("{{#each items}}");
81
+ expect(div!.textContent).toContain("{{/each}}");
82
+ });
83
+ });
84
+
85
+ describe("Edge Cases: Malformed but common HTML", () => {
86
+ it("should handle unclosed paragraph tags", () => {
87
+ const doc = parseHTML("<p>Párrafo 1<p>Párrafo 2");
88
+ const paragraphs = doc.querySelectorAll("p");
89
+ expect(paragraphs.length).toBe(2);
90
+ });
91
+
92
+ it("should handle list items without parent list", () => {
93
+ const doc = parseHTML("<li>Item 1</li><li>Item 2</li>");
94
+ const items = doc.querySelectorAll("li");
95
+ expect(items.length).toBe(2);
96
+ });
97
+
98
+ it("should handle nested unclosed tags", () => {
99
+ const doc = parseHTML("<div><span>Text<div>Nested</div></span></div>");
100
+ const div = doc.querySelector("div");
101
+ expect(div).toBeTruthy();
102
+ });
103
+
104
+ it("should handle missing closing tags at end", () => {
105
+ const doc = parseHTML("<div><span>Text");
106
+ const div = doc.querySelector("div");
107
+ const span = doc.querySelector("span");
108
+ expect(div).toBeTruthy();
109
+ expect(span).toBeTruthy();
110
+ });
111
+
112
+ it("should handle extra closing tags", () => {
113
+ const doc = parseHTML("<div>Text</div></div></span>");
114
+ const div = doc.querySelector("div");
115
+ expect(div).toBeTruthy();
116
+ expect(div!.textContent).toBe("Text");
117
+ });
118
+
119
+ it("should handle incorrectly nested tags", () => {
120
+ const doc = parseHTML("<b><i>Bold and italic</b></i>");
121
+ const b = doc.querySelector("b");
122
+ const i = doc.querySelector("i");
123
+ expect(b).toBeTruthy();
124
+ expect(i).toBeTruthy();
125
+ });
126
+ });
127
+
128
+ describe("Edge Cases: Significant whitespace", () => {
129
+ it("should preserve whitespace in pre tags", () => {
130
+ const doc = parseHTML("<pre> multiple spaces here </pre>");
131
+ const pre = doc.querySelector("pre");
132
+ expect(pre!.textContent).toBe(" multiple spaces here ");
133
+ });
134
+
135
+ it("should preserve newlines in pre tags", () => {
136
+ const doc = parseHTML("<pre>line1\nline2\nline3</pre>");
137
+ const pre = doc.querySelector("pre");
138
+ expect(pre!.textContent).toContain("\n");
139
+ });
140
+
141
+ it("should preserve whitespace in code tags", () => {
142
+ const doc = parseHTML("<code>function() { }</code>");
143
+ const code = doc.querySelector("code");
144
+ expect(code!.textContent).toBe("function() { }");
145
+ });
146
+
147
+ it("should preserve whitespace in textarea", () => {
148
+ const doc = parseHTML(
149
+ "<textarea> indented\n more indented </textarea>",
150
+ );
151
+ const textarea = doc.querySelector("textarea");
152
+ expect(textarea!.textContent).toContain(" indented");
153
+ });
154
+
155
+ it("should handle tabs in pre", () => {
156
+ const doc = parseHTML("<pre>\ttab\t\ttabs</pre>");
157
+ const pre = doc.querySelector("pre");
158
+ expect(pre!.textContent).toBe("\ttab\t\ttabs");
159
+ });
160
+ });
161
+
162
+ describe("Edge Cases: Special characters in attributes", () => {
163
+ it("should handle JSON in data attributes", () => {
164
+ const doc = parseHTML(
165
+ '<div data-json=\'{"key": "value", "num": 123}\'></div>',
166
+ );
167
+ const div = doc.querySelector("div");
168
+ const json = div!.getAttribute("data-json");
169
+ expect(json).toBe('{"key": "value", "num": 123}');
170
+ });
171
+
172
+ it("should handle double quotes inside single-quoted attributes", () => {
173
+ const doc = parseHTML("<div title='He said \"Hello\"'></div>");
174
+ const div = doc.querySelector("div");
175
+ expect(div!.getAttribute("title")).toBe('He said "Hello"');
176
+ });
177
+
178
+ it("should handle single quotes inside double-quoted attributes", () => {
179
+ const doc = parseHTML('<div title="It\'s working"></div>');
180
+ const div = doc.querySelector("div");
181
+ expect(div!.getAttribute("title")).toBe("It's working");
182
+ });
183
+
184
+ it("should handle HTML entities in attributes", () => {
185
+ const doc = parseHTML('<div title="&lt;html&gt;"></div>');
186
+ const div = doc.querySelector("div");
187
+ // Depending on parser behavior, entities may or may not be decoded
188
+ const title = div!.getAttribute("title");
189
+ expect(title === "&lt;html&gt;" || title === "<html>").toBe(true);
190
+ });
191
+
192
+ it("should handle unicode characters in attributes", () => {
193
+ const doc = parseHTML('<div title="Hello 世界 🌍"></div>');
194
+ const div = doc.querySelector("div");
195
+ expect(div!.getAttribute("title")).toBe("Hello 世界 🌍");
196
+ });
197
+
198
+ it("should handle newlines in attributes", () => {
199
+ const doc = parseHTML('<div title="line1\nline2"></div>');
200
+ const div = doc.querySelector("div");
201
+ expect(div!.getAttribute("title")).toContain("line1");
202
+ });
203
+
204
+ it("should handle equals signs in attribute values", () => {
205
+ const doc = parseHTML('<div data-equation="a=b+c"></div>');
206
+ const div = doc.querySelector("div");
207
+ expect(div!.getAttribute("data-equation")).toBe("a=b+c");
208
+ });
209
+ });
210
+
211
+ describe("Edge Cases: SVG and MathML inline", () => {
212
+ it("should parse inline SVG", () => {
213
+ const doc = parseHTML(
214
+ '<svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>',
215
+ );
216
+ const svg = doc.querySelector("svg");
217
+ expect(svg).toBeTruthy();
218
+ expect(svg!.getAttribute("width")).toBe("100");
219
+ });
220
+
221
+ it("should parse SVG with nested elements", () => {
222
+ const doc = parseHTML(
223
+ '<svg><g><rect width="10" height="10"/><text>Hello</text></g></svg>',
224
+ );
225
+ const svg = doc.querySelector("svg");
226
+ const g = doc.querySelector("g");
227
+ const rect = doc.querySelector("rect");
228
+ const text = doc.querySelector("text");
229
+ expect(svg).toBeTruthy();
230
+ expect(g).toBeTruthy();
231
+ expect(rect).toBeTruthy();
232
+ expect(text).toBeTruthy();
233
+ });
234
+
235
+ it("should handle foreignObject with HTML inside SVG", () => {
236
+ const doc = parseHTML(
237
+ "<svg><foreignObject><div>HTML inside SVG</div></foreignObject></svg>",
238
+ );
239
+ const svg = doc.querySelector("svg");
240
+ const fo = doc.querySelector("foreignObject");
241
+ const div = doc.querySelector("div");
242
+ expect(svg).toBeTruthy();
243
+ expect(fo).toBeTruthy();
244
+ expect(div).toBeTruthy();
245
+ });
246
+
247
+ it("should parse inline MathML", () => {
248
+ const doc = parseHTML("<math><mi>x</mi><mo>=</mo><mn>2</mn></math>");
249
+ const math = doc.querySelector("math");
250
+ expect(math).toBeTruthy();
251
+ });
252
+
253
+ it("should handle SVG with CDATA-like content in style", () => {
254
+ const doc = parseHTML("<svg><style>/* styles */</style></svg>");
255
+ const svg = doc.querySelector("svg");
256
+ const style = doc.querySelector("style");
257
+ expect(svg).toBeTruthy();
258
+ expect(style).toBeTruthy();
259
+ });
260
+ });
261
+
262
+ describe("Edge Cases: CDATA sections", () => {
263
+ it("should handle CDATA in script", () => {
264
+ const doc = parseHTML("<script>//<![CDATA[\nvar x = 1;\n//]]></script>");
265
+ const script = doc.querySelector("script");
266
+ expect(script).toBeTruthy();
267
+ });
268
+
269
+ it("should handle CDATA in style", () => {
270
+ const doc = parseHTML(
271
+ "<style>/*<![CDATA[*/ body { color: red; } /*]]>*/</style>",
272
+ );
273
+ const style = doc.querySelector("style");
274
+ expect(style).toBeTruthy();
275
+ });
276
+
277
+ it("should handle XML CDATA sections", () => {
278
+ const doc = parseHTML("<div><![CDATA[Some <special> content]]></div>");
279
+ const div = doc.querySelector("div");
280
+ expect(div).toBeTruthy();
281
+ });
282
+ });
283
+
284
+ describe("Edge Cases: IE Conditional Comments", () => {
285
+ it("should handle basic IE conditional comments", () => {
286
+ const doc = parseHTML('<!--[if IE]><link href="ie.css"><![endif]-->');
287
+ // Should parse without errors, comment handling varies by parser
288
+ expect(doc).toBeTruthy();
289
+ });
290
+
291
+ it("should handle IE conditional with version", () => {
292
+ const doc = parseHTML(
293
+ '<!--[if lt IE 9]><script src="html5shiv.js"></script><![endif]-->',
294
+ );
295
+ expect(doc).toBeTruthy();
296
+ });
297
+
298
+ it("should handle downlevel-hidden conditional", () => {
299
+ const doc = parseHTML(
300
+ '<!--[if !IE]>--><link href="modern.css"><!--<![endif]-->',
301
+ );
302
+ expect(doc).toBeTruthy();
303
+ });
304
+
305
+ it("should preserve content around conditional comments", () => {
306
+ const doc = parseHTML(
307
+ "<div>Before</div><!--[if IE]>IE only<![endif]--><div>After</div>",
308
+ );
309
+ const divs = doc.querySelectorAll("div");
310
+ expect(divs.length).toBe(2);
311
+ expect(divs[0].textContent).toBe("Before");
312
+ expect(divs[1].textContent).toBe("After");
313
+ });
314
+ });
315
+
316
+ describe("Edge Cases: innerHTML on special elements", () => {
317
+ it("should handle innerHTML on table with tr/td", () => {
318
+ const doc = parseHTML("<table></table>");
319
+ const table = doc.querySelector("table");
320
+ table!.innerHTML = "<tr><td>Cell 1</td><td>Cell 2</td></tr>";
321
+ // Browser auto-wraps in tbody, parser behavior may vary
322
+ const tr = table!.querySelector("tr");
323
+ const tds = table!.querySelectorAll("td");
324
+ expect(tr).toBeTruthy();
325
+ expect(tds.length).toBe(2);
326
+ });
327
+
328
+ it("should handle innerHTML on select with options", () => {
329
+ const doc = parseHTML("<select></select>");
330
+ const select = doc.querySelector("select");
331
+ select!.innerHTML =
332
+ '<option value="1">One</option><option value="2">Two</option>';
333
+ const options = select!.querySelectorAll("option");
334
+ expect(options.length).toBe(2);
335
+ });
336
+
337
+ it("should handle innerHTML on ul with li", () => {
338
+ const doc = parseHTML("<ul></ul>");
339
+ const ul = doc.querySelector("ul");
340
+ ul!.innerHTML = "<li>Item 1</li><li>Item 2</li><li>Item 3</li>";
341
+ const items = ul!.querySelectorAll("li");
342
+ expect(items.length).toBe(3);
343
+ });
344
+
345
+ it("should handle innerHTML on template element", () => {
346
+ const doc = parseHTML("<template></template>");
347
+ const template = doc.querySelector("template");
348
+ template!.innerHTML = "<div>Template content</div>";
349
+ // Template behavior is special in browsers
350
+ expect(template).toBeTruthy();
351
+ });
352
+
353
+ it("should handle innerHTML on colgroup", () => {
354
+ const doc = parseHTML("<table><colgroup></colgroup></table>");
355
+ const colgroup = doc.querySelector("colgroup");
356
+ colgroup!.innerHTML = '<col span="2"><col>';
357
+ const cols = colgroup!.querySelectorAll("col");
358
+ expect(cols.length).toBe(2);
359
+ });
360
+
361
+ it("should handle innerHTML replacement multiple times on table", () => {
362
+ const doc = parseHTML("<table></table>");
363
+ const table = doc.querySelector("table");
364
+
365
+ table!.innerHTML = "<tr><td>First</td></tr>";
366
+ expect(table!.querySelector("td")!.textContent).toBe("First");
367
+
368
+ table!.innerHTML = "<tr><td>Second</td></tr>";
369
+ expect(table!.querySelector("td")!.textContent).toBe("Second");
370
+ });
371
+ });
372
+
373
+ describe("Edge Cases: Self-closing tags in HTML5", () => {
374
+ it("should handle self-closing div (invalid in HTML5)", () => {
375
+ const doc = parseHTML("<div/>");
376
+ const div = doc.querySelector("div");
377
+ expect(div).toBeTruthy();
378
+ // In HTML5, <div/> is treated as <div> (not closed)
379
+ });
380
+
381
+ it("should handle self-closing span", () => {
382
+ const doc = parseHTML("<span/>text");
383
+ const span = doc.querySelector("span");
384
+ expect(span).toBeTruthy();
385
+ });
386
+
387
+ it("should handle valid self-closing void elements", () => {
388
+ const doc = parseHTML("<br/><hr/><img/>");
389
+ const br = doc.querySelector("br");
390
+ const hr = doc.querySelector("hr");
391
+ const img = doc.querySelector("img");
392
+ expect(br).toBeTruthy();
393
+ expect(hr).toBeTruthy();
394
+ expect(img).toBeTruthy();
395
+ });
396
+
397
+ it("should handle self-closing with space before slash", () => {
398
+ const doc = parseHTML("<br />");
399
+ const br = doc.querySelector("br");
400
+ expect(br).toBeTruthy();
401
+ });
402
+
403
+ it("should handle self-closing in XHTML style", () => {
404
+ const doc = parseHTML('<input type="text" />');
405
+ const input = doc.querySelector("input");
406
+ expect(input).toBeTruthy();
407
+ expect(input!.getAttribute("type")).toBe("text");
408
+ });
409
+
410
+ it("should handle mixed self-closing styles", () => {
411
+ const doc = parseHTML("<div><br><br/><br /></div>");
412
+ const brs = doc.querySelectorAll("br");
413
+ expect(brs.length).toBe(3);
414
+ });
415
+ });
416
+
417
+ describe("Edge Cases: Additional common scenarios", () => {
418
+ it("should handle empty attributes", () => {
419
+ const doc = parseHTML("<input disabled readonly>");
420
+ const input = doc.querySelector("input");
421
+ expect(input!.hasAttribute("disabled")).toBe(true);
422
+ expect(input!.hasAttribute("readonly")).toBe(true);
423
+ });
424
+
425
+ it("should handle unquoted attribute values", () => {
426
+ const doc = parseHTML("<div class=myclass id=myid></div>");
427
+ const div = doc.querySelector("div");
428
+ expect(div!.getAttribute("class")).toBe("myclass");
429
+ expect(div!.getAttribute("id")).toBe("myid");
430
+ });
431
+
432
+ it("should handle attributes with no value", () => {
433
+ const doc = parseHTML("<option selected>Choice</option>");
434
+ const option = doc.querySelector("option");
435
+ expect(option!.hasAttribute("selected")).toBe(true);
436
+ });
437
+
438
+ it("should handle doctype", () => {
439
+ const doc = parseHTML("<!DOCTYPE html><html><body>Test</body></html>");
440
+ expect(doc.body).toBeTruthy();
441
+ expect(doc.body.textContent).toBe("Test");
442
+ });
443
+
444
+ it("should handle comments between tags", () => {
445
+ const doc = parseHTML("<div><!-- comment --><span>Text</span></div>");
446
+ const span = doc.querySelector("span");
447
+ expect(span).toBeTruthy();
448
+ expect(span!.textContent).toBe("Text");
449
+ });
450
+
451
+ it("should handle deeply nested structure", () => {
452
+ const doc = parseHTML(
453
+ "<div><div><div><div><div><span>Deep</span></div></div></div></div></div>",
454
+ );
455
+ const span = doc.querySelector("span");
456
+ expect(span).toBeTruthy();
457
+ expect(span!.textContent).toBe("Deep");
458
+ });
459
+
460
+ it("should handle adjacent text nodes conceptually", () => {
461
+ const doc = parseHTML("<div>Hello World</div>");
462
+ const div = doc.querySelector("div");
463
+ expect(div!.textContent).toBe("Hello World");
464
+ });
465
+
466
+ it("should handle style tags with CSS", () => {
467
+ const doc = parseHTML(
468
+ "<style>.class { color: red; } #id > div { margin: 0; }</style>",
469
+ );
470
+ const style = doc.querySelector("style");
471
+ expect(style).toBeTruthy();
472
+ expect(style!.textContent).toContain("color: red");
473
+ });
474
+
475
+ it("should handle noscript content", () => {
476
+ const doc = parseHTML(
477
+ "<noscript><div>Please enable JavaScript</div></noscript>",
478
+ );
479
+ const noscript = doc.querySelector("noscript");
480
+ expect(noscript).toBeTruthy();
481
+ });
482
+
483
+ it("should handle data URLs in attributes", () => {
484
+ const doc = parseHTML('<img src="data:image/png;base64,iVBORw0KGgo=">');
485
+ const img = doc.querySelector("img");
486
+ expect(img!.getAttribute("src")).toContain("data:image/png");
487
+ });
488
+ });
489
+
490
+ describe("innerHTML with void elements", () => {
491
+ it("should correctly set innerHTML on void elements like meta tags", () => {
492
+ // Create a document with a custom element
493
+ const html = "<html><body><custom-meta></custom-meta></body></html>";
494
+ const doc = parseHTML(html);
495
+ const customElement = doc.querySelector("custom-meta");
496
+
497
+ // Verify element exists
498
+ expect(customElement).not.toBeNull();
499
+ expect(customElement?.tagName?.toLowerCase()).toBe("custom-meta");
500
+
501
+ // Set innerHTML with meta tags (void elements)
502
+ const metaContent = `<meta name="description" content="Test description">
503
+ <meta name="keywords" content="test, keywords">
504
+ <meta property="og:title" content="Test Title">`;
505
+
506
+ customElement!.innerHTML = metaContent;
507
+
508
+ // The bug: innerHTML should contain the meta tags
509
+ expect(customElement!.innerHTML).toBe(metaContent);
510
+ expect(customElement!.innerHTML.length).toBeGreaterThan(0);
511
+
512
+ // outerHTML should reflect the change
513
+ expect(customElement!.outerHTML).toContain('<meta name="description"');
514
+ expect(customElement!.outerHTML).toContain('<meta name="keywords"');
515
+ expect(customElement!.outerHTML).toContain('<meta property="og:title"');
516
+
517
+ // Should have child nodes
518
+ expect(customElement!.childNodes.length).toBeGreaterThan(0);
519
+ });
520
+
521
+ it("should handle innerHTML assignment in subprocess context", async () => {
522
+ // This test simulates what happens in the component wrapper execution
523
+ const wrapperCode = `
524
+ import { parseHTML } from "${process.cwd()}/src/index.ts";
525
+
526
+ const html = '<html><body><meta-tags></meta-tags></body></html>';
527
+ const doc = parseHTML(html);
528
+ const com = doc.querySelector('meta-tags');
529
+
530
+ if (!com) {
531
+ throw new Error("Element not found");
532
+ }
533
+
534
+ // This is what component code does
535
+ com.innerHTML = \`<meta name="description" content="Test">
536
+ <meta name="keywords" content="test">\`;
537
+
538
+ // Return the result
539
+ console.log(JSON.stringify({
540
+ innerHTML: com.innerHTML,
541
+ outerHTML: com.outerHTML,
542
+ childCount: com.childNodes.length
543
+ }));
544
+ `;
545
+
546
+ // Execute the wrapper code in a subprocess via Bun.spawn
547
+ const proc = Bun.spawn(["bun", "-e", wrapperCode], {
548
+ stdout: "pipe",
549
+ stderr: "pipe",
550
+ cwd: process.cwd(), // Set working directory to current project directory
551
+ });
552
+
553
+ const exitCode = await proc.exited;
554
+
555
+ if (exitCode !== 0) {
556
+ const stderr = await new Response(proc.stderr).text();
557
+ throw new Error(`Subprocess failed: ${stderr}`);
558
+ }
559
+
560
+ const stdout = await new Response(proc.stdout).text();
561
+ const result = JSON.parse(stdout.trim());
562
+
563
+ // The bug manifests here: innerHTML should not be empty
564
+ expect(result.innerHTML).not.toBe("");
565
+ expect(result.innerHTML).toContain('<meta name="description"');
566
+ expect(result.childCount).toBeGreaterThan(0);
567
+ });
568
+
569
+ it("should work correctly with regular elements for comparison", () => {
570
+ // Test with a regular element to ensure it works as expected
571
+ const html = "<html><body><div></div></body></html>";
572
+ const doc = parseHTML(html);
573
+ const div = doc.querySelector("div");
574
+
575
+ expect(div).not.toBeNull();
576
+
577
+ div!.innerHTML = "<span>Test content</span><p>More content</p>";
578
+
579
+ expect(div!.innerHTML).toBe("<span>Test content</span><p>More content</p>");
580
+ expect(div!.childNodes.length).toBe(2);
581
+ expect(div!.outerHTML).toContain("<span>Test content</span>");
582
+ });
583
+ });
@@ -0,0 +1,67 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../src/index";
3
+
4
+ describe("HTML entities in text content", () => {
5
+ it("should preserve &lt; and &gt; entities when serializing innerHTML", () => {
6
+ const doc = parseHTML("<p>&lt;div&gt;</p>");
7
+ const p = doc.querySelector("p");
8
+ expect(p.innerHTML).toBe("&lt;div&gt;");
9
+ });
10
+
11
+ it("should preserve &lt; and &gt; in code elements", () => {
12
+ const doc = parseHTML("<code>&lt;script&gt;alert('xss')&lt;/script&gt;</code>");
13
+ const code = doc.querySelector("code");
14
+ expect(code.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
15
+ });
16
+
17
+ it("should preserve &amp; entity when serializing innerHTML", () => {
18
+ const doc = parseHTML("<span>foo &amp; bar</span>");
19
+ const span = doc.querySelector("span");
20
+ expect(span.innerHTML).toBe("foo &amp; bar");
21
+ });
22
+
23
+ it("should preserve mixed entities in text", () => {
24
+ const doc = parseHTML("<div>&lt;a href=&quot;test&quot;&gt;link&lt;/a&gt;</div>");
25
+ const div = doc.querySelector("div");
26
+ expect(div.innerHTML).toBe('&lt;a href="test"&gt;link&lt;/a&gt;');
27
+ });
28
+
29
+ it("should handle textContent correctly (decoded)", () => {
30
+ const doc = parseHTML("<p>&lt;div&gt;</p>");
31
+ const p = doc.querySelector("p");
32
+ expect(p.textContent).toBe("<div>");
33
+ });
34
+
35
+ it("should preserve entities in outerHTML", () => {
36
+ const doc = parseHTML("<p>&lt;test&gt;</p>");
37
+ const p = doc.querySelector("p");
38
+ expect(p.outerHTML).toBe("<p>&lt;test&gt;</p>");
39
+ });
40
+
41
+ it("should preserve entities in nested elements", () => {
42
+ const doc = parseHTML("<div><span>&lt;nested&gt;</span></div>");
43
+ const div = doc.querySelector("div");
44
+ expect(div.innerHTML).toBe("<span>&lt;nested&gt;</span>");
45
+ });
46
+
47
+ it("should handle multiple text nodes with entities", () => {
48
+ const doc = parseHTML("<p>&lt;first&gt; and &lt;second&gt;</p>");
49
+ const p = doc.querySelector("p");
50
+ expect(p.innerHTML).toBe("&lt;first&gt; and &lt;second&gt;");
51
+ });
52
+
53
+ it("should not double-escape already escaped content", () => {
54
+ const doc = parseHTML("<p>&amp;lt;</p>");
55
+ const p = doc.querySelector("p");
56
+ expect(p.textContent).toBe("&lt;");
57
+ expect(p.innerHTML).toBe("&amp;lt;");
58
+ });
59
+
60
+ it("should preserve entities after DOM manipulation", () => {
61
+ const doc = parseHTML("<div></div>");
62
+ const div = doc.querySelector("div");
63
+ const text = doc.createTextNode("<script>alert('xss')</script>");
64
+ div.appendChild(text);
65
+ expect(div.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
66
+ });
67
+ });
@@ -0,0 +1,84 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../src/index";
3
+
4
+ describe("innerHTML with void elements", () => {
5
+ it('innerHTML should work with void elements', () => {
6
+ const doc = parseHTML('<custom></custom>');
7
+ const element = doc.querySelector('custom');
8
+
9
+ element!.innerHTML = '<meta name="test">';
10
+ expect(element!.innerHTML).toBe('<meta name="test">');
11
+ expect(element!.childNodes.length).toBe(1);
12
+ });
13
+
14
+ it('innerHTML should work with multiple void elements', () => {
15
+ const doc = parseHTML('<custom></custom>');
16
+ const element = doc.querySelector('custom');
17
+
18
+ element!.innerHTML = '<meta name="a"><link rel="b"><input type="c">';
19
+ expect(element!.childNodes.length).toBe(3);
20
+ });
21
+
22
+ it('innerHTML should work with mixed void and non-void elements', () => {
23
+ const doc = parseHTML('<custom></custom>');
24
+ const element = doc.querySelector('custom');
25
+
26
+ element!.innerHTML = '<meta name="test"><div>Hello</div><br><span>World</span>';
27
+ expect(element!.childNodes.length).toBe(4);
28
+ expect(element!.children[0].tagName).toBe('META');
29
+ expect(element!.children[1].tagName).toBe('DIV');
30
+ expect(element!.children[2].tagName).toBe('BR');
31
+ expect(element!.children[3].tagName).toBe('SPAN');
32
+ });
33
+
34
+ it('innerHTML should work with void elements nested inside containers', () => {
35
+ const doc = parseHTML('<custom></custom>');
36
+ const element = doc.querySelector('custom');
37
+
38
+ element!.innerHTML = '<div><img src="test.jpg"><input type="text"></div>';
39
+ expect(element!.childNodes.length).toBe(1);
40
+ const div = element!.children[0];
41
+ expect(div.childNodes.length).toBe(2);
42
+ expect(div.children[0].tagName).toBe('IMG');
43
+ expect(div.children[1].tagName).toBe('INPUT');
44
+ });
45
+
46
+ it('innerHTML can be replaced multiple times with void elements', () => {
47
+ const doc = parseHTML('<custom></custom>');
48
+ const element = doc.querySelector('custom');
49
+
50
+ element!.innerHTML = '<meta name="first">';
51
+ expect(element!.childNodes.length).toBe(1);
52
+
53
+ element!.innerHTML = '<link rel="second"><hr>';
54
+ expect(element!.childNodes.length).toBe(2);
55
+
56
+ element!.innerHTML = '';
57
+ expect(element!.childNodes.length).toBe(0);
58
+ });
59
+
60
+ it('innerHTML should work with all void element types', () => {
61
+ const doc = parseHTML('<custom></custom>');
62
+ const element = doc.querySelector('custom');
63
+
64
+ // Test all void elements
65
+ const voidElements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'source', 'track', 'wbr'];
66
+
67
+ for (const tag of voidElements) {
68
+ element!.innerHTML = `<${tag}>`;
69
+ expect(element!.childNodes.length).toBe(1);
70
+ expect(element!.children[0].tagName).toBe(tag.toUpperCase());
71
+ }
72
+ });
73
+
74
+ it('innerHTML with void elements preserves attributes', () => {
75
+ const doc = parseHTML('<custom></custom>');
76
+ const element = doc.querySelector('custom');
77
+
78
+ element!.innerHTML = '<meta charset="utf-8" name="viewport" content="width=device-width">';
79
+ const meta = element!.children[0];
80
+ expect(meta.getAttribute('charset')).toBe('utf-8');
81
+ expect(meta.getAttribute('name')).toBe('viewport');
82
+ expect(meta.getAttribute('content')).toBe('width=device-width');
83
+ });
84
+ });
@@ -1,4 +1,4 @@
1
- import { describe, it } from "bun:test";
1
+ import { describe, expect, it } from "bun:test";
2
2
  import { readFileSync } from "fs";
3
3
  import { parse } from "../src/index.ts";
4
4