@tkeron/html-parser 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/dom-simulator.ts
CHANGED
|
@@ -856,7 +856,7 @@ function updateElementContent(element: any): void {
|
|
|
856
856
|
const innerHTML = element.childNodes
|
|
857
857
|
.map((child: any) => {
|
|
858
858
|
if (child.nodeType === NodeType.TEXT_NODE) {
|
|
859
|
-
return child.textContent;
|
|
859
|
+
return escapeTextContent(child.textContent || "");
|
|
860
860
|
} else if (child.nodeType === NodeType.ELEMENT_NODE) {
|
|
861
861
|
return child.outerHTML;
|
|
862
862
|
} else if (child.nodeType === NodeType.COMMENT_NODE) {
|
|
@@ -1153,7 +1153,7 @@ export function getInnerHTML(element: any): string {
|
|
|
1153
1153
|
if (child.nodeType === NodeType.ELEMENT_NODE) {
|
|
1154
1154
|
innerHTML += child.outerHTML;
|
|
1155
1155
|
} else if (child.nodeType === NodeType.TEXT_NODE) {
|
|
1156
|
-
innerHTML += child.textContent || "";
|
|
1156
|
+
innerHTML += escapeTextContent(child.textContent || "");
|
|
1157
1157
|
} else if (child.nodeType === NodeType.COMMENT_NODE) {
|
|
1158
1158
|
innerHTML += `<!--${child.data || ""}-->`;
|
|
1159
1159
|
}
|
package/tests/edge-cases.test.ts
CHANGED
|
@@ -3,31 +3,35 @@ import { parseHTML } from "../src/index";
|
|
|
3
3
|
|
|
4
4
|
describe("Edge Cases: Scripts with HTML inside", () => {
|
|
5
5
|
it("should handle script with less-than and greater-than operators", () => {
|
|
6
|
-
const doc = parseHTML(
|
|
7
|
-
|
|
6
|
+
const doc = parseHTML(
|
|
7
|
+
'<script>if (a < b && c > d) { console.log("ok"); }</script>',
|
|
8
|
+
);
|
|
9
|
+
const script = doc.querySelector("script");
|
|
8
10
|
expect(script).toBeTruthy();
|
|
9
|
-
expect(script!.textContent).toContain(
|
|
10
|
-
expect(script!.textContent).toContain(
|
|
11
|
+
expect(script!.textContent).toContain("a < b");
|
|
12
|
+
expect(script!.textContent).toContain("c > d");
|
|
11
13
|
});
|
|
12
14
|
|
|
13
15
|
it("should handle script containing </script> as a string", () => {
|
|
14
16
|
const doc = parseHTML('<script>var html = "<\\/script>";</script>');
|
|
15
|
-
const script = doc.querySelector(
|
|
17
|
+
const script = doc.querySelector("script");
|
|
16
18
|
expect(script).toBeTruthy();
|
|
17
19
|
});
|
|
18
20
|
|
|
19
21
|
it("should handle script with HTML-like content in strings", () => {
|
|
20
|
-
const doc = parseHTML(
|
|
21
|
-
|
|
22
|
+
const doc = parseHTML(
|
|
23
|
+
'<script>var html = "<div class=\\"test\\">Hello</div>";</script>',
|
|
24
|
+
);
|
|
25
|
+
const script = doc.querySelector("script");
|
|
22
26
|
expect(script).toBeTruthy();
|
|
23
|
-
expect(script!.textContent).toContain(
|
|
27
|
+
expect(script!.textContent).toContain("<div");
|
|
24
28
|
});
|
|
25
29
|
|
|
26
30
|
it("should handle script with template literals containing HTML", () => {
|
|
27
|
-
const doc = parseHTML(
|
|
28
|
-
const script = doc.querySelector(
|
|
31
|
+
const doc = parseHTML("<script>const tpl = `<div>${name}</div>`;</script>");
|
|
32
|
+
const script = doc.querySelector("script");
|
|
29
33
|
expect(script).toBeTruthy();
|
|
30
|
-
expect(script!.textContent).toContain(
|
|
34
|
+
expect(script!.textContent).toContain("<div>");
|
|
31
35
|
});
|
|
32
36
|
|
|
33
37
|
it("should handle multiple scripts with complex content", () => {
|
|
@@ -36,84 +40,86 @@ describe("Edge Cases: Scripts with HTML inside", () => {
|
|
|
36
40
|
<script>var b = 3 > 1;</script>
|
|
37
41
|
`;
|
|
38
42
|
const doc = parseHTML(html);
|
|
39
|
-
const scripts = doc.querySelectorAll(
|
|
43
|
+
const scripts = doc.querySelectorAll("script");
|
|
40
44
|
expect(scripts.length).toBe(2);
|
|
41
45
|
});
|
|
42
46
|
});
|
|
43
47
|
|
|
44
48
|
describe("Edge Cases: Template placeholders", () => {
|
|
45
49
|
it("should preserve template placeholders", () => {
|
|
46
|
-
const doc = parseHTML(
|
|
47
|
-
const div = doc.querySelector(
|
|
48
|
-
expect(div!.textContent).toBe(
|
|
50
|
+
const doc = parseHTML("<div>{{user.name}}</div>");
|
|
51
|
+
const div = doc.querySelector("div");
|
|
52
|
+
expect(div!.textContent).toBe("{{user.name}}");
|
|
49
53
|
});
|
|
50
54
|
|
|
51
55
|
it("should preserve placeholders in attributes", () => {
|
|
52
56
|
const doc = parseHTML('<img src="{{imageUrl}}">');
|
|
53
|
-
const img = doc.querySelector(
|
|
54
|
-
expect(img!.getAttribute(
|
|
57
|
+
const img = doc.querySelector("img");
|
|
58
|
+
expect(img!.getAttribute("src")).toBe("{{imageUrl}}");
|
|
55
59
|
});
|
|
56
60
|
|
|
57
61
|
it("should preserve custom attribute prefixes", () => {
|
|
58
|
-
const doc = parseHTML(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
expect(div!.getAttribute(
|
|
62
|
+
const doc = parseHTML(
|
|
63
|
+
'<div data-bind="value" x-on:click="handler" custom-attr="test"></div>',
|
|
64
|
+
);
|
|
65
|
+
const div = doc.querySelector("div");
|
|
66
|
+
expect(div!.getAttribute("data-bind")).toBe("value");
|
|
67
|
+
expect(div!.getAttribute("x-on:click")).toBe("handler");
|
|
68
|
+
expect(div!.getAttribute("custom-attr")).toBe("test");
|
|
63
69
|
});
|
|
64
70
|
|
|
65
71
|
it("should preserve EJS/ERB style placeholders", () => {
|
|
66
|
-
const doc = parseHTML(
|
|
67
|
-
const div = doc.querySelector(
|
|
68
|
-
expect(div!.textContent).toBe(
|
|
72
|
+
const doc = parseHTML("<div><%= user.name %></div>");
|
|
73
|
+
const div = doc.querySelector("div");
|
|
74
|
+
expect(div!.textContent).toBe("<%= user.name %>");
|
|
69
75
|
});
|
|
70
76
|
|
|
71
77
|
it("should handle nested template expressions", () => {
|
|
72
|
-
const doc = parseHTML(
|
|
73
|
-
const div = doc.querySelector(
|
|
74
|
-
expect(div!.textContent).toContain(
|
|
75
|
-
expect(div!.textContent).toContain(
|
|
78
|
+
const doc = parseHTML("<div>{{#each items}}{{this}}{{/each}}</div>");
|
|
79
|
+
const div = doc.querySelector("div");
|
|
80
|
+
expect(div!.textContent).toContain("{{#each items}}");
|
|
81
|
+
expect(div!.textContent).toContain("{{/each}}");
|
|
76
82
|
});
|
|
77
83
|
});
|
|
78
84
|
|
|
79
85
|
describe("Edge Cases: Malformed but common HTML", () => {
|
|
80
86
|
it("should handle unclosed paragraph tags", () => {
|
|
81
|
-
const doc = parseHTML(
|
|
82
|
-
const paragraphs = doc.querySelectorAll(
|
|
87
|
+
const doc = parseHTML("<p>Párrafo 1<p>Párrafo 2");
|
|
88
|
+
const paragraphs = doc.querySelectorAll("p");
|
|
83
89
|
expect(paragraphs.length).toBe(2);
|
|
84
90
|
});
|
|
85
91
|
|
|
86
92
|
it("should handle list items without parent list", () => {
|
|
87
|
-
const doc = parseHTML(
|
|
88
|
-
const items = doc.querySelectorAll(
|
|
93
|
+
const doc = parseHTML("<li>Item 1</li><li>Item 2</li>");
|
|
94
|
+
const items = doc.querySelectorAll("li");
|
|
89
95
|
expect(items.length).toBe(2);
|
|
90
96
|
});
|
|
91
97
|
|
|
92
98
|
it("should handle nested unclosed tags", () => {
|
|
93
|
-
const doc = parseHTML(
|
|
94
|
-
const div = doc.querySelector(
|
|
99
|
+
const doc = parseHTML("<div><span>Text<div>Nested</div></span></div>");
|
|
100
|
+
const div = doc.querySelector("div");
|
|
95
101
|
expect(div).toBeTruthy();
|
|
96
102
|
});
|
|
97
103
|
|
|
98
104
|
it("should handle missing closing tags at end", () => {
|
|
99
|
-
const doc = parseHTML(
|
|
100
|
-
const div = doc.querySelector(
|
|
101
|
-
const span = doc.querySelector(
|
|
105
|
+
const doc = parseHTML("<div><span>Text");
|
|
106
|
+
const div = doc.querySelector("div");
|
|
107
|
+
const span = doc.querySelector("span");
|
|
102
108
|
expect(div).toBeTruthy();
|
|
103
109
|
expect(span).toBeTruthy();
|
|
104
110
|
});
|
|
105
111
|
|
|
106
112
|
it("should handle extra closing tags", () => {
|
|
107
|
-
const doc = parseHTML(
|
|
108
|
-
const div = doc.querySelector(
|
|
113
|
+
const doc = parseHTML("<div>Text</div></div></span>");
|
|
114
|
+
const div = doc.querySelector("div");
|
|
109
115
|
expect(div).toBeTruthy();
|
|
110
|
-
expect(div!.textContent).toBe(
|
|
116
|
+
expect(div!.textContent).toBe("Text");
|
|
111
117
|
});
|
|
112
118
|
|
|
113
119
|
it("should handle incorrectly nested tags", () => {
|
|
114
|
-
const doc = parseHTML(
|
|
115
|
-
const b = doc.querySelector(
|
|
116
|
-
const i = doc.querySelector(
|
|
120
|
+
const doc = parseHTML("<b><i>Bold and italic</b></i>");
|
|
121
|
+
const b = doc.querySelector("b");
|
|
122
|
+
const i = doc.querySelector("i");
|
|
117
123
|
expect(b).toBeTruthy();
|
|
118
124
|
expect(i).toBeTruthy();
|
|
119
125
|
});
|
|
@@ -121,97 +127,105 @@ describe("Edge Cases: Malformed but common HTML", () => {
|
|
|
121
127
|
|
|
122
128
|
describe("Edge Cases: Significant whitespace", () => {
|
|
123
129
|
it("should preserve whitespace in pre tags", () => {
|
|
124
|
-
const doc = parseHTML(
|
|
125
|
-
const pre = doc.querySelector(
|
|
126
|
-
expect(pre!.textContent).toBe(
|
|
130
|
+
const doc = parseHTML("<pre> multiple spaces here </pre>");
|
|
131
|
+
const pre = doc.querySelector("pre");
|
|
132
|
+
expect(pre!.textContent).toBe(" multiple spaces here ");
|
|
127
133
|
});
|
|
128
134
|
|
|
129
135
|
it("should preserve newlines in pre tags", () => {
|
|
130
|
-
const doc = parseHTML(
|
|
131
|
-
const pre = doc.querySelector(
|
|
132
|
-
expect(pre!.textContent).toContain(
|
|
136
|
+
const doc = parseHTML("<pre>line1\nline2\nline3</pre>");
|
|
137
|
+
const pre = doc.querySelector("pre");
|
|
138
|
+
expect(pre!.textContent).toContain("\n");
|
|
133
139
|
});
|
|
134
140
|
|
|
135
141
|
it("should preserve whitespace in code tags", () => {
|
|
136
|
-
const doc = parseHTML(
|
|
137
|
-
const code = doc.querySelector(
|
|
138
|
-
expect(code!.textContent).toBe(
|
|
142
|
+
const doc = parseHTML("<code>function() { }</code>");
|
|
143
|
+
const code = doc.querySelector("code");
|
|
144
|
+
expect(code!.textContent).toBe("function() { }");
|
|
139
145
|
});
|
|
140
146
|
|
|
141
147
|
it("should preserve whitespace in textarea", () => {
|
|
142
|
-
const doc = parseHTML(
|
|
143
|
-
|
|
144
|
-
|
|
148
|
+
const doc = parseHTML(
|
|
149
|
+
"<textarea> indented\n more indented </textarea>",
|
|
150
|
+
);
|
|
151
|
+
const textarea = doc.querySelector("textarea");
|
|
152
|
+
expect(textarea!.textContent).toContain(" indented");
|
|
145
153
|
});
|
|
146
154
|
|
|
147
155
|
it("should handle tabs in pre", () => {
|
|
148
|
-
const doc = parseHTML(
|
|
149
|
-
const pre = doc.querySelector(
|
|
150
|
-
expect(pre!.textContent).toBe(
|
|
156
|
+
const doc = parseHTML("<pre>\ttab\t\ttabs</pre>");
|
|
157
|
+
const pre = doc.querySelector("pre");
|
|
158
|
+
expect(pre!.textContent).toBe("\ttab\t\ttabs");
|
|
151
159
|
});
|
|
152
160
|
});
|
|
153
161
|
|
|
154
162
|
describe("Edge Cases: Special characters in attributes", () => {
|
|
155
163
|
it("should handle JSON in data attributes", () => {
|
|
156
|
-
const doc = parseHTML(
|
|
157
|
-
|
|
158
|
-
|
|
164
|
+
const doc = parseHTML(
|
|
165
|
+
'<div data-json=\'{"key": "value", "num": 123}\'></div>',
|
|
166
|
+
);
|
|
167
|
+
const div = doc.querySelector("div");
|
|
168
|
+
const json = div!.getAttribute("data-json");
|
|
159
169
|
expect(json).toBe('{"key": "value", "num": 123}');
|
|
160
170
|
});
|
|
161
171
|
|
|
162
172
|
it("should handle double quotes inside single-quoted attributes", () => {
|
|
163
173
|
const doc = parseHTML("<div title='He said \"Hello\"'></div>");
|
|
164
|
-
const div = doc.querySelector(
|
|
165
|
-
expect(div!.getAttribute(
|
|
174
|
+
const div = doc.querySelector("div");
|
|
175
|
+
expect(div!.getAttribute("title")).toBe('He said "Hello"');
|
|
166
176
|
});
|
|
167
177
|
|
|
168
178
|
it("should handle single quotes inside double-quoted attributes", () => {
|
|
169
179
|
const doc = parseHTML('<div title="It\'s working"></div>');
|
|
170
|
-
const div = doc.querySelector(
|
|
171
|
-
expect(div!.getAttribute(
|
|
180
|
+
const div = doc.querySelector("div");
|
|
181
|
+
expect(div!.getAttribute("title")).toBe("It's working");
|
|
172
182
|
});
|
|
173
183
|
|
|
174
184
|
it("should handle HTML entities in attributes", () => {
|
|
175
185
|
const doc = parseHTML('<div title="&lt;html&gt;"></div>');
|
|
176
|
-
const div = doc.querySelector(
|
|
186
|
+
const div = doc.querySelector("div");
|
|
177
187
|
// Depending on parser behavior, entities may or may not be decoded
|
|
178
|
-
const title = div!.getAttribute(
|
|
179
|
-
expect(title ===
|
|
188
|
+
const title = div!.getAttribute("title");
|
|
189
|
+
expect(title === "<html>" || title === "&lt;html&gt;").toBe(true);
|
|
180
190
|
});
|
|
181
191
|
|
|
182
192
|
it("should handle unicode characters in attributes", () => {
|
|
183
193
|
const doc = parseHTML('<div title="Hello 世界 🌍"></div>');
|
|
184
|
-
const div = doc.querySelector(
|
|
185
|
-
expect(div!.getAttribute(
|
|
194
|
+
const div = doc.querySelector("div");
|
|
195
|
+
expect(div!.getAttribute("title")).toBe("Hello 世界 🌍");
|
|
186
196
|
});
|
|
187
197
|
|
|
188
198
|
it("should handle newlines in attributes", () => {
|
|
189
199
|
const doc = parseHTML('<div title="line1\nline2"></div>');
|
|
190
|
-
const div = doc.querySelector(
|
|
191
|
-
expect(div!.getAttribute(
|
|
200
|
+
const div = doc.querySelector("div");
|
|
201
|
+
expect(div!.getAttribute("title")).toContain("line1");
|
|
192
202
|
});
|
|
193
203
|
|
|
194
204
|
it("should handle equals signs in attribute values", () => {
|
|
195
205
|
const doc = parseHTML('<div data-equation="a=b+c"></div>');
|
|
196
|
-
const div = doc.querySelector(
|
|
197
|
-
expect(div!.getAttribute(
|
|
206
|
+
const div = doc.querySelector("div");
|
|
207
|
+
expect(div!.getAttribute("data-equation")).toBe("a=b+c");
|
|
198
208
|
});
|
|
199
209
|
});
|
|
200
210
|
|
|
201
211
|
describe("Edge Cases: SVG and MathML inline", () => {
|
|
202
212
|
it("should parse inline SVG", () => {
|
|
203
|
-
const doc = parseHTML(
|
|
204
|
-
|
|
213
|
+
const doc = parseHTML(
|
|
214
|
+
'<svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>',
|
|
215
|
+
);
|
|
216
|
+
const svg = doc.querySelector("svg");
|
|
205
217
|
expect(svg).toBeTruthy();
|
|
206
|
-
expect(svg!.getAttribute(
|
|
218
|
+
expect(svg!.getAttribute("width")).toBe("100");
|
|
207
219
|
});
|
|
208
220
|
|
|
209
221
|
it("should parse SVG with nested elements", () => {
|
|
210
|
-
const doc = parseHTML(
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
const
|
|
214
|
-
const
|
|
222
|
+
const doc = parseHTML(
|
|
223
|
+
'<svg><g><rect width="10" height="10"/><text>Hello</text></g></svg>',
|
|
224
|
+
);
|
|
225
|
+
const svg = doc.querySelector("svg");
|
|
226
|
+
const g = doc.querySelector("g");
|
|
227
|
+
const rect = doc.querySelector("rect");
|
|
228
|
+
const text = doc.querySelector("text");
|
|
215
229
|
expect(svg).toBeTruthy();
|
|
216
230
|
expect(g).toBeTruthy();
|
|
217
231
|
expect(rect).toBeTruthy();
|
|
@@ -219,25 +233,27 @@ describe("Edge Cases: SVG and MathML inline", () => {
|
|
|
219
233
|
});
|
|
220
234
|
|
|
221
235
|
it("should handle foreignObject with HTML inside SVG", () => {
|
|
222
|
-
const doc = parseHTML(
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
const
|
|
236
|
+
const doc = parseHTML(
|
|
237
|
+
"<svg><foreignObject><div>HTML inside SVG</div></foreignObject></svg>",
|
|
238
|
+
);
|
|
239
|
+
const svg = doc.querySelector("svg");
|
|
240
|
+
const fo = doc.querySelector("foreignObject");
|
|
241
|
+
const div = doc.querySelector("div");
|
|
226
242
|
expect(svg).toBeTruthy();
|
|
227
243
|
expect(fo).toBeTruthy();
|
|
228
244
|
expect(div).toBeTruthy();
|
|
229
245
|
});
|
|
230
246
|
|
|
231
247
|
it("should parse inline MathML", () => {
|
|
232
|
-
const doc = parseHTML(
|
|
233
|
-
const math = doc.querySelector(
|
|
248
|
+
const doc = parseHTML("<math><mi>x</mi><mo>=</mo><mn>2</mn></math>");
|
|
249
|
+
const math = doc.querySelector("math");
|
|
234
250
|
expect(math).toBeTruthy();
|
|
235
251
|
});
|
|
236
252
|
|
|
237
253
|
it("should handle SVG with CDATA-like content in style", () => {
|
|
238
|
-
const doc = parseHTML(
|
|
239
|
-
const svg = doc.querySelector(
|
|
240
|
-
const style = doc.querySelector(
|
|
254
|
+
const doc = parseHTML("<svg><style>/* styles */</style></svg>");
|
|
255
|
+
const svg = doc.querySelector("svg");
|
|
256
|
+
const style = doc.querySelector("style");
|
|
241
257
|
expect(svg).toBeTruthy();
|
|
242
258
|
expect(style).toBeTruthy();
|
|
243
259
|
});
|
|
@@ -245,20 +261,22 @@ describe("Edge Cases: SVG and MathML inline", () => {
|
|
|
245
261
|
|
|
246
262
|
describe("Edge Cases: CDATA sections", () => {
|
|
247
263
|
it("should handle CDATA in script", () => {
|
|
248
|
-
const doc = parseHTML(
|
|
249
|
-
const script = doc.querySelector(
|
|
264
|
+
const doc = parseHTML("<script>//<![CDATA[\nvar x = 1;\n//]]></script>");
|
|
265
|
+
const script = doc.querySelector("script");
|
|
250
266
|
expect(script).toBeTruthy();
|
|
251
267
|
});
|
|
252
268
|
|
|
253
269
|
it("should handle CDATA in style", () => {
|
|
254
|
-
const doc = parseHTML(
|
|
255
|
-
|
|
270
|
+
const doc = parseHTML(
|
|
271
|
+
"<style>/*<![CDATA[*/ body { color: red; } /*]]>*/</style>",
|
|
272
|
+
);
|
|
273
|
+
const style = doc.querySelector("style");
|
|
256
274
|
expect(style).toBeTruthy();
|
|
257
275
|
});
|
|
258
276
|
|
|
259
277
|
it("should handle XML CDATA sections", () => {
|
|
260
|
-
const doc = parseHTML(
|
|
261
|
-
const div = doc.querySelector(
|
|
278
|
+
const doc = parseHTML("<div><![CDATA[Some <special> content]]></div>");
|
|
279
|
+
const div = doc.querySelector("div");
|
|
262
280
|
expect(div).toBeTruthy();
|
|
263
281
|
});
|
|
264
282
|
});
|
|
@@ -271,187 +289,295 @@ describe("Edge Cases: IE Conditional Comments", () => {
|
|
|
271
289
|
});
|
|
272
290
|
|
|
273
291
|
it("should handle IE conditional with version", () => {
|
|
274
|
-
const doc = parseHTML(
|
|
292
|
+
const doc = parseHTML(
|
|
293
|
+
'<!--[if lt IE 9]><script src="html5shiv.js"></script><![endif]-->',
|
|
294
|
+
);
|
|
275
295
|
expect(doc).toBeTruthy();
|
|
276
296
|
});
|
|
277
297
|
|
|
278
298
|
it("should handle downlevel-hidden conditional", () => {
|
|
279
|
-
const doc = parseHTML(
|
|
299
|
+
const doc = parseHTML(
|
|
300
|
+
'<!--[if !IE]>--><link href="modern.css"><!--<![endif]-->',
|
|
301
|
+
);
|
|
280
302
|
expect(doc).toBeTruthy();
|
|
281
303
|
});
|
|
282
304
|
|
|
283
305
|
it("should preserve content around conditional comments", () => {
|
|
284
|
-
const doc = parseHTML(
|
|
285
|
-
|
|
306
|
+
const doc = parseHTML(
|
|
307
|
+
"<div>Before</div><!--[if IE]>IE only<![endif]--><div>After</div>",
|
|
308
|
+
);
|
|
309
|
+
const divs = doc.querySelectorAll("div");
|
|
286
310
|
expect(divs.length).toBe(2);
|
|
287
|
-
expect(divs[0].textContent).toBe(
|
|
288
|
-
expect(divs[1].textContent).toBe(
|
|
311
|
+
expect(divs[0].textContent).toBe("Before");
|
|
312
|
+
expect(divs[1].textContent).toBe("After");
|
|
289
313
|
});
|
|
290
314
|
});
|
|
291
315
|
|
|
292
316
|
describe("Edge Cases: innerHTML on special elements", () => {
|
|
293
317
|
it("should handle innerHTML on table with tr/td", () => {
|
|
294
|
-
const doc = parseHTML(
|
|
295
|
-
const table = doc.querySelector(
|
|
296
|
-
table!.innerHTML =
|
|
318
|
+
const doc = parseHTML("<table></table>");
|
|
319
|
+
const table = doc.querySelector("table");
|
|
320
|
+
table!.innerHTML = "<tr><td>Cell 1</td><td>Cell 2</td></tr>";
|
|
297
321
|
// Browser auto-wraps in tbody, parser behavior may vary
|
|
298
|
-
const tr = table!.querySelector(
|
|
299
|
-
const tds = table!.querySelectorAll(
|
|
322
|
+
const tr = table!.querySelector("tr");
|
|
323
|
+
const tds = table!.querySelectorAll("td");
|
|
300
324
|
expect(tr).toBeTruthy();
|
|
301
325
|
expect(tds.length).toBe(2);
|
|
302
326
|
});
|
|
303
327
|
|
|
304
328
|
it("should handle innerHTML on select with options", () => {
|
|
305
|
-
const doc = parseHTML(
|
|
306
|
-
const select = doc.querySelector(
|
|
307
|
-
select!.innerHTML =
|
|
308
|
-
|
|
329
|
+
const doc = parseHTML("<select></select>");
|
|
330
|
+
const select = doc.querySelector("select");
|
|
331
|
+
select!.innerHTML =
|
|
332
|
+
'<option value="1">One</option><option value="2">Two</option>';
|
|
333
|
+
const options = select!.querySelectorAll("option");
|
|
309
334
|
expect(options.length).toBe(2);
|
|
310
335
|
});
|
|
311
336
|
|
|
312
337
|
it("should handle innerHTML on ul with li", () => {
|
|
313
|
-
const doc = parseHTML(
|
|
314
|
-
const ul = doc.querySelector(
|
|
315
|
-
ul!.innerHTML =
|
|
316
|
-
const items = ul!.querySelectorAll(
|
|
338
|
+
const doc = parseHTML("<ul></ul>");
|
|
339
|
+
const ul = doc.querySelector("ul");
|
|
340
|
+
ul!.innerHTML = "<li>Item 1</li><li>Item 2</li><li>Item 3</li>";
|
|
341
|
+
const items = ul!.querySelectorAll("li");
|
|
317
342
|
expect(items.length).toBe(3);
|
|
318
343
|
});
|
|
319
344
|
|
|
320
345
|
it("should handle innerHTML on template element", () => {
|
|
321
|
-
const doc = parseHTML(
|
|
322
|
-
const template = doc.querySelector(
|
|
323
|
-
template!.innerHTML =
|
|
346
|
+
const doc = parseHTML("<template></template>");
|
|
347
|
+
const template = doc.querySelector("template");
|
|
348
|
+
template!.innerHTML = "<div>Template content</div>";
|
|
324
349
|
// Template behavior is special in browsers
|
|
325
350
|
expect(template).toBeTruthy();
|
|
326
351
|
});
|
|
327
352
|
|
|
328
353
|
it("should handle innerHTML on colgroup", () => {
|
|
329
|
-
const doc = parseHTML(
|
|
330
|
-
const colgroup = doc.querySelector(
|
|
354
|
+
const doc = parseHTML("<table><colgroup></colgroup></table>");
|
|
355
|
+
const colgroup = doc.querySelector("colgroup");
|
|
331
356
|
colgroup!.innerHTML = '<col span="2"><col>';
|
|
332
|
-
const cols = colgroup!.querySelectorAll(
|
|
357
|
+
const cols = colgroup!.querySelectorAll("col");
|
|
333
358
|
expect(cols.length).toBe(2);
|
|
334
359
|
});
|
|
335
360
|
|
|
336
361
|
it("should handle innerHTML replacement multiple times on table", () => {
|
|
337
|
-
const doc = parseHTML(
|
|
338
|
-
const table = doc.querySelector(
|
|
339
|
-
|
|
340
|
-
table!.innerHTML =
|
|
341
|
-
expect(table!.querySelector(
|
|
342
|
-
|
|
343
|
-
table!.innerHTML =
|
|
344
|
-
expect(table!.querySelector(
|
|
362
|
+
const doc = parseHTML("<table></table>");
|
|
363
|
+
const table = doc.querySelector("table");
|
|
364
|
+
|
|
365
|
+
table!.innerHTML = "<tr><td>First</td></tr>";
|
|
366
|
+
expect(table!.querySelector("td")!.textContent).toBe("First");
|
|
367
|
+
|
|
368
|
+
table!.innerHTML = "<tr><td>Second</td></tr>";
|
|
369
|
+
expect(table!.querySelector("td")!.textContent).toBe("Second");
|
|
345
370
|
});
|
|
346
371
|
});
|
|
347
372
|
|
|
348
373
|
describe("Edge Cases: Self-closing tags in HTML5", () => {
|
|
349
374
|
it("should handle self-closing div (invalid in HTML5)", () => {
|
|
350
|
-
const doc = parseHTML(
|
|
351
|
-
const div = doc.querySelector(
|
|
375
|
+
const doc = parseHTML("<div/>");
|
|
376
|
+
const div = doc.querySelector("div");
|
|
352
377
|
expect(div).toBeTruthy();
|
|
353
378
|
// In HTML5, <div/> is treated as <div> (not closed)
|
|
354
379
|
});
|
|
355
380
|
|
|
356
381
|
it("should handle self-closing span", () => {
|
|
357
|
-
const doc = parseHTML(
|
|
358
|
-
const span = doc.querySelector(
|
|
382
|
+
const doc = parseHTML("<span/>text");
|
|
383
|
+
const span = doc.querySelector("span");
|
|
359
384
|
expect(span).toBeTruthy();
|
|
360
385
|
});
|
|
361
386
|
|
|
362
387
|
it("should handle valid self-closing void elements", () => {
|
|
363
|
-
const doc = parseHTML(
|
|
364
|
-
const br = doc.querySelector(
|
|
365
|
-
const hr = doc.querySelector(
|
|
366
|
-
const img = doc.querySelector(
|
|
388
|
+
const doc = parseHTML("<br/><hr/><img/>");
|
|
389
|
+
const br = doc.querySelector("br");
|
|
390
|
+
const hr = doc.querySelector("hr");
|
|
391
|
+
const img = doc.querySelector("img");
|
|
367
392
|
expect(br).toBeTruthy();
|
|
368
393
|
expect(hr).toBeTruthy();
|
|
369
394
|
expect(img).toBeTruthy();
|
|
370
395
|
});
|
|
371
396
|
|
|
372
397
|
it("should handle self-closing with space before slash", () => {
|
|
373
|
-
const doc = parseHTML(
|
|
374
|
-
const br = doc.querySelector(
|
|
398
|
+
const doc = parseHTML("<br />");
|
|
399
|
+
const br = doc.querySelector("br");
|
|
375
400
|
expect(br).toBeTruthy();
|
|
376
401
|
});
|
|
377
402
|
|
|
378
403
|
it("should handle self-closing in XHTML style", () => {
|
|
379
404
|
const doc = parseHTML('<input type="text" />');
|
|
380
|
-
const input = doc.querySelector(
|
|
405
|
+
const input = doc.querySelector("input");
|
|
381
406
|
expect(input).toBeTruthy();
|
|
382
|
-
expect(input!.getAttribute(
|
|
407
|
+
expect(input!.getAttribute("type")).toBe("text");
|
|
383
408
|
});
|
|
384
409
|
|
|
385
410
|
it("should handle mixed self-closing styles", () => {
|
|
386
|
-
const doc = parseHTML(
|
|
387
|
-
const brs = doc.querySelectorAll(
|
|
411
|
+
const doc = parseHTML("<div><br><br/><br /></div>");
|
|
412
|
+
const brs = doc.querySelectorAll("br");
|
|
388
413
|
expect(brs.length).toBe(3);
|
|
389
414
|
});
|
|
390
415
|
});
|
|
391
416
|
|
|
392
417
|
describe("Edge Cases: Additional common scenarios", () => {
|
|
393
418
|
it("should handle empty attributes", () => {
|
|
394
|
-
const doc = parseHTML(
|
|
395
|
-
const input = doc.querySelector(
|
|
396
|
-
expect(input!.hasAttribute(
|
|
397
|
-
expect(input!.hasAttribute(
|
|
419
|
+
const doc = parseHTML("<input disabled readonly>");
|
|
420
|
+
const input = doc.querySelector("input");
|
|
421
|
+
expect(input!.hasAttribute("disabled")).toBe(true);
|
|
422
|
+
expect(input!.hasAttribute("readonly")).toBe(true);
|
|
398
423
|
});
|
|
399
424
|
|
|
400
425
|
it("should handle unquoted attribute values", () => {
|
|
401
|
-
const doc = parseHTML(
|
|
402
|
-
const div = doc.querySelector(
|
|
403
|
-
expect(div!.getAttribute(
|
|
404
|
-
expect(div!.getAttribute(
|
|
426
|
+
const doc = parseHTML("<div class=myclass id=myid></div>");
|
|
427
|
+
const div = doc.querySelector("div");
|
|
428
|
+
expect(div!.getAttribute("class")).toBe("myclass");
|
|
429
|
+
expect(div!.getAttribute("id")).toBe("myid");
|
|
405
430
|
});
|
|
406
431
|
|
|
407
432
|
it("should handle attributes with no value", () => {
|
|
408
|
-
const doc = parseHTML(
|
|
409
|
-
const option = doc.querySelector(
|
|
410
|
-
expect(option!.hasAttribute(
|
|
433
|
+
const doc = parseHTML("<option selected>Choice</option>");
|
|
434
|
+
const option = doc.querySelector("option");
|
|
435
|
+
expect(option!.hasAttribute("selected")).toBe(true);
|
|
411
436
|
});
|
|
412
437
|
|
|
413
438
|
it("should handle doctype", () => {
|
|
414
|
-
const doc = parseHTML(
|
|
439
|
+
const doc = parseHTML("<!DOCTYPE html><html><body>Test</body></html>");
|
|
415
440
|
expect(doc.body).toBeTruthy();
|
|
416
|
-
expect(doc.body.textContent).toBe(
|
|
441
|
+
expect(doc.body.textContent).toBe("Test");
|
|
417
442
|
});
|
|
418
443
|
|
|
419
444
|
it("should handle comments between tags", () => {
|
|
420
|
-
const doc = parseHTML(
|
|
421
|
-
const span = doc.querySelector(
|
|
445
|
+
const doc = parseHTML("<div><!-- comment --><span>Text</span></div>");
|
|
446
|
+
const span = doc.querySelector("span");
|
|
422
447
|
expect(span).toBeTruthy();
|
|
423
|
-
expect(span!.textContent).toBe(
|
|
448
|
+
expect(span!.textContent).toBe("Text");
|
|
424
449
|
});
|
|
425
450
|
|
|
426
451
|
it("should handle deeply nested structure", () => {
|
|
427
|
-
const doc = parseHTML(
|
|
428
|
-
|
|
452
|
+
const doc = parseHTML(
|
|
453
|
+
"<div><div><div><div><div><span>Deep</span></div></div></div></div></div>",
|
|
454
|
+
);
|
|
455
|
+
const span = doc.querySelector("span");
|
|
429
456
|
expect(span).toBeTruthy();
|
|
430
|
-
expect(span!.textContent).toBe(
|
|
457
|
+
expect(span!.textContent).toBe("Deep");
|
|
431
458
|
});
|
|
432
459
|
|
|
433
460
|
it("should handle adjacent text nodes conceptually", () => {
|
|
434
|
-
const doc = parseHTML(
|
|
435
|
-
const div = doc.querySelector(
|
|
436
|
-
expect(div!.textContent).toBe(
|
|
461
|
+
const doc = parseHTML("<div>Hello World</div>");
|
|
462
|
+
const div = doc.querySelector("div");
|
|
463
|
+
expect(div!.textContent).toBe("Hello World");
|
|
437
464
|
});
|
|
438
465
|
|
|
439
466
|
it("should handle style tags with CSS", () => {
|
|
440
|
-
const doc = parseHTML(
|
|
441
|
-
|
|
467
|
+
const doc = parseHTML(
|
|
468
|
+
"<style>.class { color: red; } #id > div { margin: 0; }</style>",
|
|
469
|
+
);
|
|
470
|
+
const style = doc.querySelector("style");
|
|
442
471
|
expect(style).toBeTruthy();
|
|
443
|
-
expect(style!.textContent).toContain(
|
|
472
|
+
expect(style!.textContent).toContain("color: red");
|
|
444
473
|
});
|
|
445
474
|
|
|
446
475
|
it("should handle noscript content", () => {
|
|
447
|
-
const doc = parseHTML(
|
|
448
|
-
|
|
476
|
+
const doc = parseHTML(
|
|
477
|
+
"<noscript><div>Please enable JavaScript</div></noscript>",
|
|
478
|
+
);
|
|
479
|
+
const noscript = doc.querySelector("noscript");
|
|
449
480
|
expect(noscript).toBeTruthy();
|
|
450
481
|
});
|
|
451
482
|
|
|
452
483
|
it("should handle data URLs in attributes", () => {
|
|
453
484
|
const doc = parseHTML('<img src="data:image/png;base64,iVBORw0KGgo=">');
|
|
454
|
-
const img = doc.querySelector(
|
|
455
|
-
expect(img!.getAttribute(
|
|
485
|
+
const img = doc.querySelector("img");
|
|
486
|
+
expect(img!.getAttribute("src")).toContain("data:image/png");
|
|
487
|
+
});
|
|
488
|
+
});
|
|
489
|
+
|
|
490
|
+
describe("innerHTML with void elements", () => {
|
|
491
|
+
it("should correctly set innerHTML on void elements like meta tags", () => {
|
|
492
|
+
// Create a document with a custom element
|
|
493
|
+
const html = "<html><body><custom-meta></custom-meta></body></html>";
|
|
494
|
+
const doc = parseHTML(html);
|
|
495
|
+
const customElement = doc.querySelector("custom-meta");
|
|
496
|
+
|
|
497
|
+
// Verify element exists
|
|
498
|
+
expect(customElement).not.toBeNull();
|
|
499
|
+
expect(customElement?.tagName?.toLowerCase()).toBe("custom-meta");
|
|
500
|
+
|
|
501
|
+
// Set innerHTML with meta tags (void elements)
|
|
502
|
+
const metaContent = `<meta name="description" content="Test description">
|
|
503
|
+
<meta name="keywords" content="test, keywords">
|
|
504
|
+
<meta property="og:title" content="Test Title">`;
|
|
505
|
+
|
|
506
|
+
customElement!.innerHTML = metaContent;
|
|
507
|
+
|
|
508
|
+
// The bug: innerHTML should contain the meta tags
|
|
509
|
+
expect(customElement!.innerHTML).toBe(metaContent);
|
|
510
|
+
expect(customElement!.innerHTML.length).toBeGreaterThan(0);
|
|
511
|
+
|
|
512
|
+
// outerHTML should reflect the change
|
|
513
|
+
expect(customElement!.outerHTML).toContain('<meta name="description"');
|
|
514
|
+
expect(customElement!.outerHTML).toContain('<meta name="keywords"');
|
|
515
|
+
expect(customElement!.outerHTML).toContain('<meta property="og:title"');
|
|
516
|
+
|
|
517
|
+
// Should have child nodes
|
|
518
|
+
expect(customElement!.childNodes.length).toBeGreaterThan(0);
|
|
519
|
+
});
|
|
520
|
+
|
|
521
|
+
it("should handle innerHTML assignment in subprocess context", async () => {
|
|
522
|
+
// This test simulates what happens in the component wrapper execution
|
|
523
|
+
const wrapperCode = `
|
|
524
|
+
import { parseHTML } from "${process.cwd()}/src/index.ts";
|
|
525
|
+
|
|
526
|
+
const html = '<html><body><meta-tags></meta-tags></body></html>';
|
|
527
|
+
const doc = parseHTML(html);
|
|
528
|
+
const com = doc.querySelector('meta-tags');
|
|
529
|
+
|
|
530
|
+
if (!com) {
|
|
531
|
+
throw new Error("Element not found");
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// This is what component code does
|
|
535
|
+
com.innerHTML = \`<meta name="description" content="Test">
|
|
536
|
+
<meta name="keywords" content="test">\`;
|
|
537
|
+
|
|
538
|
+
// Return the result
|
|
539
|
+
console.log(JSON.stringify({
|
|
540
|
+
innerHTML: com.innerHTML,
|
|
541
|
+
outerHTML: com.outerHTML,
|
|
542
|
+
childCount: com.childNodes.length
|
|
543
|
+
}));
|
|
544
|
+
`;
|
|
545
|
+
|
|
546
|
+
// Execute the wrapper code in a subprocess (simulating Bun.spawn)
|
|
547
|
+
const proc = Bun.spawn(["bun", "-e", wrapperCode], {
|
|
548
|
+
stdout: "pipe",
|
|
549
|
+
stderr: "pipe",
|
|
550
|
+
cwd: process.cwd(), // Set working directory to current project directory
|
|
551
|
+
});
|
|
552
|
+
|
|
553
|
+
const exitCode = await proc.exited;
|
|
554
|
+
|
|
555
|
+
if (exitCode !== 0) {
|
|
556
|
+
const stderr = await new Response(proc.stderr).text();
|
|
557
|
+
throw new Error(`Subprocess failed: ${stderr}`);
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
const stdout = await new Response(proc.stdout).text();
|
|
561
|
+
const result = JSON.parse(stdout.trim());
|
|
562
|
+
|
|
563
|
+
// The bug manifests here: innerHTML should not be empty
|
|
564
|
+
expect(result.innerHTML).not.toBe("");
|
|
565
|
+
expect(result.innerHTML).toContain('<meta name="description"');
|
|
566
|
+
expect(result.childCount).toBeGreaterThan(0);
|
|
567
|
+
});
|
|
568
|
+
|
|
569
|
+
it("should work correctly with regular elements for comparison", () => {
|
|
570
|
+
// Test with a regular element to ensure it works as expected
|
|
571
|
+
const html = "<html><body><div></div></body></html>";
|
|
572
|
+
const doc = parseHTML(html);
|
|
573
|
+
const div = doc.querySelector("div");
|
|
574
|
+
|
|
575
|
+
expect(div).not.toBeNull();
|
|
576
|
+
|
|
577
|
+
div!.innerHTML = "<span>Test content</span><p>More content</p>";
|
|
578
|
+
|
|
579
|
+
expect(div!.innerHTML).toBe("<span>Test content</span><p>More content</p>");
|
|
580
|
+
expect(div!.childNodes.length).toBe(2);
|
|
581
|
+
expect(div!.outerHTML).toContain("<span>Test content</span>");
|
|
456
582
|
});
|
|
457
583
|
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../src/index";
|
|
3
|
+
|
|
4
|
+
describe("HTML entities in text content", () => {
|
|
5
|
+
it("should preserve < and > entities when serializing innerHTML", () => {
|
|
6
|
+
const doc = parseHTML("<p>&lt;div&gt;</p>");
|
|
7
|
+
const p = doc.querySelector("p");
|
|
8
|
+
expect(p.innerHTML).toBe("&lt;div&gt;");
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
it("should preserve < and > in code elements", () => {
|
|
12
|
+
const doc = parseHTML("<code>&lt;script&gt;alert('xss')&lt;/script&gt;</code>");
|
|
13
|
+
const code = doc.querySelector("code");
|
|
14
|
+
expect(code.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
it("should preserve & entity when serializing innerHTML", () => {
|
|
18
|
+
const doc = parseHTML("<span>foo &amp; bar</span>");
|
|
19
|
+
const span = doc.querySelector("span");
|
|
20
|
+
expect(span.innerHTML).toBe("foo &amp; bar");
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
it("should preserve mixed entities in text", () => {
|
|
24
|
+
const doc = parseHTML("<div>&lt;a href=&quot;test&quot;&gt;link&lt;/a&gt;</div>");
|
|
25
|
+
const div = doc.querySelector("div");
|
|
26
|
+
expect(div.innerHTML).toBe('&lt;a href="test"&gt;link&lt;/a&gt;');
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it("should handle textContent correctly (decoded)", () => {
|
|
30
|
+
const doc = parseHTML("<p>&lt;div&gt;</p>");
|
|
31
|
+
const p = doc.querySelector("p");
|
|
32
|
+
expect(p.textContent).toBe("<div>");
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it("should preserve entities in outerHTML", () => {
|
|
36
|
+
const doc = parseHTML("<p>&lt;test&gt;</p>");
|
|
37
|
+
const p = doc.querySelector("p");
|
|
38
|
+
expect(p.outerHTML).toBe("<p>&lt;test&gt;</p>");
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it("should preserve entities in nested elements", () => {
|
|
42
|
+
const doc = parseHTML("<div><span>&lt;nested&gt;</span></div>");
|
|
43
|
+
const div = doc.querySelector("div");
|
|
44
|
+
expect(div.innerHTML).toBe("<span>&lt;nested&gt;</span>");
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it("should handle multiple text nodes with entities", () => {
|
|
48
|
+
const doc = parseHTML("<p>&lt;first&gt; and &lt;second&gt;</p>");
|
|
49
|
+
const p = doc.querySelector("p");
|
|
50
|
+
expect(p.innerHTML).toBe("&lt;first&gt; and &lt;second&gt;");
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it("should not double-escape already escaped content", () => {
|
|
54
|
+
const doc = parseHTML("<p>&amp;lt;</p>");
|
|
55
|
+
const p = doc.querySelector("p");
|
|
56
|
+
expect(p.textContent).toBe("&lt;");
|
|
57
|
+
expect(p.innerHTML).toBe("&amp;lt;");
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("should preserve entities after DOM manipulation", () => {
|
|
61
|
+
const doc = parseHTML("<div></div>");
|
|
62
|
+
const div = doc.querySelector("div");
|
|
63
|
+
const text = doc.createTextNode("<script>alert('xss')</script>");
|
|
64
|
+
div.appendChild(text);
|
|
65
|
+
expect(div.innerHTML).toBe("&lt;script&gt;alert('xss')&lt;/script&gt;");
|
|
66
|
+
});
|
|
67
|
+
});
|