@tkeron/html-parser 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm_deploy.yml +14 -4
- package/README.md +6 -6
- package/bun.lock +6 -8
- package/check-versions.ts +147 -0
- package/index.ts +4 -8
- package/package.json +5 -6
- package/src/dom-simulator/append-child.ts +130 -0
- package/src/dom-simulator/append.ts +18 -0
- package/src/dom-simulator/attributes.ts +23 -0
- package/src/dom-simulator/clone-node.ts +51 -0
- package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
- package/src/dom-simulator/create-cdata.ts +18 -0
- package/src/dom-simulator/create-comment.ts +23 -0
- package/src/dom-simulator/create-doctype.ts +24 -0
- package/src/dom-simulator/create-document.ts +81 -0
- package/src/dom-simulator/create-element.ts +195 -0
- package/src/dom-simulator/create-processing-instruction.ts +19 -0
- package/src/dom-simulator/create-temp-parent.ts +9 -0
- package/src/dom-simulator/create-text-node.ts +23 -0
- package/src/dom-simulator/escape-text-content.ts +6 -0
- package/src/dom-simulator/find-special-elements.ts +14 -0
- package/src/dom-simulator/get-text-content.ts +18 -0
- package/src/dom-simulator/index.ts +36 -0
- package/src/dom-simulator/inner-outer-html.ts +182 -0
- package/src/dom-simulator/insert-after.ts +20 -0
- package/src/dom-simulator/insert-before.ts +108 -0
- package/src/dom-simulator/matches.ts +26 -0
- package/src/dom-simulator/node-types.ts +26 -0
- package/src/dom-simulator/prepend.ts +24 -0
- package/src/dom-simulator/remove-child.ts +68 -0
- package/src/dom-simulator/remove.ts +7 -0
- package/src/dom-simulator/replace-child.ts +152 -0
- package/src/dom-simulator/set-text-content.ts +33 -0
- package/src/dom-simulator/update-element-content.ts +56 -0
- package/src/dom-simulator.ts +12 -1126
- package/src/encoding/constants.ts +8 -0
- package/src/encoding/detect-encoding.ts +21 -0
- package/src/encoding/index.ts +1 -0
- package/src/encoding/normalize-encoding.ts +6 -0
- package/src/html-entities.ts +2127 -0
- package/src/index.ts +5 -5
- package/src/parser/adoption-agency-helpers.ts +145 -0
- package/src/parser/constants.ts +137 -0
- package/src/parser/dom-to-ast.ts +79 -0
- package/src/parser/index.ts +9 -0
- package/src/parser/parse.ts +772 -0
- package/src/parser/types.ts +56 -0
- package/src/selectors/find-elements-descendant.ts +47 -0
- package/src/selectors/index.ts +2 -0
- package/src/selectors/matches-selector.ts +12 -0
- package/src/selectors/matches-token.ts +27 -0
- package/src/selectors/parse-selector.ts +48 -0
- package/src/selectors/query-selector-all.ts +43 -0
- package/src/selectors/query-selector.ts +6 -0
- package/src/selectors/types.ts +10 -0
- package/src/serializer/attributes.ts +74 -0
- package/src/serializer/escape.ts +13 -0
- package/src/serializer/index.ts +1 -0
- package/src/serializer/serialize-tokens.ts +511 -0
- package/src/tokenizer/calculate-position.ts +10 -0
- package/src/tokenizer/constants.ts +11 -0
- package/src/tokenizer/decode-entities.ts +64 -0
- package/src/tokenizer/index.ts +2 -0
- package/src/tokenizer/parse-attributes.ts +74 -0
- package/src/tokenizer/tokenize.ts +165 -0
- package/src/tokenizer/types.ts +25 -0
- package/tests/adoption-agency-helpers.test.ts +304 -0
- package/tests/advanced.test.ts +242 -221
- package/tests/cloneNode.test.ts +19 -66
- package/tests/custom-elements-head.test.ts +54 -55
- package/tests/dom-extended.test.ts +77 -64
- package/tests/dom-manipulation.test.ts +51 -24
- package/tests/dom.test.ts +15 -13
- package/tests/encoding/detect-encoding.test.ts +33 -0
- package/tests/google-dom.test.ts +2 -2
- package/tests/helpers/tokenizer-adapter.test.ts +29 -43
- package/tests/helpers/tokenizer-adapter.ts +36 -33
- package/tests/helpers/tree-adapter.test.ts +20 -20
- package/tests/helpers/tree-adapter.ts +34 -24
- package/tests/html-entities-text.test.ts +6 -2
- package/tests/innerhtml-void-elements.test.ts +52 -36
- package/tests/outerHTML-replacement.test.ts +37 -65
- package/tests/parser/dom-to-ast.test.ts +109 -0
- package/tests/parser/parse.test.ts +139 -0
- package/tests/parser.test.ts +281 -217
- package/tests/selectors/query-selector-all.test.ts +39 -0
- package/tests/selectors/query-selector.test.ts +42 -0
- package/tests/serializer/attributes.test.ts +132 -0
- package/tests/serializer/escape.test.ts +51 -0
- package/tests/serializer/serialize-tokens.test.ts +80 -0
- package/tests/serializer-core.test.ts +6 -6
- package/tests/serializer-injectmeta.test.ts +6 -6
- package/tests/serializer-optionaltags.test.ts +9 -6
- package/tests/serializer-options.test.ts +6 -6
- package/tests/serializer-whitespace.test.ts +6 -6
- package/tests/tokenizer/calculate-position.test.ts +34 -0
- package/tests/tokenizer/decode-entities.test.ts +31 -0
- package/tests/tokenizer/parse-attributes.test.ts +44 -0
- package/tests/tokenizer/tokenize.test.ts +757 -0
- package/tests/tokenizer-namedEntities.test.ts +10 -7
- package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
- package/tests/tokenizer.test.ts +268 -256
- package/tests/tree-construction-adoption01.test.ts +25 -16
- package/tests/tree-construction-adoption02.test.ts +30 -19
- package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
- package/tests/tree-construction-entities02.test.ts +18 -16
- package/tests/tree-construction-html5test-com.test.ts +16 -10
- package/tests/tree-construction-math.test.ts +11 -9
- package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
- package/tests/tree-construction-noscript01.test.ts +11 -9
- package/tests/tree-construction-ruby.test.ts +6 -4
- package/tests/tree-construction-scriptdata01.test.ts +6 -4
- package/tests/tree-construction-svg.test.ts +6 -4
- package/tests/tree-construction-template.test.ts +6 -4
- package/tests/tree-construction-tests10.test.ts +6 -4
- package/tests/tree-construction-tests11.test.ts +6 -4
- package/tests/tree-construction-tests20.test.ts +7 -4
- package/tests/tree-construction-tests21.test.ts +7 -4
- package/tests/tree-construction-tests23.test.ts +7 -4
- package/tests/tree-construction-tests24.test.ts +7 -4
- package/tests/tree-construction-tests5.test.ts +6 -5
- package/tests/tree-construction-tests6.test.ts +6 -5
- package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
- package/tests/void-elements.test.ts +85 -40
- package/tsconfig.json +1 -1
- package/src/css-selector.ts +0 -185
- package/src/encoding.ts +0 -39
- package/src/parser.ts +0 -682
- package/src/serializer.ts +0 -450
- package/src/tokenizer.ts +0 -325
- package/tests/selectors.test.ts +0 -128
|
@@ -1,70 +1,56 @@
|
|
|
1
|
-
import { tokenize } from
|
|
2
|
-
import { adaptTokens
|
|
1
|
+
import { tokenize } from "../../src/tokenizer/index.js";
|
|
2
|
+
import { adaptTokens } from "./tokenizer-adapter.ts";
|
|
3
3
|
|
|
4
|
-
describe(
|
|
5
|
-
it(
|
|
6
|
-
const tokens = tokenize(
|
|
4
|
+
describe("Tokenizer Adapter Tests", () => {
|
|
5
|
+
it("should adapt simple start tag", () => {
|
|
6
|
+
const tokens = tokenize("<div>");
|
|
7
7
|
const adapted = adaptTokens(tokens);
|
|
8
|
-
expect(adapted).toEqual([
|
|
9
|
-
['StartTag', 'div', {}]
|
|
10
|
-
]);
|
|
8
|
+
expect(adapted).toEqual([["StartTag", "div", {}]]);
|
|
11
9
|
});
|
|
12
10
|
|
|
13
|
-
it(
|
|
11
|
+
it("should adapt start tag with attributes", () => {
|
|
14
12
|
const tokens = tokenize('<div class="foo" id="bar">');
|
|
15
13
|
const adapted = adaptTokens(tokens);
|
|
16
|
-
expect(adapted).toEqual([
|
|
17
|
-
['StartTag', 'div', { class: 'foo', id: 'bar' }]
|
|
18
|
-
]);
|
|
14
|
+
expect(adapted).toEqual([["StartTag", "div", { class: "foo", id: "bar" }]]);
|
|
19
15
|
});
|
|
20
16
|
|
|
21
|
-
it(
|
|
22
|
-
const tokens = tokenize(
|
|
17
|
+
it("should adapt self-closing tag", () => {
|
|
18
|
+
const tokens = tokenize("<br/>");
|
|
23
19
|
const adapted = adaptTokens(tokens);
|
|
24
|
-
expect(adapted).toEqual([
|
|
25
|
-
['StartTag', 'br', {}, true]
|
|
26
|
-
]);
|
|
20
|
+
expect(adapted).toEqual([["StartTag", "br", {}, true]]);
|
|
27
21
|
});
|
|
28
22
|
|
|
29
|
-
it(
|
|
30
|
-
const tokens = tokenize(
|
|
23
|
+
it("should adapt end tag", () => {
|
|
24
|
+
const tokens = tokenize("</div>");
|
|
31
25
|
const adapted = adaptTokens(tokens);
|
|
32
|
-
expect(adapted).toEqual([
|
|
33
|
-
['EndTag', 'div']
|
|
34
|
-
]);
|
|
26
|
+
expect(adapted).toEqual([["EndTag", "div"]]);
|
|
35
27
|
});
|
|
36
28
|
|
|
37
|
-
it(
|
|
38
|
-
const tokens = tokenize(
|
|
29
|
+
it("should adapt text", () => {
|
|
30
|
+
const tokens = tokenize("hello world");
|
|
39
31
|
const adapted = adaptTokens(tokens);
|
|
40
|
-
expect(adapted).toEqual([
|
|
41
|
-
['Character', 'hello world']
|
|
42
|
-
]);
|
|
32
|
+
expect(adapted).toEqual([["Character", "hello world"]]);
|
|
43
33
|
});
|
|
44
34
|
|
|
45
|
-
it(
|
|
46
|
-
const tokens = tokenize(
|
|
35
|
+
it("should adapt comment", () => {
|
|
36
|
+
const tokens = tokenize("<!-- comment -->");
|
|
47
37
|
const adapted = adaptTokens(tokens);
|
|
48
|
-
expect(adapted).toEqual([
|
|
49
|
-
['Comment', ' comment ']
|
|
50
|
-
]);
|
|
38
|
+
expect(adapted).toEqual([["Comment", " comment "]]);
|
|
51
39
|
});
|
|
52
40
|
|
|
53
|
-
it(
|
|
54
|
-
const tokens = tokenize(
|
|
41
|
+
it("should adapt DOCTYPE", () => {
|
|
42
|
+
const tokens = tokenize("<!DOCTYPE html>");
|
|
55
43
|
const adapted = adaptTokens(tokens);
|
|
56
|
-
expect(adapted).toEqual([
|
|
57
|
-
['DOCTYPE', 'html', null, null, true]
|
|
58
|
-
]);
|
|
44
|
+
expect(adapted).toEqual([["DOCTYPE", "html", null, null, true]]);
|
|
59
45
|
});
|
|
60
46
|
|
|
61
|
-
it(
|
|
62
|
-
const tokens = tokenize(
|
|
47
|
+
it("should adapt mixed content", () => {
|
|
48
|
+
const tokens = tokenize("<div>hello</div>");
|
|
63
49
|
const adapted = adaptTokens(tokens);
|
|
64
50
|
expect(adapted).toEqual([
|
|
65
|
-
[
|
|
66
|
-
[
|
|
67
|
-
[
|
|
51
|
+
["StartTag", "div", {}],
|
|
52
|
+
["Character", "hello"],
|
|
53
|
+
["EndTag", "div"],
|
|
68
54
|
]);
|
|
69
55
|
});
|
|
70
|
-
});
|
|
56
|
+
});
|
|
@@ -1,65 +1,68 @@
|
|
|
1
1
|
// tests/helpers/tokenizer-adapter.ts
|
|
2
2
|
|
|
3
|
-
import type { Token } from
|
|
3
|
+
import type { Token } from "../../src/tokenizer/index.js";
|
|
4
4
|
|
|
5
|
-
export type Html5libToken =
|
|
6
|
-
| [
|
|
7
|
-
| [
|
|
8
|
-
| [
|
|
9
|
-
| [
|
|
10
|
-
| [
|
|
11
|
-
| [
|
|
5
|
+
export type Html5libToken =
|
|
6
|
+
| ["StartTag", string, Record<string, string>]
|
|
7
|
+
| ["StartTag", string, Record<string, string>, boolean] // con self-closing flag
|
|
8
|
+
| ["EndTag", string]
|
|
9
|
+
| ["Character", string]
|
|
10
|
+
| ["Comment", string]
|
|
11
|
+
| ["DOCTYPE", string, string | null, string | null, boolean];
|
|
12
12
|
|
|
13
13
|
export function adaptTokens(tokens: Token[]): Html5libToken[] {
|
|
14
14
|
const result: Html5libToken[] = [];
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
for (const token of tokens) {
|
|
17
|
-
if (token.type ===
|
|
18
|
-
|
|
17
|
+
if (token.type === "EOF") continue;
|
|
18
|
+
|
|
19
19
|
switch (token.type) {
|
|
20
|
-
case
|
|
20
|
+
case "TAG_OPEN":
|
|
21
21
|
if (token.isClosing) {
|
|
22
|
-
result.push([
|
|
22
|
+
result.push(["EndTag", token.value]);
|
|
23
23
|
} else {
|
|
24
24
|
const attrs = token.attributes || {};
|
|
25
25
|
if (token.isSelfClosing) {
|
|
26
|
-
result.push([
|
|
26
|
+
result.push(["StartTag", token.value, attrs, true]);
|
|
27
27
|
} else {
|
|
28
|
-
result.push([
|
|
28
|
+
result.push(["StartTag", token.value, attrs]);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
break;
|
|
32
|
-
|
|
33
|
-
case
|
|
34
|
-
result.push([
|
|
32
|
+
|
|
33
|
+
case "TAG_CLOSE":
|
|
34
|
+
result.push(["EndTag", token.value]);
|
|
35
35
|
break;
|
|
36
|
-
|
|
37
|
-
case
|
|
38
|
-
result.push([
|
|
36
|
+
|
|
37
|
+
case "TEXT":
|
|
38
|
+
result.push(["Character", token.value]);
|
|
39
39
|
break;
|
|
40
|
-
|
|
41
|
-
case
|
|
42
|
-
result.push([
|
|
40
|
+
|
|
41
|
+
case "COMMENT":
|
|
42
|
+
result.push(["Comment", token.value]);
|
|
43
43
|
break;
|
|
44
|
-
|
|
45
|
-
case
|
|
44
|
+
|
|
45
|
+
case "DOCTYPE":
|
|
46
46
|
// Parsear DOCTYPE para extraer name, publicId, systemId
|
|
47
|
-
result.push([
|
|
47
|
+
result.push(["DOCTYPE", token.value, null, null, true]);
|
|
48
48
|
break;
|
|
49
|
-
|
|
50
|
-
case
|
|
51
|
-
result.push([
|
|
49
|
+
|
|
50
|
+
case "CDATA":
|
|
51
|
+
result.push(["Character", token.value]);
|
|
52
52
|
break;
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
return result;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
// Función para comparar tokens, manejando casos especiales
|
|
60
|
-
export function compareTokens(
|
|
60
|
+
export function compareTokens(
|
|
61
|
+
actual: Html5libToken[],
|
|
62
|
+
expected: any[],
|
|
63
|
+
): boolean {
|
|
61
64
|
// Implementar comparación flexible
|
|
62
65
|
// - Coalescer Characters consecutivos
|
|
63
66
|
// - Ignorar diferencias de whitespace en algunos casos
|
|
64
67
|
return JSON.stringify(actual) === JSON.stringify(expected);
|
|
65
|
-
}
|
|
68
|
+
}
|
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
import { parseHTML } from
|
|
2
|
-
import { serializeToHtml5lib } from
|
|
1
|
+
import { parseHTML } from "../../index.ts";
|
|
2
|
+
import { serializeToHtml5lib } from "./tree-adapter.ts";
|
|
3
3
|
|
|
4
|
-
describe(
|
|
5
|
-
it(
|
|
6
|
-
const doc = parseHTML(
|
|
4
|
+
describe("Tree Adapter Tests", () => {
|
|
5
|
+
it("should serialize simple element", () => {
|
|
6
|
+
const doc = parseHTML("<div></div>");
|
|
7
7
|
const serialized = serializeToHtml5lib(doc);
|
|
8
|
-
expect(serialized).toContain(
|
|
9
|
-
expect(serialized).toContain(
|
|
10
|
-
expect(serialized).toContain(
|
|
8
|
+
expect(serialized).toContain("| <html>");
|
|
9
|
+
expect(serialized).toContain("| <body>");
|
|
10
|
+
expect(serialized).toContain("| <div>");
|
|
11
11
|
});
|
|
12
12
|
|
|
13
|
-
it(
|
|
13
|
+
it("should serialize element with attributes", () => {
|
|
14
14
|
const doc = parseHTML('<div class="foo" id="bar"></div>');
|
|
15
15
|
const serialized = serializeToHtml5lib(doc);
|
|
16
|
-
expect(serialized).toContain(
|
|
16
|
+
expect(serialized).toContain("<div>");
|
|
17
17
|
expect(serialized).toContain('class="foo"');
|
|
18
18
|
expect(serialized).toContain('id="bar"');
|
|
19
19
|
});
|
|
20
20
|
|
|
21
|
-
it(
|
|
22
|
-
const doc = parseHTML(
|
|
21
|
+
it("should serialize text content", () => {
|
|
22
|
+
const doc = parseHTML("<div>hello</div>");
|
|
23
23
|
const serialized = serializeToHtml5lib(doc);
|
|
24
24
|
expect(serialized).toContain('"hello"');
|
|
25
25
|
});
|
|
26
26
|
|
|
27
|
-
it(
|
|
28
|
-
const doc = parseHTML(
|
|
27
|
+
it("should serialize comment", () => {
|
|
28
|
+
const doc = parseHTML("<div><!-- comment --></div>");
|
|
29
29
|
const serialized = serializeToHtml5lib(doc);
|
|
30
|
-
expect(serialized).toContain(
|
|
30
|
+
expect(serialized).toContain("<!-- comment -->");
|
|
31
31
|
});
|
|
32
32
|
|
|
33
|
-
it(
|
|
34
|
-
const doc = parseHTML(
|
|
33
|
+
it("should serialize DOCTYPE", () => {
|
|
34
|
+
const doc = parseHTML("<!DOCTYPE html><div></div>");
|
|
35
35
|
const serialized = serializeToHtml5lib(doc);
|
|
36
|
-
expect(serialized).toContain(
|
|
37
|
-
expect(serialized).toContain(
|
|
36
|
+
expect(serialized).toContain("<!DOCTYPE html>");
|
|
37
|
+
expect(serialized).toContain("<div>");
|
|
38
38
|
});
|
|
39
|
-
});
|
|
39
|
+
});
|
|
@@ -4,57 +4,67 @@ export interface SerializeOptions {
|
|
|
4
4
|
skipImplicitDoctype?: boolean;
|
|
5
5
|
}
|
|
6
6
|
|
|
7
|
-
export function serializeToHtml5lib(
|
|
7
|
+
export function serializeToHtml5lib(
|
|
8
|
+
doc: any,
|
|
9
|
+
options: SerializeOptions = {},
|
|
10
|
+
): string {
|
|
8
11
|
const lines: string[] = [];
|
|
9
12
|
|
|
10
13
|
function serialize(node: any, depth: number): void {
|
|
11
|
-
const indent =
|
|
14
|
+
const indent = "| " + " ".repeat(depth);
|
|
12
15
|
|
|
13
|
-
if (node.nodeType === 9) {
|
|
16
|
+
if (node.nodeType === 9) {
|
|
17
|
+
// DOCUMENT
|
|
14
18
|
for (const child of node.childNodes || []) {
|
|
15
19
|
serialize(child, depth);
|
|
16
20
|
}
|
|
17
|
-
} else if (node.nodeType === 1) {
|
|
21
|
+
} else if (node.nodeType === 1) {
|
|
22
|
+
// ELEMENT
|
|
18
23
|
const tagName = node.tagName.toLowerCase();
|
|
19
24
|
const ns = node.namespaceURI;
|
|
20
|
-
|
|
21
|
-
let nsPrefix =
|
|
22
|
-
if (ns ===
|
|
23
|
-
nsPrefix =
|
|
24
|
-
} else if (ns ===
|
|
25
|
-
nsPrefix =
|
|
25
|
+
|
|
26
|
+
let nsPrefix = "";
|
|
27
|
+
if (ns === "http://www.w3.org/2000/svg") {
|
|
28
|
+
nsPrefix = " svg";
|
|
29
|
+
} else if (ns === "http://www.w3.org/1998/Math/MathML") {
|
|
30
|
+
nsPrefix = " math";
|
|
26
31
|
}
|
|
27
|
-
|
|
32
|
+
|
|
28
33
|
lines.push(`${indent}<${tagName}${nsPrefix}>`);
|
|
29
|
-
|
|
34
|
+
|
|
30
35
|
// Atributos en orden alfabético
|
|
31
|
-
const attrs = Object.entries(node.attributes || {}).sort(([a], [b]) =>
|
|
36
|
+
const attrs = Object.entries(node.attributes || {}).sort(([a], [b]) =>
|
|
37
|
+
a.localeCompare(b),
|
|
38
|
+
);
|
|
32
39
|
for (const [name, value] of attrs) {
|
|
33
40
|
lines.push(`${indent} ${name}="${value}"`);
|
|
34
41
|
}
|
|
35
|
-
|
|
42
|
+
|
|
36
43
|
// Template special case
|
|
37
|
-
if (node.tagName.toLowerCase() ===
|
|
44
|
+
if (node.tagName.toLowerCase() === "template" && node.content) {
|
|
38
45
|
lines.push(`${indent} content`);
|
|
39
46
|
serialize(node.content, depth + 2);
|
|
40
47
|
}
|
|
41
|
-
|
|
48
|
+
|
|
42
49
|
// Children
|
|
43
50
|
for (const child of node.childNodes || []) {
|
|
44
51
|
serialize(child, depth + 1);
|
|
45
52
|
}
|
|
46
|
-
} else if (node.nodeType === 3) {
|
|
53
|
+
} else if (node.nodeType === 3) {
|
|
54
|
+
// TEXT
|
|
47
55
|
lines.push(`${indent}"${node.textContent}"`);
|
|
48
|
-
} else if (node.nodeType === 8) {
|
|
49
|
-
|
|
56
|
+
} else if (node.nodeType === 8) {
|
|
57
|
+
// COMMENT
|
|
58
|
+
const commentData = node.data || node.nodeValue || node.textContent || "";
|
|
50
59
|
lines.push(`${indent}<!-- ${commentData} -->`);
|
|
51
|
-
} else if (node.nodeType === 10) {
|
|
60
|
+
} else if (node.nodeType === 10) {
|
|
61
|
+
// DOCTYPE
|
|
52
62
|
if (!options.skipImplicitDoctype) {
|
|
53
|
-
lines.push(`${indent}<!DOCTYPE ${node.name ||
|
|
63
|
+
lines.push(`${indent}<!DOCTYPE ${node.name || "html"}>`);
|
|
54
64
|
}
|
|
55
65
|
}
|
|
56
66
|
}
|
|
57
|
-
|
|
67
|
+
|
|
58
68
|
serialize(doc, 0);
|
|
59
|
-
return lines.join(
|
|
60
|
-
}
|
|
69
|
+
return lines.join("\n") + "\n";
|
|
70
|
+
}
|
|
@@ -9,7 +9,9 @@ describe("HTML entities in text content", () => {
|
|
|
9
9
|
});
|
|
10
10
|
|
|
11
11
|
it("should preserve < and > in code elements", () => {
|
|
12
|
-
const doc = parseHTML(
|
|
12
|
+
const doc = parseHTML(
|
|
13
|
+
"<code><script>alert('xss')</script></code>",
|
|
14
|
+
);
|
|
13
15
|
const code = doc.querySelector("code");
|
|
14
16
|
expect(code.innerHTML).toBe("<script>alert('xss')</script>");
|
|
15
17
|
});
|
|
@@ -21,7 +23,9 @@ describe("HTML entities in text content", () => {
|
|
|
21
23
|
});
|
|
22
24
|
|
|
23
25
|
it("should preserve mixed entities in text", () => {
|
|
24
|
-
const doc = parseHTML(
|
|
26
|
+
const doc = parseHTML(
|
|
27
|
+
"<div><a href="test">link</a></div>",
|
|
28
|
+
);
|
|
25
29
|
const div = doc.querySelector("div");
|
|
26
30
|
expect(div.innerHTML).toBe('<a href="test">link</a>');
|
|
27
31
|
});
|
|
@@ -2,50 +2,51 @@ import { describe, it, expect } from "bun:test";
|
|
|
2
2
|
import { parseHTML } from "../src/index";
|
|
3
3
|
|
|
4
4
|
describe("innerHTML with void elements", () => {
|
|
5
|
-
it(
|
|
6
|
-
const doc = parseHTML(
|
|
7
|
-
const element = doc.querySelector(
|
|
5
|
+
it("innerHTML should work with void elements", () => {
|
|
6
|
+
const doc = parseHTML("<custom></custom>");
|
|
7
|
+
const element = doc.querySelector("custom");
|
|
8
8
|
|
|
9
9
|
element!.innerHTML = '<meta name="test">';
|
|
10
10
|
expect(element!.innerHTML).toBe('<meta name="test">');
|
|
11
11
|
expect(element!.childNodes.length).toBe(1);
|
|
12
12
|
});
|
|
13
13
|
|
|
14
|
-
it(
|
|
15
|
-
const doc = parseHTML(
|
|
16
|
-
const element = doc.querySelector(
|
|
14
|
+
it("innerHTML should work with multiple void elements", () => {
|
|
15
|
+
const doc = parseHTML("<custom></custom>");
|
|
16
|
+
const element = doc.querySelector("custom");
|
|
17
17
|
|
|
18
18
|
element!.innerHTML = '<meta name="a"><link rel="b"><input type="c">';
|
|
19
19
|
expect(element!.childNodes.length).toBe(3);
|
|
20
20
|
});
|
|
21
21
|
|
|
22
|
-
it(
|
|
23
|
-
const doc = parseHTML(
|
|
24
|
-
const element = doc.querySelector(
|
|
22
|
+
it("innerHTML should work with mixed void and non-void elements", () => {
|
|
23
|
+
const doc = parseHTML("<custom></custom>");
|
|
24
|
+
const element = doc.querySelector("custom");
|
|
25
25
|
|
|
26
|
-
element!.innerHTML =
|
|
26
|
+
element!.innerHTML =
|
|
27
|
+
'<meta name="test"><div>Hello</div><br><span>World</span>';
|
|
27
28
|
expect(element!.childNodes.length).toBe(4);
|
|
28
|
-
expect(element!.children[0].tagName).toBe(
|
|
29
|
-
expect(element!.children[1].tagName).toBe(
|
|
30
|
-
expect(element!.children[2].tagName).toBe(
|
|
31
|
-
expect(element!.children[3].tagName).toBe(
|
|
29
|
+
expect(element!.children[0].tagName).toBe("META");
|
|
30
|
+
expect(element!.children[1].tagName).toBe("DIV");
|
|
31
|
+
expect(element!.children[2].tagName).toBe("BR");
|
|
32
|
+
expect(element!.children[3].tagName).toBe("SPAN");
|
|
32
33
|
});
|
|
33
34
|
|
|
34
|
-
it(
|
|
35
|
-
const doc = parseHTML(
|
|
36
|
-
const element = doc.querySelector(
|
|
35
|
+
it("innerHTML should work with void elements nested inside containers", () => {
|
|
36
|
+
const doc = parseHTML("<custom></custom>");
|
|
37
|
+
const element = doc.querySelector("custom");
|
|
37
38
|
|
|
38
39
|
element!.innerHTML = '<div><img src="test.jpg"><input type="text"></div>';
|
|
39
40
|
expect(element!.childNodes.length).toBe(1);
|
|
40
41
|
const div = element!.children[0];
|
|
41
42
|
expect(div.childNodes.length).toBe(2);
|
|
42
|
-
expect(div.children[0].tagName).toBe(
|
|
43
|
-
expect(div.children[1].tagName).toBe(
|
|
43
|
+
expect(div.children[0].tagName).toBe("IMG");
|
|
44
|
+
expect(div.children[1].tagName).toBe("INPUT");
|
|
44
45
|
});
|
|
45
46
|
|
|
46
|
-
it(
|
|
47
|
-
const doc = parseHTML(
|
|
48
|
-
const element = doc.querySelector(
|
|
47
|
+
it("innerHTML can be replaced multiple times with void elements", () => {
|
|
48
|
+
const doc = parseHTML("<custom></custom>");
|
|
49
|
+
const element = doc.querySelector("custom");
|
|
49
50
|
|
|
50
51
|
element!.innerHTML = '<meta name="first">';
|
|
51
52
|
expect(element!.childNodes.length).toBe(1);
|
|
@@ -53,17 +54,31 @@ describe("innerHTML with void elements", () => {
|
|
|
53
54
|
element!.innerHTML = '<link rel="second"><hr>';
|
|
54
55
|
expect(element!.childNodes.length).toBe(2);
|
|
55
56
|
|
|
56
|
-
element!.innerHTML =
|
|
57
|
+
element!.innerHTML = "";
|
|
57
58
|
expect(element!.childNodes.length).toBe(0);
|
|
58
59
|
});
|
|
59
60
|
|
|
60
|
-
it(
|
|
61
|
-
const doc = parseHTML(
|
|
62
|
-
const element = doc.querySelector(
|
|
61
|
+
it("innerHTML should work with all void element types", () => {
|
|
62
|
+
const doc = parseHTML("<custom></custom>");
|
|
63
|
+
const element = doc.querySelector("custom");
|
|
63
64
|
|
|
64
65
|
// Test all void elements
|
|
65
|
-
const voidElements = [
|
|
66
|
-
|
|
66
|
+
const voidElements = [
|
|
67
|
+
"area",
|
|
68
|
+
"base",
|
|
69
|
+
"br",
|
|
70
|
+
"col",
|
|
71
|
+
"embed",
|
|
72
|
+
"hr",
|
|
73
|
+
"img",
|
|
74
|
+
"input",
|
|
75
|
+
"link",
|
|
76
|
+
"meta",
|
|
77
|
+
"source",
|
|
78
|
+
"track",
|
|
79
|
+
"wbr",
|
|
80
|
+
];
|
|
81
|
+
|
|
67
82
|
for (const tag of voidElements) {
|
|
68
83
|
element!.innerHTML = `<${tag}>`;
|
|
69
84
|
expect(element!.childNodes.length).toBe(1);
|
|
@@ -71,14 +86,15 @@ describe("innerHTML with void elements", () => {
|
|
|
71
86
|
}
|
|
72
87
|
});
|
|
73
88
|
|
|
74
|
-
it(
|
|
75
|
-
const doc = parseHTML(
|
|
76
|
-
const element = doc.querySelector(
|
|
89
|
+
it("innerHTML with void elements preserves attributes", () => {
|
|
90
|
+
const doc = parseHTML("<custom></custom>");
|
|
91
|
+
const element = doc.querySelector("custom");
|
|
77
92
|
|
|
78
|
-
element!.innerHTML =
|
|
93
|
+
element!.innerHTML =
|
|
94
|
+
'<meta charset="utf-8" name="viewport" content="width=device-width">';
|
|
79
95
|
const meta = element!.children[0];
|
|
80
|
-
expect(meta.getAttribute(
|
|
81
|
-
expect(meta.getAttribute(
|
|
82
|
-
expect(meta.getAttribute(
|
|
96
|
+
expect(meta.getAttribute("charset")).toBe("utf-8");
|
|
97
|
+
expect(meta.getAttribute("name")).toBe("viewport");
|
|
98
|
+
expect(meta.getAttribute("content")).toBe("width=device-width");
|
|
83
99
|
});
|
|
84
|
-
});
|
|
100
|
+
});
|