@tkeron/html-parser 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91):
  1. package/README.md +1 -7
  2. package/bun.lock +8 -3
  3. package/index.ts +4 -0
  4. package/package.json +13 -6
  5. package/src/css-selector.ts +45 -27
  6. package/src/dom-simulator.ts +162 -20
  7. package/src/encoding.ts +39 -0
  8. package/src/index.ts +9 -0
  9. package/src/parser.ts +478 -183
  10. package/src/serializer.ts +450 -0
  11. package/src/tokenizer.ts +59 -139
  12. package/tests/advanced.test.ts +119 -106
  13. package/tests/custom-elements.test.ts +172 -162
  14. package/tests/dom-extended.test.ts +12 -12
  15. package/tests/dom-manipulation.test.ts +637 -0
  16. package/tests/dom.test.ts +32 -27
  17. package/tests/helpers/tokenizer-adapter.test.ts +70 -0
  18. package/tests/helpers/tokenizer-adapter.ts +65 -0
  19. package/tests/helpers/tree-adapter.test.ts +39 -0
  20. package/tests/helpers/tree-adapter.ts +43 -0
  21. package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
  22. package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
  23. package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
  24. package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
  25. package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
  26. package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
  27. package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
  28. package/tests/html5lib-data/tree-construction/math.dat +104 -0
  29. package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
  30. package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
  31. package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
  32. package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
  33. package/tests/html5lib-data/tree-construction/svg.dat +104 -0
  34. package/tests/html5lib-data/tree-construction/template.dat +1673 -0
  35. package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
  36. package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
  37. package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
  38. package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
  39. package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
  40. package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
  41. package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
  42. package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
  43. package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
  44. package/tests/parser.test.ts +172 -193
  45. package/tests/selectors.test.ts +64 -1
  46. package/tests/serializer-core.test.ts +16 -0
  47. package/tests/serializer-data/core.test +125 -0
  48. package/tests/serializer-data/injectmeta.test +66 -0
  49. package/tests/serializer-data/optionaltags.test +965 -0
  50. package/tests/serializer-data/options.test +60 -0
  51. package/tests/serializer-data/whitespace.test +51 -0
  52. package/tests/serializer-injectmeta.test.ts +16 -0
  53. package/tests/serializer-optionaltags.test.ts +16 -0
  54. package/tests/serializer-options.test.ts +16 -0
  55. package/tests/serializer-whitespace.test.ts +16 -0
  56. package/tests/tokenizer-namedEntities.test.ts +20 -0
  57. package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
  58. package/tests/tokenizer.test.ts +83 -0
  59. package/tests/tree-construction-adoption01.test.ts +37 -0
  60. package/tests/tree-construction-adoption02.test.ts +34 -0
  61. package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
  62. package/tests/tree-construction-entities02.test.ts +33 -0
  63. package/tests/tree-construction-html5test-com.test.ts +24 -0
  64. package/tests/tree-construction-math.test.ts +18 -0
  65. package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
  66. package/tests/tree-construction-noscript01.test.ts +18 -0
  67. package/tests/tree-construction-ruby.test.ts +21 -0
  68. package/tests/tree-construction-scriptdata01.test.ts +21 -0
  69. package/tests/tree-construction-svg.test.ts +21 -0
  70. package/tests/tree-construction-template.test.ts +21 -0
  71. package/tests/tree-construction-tests10.test.ts +21 -0
  72. package/tests/tree-construction-tests11.test.ts +21 -0
  73. package/tests/tree-construction-tests20.test.ts +18 -0
  74. package/tests/tree-construction-tests21.test.ts +18 -0
  75. package/tests/tree-construction-tests23.test.ts +18 -0
  76. package/tests/tree-construction-tests24.test.ts +18 -0
  77. package/tests/tree-construction-tests5.test.ts +21 -0
  78. package/tests/tree-construction-tests6.test.ts +21 -0
  79. package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
  80. package/tests/void-elements.test.ts +471 -0
  81. package/tests/official/README.md +0 -87
  82. package/tests/official/acid/acid-tests.test.ts +0 -309
  83. package/tests/official/final-output/final-output.test.ts +0 -361
  84. package/tests/official/html5lib/tokenizer-utils.ts +0 -192
  85. package/tests/official/html5lib/tokenizer.test.ts +0 -171
  86. package/tests/official/html5lib/tree-construction-utils.ts +0 -194
  87. package/tests/official/html5lib/tree-construction.test.ts +0 -250
  88. package/tests/official/validator/validator-tests.test.ts +0 -237
  89. package/tests/official/validator-nu/validator-nu.test.ts +0 -335
  90. package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
  91. package/tests/official/wpt/wpt-tests.test.ts +0 -409
package/tests/dom.test.ts CHANGED
@@ -10,6 +10,10 @@ import {
10
10
  } from "../src/dom-simulator";
11
11
  import { parse } from "../src/parser";
12
12
 
13
/**
 * Returns the first child of `doc.body`.
 *
 * Evaluates to `undefined` when `doc.body` is null or undefined; otherwise
 * returns whatever `firstChild` holds (which may itself be null).
 */
function getBodyContent(doc: any): any {
  const { body } = doc;
  return body == null ? undefined : body.firstChild;
}
16
+
13
17
  describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
14
18
  describe("parseHTML basic functionality", () => {
15
19
  it("should return a Document object", () => {
@@ -21,8 +25,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
21
25
  it("should parse simple HTML elements", () => {
22
26
  const doc = parseHTML("<p>Hello World</p>");
23
27
 
24
- expect(doc.childNodes.length).toBe(1);
25
- const paragraph = doc.childNodes[0]!;
28
+ const paragraph = getBodyContent(doc);
26
29
 
27
30
  expect(paragraph.nodeType).toBe(NodeType.ELEMENT_NODE);
28
31
  expect(paragraph.nodeName).toBe("P");
@@ -31,7 +34,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
31
34
 
32
35
  it("should parse text content correctly", () => {
33
36
  const doc = parseHTML("<p>Hello World</p>");
34
- const paragraph = doc.childNodes[0]!;
37
+ const paragraph = getBodyContent(doc);
35
38
 
36
39
  expect(paragraph.childNodes.length).toBe(1);
37
40
  const textNode = paragraph.childNodes[0]!;
@@ -44,7 +47,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
44
47
  it("should parse nested elements", () => {
45
48
  const doc = parseHTML("<div><p>Hello</p><span>World</span></div>");
46
49
 
47
- const div = doc.childNodes[0]!;
50
+ const div = getBodyContent(doc);
48
51
  expect(div.nodeName).toBe("DIV");
49
52
  expect(div.childNodes.length).toBe(2);
50
53
 
@@ -57,17 +60,19 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
57
60
 
58
61
  it("should handle attributes correctly", () => {
59
62
  const doc = parseHTML('<p id="test" class="highlight">Content</p>');
60
- const paragraph = doc.childNodes[0]! as any;
63
+ const paragraph = getBodyContent(doc) as any;
61
64
 
62
65
  expect(paragraph.attributes.id).toBe("test");
63
66
  expect(paragraph.attributes.class).toBe("highlight");
64
67
  });
65
68
 
66
69
  it("should parse comments", () => {
67
- const doc = parseHTML("<!-- This is a comment --><p>Hello</p>");
70
+ const doc = parseHTML("<div><!-- This is a comment --></div><p>Hello</p>");
68
71
 
69
- expect(doc.childNodes.length).toBe(2);
70
- const comment = doc.childNodes[0]!;
72
+ const body = doc.body;
73
+ expect(body.childNodes.length).toBe(2);
74
+ const div = body.childNodes[0]!;
75
+ const comment = div.childNodes[0]!;
71
76
 
72
77
  expect(comment.nodeType).toBe(NodeType.COMMENT_NODE);
73
78
  expect(comment.nodeName).toBe("#comment");
@@ -77,11 +82,11 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
77
82
  it("should set parent-child relationships correctly", () => {
78
83
  const doc = parseHTML("<div><p>Hello</p></div>");
79
84
 
80
- const div = doc.childNodes[0]!;
85
+ const div = getBodyContent(doc);
81
86
  const p = div.childNodes[0]!;
82
87
 
83
88
  expect(p.parentNode).toBe(<any>div);
84
- expect(div.parentNode).toBe(doc);
89
+ expect(div.parentNode).toBe(doc.body);
85
90
  expect(div.firstChild).toBe(p);
86
91
  expect(div.lastChild).toBe(p);
87
92
  });
@@ -91,7 +96,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
91
96
  "<div><p>First</p><span>Second</span><em>Third</em></div>"
92
97
  );
93
98
 
94
- const div = doc.childNodes[0]!;
99
+ const div = getBodyContent(doc);
95
100
  const p = div.childNodes[0]!;
96
101
  const span = div.childNodes[1]!;
97
102
  const em = div.childNodes[2]!;
@@ -108,7 +113,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
108
113
  it("should handle self-closing elements", () => {
109
114
  const doc = parseHTML("<p>Before<br/>After</p>");
110
115
 
111
- const p = doc.childNodes[0]!;
116
+ const p = getBodyContent(doc);
112
117
  expect(p.childNodes.length).toBe(3);
113
118
 
114
119
  const br = p.childNodes[1]!;
@@ -119,7 +124,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
119
124
  it("should handle empty elements", () => {
120
125
  const doc = parseHTML("<div></div>");
121
126
 
122
- const div = doc.childNodes[0]!;
127
+ const div = getBodyContent(doc);
123
128
  expect(div.childNodes.length).toBe(0);
124
129
  expect(div.firstChild).toBeNull();
125
130
  expect(div.lastChild).toBeNull();
@@ -154,7 +159,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
154
159
  describe("getTextContent", () => {
155
160
  it("should get text content from a simple text node", () => {
156
161
  const doc = parseHTML("<p>Hello World</p>");
157
- const p = doc.childNodes[0]!;
162
+ const p = getBodyContent(doc);
158
163
  const textNode = p.childNodes[0]!;
159
164
 
160
165
  expect(getTextContent(textNode)).toBe("Hello World");
@@ -162,14 +167,14 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
162
167
 
163
168
  it("should get text content from an element with text", () => {
164
169
  const doc = parseHTML("<p>Hello World</p>");
165
- const p = doc.childNodes[0]!;
170
+ const p = getBodyContent(doc);
166
171
 
167
172
  expect(getTextContent(p)).toBe("Hello World");
168
173
  });
169
174
 
170
175
  it("should get concatenated text from nested elements", () => {
171
176
  const doc = parseHTML("<div>Hello <span>beautiful</span> world</div>");
172
- const div = doc.childNodes[0]!;
177
+ const div = getBodyContent(doc);
173
178
 
174
179
  expect(getTextContent(div)).toBe("Hello beautiful world");
175
180
  });
@@ -178,28 +183,28 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
178
183
  const doc = parseHTML(
179
184
  "<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>"
180
185
  );
181
- const div = doc.childNodes[0]!;
186
+ const div = getBodyContent(doc);
182
187
 
183
188
  expect(getTextContent(div)).toBe("Start Middle Deep Deeper End");
184
189
  });
185
190
 
186
191
  it("should return empty string for elements with no text", () => {
187
192
  const doc = parseHTML("<div></div>");
188
- const div = doc.childNodes[0]!;
193
+ const div = getBodyContent(doc);
189
194
 
190
195
  expect(getTextContent(div)).toBe("");
191
196
  });
192
197
 
193
198
  it("should ignore comments when getting text content", () => {
194
199
  const doc = parseHTML("<div>Before<!-- comment -->After</div>");
195
- const div = doc.childNodes[0]!;
200
+ const div = getBodyContent(doc);
196
201
 
197
202
  expect(getTextContent(div)).toBe("BeforeAfter");
198
203
  });
199
204
 
200
205
  it("should handle mixed content with self-closing elements", () => {
201
206
  const doc = parseHTML("<p>Before<br/>After</p>");
202
- const p = doc.childNodes[0]!;
207
+ const p = getBodyContent(doc);
203
208
 
204
209
  expect(getTextContent(p)).toBe("BeforeAfter");
205
210
  });
@@ -210,7 +215,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
210
215
  const doc = parseHTML(
211
216
  '<div id="test" class="highlight" data-value="123">Content</div>'
212
217
  );
213
- const div = doc.childNodes[0]! as any;
218
+ const div = getBodyContent(doc) as any;
214
219
 
215
220
  expect(getAttribute(div, "id")).toBe("test");
216
221
  expect(getAttribute(div, "class")).toBe("highlight");
@@ -219,7 +224,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
219
224
 
220
225
  it("should return null for non-existing attributes", () => {
221
226
  const doc = parseHTML('<div id="test">Content</div>');
222
- const div = doc.childNodes[0]! as any;
227
+ const div = getBodyContent(doc) as any;
223
228
 
224
229
  expect(getAttribute(div, "nonexistent")).toBeNull();
225
230
  expect(getAttribute(div, "class")).toBeNull();
@@ -227,7 +232,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
227
232
 
228
233
  it("should check if attributes exist", () => {
229
234
  const doc = parseHTML('<div id="test" class="highlight">Content</div>');
230
- const div = doc.childNodes[0]! as any;
235
+ const div = getBodyContent(doc) as any;
231
236
 
232
237
  expect(hasAttribute(div, "id")).toBe(true);
233
238
  expect(hasAttribute(div, "class")).toBe(true);
@@ -236,7 +241,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
236
241
 
237
242
  it("should set new attributes", () => {
238
243
  const doc = parseHTML("<div>Content</div>");
239
- const div = doc.childNodes[0]! as any;
244
+ const div = getBodyContent(doc) as any;
240
245
 
241
246
  setAttribute(div, "id", "new-id");
242
247
  setAttribute(div, "class", "new-class");
@@ -249,7 +254,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
249
254
 
250
255
  it("should update existing attributes", () => {
251
256
  const doc = parseHTML('<div id="old-id" class="old-class">Content</div>');
252
- const div = doc.childNodes[0]! as any;
257
+ const div = getBodyContent(doc) as any;
253
258
 
254
259
  setAttribute(div, "id", "new-id");
255
260
  setAttribute(div, "class", "new-class");
@@ -262,7 +267,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
262
267
  const doc = parseHTML(
263
268
  '<div id="test" class="highlight" data-value="123">Content</div>'
264
269
  );
265
- const div = doc.childNodes[0]! as any;
270
+ const div = getBodyContent(doc) as any;
266
271
 
267
272
  removeAttribute(div, "class");
268
273
  removeAttribute(div, "data-value");
@@ -276,7 +281,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
276
281
 
277
282
  it("should handle removing non-existing attributes gracefully", () => {
278
283
  const doc = parseHTML('<div id="test">Content</div>');
279
- const div = doc.childNodes[0]! as any;
284
+ const div = getBodyContent(doc) as any;
280
285
 
281
286
  removeAttribute(div, "nonexistent");
282
287
 
@@ -0,0 +1,70 @@
1
+ import { tokenize } from '../../src/tokenizer';
2
+ import { adaptTokens, type Html5libToken } from './tokenizer-adapter.ts';
3
+
4
// Table-driven suite: each row feeds `html` through tokenize() + adaptTokens()
// and checks the result against the expected html5lib-style token list.
describe('Tokenizer Adapter Tests', () => {
  const cases: { title: string; html: string; expected: Html5libToken[] }[] = [
    {
      title: 'should adapt simple start tag',
      html: '<div>',
      expected: [['StartTag', 'div', {}]],
    },
    {
      title: 'should adapt start tag with attributes',
      html: '<div class="foo" id="bar">',
      expected: [['StartTag', 'div', { class: 'foo', id: 'bar' }]],
    },
    {
      title: 'should adapt self-closing tag',
      html: '<br/>',
      expected: [['StartTag', 'br', {}, true]],
    },
    {
      title: 'should adapt end tag',
      html: '</div>',
      expected: [['EndTag', 'div']],
    },
    {
      title: 'should adapt text',
      html: 'hello world',
      expected: [['Character', 'hello world']],
    },
    {
      title: 'should adapt comment',
      html: '<!-- comment -->',
      expected: [['Comment', ' comment ']],
    },
    {
      title: 'should adapt DOCTYPE',
      html: '<!DOCTYPE html>',
      expected: [['DOCTYPE', 'html', null, null, true]],
    },
    {
      title: 'should adapt mixed content',
      html: '<div>hello</div>',
      expected: [
        ['StartTag', 'div', {}],
        ['Character', 'hello'],
        ['EndTag', 'div'],
      ],
    },
  ];

  for (const { title, html, expected } of cases) {
    it(title, () => {
      const adapted = adaptTokens(tokenize(html));
      expect(adapted).toEqual(expected);
    });
  }
});
@@ -0,0 +1,65 @@
1
+ // tests/helpers/tokenizer-adapter.ts
2
+
3
+ import type { Token } from '../../src/tokenizer';
4
+
5
/**
 * A token in the array notation used by the html5lib tokenizer fixtures.
 *
 * The first tuple element is the token kind; the remaining elements depend on
 * that kind. A start tag carries a fourth `true`-able element only when the
 * self-closing flag is reported.
 */
export type Html5libToken =
  | [kind: 'StartTag', name: string, attributes: Record<string, string>]
  // Start tag variant that also carries the self-closing flag.
  | [kind: 'StartTag', name: string, attributes: Record<string, string>, selfClosing: boolean]
  | [kind: 'EndTag', name: string]
  | [kind: 'Character', data: string]
  | [kind: 'Comment', data: string]
  | [kind: 'DOCTYPE', name: string, publicId: string | null, systemId: string | null, correctness: boolean];
12
+
13
/**
 * Converts tokenizer output into html5lib-style token tuples.
 *
 * Mapping performed per token `type`:
 * - `TAG_OPEN`  -> `EndTag` when `isClosing` is set, otherwise `StartTag`
 *                  (with a trailing `true` when `isSelfClosing` is set);
 *                  missing attributes become `{}`.
 * - `TAG_CLOSE` -> `EndTag`
 * - `TEXT` and `CDATA` -> `Character`
 * - `COMMENT`   -> `Comment`
 * - `DOCTYPE`   -> `DOCTYPE` using `value` as the name, with null public and
 *                  system identifiers and the final flag fixed to `true`.
 * `EOF` tokens and any other token type produce no output.
 *
 * @param tokens Tokens to convert, in order.
 * @returns The converted tuples, in the same order as the input.
 */
export function adaptTokens(tokens: Token[]): Html5libToken[] {
  const adapted: Html5libToken[] = [];

  for (const tok of tokens) {
    if (tok.type === 'EOF') {
      continue;
    }

    if (tok.type === 'TAG_OPEN') {
      if (tok.isClosing) {
        adapted.push(['EndTag', tok.value]);
        continue;
      }
      const attributes = tok.attributes || {};
      adapted.push(
        tok.isSelfClosing
          ? ['StartTag', tok.value, attributes, true]
          : ['StartTag', tok.value, attributes],
      );
    } else if (tok.type === 'TAG_CLOSE') {
      adapted.push(['EndTag', tok.value]);
    } else if (tok.type === 'TEXT' || tok.type === 'CDATA') {
      adapted.push(['Character', tok.value]);
    } else if (tok.type === 'COMMENT') {
      adapted.push(['Comment', tok.value]);
    } else if (tok.type === 'DOCTYPE') {
      // The DOCTYPE value is used as the name as-is; public/system ids are not extracted.
      adapted.push(['DOCTYPE', tok.value, null, null, true]);
    }
  }

  return adapted;
}
58
+
59
/**
 * Compares adapted tokenizer output against an expected html5lib token list.
 *
 * Both sides are normalized before comparison:
 * - runs of adjacent `['Character', text]` tokens are merged into one token,
 *   matching the coalesced form used by html5lib tokenizer fixtures;
 * - object keys (e.g. attribute maps) are sorted, so two maps with the same
 *   entries compare equal regardless of insertion order.
 *
 * No whitespace normalization is applied.
 *
 * @param actual Tokens produced by the adapter.
 * @param expected Expected tokens in html5lib array notation.
 * @returns `true` when the normalized token lists serialize identically.
 */
export function compareTokens(actual: Html5libToken[], expected: any[]): boolean {
  const isCharacter = (token: unknown): token is ['Character', string] =>
    Array.isArray(token) &&
    token.length === 2 &&
    token[0] === 'Character' &&
    typeof token[1] === 'string';

  // Rebuilds objects with sorted keys so JSON.stringify output is order-independent.
  const sortKeys = (value: unknown): unknown => {
    if (Array.isArray(value)) {
      return value.map(sortKeys);
    }
    if (value !== null && typeof value === 'object') {
      const source = value as Record<string, unknown>;
      // Null prototype keeps a literal "__proto__" key an ordinary property.
      const sorted: Record<string, unknown> = Object.create(null);
      for (const key of Object.keys(source).sort()) {
        sorted[key] = sortKeys(source[key]);
      }
      return sorted;
    }
    return value;
  };

  const normalize = (tokens: unknown): unknown => {
    if (!Array.isArray(tokens)) {
      return tokens;
    }
    const merged: unknown[] = [];
    for (const token of tokens) {
      const previous = merged[merged.length - 1];
      if (isCharacter(token) && isCharacter(previous)) {
        merged[merged.length - 1] = ['Character', previous[1] + token[1]];
      } else {
        merged.push(token);
      }
    }
    return sortKeys(merged);
  };

  return JSON.stringify(normalize(actual)) === JSON.stringify(normalize(expected));
}
@@ -0,0 +1,39 @@
1
+ import { parseHTML } from '../../index.ts';
2
+ import { serializeToHtml5lib } from './tree-adapter.ts';
3
+
4
// Smoke tests for serializeToHtml5lib: each case parses a small snippet and
// checks that the serialized tree contains the expected fragments.
describe('Tree Adapter Tests', () => {
  // Parses the markup and returns its serialized tree dump.
  const dump = (html: string): string => serializeToHtml5lib(parseHTML(html));

  // Asserts that every fragment occurs somewhere in the output, in order.
  const expectAll = (output: string, fragments: string[]): void => {
    for (const fragment of fragments) {
      expect(output).toContain(fragment);
    }
  };

  it('should serialize simple element', () => {
    expectAll(dump('<div></div>'), ['| <html>', '| <body>', '| <div>']);
  });

  it('should serialize element with attributes', () => {
    expectAll(dump('<div class="foo" id="bar"></div>'), ['<div>', 'class="foo"', 'id="bar"']);
  });

  it('should serialize text content', () => {
    expectAll(dump('<div>hello</div>'), ['"hello"']);
  });

  it('should serialize comment', () => {
    expectAll(dump('<div><!-- comment --></div>'), ['<!-- -->']);
  });

  it('should serialize DOCTYPE', () => {
    expectAll(dump('<!DOCTYPE html><div></div>'), ['<!DOCTYPE html>', '<div>']);
  });
});
@@ -0,0 +1,43 @@
1
+ // tests/helpers/tree-adapter.ts
2
+
3
/**
 * Serializes a DOM-like tree into the html5lib tree-construction dump format.
 *
 * Output rules:
 * - every line starts with `| ` followed by two spaces per ancestor;
 * - elements are written as `<tagname>` (lower-cased `tagName`);
 * - attributes follow their element, one per line, one level deeper, sorted
 *   by UTF-16 code units of the attribute name;
 * - a `<template>` with a truthy `content` emits a `content` line one level
 *   deeper, followed by the content's children one level deeper still;
 * - text nodes are written as `"text"`, comments as `<!-- text -->`,
 *   doctypes as `<!DOCTYPE name>` (falling back to `html` when `name` is falsy);
 * - document (9) and document-fragment (11) nodes emit no line of their own;
 *   their children are written at the current depth.
 * Nodes with any other `nodeType` are ignored.
 *
 * @param doc Root node to serialize; nodes are read through `nodeType`,
 *   `tagName`, `attributes`, `childNodes`, `content`, `textContent` and `name`.
 * @returns The dump, one node or attribute per line, terminated by a newline.
 */
export function serializeToHtml5lib(doc: any): string {
  const lines: string[] = [];

  const indentFor = (depth: number): string => '| ' + '  '.repeat(depth);

  // Relational operators compare strings by UTF-16 code units, unlike
  // localeCompare, which applies locale collation (e.g. case folding).
  const byCodeUnit = ([a]: [string, unknown], [b]: [string, unknown]): number =>
    a < b ? -1 : a > b ? 1 : 0;

  function serialize(node: any, depth: number): void {
    const indent = indentFor(depth);

    if (node.nodeType === 9 || node.nodeType === 11) { // DOCUMENT or DOCUMENT_FRAGMENT
      for (const child of node.childNodes || []) {
        serialize(child, depth);
      }
    } else if (node.nodeType === 1) { // ELEMENT
      const tagName = node.tagName.toLowerCase();
      const nestedIndent = indentFor(depth + 1);
      lines.push(`${indent}<${tagName}>`);

      const attrs = Object.entries(node.attributes || {}).sort(byCodeUnit);
      for (const [name, value] of attrs) {
        lines.push(`${nestedIndent}${name}="${value}"`);
      }

      // Template contents live on `content`, not in the element's own children.
      if (tagName === 'template' && node.content) {
        lines.push(`${nestedIndent}content`);
        serialize(node.content, depth + 2);
      }

      for (const child of node.childNodes || []) {
        serialize(child, depth + 1);
      }
    } else if (node.nodeType === 3) { // TEXT
      lines.push(`${indent}"${node.textContent}"`);
    } else if (node.nodeType === 8) { // COMMENT
      lines.push(`${indent}<!-- ${node.textContent} -->`);
    } else if (node.nodeType === 10) { // DOCTYPE
      lines.push(`${indent}<!DOCTYPE ${node.name || 'html'}>`);
    }
  }

  serialize(doc, 0);
  return lines.join('\n') + '\n';
}