@tkeron/html-parser 0.1.4 → 0.1.5

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import {
5
5
  type HTML5libTokenizerTestSuite
6
6
  } from './tokenizer-utils';
7
7
 
8
- // Sample HTML5lib tokenizer tests embedded directly
8
+
9
9
  const basicTokenizerTests: HTML5libTokenizerTestSuite = {
10
10
  "tests": [
11
11
  {
@@ -86,7 +86,7 @@ const basicTokenizerTests: HTML5libTokenizerTestSuite = {
86
86
  ]
87
87
  };
88
88
 
89
- // Entity tests
89
+
90
90
  const entityTests: HTML5libTokenizerTestSuite = {
91
91
  "tests": [
92
92
  {
@@ -123,7 +123,7 @@ const entityTests: HTML5libTokenizerTestSuite = {
123
123
  ]
124
124
  };
125
125
 
126
- // Comment tests
126
+
127
127
  const commentTests: HTML5libTokenizerTestSuite = {
128
128
  "tests": [
129
129
  {
@@ -163,7 +163,7 @@ const commentTests: HTML5libTokenizerTestSuite = {
163
163
  ]
164
164
  };
165
165
 
166
- // Run the embedded tests
166
+
167
167
  describe('HTML5lib Tokenizer Tests', () => {
168
168
  runHTML5libTokenizerTestSuite(basicTokenizerTests, 'Basic Tokenizer');
169
169
  runHTML5libTokenizerTestSuite(entityTests, 'Entity Handling');
@@ -3,7 +3,7 @@ import { parse } from '../../../src/parser';
3
3
  import { tokenize } from '../../../src/tokenizer';
4
4
  import type { ASTNode } from '../../../src/parser';
5
5
 
6
- // HTML5lib tree construction test format
6
+
7
7
  export interface HTML5libTreeTest {
8
8
  data: string;
9
9
  errors: string[];
@@ -14,9 +14,7 @@ export interface HTML5libTreeTest {
14
14
  document: string;
15
15
  }
16
16
 
17
- /**
18
- * Parses HTML5lib DAT format test files
19
- */
17
+
20
18
  export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
21
19
  const tests: HTML5libTreeTest[] = [];
22
20
  const sections = content.split('\n\n').filter(section => section.trim());
@@ -24,7 +22,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
24
22
  for (const section of sections) {
25
23
  const lines = section.split('\n');
26
24
  const test: Partial<HTML5libTreeTest> = {
27
- errors: [] // Initialize errors as empty array
25
+ errors: []
28
26
  };
29
27
 
30
28
  let currentSection = '';
@@ -32,7 +30,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
32
30
 
33
31
  for (const line of lines) {
34
32
  if (line.startsWith('#')) {
35
- // Save previous section
33
+
36
34
  if (currentSection) {
37
35
  switch (currentSection) {
38
36
  case 'data':
@@ -53,11 +51,11 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
53
51
  }
54
52
  }
55
53
 
56
- // Start new section
54
+
57
55
  currentSection = line.substring(1);
58
56
  currentContent = [];
59
57
 
60
- // Handle script flags
58
+
61
59
  if (currentSection === 'script-off') {
62
60
  test.scriptOff = true;
63
61
  } else if (currentSection === 'script-on') {
@@ -68,7 +66,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
68
66
  }
69
67
  }
70
68
 
71
- // Save last section
69
+
72
70
  if (currentSection) {
73
71
  switch (currentSection) {
74
72
  case 'data':
@@ -97,16 +95,14 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
97
95
  return tests;
98
96
  }
99
97
 
100
- /**
101
- * Converts AST to HTML5lib tree format
102
- */
98
+
103
99
  export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): string[] {
104
100
  const lines: string[] = [];
105
101
  const indent = '| ' + ' '.repeat(depth);
106
102
 
107
103
  switch (node.type) {
108
104
  case 'DOCUMENT':
109
- // Document node doesn't have a line representation
105
+
110
106
  break;
111
107
  case 'DOCTYPE':
112
108
  lines.push(`${indent}<!DOCTYPE ${node.tagName || 'html'}>`);
@@ -115,7 +111,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
115
111
  const tagName = node.tagName || 'unknown';
116
112
  lines.push(`${indent}<${tagName}>`);
117
113
 
118
- // Add attributes
114
+
119
115
  if (node.attributes) {
120
116
  for (const [name, value] of Object.entries(node.attributes).sort()) {
121
117
  lines.push(`${indent} ${name}="${value}"`);
@@ -135,7 +131,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
135
131
  break;
136
132
  }
137
133
 
138
- // Add children
134
+
139
135
  if (node.children) {
140
136
  for (const child of node.children) {
141
137
  lines.push(...convertASTToHTML5libTree(child, depth + 1));
@@ -145,9 +141,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
145
141
  return lines;
146
142
  }
147
143
 
148
- /**
149
- * Normalizes HTML5lib tree format for comparison
150
- */
144
+
151
145
  export function normalizeHTML5libTree(tree: string): string {
152
146
  return tree
153
147
  .split('\n')
@@ -156,33 +150,29 @@ export function normalizeHTML5libTree(tree: string): string {
156
150
  .join('\n');
157
151
  }
158
152
 
159
- /**
160
- * Runs a single HTML5lib tree construction test
161
- */
153
+
162
154
  export function runHTML5libTreeTest(test: HTML5libTreeTest, testName: string): void {
163
155
  it(testName, () => {
164
156
  const { data, document: expectedTree, documentFragment, scriptOff, scriptOn } = test;
165
157
 
166
- // Parse the HTML
158
+
167
159
  const tokens = tokenize(data);
168
160
  const ast = parse(tokens);
169
161
 
170
- // Convert to HTML5lib tree format
162
+
171
163
  const actualTreeLines = convertASTToHTML5libTree(ast);
172
164
  const actualTree = actualTreeLines.join('\n');
173
165
 
174
- // Normalize both trees for comparison
166
+
175
167
  const normalizedActual = normalizeHTML5libTree(actualTree);
176
168
  const normalizedExpected = normalizeHTML5libTree(expectedTree);
177
169
 
178
- // Compare trees
170
+
179
171
  expect(normalizedActual).toBe(normalizedExpected);
180
172
  });
181
173
  }
182
174
 
183
- /**
184
- * Runs all tests from an HTML5lib tree construction test suite
185
- */
175
+
186
176
  export function runHTML5libTreeTestSuite(tests: HTML5libTreeTest[], suiteName: string): void {
187
177
  describe(`HTML5lib Tree Construction Tests: ${suiteName}`, () => {
188
178
  tests.forEach((test, index) => {
@@ -192,17 +182,13 @@ export function runHTML5libTreeTestSuite(tests: HTML5libTreeTest[], suiteName: s
192
182
  });
193
183
  }
194
184
 
195
- /**
196
- * Loads and runs HTML5lib tree construction tests from DAT format
197
- */
185
+
198
186
  export async function loadHTML5libTreeTests(testData: string, suiteName: string): Promise<void> {
199
187
  const tests = parseHTML5libDATFile(testData);
200
188
  runHTML5libTreeTestSuite(tests, suiteName);
201
189
  }
202
190
 
203
- /**
204
- * Validates HTML5lib tree construction test format
205
- */
191
+
206
192
  export function validateHTML5libTreeTest(test: HTML5libTreeTest): boolean {
207
193
  return !!(test.data && test.document && test.errors !== undefined);
208
194
  }
@@ -6,7 +6,7 @@ import {
6
6
  type HTML5libTreeTest,
7
7
  } from "./tree-construction-utils";
8
8
 
9
- // Sample HTML5lib tree construction tests in DAT format
9
+
10
10
  const basicTreeTestData = `#data
11
11
  Test
12
12
  #errors
@@ -206,13 +206,13 @@ const errorHandlingTestData = `#data
206
206
  | <a>
207
207
  | <a>`;
208
208
 
209
- // Run the embedded tests
209
+
210
210
  describe("HTML5lib Tree Construction Tests", () => {
211
211
  it("should parse DAT format correctly", () => {
212
212
  const tests = parseHTML5libDATFile(basicTreeTestData);
213
213
  expect(tests.length).toBeGreaterThan(0);
214
214
 
215
- // Check first test
215
+
216
216
  const firstTest = tests[0];
217
217
  if (firstTest) {
218
218
  expect(firstTest.data).toBe("Test");
@@ -225,7 +225,7 @@ describe("HTML5lib Tree Construction Tests", () => {
225
225
  const tests = parseHTML5libDATFile(doctypeTestData);
226
226
  expect(tests.length).toBeGreaterThan(0);
227
227
 
228
- // Check first doctype test
228
+
229
229
  const firstTest = tests[0];
230
230
  if (firstTest) {
231
231
  expect(firstTest.data).toBe("<!DOCTYPE html>");
@@ -238,7 +238,7 @@ describe("HTML5lib Tree Construction Tests", () => {
238
238
  const tests = parseHTML5libDATFile(errorHandlingTestData);
239
239
  expect(tests.length).toBeGreaterThan(0);
240
240
 
241
- // Check error handling
241
+
242
242
  const firstTest = tests[0];
243
243
  if (firstTest) {
244
244
  expect(firstTest.errors.length).toBeGreaterThan(0);
@@ -42,7 +42,7 @@ describe('Validator.nu Tests', () => {
42
42
  });
43
43
 
44
44
  it('should handle content model violations', () => {
45
- // These should parse but may generate warnings in a full validator
45
+
46
46
  const contentModelHTML = `
47
47
  <p>
48
48
  <div>Block inside paragraph</div>
@@ -53,7 +53,7 @@ describe('Validator.nu Tests', () => {
53
53
  `;
54
54
 
55
55
  const document = parseHTML(contentModelHTML);
56
- // const ast = parse(tokens);
56
+
57
57
 
58
58
  expect(document).toBeDefined();
59
59
  });
@@ -67,7 +67,7 @@ describe('Validator.nu Tests', () => {
67
67
  `;
68
68
 
69
69
  const document = parseHTML(obsoleteHTML);
70
- // const ast = parse(tokens);
70
+
71
71
 
72
72
  expect(document).toBeDefined();
73
73
  });
@@ -85,7 +85,7 @@ describe('Validator.nu Tests', () => {
85
85
  `;
86
86
 
87
87
  const document = parseHTML(deprecatedHTML);
88
- // const ast = parse(tokens);
88
+
89
89
 
90
90
  expect(document).toBeDefined();
91
91
  });
@@ -114,7 +114,7 @@ describe('Validator.nu Tests', () => {
114
114
  `;
115
115
 
116
116
  const document = parseHTML(formHTML);
117
- // const ast = parse(tokens);
117
+
118
118
 
119
119
  expect(document).toBeDefined();
120
120
  });
@@ -136,7 +136,7 @@ describe('Validator.nu Tests', () => {
136
136
  `;
137
137
 
138
138
  const document = parseHTML(mediaHTML);
139
- // const ast = parse(tokens);
139
+
140
140
 
141
141
  expect(document).toBeDefined();
142
142
  });
@@ -162,7 +162,7 @@ describe('Validator.nu Tests', () => {
162
162
  `;
163
163
 
164
164
  const document = parseHTML(semanticHTML);
165
- // const ast = parse(tokens);
165
+
166
166
 
167
167
  expect(document).toBeDefined();
168
168
  });
@@ -184,7 +184,7 @@ describe('Validator.nu Tests', () => {
184
184
  `;
185
185
 
186
186
  const document = parseHTML(interactiveHTML);
187
- // const ast = parse(tokens);
187
+
188
188
 
189
189
  expect(document).toBeDefined();
190
190
  });
@@ -200,7 +200,7 @@ describe('Validator.nu Tests', () => {
200
200
  `;
201
201
 
202
202
  const document = parseHTML(unclosedHTML);
203
- // const ast = parse(tokens);
203
+
204
204
 
205
205
  expect(document).toBeDefined();
206
206
  });
@@ -215,7 +215,7 @@ describe('Validator.nu Tests', () => {
215
215
  `;
216
216
 
217
217
  const document = parseHTML(mismatchedHTML);
218
- // const ast = parse(tokens);
218
+
219
219
 
220
220
  expect(document).toBeDefined();
221
221
  });
@@ -229,7 +229,7 @@ describe('Validator.nu Tests', () => {
229
229
  `;
230
230
 
231
231
  const document = parseHTML(invalidNestingHTML);
232
- // const ast = parse(tokens);
232
+
233
233
 
234
234
  expect(document).toBeDefined();
235
235
  });
@@ -135,10 +135,10 @@ describe('Web Platform Tests (WPT) Compliance', () => {
135
135
 
136
136
  it('should handle numeric character references', () => {
137
137
  const numericRefs = [
138
- '&#65;', // A
139
- '&#x41;', // A (hex)
140
- '&#8364;', // Euro symbol
141
- '&#x20AC;' // Euro symbol (hex)
138
+ '&#65;',
139
+ '&#x41;',
140
+ '&#8364;',
141
+ '&#x20AC;'
142
142
  ];
143
143
 
144
144
  numericRefs.forEach(ref => {
@@ -404,6 +404,6 @@ describe('WPT Integration Tests', () => {
404
404
 
405
405
  expect(ast).toBeDefined();
406
406
  expect((ast as any).children?.length).toBeGreaterThan(0);
407
- expect(end - start).toBeLessThan(1000); // Should parse within 1 second
407
+ expect(end - start).toBeLessThan(1000);
408
408
  });
409
409
  });
@@ -0,0 +1,208 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../index";
3
+
4
+ describe("outerHTML replacement - Browser behavior", () => {
5
+ it("should replace element with its innerHTML when setting outerHTML = innerHTML", () => {
6
+ // Expected behaviour under test: assigning the innerHTML of an element to
7
+ // its outerHTML swaps the element for its children, so the wrapper tag and
8
+ // its attributes vanish while the inner markup remains in the document.
9
+
10
+ const doc = parseHTML(`
11
+ <html>
12
+ <body>
13
+ <div id="mi-prueba" style="border: 2px solid red; padding: 10px;">
14
+ <strong>Lorem ipsum!</strong> Dolor sit amet consectetur.
15
+ </div>
16
+ </body>
17
+ </html>
18
+ `);
19
+
20
+ const elem = doc.querySelector("#mi-prueba");
21
+ expect(elem).not.toBeNull();
22
+
23
+ // Capture the inner markup before the element is replaced.
24
+ const innerHTML = elem!.innerHTML;
25
+ expect(innerHTML).toContain("<strong>Lorem ipsum!</strong>");
26
+ expect(innerHTML).toContain("Dolor sit amet consectetur.");
27
+
28
+ // Precondition: the element is listed among the child nodes of its parent.
29
+ const parent = elem!.parentNode;
30
+ expect(parent).not.toBeNull();
31
+ expect(parent!.childNodes).toContain(elem);
32
+
33
+ // Replace the element with its own inner markup.
34
+ elem!.outerHTML = innerHTML;
35
+
36
+ // The original element can no longer be found by id.
37
+ const elemAfter = doc.querySelector("#mi-prueba");
38
+ expect(elemAfter).toBeNull();
39
+
40
+ // Its former content still shows up in the serialized markup of <body>...
41
+ const body = doc.querySelector("body");
42
+ expect(body!.innerHTML).toContain("<strong>Lorem ipsum!</strong>");
43
+ expect(body!.innerHTML).toContain("Dolor sit amet consectetur.");
44
+
45
+ // ...while the id and style attributes of the wrapper are gone.
46
+ expect(body!.innerHTML).not.toContain('id="mi-prueba"');
47
+ expect(body!.innerHTML).not.toContain('style=');
48
+ });
49
+
50
+ it("should replace element with simple text content", () => {
51
+ const doc = parseHTML(`
52
+ <div>
53
+ <p id="paragraph" class="styled">Simple text</p>
54
+ </div>
55
+ `);
56
+
57
+ const paragraph = doc.querySelector("#paragraph");
58
+ expect(paragraph).not.toBeNull();
59
+
60
+ const parent = paragraph!.parentNode;
61
+ const innerHTML = paragraph!.innerHTML;
62
+
63
+ // Replace the <p> with its own inner markup, which is plain text here.
64
+ paragraph!.outerHTML = innerHTML;
65
+
66
+ // The paragraph can no longer be found by id.
67
+ expect(doc.querySelector("#paragraph")).toBeNull();
68
+
69
+ // The text itself is still present under the former parent.
70
+ expect(parent!.textContent).toContain("Simple text");
71
+ });
72
+
73
+ it("should replace element with multiple child nodes", () => {
74
+ const doc = parseHTML(`
75
+ <ul>
76
+ <li id="item-container">
77
+ <span>Item 1</span>
78
+ <span>Item 2</span>
79
+ </li>
80
+ </ul>
81
+ `);
82
+
83
+ const container = doc.querySelector("#item-container");
84
+ expect(container).not.toBeNull();
85
+
86
+ const ul = doc.querySelector("ul");
87
+ const innerHTML = container!.innerHTML;
88
+
89
+ // Replace the <li> with its own inner markup (the two spans).
90
+ container!.outerHTML = innerHTML;
91
+
92
+ // The <li> container can no longer be found by id.
93
+ expect(doc.querySelector("#item-container")).toBeNull();
94
+
95
+ // Both spans are still found under the <ul>, in their original order.
96
+ const spans = ul!.querySelectorAll("span");
97
+ expect(spans.length).toBe(2);
98
+ expect(spans[0]?.textContent).toBe("Item 1");
99
+ expect(spans[1]?.textContent).toBe("Item 2");
100
+ });
101
+
102
+ it("should replace element with empty string", () => {
103
+ const doc = parseHTML(`
104
+ <div>
105
+ <span id="to-remove"></span>
106
+ </div>
107
+ `);
108
+
109
+ const span = doc.querySelector("#to-remove");
110
+ expect(span).not.toBeNull();
111
+
112
+ const parent = span!.parentNode;
113
+ const childCountBefore = parent!.childNodes.length;
114
+
115
+ // Assigning an empty string is expected to remove the span outright.
116
+ span!.outerHTML = "";
117
+
118
+ // The span can no longer be found by id.
119
+ expect(doc.querySelector("#to-remove")).toBeNull();
120
+
121
+ // Exactly one child node disappeared from the parent; nothing was added.
122
+ expect(parent!.childNodes.length).toBe(childCountBefore - 1);
123
+ });
124
+
125
+ it("should replace element with new HTML structure", () => {
126
+ const doc = parseHTML(`
127
+ <div>
128
+ <p id="old">Old content</p>
129
+ </div>
130
+ `);
131
+
132
+ const oldParagraph = doc.querySelector("#old");
133
+ expect(oldParagraph).not.toBeNull();
134
+
135
+ const parent = oldParagraph!.parentNode;
136
+
137
+ // Swap the paragraph for unrelated markup with a different tag and id.
138
+ oldParagraph!.outerHTML = '<div id="new">New content</div>';
139
+
140
+ // The old paragraph can no longer be found by id.
141
+ expect(doc.querySelector("#old")).toBeNull();
142
+
143
+ // The new element exists, carries the new text and has the same parent.
144
+ const newDiv = doc.querySelector("#new");
145
+ expect(newDiv).not.toBeNull();
146
+ expect(newDiv!.textContent).toBe("New content");
147
+ expect(newDiv!.parentNode).toBe(parent);
148
+ });
149
+
150
+ it("should maintain sibling relationships after outerHTML replacement", () => {
151
+ const doc = parseHTML(`
152
+ <div>
153
+ <span>First</span>
154
+ <p id="middle">Middle</p>
155
+ <span>Last</span>
156
+ </div>
157
+ `);
158
+
159
+ const middle = doc.querySelector("#middle");
160
+ const firstSpan = doc.querySelectorAll("span")[0];
161
+ const lastSpan = doc.querySelectorAll("span")[1];
162
+
163
+ // Replace the middle paragraph with its own inner markup.
164
+ middle!.outerHTML = middle!.innerHTML;
165
+ // NOTE(review): if whitespace text nodes sit between these elements, the two
166
+ // checks below would hold even without the replacement - confirm with parser.
167
+ expect(firstSpan!.nextSibling).not.toBe(middle);
168
+ expect(lastSpan!.previousSibling).not.toBe(middle);
169
+
170
+ // The text of the replaced paragraph is still present under the shared parent.
171
+ const parent = firstSpan!.parentNode;
172
+ expect(parent!.textContent).toContain("Middle");
173
+ });
174
+
175
+ it("should handle complex nested HTML replacement", () => {
176
+ const doc = parseHTML(`
177
+ <article>
178
+ <section id="wrapper" class="container" data-id="123">
179
+ <h2>Title</h2>
180
+ <p>Paragraph <strong>bold</strong> text</p>
181
+ <ul>
182
+ <li>Item 1</li>
183
+ <li>Item 2</li>
184
+ </ul>
185
+ </section>
186
+ </article>
187
+ `);
188
+
189
+ const wrapper = doc.querySelector("#wrapper");
190
+ expect(wrapper).not.toBeNull();
191
+
192
+ const article = doc.querySelector("article");
193
+ const innerHTML = wrapper!.innerHTML;
194
+
195
+ // Unwrap the section by replacing it with its own inner markup.
196
+ wrapper!.outerHTML = innerHTML;
197
+
198
+ // Neither the id nor any <section> element can be found afterwards.
199
+ expect(doc.querySelector("#wrapper")).toBeNull();
200
+ expect(doc.querySelector("section")).toBeNull();
201
+
202
+ // The nested heading, bold text and list items are still reachable from <article>.
203
+ expect(article!.querySelector("h2")).not.toBeNull();
204
+ expect(article!.querySelector("h2")!.textContent).toBe("Title");
205
+ expect(article!.querySelector("strong")).not.toBeNull();
206
+ expect(article!.querySelectorAll("li").length).toBe(2);
207
+ });
208
+ });
@@ -326,7 +326,7 @@ describe('HTML Parser', () => {
326
326
  )!;
327
327
  expect(htmlElement.type).toBe(ASTNodeType.ELEMENT);
328
328
  expect(htmlElement.tagName).toBe('html');
329
- expect(htmlElement.attributes!.lang).toBe('es');
329
+ expect(htmlElement.attributes!.lang).toBe('en');
330
330
  const headElement = htmlElement.children!.find(
331
331
  child => child.type === ASTNodeType.ELEMENT && child.tagName === 'head'
332
332
  )!;