npm - @tkeron/html-parser - Versions diffs - 0.1.5 → 1.0.0 - Mend

@tkeron/html-parser 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/README.md +1 -7
package/bun.lock +8 -3
package/index.ts +4 -0
package/package.json +13 -6
package/src/css-selector.ts +45 -27
package/src/dom-simulator.ts +162 -20
package/src/encoding.ts +39 -0
package/src/index.ts +9 -0
package/src/parser.ts +478 -183
package/src/serializer.ts +450 -0
package/src/tokenizer.ts +59 -139
package/tests/advanced.test.ts +119 -106
package/tests/custom-elements.test.ts +172 -162
package/tests/dom-extended.test.ts +12 -12
package/tests/dom-manipulation.test.ts +637 -0
package/tests/dom.test.ts +32 -27
package/tests/helpers/tokenizer-adapter.test.ts +70 -0
package/tests/helpers/tokenizer-adapter.ts +65 -0
package/tests/helpers/tree-adapter.test.ts +39 -0
package/tests/helpers/tree-adapter.ts +43 -0
package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
package/tests/html5lib-data/tree-construction/math.dat +104 -0
package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
package/tests/html5lib-data/tree-construction/svg.dat +104 -0
package/tests/html5lib-data/tree-construction/template.dat +1673 -0
package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
package/tests/parser.test.ts +172 -193
package/tests/selectors.test.ts +64 -1
package/tests/serializer-core.test.ts +16 -0
package/tests/serializer-data/core.test +125 -0
package/tests/serializer-data/injectmeta.test +66 -0
package/tests/serializer-data/optionaltags.test +965 -0
package/tests/serializer-data/options.test +60 -0
package/tests/serializer-data/whitespace.test +51 -0
package/tests/serializer-injectmeta.test.ts +16 -0
package/tests/serializer-optionaltags.test.ts +16 -0
package/tests/serializer-options.test.ts +16 -0
package/tests/serializer-whitespace.test.ts +16 -0
package/tests/tokenizer-namedEntities.test.ts +20 -0
package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
package/tests/tokenizer.test.ts +83 -0
package/tests/tree-construction-adoption01.test.ts +37 -0
package/tests/tree-construction-adoption02.test.ts +34 -0
package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
package/tests/tree-construction-entities02.test.ts +33 -0
package/tests/tree-construction-html5test-com.test.ts +24 -0
package/tests/tree-construction-math.test.ts +18 -0
package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
package/tests/tree-construction-noscript01.test.ts +18 -0
package/tests/tree-construction-ruby.test.ts +21 -0
package/tests/tree-construction-scriptdata01.test.ts +21 -0
package/tests/tree-construction-svg.test.ts +21 -0
package/tests/tree-construction-template.test.ts +21 -0
package/tests/tree-construction-tests10.test.ts +21 -0
package/tests/tree-construction-tests11.test.ts +21 -0
package/tests/tree-construction-tests20.test.ts +18 -0
package/tests/tree-construction-tests21.test.ts +18 -0
package/tests/tree-construction-tests23.test.ts +18 -0
package/tests/tree-construction-tests24.test.ts +18 -0
package/tests/tree-construction-tests5.test.ts +21 -0
package/tests/tree-construction-tests6.test.ts +21 -0
package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
package/tests/void-elements.test.ts +471 -0
package/tests/official/README.md +0 -87
package/tests/official/acid/acid-tests.test.ts +0 -309
package/tests/official/final-output/final-output.test.ts +0 -361
package/tests/official/html5lib/tokenizer-utils.ts +0 -192
package/tests/official/html5lib/tokenizer.test.ts +0 -171
package/tests/official/html5lib/tree-construction-utils.ts +0 -194
package/tests/official/html5lib/tree-construction.test.ts +0 -250
package/tests/official/validator/validator-tests.test.ts +0 -237
package/tests/official/validator-nu/validator-nu.test.ts +0 -335
package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
package/tests/official/wpt/wpt-tests.test.ts +0 -409

package/tests/serializer-data/options.test ADDED Viewed

@@ -0,0 +1,60 @@
+{"tests":[
+{"description": "quote_char=\"'\"",
+ "options": {"quote_char": "'"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
+ "expected": ["<span title='test &#39;with&#39; quote_char'>"]
+},
+{"description": "quote_attr_values=true",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
+ "expected": ["<button disabled>"],
+ "xhtml":    ["<button disabled=\"disabled\">"]
+},
+{"description": "quote_attr_values=true with irrelevant",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+{"description": "use_trailing_solidus=true with void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["EmptyTag", "img", {}]],
+ "expected": ["<img />"]
+},
+{"description": "use_trailing_solidus=true with non-void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
+ "expected": ["<div>"]
+},
+{"description": "minimize_boolean_attributes=false",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant=irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+{"description": "minimize_boolean_attributes=false with empty value",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
+ "expected": ["<div irrelevant=\"\">"]
+},
+{"description": "escape less than signs in attribute values",
+ "options": {"escape_lt_in_attrs": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
+ "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
+},
+{"description": "rcdata",
+ "options": {"escape_rcdata": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a&lt;b&gt;c&amp;d"]
+}
+]}

package/tests/serializer-data/whitespace.test ADDED Viewed

@@ -0,0 +1,51 @@
+{"tests": [
+{"description": "bare text with leading spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "\t\r\n\u000C foo"]],
+ "expected": [" foo"]
+},
+{"description": "bare text with trailing spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C"]],
+ "expected": ["foo "]
+},
+{"description": "bare text with inner spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C bar"]],
+ "expected": ["foo bar"]
+},
+{"description": "text within <pre>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
+},
+{"description": "text within <pre>, with inner markup",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
+},
+{"description": "text within <textarea>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
+ "expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
+},
+{"description": "text within <script>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
+ "expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
+},
+{"description": "text within <style>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
+ "expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
+}
+]}

package/tests/serializer-injectmeta.test.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { expect, it, describe } from 'bun:test';
+import { serializeTokens } from '../src/serializer';
+import { readFileSync } from 'fs';
+// Replays every case of the injectmeta.test fixture through serializeTokens.
+describe('Serializer Inject Meta Tests', () => {
+  const { tests } = JSON.parse(readFileSync('tests/serializer-data/injectmeta.test', 'utf8'));
+  for (const test of tests as any[]) {
+    it(test.description, () => {
+      const result = serializeTokens(test.input, test.options);
+      // `expected` is an array of acceptable outputs (html5lib serializer-test convention): match any entry.
+      expect(test.expected).toContain(result);
+    });
+  }
+});

package/tests/serializer-optionaltags.test.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { expect, it, describe } from 'bun:test';
+import { serializeTokens } from '../src/serializer';
+import { readFileSync } from 'fs';
+// Replays every case of the optionaltags.test fixture through serializeTokens.
+describe('Serializer Optional Tags Tests', () => {
+  const { tests } = JSON.parse(readFileSync('tests/serializer-data/optionaltags.test', 'utf8'));
+  for (const test of tests as any[]) {
+    it(test.description, () => {
+      const result = serializeTokens(test.input, test.options);
+      // `expected` is an array of acceptable outputs (html5lib serializer-test convention): match any entry.
+      expect(test.expected).toContain(result);
+    });
+  }
+});

package/tests/serializer-options.test.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { expect, it, describe } from 'bun:test';
+import { serializeTokens } from '../src/serializer';
+import { readFileSync } from 'fs';
+// Replays every case of the options.test fixture through serializeTokens.
+describe('Serializer Options Tests', () => {
+  const { tests } = JSON.parse(readFileSync('tests/serializer-data/options.test', 'utf8'));
+  for (const test of tests as any[]) {
+    it(test.description, () => {
+      const result = serializeTokens(test.input, test.options);
+      // `expected` is an array of acceptable outputs (html5lib serializer-test convention): match any entry.
+      expect(test.expected).toContain(result);
+    });
+  }
+});

package/tests/serializer-whitespace.test.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { expect, it, describe } from 'bun:test';
+import { serializeTokens } from '../src/serializer';
+import { readFileSync } from 'fs';
+// Replays every case of the whitespace.test fixture through serializeTokens.
+describe('Serializer Whitespace Tests', () => {
+  const { tests } = JSON.parse(readFileSync('tests/serializer-data/whitespace.test', 'utf8'));
+  for (const test of tests as any[]) {
+    it(test.description, () => {
+      const result = serializeTokens(test.input, test.options);
+      // `expected` is an array of acceptable outputs (html5lib serializer-test convention): match any entry.
+      expect(test.expected).toContain(result);
+    });
+  }
+});

package/tests/tokenizer-namedEntities.test.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { expect, it, describe } from 'bun:test';
+import { tokenize } from '../src/tokenizer';
+import { readFileSync } from 'fs';
+import { adaptTokens } from './helpers/tokenizer-adapter';
+// Registers one test per case of namedEntities.test whose `errors` list is absent or empty.
+describe('Tokenizer NamedEntities Tests', () => {
+  const raw = readFileSync('tests/html5lib-data/tokenizer/namedEntities.test', 'utf8');
+  const { tests: cases } = JSON.parse(raw);
+  for (const test of cases as any[]) {
+    // Cases with a non-empty `errors` list are not registered at all.
+    if (test.errors && test.errors.length !== 0) continue;
+    it(test.description, () => {
+      const tokens = tokenize(test.input);
+      const adapted = adaptTokens(tokens);
+      expect(adapted).toEqual(test.output);
+    });
+  }
+});

package/tests/tokenizer-pendingSpecChanges.test.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { expect, it, describe } from 'bun:test';
+import { tokenize } from '../src/tokenizer';
+import { readFileSync } from 'fs';
+import { adaptTokens } from './helpers/tokenizer-adapter';
+// Registers one test per case of pendingSpecChanges.test whose `errors` list is absent or empty.
+describe('Tokenizer PendingSpecChanges Tests', () => {
+  const raw = readFileSync('tests/html5lib-data/tokenizer/pendingSpecChanges.test', 'utf8');
+  const { tests: cases } = JSON.parse(raw);
+  for (const test of cases as any[]) {
+    // Cases with a non-empty `errors` list are not registered at all.
+    if (test.errors && test.errors.length !== 0) continue;
+    it(test.description, () => {
+      const tokens = tokenize(test.input);
+      const adapted = adaptTokens(tokens);
+      expect(adapted).toEqual(test.output);
+    });
+  }
+});

package/tests/tokenizer.test.ts CHANGED Viewed

@@ -662,5 +662,88 @@ describe('HTML Tokenizer', () => {
         tokens.some(token => token.value === 'span');
       expect(hasValidElements).toBe(true);
     });
+    it('should handle empty angle brackets <>', () => {
+      const html = '<>text<div>content</div>';
+      const tokens = tokenize(html);
+      // Should skip the invalid <> and continue parsing
+      expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
+      const divToken = tokens.find(t => t.value === 'div');
+      expect(divToken).toBeDefined();
+    });
+    it('should handle angle bracket with only space < >', () => {
+      const html = '< >text<p>paragraph</p>';
+      const tokens = tokenize(html);
+      expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
+      const pToken = tokens.find(t => t.value === 'p');
+      expect(pToken).toBeDefined();
+    });
+    it('should handle tag with no valid name', () => {
+      const html = '<123>text</123><div>ok</div>';
+      const tokens = tokenize(html);
+      // Tags starting with numbers are invalid, should be treated as text
+      expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
+      const divToken = tokens.find(t => t.value === 'div');
+      expect(divToken).toBeDefined();
+    });
+  });
+  describe('Entity Edge Cases', () => {
+    it('should handle entity without semicolon with valid prefix', () => {
+      // &nbsp followed by other text (no semicolon) should decode &nbsp
+      const tokens = tokenize('<div>&nbsptext</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      // Should decode &nbsp (non-breaking space) and keep "text"
+      expect(textToken!.value).toContain('text');
+    });
+    it('should handle entity without semicolon - lt prefix', () => {
+      const tokens = tokenize('<div>&ltvalue</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      expect(textToken!.value).toBe('&ltvalue');
+    });
+    it('should handle entity without semicolon - gt prefix', () => {
+      const tokens = tokenize('<div>&gtvalue</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      expect(textToken!.value).toBe('&gtvalue');
+    });
+    it('should handle entity without semicolon - amp prefix', () => {
+      const tokens = tokenize('<div>&ampvalue</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      expect(textToken!.value).toBe('&ampvalue');
+    });
+    it('should handle unknown entity gracefully', () => {
+      const tokens = tokenize('<div>&unknownentity;</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      // Unknown entity should be kept as-is
+      expect(textToken!.value).toBe('&unknownentity;');
+    });
+    it('should handle partial entity name with no matching prefix', () => {
+      const tokens = tokenize('<div>&xyz</div>');
+      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      expect(textToken).toBeDefined();
+      // No valid entity prefix, keep as-is
+      expect(textToken!.value).toBe('&xyz');
+    });
   })
 });

package/tests/tree-construction-adoption01.test.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import { expect, it, describe } from 'bun:test';
+import { parseHTML } from '../index';
+import { serializeToHtml5lib } from './helpers/tree-adapter';
+import { readFileSync } from 'fs';
+// Registers one (currently skipped) test per `#data` section of adoption01.dat.
+describe('Tree Construction Adoption01 Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/adoption01.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((section, index) => {
+    const inputLines: string[] = [];
+    let document = '';
+    let inDocument = false;
+    let inData = true; // a section starts with its input because we split on #data\n
+    for (const line of section.trim().split('\n')) {
+      if (line.startsWith('#document')) {
+        inDocument = true;
+        inData = false;
+      } else if (line.startsWith('#errors')) {
+        inData = false;
+        inDocument = false;
+      } else if (inDocument) {
+        document += line + '\n';
+      } else if (inData) {
+        inputLines.push(line);
+      }
+    }
+    // Rejoin with '\n' so an input spanning several lines keeps its line breaks.
+    const data = inputLines.join('\n');
+    it.skip(`Adoption test ${index + 1}`, () => {
+      const serialized = serializeToHtml5lib(parseHTML(data));
+      expect(serialized).toBe(document);
+    });
+  });
+});

package/tests/tree-construction-adoption02.test.ts ADDED Viewed

@@ -0,0 +1,34 @@
+import { expect, it, describe } from 'bun:test';
+import { parseHTML } from '../index';
+import { serializeToHtml5lib } from './helpers/tree-adapter';
+import { readFileSync } from 'fs';
+// Registers one (currently skipped) test per `#data` section of adoption02.dat.
+describe('Tree Construction Adoption02 Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/adoption02.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((section, index) => {
+    const inputLines: string[] = [];
+    let document = '';
+    let mode: 'data' | 'ignored' | 'document' = 'data'; // a section starts with its input
+    for (const line of section.trim().split('\n')) {
+      if (line.startsWith('#document')) {
+        mode = 'document';
+      } else if (mode === 'document') {
+        document += line.slice(2) + '\n';
+      } else if (line.startsWith('#')) {
+        // Any other marker (e.g. #errors) ends the input; the lines below it must not leak into data.
+        mode = 'ignored';
+      } else if (mode === 'data') {
+        inputLines.push(line);
+      }
+    }
+    const data = inputLines.join('\n'); // keep the line breaks of a multi-line input
+    it.skip(`Adoption02 test ${index + 1}`, () => {
+      const serialized = serializeToHtml5lib(parseHTML(data));
+      expect(serialized).toBe(document.trim());
+    });
+  });
+});

package/tests/tree-construction-domjs-unsafe.test.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import { describe, expect, it } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one (currently skipped) smoke test per `#data` section of domjs-unsafe.dat.
+describe("Tree Construction DomjsUnsafe Tests", () => {
+  const data = readFileSync("tests/html5lib-data/tree-construction/domjs-unsafe.dat", "utf8");
+  const sections = data.split("#data\n").slice(1);
+  for (const section of sections) {
+    const parts = section.split("#document\n");
+    if (parts.length < 2) continue; // no #document marker in this section: nothing to register
+    const expected = parts[1];
+    const input = parts[0].split("#errors\n")[0].trim(); // cut the #errors list that follows the input
+    const testName = input.split("\n")[0] || "DomjsUnsafe test";
+    it.skip(testName, () => {
+      const doc = parse(input);
+      // TODO: Implement DOM tree comparison with expected
+      // For now, just ensure parsing doesn't throw
+      expect(doc).toBeDefined();
+    });
+  }
+});

package/tests/tree-construction-entities02.test.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import { expect, it, describe } from 'bun:test';
+import { parse } from '../src/parser';
+import { readFileSync } from 'fs';
+// Registers one parse smoke test per `#data` section of entities02.dat.
+describe('Tree Construction Entities02 Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/entities02.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((section, index) => {
+    const inputLines: string[] = [];
+    let document = '';
+    let mode: 'data' | 'ignored' | 'document' = 'data'; // a section starts with its input
+    for (const line of section.trim().split('\n')) {
+      if (line.startsWith('#document')) {
+        mode = 'document';
+      } else if (mode === 'document') {
+        document += line + '\n';
+      } else if (line.startsWith('#')) {
+        mode = 'ignored'; // #errors etc. end the input; the lines below must not leak into data
+      } else if (mode === 'data') {
+        inputLines.push(line);
+      }
+    }
+    const data = inputLines.join('\n'); // keep the line breaks of a multi-line input
+    it(`Entities02 test ${index + 1}`, () => {
+      const doc = parse(data);
+      // TODO: compare doc with expected document tree
+      expect(doc).toBeDefined();
+    });
+  });
+});

package/tests/tree-construction-html5test-com.test.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import { describe, expect, it } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one (currently skipped) smoke test per `#data` section of html5test-com.dat.
+describe("Tree Construction Html5testCom Tests", () => {
+  const data = readFileSync("tests/html5lib-data/tree-construction/html5test-com.dat", "utf8");
+  const sections = data.split("#data\n").slice(1);
+  for (const section of sections) {
+    const parts = section.split("#document\n");
+    if (parts.length < 2) continue; // no #document marker in this section: nothing to register
+    const expected = parts[1];
+    const input = parts[0].split("#errors\n")[0].trim(); // cut the #errors list that follows the input
+    const testName = input.split("\n")[0] || "Html5testCom test";
+    it.skip(testName, () => {
+      const doc = parse(input);
+      // TODO: Implement DOM tree comparison with expected
+      // For now, just ensure parsing doesn't throw
+      expect(doc).toBeDefined();
+    });
+  }
+});

package/tests/tree-construction-math.test.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { readFileSync } from 'fs';
+import { parse } from '../src/index.ts';
+// Registers one (currently skipped) smoke test per `#data` section of math.dat.
+describe('Tree Construction Math Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/math.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((test, index) => {
+    const parts = test.split('#document\n');
+    const input = parts[0].split('#errors\n')[0].trim(); // the #errors list follows the input, before #document
+    const expected = parts[1]?.trim() ?? '';
+    it.skip(`Math test ${index + 1}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+    });
+  });
+});

package/tests/tree-construction-namespace-sensitivity.test.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { readFileSync } from 'fs';
+import { parse } from '../src/index.ts';
+// Registers one (currently skipped) smoke test per `#data` section of namespace-sensitivity.dat.
+describe('Tree Construction NamespaceSensitivity Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/namespace-sensitivity.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((test, index) => {
+    const parts = test.split('#document\n');
+    const input = parts[0].split('#errors\n')[0].trim(); // the #errors list follows the input, before #document
+    const expected = parts[1]?.trim() ?? '';
+    it.skip(`NamespaceSensitivity test ${index + 1}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+    });
+  });
+});

package/tests/tree-construction-noscript01.test.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { readFileSync } from 'fs';
+import { parse } from '../src/index.ts';
+// Registers one (currently skipped) smoke test per `#data` section of noscript01.dat.
+describe('Tree Construction Noscript01 Tests', () => {
+  const content = readFileSync('tests/html5lib-data/tree-construction/noscript01.dat', 'utf8');
+  content.split('#data\n').slice(1).forEach((test, index) => {
+    const parts = test.split('#document\n');
+    const input = parts[0].split('#errors\n')[0].trim(); // the #errors list follows the input, before #document
+    const expected = parts[1]?.trim() ?? '';
+    it.skip(`Noscript01 test ${index + 1}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+    });
+  });
+});

package/tests/tree-construction-ruby.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, it, expect } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one parse smoke test per `#data` section of ruby.dat.
+describe("Tree Construction Ruby Tests", () => {
+  const content = readFileSync("tests/html5lib-data/tree-construction/ruby.dat", "utf8");
+  for (const section of content.split(/^#data$/gm).slice(1)) {
+    const [data, document] = section.split(/^#document$/gm);
+    const input = data.split(/^#errors$/m)[0].trim(); // cut the #errors list that follows the input
+    const expected = (document ?? "").trim(); // tolerate a section that has no #document marker
+    it(`Ruby test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+      // TODO: Implement DOM serialization and comparison
+      // expect(serialize(doc)).toBe(expected);
+    });
+  }
+});

package/tests/tree-construction-scriptdata01.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, it, expect } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one parse smoke test per `#data` section of scriptdata01.dat.
+describe("Tree Construction Scriptdata01 Tests", () => {
+  const content = readFileSync("tests/html5lib-data/tree-construction/scriptdata01.dat", "utf8");
+  for (const section of content.split(/^#data$/gm).slice(1)) {
+    const [data, document] = section.split(/^#document$/gm);
+    const input = data.split(/^#errors$/m)[0].trim(); // cut the #errors list that follows the input
+    const expected = (document ?? "").trim(); // tolerate a section that has no #document marker
+    it(`Scriptdata01 test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+      // TODO: Implement DOM serialization and comparison
+      // expect(serialize(doc)).toBe(expected);
+    });
+  }
+});

package/tests/tree-construction-svg.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, it, expect } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one parse smoke test per `#data` section of svg.dat.
+describe("Tree Construction SVG Tests", () => {
+  const content = readFileSync("tests/html5lib-data/tree-construction/svg.dat", "utf8");
+  for (const section of content.split(/^#data$/gm).slice(1)) {
+    const [data, document] = section.split(/^#document$/gm);
+    const input = data.split(/^#errors$/m)[0].trim(); // cut the #errors list that follows the input
+    const expected = (document ?? "").trim(); // tolerate a section that has no #document marker
+    it(`SVG test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+      // TODO: Implement DOM serialization and comparison
+      // expect(serialize(doc)).toBe(expected);
+    });
+  }
+});

package/tests/tree-construction-template.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, it, expect } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one parse smoke test per `#data` section of template.dat.
+describe("Tree Construction Template Tests", () => {
+  const content = readFileSync("tests/html5lib-data/tree-construction/template.dat", "utf8");
+  for (const section of content.split(/^#data$/gm).slice(1)) {
+    const [data, document] = section.split(/^#document$/gm);
+    const input = data.split(/^#errors$/m)[0].trim(); // cut the #errors list that follows the input
+    const expected = (document ?? "").trim(); // tolerate a section that has no #document marker
+    it(`Template test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+      // TODO: Implement DOM serialization and comparison
+      // expect(serialize(doc)).toBe(expected);
+    });
+  }
+});

package/tests/tree-construction-tests10.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, it, expect } from "bun:test";
+import { readFileSync } from "fs";
+import { parse } from "../src/index.ts";
+// Registers one parse smoke test per `#data` section of tests10.dat.
+describe("Tree Construction Tests10 Tests", () => {
+  const content = readFileSync("tests/html5lib-data/tree-construction/tests10.dat", "utf8");
+  for (const section of content.split(/^#data$/gm).slice(1)) {
+    const [data, document] = section.split(/^#document$/gm);
+    const input = data.split(/^#errors$/m)[0].trim(); // cut the #errors list that follows the input
+    const expected = (document ?? "").trim(); // tolerate a section that has no #document marker
+    it(`Tests10 test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
+      const doc = parse(input);
+      expect(doc).toBeDefined();
+      // TODO: Implement DOM serialization and comparison
+      // expect(serialize(doc)).toBe(expected);
+    });
+  }
+});