npm - @tkeron/html-parser - Versions diffs - 0.1.4 → 0.1.7 - Mend

@tkeron/html-parser 0.1.4 → 0.1.7

This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (32)

package/README.md +6 -6
package/bun.lock +3 -3
package/index.ts +0 -5
package/package.json +7 -6
package/src/css-selector.ts +45 -32
package/src/dom-simulator.ts +243 -46
package/src/parser.ts +0 -39
package/src/tokenizer.ts +0 -116
package/tests/advanced.test.ts +2 -2
package/tests/cloneNode.test.ts +50 -50
package/tests/custom-elements.test.ts +8 -8
package/tests/dom-manipulation.test.ts +638 -0
package/tests/official/acid/acid-tests.test.ts +6 -6
package/tests/official/final-output/final-output.test.ts +15 -15
package/tests/official/html5lib/tokenizer-utils.ts +19 -31
package/tests/official/html5lib/tokenizer.test.ts +4 -4
package/tests/official/html5lib/tree-construction-utils.ts +20 -34
package/tests/official/html5lib/tree-construction.test.ts +5 -5
package/tests/official/validator/validator-tests.test.ts +11 -11
package/tests/official/wpt/wpt-tests.test.ts +5 -5
package/tests/outerHTML-replacement.test.ts +208 -0
package/tests/parser.test.ts +1 -1
package/tests/selectors.test.ts +64 -1
package/tests/test-page-0.txt +12 -355
package/tests/tokenizer.test.ts +86 -0
package/tests/void-elements.test.ts +471 -0
package/tests/api-integration.test.ts +0 -114
package/tests/cloneNode-bug-reproduction.test.ts +0 -325
package/tests/cloneNode-interactive.ts +0 -235
package/tests/dom-adoption.test.ts +0 -363
package/tests/dom-synchronization.test.ts +0 -675
package/tests/setAttribute-outerHTML.test.ts +0 -102

package/tests/official/final-output/final-output.test.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { describe, it, expect } from 'bun:test';
 import { parseHTML } from '../../../index';
-// Helper function to normalize text for comparison
 function normalizeText(text: string): string {
   return text
     .replace(/\s+/g, ' ')
@@ -16,7 +16,7 @@ describe('Final HTML Output Validation', () => {
       const document = parseHTML(html);
       expect(document).toBeDefined();
-      expect(document.nodeType).toBe(9); // DOCUMENT_NODE
+      expect(document.nodeType).toBe(9);
       const div = document.querySelector('div');
       expect(div).toBeDefined();
@@ -101,9 +101,9 @@ describe('Final HTML Output Validation', () => {
       expect(p).toBeDefined();
       expect(p?.textContent).toBe('Content');
-      // Check for comment node
       const commentNode = div?.childNodes[0];
-      expect(commentNode?.nodeType).toBe(8); // COMMENT_NODE
+      expect(commentNode?.nodeType).toBe(8);
     });
   });
@@ -135,16 +135,16 @@ describe('Final HTML Output Validation', () => {
       expect(div).toBeDefined();
       expect(div?.childNodes.length).toBe(3);
-      // First text node
-      expect(div?.childNodes[0]?.nodeType).toBe(3); // TEXT_NODE
+      expect(div?.childNodes[0]?.nodeType).toBe(3);
       expect(div?.childNodes[0]?.textContent).toBe('Text before ');
-      // Span element
-      expect(div?.childNodes[1]?.nodeType).toBe(1); // ELEMENT_NODE
+      expect(div?.childNodes[1]?.nodeType).toBe(1);
       expect((div?.childNodes[1] as Element)?.tagName).toBe('SPAN');
-      // Last text node
-      expect(div?.childNodes[2]?.nodeType).toBe(3); // TEXT_NODE
+      expect(div?.childNodes[2]?.nodeType).toBe(3);
       expect(div?.childNodes[2]?.textContent).toBe(' text after');
     });
@@ -264,7 +264,7 @@ describe('Final HTML Output Validation', () => {
       const document = parseHTML(malformedHTML);
       expect(document).toBeDefined();
-      expect(document.nodeType).toBe(9); // DOCUMENT_NODE
+      expect(document.nodeType).toBe(9);
       const divs = document.querySelectorAll('div');
       expect(divs.length).toBeGreaterThan(0);
@@ -296,7 +296,7 @@ describe('Final HTML Output Validation', () => {
       const p = document.querySelector('p');
       expect(p).toBeDefined();
       expect(p?.textContent).toContain('Special chars:');
-      // The exact entity handling depends on your implementation
     });
     it('should handle multiple top-level elements', () => {
@@ -321,17 +321,17 @@ describe('Final HTML Output Validation', () => {
       const document = parseHTML(html);
-      // Test getElementById
       const byId = document.getElementById('test');
       expect(byId).toBeDefined();
       expect(byId?.tagName).toBe('DIV');
-      // Test querySelector
       const bySelector = document.querySelector('.container');
       expect(bySelector).toBeDefined();
       expect(bySelector?.id).toBe('test');
-      // Test querySelectorAll
       const byClass = document.querySelectorAll('.text');
       expect(byClass.length).toBe(1);
       expect(byClass[0]?.textContent).toBe('Hello');

package/tests/official/html5lib/tokenizer-utils.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { expect, describe, it } from 'bun:test';
 import { tokenize, TokenType } from '../../../src/tokenizer';
 import type { Token } from '../../../src/tokenizer';
-// HTML5lib tokenizer test format
 export interface HTML5libTokenizerTest {
   description: string;
   input: string;
@@ -18,12 +18,12 @@ export interface HTML5libTokenizerTestSuite {
 }
 export type HTML5libTokenOutput =
-  | ['StartTag', string, Record<string, string>] // StartTag without self-closing
-  | ['StartTag', string, Record<string, string>, boolean] // StartTag with self-closing
-  | ['EndTag', string] // EndTag
-  | ['Comment', string] // Comment
-  | ['Character', string] // Character
-  | ['DOCTYPE', string, string | null, string | null, boolean]; // DOCTYPE
+  | ['StartTag', string, Record<string, string>]
+  | ['StartTag', string, Record<string, string>, boolean]
+  | ['EndTag', string]
+  | ['Comment', string]
+  | ['Character', string]
+  | ['DOCTYPE', string, string | null, string | null, boolean];
 export interface HTML5libError {
   code: string;
@@ -31,9 +31,7 @@ export interface HTML5libError {
   col: number;
 }
-/**
- * Converts HTML5lib token format to our internal token format
- */
 export function convertHTML5libToken(html5libToken: HTML5libTokenOutput): Partial<Token> {
   const type = html5libToken[0];
   const nameOrData = html5libToken[1];
@@ -78,9 +76,7 @@ export function convertHTML5libToken(html5libToken: HTML5libTokenOutput): Partia
   }
 }
-/**
- * Converts our internal token format to HTML5lib format for comparison
- */
 export function convertToHTML5libToken(token: Token): HTML5libTokenOutput {
   switch (token.type) {
     case TokenType.DOCTYPE:
@@ -102,9 +98,7 @@ export function convertToHTML5libToken(token: Token): HTML5libTokenOutput {
   }
 }
-/**
- * Normalizes adjacent character tokens as per HTML5lib spec
- */
 export function normalizeCharacterTokens(tokens: Token[]): Token[] {
   const normalized: Token[] = [];
   let currentText = '';
@@ -140,13 +134,11 @@ export function normalizeCharacterTokens(tokens: Token[]): Token[] {
   return normalized;
 }
-/**
- * Runs a single HTML5lib tokenizer test
- */
 export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
   const { description, input, output: expectedOutput, initialStates = ['Data state'] } = test;
-  // Process double-escaped input if needed
   let processedInput = input;
   if (test.doubleEscaped) {
     processedInput = processedInput.replace(/\\u([0-9a-fA-F]{4})/g, (match, hex) => {
@@ -156,16 +148,16 @@ export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
   for (const initialState of initialStates) {
     it(`${description} (${initialState})`, () => {
-      // Tokenize the input
       const tokens = tokenize(processedInput);
-      // Normalize character tokens
       const normalizedTokens = normalizeCharacterTokens(tokens);
-      // Convert to HTML5lib format for comparison
       const actualOutput = normalizedTokens.map(convertToHTML5libToken);
-      // Process expected output if double-escaped
       let processedExpectedOutput = expectedOutput;
       if (test.doubleEscaped) {
         processedExpectedOutput = expectedOutput.map(token => {
@@ -178,15 +170,13 @@ export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
         });
       }
-      // Compare outputs
       expect(actualOutput).toEqual(processedExpectedOutput);
     });
   }
 }
-/**
- * Runs all tests from an HTML5lib tokenizer test suite
- */
 export function runHTML5libTokenizerTestSuite(testSuite: HTML5libTokenizerTestSuite, suiteName: string): void {
   describe(`HTML5lib Tokenizer Tests: ${suiteName}`, () => {
     testSuite.tests.forEach(test => {
@@ -195,9 +185,7 @@ export function runHTML5libTokenizerTestSuite(testSuite: HTML5libTokenizerTestSu
   });
 }
-/**
- * Loads and runs HTML5lib tokenizer tests from JSON
- */
 export async function loadHTML5libTokenizerTests(testData: string, suiteName: string): Promise<void> {
   const testSuite: HTML5libTokenizerTestSuite = JSON.parse(testData);
   runHTML5libTokenizerTestSuite(testSuite, suiteName);

package/tests/official/html5lib/tokenizer.test.ts CHANGED Viewed

@@ -5,7 +5,7 @@ import {
   type HTML5libTokenizerTestSuite
 } from './tokenizer-utils';
-// Sample HTML5lib tokenizer tests embedded directly
 const basicTokenizerTests: HTML5libTokenizerTestSuite = {
   "tests": [
     {
@@ -86,7 +86,7 @@ const basicTokenizerTests: HTML5libTokenizerTestSuite = {
   ]
 };
-// Entity tests
 const entityTests: HTML5libTokenizerTestSuite = {
   "tests": [
     {
@@ -123,7 +123,7 @@ const entityTests: HTML5libTokenizerTestSuite = {
   ]
 };
-// Comment tests
 const commentTests: HTML5libTokenizerTestSuite = {
   "tests": [
     {
@@ -163,7 +163,7 @@ const commentTests: HTML5libTokenizerTestSuite = {
   ]
 };
-// Run the embedded tests
 describe('HTML5lib Tokenizer Tests', () => {
   runHTML5libTokenizerTestSuite(basicTokenizerTests, 'Basic Tokenizer');
   runHTML5libTokenizerTestSuite(entityTests, 'Entity Handling');

package/tests/official/html5lib/tree-construction-utils.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import { parse } from '../../../src/parser';
 import { tokenize } from '../../../src/tokenizer';
 import type { ASTNode } from '../../../src/parser';
-// HTML5lib tree construction test format
 export interface HTML5libTreeTest {
   data: string;
   errors: string[];
@@ -14,9 +14,7 @@ export interface HTML5libTreeTest {
   document: string;
 }
-/**
- * Parses HTML5lib DAT format test files
- */
 export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
   const tests: HTML5libTreeTest[] = [];
   const sections = content.split('\n\n').filter(section => section.trim());
@@ -24,7 +22,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
   for (const section of sections) {
     const lines = section.split('\n');
     const test: Partial<HTML5libTreeTest> = {
-      errors: [] // Initialize errors as empty array
+      errors: []
     };
     let currentSection = '';
@@ -32,7 +30,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
     for (const line of lines) {
       if (line.startsWith('#')) {
-        // Save previous section
         if (currentSection) {
           switch (currentSection) {
             case 'data':
@@ -53,11 +51,11 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
           }
         }
-        // Start new section
         currentSection = line.substring(1);
         currentContent = [];
-        // Handle script flags
         if (currentSection === 'script-off') {
           test.scriptOff = true;
         } else if (currentSection === 'script-on') {
@@ -68,7 +66,7 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
       }
     }
-    // Save last section
     if (currentSection) {
       switch (currentSection) {
         case 'data':
@@ -97,16 +95,14 @@ export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
   return tests;
 }
-/**
- * Converts AST to HTML5lib tree format
- */
 export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): string[] {
   const lines: string[] = [];
   const indent = '| ' + '  '.repeat(depth);
   switch (node.type) {
     case 'DOCUMENT':
-      // Document node doesn't have a line representation
       break;
     case 'DOCTYPE':
       lines.push(`${indent}<!DOCTYPE ${node.tagName || 'html'}>`);
@@ -115,7 +111,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
       const tagName = node.tagName || 'unknown';
       lines.push(`${indent}<${tagName}>`);
-      // Add attributes
       if (node.attributes) {
         for (const [name, value] of Object.entries(node.attributes).sort()) {
           lines.push(`${indent}  ${name}="${value}"`);
@@ -135,7 +131,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
       break;
   }
-  // Add children
   if (node.children) {
     for (const child of node.children) {
       lines.push(...convertASTToHTML5libTree(child, depth + 1));
@@ -145,9 +141,7 @@ export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): stri
   return lines;
 }
-/**
- * Normalizes HTML5lib tree format for comparison
- */
 export function normalizeHTML5libTree(tree: string): string {
   return tree
     .split('\n')
@@ -156,33 +150,29 @@ export function normalizeHTML5libTree(tree: string): string {
     .join('\n');
 }
-/**
- * Runs a single HTML5lib tree construction test
- */
 export function runHTML5libTreeTest(test: HTML5libTreeTest, testName: string): void {
   it(testName, () => {
     const { data, document: expectedTree, documentFragment, scriptOff, scriptOn } = test;
-    // Parse the HTML
     const tokens = tokenize(data);
     const ast = parse(tokens);
-    // Convert to HTML5lib tree format
     const actualTreeLines = convertASTToHTML5libTree(ast);
     const actualTree = actualTreeLines.join('\n');
-    // Normalize both trees for comparison
     const normalizedActual = normalizeHTML5libTree(actualTree);
     const normalizedExpected = normalizeHTML5libTree(expectedTree);
-    // Compare trees
     expect(normalizedActual).toBe(normalizedExpected);
   });
 }
-/**
- * Runs all tests from an HTML5lib tree construction test suite
- */
 export function runHTML5libTreeTestSuite(tests: HTML5libTreeTest[], suiteName: string): void {
   describe(`HTML5lib Tree Construction Tests: ${suiteName}`, () => {
     tests.forEach((test, index) => {
@@ -192,17 +182,13 @@ export function runHTML5libTreeTestSuite(tests: HTML5libTreeTest[], suiteName: s
   });
 }
-/**
- * Loads and runs HTML5lib tree construction tests from DAT format
- */
 export async function loadHTML5libTreeTests(testData: string, suiteName: string): Promise<void> {
   const tests = parseHTML5libDATFile(testData);
   runHTML5libTreeTestSuite(tests, suiteName);
 }
-/**
- * Validates HTML5lib tree construction test format
- */
 export function validateHTML5libTreeTest(test: HTML5libTreeTest): boolean {
   return !!(test.data && test.document && test.errors !== undefined);
 }

package/tests/official/html5lib/tree-construction.test.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import {
   type HTML5libTreeTest,
 } from "./tree-construction-utils";
-// Sample HTML5lib tree construction tests in DAT format
 const basicTreeTestData = `#data
 Test
 #errors
@@ -206,13 +206,13 @@ const errorHandlingTestData = `#data
 |       <a>
 |     <a>`;
-// Run the embedded tests
 describe("HTML5lib Tree Construction Tests", () => {
   it("should parse DAT format correctly", () => {
     const tests = parseHTML5libDATFile(basicTreeTestData);
     expect(tests.length).toBeGreaterThan(0);
-    // Check first test
     const firstTest = tests[0];
     if (firstTest) {
       expect(firstTest.data).toBe("Test");
@@ -225,7 +225,7 @@ describe("HTML5lib Tree Construction Tests", () => {
     const tests = parseHTML5libDATFile(doctypeTestData);
     expect(tests.length).toBeGreaterThan(0);
-    // Check first doctype test
     const firstTest = tests[0];
     if (firstTest) {
       expect(firstTest.data).toBe("<!DOCTYPE html>");
@@ -238,7 +238,7 @@ describe("HTML5lib Tree Construction Tests", () => {
     const tests = parseHTML5libDATFile(errorHandlingTestData);
     expect(tests.length).toBeGreaterThan(0);
-    // Check error handling
     const firstTest = tests[0];
     if (firstTest) {
       expect(firstTest.errors.length).toBeGreaterThan(0);

package/tests/official/validator/validator-tests.test.ts CHANGED Viewed

@@ -42,7 +42,7 @@ describe('Validator.nu Tests', () => {
     });
     it('should handle content model violations', () => {
-      // These should parse but may generate warnings in a full validator
       const contentModelHTML = `
         <p>
           <div>Block inside paragraph</div>
@@ -53,7 +53,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(contentModelHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -67,7 +67,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(obsoleteHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -85,7 +85,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(deprecatedHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -114,7 +114,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(formHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -136,7 +136,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(mediaHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -162,7 +162,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(semanticHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -184,7 +184,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(interactiveHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -200,7 +200,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(unclosedHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -215,7 +215,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(mismatchedHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });
@@ -229,7 +229,7 @@ describe('Validator.nu Tests', () => {
       `;
       const document = parseHTML(invalidNestingHTML);
-      // const ast = parse(tokens);
       expect(document).toBeDefined();
     });

package/tests/official/wpt/wpt-tests.test.ts CHANGED Viewed

@@ -135,10 +135,10 @@ describe('Web Platform Tests (WPT) Compliance', () => {
     it('should handle numeric character references', () => {
       const numericRefs = [
-        '&#65;',   // A
-        '&#x41;',  // A (hex)
-        '&#8364;', // Euro symbol
-        '&#x20AC;' // Euro symbol (hex)
+        '&#65;',
+        '&#x41;',
+        '&#8364;',
+        '&#x20AC;'
       ];
       numericRefs.forEach(ref => {
@@ -404,6 +404,6 @@ describe('WPT Integration Tests', () => {
     expect(ast).toBeDefined();
     expect((ast as any).children?.length).toBeGreaterThan(0);
-    expect(end - start).toBeLessThan(1000); // Should parse within 1 second
+    expect(end - start).toBeLessThan(1000);
   });
 });