npm - @tkeron/html-parser - Versions diffs - 0.1.4 → 0.1.7 - Mend

@tkeron/html-parser 0.1.4 → 0.1.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (32)

package/README.md +6 -6
package/bun.lock +3 -3
package/index.ts +0 -5
package/package.json +7 -6
package/src/css-selector.ts +45 -32
package/src/dom-simulator.ts +243 -46
package/src/parser.ts +0 -39
package/src/tokenizer.ts +0 -116
package/tests/advanced.test.ts +2 -2
package/tests/cloneNode.test.ts +50 -50
package/tests/custom-elements.test.ts +8 -8
package/tests/dom-manipulation.test.ts +638 -0
package/tests/official/acid/acid-tests.test.ts +6 -6
package/tests/official/final-output/final-output.test.ts +15 -15
package/tests/official/html5lib/tokenizer-utils.ts +19 -31
package/tests/official/html5lib/tokenizer.test.ts +4 -4
package/tests/official/html5lib/tree-construction-utils.ts +20 -34
package/tests/official/html5lib/tree-construction.test.ts +5 -5
package/tests/official/validator/validator-tests.test.ts +11 -11
package/tests/official/wpt/wpt-tests.test.ts +5 -5
package/tests/outerHTML-replacement.test.ts +208 -0
package/tests/parser.test.ts +1 -1
package/tests/selectors.test.ts +64 -1
package/tests/test-page-0.txt +12 -355
package/tests/tokenizer.test.ts +86 -0
package/tests/void-elements.test.ts +471 -0
package/tests/api-integration.test.ts +0 -114
package/tests/cloneNode-bug-reproduction.test.ts +0 -325
package/tests/cloneNode-interactive.ts +0 -235
package/tests/dom-adoption.test.ts +0 -363
package/tests/dom-synchronization.test.ts +0 -675
package/tests/setAttribute-outerHTML.test.ts +0 -102

package/README.md CHANGED Viewed

@@ -11,7 +11,7 @@ A fast and lightweight HTML parser for Bun that converts HTML strings into DOM D
 - 🪶 **Lightweight**: Minimal dependencies, native implementation
 - 🌐 **Standards Compliant**: Returns standard DOM Document objects
 - 🔧 **TypeScript Support**: Full TypeScript definitions included
-- ✅ **Well Tested**: Comprehensive unit test suite (181/181 passing)
+- ✅ **Well Tested**: Comprehensive unit test suite (569 tests passing)
 - 🔄 **100% Compatible**: Drop-in replacement, same API
 ## Installation
@@ -21,19 +21,19 @@ A fast and lightweight HTML parser for Bun that converts HTML strings into DOM D
 Once published, it will be available as:
 ```bash
-npm install html-parser
+npm install @tkeron/html-parser
 ```
 Or with Bun:
 ```bash
-bun add html-parser
+bun add @tkeron/html-parser
 ```
 ## Usage
 ```typescript
-import { parseHTML } from "html-parser";
+import { parseHTML } from "@tkeron/html-parser";
 // Parse HTML string into DOM Document
 const html =
@@ -51,7 +51,7 @@ console.log(heading); // "Hello World"
 ### Simple Example
 ```typescript
-import { parseHTML } from "html-parser";
+import { parseHTML } from "@tkeron/html-parser";
 const html = `
   <div class="container">
@@ -117,4 +117,4 @@ MIT
 ## Support
-If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/yourusername/html-parser).
+If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/tkeron/html-parser).

package/bun.lock CHANGED Viewed

@@ -5,7 +5,7 @@
     "": {
       "name": "@tkeron/html-parser",
       "devDependencies": {
-        "@types/bun": "^1.3.4",
+        "@types/bun": "^1.3.6",
       },
       "peerDependencies": {
         "typescript": "^5.9.3",
@@ -13,11 +13,11 @@
     },
   },
   "packages": {
-    "@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
+    "@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="],
     "@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="],
-    "bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="],
+    "bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],
     "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],

package/index.ts CHANGED Viewed

@@ -4,11 +4,6 @@ import {
   astToDOM,
 } from './src/dom-simulator.js';
-/**
- * Parse HTML string into Document object
- * @param html The HTML string to parse
- * @returns A Document object
- */
 export function parseHTML(html: string = ""): Document {
   const tokens = tokenize(html);
   const ast = parse(tokens);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tkeron/html-parser",
-  "version": "0.1.4",
+  "version": "0.1.7",
   "description": "A fast and lightweight HTML parser for Bun",
   "main": "index.js",
   "module": "index.ts",
@@ -8,16 +8,17 @@
   "author": "tkeron",
   "license": "MIT",
   "devDependencies": {
-    "@types/bun": "^1.3.4"
+    "@types/bun": "^1.3.6"
   },
   "peerDependencies": {
     "typescript": "^5.9.3"
   },
   "keywords": [
-    "cli",
-    "commands",
-    "command-line",
-    "arguments"
+    "html",
+    "parser",
+    "dom",
+    "bun",
+    "tokenizer"
   ],
   "repository": {
     "url": "git@github.com:tkeron/html-parser.git"

package/src/css-selector.ts CHANGED Viewed

@@ -14,36 +14,47 @@ function parseSelector(selector: string): SelectorGroup[] {
   return parts.map((part) => {
     const trimmed = part.trim();
-    let tokens: SelectorToken[];
-    if (trimmed.startsWith("#")) {
-      tokens = [{ type: "id", value: trimmed.slice(1) }];
-    } else if (trimmed.startsWith(".")) {
-      tokens = [{ type: "class", value: trimmed.slice(1) }];
-    } else if (trimmed.includes("[") && trimmed.includes("]")) {
-      // Handle attribute selectors like input[type="email"], meta[charset], or [role="button"]
-      const attributeMatch = trimmed.match(/^([^[\]]*)\[([^=\]]+)(?:=["']?([^"'\]]*?)["']?)?\]$/);
-      if (attributeMatch) {
-        const [, tagName, attrName, attrValue] = attributeMatch;
-        tokens = [];
-        // Add tag token if there's a tag name
-        if (tagName && tagName.trim()) {
-          tokens.push({ type: "tag", value: tagName.trim().toLowerCase() });
-        }
-        // Add attribute token
-        tokens.push({
-          type: "attribute",
-          value: (attrName || "").trim(),
-          attributeName: (attrName || "").trim(),
-          attributeValue: attrValue ? attrValue.trim() : undefined
-        });
-      } else {
-        tokens = [{ type: "tag", value: trimmed.toLowerCase() }];
-      }
-    } else {
-      tokens = [{ type: "tag", value: trimmed.toLowerCase() }];
+    let tokens: SelectorToken[] = [];
+    // Handle universal selector
+    if (trimmed === '*') {
+      // Match any element - we'll handle this specially
+      return { tokens: [] };
+    }
+    // Parse complex selectors like p#intro.first or .foo.bar.baz
+    let remaining = trimmed;
+    // Extract tag name first if present
+    const tagMatch = remaining.match(/^([a-zA-Z][a-zA-Z0-9]*)/);
+    if (tagMatch) {
+      tokens.push({ type: "tag", value: tagMatch[1].toLowerCase() });
+      remaining = remaining.slice(tagMatch[1].length);
+    }
+    // Extract all IDs (HTML5 allows IDs starting with digits)
+    const idMatches = remaining.matchAll(/#([a-zA-Z0-9][a-zA-Z0-9_-]*)/g);
+    for (const match of idMatches) {
+      tokens.push({ type: "id", value: match[1] });
+    }
+    remaining = remaining.replace(/#[a-zA-Z0-9][a-zA-Z0-9_-]*/g, '');
+    // Extract all classes
+    const classMatches = remaining.matchAll(/\.([a-zA-Z][a-zA-Z0-9_-]*)/g);
+    for (const match of classMatches) {
+      tokens.push({ type: "class", value: match[1] });
+    }
+    remaining = remaining.replace(/\.[a-zA-Z][a-zA-Z0-9_-]*/g, '');
+    // Extract attributes
+    const attrMatches = remaining.matchAll(/\[([^=\]]+)(?:=["']?([^"'\]]*?)["']?)?\]/g);
+    for (const match of attrMatches) {
+      tokens.push({
+        type: "attribute",
+        value: match[1].trim(),
+        attributeName: match[1].trim(),
+        attributeValue: match[2] ? match[2].trim() : undefined
+      });
     }
     return { tokens };
@@ -67,11 +78,9 @@ function matchesToken(element: any, token: SelectorToken): boolean {
       return element.attributes?.id === token.value;
     case "attribute":
       const attrValue = element.attributes?.[token.attributeName || ""];
-      // If no attribute value specified in selector, just check if attribute exists
       if (token.attributeValue === undefined) {
         return attrValue !== undefined;
       }
-      // Otherwise check for exact match
       return attrValue === token.attributeValue;
     default:
       return false;
@@ -79,6 +88,10 @@ function matchesToken(element: any, token: SelectorToken): boolean {
 }
 function matchesSelector(element: any, tokens: SelectorToken[]): boolean {
+  // Universal selector - matches any element
+  if (tokens.length === 0) {
+    return true;
+  }
   return tokens.every((token) => matchesToken(element, token));
 }