@tkeron/html-parser 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ A fast and lightweight HTML parser for Bun that converts HTML strings into DOM D
11
11
  - 🪶 **Lightweight**: Minimal dependencies, native implementation
12
12
  - 🌐 **Standards Compliant**: Returns standard DOM Document objects
13
13
  - 🔧 **TypeScript Support**: Full TypeScript definitions included
14
- - ✅ **Well Tested**: Comprehensive unit test suite (181/181 passing)
14
+ - ✅ **Well Tested**: Comprehensive unit test suite (569 tests passing)
15
15
  - 🔄 **100% Compatible**: Drop-in replacement, same API
16
16
 
17
17
  ## Installation
@@ -21,19 +21,19 @@ A fast and lightweight HTML parser for Bun that converts HTML strings into DOM D
21
21
  Once published, it will be available as:
22
22
 
23
23
  ```bash
24
- npm install html-parser
24
+ npm install @tkeron/html-parser
25
25
  ```
26
26
 
27
27
  Or with Bun:
28
28
 
29
29
  ```bash
30
- bun add html-parser
30
+ bun add @tkeron/html-parser
31
31
  ```
32
32
 
33
33
  ## Usage
34
34
 
35
35
  ```typescript
36
- import { parseHTML } from "html-parser";
36
+ import { parseHTML } from "@tkeron/html-parser";
37
37
 
38
38
  // Parse HTML string into DOM Document
39
39
  const html =
@@ -51,7 +51,7 @@ console.log(heading); // "Hello World"
51
51
  ### Simple Example
52
52
 
53
53
  ```typescript
54
- import { parseHTML } from "html-parser";
54
+ import { parseHTML } from "@tkeron/html-parser";
55
55
 
56
56
  const html = `
57
57
  <div class="container">
@@ -117,4 +117,4 @@ MIT
117
117
 
118
118
  ## Support
119
119
 
120
- If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/yourusername/html-parser).
120
+ If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/tkeron/html-parser).
package/bun.lock CHANGED
@@ -5,7 +5,7 @@
5
5
  "": {
6
6
  "name": "@tkeron/html-parser",
7
7
  "devDependencies": {
8
- "@types/bun": "^1.3.4",
8
+ "@types/bun": "^1.3.6",
9
9
  },
10
10
  "peerDependencies": {
11
11
  "typescript": "^5.9.3",
@@ -13,11 +13,11 @@
13
13
  },
14
14
  },
15
15
  "packages": {
16
- "@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
16
+ "@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="],
17
17
 
18
18
  "@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="],
19
19
 
20
- "bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="],
20
+ "bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],
21
21
 
22
22
  "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
23
23
 
package/index.ts CHANGED
@@ -4,11 +4,6 @@ import {
4
4
  astToDOM,
5
5
  } from './src/dom-simulator.js';
6
6
 
7
- /**
8
- * Parse HTML string into Document object
9
- * @param html The HTML string to parse
10
- * @returns A Document object
11
- */
12
7
  export function parseHTML(html: string = ""): Document {
13
8
  const tokens = tokenize(html);
14
9
  const ast = parse(tokens);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tkeron/html-parser",
3
- "version": "0.1.4",
3
+ "version": "0.1.7",
4
4
  "description": "A fast and lightweight HTML parser for Bun",
5
5
  "main": "index.js",
6
6
  "module": "index.ts",
@@ -8,16 +8,17 @@
8
8
  "author": "tkeron",
9
9
  "license": "MIT",
10
10
  "devDependencies": {
11
- "@types/bun": "^1.3.4"
11
+ "@types/bun": "^1.3.6"
12
12
  },
13
13
  "peerDependencies": {
14
14
  "typescript": "^5.9.3"
15
15
  },
16
16
  "keywords": [
17
- "cli",
18
- "commands",
19
- "command-line",
20
- "arguments"
17
+ "html",
18
+ "parser",
19
+ "dom",
20
+ "bun",
21
+ "tokenizer"
21
22
  ],
22
23
  "repository": {
23
24
  "url": "git@github.com:tkeron/html-parser.git"
@@ -14,36 +14,47 @@ function parseSelector(selector: string): SelectorGroup[] {
14
14
 
15
15
  return parts.map((part) => {
16
16
  const trimmed = part.trim();
17
- let tokens: SelectorToken[];
18
-
19
- if (trimmed.startsWith("#")) {
20
- tokens = [{ type: "id", value: trimmed.slice(1) }];
21
- } else if (trimmed.startsWith(".")) {
22
- tokens = [{ type: "class", value: trimmed.slice(1) }];
23
- } else if (trimmed.includes("[") && trimmed.includes("]")) {
24
- // Handle attribute selectors like input[type="email"], meta[charset], or [role="button"]
25
- const attributeMatch = trimmed.match(/^([^[\]]*)\[([^=\]]+)(?:=["']?([^"'\]]*?)["']?)?\]$/);
26
- if (attributeMatch) {
27
- const [, tagName, attrName, attrValue] = attributeMatch;
28
- tokens = [];
29
-
30
- // Add tag token if there's a tag name
31
- if (tagName && tagName.trim()) {
32
- tokens.push({ type: "tag", value: tagName.trim().toLowerCase() });
33
- }
34
-
35
- // Add attribute token
36
- tokens.push({
37
- type: "attribute",
38
- value: (attrName || "").trim(),
39
- attributeName: (attrName || "").trim(),
40
- attributeValue: attrValue ? attrValue.trim() : undefined
41
- });
42
- } else {
43
- tokens = [{ type: "tag", value: trimmed.toLowerCase() }];
44
- }
45
- } else {
46
- tokens = [{ type: "tag", value: trimmed.toLowerCase() }];
17
+ let tokens: SelectorToken[] = [];
18
+
19
+ // Handle universal selector
20
+ if (trimmed === '*') {
21
+ // Match any element - we'll handle this specially
22
+ return { tokens: [] };
23
+ }
24
+
25
+ // Parse complex selectors like p#intro.first or .foo.bar.baz
26
+ let remaining = trimmed;
27
+
28
+ // Extract tag name first if present
29
+ const tagMatch = remaining.match(/^([a-zA-Z][a-zA-Z0-9]*)/);
30
+ if (tagMatch) {
31
+ tokens.push({ type: "tag", value: tagMatch[1].toLowerCase() });
32
+ remaining = remaining.slice(tagMatch[1].length);
33
+ }
34
+
35
+ // Extract all IDs (HTML5 allows IDs starting with digits)
36
+ const idMatches = remaining.matchAll(/#([a-zA-Z0-9][a-zA-Z0-9_-]*)/g);
37
+ for (const match of idMatches) {
38
+ tokens.push({ type: "id", value: match[1] });
39
+ }
40
+ remaining = remaining.replace(/#[a-zA-Z0-9][a-zA-Z0-9_-]*/g, '');
41
+
42
+ // Extract all classes
43
+ const classMatches = remaining.matchAll(/\.([a-zA-Z][a-zA-Z0-9_-]*)/g);
44
+ for (const match of classMatches) {
45
+ tokens.push({ type: "class", value: match[1] });
46
+ }
47
+ remaining = remaining.replace(/\.[a-zA-Z][a-zA-Z0-9_-]*/g, '');
48
+
49
+ // Extract attributes
50
+ const attrMatches = remaining.matchAll(/\[([^=\]]+)(?:=["']?([^"'\]]*?)["']?)?\]/g);
51
+ for (const match of attrMatches) {
52
+ tokens.push({
53
+ type: "attribute",
54
+ value: match[1].trim(),
55
+ attributeName: match[1].trim(),
56
+ attributeValue: match[2] ? match[2].trim() : undefined
57
+ });
47
58
  }
48
59
 
49
60
  return { tokens };
@@ -67,11 +78,9 @@ function matchesToken(element: any, token: SelectorToken): boolean {
67
78
  return element.attributes?.id === token.value;
68
79
  case "attribute":
69
80
  const attrValue = element.attributes?.[token.attributeName || ""];
70
- // If no attribute value specified in selector, just check if attribute exists
71
81
  if (token.attributeValue === undefined) {
72
82
  return attrValue !== undefined;
73
83
  }
74
- // Otherwise check for exact match
75
84
  return attrValue === token.attributeValue;
76
85
  default:
77
86
  return false;
@@ -79,6 +88,10 @@ function matchesToken(element: any, token: SelectorToken): boolean {
79
88
  }
80
89
 
81
90
  function matchesSelector(element: any, tokens: SelectorToken[]): boolean {
91
+ // Universal selector - matches any element
92
+ if (tokens.length === 0) {
93
+ return true;
94
+ }
82
95
  return tokens.every((token) => matchesToken(element, token));
83
96
  }
84
97