@tkeron/html-parser 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -21,19 +21,19 @@ A fast and lightweight HTML parser for Bun that converts HTML strings into DOM D
21
21
  Once published, it will be available as:
22
22
 
23
23
  ```bash
24
- npm install html-parser
24
+ npm install @tkeron/html-parser
25
25
  ```
26
26
 
27
27
  Or with Bun:
28
28
 
29
29
  ```bash
30
- bun add html-parser
30
+ bun add @tkeron/html-parser
31
31
  ```
32
32
 
33
33
  ## Usage
34
34
 
35
35
  ```typescript
36
- import { parseHTML } from "html-parser";
36
+ import { parseHTML } from "@tkeron/html-parser";
37
37
 
38
38
  // Parse HTML string into DOM Document
39
39
  const html =
@@ -51,7 +51,7 @@ console.log(heading); // "Hello World"
51
51
  ### Simple Example
52
52
 
53
53
  ```typescript
54
- import { parseHTML } from "html-parser";
54
+ import { parseHTML } from "@tkeron/html-parser";
55
55
 
56
56
  const html = `
57
57
  <div class="container">
@@ -117,4 +117,4 @@ MIT
117
117
 
118
118
  ## Support
119
119
 
120
- If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/yourusername/html-parser).
120
+ If you encounter any issues or have questions, please file an issue on the [GitHub repository](https://github.com/tkeron/html-parser).
package/index.ts CHANGED
@@ -4,11 +4,6 @@ import {
4
4
  astToDOM,
5
5
  } from './src/dom-simulator.js';
6
6
 
7
- /**
8
- * Parse HTML string into Document object
9
- * @param html The HTML string to parse
10
- * @returns A Document object
11
- */
12
7
  export function parseHTML(html: string = ""): Document {
13
8
  const tokens = tokenize(html);
14
9
  const ast = parse(tokens);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tkeron/html-parser",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "A fast and lightweight HTML parser for Bun",
5
5
  "main": "index.js",
6
6
  "module": "index.ts",
@@ -21,18 +21,15 @@ function parseSelector(selector: string): SelectorGroup[] {
21
21
  } else if (trimmed.startsWith(".")) {
22
22
  tokens = [{ type: "class", value: trimmed.slice(1) }];
23
23
  } else if (trimmed.includes("[") && trimmed.includes("]")) {
24
- // Handle attribute selectors like input[type="email"], meta[charset], or [role="button"]
25
24
  const attributeMatch = trimmed.match(/^([^[\]]*)\[([^=\]]+)(?:=["']?([^"'\]]*?)["']?)?\]$/);
26
25
  if (attributeMatch) {
27
26
  const [, tagName, attrName, attrValue] = attributeMatch;
28
27
  tokens = [];
29
28
 
30
- // Add tag token if there's a tag name
31
29
  if (tagName && tagName.trim()) {
32
30
  tokens.push({ type: "tag", value: tagName.trim().toLowerCase() });
33
31
  }
34
32
 
35
- // Add attribute token
36
33
  tokens.push({
37
34
  type: "attribute",
38
35
  value: (attrName || "").trim(),
@@ -67,11 +64,9 @@ function matchesToken(element: any, token: SelectorToken): boolean {
67
64
  return element.attributes?.id === token.value;
68
65
  case "attribute":
69
66
  const attrValue = element.attributes?.[token.attributeName || ""];
70
- // If no attribute value specified in selector, just check if attribute exists
71
67
  if (token.attributeValue === undefined) {
72
68
  return attrValue !== undefined;
73
69
  }
74
- // Otherwise check for exact match
75
70
  return attrValue === token.attributeValue;
76
71
  default:
77
72
  return false;
@@ -22,7 +22,7 @@ export function createElement(
22
22
  ): any {
23
23
  const innerHTML = "";
24
24
  const tagNameLower = tagName.toLowerCase();
25
- const outerHTML = `<${tagNameLower}${Object.entries(attributes)
25
+ const initialOuterHTML = `<${tagNameLower}${Object.entries(attributes)
26
26
  .map(([k, v]) => ` ${k}="${v}"`)
27
27
  .join("")}></${tagNameLower}>`;
28
28
  const textContent = "";
@@ -37,7 +37,7 @@ export function createElement(
37
37
  children: [],
38
38
  textContent,
39
39
  innerHTML,
40
- outerHTML,
40
+ _internalOuterHTML: initialOuterHTML,
41
41
  parentNode: null,
42
42
  parentElement: null,
43
43
  firstChild: null,
@@ -123,7 +123,6 @@ export function createElement(
123
123
  configurable: true,
124
124
  });
125
125
 
126
- // Add className property
127
126
  Object.defineProperty(element, "className", {
128
127
  get() {
129
128
  return element.attributes.class || "";
@@ -135,7 +134,6 @@ export function createElement(
135
134
  configurable: true,
136
135
  });
137
136
 
138
- // Add id property
139
137
  Object.defineProperty(element, "id", {
140
138
  get() {
141
139
  return element.attributes.id || "";
@@ -147,6 +145,17 @@ export function createElement(
147
145
  configurable: true,
148
146
  });
149
147
 
148
+ Object.defineProperty(element, "outerHTML", {
149
+ get() {
150
+ return element._internalOuterHTML || "";
151
+ },
152
+ set(value: string) {
153
+ setOuterHTML(element, value);
154
+ },
155
+ enumerable: true,
156
+ configurable: true,
157
+ });
158
+
150
159
  return element;
151
160
  }
152
161
 
@@ -326,8 +335,6 @@ function convertASTNodeToDOM(astNode: ASTNode): any {
326
335
  }
327
336
 
328
337
  function appendChild(parent: any, child: any): void {
329
- // Check for hierarchy request error: prevent circular references
330
- // Check if parent is a descendant of child
331
338
  if (child.nodeType === NodeType.ELEMENT_NODE || child.nodeType === NodeType.DOCUMENT_NODE) {
332
339
  let ancestor = parent;
333
340
  while (ancestor) {
@@ -338,7 +345,6 @@ function appendChild(parent: any, child: any): void {
338
345
  }
339
346
  }
340
347
 
341
- // Remove child from its current parent if it has one
342
348
  if (child.parentNode) {
343
349
  removeChild(child.parentNode, child);
344
350
  }
@@ -411,7 +417,6 @@ function removeChild(parent: any, child: any): any {
411
417
  parent.lastChild = child.previousSibling;
412
418
  }
413
419
 
414
- // Only handle element-specific relationships if parent is an element
415
420
  if (parent.nodeType === NodeType.ELEMENT_NODE && child.nodeType === NodeType.ELEMENT_NODE) {
416
421
  const childElement = child;
417
422
  const elemIndex = parent.children.indexOf(childElement);
@@ -454,19 +459,16 @@ function removeChild(parent: any, child: any): any {
454
459
  }
455
460
 
456
461
  function insertBefore(parent: any, newNode: any, referenceNode: any): any {
457
- // If referenceNode is null, append to the end
458
462
  if (referenceNode === null) {
459
463
  appendChild(parent, newNode);
460
464
  return newNode;
461
465
  }
462
466
 
463
- // Verify referenceNode is actually a child of parent
464
467
  const refIndex = parent.childNodes.indexOf(referenceNode);
465
468
  if (refIndex === -1) {
466
469
  throw new Error("Reference node is not a child of this node");
467
470
  }
468
471
 
469
- // Check for hierarchy request error: prevent circular references
470
472
  if (newNode.nodeType === NodeType.ELEMENT_NODE || newNode.nodeType === NodeType.DOCUMENT_NODE) {
471
473
  let ancestor = parent;
472
474
  while (ancestor) {
@@ -477,16 +479,13 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
477
479
  }
478
480
  }
479
481
 
480
- // Remove newNode from its current parent if it has one
481
482
  if (newNode.parentNode) {
482
483
  removeChild(newNode.parentNode, newNode);
483
484
  }
484
485
 
485
- // Insert into childNodes
486
486
  parent.childNodes.splice(refIndex, 0, newNode);
487
487
  newNode.parentNode = parent;
488
488
 
489
- // Update sibling relationships for all nodes
490
489
  newNode.previousSibling = referenceNode.previousSibling;
491
490
  newNode.nextSibling = referenceNode;
492
491
 
@@ -495,12 +494,10 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
495
494
  }
496
495
  referenceNode.previousSibling = newNode;
497
496
 
498
- // Update firstChild if inserting at the beginning
499
497
  if (parent.firstChild === referenceNode) {
500
498
  parent.firstChild = newNode;
501
499
  }
502
500
 
503
- // Handle element-specific relationships
504
501
  if (
505
502
  parent.nodeType === NodeType.ELEMENT_NODE &&
506
503
  newNode.nodeType === NodeType.ELEMENT_NODE
@@ -510,12 +507,10 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
510
507
 
511
508
  newElement.parentElement = parentElement;
512
509
 
513
- // Find the reference node in the children array
514
510
  let refElementIndex = -1;
515
511
  if (referenceNode.nodeType === NodeType.ELEMENT_NODE) {
516
512
  refElementIndex = parentElement.children.indexOf(referenceNode);
517
513
  } else {
518
- // Find the next element sibling
519
514
  let nextElement = referenceNode.nextSibling;
520
515
  while (nextElement && nextElement.nodeType !== NodeType.ELEMENT_NODE) {
521
516
  nextElement = nextElement.nextSibling;
@@ -526,14 +521,11 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
526
521
  }
527
522
 
528
523
  if (refElementIndex === -1) {
529
- // No element siblings after, append to children
530
524
  parentElement.children.push(newElement);
531
525
  } else {
532
- // Insert before the reference element
533
526
  parentElement.children.splice(refElementIndex, 0, newElement);
534
527
  }
535
528
 
536
- // Update element sibling relationships
537
529
  const newElemIndex = parentElement.children.indexOf(newElement);
538
530
  newElement.previousElementSibling =
539
531
  newElemIndex > 0 ? parentElement.children[newElemIndex - 1] : null;
@@ -549,12 +541,9 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
549
541
  newElement.nextElementSibling.previousElementSibling = newElement;
550
542
  }
551
543
 
552
- // Update firstElementChild if needed
553
544
  if (newElemIndex === 0) {
554
545
  parentElement.firstElementChild = newElement;
555
546
  }
556
-
557
- // lastElementChild is not affected since we're inserting before
558
547
  }
559
548
 
560
549
  if (parent.nodeType === NodeType.ELEMENT_NODE) {
@@ -565,13 +554,11 @@ function insertBefore(parent: any, newNode: any, referenceNode: any): any {
565
554
  }
566
555
 
567
556
  function replaceChild(parent: any, newChild: any, oldChild: any): any {
568
- // Verify oldChild is actually a child of parent
569
557
  const oldIndex = parent.childNodes.indexOf(oldChild);
570
558
  if (oldIndex === -1) {
571
559
  throw new Error("Old child is not a child of this node");
572
560
  }
573
561
 
574
- // Check for hierarchy request error: prevent circular references
575
562
  if (newChild.nodeType === NodeType.ELEMENT_NODE || newChild.nodeType === NodeType.DOCUMENT_NODE) {
576
563
  let ancestor = parent;
577
564
  while (ancestor) {
@@ -582,16 +569,13 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
582
569
  }
583
570
  }
584
571
 
585
- // Remove newChild from its current parent if it has one
586
572
  if (newChild.parentNode) {
587
573
  removeChild(newChild.parentNode, newChild);
588
574
  }
589
575
 
590
- // Replace in childNodes array
591
576
  parent.childNodes[oldIndex] = newChild;
592
577
  newChild.parentNode = parent;
593
578
 
594
- // Transfer sibling relationships
595
579
  newChild.previousSibling = oldChild.previousSibling;
596
580
  newChild.nextSibling = oldChild.nextSibling;
597
581
 
@@ -602,7 +586,6 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
602
586
  oldChild.nextSibling.previousSibling = newChild;
603
587
  }
604
588
 
605
- // Update first/last child if needed
606
589
  if (parent.firstChild === oldChild) {
607
590
  parent.firstChild = newChild;
608
591
  }
@@ -610,20 +593,16 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
610
593
  parent.lastChild = newChild;
611
594
  }
612
595
 
613
- // Handle element-specific relationships
614
596
  if (parent.nodeType === NodeType.ELEMENT_NODE) {
615
597
  const parentElement = parent;
616
598
 
617
- // Remove old element from children if it's an element
618
599
  if (oldChild.nodeType === NodeType.ELEMENT_NODE) {
619
600
  const oldElemIndex = parentElement.children.indexOf(oldChild);
620
601
  if (oldElemIndex !== -1) {
621
602
  if (newChild.nodeType === NodeType.ELEMENT_NODE) {
622
- // Replace with new element
623
603
  parentElement.children[oldElemIndex] = newChild;
624
604
  newChild.parentElement = parentElement;
625
605
 
626
- // Transfer element sibling relationships
627
606
  newChild.previousElementSibling = oldChild.previousElementSibling;
628
607
  newChild.nextElementSibling = oldChild.nextElementSibling;
629
608
 
@@ -641,7 +620,6 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
641
620
  parentElement.lastElementChild = newChild;
642
621
  }
643
622
  } else {
644
- // Replacing element with non-element, remove from children
645
623
  parentElement.children.splice(oldElemIndex, 1);
646
624
 
647
625
  if (oldChild.previousElementSibling) {
@@ -662,11 +640,9 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
662
640
  }
663
641
  }
664
642
  } else if (newChild.nodeType === NodeType.ELEMENT_NODE) {
665
- // Replacing non-element with element, need to insert into children array
666
643
  const newElement = newChild;
667
644
  newElement.parentElement = parentElement;
668
645
 
669
- // Find correct position in children array
670
646
  let insertIndex = 0;
671
647
  for (let i = 0; i < oldIndex; i++) {
672
648
  if (parent.childNodes[i].nodeType === NodeType.ELEMENT_NODE) {
@@ -676,7 +652,6 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
676
652
 
677
653
  parentElement.children.splice(insertIndex, 0, newElement);
678
654
 
679
- // Update element sibling relationships
680
655
  newElement.previousElementSibling =
681
656
  insertIndex > 0 ? parentElement.children[insertIndex - 1] : null;
682
657
  newElement.nextElementSibling =
@@ -700,7 +675,6 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
700
675
  }
701
676
  }
702
677
 
703
- // Clear oldChild's relationships
704
678
  oldChild.parentNode = null;
705
679
  if (oldChild.nodeType === NodeType.ELEMENT_NODE) {
706
680
  oldChild.parentElement = null;
@@ -720,19 +694,16 @@ function replaceChild(parent: any, newChild: any, oldChild: any): any {
720
694
  }
721
695
 
722
696
  function insertAfter(parent: any, newNode: any, referenceNode: any): any {
723
- // If referenceNode is null, insert at the beginning
724
697
  if (referenceNode === null) {
725
698
  insertBefore(parent, newNode, parent.firstChild);
726
699
  return newNode;
727
700
  }
728
701
 
729
- // Verify referenceNode is actually a child of parent
730
702
  const refIndex = parent.childNodes.indexOf(referenceNode);
731
703
  if (refIndex === -1) {
732
704
  throw new Error("Reference node is not a child of this node");
733
705
  }
734
706
 
735
- // Insert after means insert before the next sibling
736
707
  const nextSibling = referenceNode.nextSibling;
737
708
  return insertBefore(parent, newNode, nextSibling);
738
709
  }
@@ -762,7 +733,13 @@ function updateElementContent(element: any): void {
762
733
  .map(([k, v]) => ` ${k}="${v}"`)
763
734
  .join("");
764
735
  const tagNameLower = element.tagName.toLowerCase();
765
- element.outerHTML = `<${tagNameLower}${attrs}>${innerHTML}</${tagNameLower}>`;
736
+
737
+ Object.defineProperty(element, "_internalOuterHTML", {
738
+ value: `<${tagNameLower}${attrs}>${innerHTML}</${tagNameLower}>`,
739
+ writable: true,
740
+ enumerable: false,
741
+ configurable: true,
742
+ });
766
743
 
767
744
  const computedTextContent = getTextContent(element);
768
745
  Object.defineProperty(element, "_internalTextContent", {
@@ -772,7 +749,6 @@ function updateElementContent(element: any): void {
772
749
  configurable: true,
773
750
  });
774
751
 
775
- // Propagate changes up to parent elements
776
752
  if (element.parentElement) {
777
753
  updateElementContent(element.parentElement);
778
754
  }
@@ -854,7 +830,108 @@ export function setInnerHTML(element: any, html: string): void {
854
830
  .map(([k, v]) => ` ${k}="${v}"`)
855
831
  .join("");
856
832
  const tagNameLower = element.tagName.toLowerCase();
857
- element.outerHTML = `<${tagNameLower}${attrs}>${actualInnerHTML}</${tagNameLower}>`;
833
+
834
+ Object.defineProperty(element, "_internalOuterHTML", {
835
+ value: `<${tagNameLower}${attrs}>${actualInnerHTML}</${tagNameLower}>`,
836
+ writable: true,
837
+ enumerable: false,
838
+ configurable: true,
839
+ });
840
+ }
841
+
842
+ export function setOuterHTML(element: any, html: string): void {
843
+ if (!element.parentNode) {
844
+ throw new Error("Cannot set outerHTML on element without a parent");
845
+ }
846
+
847
+ const parent = element.parentNode;
848
+ const indexInParent = parent.childNodes.indexOf(element);
849
+
850
+ if (indexInParent === -1) {
851
+ throw new Error("Element not found in parent's childNodes");
852
+ }
853
+
854
+ let newNodes: any[] = [];
855
+
856
+ if (html.trim()) {
857
+ const tokens = tokenize(html);
858
+ const ast = parse(tokens);
859
+
860
+ if (ast.children) {
861
+ for (const child of ast.children) {
862
+ const domChild = convertASTNodeToDOM(child);
863
+ if (domChild) {
864
+ newNodes.push(domChild);
865
+ }
866
+ }
867
+ }
868
+ }
869
+
870
+ const previousSibling = element.previousSibling;
871
+ const nextSibling = element.nextSibling;
872
+
873
+ parent.childNodes.splice(indexInParent, 1);
874
+
875
+ if (newNodes.length > 0) {
876
+ parent.childNodes.splice(indexInParent, 0, ...newNodes);
877
+
878
+ for (const newNode of newNodes) {
879
+ newNode.parentNode = parent;
880
+ newNode.parentElement = parent.nodeType === NodeType.ELEMENT_NODE ? parent : null;
881
+ }
882
+
883
+ for (let i = 0; i < newNodes.length; i++) {
884
+ const currentNode = newNodes[i];
885
+
886
+ if (i === 0) {
887
+ currentNode.previousSibling = previousSibling;
888
+ if (previousSibling) {
889
+ previousSibling.nextSibling = currentNode;
890
+ }
891
+ } else {
892
+ currentNode.previousSibling = newNodes[i - 1];
893
+ }
894
+
895
+ if (i === newNodes.length - 1) {
896
+ currentNode.nextSibling = nextSibling;
897
+ if (nextSibling) {
898
+ nextSibling.previousSibling = currentNode;
899
+ }
900
+ } else {
901
+ currentNode.nextSibling = newNodes[i + 1];
902
+ }
903
+ }
904
+ } else {
905
+ if (previousSibling) {
906
+ previousSibling.nextSibling = nextSibling;
907
+ }
908
+ if (nextSibling) {
909
+ nextSibling.previousSibling = previousSibling;
910
+ }
911
+ }
912
+
913
+ element.parentNode = null;
914
+ element.parentElement = null;
915
+ element.previousSibling = null;
916
+ element.nextSibling = null;
917
+
918
+ parent.children = parent.childNodes.filter(
919
+ (child: any) => child.nodeType === NodeType.ELEMENT_NODE
920
+ );
921
+
922
+ parent.firstChild = parent.childNodes.length > 0 ? parent.childNodes[0] : null;
923
+ parent.lastChild = parent.childNodes.length > 0 ? parent.childNodes[parent.childNodes.length - 1] : null;
924
+
925
+ parent.firstElementChild = parent.children.length > 0 ? parent.children[0] : null;
926
+ parent.lastElementChild = parent.children.length > 0 ? parent.children[parent.children.length - 1] : null;
927
+
928
+ for (let i = 0; i < parent.children.length; i++) {
929
+ const child = parent.children[i];
930
+ child.previousElementSibling = i > 0 ? parent.children[i - 1] : null;
931
+ child.nextElementSibling = i < parent.children.length - 1 ? parent.children[i + 1] : null;
932
+ }
933
+
934
+ updateElementContent(parent);
858
935
  }
859
936
 
860
937
  function setTextContent(element: any, text: string): void {
package/src/tokenizer.ts CHANGED
@@ -44,9 +44,6 @@ const HTML_ENTITIES: Record<string, string> = {
44
44
  '&not;': '¬'
45
45
  };
46
46
 
47
- /**
48
- * Decode HTML entities in a string and handle null characters
49
- */
50
47
  function decodeEntities(text: string): string {
51
48
  let result = text.replace(/\u0000/g, '\uFFFD');
52
49
 
@@ -78,9 +75,6 @@ function decodeEntities(text: string): string {
78
75
  });
79
76
  }
80
77
 
81
- /**
82
- * Parse attributes from a tag string
83
- */
84
78
  function parseAttributes(attributeString: string): Record<string, string> {
85
79
  const attributes: Record<string, string> = {};
86
80
 
@@ -98,9 +92,6 @@ function parseAttributes(attributeString: string): Record<string, string> {
98
92
  return attributes;
99
93
  }
100
94
 
101
- /**
102
- * Calculate position in text
103
- */
104
95
  function calculatePosition(text: string, offset: number): Position {
105
96
  const lines = text.slice(0, offset).split('\n');
106
97
  return {
@@ -110,10 +101,6 @@ function calculatePosition(text: string, offset: number): Position {
110
101
  };
111
102
  }
112
103
 
113
- /**
114
- * Tokenize HTML using a combination of HTMLRewriter and manual parsing
115
- * HTMLRewriter is great for structured HTML but we need manual parsing for edge cases
116
- */
117
104
  export function tokenize(html: string): Token[] {
118
105
  const tokens: Token[] = [];
119
106
  let position = 0;
@@ -254,10 +241,8 @@ export function tokenize(html: string): Token[] {
254
241
  }
255
242
  }
256
243
 
257
- // Sort tokens by position
258
244
  tokens.sort((a, b) => a.position.offset - b.position.offset);
259
245
 
260
- // Add EOF token
261
246
  tokens.push({
262
247
  type: TokenType.EOF,
263
248
  value: '',
@@ -285,7 +270,6 @@ export function tokenizeWithRewriter(html: string): Token[] {
285
270
  textBuffer = '';
286
271
  }
287
272
 
288
- // Add opening tag
289
273
  const attributes: Record<string, string> = {};
290
274
  for (const [name, value] of element.attributes) {
291
275
  attributes[name] = value;
@@ -325,14 +309,12 @@ export function tokenizeWithRewriter(html: string): Token[] {
325
309
  });
326
310
 
327
311
  try {
328
- // Transform the HTML (this triggers the rewriter)
329
312
  const response = new Response(html, {
330
313
  headers: { 'Content-Type': 'text/html' }
331
314
  });
332
315
 
333
316
  rewriter.transform(response);
334
317
 
335
- // Flush any remaining text
336
318
  if (textBuffer.trim()) {
337
319
  tokens.push({
338
320
  type: TokenType.TEXT,
@@ -342,12 +324,10 @@ export function tokenizeWithRewriter(html: string): Token[] {
342
324
  }
343
325
 
344
326
  } catch (error) {
345
- // If HTMLRewriter fails, fall back to manual parsing
346
327
  console.warn('HTMLRewriter failed, falling back to manual parsing:', error);
347
328
  return tokenize(html);
348
329
  }
349
330
 
350
- // Sort tokens by position and add EOF
351
331
  tokens.sort((a, b) => a.position.offset - b.position.offset);
352
332
  tokens.push({
353
333
  type: TokenType.EOF,
@@ -28,14 +28,14 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
28
28
  });
29
29
 
30
30
  test('should handle unicode characters', () => {
31
- const tokens = tokenize('<div title="测试" data-emoji="🚀" class="café">');
31
+ const tokens = tokenize('<div title="测试" data-emoji="🚀" class="lorem">');
32
32
  expect(tokens.length).toBeGreaterThan(0);
33
33
  const tag = tokens[0]!;
34
34
 
35
35
  expect(tag.attributes).toEqual({
36
36
  title: '测试',
37
37
  'data-emoji': '🚀',
38
- class: 'café'
38
+ class: 'lorem'
39
39
  });
40
40
  });
41
41