npm - @tkeron/html-parser - Versions diffs - 1.1.2 → 1.3.0 - Mend

@tkeron/html-parser 1.1.2 → 1.3.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131)

package/.github/workflows/npm_deploy.yml +14 -4
package/README.md +6 -6
package/bun.lock +6 -8
package/check-versions.ts +147 -0
package/index.ts +4 -8
package/package.json +5 -6
package/src/dom-simulator/append-child.ts +130 -0
package/src/dom-simulator/append.ts +18 -0
package/src/dom-simulator/attributes.ts +23 -0
package/src/dom-simulator/clone-node.ts +51 -0
package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
package/src/dom-simulator/create-cdata.ts +18 -0
package/src/dom-simulator/create-comment.ts +23 -0
package/src/dom-simulator/create-doctype.ts +24 -0
package/src/dom-simulator/create-document.ts +81 -0
package/src/dom-simulator/create-element.ts +195 -0
package/src/dom-simulator/create-processing-instruction.ts +19 -0
package/src/dom-simulator/create-temp-parent.ts +9 -0
package/src/dom-simulator/create-text-node.ts +23 -0
package/src/dom-simulator/escape-text-content.ts +6 -0
package/src/dom-simulator/find-special-elements.ts +14 -0
package/src/dom-simulator/get-text-content.ts +18 -0
package/src/dom-simulator/index.ts +36 -0
package/src/dom-simulator/inner-outer-html.ts +182 -0
package/src/dom-simulator/insert-after.ts +20 -0
package/src/dom-simulator/insert-before.ts +108 -0
package/src/dom-simulator/matches.ts +26 -0
package/src/dom-simulator/node-types.ts +26 -0
package/src/dom-simulator/prepend.ts +24 -0
package/src/dom-simulator/remove-child.ts +68 -0
package/src/dom-simulator/remove.ts +7 -0
package/src/dom-simulator/replace-child.ts +152 -0
package/src/dom-simulator/set-text-content.ts +33 -0
package/src/dom-simulator/update-element-content.ts +56 -0
package/src/dom-simulator.ts +12 -1126
package/src/encoding/constants.ts +8 -0
package/src/encoding/detect-encoding.ts +21 -0
package/src/encoding/index.ts +1 -0
package/src/encoding/normalize-encoding.ts +6 -0
package/src/html-entities.ts +2127 -0
package/src/index.ts +5 -5
package/src/parser/adoption-agency-helpers.ts +145 -0
package/src/parser/constants.ts +137 -0
package/src/parser/dom-to-ast.ts +79 -0
package/src/parser/index.ts +9 -0
package/src/parser/parse.ts +772 -0
package/src/parser/types.ts +56 -0
package/src/selectors/find-elements-descendant.ts +47 -0
package/src/selectors/index.ts +2 -0
package/src/selectors/matches-selector.ts +12 -0
package/src/selectors/matches-token.ts +27 -0
package/src/selectors/parse-selector.ts +48 -0
package/src/selectors/query-selector-all.ts +43 -0
package/src/selectors/query-selector.ts +6 -0
package/src/selectors/types.ts +10 -0
package/src/serializer/attributes.ts +74 -0
package/src/serializer/escape.ts +13 -0
package/src/serializer/index.ts +1 -0
package/src/serializer/serialize-tokens.ts +511 -0
package/src/tokenizer/calculate-position.ts +10 -0
package/src/tokenizer/constants.ts +11 -0
package/src/tokenizer/decode-entities.ts +64 -0
package/src/tokenizer/index.ts +2 -0
package/src/tokenizer/parse-attributes.ts +74 -0
package/src/tokenizer/tokenize.ts +165 -0
package/src/tokenizer/types.ts +25 -0
package/tests/adoption-agency-helpers.test.ts +304 -0
package/tests/advanced.test.ts +242 -221
package/tests/cloneNode.test.ts +19 -66
package/tests/custom-elements-head.test.ts +54 -55
package/tests/dom-extended.test.ts +77 -64
package/tests/dom-manipulation.test.ts +51 -24
package/tests/dom.test.ts +15 -13
package/tests/encoding/detect-encoding.test.ts +33 -0
package/tests/google-dom.test.ts +2 -2
package/tests/helpers/tokenizer-adapter.test.ts +29 -43
package/tests/helpers/tokenizer-adapter.ts +36 -33
package/tests/helpers/tree-adapter.test.ts +20 -20
package/tests/helpers/tree-adapter.ts +34 -24
package/tests/html-entities-text.test.ts +6 -2
package/tests/innerhtml-void-elements.test.ts +52 -36
package/tests/outerHTML-replacement.test.ts +37 -65
package/tests/parser/dom-to-ast.test.ts +109 -0
package/tests/parser/parse.test.ts +139 -0
package/tests/parser.test.ts +281 -217
package/tests/selectors/query-selector-all.test.ts +39 -0
package/tests/selectors/query-selector.test.ts +42 -0
package/tests/serializer/attributes.test.ts +132 -0
package/tests/serializer/escape.test.ts +51 -0
package/tests/serializer/serialize-tokens.test.ts +80 -0
package/tests/serializer-core.test.ts +6 -6
package/tests/serializer-injectmeta.test.ts +6 -6
package/tests/serializer-optionaltags.test.ts +9 -6
package/tests/serializer-options.test.ts +6 -6
package/tests/serializer-whitespace.test.ts +6 -6
package/tests/tokenizer/calculate-position.test.ts +34 -0
package/tests/tokenizer/decode-entities.test.ts +31 -0
package/tests/tokenizer/parse-attributes.test.ts +44 -0
package/tests/tokenizer/tokenize.test.ts +757 -0
package/tests/tokenizer-namedEntities.test.ts +10 -7
package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
package/tests/tokenizer.test.ts +268 -256
package/tests/tree-construction-adoption01.test.ts +25 -16
package/tests/tree-construction-adoption02.test.ts +30 -19
package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
package/tests/tree-construction-entities02.test.ts +18 -16
package/tests/tree-construction-html5test-com.test.ts +16 -10
package/tests/tree-construction-math.test.ts +11 -9
package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
package/tests/tree-construction-noscript01.test.ts +11 -9
package/tests/tree-construction-ruby.test.ts +6 -4
package/tests/tree-construction-scriptdata01.test.ts +6 -4
package/tests/tree-construction-svg.test.ts +6 -4
package/tests/tree-construction-template.test.ts +6 -4
package/tests/tree-construction-tests10.test.ts +6 -4
package/tests/tree-construction-tests11.test.ts +6 -4
package/tests/tree-construction-tests20.test.ts +7 -4
package/tests/tree-construction-tests21.test.ts +7 -4
package/tests/tree-construction-tests23.test.ts +7 -4
package/tests/tree-construction-tests24.test.ts +7 -4
package/tests/tree-construction-tests5.test.ts +6 -5
package/tests/tree-construction-tests6.test.ts +6 -5
package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
package/tests/void-elements.test.ts +85 -40
package/tsconfig.json +1 -1
package/src/css-selector.ts +0 -185
package/src/encoding.ts +0 -39
package/src/parser.ts +0 -682
package/src/serializer.ts +0 -450
package/src/tokenizer.ts +0 -325
package/tests/selectors.test.ts +0 -128

package/tests/tokenizer.test.ts (changed)

@@ -1,252 +1,251 @@
-import { expect, it, describe } from 'bun:test';
-import {
-  tokenize,
-  TokenType,
-  type Token
-} from '../src/tokenizer';
+import { expect, it, describe } from "bun:test";
+import { tokenize, TokenType } from "../src/tokenizer/index.js";
-describe('HTML Tokenizer', () => {
-  describe('Basic Tags', () => {
-    it('should tokenize simple opening tag', () => {
-      const tokens = tokenize('<div>');
+describe("HTML Tokenizer", () => {
+  describe("Basic Tags", () => {
+    it("should tokenize simple opening tag", () => {
+      const tokens = tokenize("<div>");
       expect(tokens).toHaveLength(2);
       expect(tokens[0]!).toEqual({
         type: TokenType.TAG_OPEN,
-        value: 'div',
+        value: "div",
         position: expect.any(Object),
         attributes: {},
-        isSelfClosing: false
+        isSelfClosing: false,
       });
       expect(tokens[1]!.type).toBe(TokenType.EOF);
     });
-    it('should tokenize simple closing tag', () => {
-      const tokens = tokenize('</div>');
+    it("should tokenize simple closing tag", () => {
+      const tokens = tokenize("</div>");
       expect(tokens).toHaveLength(2);
       expect(tokens[0]!).toEqual({
         type: TokenType.TAG_CLOSE,
-        value: 'div',
+        value: "div",
         position: expect.any(Object),
-        isClosing: true
+        isClosing: true,
       });
     });
-    it('should tokenize self-closing tag', () => {
-      const tokens = tokenize('<img/>');
+    it("should tokenize self-closing tag", () => {
+      const tokens = tokenize("<img/>");
       expect(tokens).toHaveLength(2);
       expect(tokens[0]!).toEqual({
         type: TokenType.TAG_OPEN,
-        value: 'img',
+        value: "img",
         position: expect.any(Object),
         attributes: {},
-        isSelfClosing: true
+        isSelfClosing: true,
       });
     });
-    it('should handle case insensitive tag names', () => {
-      const tokens = tokenize('<DIV></DIV>');
+    it("should handle case insensitive tag names", () => {
+      const tokens = tokenize("<DIV></DIV>");
-      expect(tokens[0]!.value).toBe('div');
-      expect(tokens[1]!.value).toBe('div');
+      expect(tokens[0]!.value).toBe("div");
+      expect(tokens[1]!.value).toBe("div");
     });
   });
-  describe('Attributes', () => {
-    it('should parse attributes with double quotes', () => {
+  describe("Attributes", () => {
+    it("should parse attributes with double quotes", () => {
       const tokens = tokenize('<div class="container" id="main">');
       expect(tokens[0]?.attributes).toEqual({
-        class: 'container',
-        id: 'main'
+        class: "container",
+        id: "main",
       });
     });
-    it('should parse attributes with single quotes', () => {
+    it("should parse attributes with single quotes", () => {
       const tokens = tokenize(`<div class='container' id='main'>`);
       expect(tokens[0]?.attributes).toEqual({
-        class: 'container',
-        id: 'main'
+        class: "container",
+        id: "main",
       });
     });
-    it('should parse unquoted attributes', () => {
-      const tokens = tokenize('<div class=container id=main>');
+    it("should parse unquoted attributes", () => {
+      const tokens = tokenize("<div class=container id=main>");
       expect(tokens[0]?.attributes).toEqual({
-        class: 'container',
-        id: 'main'
+        class: "container",
+        id: "main",
       });
     });
-    it('should parse boolean attributes', () => {
-      const tokens = tokenize('<input disabled checked>');
+    it("should parse boolean attributes", () => {
+      const tokens = tokenize("<input disabled checked>");
       expect(tokens[0]?.attributes).toEqual({
-        disabled: '',
-        checked: ''
+        disabled: "",
+        checked: "",
       });
     });
-    it('should handle mixed attribute types', () => {
+    it("should handle mixed attribute types", () => {
       const tokens = tokenize('<input type="text" disabled value=test>');
       expect(tokens[0]?.attributes).toEqual({
-        type: 'text',
-        disabled: '',
-        value: 'test'
+        type: "text",
+        disabled: "",
+        value: "test",
       });
     });
-    it('should handle attributes with special characters', () => {
+    it("should handle attributes with special characters", () => {
       const tokens = tokenize('<div data-test="value" aria-label="test">');
       expect(tokens[0]?.attributes).toEqual({
-        'data-test': 'value',
-        'aria-label': 'test'
+        "data-test": "value",
+        "aria-label": "test",
       });
     });
   });
-  describe('Text Content', () => {
-    it('should tokenize plain text', () => {
-      const tokens = tokenize('Hello World');
+  describe("Text Content", () => {
+    it("should tokenize plain text", () => {
+      const tokens = tokenize("Hello World");
       expect(tokens).toHaveLength(2);
       expect(tokens[0]).toEqual({
         type: TokenType.TEXT,
-        value: 'Hello World',
-        position: expect.any(Object)
+        value: "Hello World",
+        position: expect.any(Object),
       });
     });
-    it('should handle text with whitespace', () => {
-      const tokens = tokenize('  Hello   World  ');
+    it("should handle text with whitespace", () => {
+      const tokens = tokenize("  Hello   World  ");
-      expect(tokens[0]?.value).toBe('  Hello   World  ');
+      expect(tokens[0]?.value).toBe("  Hello   World  ");
     });
-    it('should handle multiline text', () => {
-      const tokens = tokenize('Line 1\nLine 2\nLine 3');
+    it("should handle multiline text", () => {
+      const tokens = tokenize("Line 1\nLine 2\nLine 3");
-      expect(tokens[0]?.value).toBe('Line 1\nLine 2\nLine 3');
+      expect(tokens[0]?.value).toBe("Line 1\nLine 2\nLine 3");
     });
   });
-  describe('HTML Entities', () => {
-    it('should parse named entities', () => {
-      const tokens = tokenize('&amp; &lt; &gt; &quot; &nbsp;');
+  describe("HTML Entities", () => {
+    it("should parse named entities", () => {
+      const tokens = tokenize("&amp; &lt; &gt; &quot; &nbsp;");
       expect(tokens[0]?.value).toBe('& < > " \u00A0');
     });
-    it('should parse numeric entities', () => {
-      const tokens = tokenize('&#65; &#66; &#67;');
+    it("should parse numeric entities", () => {
+      const tokens = tokenize("&#65; &#66; &#67;");
-      expect(tokens[0]?.value).toBe('A B C');
+      expect(tokens[0]?.value).toBe("A B C");
     });
-    it('should parse hexadecimal entities', () => {
-      const tokens = tokenize('&#x41; &#x42; &#x43;');
+    it("should parse hexadecimal entities", () => {
+      const tokens = tokenize("&#x41; &#x42; &#x43;");
-      expect(tokens[0]?.value).toBe('A B C');
+      expect(tokens[0]?.value).toBe("A B C");
     });
-    it('should handle entities in attributes', () => {
+    it("should handle entities in attributes", () => {
       const tokens = tokenize('<div title="&quot;Hello&quot;">');
       expect(tokens[0]?.attributes!.title).toBe('"Hello"');
     });
-    it('should handle unknown entities', () => {
-      const tokens = tokenize('&unknown;');
+    it("should handle unknown entities", () => {
+      const tokens = tokenize("&unknown;");
-      expect(tokens[0]?.value).toBe('&unknown;');
+      expect(tokens[0]?.value).toBe("&unknown;");
     });
   });
-  describe('Comments', () => {
-    it('should parse HTML comments', () => {
-      const tokens = tokenize('<!-- This is a comment -->');
+  describe("Comments", () => {
+    it("should parse HTML comments", () => {
+      const tokens = tokenize("<!-- This is a comment -->");
       expect(tokens[0]).toEqual({
         type: TokenType.COMMENT,
-        value: ' This is a comment ',
-        position: expect.any(Object)
+        value: " This is a comment ",
+        position: expect.any(Object),
       });
     });
-    it('should handle multiline comments', () => {
-      const tokens = tokenize(`<!-- \n        Multi line\n        comment\n      -->`);
+    it("should handle multiline comments", () => {
+      const tokens = tokenize(
+        `<!-- \n        Multi line\n        comment\n      -->`,
+      );
       expect(tokens[0]?.type).toBe(TokenType.COMMENT);
-      expect(tokens[0]?.value).toContain('Multi line');
+      expect(tokens[0]?.value).toContain("Multi line");
     });
-    it('should handle empty comments', () => {
-      const tokens = tokenize('<!---->');
+    it("should handle empty comments", () => {
+      const tokens = tokenize("<!---->");
       expect(tokens[0]).toEqual({
         type: TokenType.COMMENT,
-        value: '',
-        position: expect.any(Object)
+        value: "",
+        position: expect.any(Object),
       });
     });
   });
-  describe('CDATA Sections (HTML5: treated as bogus comments)', () => {
-    it('should parse CDATA sections as bogus comments in HTML5', () => {
-      const tokens = tokenize('<![CDATA[Some data]]>');
+  describe("CDATA Sections (HTML5: treated as bogus comments)", () => {
+    it("should parse CDATA sections as bogus comments in HTML5", () => {
+      const tokens = tokenize("<![CDATA[Some data]]>");
       expect(tokens[0]).toEqual({
         type: TokenType.COMMENT,
-        value: '[CDATA[Some data]]',
-        position: expect.any(Object)
+        value: "[CDATA[Some data]]",
+        position: expect.any(Object),
       });
     });
-    it('should handle CDATA with special characters as bogus comment', () => {
+    it("should handle CDATA with special characters as bogus comment", () => {
       const tokens = tokenize('<![CDATA[<script>alert("test");</script>]]>');
       expect(tokens[0]?.value).toBe('[CDATA[<script>alert("test");</script>]]');
     });
   });
-  describe('DOCTYPE Declaration', () => {
-    it('should parse DOCTYPE declaration', () => {
-      const tokens = tokenize('<!DOCTYPE html>');
+  describe("DOCTYPE Declaration", () => {
+    it("should parse DOCTYPE declaration", () => {
+      const tokens = tokenize("<!DOCTYPE html>");
       expect(tokens[0]).toEqual({
         type: TokenType.DOCTYPE,
-        value: 'html',
-        position: expect.any(Object)
+        value: "html",
+        position: expect.any(Object),
       });
     });
-    it('should parse complex DOCTYPE', () => {
-      const tokens = tokenize('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">');
+    it("should parse complex DOCTYPE", () => {
+      const tokens = tokenize(
+        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">',
+      );
       expect(tokens[0]?.type).toBe(TokenType.DOCTYPE);
-      expect(tokens[0]?.value).toBe('html');
+      expect(tokens[0]?.value).toBe("html");
     });
   });
-  describe('Processing Instructions (HTML5: treated as bogus comments)', () => {
-    it('should parse XML processing instruction as bogus comment', () => {
+  describe("Processing Instructions (HTML5: treated as bogus comments)", () => {
+    it("should parse XML processing instruction as bogus comment", () => {
       const tokens = tokenize('<?xml version="1.0" encoding="UTF-8"?>');
       expect(tokens[0]).toEqual({
         type: TokenType.COMMENT,
         value: '?xml version="1.0" encoding="UTF-8"?',
-        position: expect.any(Object)
+        position: expect.any(Object),
       });
     });
-    it('should parse PHP-style processing instruction as bogus comment', () => {
+    it("should parse PHP-style processing instruction as bogus comment", () => {
       const tokens = tokenize('<?php echo "Hello"; ?>');
       expect(tokens[0]?.type).toBe(TokenType.COMMENT);
@@ -254,8 +253,8 @@ describe('HTML Tokenizer', () => {
     });
   });
-  describe('Complex HTML Documents', () => {
-    it('should tokenize complete HTML document', () => {
+  describe("Complex HTML Documents", () => {
+    it("should tokenize complete HTML document", () => {
       const html = `<!DOCTYPE html>
 <html lang="en">
   <head>
@@ -273,12 +272,14 @@ describe('HTML Tokenizer', () => {
       expect(tokens[0]?.type).toBe(TokenType.DOCTYPE);
       expect(tokens[tokens?.length - 1]?.type).toBe(TokenType.EOF);
-      const htmlTag = tokens.find(t => t.type === TokenType.TAG_OPEN && t.value === 'html');
+      const htmlTag = tokens.find(
+        (t) => t.type === TokenType.TAG_OPEN && t.value === "html",
+      );
       expect(htmlTag).toBeDefined();
-      expect(htmlTag!.attributes!.lang).toBe('en');
+      expect(htmlTag!.attributes!.lang).toBe("en");
     });
-    it('should handle mixed content', () => {
+    it("should handle mixed content", () => {
       const html = `<div>
         Text before <!-- comment -->
         <span>nested</span>
@@ -287,128 +288,134 @@ describe('HTML Tokenizer', () => {
       const tokens = tokenize(html);
-      expect(tokens.some(t => t.type === TokenType.TAG_OPEN)).toBe(true);
-      expect(tokens.some(t => t.type === TokenType.TEXT)).toBe(true);
-      expect(tokens.some(t => t.type === TokenType.COMMENT)).toBe(true);
+      expect(tokens.some((t) => t.type === TokenType.TAG_OPEN)).toBe(true);
+      expect(tokens.some((t) => t.type === TokenType.TEXT)).toBe(true);
+      expect(tokens.some((t) => t.type === TokenType.COMMENT)).toBe(true);
     });
   });
-  describe('Edge Cases', () => {
-    it('should handle empty input', () => {
-      const tokens = tokenize('');
+  describe("Edge Cases", () => {
+    it("should handle empty input", () => {
+      const tokens = tokenize("");
       expect(tokens).toHaveLength(1);
       expect(tokens[0]?.type).toBe(TokenType.EOF);
     });
-    it('should handle whitespace only', () => {
-      const tokens = tokenize('   \n\t  ');
+    it("should handle whitespace only", () => {
+      const tokens = tokenize("   \n\t  ");
       expect(tokens).toHaveLength(2);
       expect(tokens[0]?.type).toBe(TokenType.TEXT);
-      expect(tokens[0]?.value).toBe('   \n\t  ');
+      expect(tokens[0]?.value).toBe("   \n\t  ");
     });
-    it('should handle malformed tags', () => {
+    it("should handle malformed tags", () => {
       const tokens = tokenize('<div class="test>');
       expect(tokens[0]?.type).toBe(TokenType.TAG_OPEN);
-      expect(tokens[0]?.value).toBe('div');
+      expect(tokens[0]?.value).toBe("div");
     });
-    it('should handle unclosed comments', () => {
-      const tokens = tokenize('<!-- unclosed comment');
+    it("should handle unclosed comments", () => {
+      const tokens = tokenize("<!-- unclosed comment");
       expect(tokens[0]?.type).toBe(TokenType.COMMENT);
-      expect(tokens[0]?.value).toBe(' unclosed comment');
+      expect(tokens[0]?.value).toBe(" unclosed comment");
     });
   });
-  describe('Advanced Edge Cases', () => {
-    it('should handle attributes with no spaces', () => {
+  describe("Advanced Edge Cases", () => {
+    it("should handle attributes with no spaces", () => {
       const tokens = tokenize('<div class="test"id="main"data-value="123">');
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
       expect(tag.attributes).toEqual({
-        class: 'test',
-        id: 'main',
-        'data-value': '123'
+        class: "test",
+        id: "main",
+        "data-value": "123",
       });
     });
-    it('should handle attributes with excessive spaces', () => {
+    it("should handle attributes with excessive spaces", () => {
       const tokens = tokenize('<div   class  =  "test"    id   =   "main"   >');
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
       expect(tag.attributes).toEqual({
-        class: 'test',
-        id: 'main'
+        class: "test",
+        id: "main",
       });
     });
-    it('should handle mixed quote styles in same tag', () => {
-      const tokens = tokenize(`<div class='single' id="double" data-test='mix "quoted" content'>`);
+    it("should handle mixed quote styles in same tag", () => {
+      const tokens = tokenize(
+        `<div class='single' id="double" data-test='mix "quoted" content'>`,
+      );
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
-      expect(tag.attributes!.class).toBe('single');
-      expect(tag.attributes!.id).toBe('double');
-      expect(tag.attributes!['data-test']).toBe('mix "quoted" content');
+      expect(tag.attributes!.class).toBe("single");
+      expect(tag.attributes!.id).toBe("double");
+      expect(tag.attributes!["data-test"]).toBe('mix "quoted" content');
     });
-    it('should handle malformed quotes gracefully', () => {
+    it("should handle malformed quotes gracefully", () => {
       const tokens = tokenize('<div class="unclosed id="test">');
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
       expect(tag.type).toBe(TokenType.TAG_OPEN);
-      expect(tag.value).toBe('div');
+      expect(tag.value).toBe("div");
       expect(tag.attributes).toBeDefined();
     });
-    it('should handle empty tag names', () => {
-      const tokens = tokenize('<>content</>');
+    it("should handle empty tag names", () => {
+      const tokens = tokenize("<>content</>");
       expect(tokens.length).toBeGreaterThan(0);
     });
-    it('should handle tags with numbers and special characters', () => {
+    it("should handle tags with numbers and special characters", () => {
       const tokens = tokenize('<h1 class="heading-1" data-level="1">');
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
-      expect(tag.value).toBe('h1');
+      expect(tag.value).toBe("h1");
       expect(tag.attributes).toEqual({
-        class: 'heading-1',
-        'data-level': '1'
+        class: "heading-1",
+        "data-level": "1",
       });
     });
-    it('should handle extremely long attribute values', () => {
-      const longValue = 'a'.repeat(10000);
+    it("should handle extremely long attribute values", () => {
+      const longValue = "a".repeat(10000);
       const tokens = tokenize(`<div data-long="${longValue}">`);
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
-      expect(tag.attributes!['data-long']).toBe(longValue);
+      expect(tag.attributes!["data-long"]).toBe(longValue);
     });
-    it('should handle unicode characters in attributes', () => {
-      const tokens = tokenize('<div title="测试" data-emoji="🚀" class="café">');
+    it("should handle unicode characters in attributes", () => {
+      const tokens = tokenize(
+        '<div title="测试" data-emoji="🚀" class="café">',
+      );
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
       expect(tag.attributes).toEqual({
-        title: '测试',
-        'data-emoji': '🚀',
-        class: 'café'
+        title: "测试",
+        "data-emoji": "🚀",
+        class: "café",
       });
     });
-    it('should handle nested quotes in attributes', () => {
-      const tokens = tokenize(`<div onclick="alert('Hello')" title='She said "hi"'>`);
+    it("should handle nested quotes in attributes", () => {
+      const tokens = tokenize(
+        `<div onclick="alert('Hello')" title='She said "hi"'>`,
+      );
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
@@ -416,20 +423,22 @@ describe('HTML Tokenizer', () => {
       expect(tag.attributes!.title).toBe('She said "hi"');
     });
-    it('should handle attributes without values', () => {
-      const tokens = tokenize('<input type="checkbox" checked disabled readonly>');
+    it("should handle attributes without values", () => {
+      const tokens = tokenize(
+        '<input type="checkbox" checked disabled readonly>',
+      );
       expect(tokens.length).toBeGreaterThan(0);
       const tag = tokens[0]!;
       expect(tag.attributes).toEqual({
-        type: 'checkbox',
-        checked: '',
-        disabled: '',
-        readonly: ''
+        type: "checkbox",
+        checked: "",
+        disabled: "",
+        readonly: "",
       });
     });
-    it('should handle CDATA as bogus comment with complex content', () => {
+    it("should handle CDATA as bogus comment with complex content", () => {
       const complexContent = `
         function it() {
           return "<div>HTML inside JS</div>";
@@ -441,18 +450,21 @@ describe('HTML Tokenizer', () => {
       const cdataToken = tokens[0]!;
       expect(cdataToken.type).toBe(TokenType.COMMENT);
-      expect(cdataToken.value).toBe('[CDATA[' + complexContent + ']]');
+      expect(cdataToken.value).toBe("[CDATA[" + complexContent + "]]");
     });
-    it('should handle processing instructions as bogus comments', () => {
+    it("should handle processing instructions as bogus comments", () => {
       const tests = [
-        { input: '<?xml version="1.0" encoding="UTF-8"?>', expected: 'xml' },
-        { input: '<?xml-stylesheet type="text/xsl" href="style.xsl"?>', expected: 'xml' },
-        { input: '<?php echo "Hello World"; ?>', expected: 'php' },
-        { input: '<?python print("Hello") ?>', expected: 'python' }
+        { input: '<?xml version="1.0" encoding="UTF-8"?>', expected: "xml" },
+        {
+          input: '<?xml-stylesheet type="text/xsl" href="style.xsl"?>',
+          expected: "xml",
+        },
+        { input: '<?php echo "Hello World"; ?>', expected: "php" },
+        { input: '<?python print("Hello") ?>', expected: "python" },
       ];
-      tests.forEach(test => {
+      tests.forEach((test) => {
         const tokens = tokenize(test.input);
         const piToken = tokens[0]!;
@@ -461,16 +473,16 @@ describe('HTML Tokenizer', () => {
       });
     });
-    it('should handle comments with special content', () => {
+    it("should handle comments with special content", () => {
       const specialComments = [
-        '<!-- TODO: Fix this -->',
+        "<!-- TODO: Fix this -->",
         '<!-- <script>alert("xss")</script> -->',
-        '<!-- Multi\nline\ncomment -->',
-        '<!-- Comment with -- inside -->',
-        '<!--[if IE]><![endif]-->'
+        "<!-- Multi\nline\ncomment -->",
+        "<!-- Comment with -- inside -->",
+        "<!--[if IE]><![endif]-->",
       ];
-      specialComments.forEach(comment => {
+      specialComments.forEach((comment) => {
         const tokens = tokenize(comment);
         const commentToken = tokens[0]!;
@@ -478,7 +490,7 @@ describe('HTML Tokenizer', () => {
       });
     });
-    it('should handle mixed content with all token types (HTML5 mode)', () => {
+    it("should handle mixed content with all token types (HTML5 mode)", () => {
       const html = `
         <!DOCTYPE html>
         <!-- Main document -->
@@ -503,10 +515,10 @@ describe('HTML Tokenizer', () => {
         [TokenType.TAG_OPEN]: 0,
         [TokenType.TAG_CLOSE]: 0,
         [TokenType.TEXT]: 0,
-        [TokenType.EOF]: 0
+        [TokenType.EOF]: 0,
       };
-      tokens.forEach(token => {
+      tokens.forEach((token) => {
         if (token.type in tokenCounts) {
           tokenCounts[token.type]++;
         }
@@ -519,16 +531,15 @@ describe('HTML Tokenizer', () => {
       expect(tokenCounts[TokenType.TEXT]).toBeGreaterThan(0);
       expect(tokenCounts[TokenType.EOF]).toBe(1);
     });
-  })
-  describe('Performance and Stress Tests', () => {
-    it('should handle very large documents', () => {
+  });
-      let html = '<div>';
+  describe("Performance and Stress Tests", () => {
+    it("should handle very large documents", () => {
+      let html = "<div>";
       for (let i = 0; i < 1000; i++) {
         html += `<p id="para-${i}" class="paragraph">Paragraph ${i} content</p>`;
       }
-      html += '</div>';
+      html += "</div>";
       const startTime = Date.now();
       const tokens = tokenize(html);
@@ -538,16 +549,16 @@ describe('HTML Tokenizer', () => {
       expect(endTime - startTime).toBeLessThan(1000);
     });
-    it('should handle deeply nested structures', () => {
-      let html = '';
+    it("should handle deeply nested structures", () => {
+      let html = "";
       const depth = 100;
       for (let i = 0; i < depth; i++) {
         html += `<div level="${i}">`;
       }
-      html += 'Content';
+      html += "Content";
       for (let i = 0; i < depth; i++) {
-        html += '</div>';
+        html += "</div>";
       }
       const tokens = tokenize(html);
@@ -555,23 +566,23 @@ describe('HTML Tokenizer', () => {
       expect(tokens.length).toBe(depth * 2 + 2);
     });
-    it('should handle many attributes per element', () => {
-      let html = '<div';
+    it("should handle many attributes per element", () => {
+      let html = "<div";
       for (let i = 0; i < 100; i++) {
         html += ` attr-${i}="value-${i}"`;
       }
-      html += '>';
+      html += ">";
       const tokens = tokenize(html);
       const divTag = tokens[0]!;
       expect(Object.keys(divTag.attributes!).length).toBe(100);
-      expect(divTag.attributes!['attr-50']).toBe('value-50');
+      expect(divTag.attributes!["attr-50"]).toBe("value-50");
     });
-  })
+  });
-  describe('Real-world Scenarios', () => {
-    it('should handle SVG elements', () => {
+  describe("Real-world Scenarios", () => {
+    it("should handle SVG elements", () => {
       const svg = `
         <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">
           <circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"/>
@@ -581,15 +592,15 @@ describe('HTML Tokenizer', () => {
       const tokens = tokenize(svg);
-      const svgTag = tokens.find(token => token.value === 'svg')!;
-      expect(svgTag.attributes!.xmlns).toBe('http://www.w3.org/2000/svg');
+      const svgTag = tokens.find((token) => token.value === "svg")!;
+      expect(svgTag.attributes!.xmlns).toBe("http://www.w3.org/2000/svg");
-      const circleTag = tokens.find(token => token.value === 'circle')!;
+      const circleTag = tokens.find((token) => token.value === "circle")!;
       expect(circleTag.isSelfClosing).toBe(true);
-      expect(circleTag.attributes!.fill).toBe('red');
+      expect(circleTag.attributes!.fill).toBe("red");
     });
-    it('should handle script and style tags', () => {
+    it("should handle script and style tags", () => {
       const html = `
         <script type="text/javascript">
           function hello() {
@@ -604,14 +615,14 @@ describe('HTML Tokenizer', () => {
       const tokens = tokenize(html);
-      const scriptTag = tokens.find(token => token.value === 'script')!;
-      const styleTag = tokens.find(token => token.value === 'style')!;
+      const scriptTag = tokens.find((token) => token.value === "script")!;
+      const styleTag = tokens.find((token) => token.value === "style")!;
-      expect(scriptTag.attributes!.type).toBe('text/javascript');
-      expect(styleTag.attributes!.type).toBe('text/css');
+      expect(scriptTag.attributes!.type).toBe("text/javascript");
+      expect(styleTag.attributes!.type).toBe("text/css");
     });
-    it('should handle form elements with complex attributes', () => {
+    it("should handle form elements with complex attributes", () => {
       const html = `
         <form method="POST" action="/submit" enctype="multipart/form-data">
           <input type="email" name="email" required pattern="[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$" title="Please enter a valid email">
@@ -624,17 +635,17 @@ describe('HTML Tokenizer', () => {
       const tokens = tokenize(html);
-      const inputTag = tokens.find(token => token.value === 'input')!;
-      expect(inputTag.attributes!.pattern).toContain('@');
-      expect(inputTag.attributes!.required).toBe('');
+      const inputTag = tokens.find((token) => token.value === "input")!;
+      expect(inputTag.attributes!.pattern).toContain("@");
+      expect(inputTag.attributes!.required).toBe("");
-      const selectTag = tokens.find(token => token.value === 'select')!;
-      expect(selectTag.attributes!.multiple).toBe('');
+      const selectTag = tokens.find((token) => token.value === "select")!;
+      expect(selectTag.attributes!.multiple).toBe("");
     });
-  })
+  });
-  describe('Error Recovery', () => {
-    it('should handle incomplete tags gracefully', () => {
+  describe("Error Recovery", () => {
+    it("should handle incomplete tags gracefully", () => {
       const malformedHTML = '<div class="test><p>Content</p>';
       const tokens = tokenize(malformedHTML);
@@ -642,104 +653,105 @@ describe('HTML Tokenizer', () => {
       expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
     });
-    it('should handle unmatched quotes in attributes', () => {
+    it("should handle unmatched quotes in attributes", () => {
       const html = '<div class="test id=\'main">Content</div>';
       const tokens = tokenize(html);
-      const divTag = tokens.find(token => token.value === 'div')!;
+      const divTag = tokens.find((token) => token.value === "div")!;
       expect(divTag).toBeDefined();
     });
-    it('should continue parsing after errors', () => {
-      const html = '<div><p>Valid paragraph</p><span>Valid span</span>';
+    it("should continue parsing after errors", () => {
+      const html = "<div><p>Valid paragraph</p><span>Valid span</span>";
       const tokens = tokenize(html);
-      const hasValidElements = tokens.some(token => token.value === 'p') ||
-        tokens.some(token => token.value === 'span');
+      const hasValidElements =
+        tokens.some((token) => token.value === "p") ||
+        tokens.some((token) => token.value === "span");
       expect(hasValidElements).toBe(true);
     });
-    it('should handle empty angle brackets <>', () => {
-      const html = '<>text<div>content</div>';
+    it("should handle empty angle brackets <>", () => {
+      const html = "<>text<div>content</div>";
       const tokens = tokenize(html);
       // Should skip the invalid <> and continue parsing
       expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
-      const divToken = tokens.find(t => t.value === 'div');
+      const divToken = tokens.find((t) => t.value === "div");
       expect(divToken).toBeDefined();
     });
-    it('should handle angle bracket with only space < >', () => {
-      const html = '< >text<p>paragraph</p>';
+    it("should handle angle bracket with only space < >", () => {
+      const html = "< >text<p>paragraph</p>";
       const tokens = tokenize(html);
       expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
-      const pToken = tokens.find(t => t.value === 'p');
+      const pToken = tokens.find((t) => t.value === "p");
       expect(pToken).toBeDefined();
     });
-    it('should handle tag with no valid name', () => {
-      const html = '<123>text</123><div>ok</div>';
+    it("should handle tag with no valid name", () => {
+      const html = "<123>text</123><div>ok</div>";
       const tokens = tokenize(html);
       // Tags starting with numbers are invalid, should be treated as text
       expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
-      const divToken = tokens.find(t => t.value === 'div');
+      const divToken = tokens.find((t) => t.value === "div");
       expect(divToken).toBeDefined();
     });
   });
-  describe('Entity Edge Cases', () => {
-    it('should handle entity without semicolon with valid prefix', () => {
+  describe("Entity Edge Cases", () => {
+    it("should handle entity without semicolon with valid prefix", () => {
       // &nbsp followed by other text (no semicolon) should decode &nbsp
-      const tokens = tokenize('<div>&nbsptext</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+      const tokens = tokenize("<div>&nbsptext</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
       // Should decode &nbsp (non-breaking space) and keep "text"
-      expect(textToken!.value).toContain('text');
+      expect(textToken!.value).toContain("text");
     });
-    it('should handle entity without semicolon - lt prefix', () => {
-      const tokens = tokenize('<div>&ltvalue</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+    it("should handle entity without semicolon - lt prefix", () => {
+      const tokens = tokenize("<div>&ltvalue</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
-      expect(textToken!.value).toBe('&ltvalue');
+      expect(textToken!.value).toBe("&ltvalue");
     });
-    it('should handle entity without semicolon - gt prefix', () => {
-      const tokens = tokenize('<div>&gtvalue</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+    it("should handle entity without semicolon - gt prefix", () => {
+      const tokens = tokenize("<div>&gtvalue</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
-      expect(textToken!.value).toBe('&gtvalue');
+      expect(textToken!.value).toBe("&gtvalue");
     });
-    it('should handle entity without semicolon - amp prefix', () => {
-      const tokens = tokenize('<div>&ampvalue</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+    it("should handle entity without semicolon - amp prefix", () => {
+      const tokens = tokenize("<div>&ampvalue</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
-      expect(textToken!.value).toBe('&ampvalue');
+      expect(textToken!.value).toBe("&ampvalue");
     });
-    it('should handle unknown entity gracefully', () => {
-      const tokens = tokenize('<div>&unknownentity;</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+    it("should handle unknown entity gracefully", () => {
+      const tokens = tokenize("<div>&unknownentity;</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
       // Unknown entity should be kept as-is
-      expect(textToken!.value).toBe('&unknownentity;');
+      expect(textToken!.value).toBe("&unknownentity;");
     });
-    it('should handle partial entity name with no matching prefix', () => {
-      const tokens = tokenize('<div>&xyz</div>');
-      const textToken = tokens.find(t => t.type === TokenType.TEXT);
+    it("should handle partial entity name with no matching prefix", () => {
+      const tokens = tokenize("<div>&xyz</div>");
+      const textToken = tokens.find((t) => t.type === TokenType.TEXT);
       expect(textToken).toBeDefined();
       // No valid entity prefix, keep as-is
-      expect(textToken!.value).toBe('&xyz');
+      expect(textToken!.value).toBe("&xyz");
     });
-  })
+  });
 });