@tkeron/html-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../index";
3
+ import { readFileSync } from "fs";
4
+ import { join } from "path";
5
+
6
+ describe("Google DOM Parsing Test", () => {
7
+ it("should parse real Google HTML and find elements", () => {
8
+ // Fixture: a saved copy of the Google homepage (lang="es-419") kept beside this test file.
9
+ const filePath = join(__dirname, "google-homepage.txt");
10
+ let googleHTML: string;
11
+
12
+ try {
13
+ googleHTML = readFileSync(filePath, "utf-8");
14
+ } catch (error) {
15
+ throw new Error(`Could not read file: ${error}`);
16
+ }
17
+
18
+ let doc: Document;
19
+
20
+ try {
21
+ doc = parseHTML(googleHTML);
22
+ } catch (error) {
23
+ throw new Error(`Error parsing Google HTML: ${error}`);
24
+ }
25
+
26
+ expect(doc).toBeDefined();
27
+ expect(doc.nodeType).toBe(9); // 9 is the DOM's DOCUMENT_NODE constant
28
+
29
+ // Presence is checked with toBeTruthy(): toBeDefined() also passes for null,
30
+ // which is the DOM convention for a querySelector that matched nothing.
31
+ const htmlElement = doc.querySelector("html");
32
+ const bodyElement = doc.querySelector("body");
33
+ const titleElement = doc.querySelector("title");
34
+
35
+ expect(htmlElement).toBeTruthy();
36
+ expect(bodyElement).toBeTruthy();
37
+ expect(titleElement).toBeTruthy();
38
+
39
+ // Optional chaining (instead of an `if` guard) makes a missing element fail
40
+ // the comparison rather than silently skipping it.
41
+ expect(titleElement?.textContent).toBe("Google");
42
+
43
+ const searchForm = doc.querySelector('form[action="/search"]');
44
+ expect(searchForm).toBeTruthy();
45
+ expect(searchForm?.getAttribute("name")).toBe("f");
46
+
47
+ const searchInput = doc.querySelector('input[name="q"]');
48
+ expect(searchInput).toBeTruthy();
49
+ expect(searchInput?.getAttribute("title")).toBe("Buscar con Google");
50
+
51
+ const searchButton = doc.querySelector('input[name="btnG"]');
52
+ const luckyButton = doc.querySelector('input[name="btnI"]');
53
+
54
+ expect(searchButton).toBeTruthy();
55
+ expect(luckyButton).toBeTruthy();
56
+ expect(searchButton?.getAttribute("value")).toBe("Buscar con Google");
57
+ expect(luckyButton?.getAttribute("value")).toBe("Voy a tener suerte");
58
+
59
+ const logo = doc.querySelector('img[alt="Google"]');
60
+ expect(logo).toBeTruthy();
61
+ expect(logo?.getAttribute("id")).toBe("hplogo");
62
+
63
+ // Loose lower bounds on element counts; they only guard against a grossly truncated tree.
64
+ const allDivs = doc.querySelectorAll("div");
65
+ const allInputs = doc.querySelectorAll("input");
66
+ const allLinks = doc.querySelectorAll("a");
67
+ const allScripts = doc.querySelectorAll("script");
68
+
69
+ expect(allDivs.length).toBeGreaterThan(5);
70
+ expect(allInputs.length).toBeGreaterThan(5);
71
+ expect(allLinks.length).toBeGreaterThan(10);
72
+ expect(allScripts.length).toBeGreaterThan(3);
73
+ });
85
+
86
+ it("should demonstrate DOM manipulation on Google page", () => {
87
+ const filePath = join(__dirname, "google-homepage.txt");
88
+ const googleHTML = readFileSync(filePath, "utf-8");
89
+ const doc = parseHTML(googleHTML);
90
+
91
+ // The optional chain turns a missing <title> into a failed comparison.
92
+ const title = doc.querySelector("title");
93
+ expect(title?.textContent).toBe("Google");
94
+
95
+ // The fixture's top navigation bar is made of <a class=gb1> links.
96
+ const navLinks = doc.querySelectorAll("a.gb1");
97
+ expect(navLinks.length).toBeGreaterThan(0);
98
+
99
+ // Spot-check up to five of them: each needs a non-empty href and visible text.
100
+ for (let i = 0; i < Math.min(navLinks.length, 5); i++) {
101
+ const link = navLinks[i];
102
+ expect(link?.getAttribute("href")).toBeTruthy();
103
+ expect(link?.textContent?.trim()).toBeTruthy();
104
+ }
105
+
106
+ // The fixture has a <meta http-equiv="Content-Type"> tag and nonce-bearing <script>
107
+ // tags, so both queries must match at least once (">= 0" could never fail).
108
+ const metaTags = doc.querySelectorAll("meta[http-equiv]");
109
+ const scriptsWithNonce = doc.querySelectorAll("script[nonce]");
110
+
111
+ expect(metaTags.length).toBeGreaterThan(0);
112
+ expect(scriptsWithNonce.length).toBeGreaterThan(0);
113
+ });
118
+ });
@@ -0,0 +1,13 @@
1
+ <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="es-419"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){var _g={kEI:'l-1haMW1Nrbg0PEP-cWnkQs',kEXPI:'0,202792,62,2,610013,2887399,171,945,538661,94918,344796,219811,70233,15664,5226018,32768933,4043709,25228681,119775,18493,18673,60613,6749,23878,7042,2097,4600,328,6225,54189,25,9187,764,6748,8301,15634,30376,28336,48238,77,5894,353,18470,410,5870,3097,4617,5774,27611,16524,2801,460,2990,35,3420,13483,12108,656,1707,3320,3604,8997,2414,1736,2863,1762,9161,115,596,617,7,5875,2261,648,4225,3,2047,1201,310,1243,945,3,985,485,1,3435,422,945,808,3819,1020,1,2966,497,2,895,1620,539,4,252,388,482,7,487,764,728,1032,1578,715,6623,2,1493,1411,607,423,940,406,889,400,692,519,164,2064,2,4,1,54,210,1246,3913,667,31,3,1321,290,251,363,81,7,8,146,384,1804,698,262,5,1386,93,275,1579,772,472,1446,3,2,2,2,856,335,140,513,349,4,1122,244,1390,689,605,3,452,7,381,1078,2,332,479,354,17,1508,1206,601,4589,519,760,1555,192,98,1280,54,351,185,4,304,529,545,32,42,20,171,642,91,357,551,24,24,24,166,104,168,790,966,1262,1138,102,44,670,151,403,57,5,90,506,336,123,442,2,346,14,25,89,2,2,1242,357,128,7,2,258,149,24,18,137,107,211,175,665,56,827,190,220,226,48,1013,2,242,26,23,322,558,428,328,515,6,184,1012,181,37,106,210,46,285,1618,2,9,1,12,3,434,204,26,1129,722,75,92,14,275,1236,13,2,57,4,643,2,522,144,435,823,532,254,121,611,104,47,1081,2,1030,21237884,5,2992,4,2433,527,3,1272,36,167,2858,15,718,3425,2,1473,261,827,1446,3,534,186,909,358,1072,1089,6019595,2473307,27641,447299,100900,1193859,93710',kBL:'HYe-',kOPI:89978449};(function(){var a;((a=window.google)==null?0:a.stvsc)?google.kEI=_g.kEI:window.google=_g;}).call(this);})();(function(){google.sn='webhp';google.kHL='es-419';})();(function(){
2
+ var g=this||self;function k(){return window.google&&window.google.kOPI||null};var l,m=[];function n(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||l}function p(a){for(var b=null;a&&(!a.getAttribute||!(b=a.getAttribute("leid")));)a=a.parentNode;return b}function q(a){/^http:/i.test(a)&&window.location.protocol==="https:"&&(google.ml&&google.ml(Error("a"),!1,{src:a,glmm:1}),a="");return a}
3
+ function r(a,b,d,c,h){var e="";b.search("&ei=")===-1&&(e="&ei="+n(c),b.search("&lei=")===-1&&(c=p(c))&&(e+="&lei="+c));var f=b.search("&cshid=")===-1&&a!=="slh";c="&zx="+Date.now().toString();g._cshid&&f&&(c+="&cshid="+g._cshid);(d=d())&&(c+="&opi="+d);return"/"+(h||"gen_204")+"?atyp=i&ct="+String(a)+"&cad="+(b+e+c)};l=google.kEI;google.getEI=n;google.getLEI=p;google.ml=function(){return null};google.log=function(a,b,d,c,h,e){e=e===void 0?k:e;d||(d=r(a,b,e,c,h));if(d=q(d)){a=new Image;var f=m.length;m[f]=a;a.onerror=a.onload=a.onabort=function(){delete m[f]};a.src=d}};google.logUrl=function(a,b){b=b===void 0?k:b;return r("",a,b)};}).call(this);(function(){google.y={};google.sy=[];var d;(d=google).x||(d.x=function(a,b){if(a)var c=a.id;else{do c=Math.random();while(google.y[c])}google.y[c]=[a,b]});var e;(e=google).sx||(e.sx=function(a){google.sy.push(a)});google.lm=[];var f;(f=google).plm||(f.plm=function(a){google.lm.push.apply(google.lm,a)});google.lq=[];var g;(g=google).load||(g.load=function(a,b,c){google.lq.push([[a],b,c])});var h;(h=google).loadAll||(h.loadAll=function(a,b){google.lq.push([a,b])});google.bx=!1;var k;(k=google).lx||(k.lx=function(){});var l=[],m;(m=google).fce||(m.fce=function(a,b,c,n){l.push([a,b,c,n])});google.qce=l;}).call(this);google.f={};(function(){
4
+ document.documentElement.addEventListener("submit",function(b){var a;if(a=b.target){var c=a.getAttribute("data-submitfalse");a=c==="1"||c==="q"&&!a.elements.q.value?!0:!1}else a=!1;a&&(b.preventDefault(),b.stopPropagation())},!0);document.documentElement.addEventListener("click",function(b){var a;a:{for(a=b.target;a&&a!==document.documentElement;a=a.parentElement)if(a.tagName==="A"){a=a.getAttribute("data-nohref")==="1";break a}a=!1}a&&b.preventDefault()},!0);}).call(this);</script><style>#gbar,#guser{font-size:13px;padding-top:1px !important;}#gbar{height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 !important}.gbf .gb4{color:#900 !important}
5
+ </style><style>body,td,a,p,.h{font-family:sans-serif}body{margin:0;overflow-y:scroll}#gog{padding:3px 8px 0}td{line-height:.8em}.gac_m td{line-height:17px}form{margin-bottom:20px}.h{color:#1967d2}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px sans-serif}.gsfs{font:17px sans-serif}.ds{display:inline-box;display:inline-block;margin:3px 0 4px;margin-left:4px}input{font-family:inherit}body{background:#fff;color:#000}a{color:#681da8;text-decoration:none}a:hover,a:active{text-decoration:underline}.fl a{color:#1967d2}a:visited{color:#681da8}.sblc{padding-top:5px}.sblc a{display:block;margin:2px 0;margin-left:13px;font-size:11px}.lsbb{background:#f8f9fa;border:solid 1px;border-color:#dadce0 #70757a #70757a #dadce0;height:30px}.lsbb{display:block}#WqQANb a{display:inline-block;margin:0 12px}.lsb{background:url(/images/nav_logo229.png) 0 -261px repeat-x;color:#000;border:none;cursor:pointer;height:30px;margin:0;outline:0;font:15px sans-serif;vertical-align:top}.lsb:active{background:#dadce0}.lst:focus{outline:none}</style><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){window.google.erd={jsr:1,bv:2244,de:true,dpf:'IMrsMQrjlTe_idjqSnbwCj_x0M-c5ECD_ddpdD_hxJg'};
6
+ var g=this||self;var k,l=(k=g.mei)!=null?k:1,m,p=(m=g.diel)!=null?m:0,q,r=(q=g.sdo)!=null?q:!0,t=0,u,w=google.erd,x=w.jsr;google.ml=function(a,b,d,n,e){e=e===void 0?2:e;b&&(u=a&&a.message);d===void 0&&(d={});d.cad="ple_"+google.ple+".aple_"+google.aple;if(google.dl)return google.dl(a,e,d,!0),null;b=d;if(x<0){window.console&&console.error(a,b);if(x===-2)throw a;b=!1}else b=!a||!a.message||a.message==="Error loading script"||t>=l&&!n?!1:!0;if(!b)return null;t++;d=d||{};b=encodeURIComponent;var c="/gen_204?atyp=i&ei="+b(google.kEI);google.kEXPI&&(c+="&jexpid="+b(google.kEXPI));c+="&srcpg="+b(google.sn)+"&jsr="+b(w.jsr)+
7
+ "&bver="+b(w.bv);w.dpf&&(c+="&dpf="+b(w.dpf));var f=a.lineNumber;f!==void 0&&(c+="&line="+f);var h=a.fileName;h&&(h.indexOf("-extension:/")>0&&(e=3),c+="&script="+b(h),f&&h===window.location.href&&(f=document.documentElement.outerHTML.split("\n")[f],c+="&cad="+b(f?f.substring(0,300):"No script found.")));google.ple&&google.ple===1&&(e=2);c+="&jsel="+e;for(var v in d)c+="&",c+=b(v),c+="=",c+=b(d[v]);c=c+"&emsg="+b(a.name+": "+a.message);c=c+"&jsst="+b(a.stack||"N/A");c.length>=12288&&(c=c.substr(0,12288));a=c;n||google.log(0,"",a);return a};window.onerror=function(a,b,d,n,e){u!==a&&(a=e instanceof Error?e:Error(a),d===void 0||"lineNumber"in a||(a.lineNumber=d),b===void 0||"fileName"in a||(a.fileName=b),google.ml(a,!1,void 0,!1,a.name==="SyntaxError"||a.message.substring(0,11)==="SyntaxError"||a.message.indexOf("Script error")!==-1?3:p));u=null;r&&t>=l&&(window.onerror=null)};})();</script></head><body bgcolor="#fff"><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){var src='/images/nav_logo229.png';var iesg=false;document.body.onload = function(){window.n && window.n();if (document.images){new Image().src=src;}
8
+ if (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}
9
+ }
10
+ })();</script><div id="mngb"><div id=gbar><nobr><b class=gb1>B�squeda</b> <a class=gb1 href="https://www.google.com/imghp?hl=es-419&tab=wi">Im�genes</a> <a class=gb1 href="https://maps.google.com.pe/maps?hl=es-419&tab=wl">Maps</a> <a class=gb1 href="https://play.google.com/?hl=es-419&tab=w8">Play</a> <a class=gb1 href="https://www.youtube.com/?tab=w1">YouTube</a> <a class=gb1 href="https://news.google.com/?tab=wn">Noticias</a> <a class=gb1 href="https://mail.google.com/mail/?tab=wm">Gmail</a> <a class=gb1 href="https://drive.google.com/?tab=wo">Drive</a> <a class=gb1 style="text-decoration:none" href="https://www.google.com.pe/intl/es-419/about/products?tab=wh"><u>M�s</u> &raquo;</a></nobr></div><div id=guser width=100%><nobr><span id=gbn class=gbi></span><span id=gbf class=gbf></span><span id=gbe></span><a href="http://www.google.com.pe/history/optout?hl=es-419" class=gb4>Historial web</a> | <a href="/preferences?hl=es-419" class=gb4>Configuraci�n</a> | <a target=_top id=gb_70 href="https://accounts.google.com/ServiceLogin?hl=es-419&passive=true&continue=https://www.google.com/&ec=GAZAAQ" class=gb4>Acceder</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div></div><center><br clear="all" id="lgpd"><div id="XjhHGf"><img alt="Google" height="92" src="/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png" style="padding:28px 0 14px" width="272" id="hplogo"><br><br></div><form action="/search" name="f"><table cellpadding="0" cellspacing="0"><tr valign="top"><td width="25%">&nbsp;</td><td align="center" nowrap=""><input name="ie" value="ISO-8859-1" type="hidden"><input value="es-419" name="hl" type="hidden"><input name="source" type="hidden" value="hp"><input name="biw" type="hidden"><input name="bih" type="hidden"><div class="ds" style="height:32px;margin:4px 0"><input class="lst" style="margin:0;padding:5px 8px 0 6px;vertical-align:top;color:#000" autocomplete="off" value="" title="Buscar con Google" 
maxlength="2048" name="q" size="57"></div><br style="line-height:0"><span class="ds"><span class="lsbb"><input class="lsb" value="Buscar con Google" name="btnG" type="submit"></span></span><span class="ds"><span class="lsbb"><input class="lsb" id="tsuid_l-1haMW1Nrbg0PEP-cWnkQs_1" value="Voy a tener suerte" name="btnI" type="submit"><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){var id='tsuid_l-1haMW1Nrbg0PEP-cWnkQs_1';document.getElementById(id).onclick = function(){if (this.form.q.value){this.checked = 1;if (this.form.iflsig)this.form.iflsig.disabled = false;}
11
+ else top.location='/doodles/';};})();</script><input value="AOw8s4IAAAAAaGH7p8JgogoiiWeE10uSSTYd3bwYsPDg" name="iflsig" type="hidden"></span></span></td><td class="fl sblc" align="left" nowrap="" width="25%"><a href="/advanced_search?hl=es-419&amp;authuser=0">B�squeda avanzada</a></td></tr></table><input id="gbv" name="gbv" type="hidden" value="1"><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){var a,b="1";if(document&&document.getElementById)if(typeof XMLHttpRequest!="undefined")b="2";else if(typeof ActiveXObject!="undefined"){var c,d,e=["MSXML2.XMLHTTP.6.0","MSXML2.XMLHTTP.3.0","MSXML2.XMLHTTP","Microsoft.XMLHTTP"];for(c=0;d=e[c++];)try{new ActiveXObject(d),b="2"}catch(h){}}a=b;if(a=="2"&&location.search.indexOf("&gbv=2")==-1){var f=google.gbvu,g=document.getElementById("gbv");g&&(g.value=a);f&&window.setTimeout(function(){location.href=f},0)};}).call(this);</script></form><div style="font-size:83%;min-height:3.5em"><br><div id="gws-output-pages-elements-homepage_additional_languages__als"><style>#gws-output-pages-elements-homepage_additional_languages__als{font-size:small;margin-bottom:24px}#SIvCob{color:#545454;display:inline-block;line-height:28px;}#SIvCob a{}.H6sW5{display:inline-block;margin:0 2px;white-space:nowrap}.z4hgWe{display:inline-block;margin:0 2px}</style><div id="SIvCob">Google disponible en: <a href="https://www.google.com/setprefs?sig=0_CnNe6taea4YTaNFz61kVBPcoAAw%3D&amp;hl=qu&amp;source=homepage&amp;sa=X&amp;ved=0ahUKEwiF3YudhJiOAxU2MDQIHfniKbIQ2ZgBCAY">Quechua</a> </div></div></div><span id="footer"><div style="font-size:10pt"><div style="margin:19px auto;text-align:center" id="WqQANb"><a href="/intl/es-419/ads/">Publicidad</a><a href="/services/">Soluciones Empresariales</a><a href="/intl/es-419/about.html">Todo acerca de Google</a><a href="https://www.google.com/setprefdomain?prefdom=PE&amp;prev=https://www.google.com.pe/&amp;sig=K_168tnc-MwGHb8Ssp_valc2NCZ9o%3D">Google.com.pe</a></div></div><p style="font-size:8pt;color:#636363">&copy; 
2025 - <a href="/intl/es-419/policies/privacy/">Privacidad</a> - <a href="/intl/es-419/policies/terms/">Condiciones</a></p></span></center><script nonce="o992dxvdlf89K5i-4BMwWA">(function(){window.google.cdo={height:757,width:1440};(function(){var a=window.innerWidth,b=window.innerHeight;if(!a||!b){var c=window.document,d=c.compatMode=="CSS1Compat"?c.documentElement:c.body;a=d.clientWidth;b=d.clientHeight}if(a&&b&&(a!=google.cdo.width||b!=google.cdo.height)){var e=google,f=e.log,g="/client_204?&atyp=i&biw="+a+"&bih="+b+"&ei="+google.kEI,h="",k=window.google&&window.google.kOPI||null;k&&(h+="&opi="+k);f.call(e,"","",g+h)};}).call(this);})();(function(){google.xjs={basecomb:'/xjs/_/js/k\x3dxjs.hp.en.yvtAdIuzjZk.es5.O/ck\x3dxjs.hp.UvPXK4kvpic.L.X.O/am\x3dAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUCAQAAAIggAAAAAAACAAAAAAAAAAAAAIwAABAAQAABAgAAJQkAxoEAAAsAUAIBkAOUHgAAEAAEAABAAIAAAACAEAAAAQAABJwAAAAAAyA8DggAAAAExAKAAAAAAOIR/d\x3d1/ed\x3d1/dg\x3d0/ujg\x3d1/rs\x3dACT90oGgvMsxcccTSalf_rRpY4pmPGIhaw',basecss:'/xjs/_/ss/k\x3dxjs.hp.UvPXK4kvpic.L.X.O/am\x3dAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUCAQAAAAggAAAAAAAAAAAAAAAAAAAAAIgAABAAQAABAgAAJQEABgAAAAsAUAIBkAOUHgAAEAAEAABAAIAAAACAEAAAAQAABIwAAAAAAAAAAAAAAAAERA/rs\x3dACT90oF4NjEN1_pQra_aGKeIbUAyz2aH7Q',basejs:'/xjs/_/js/k\x3dxjs.hp.en.yvtAdIuzjZk.es5.O/am\x3dAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAACAAAAAAAAAAAAAIQAABAAAAAAAAAAAAgAwIEAAAoAAAAAgAMAAAAAAAAEAAAAAAAAAAAAAAAAAAAAABwAAAAAAyA8DggAAAAExAKAAAAAAOIR/dg\x3d0/rs\x3dACT90oEYPkmP3bYe0uuzmmEGxw6goU5Eqg',excm:[]};})();(function(){var u='/xjs/_/js/k\x3dxjs.hp.en.yvtAdIuzjZk.es5.O/am\x3dAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAACAAAAAAAAAAAAAIQAABAAAAAAAAAAAAgAwIEAAAoAAAAAgAMAAAAAAAAEAAAAAAAAAAAAAAAAAAAAABwAAAAAAyA8DggAAAAExAKAAAAAAOIR/d\x3d1/ed\x3d1/dg\x3d3/rs\x3dACT90oEYPkmP3bYe0uuzmmEGxw6goU5Eqg/m\x3dsb_he,d';var st=1;var amd=1000;var mmd=0;var pod=true;var pop=true;var povp=false;var fp='';
12
+ var e=this||self;function f(){var b,a,d;if(a=b=(a=window.google)==null?void 0:(d=a.ia)==null?void 0:d.r.B2Jtyd)a=b.m,a=a===1||a===5;return a&&b.cbfd!=null&&b.cbvi!=null?b:void 0};function g(){var b=[u];if(!google.dp){for(var a=0;a<b.length;a++){var d=b[a],c=document.createElement("link");c.as="script";c.href=d;c.rel="preload";document.body.appendChild(c)}google.dp=!0}};google.ps===void 0&&(google.ps=[]);function h(){var b=u,a=function(){};google.lx=google.stvsc?a:function(){k(b);google.lx=a};google.bx||google.lx()}function l(b,a){a&&(b.src=a);fp&&google.caft&&google.caft(function(){b.fetchPriority=fp});var d=b.onload;b.onload=function(c){d&&d(c);google.ps=google.ps.filter(function(G){return b!==G})};google.ps.push(b);document.body.appendChild(b)}google.as=l;function k(b){google.timers&&google.timers.load&&google.tick&&google.tick("load","xjsls");var a=document.createElement("script");a.onerror=function(){google.ple=1};a.onload=function(){google.ple=0};google.xjsus=void 0;l(a,b);google.aple=-1;google.dp=!0};function m(b){var a=b.getAttribute("jscontroller");return(a==="UBXHI"||a==="R3fhkb"||a==="TSZEqd")&&b.hasAttribute("data-src")}function n(){for(var b=document.getElementsByTagName("img"),a=0,d=b.length;a<d;a++){var c=b[a];if(c.hasAttribute("data-lzy_")&&Number(c.getAttribute("data-atf"))&1&&!m(c))return!0}return!1}for(var p=document.getElementsByTagName("img"),q=0,r=p.length;q<r;++q){var t=p[q];Number(t.getAttribute("data-atf"))&1&&m(t)&&(t.src=t.getAttribute("data-src"))};var w,x,y,z,A,B,C,D,E,F;function H(){google.xjsu=u;e._F_jsUrl=u;A=function(){h()};w=!1;x=(st===1||st===3)&&!!google.caft&&!n();y=f();z=(st===2||st===3)&&!!y&&!n();B=pod;C=pop;D=povp;E=pop&&document.prerendering||povp&&document.hidden;F=D?"visibilitychange":"prerenderingchange"}function I(){w||x||z||E||(A(),w=!0)}
13
+ setTimeout(function(){google&&google.tick&&google.timers&&google.timers.load&&google.tick("load","xjspls");H();if(x||z||E){if(x){var b=function(){x=!1;I()};google.caft(b);window.setTimeout(b,amd)}z&&(b=function(){z=!1;I()},y.cbvi.push(b),window.setTimeout(b,mmd));if(E){var a=function(){(D?document.hidden:document.prerendering)||(E=!1,I(),document.removeEventListener(F,a))};document.addEventListener(F,a,{passive:!0})}if(B||C||D)w||g()}else A()},0);})();window._ = window._ || {};window._DumpException = _._DumpException = function(e){throw e;};window._s = window._s || {};_s._DumpException = _._DumpException;window._qs = window._qs || {};_qs._DumpException = _._DumpException;(function(){var t=[0,16384,0,0,0,0,0,277430272,1,33312,536870914,33562624,0,805306368,17825801,552882176,620757047,119013414,12059224,4232192,517211024,71303168,66,2097168,276824064,262144,163661824,0,238821379,32,2908224,32,1171968];window._F_toggles = window._xjs_toggles = t;})();window._F_installCss = window._F_installCss || function(css){};(function(){google.jl={bfl:0,dw:false,eli:false,ine:false,ubm:false,uwp:true,vs:false};})();(function(){var pmc='{\x22d\x22:{},\x22sb_he\x22:{\x22client\x22:\x22heirloom-hp\x22,\x22dh\x22:true,\x22ds\x22:\x22\x22,\x22host\x22:\x22google.com\x22,\x22jsonp\x22:true,\x22msgs\x22:{\x22cibl\x22:\x22Borrar b\\u00fasqueda\x22,\x22dym\x22:\x22Quiz\\u00e1s quisiste decir:\x22,\x22lcky\x22:\x22Voy a tener suerte\x22,\x22lml\x22:\x22M\\u00e1s informaci\\u00f3n\x22,\x22psrc\x22:\x22Se ha eliminado esta b\\u00fasqueda de tu \\u003Ca href\x3d\\\x22/history\\\x22\\u003EHistorial web\\u003C/a\\u003E\x22,\x22psrl\x22:\x22Eliminar\x22,\x22sbit\x22:\x22Buscar por im\\u00e1genes\x22,\x22srch\x22:\x22Buscar con Google\x22},\x22ovr\x22:{},\x22pq\x22:\x22\x22,\x22rfs\x22:[],\x22stok\x22:\x22hkAmTn0ZXJNZW6o13RHuCxApFL0\x22}}';google.pmc=JSON.parse(pmc);})();</script></body></html>
@@ -0,0 +1,87 @@
1
+ # Official HTML Parser Tests
2
+
3
+ This directory contains implementations of official HTML parsing test suites to ensure compliance with web standards.
4
+
5
+ ## Test Sources
6
+
7
+ ### HTML5lib Tests
8
+ - **Tokenizer Tests**: JSON format tests from `html5lib-tests/tokenizer/`
9
+ - **Tree Construction Tests**: DAT format tests from `html5lib-tests/tree-construction/`
10
+
11
+ ### Web Platform Tests (WPT)
12
+ - **Parsing Tests**: HTML format tests from `wpt/html/syntax/parsing/`
13
+
14
+ ### Benchmark/Compliance Tests
15
+ - **Acid Tests**: Standardized rendering tests (Acid1, Acid2, Acid3)
16
+ - **HTML5 Test Suite**: Comprehensive HTML5 compliance tests
17
+
18
+ ## Test Structure
19
+
20
+ ```
21
+ tests/official/
22
+ ├── html5lib/
23
+ │ ├── tokenizer/ # JSON tokenizer tests
24
+ │ ├── tree-construction/ # DAT tree construction tests
25
+ │ └── utils/ # HTML5lib test utilities
26
+ ├── wpt/ # Web Platform Tests
27
+ ├── acid/ # Acid tests
28
+ ├── benchmarks/ # Performance benchmarks
29
+ └── compliance/ # Compliance test results
30
+ ```
31
+
32
+ ## Test Formats
33
+
34
+ ### HTML5lib Tokenizer Tests (JSON)
35
+ ```json
36
+ {
37
+ "tests": [
38
+ {
39
+ "description": "Test description",
40
+ "input": "input_string",
41
+ "output": [expected_output_tokens],
42
+ "initialStates": [initial_states],
43
+ "lastStartTag": "last_start_tag",
44
+ "errors": [parse_errors]
45
+ }
46
+ ]
47
+ }
48
+ ```
49
+
50
+ ### HTML5lib Tree Construction Tests (DAT)
51
+ ```
52
+ #data
53
+ <html>
54
+ #errors
55
+ (1,6): expected-doctype-but-got-start-tag
56
+ #document
57
+ | <html>
58
+ | <head>
59
+ | <body>
60
+ ```
61
+
62
+ ### Web Platform Tests (HTML)
63
+ Standard HTML files with embedded test assertions and expected results.
64
+
65
+ ## Usage
66
+
67
+ ```bash
68
+ # Run all official tests
69
+ bun test tests/official/
70
+
71
+ # Run specific test suite
72
+ bun test tests/official/html5lib/
73
+ bun test tests/official/wpt/
74
+ bun test tests/official/acid/
75
+
76
+ # Run with coverage
77
+ bun test --coverage tests/official/
78
+ ```
79
+
80
+ ## Test Results
81
+
82
+ Results are automatically generated and stored in `tests/official/compliance/` with detailed reports on:
83
+ - Tokenizer compliance
84
+ - Tree construction compliance
85
+ - Error handling accuracy
86
+ - Performance benchmarks
87
+ - Standards compliance scores
@@ -0,0 +1,309 @@
1
+ import { describe, it, expect } from 'bun:test';
2
+ import { tokenize } from '../../../src/tokenizer';
3
+ import { parse } from '../../../src/parser';
4
+
5
+ describe('Acid Tests Compliance', () => {
6
+ describe('Acid1 Test', () => {
7
+ it('should parse basic HTML structure correctly', () => {
8
+ // Full document: doctype, head with a title, body with a paragraph and a two-cell table.
9
+ const markup = `
10
+ <!DOCTYPE html>
11
+ <html>
12
+ <head>
13
+ <title>Acid1 Test</title>
14
+ </head>
15
+ <body>
16
+ <div>
17
+ <p>Hello <b>World</b></p>
18
+ <table>
19
+ <tr>
20
+ <td>Cell 1</td>
21
+ <td>Cell 2</td>
22
+ </tr>
23
+ </table>
24
+ </div>
25
+ </body>
26
+ </html>
27
+ `;
28
+
29
+ const tree = parse(tokenize(markup));
30
+
31
+ expect(tree).toBeDefined();
32
+ expect((tree as any).type).toBe('DOCUMENT');
33
+ expect((tree as any).children?.length).toBeGreaterThan(0);
34
+ });
35
+
36
+ it('should handle nested elements', () => {
37
+ // Inline elements nested three levels deep inside a paragraph.
38
+ const markup = `
39
+ <div>
40
+ <p>Text <strong>bold <em>italic</em></strong> more text</p>
41
+ </div>
42
+ `;
43
+
44
+ const tree = parse(tokenize(markup));
45
+
46
+ expect(tree).toBeDefined();
47
+ expect(tree.children?.length).toBeGreaterThan(0);
48
+ });
49
+
50
+ it('should handle self-closing tags', () => {
51
+ // <img>, <br> and <hr> appear without closing tags inside a <div>.
52
+ const markup = `
53
+ <div>
54
+ <img src="test.jpg" alt="test">
55
+ <br>
56
+ <hr>
57
+ </div>
58
+ `;
59
+
60
+ const tree = parse(tokenize(markup));
61
+
62
+ expect(tree).toBeDefined();
63
+ });
64
+ });
65
+
66
+ describe('Acid2 Test', () => {
67
+ it('should handle CSS and more complex HTML', () => {
68
+ // Document whose <head> carries a <style> block with two CSS rules.
69
+ const markup = `
70
+ <!DOCTYPE html>
71
+ <html>
72
+ <head>
73
+ <style>
74
+ body { margin: 0; }
75
+ .test { color: red; }
76
+ </style>
77
+ </head>
78
+ <body>
79
+ <div class="test">
80
+ <span>Styled text</span>
81
+ </div>
82
+ </body>
83
+ </html>
84
+ `;
85
+
86
+ const tree = parse(tokenize(markup));
87
+
88
+ expect(tree).toBeDefined();
89
+ expect((tree as any).type).toBe('DOCUMENT');
90
+ });
91
+
92
+ it('should handle complex table structures', () => {
93
+ // Table with <thead>/<tbody> sections plus colspan and rowspan attributes.
94
+ const markup = `
95
+ <table>
96
+ <thead>
97
+ <tr>
98
+ <th colspan="2">Header</th>
99
+ </tr>
100
+ </thead>
101
+ <tbody>
102
+ <tr>
103
+ <td rowspan="2">Cell 1</td>
104
+ <td>Cell 2</td>
105
+ </tr>
106
+ <tr>
107
+ <td>Cell 3</td>
108
+ </tr>
109
+ </tbody>
110
+ </table>
111
+ `;
112
+
113
+ const tree = parse(tokenize(markup));
114
+
115
+ expect(tree).toBeDefined();
116
+ });
117
+ });
118
+
119
+ describe('Acid3 Test', () => {
120
+ it('should handle advanced HTML5 features', () => {
121
+ // Document built from <article>, <header>, <section> and <footer> elements.
122
+ const markup = `
123
+ <!DOCTYPE html>
124
+ <html>
125
+ <head>
126
+ <meta charset="UTF-8">
127
+ <title>Acid3 Test</title>
128
+ </head>
129
+ <body>
130
+ <article>
131
+ <header>
132
+ <h1>Article Title</h1>
133
+ </header>
134
+ <section>
135
+ <p>Article content</p>
136
+ </section>
137
+ <footer>
138
+ <p>Footer content</p>
139
+ </footer>
140
+ </article>
141
+ </body>
142
+ </html>
143
+ `;
144
+
145
+ const tree = parse(tokenize(markup));
146
+
147
+ expect(tree).toBeDefined();
148
+ expect((tree as any).type).toBe('DOCUMENT');
149
+ });
150
+
151
+ it('should handle HTML5 semantic elements', () => {
152
+ // Fragment using <main>, <nav> with a link list, and <aside>.
153
+ const markup = `
154
+ <main>
155
+ <nav>
156
+ <ul>
157
+ <li><a href="#home">Home</a></li>
158
+ <li><a href="#about">About</a></li>
159
+ </ul>
160
+ </nav>
161
+ <aside>
162
+ <p>Sidebar content</p>
163
+ </aside>
164
+ </main>
165
+ `;
166
+
167
+ const tree = parse(tokenize(markup));
168
+
169
+ expect(tree).toBeDefined();
170
+ });
171
+ });
172
+ });
173
+
174
+ describe('Quirks Mode Tests', () => {
175
+ it('should handle quirks mode HTML', () => {
176
+ // Input has no doctype and leaves two <p> elements and one <div> unclosed.
177
+ const markup = `
178
+ <html>
179
+ <body>
180
+ <div>
181
+ <p>No DOCTYPE - should trigger quirks mode
182
+ <p>Unclosed paragraphs
183
+ <div>Nested without proper closing
184
+ </div>
185
+ </body>
186
+ </html>
187
+ `;
188
+
189
+ const tree = parse(tokenize(markup));
190
+
191
+ expect(tree).toBeDefined();
192
+ expect((tree as any).type).toBe('DOCUMENT');
193
+ });
194
+
195
+ it('should handle malformed HTML gracefully', () => {
196
+ // <p>, <span>, <b> and <i> are opened but never closed before the </div>.
197
+ const markup = `
198
+ <div>
199
+ <p>Unclosed paragraph
200
+ <span>Unclosed span
201
+ <b>Bold text
202
+ <i>Italic text
203
+ </div>
204
+ `;
205
+
206
+ const tree = parse(tokenize(markup));
207
+
208
+ expect(tree).toBeDefined();
209
+ });
210
+
211
+ it('should handle mismatched tags', () => {
212
+ // Closing tags arrive in the wrong order relative to their opening tags.
213
+ const markup = `
214
+ <div>
215
+ <p>Paragraph</div>
216
+ <span>Span</p>
217
+ </span>
218
+ `;
219
+
220
+ const tree = parse(tokenize(markup));
221
+
222
+ expect(tree).toBeDefined();
223
+ });
224
+ });
225
+
226
+ describe('Performance Benchmarks', () => {
227
+ it('should parse small HTML quickly', () => {
228
+ // Times tokenizing plus parsing of a single tiny fragment.
229
+ const sample = '<div><p>Hello World</p></div>';
230
+
231
+ const startedAt = performance.now();
232
+ const tree = parse(tokenize(sample));
233
+ const elapsedMs = performance.now() - startedAt;
234
+
235
+ expect(tree).toBeDefined();
236
+ expect(elapsedMs).toBeLessThan(10); // Should be very fast
237
+ });
238
+
239
+ it('should handle medium-sized HTML', () => {
240
+ // 100 copies of the same fragment, concatenated with no separator.
241
+ const sample = '<div><p>Content</p></div>'.repeat(100);
242
+
243
+ const startedAt = performance.now();
244
+ const tree = parse(tokenize(sample));
245
+ const elapsedMs = performance.now() - startedAt;
246
+
247
+ expect(tree).toBeDefined();
248
+ expect(elapsedMs).toBeLessThan(100); // Should still be fast
249
+ });
250
+
251
+ it('should handle large HTML documents', () => {
252
+ // 1000 copies of the same fragment, concatenated with no separator.
253
+ const sample = '<div><p>Large content</p></div>'.repeat(1000);
254
+
255
+ const startedAt = performance.now();
256
+ const tree = parse(tokenize(sample));
257
+ const elapsedMs = performance.now() - startedAt;
258
+
259
+ expect(tree).toBeDefined();
260
+ expect(elapsedMs).toBeLessThan(1000); // Should complete within 1 second
261
+ });
262
+
263
+ it('should handle deeply nested HTML', () => {
264
+ // Build 100 nested <div> elements around a single text node.
265
+ const depth = 100;
266
+ const openTags = '<div>'.repeat(depth);
267
+ const closeTags = '</div>'.repeat(depth);
268
+ const deepHtml = openTags + 'Deep content' + closeTags;
269
+
270
+ // Time tokenizing and parsing together.
271
+ const startedAt = performance.now();
272
+ const tokenStream = tokenize(deepHtml);
273
+ const tree = parse(tokenStream);
274
+ const finishedAt = performance.now();
275
+ const elapsedMs = finishedAt - startedAt;
276
+
277
+ // The parse must produce a result and stay under the 500 ms budget.
278
+ expect(tree).toBeDefined();
279
+ expect(elapsedMs).toBeLessThan(500); // Should handle deep nesting
280
+ });
281
+ });
282
+
283
+ describe('Memory Usage Tests', () => {
284
+ it('should not leak memory on repeated parsing', () => {
285
+ // Smoke test: repeated parsing of one snippet must keep returning a result.
286
+ // Heap usage is not measured, so this cannot prove the absence of a leak;
287
+ // the per-iteration assertion below is the only check performed.
288
+ const testHtml = '<div><p>Memory test</p></div>';
289
+
290
+ // Parse the same HTML multiple times
291
+ for (let i = 0; i < 1000; i++) {
292
+ const tokens = tokenize(testHtml);
293
+ const ast = parse(tokens);
294
+ expect(ast).toBeDefined();
295
+ }
296
+ });
297
+
298
+ it('should handle multiple large documents', () => {
299
+ // Parses a 500-fragment document ten times in a row; each run must yield a result.
300
+ // Heap usage is not measured here, so this is a smoke test rather than a leak check.
301
+ const largeHtml = Array(500).fill('<div><p>Large content</p></div>').join('');
302
+
303
+ for (let i = 0; i < 10; i++) {
304
+ const tokens = tokenize(largeHtml);
305
+ const ast = parse(tokens);
306
+ expect(ast).toBeDefined();
307
+ }
308
+ });
309
+ });