@akotliar/sitemap-qa 1.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,484 @@
1
+ #!/usr/bin/env node
2
/**
 * Built-in defaults. Every other configuration layer (global file, project
 * file, environment, CLI options) is merged on top of these.
 */
var z = {
  timeout: 30,
  concurrency: 10,
  outputFormat: "html",
  outputDir: "./sitemap-qa/report",
  verbose: false,
  baseUrl: "https://example.com",
  acceptedPatterns: [],
};

/**
 * Reads one optional JSON config file.
 *
 * @param {string} filePath - Absolute path of the config file.
 * @param {string} label - "global" or "project"; only used in the warning text.
 * @returns {Promise<object>} Parsed content, or `{}` when the file is missing
 *   or cannot be read/parsed (a warning is printed in the latter case).
 */
async function readConfigLayer(filePath, label) {
  if (!_.existsSync(filePath)) {
    return {};
  }
  try {
    return JSON.parse(await E.readFile(filePath, "utf-8"));
  } catch (err) {
    console.warn(`Warning: Failed to load ${label} config: ${err}`);
    return {};
  }
}

/**
 * Builds the effective configuration.
 *
 * Layers, lowest to highest precedence: defaults (`z`), the file
 * `~/.sitemap-qa/config.json`, the file `./.sitemap-qa.config.json`,
 * environment variables (`ye`), then CLI options (`ve`). `e.baseUrl`, when
 * set, always wins. The result is validated with `be` before being returned.
 *
 * @param {object} e - Parsed CLI options.
 * @returns {Promise<object>} The merged, validated configuration.
 * @throws {Error} When the merged configuration fails validation.
 */
async function G(e) {
  const globalPath = A.join(O.homedir(), ".sitemap-qa", "config.json");
  const projectPath = A.join(process.cwd(), ".sitemap-qa.config.json");

  const globalLayer = await readConfigLayer(globalPath, "global");
  const projectLayer = await readConfigLayer(projectPath, "project");

  let config = { ...z, ...globalLayer, ...projectLayer, ...ye() };
  config = ve(config, e);
  if (e.baseUrl) {
    config.baseUrl = e.baseUrl;
  }
  be(config);
  return config;
}

/**
 * Collects configuration overrides from environment variables.
 *
 * Only `SITEMAP_VERIFY_TIMEOUT` is read. The value is parsed as a base-10
 * integer; a non-numeric value yields `NaN`, which `be` rejects.
 *
 * @returns {{timeout?: number}} Overrides found in the environment.
 */
function ye() {
  const overrides = {};
  const rawTimeout = process.env.SITEMAP_VERIFY_TIMEOUT;
  if (rawTimeout) {
    overrides.timeout = Number.parseInt(rawTimeout, 10);
  }
  return overrides;
}

/**
 * Applies CLI options on top of an existing configuration (non-mutating).
 *
 * @param {object} e - Configuration built from the lower-precedence layers.
 * @param {object} t - CLI options (`timeout`, `output`, `outputDir`,
 *   `verbose`, `acceptedPatterns`).
 * @returns {object} A new configuration object.
 */
function ve(e, t) {
  const merged = { ...e };

  // The literal string "30" is treated as "not provided" - presumably it is
  // the CLI option's default value (TODO confirm against the commander setup).
  if (t.timeout && t.timeout !== "30") {
    merged.timeout = Number.parseInt(t.timeout, 10);
  }
  if (t.output) {
    merged.outputFormat = t.output;
  }
  if (t.outputDir) {
    merged.outputDir = t.outputDir;
  }
  if (t.verbose === true) {
    merged.verbose = true;
  }
  if (t.acceptedPatterns) {
    // Accept a ready-made array as well as the comma-separated CLI string.
    const rawPatterns = Array.isArray(t.acceptedPatterns)
      ? t.acceptedPatterns
      : String(t.acceptedPatterns).split(",");
    merged.acceptedPatterns = rawPatterns
      .map((pattern) => String(pattern).trim())
      .filter(Boolean);
  }
  return merged;
}

/**
 * Validates a merged configuration.
 *
 * @param {object} e - Configuration to check.
 * @throws {Error} When `timeout` is not a number in [1, 300] or when
 *   `outputFormat` is neither "json" nor "html".
 */
function be(e) {
  // NaN compares false against both bounds, so it has to be rejected
  // explicitly; Number() keeps numeric strings such as "30" valid.
  const timeout = Number(e.timeout);
  if (!Number.isFinite(timeout) || timeout < 1 || timeout > 300) {
    throw new Error("Timeout must be between 1 and 300 seconds");
  }
  if (!["json", "html"].includes(e.outputFormat)) {
    throw new Error("Output format must be json or html");
  }
}
3
/**
 * Loads a page in headless Chromium (via the Playwright module bound to `q`)
 * and returns its rendered HTML.
 *
 * @param {string} e - URL to open.
 * @param {number} t - Timeout in seconds for navigation and page operations.
 * @returns {Promise<{content: string, statusCode: number, url: string}>}
 *   Page content, HTTP status and final page URL for a 2xx response.
 * @throws {w} (code "HTTP_ERROR") when the response status is not 2xx.
 * @throws {U} (code "NETWORK_ERROR") wrapping any other failure.
 */
async function we(e, t) {
  let browser;
  try {
    browser = await q.chromium.launch({
      headless: true,
      args: [
        "--disable-blink-features=AutomationControlled",
        "--disable-dev-shm-usage",
        "--no-sandbox",
      ],
    });
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      viewport: { width: 1920, height: 1080 },
      locale: "en-US",
      timezoneId: "America/New_York",
      extraHTTPHeaders: {
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        DNT: "1",
        Connection: "keep-alive",
        "Upgrade-Insecure-Requests": "1",
      },
    });
    const page = await context.newPage();

    // Runs inside the page before any site script: overrides properties that
    // the page could otherwise read (navigator.webdriver, window.chrome,
    // the notifications permission query).
    await page.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => false });
      window.chrome = { runtime: {} };
      const m = window.navigator.permissions.query;
      window.navigator.permissions.query = (l) =>
        l.name === "notifications"
          ? Promise.resolve({ state: Notification.permission })
          : m(l);
    });
    page.setDefaultTimeout(t * 1e3);

    const response = await page.goto(e, { waitUntil: "domcontentloaded", timeout: t * 1e3 });
    if (!response) {
      throw new Error("No response received from page");
    }
    const statusCode = response.status();
    const content = await page.content();
    const url = page.url();
    if (statusCode >= 200 && statusCode < 300) {
      return { content, statusCode, url };
    }
    throw new w(url, statusCode);
  } catch (err) {
    throw err.code === "HTTP_ERROR" ? err : new U(e, err);
  } finally {
    // Single cleanup point: the browser is closed exactly once on every path.
    if (browser) {
      await browser.close();
    }
  }
}

/**
 * Fetches a URL as text with a timeout, retries and a browser fallback.
 *
 * Behaviour:
 *  - 2xx: resolves with the body.
 *  - 403: the next attempt (if any) goes through `we` instead of `fetch`,
 *    without a back-off delay.
 *  - 408/429/500/502/503/504 and network failures: retried after
 *    `retryDelay * 2 ** attempt` milliseconds.
 *  - any other status: rejects immediately.
 *
 * @param {string} e - Absolute URL to fetch.
 * @param {object} [t]
 * @param {number} [t.timeout=30] - Per-attempt timeout in seconds.
 * @param {number} [t.maxRetries=3] - Extra attempts after the first one.
 * @param {number} [t.retryDelay=1000] - Base back-off delay in milliseconds.
 * @param {boolean} [t.useBrowser=false] - Use `we` from the first attempt.
 * @returns {Promise<{content: string, statusCode: number, url: string}>}
 * @throws {TypeError} When `e` is not a valid URL.
 * @throws {w} For HTTP failures (code "HTTP_ERROR").
 * @throws {U} For network failures once retries are exhausted.
 */
async function S(e, t = {}) {
  const { timeout = 30, maxRetries = 3, retryDelay = 1e3, useBrowser = false } = t;

  new URL(e); // validation only: throws before any network work on a bad URL

  const retryableStatuses = [408, 429, 500, 502, 503, 504];
  let lastError = null;
  let fallBackToBrowser = false;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      if (useBrowser || fallBackToBrowser) {
        return await we(e, timeout);
      }

      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), timeout * 1e3);
      let response;
      let body;
      try {
        response = await fetch(e, {
          method: "GET",
          headers: {
            "User-Agent": "sitemap-qa/1.0.0",
            Accept: "text/xml,application/xml,text/plain,*/*",
          },
          signal: controller.signal,
          redirect: "follow",
        });
        body = await response.text();
      } finally {
        // Cleared on success and failure alike, and only after the body has
        // been read so that a stalled body is also subject to the timeout.
        clearTimeout(timer);
      }

      const status = response.status;
      if (status >= 200 && status < 300) {
        return { content: body, statusCode: status, url: response.url };
      }
      if (status === 403) {
        // Remember the 403 so that running out of attempts here rejects with
        // a real error instead of the initial `null`.
        lastError = new w(response.url, status);
        fallBackToBrowser = true;
        continue;
      }
      if (!retryableStatuses.includes(status)) {
        throw new w(response.url, status);
      }
      lastError = new w(response.url, status);
    } catch (err) {
      if (err.code === "HTTP_ERROR") {
        if (!retryableStatuses.includes(err.statusCode)) {
          throw err;
        }
        lastError = err;
      } else {
        lastError = new U(e, err);
      }
      if (attempt === maxRetries) {
        break;
      }
    }

    if (attempt < maxRetries) {
      const delayMs = retryDelay * Math.pow(2, attempt);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  throw lastError;
}
4
/** The five predefined XML entities, which sitemap URLs must use for escaping. */
const SITEMAP_XML_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };

/**
 * Returns the decoded `<loc>` text of every `<tag>…</tag>` element in `xml`.
 * Elements without a `<loc>` are skipped. CDATA-wrapped values are unwrapped;
 * plain values have the predefined XML entities decoded.
 *
 * @param {string} xml - Raw sitemap document.
 * @param {"sitemap"|"url"} tag - Entry element name.
 * @returns {string[]} Trimmed location strings in document order.
 */
function extractSitemapLocs(xml, tag) {
  // `(?:\s[^>]*)?>` keeps `<url` from matching `<urlset …>` (and `<sitemap`
  // from matching `<sitemapindex …>`).
  const entryPattern = new RegExp(`<${tag}(?:\\s[^>]*)?>(.*?)</${tag}>`, "gs");
  const locPattern = /<loc>\s*(?:<!\[CDATA\[(.*?)\]\]>|([^<]+))\s*<\/loc>/is;
  const locs = [];
  for (const entry of xml.matchAll(entryPattern)) {
    const found = locPattern.exec(entry[1]);
    if (!found) {
      continue;
    }
    if (found[1] !== undefined) {
      locs.push(found[1].trim()); // CDATA content is literal text
    } else {
      locs.push(
        found[2]
          .trim()
          .replace(/&(amp|lt|gt|quot|apos);/g, (_match, name) => SITEMAP_XML_ENTITIES[name]),
      );
    }
  }
  return locs;
}

/** True when a location contains "sitemap" or ends in ".xml" (case-insensitive). */
function looksLikeSitemapUrl(loc) {
  const lower = loc.toLowerCase();
  return lower.includes("sitemap") || lower.endsWith(".xml");
}

/**
 * Decides whether a document should be treated as a sitemap index.
 *
 * A `<sitemapindex` document always qualifies. A `<urlset` document qualifies
 * when more than half of its first five entries look like sitemap URLs.
 *
 * @param {string} e - Raw XML text.
 * @returns {boolean}
 */
function Se(e) {
  if (e.includes("<sitemapindex")) {
    return true;
  }
  if (!e.includes("<urlset")) {
    return false;
  }
  const sample = extractSitemapLocs(e, "url").slice(0, 5);
  const hits = sample.filter(looksLikeSitemapUrl).length;
  return hits > sample.length / 2;
}

/**
 * Extracts child sitemap URLs from a sitemap index.
 *
 * For a `<sitemapindex` document every `<sitemap><loc>` is returned; for any
 * other document only `<url><loc>` values that look like sitemap URLs are
 * returned. Values that `new URL()` cannot parse are dropped.
 *
 * @param {string} e - Raw XML text.
 * @returns {string[]} Child sitemap URLs in document order.
 */
function ke(e) {
  const isIndex = e.includes("<sitemapindex");
  const locs = extractSitemapLocs(e, isIndex ? "sitemap" : "url");
  const candidates = isIndex ? locs : locs.filter(looksLikeSitemapUrl);
  return candidates.filter((loc) => {
    try {
      new URL(loc);
      return true;
    } catch {
      return false; // unparsable entries are skipped, as before
    }
  });
}
URL(h);T.hostname!==l?(console.warn(`\u26A0\uFE0F Sitemap URL redirects (301): ${h}`),console.warn(" Problem: The sitemap index contains a URL that redirects."),console.warn(` Likely issue: Domain mismatch - expected "${l}" but got "${T.hostname}"`),console.warn(` Fix: Update sitemap index to use "https://${l}${T.pathname}"`)):(console.warn(`\u26A0\uFE0F Sitemap URL redirects (301): ${h}`),console.warn(" Fix: Update the sitemap index to reference the final URL after redirect."))}catch{console.warn(`Failed to fetch sitemap ${h}: ${R}`)}}else console.warn(`Failed to fetch sitemap ${h}: ${R}`)}}})),o.size>1e3){console.warn("\u26A0\uFE0F Processed over 1000 sitemap URLs. Stopping to prevent excessive requests.");break}}let p=o.size,u=i.size,g=m.size,f=p-a.length-u-g;return a.length===0&&p>0&&(console.warn(`
5
+ \u26A0\uFE0F SITEMAP DISCOVERY ISSUE`),f>0&&(u>0||g>0)?(console.warn(`Found ${f} sitemap index(es) containing ${u+g} child sitemap(s):`),g>0&&console.warn(` - ${g} sitemap(s) return 301 redirects (content not accessible without following redirect)`),u>0&&console.warn(` - ${u} sitemap(s) returned errors (404, 403, 500, or network issues)`)):g>0?console.warn(`All ${g} sitemap(s) return 301 redirects.`):u>0?(console.warn(`All ${u} sitemap(s) returned errors.`),console.warn(`
6
+ Common causes:`),console.warn(" - 403 Forbidden: Bot protection (Cloudflare, etc.) or IP blocking"),console.warn(" - 404 Not Found: Sitemaps don't exist at these URLs"),console.warn(" - 500/502/503: Server errors or maintenance"),console.warn(`
7
+ If sitemaps work in your browser but not here, the site likely has bot protection.`),console.warn("Try: Check if sitemaps load without JavaScript, or contact site administrator.")):console.warn(`Processed ${p} URL(s) but found no accessible sitemaps.`),console.warn(`
8
+ Note: This tool does not follow redirects for sitemap URLs.`),g>0&&(console.warn(`
9
+ Possible causes of redirects:`),console.warn(" - Sitemap index uses non-canonical domain (e.g., missing 'www' or vice versa)"),console.warn(" - Sitemap URLs redirect from HTTP to HTTPS"),console.warn(" - Intentional redirects in your site configuration"),console.warn(`
10
+ Recommendation: Update sitemap index URLs to match the final destination (no redirects).`)),console.warn("")),{sitemaps:a,canonicalDomain:l}}async function V(e,t){let r=xe(e),s=[],n;t.verbose&&console.log("Strategy 1: Checking robots.txt for sitemap directives...");let a=await H(r,t);if(a.length>0){let{sitemaps:m,canonicalDomain:l}=await C(a,t,r,n);return n=l,{sitemaps:m,source:"robots-txt",accessIssues:[],canonicalDomain:n}}t.verbose&&console.log("Strategy 2: Trying standard sitemap paths...");let{sitemaps:c,issues:o,redirectedToCanonical:i}=await j(r,t);if(s=o,c.length>0){let{sitemaps:m,canonicalDomain:l}=await C(c,t,r,n);return n=l,{sitemaps:m,source:"standard-path",accessIssues:[],canonicalDomain:n}}if(i){let m=`https://${i}`;console.log(`
11
+ \u{1F4A1} All requests redirected. Retrying with canonical domain: ${i}
12
+ `);let l=await H(m,t);if(l.length>0){let{sitemaps:p,canonicalDomain:u}=await C(l,t,m,i);return{sitemaps:p,source:"robots-txt",accessIssues:[],canonicalDomain:u||i}}let{sitemaps:d}=await j(m,t);if(d.length>0){let{sitemaps:p,canonicalDomain:u}=await C(d,t,m,i);return{sitemaps:p,source:"standard-path",accessIssues:[],canonicalDomain:u||i}}}return{sitemaps:[],source:"none",accessIssues:s,canonicalDomain:n}}var $=require("fast-xml-parser"),Ue=new $.XMLParser({ignoreAttributes:!1,attributeNamePrefix:"@_",textNodeName:"_text",parseAttributeValue:!0,trimValues:!0,allowBooleanAttributes:!0,parseTagValue:!1});function Ce(e,t){let r=[];if(e.urlset){let s=Array.isArray(e.urlset.url)?e.urlset.url:[e.urlset.url];for(let n of s)!n||!n.loc||r.push({loc:n.loc,lastmod:n.lastmod,changefreq:n.changefreq,priority:n.priority?parseFloat(n.priority):void 0,source:t})}return r}async function B(e,t){let r=[];try{let s=$.XMLValidator.validate(e);if(s!==!0){let o=typeof s=="object"?s.err.msg:"Invalid XML";return{urls:[],errors:[`[${t}] XML parsing failed: ${o}`],totalCount:0,sitemapUrl:t}}let n=Ue.parse(e),a=Ce(n,t),c=[];for(let o of a)try{new URL(o.loc),o.priority!==void 0&&(o.priority<0||o.priority>1)&&(r.push(`Invalid priority ${o.priority} for ${o.loc} - clamping to 0-1`),o.priority=Math.max(0,Math.min(1,o.priority))),o.changefreq&&(["always","hourly","daily","weekly","monthly","yearly","never"].includes(o.changefreq.toLowerCase())||(r.push(`Invalid changefreq "${o.changefreq}" for ${o.loc}`),o.changefreq=void 0)),c.push(o)}catch{r.push(`Invalid URL format: ${o.loc}`)}return{urls:c,errors:r,totalCount:c.length,sitemapUrl:t}}catch(s){let n=s instanceof Error?s.message:String(s);return{urls:[],errors:[`[${t}] XML parsing failed: ${n}`],totalCount:0,sitemapUrl:t}}}async function W(e,t){let r=[],s=[],n=0,a=0;t.verbose&&console.log(`
13
+ Extracting URLs from ${e.length} sitemap(s)...`);let o=await $e(e,10,async i=>{try{t.verbose&&console.log(`Extracting URLs from: ${i}`);let m=await S(i,{timeout:t.timeout,maxRetries:2}),l=await B(m.content,i),d=l.urls.map(p=>({...p,extractedAt:new Date().toISOString()}));return t.verbose&&console.log(` \u2713 Extracted ${l.urls.length} URLs from ${i}`),{success:!0,urls:d,errors:l.errors}}catch(m){let l=`Failed to process ${i}: ${m instanceof Error?m.message:String(m)}`;return t.verbose&&console.error(` \u2717 ${l}`),{success:!1,urls:[],errors:[l]}}});for(let i of o)i.success?(n++,r.push(...i.urls)):a++,s.push(...i.errors);return t.verbose&&(console.log(`
14
/**
 * Runs an async worker over `e` in consecutive batches; batches run one after
 * another, items within a batch run concurrently.
 *
 * @param {Array} e - Items to process.
 * @param {number} t - Batch size. Values below 1 (or non-numeric) are treated
 *   as 1 so the loop always advances.
 * @param {Function} r - Worker, invoked as by `Array.prototype.map`.
 * @returns {Promise<Array>} Worker results in input order.
 */
async function $e(e, t, r) {
  const batchSize = Math.max(1, Math.floor(t) || 1);
  const results = [];
  for (let start = 0; start < e.length; start += batchSize) {
    const batch = e.slice(start, start + batchSize);
    results.push(...(await Promise.all(batch.map(r))));
  }
  return results;
}

/**
 * Produces a canonical key for a URL: trailing slash removed (except for the
 * root path) and query parameters sorted by name. Protocol, host and fragment
 * are kept.
 *
 * @param {string} e - URL to normalise.
 * @returns {string} The canonical form, or `e` unchanged when it cannot be parsed.
 */
function Pe(e) {
  let parsed;
  try {
    parsed = new URL(e);
  } catch {
    return e;
  }
  let path = parsed.pathname;
  if (path !== "/" && path.endsWith("/")) {
    path = path.slice(0, -1);
  }
  const sortedPairs = [...parsed.searchParams.entries()].sort(([a], [b]) => a.localeCompare(b));
  const query = new URLSearchParams(sortedPairs).toString();
  return `${parsed.protocol}//${parsed.host}${path}${query ? "?" + query : ""}${parsed.hash}`;
}

/**
 * Returns the most recent parsable date among `values` as an ISO string.
 * Empty and unparsable values are ignored.
 *
 * @param {Array<string|undefined>} values
 * @returns {string|undefined} ISO timestamp, or `undefined` when none parse.
 */
function latestIsoTimestamp(values) {
  let latest = -Infinity;
  for (const value of values) {
    if (!value) {
      continue;
    }
    const ms = new Date(value).getTime();
    // An invalid date yields NaN; passing that on to toISOString() would throw.
    if (!Number.isNaN(ms) && ms > latest) {
      latest = ms;
    }
  }
  return latest === -Infinity ? undefined : new Date(latest).toISOString();
}

/**
 * Merges duplicate entries for one URL into a single entry.
 *
 * A single-element group is returned as-is. Otherwise the result starts as a
 * copy of the first entry, then: `source` becomes all sources joined with
 * ", "; `lastmod` and `extractedAt` become the latest parsable value (ISO
 * format); `priority` becomes the maximum; `changefreq` becomes the most
 * frequent value (first seen wins a tie).
 *
 * @param {object[]} e - Entries that normalise to the same URL (non-empty).
 * @returns {object} The merged entry.
 */
function Te(e) {
  if (e.length === 1) {
    return e[0];
  }

  const merged = { ...e[0] };
  merged.source = e.map((entry) => entry.source).join(", ");

  const lastmod = latestIsoTimestamp(e.map((entry) => entry.lastmod));
  if (lastmod !== undefined) {
    merged.lastmod = lastmod;
  }

  const priorities = e.map((entry) => entry.priority).filter((value) => value !== undefined);
  if (priorities.length > 0) {
    merged.priority = Math.max(...priorities);
  }

  const tally = new Map();
  for (const entry of e) {
    if (entry.changefreq) {
      tally.set(entry.changefreq, (tally.get(entry.changefreq) || 0) + 1);
    }
  }
  if (tally.size > 0) {
    // Array.prototype.sort is stable, so insertion order breaks ties.
    merged.changefreq = [...tally.entries()].sort((a, b) => b[1] - a[1])[0][0];
  }

  const extractedAt = latestIsoTimestamp(e.map((entry) => entry.extractedAt));
  if (extractedAt !== undefined) {
    merged.extractedAt = extractedAt;
  }

  return merged;
}
15
+ Consolidating ${e.length} URL(s)...`);let s=new Map;for(let c of e){let o=Pe(c.loc);s.has(o)||s.set(o,[]),s.get(o).push(c)}let n=[],a=[];for(let[c,o]of s.entries()){let i=Te(o);n.push(i),o.length>1&&a.push({url:c,count:o.length,sources:o.map(m=>m.source)})}if(t&&(console.log("Consolidation complete:"),console.log(` - Input URLs: ${r}`),console.log(` - Unique URLs: ${n.length}`),console.log(` - Duplicates removed: ${r-n.length}`),a.length>0)){console.log(`
16
/**
 * Escapes every regular-expression metacharacter in a string so it can be
 * embedded in a pattern as literal text.
 *
 * @param {string} e - Text to escape.
 * @returns {string} Escaped text.
 */
function L(e) {
  return e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Returns the last two dot-separated labels of a hostname, or the hostname
 * itself when it has fewer than two labels.
 *
 * NOTE(review): this is a plain "last two labels" rule, so a hostname such as
 * "shop.example.co.uk" yields "co.uk".
 *
 * @param {string} e - Hostname.
 * @returns {string}
 */
function Ee(e) {
  const labels = e.split(".");
  return labels.length >= 2 ? labels.slice(-2).join(".") : e;
}

/**
 * Builds the "Domain Mismatch" risk rule for a base URL.
 *
 * The rule's regex matches (i.e. flags) any http(s) URL whose host is none of:
 *  - the base URL's own hostname,
 *  - the base domain (`Ee` of that hostname),
 *  - `www.` + base domain, or - when `t.allowedSubdomains` is non-empty -
 *    one of those subdomains + base domain (replacing the `www.` allowance).
 * The host may be followed by `:port`, `/`, `?`, `#` or the end of the string,
 * and is compared case-insensitively.
 *
 * @param {string} e - Base URL of the site being analysed.
 * @param {{allowedSubdomains?: string[]}} [t] - Optional rule settings.
 * @returns {{name: string, category: string, severity: string, regex: RegExp, description: string}}
 * @throws {TypeError} When `e` is not a valid URL.
 */
function K(e, t) {
  const hostname = new URL(e).hostname;
  const baseDomain = Ee(hostname);
  const exactHost = L(hostname);
  const domain = L(baseDomain);
  // What may legally follow the host part of a URL.
  const hostEnd = "(?:[:/?#]|$)";
  const subdomains = t?.allowedSubdomains;

  if (subdomains && subdomains.length > 0) {
    const prefixes = subdomains.map(L).join("|");
    const source = `^https?://(?!(?:${exactHost}|(?:(?:${prefixes})\\.)?${domain})${hostEnd})`;
    return {
      name: "Domain Mismatch",
      category: "domain_mismatch",
      severity: "high",
      regex: new RegExp(source, "i"),
      description: "URL does not match expected domain or allowed subdomains",
    };
  }

  const source = `^https?://(?!(?:${exactHost}|(?:www\\.)?${domain})${hostEnd})`;
  return {
    name: "Domain Mismatch",
    category: "domain_mismatch",
    severity: "high",
    regex: new RegExp(source, "i"),
    description: `URL does not match expected domain: ${baseDomain} (including www variant)`,
  };
}
Subdomain",category:"environment_leakage",severity:"high",regex:/^https?:\/\/(dev|development)\./i,description:"URL uses development subdomain"},{name:"QA/Test Subdomain",category:"environment_leakage",severity:"high",regex:/^https?:\/\/(qa|test|uat|preprod)\./i,description:"URL uses test environment subdomain"},{name:"Localhost URL",category:"environment_leakage",severity:"high",regex:/^https?:\/\/(localhost|127\.0\.0\.1|0\.0\.0\.0)/,description:"URL points to localhost (development environment)"},{name:"Environment in Path",category:"environment_leakage",severity:"high",regex:/^https?:\/\/[^/]+\/(staging|dev|qa|uat|preprod)\//i,description:"URL path contains environment identifier at root level"}];var X=[{name:"Admin Path",category:"admin_paths",severity:"high",regex:/\/(admin|administrator)(?:\/|$|\?)/i,description:"URL contains /admin or /administrator as a path segment"},{name:"Dashboard Path",category:"admin_paths",severity:"high",regex:/\/dashboard(?:\/|$|\?)/i,description:"URL contains /dashboard as a path segment"},{name:"Config Path",category:"admin_paths",severity:"high",regex:/\/(config|configuration)(?:\/|$|\?)/i,description:"URL contains /config or /configuration as a path segment"},{name:"Console Path",category:"admin_paths",severity:"high",regex:/\/console(?:\/|$|\?)/i,description:"URL contains /console as a path segment"},{name:"Control Panel Path",category:"admin_paths",severity:"high",regex:/\/(cpanel|control-panel)(?:\/|$|\?)/i,description:"URL contains control panel as a path segment"}],Z=[{name:"Internal Content Path",category:"internal_content",severity:"medium",regex:/\/internal\b/i,description:"URL contains /internal path segment - may be internal-only content not intended for public indexing"}],ee=[{name:"Authentication Token Parameter",category:"sensitive_params",severity:"high",regex:/[?&](token|auth_token|access_token|api_token)=/i,description:"Query parameter may contain authentication token"},{name:"API Key 
/** Lower-cased names of query parameters whose values must not be reported. */
const SENSITIVE_QUERY_KEYS = new Set([
  "token",
  "auth",
  "auth_token",
  "access_token",
  "api_token",
  "apikey",
  "api_key",
  "key",
  "password",
  "passwd",
  "pwd",
  "secret",
  "client_secret",
  "session",
  "sessionid",
  "sid",
  "credentials",
]);

/**
 * Replaces the value of every sensitive query parameter with "[REDACTED]".
 *
 * Parameter names are compared case-insensitively, because the detection
 * patterns in this file use the `i` flag: a URL flagged for `?Token=…` must
 * have that value redacted too.
 *
 * @param {string} e - URL to sanitise.
 * @returns {string} The serialised URL with sensitive values replaced, or `e`
 *   unchanged when it cannot be parsed as a URL.
 */
function te(e) {
  let parsed;
  try {
    parsed = new URL(e);
  } catch {
    return e;
  }
  // Snapshot the names first: set() rewrites the list being iterated.
  const names = new Set(parsed.searchParams.keys());
  for (const name of names) {
    if (SENSITIVE_QUERY_KEYS.has(name.toLowerCase())) {
      parsed.searchParams.set(name, "[REDACTED]");
    }
  }
  return parsed.toString();
}
These paths may expose privileged access points.`,recommendedAction:"Confirm if admin paths should be publicly indexed. Consider excluding via robots.txt or removing from sitemap. Verify access controls."};case"internal_content":return{rationale:`${r} URL(s) contain "internal" in the path. These may be internal-facing content not intended for public indexing.`,recommendedAction:"Review URLs to determine if they should be publicly accessible. Consider excluding internal content from sitemap or adding noindex meta tags."};case"test_content":return{rationale:`${r} URL(s) contain test/demo/sample identifiers. These may be placeholder or unfinished content not intended for indexing.`,recommendedAction:"Review and remove test content from production sitemaps. Verify content is production-ready before including in sitemap."};case"sensitive_params":return{rationale:`${r} URL(s) contain sensitive query parameters (token, auth, key, password, session). This may expose authentication credentials or debugging flags.`,recommendedAction:"Review why sensitive parameters are in sitemap URLs. Remove authentication tokens from URLs. Consider POST requests for sensitive data."};case"protocol_inconsistency":return{rationale:`${r} URL(s) use HTTP protocol in HTTPS sitemap. This creates mixed content warnings and potential security issues.`,recommendedAction:"Update URLs to use HTTPS consistently. Verify SSL certificate coverage. Check for hardcoded HTTP URLs in content."};case"domain_mismatch":return{rationale:`${r} URL(s) do not match expected base domain. This may indicate external links, CDN URLs, or configuration errors.`,recommendedAction:"Verify if external domains are intentional. Review sitemap generation logic. 
Confirm CDN or subdomain configuration is correct."};default:return{rationale:`${r} URL(s) flagged in category: ${e}`,recommendedAction:"Review flagged URLs and determine appropriate action."}}}function P(e,t=5){let r=new Map;for(let i of e)r.has(i.category)||r.set(i.category,[]),r.get(i.category).push(i);let s=[];for(let[i,m]of r.entries()){let l=Array.from(new Set(m.map(f=>f.url))),d=m.reduce((f,b)=>{let h=["low","medium","high"];return h.indexOf(b.severity)>h.indexOf(f)?b.severity:f},"low"),p=l.slice(0,t),{rationale:u,recommendedAction:g}=_e(i,d,l.length);s.push({category:i,severity:d,count:l.length,rationale:u,sampleUrls:p,recommendedAction:g,allUrls:l})}s.sort((i,m)=>{let l=["high","medium","low"];return l.indexOf(i.severity)-l.indexOf(m.severity)});let n=new Set(e.map(i=>i.url)).size,a=s.filter(i=>i.severity==="high").reduce((i,m)=>i+m.count,0),c=s.filter(i=>i.severity==="medium").reduce((i,m)=>i+m.count,0),o=s.filter(i=>i.severity==="low").reduce((i,m)=>i+m.count,0);return{groups:s,totalRiskUrls:n,highSeverityCount:a,mediumSeverityCount:c,lowSeverityCount:o}}async function re(e,t,r){let s=Date.now(),n=[],a=K(t),c=[...Q,...Y,...X,...ee,...Z,a],o=[];if(r.acceptedPatterns&&r.acceptedPatterns.length>0)for(let p of r.acceptedPatterns)try{let u=p.replace(/[.+?^${}()|[\]\\]/g,"\\$&").replace(/\*/g,"[^/]*");!u.endsWith("$")&&!u.includes("(?:")&&(u=u+"(?:/|$|\\?|#)"),o.push(new RegExp(u,"i"))}catch{r.verbose&&console.warn(`Invalid accepted pattern: ${p}`)}if(r.verbose){console.log(`
17
+ Analyzing ${e.length} URLs for risk patterns...`);try{console.log(`Base domain: ${new URL(t).hostname}`)}catch{console.log(`Base URL: ${t}`)}o.length>0&&console.log(`Accepted patterns: ${o.length}`)}let i;try{i=new URL(t).protocol}catch{r.verbose&&console.warn(`Invalid base URL: ${t}, defaulting to https:`),i="https:"}let m=0;for(let p of e){let u=p.loc;m++,(m%1e4===0||m===e.length)&&process.stdout.write(`\r\x1B[K Analyzing: ${m.toLocaleString()}/${e.length.toLocaleString()} URLs...`);let g=!1;for(let f of o)if(f.test(u)){g=!0;break}if(!g)for(let f of c)if(f.category==="protocol_inconsistency")try{let b=new URL(u).protocol;i==="https:"&&b==="http:"&&n.push({url:u,category:f.category,severity:f.severity,pattern:f.name,rationale:f.description,matchedValue:"http://"})}catch{r.verbose&&console.warn(`Skipping invalid URL: ${u}`);continue}else try{let b=u.match(f.regex);b&&n.push({url:f.category==="sensitive_params"?te(u):u,category:f.category,severity:f.severity,pattern:f.name,rationale:f.description,matchedValue:b[0]})}catch(b){r.verbose&&console.error(`Pattern matching failed for ${f.name}: ${b instanceof Error?b.message:String(b)}`);continue}}e.length>=1e4&&process.stdout.write("\r\x1B[K");let l=P(n),d=Date.now()-s;if(r.verbose&&(console.log(`
18
+ Risk Summary:`),console.log(` - Total URLs analyzed: ${e.length}`),console.log(` - Risk URLs found: ${l.totalRiskUrls}`),console.log(` - HIGH severity: ${l.highSeverityCount}`),console.log(` - MEDIUM severity: ${l.mediumSeverityCount}`),console.log(` - LOW severity: ${l.lowSeverityCount}`),console.log(` - Processing time: ${d}ms`),l.groups.length>0)){console.log(`
19
+ Risk Categories Found:`);for(let p of l.groups)console.log(` - ${p.category}: ${p.count} URLs (${p.severity.toUpperCase()})`)}return{findings:n,groups:l.groups,totalUrlsAnalyzed:e.length,riskUrlCount:l.totalRiskUrls,cleanUrlCount:e.length-l.totalRiskUrls,highSeverityCount:l.highSeverityCount,mediumSeverityCount:l.mediumSeverityCount,lowSeverityCount:l.lowSeverityCount,processingTimeMs:d}}function oe(e){let t={high:0,medium:0,low:0},r=e.riskGroups.map(c=>{t[c.severity]+=c.count;let o=c.allUrls||c.sampleUrls;return{category:c.category,count:c.count,severity:c.severity,summary:c.rationale,examples:o.slice(0,3),allUrls:o}}),s=e.riskGroups.reduce((c,o)=>c+o.count,0),n=s>0?`Found ${s} potentially risky URLs across ${e.riskGroups.length} categories in ${e.totalUrls} total URLs.`:`Analyzed ${e.totalUrls} URLs. No suspicious patterns detected.`,a=[];return t.high>0&&a.push(`${t.high} high-severity issues require immediate attention`),t.medium>0&&a.push(`${t.medium} medium-severity issues should be reviewed`),t.low>0&&a.push(`${t.low} low-severity items flagged for awareness`),{overview:n,keyFindings:a,categoryInsights:r,severityBreakdown:t,recommendations:[],generatedBy:"rule-based analysis",metadata:{tokensUsed:0,processingTime:e.processingTime||0,model:"pattern-matching"}}}var Ae="1.0.0-alpha.0";function se(e,t,r,s,n,a,c={}){let{pretty:o=!0,indent:i=2}=c,m=Le(e,t,r,s,n,a),l=Ne(m);return o?JSON.stringify(l,null,i):JSON.stringify(l)}function Le(e,t,r,s,n,a){let 
/**
 * Converts one collected error into the JSON-report shape
 * `{ code, message, context? }`.
 *
 * - Objects with a `code` property keep it (falling back to "UNKNOWN_ERROR"
 *   when it is empty) and get a `context` built from the first of:
 *   `attemptedPaths`; `sitemapUrl` together with `lineNumber`; `url`.
 * - Objects without `code` are reported as "UNKNOWN_ERROR" with their `message`.
 * - Non-object values (for example plain strings, which is what the
 *   extraction helpers in this file put in their `errors` arrays) are reported
 *   as "UNKNOWN_ERROR" with their string form as the message; the `in`
 *   operator would throw a TypeError on them.
 *
 * @param {*} e - Error object or message.
 * @returns {{code: string, message: *, context?: object}}
 */
function Ge(e) {
  const isObjectLike = e !== null && (typeof e === "object" || typeof e === "function");
  if (!isObjectLike) {
    return { code: "UNKNOWN_ERROR", message: String(e) };
  }
  if (!("code" in e)) {
    return { code: "UNKNOWN_ERROR", message: e.message };
  }

  const serialized = { code: e.code || "UNKNOWN_ERROR", message: e.message };
  if ("attemptedPaths" in e) {
    serialized.context = { attempted_paths: e.attemptedPaths };
  } else if ("sitemapUrl" in e && "lineNumber" in e) {
    serialized.context = { sitemap_url: e.sitemapUrl, line_number: e.lineNumber };
  } else if ("url" in e) {
    serialized.context = { url: e.url };
  }
  return serialized;
}
20
+ <html lang="en">
21
+ <head>
22
+ <meta charset="UTF-8">
23
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
24
+ <title>Sitemap QA Report - ${s.baseUrl}</title>
25
+ <style>
26
+ * { margin: 0; padding: 0; box-sizing: border-box; }
27
+ body {
28
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
29
+ line-height: 1.6;
30
+ color: #1f2937;
31
+ background: #ffffff;
32
+ padding: 24px;
33
+ }
34
+ .container {
35
+ max-width: 1400px;
36
+ margin: 0 auto;
37
+ background: white;
38
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
39
+ border-radius: 12px;
40
+ overflow: hidden;
41
+ border: 1px solid #e5e7eb;
42
+ }
43
+ .header {
44
+ background: #0f172a;
45
+ color: white;
46
+ padding: 48px 40px;
47
+ border-bottom: 3px solid #3b82f6;
48
+ }
49
+ .header h1 {
50
+ font-size: 1.875rem;
51
+ font-weight: 700;
52
+ margin-bottom: 12px;
53
+ letter-spacing: -0.025em;
54
+ }
55
+ .header .meta {
56
+ opacity: 0.75;
57
+ font-size: 0.875rem;
58
+ font-weight: 400;
59
+ }
60
+ .summary {
61
+ display: grid;
62
+ grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
63
+ gap: 1px;
64
+ background: #e5e7eb;
65
+ border-bottom: 1px solid #e5e7eb;
66
+ }
67
+ .summary-card {
68
+ background: white;
69
+ padding: 28px 32px;
70
+ text-align: center;
71
+ }
72
+ .summary-card .label {
73
+ font-size: 0.75rem;
74
+ color: #6b7280;
75
+ text-transform: uppercase;
76
+ letter-spacing: 0.05em;
77
+ font-weight: 600;
78
+ margin-bottom: 8px;
79
+ }
80
+ .summary-card .value {
81
+ font-size: 2.25rem;
82
+ font-weight: 700;
83
+ color: #0f172a;
84
+ font-variant-numeric: tabular-nums;
85
+ }
86
+ .content { padding: 40px; }
87
+ .status-clean {
88
+ text-align: center;
89
+ padding: 80px 32px;
90
+ background: #f0fdf4;
91
+ border-radius: 8px;
92
+ border: 1px solid #86efac;
93
+ }
94
+ .status-clean h2 {
95
+ font-size: 1.875rem;
96
+ margin-bottom: 12px;
97
+ color: #166534;
98
+ font-weight: 700;
99
+ }
100
+ .status-clean p {
101
+ font-size: 1rem;
102
+ color: #65a30d;
103
+ }
104
+ .severity-section { margin-bottom: 32px; }
105
+ .severity-section h2 {
106
+ font-size: 1.125rem;
107
+ font-weight: 600;
108
+ padding: 16px 20px;
109
+ margin-bottom: 16px;
110
+ border-radius: 8px;
111
+ display: flex;
112
+ align-items: center;
113
+ gap: 12px;
114
+ cursor: pointer;
115
+ user-select: none;
116
+ transition: all 0.2s;
117
+ }
118
+ .severity-section h2:hover {
119
+ opacity: 0.85;
120
+ transform: translateY(-1px);
121
+ }
122
+ .severity-section h2::after {
123
+ content: '\u25BC';
124
+ margin-left: auto;
125
+ font-size: 0.8em;
126
+ transition: transform 0.3s ease;
127
+ opacity: 0.7;
128
+ }
129
+ .severity-section h2.collapsed::after {
130
+ transform: rotate(-90deg);
131
+ }
132
+ .severity-section h2.collapsed {
133
+ margin-bottom: 0;
134
+ }
135
+ .severity-content {
136
+ max-height: none;
137
+ overflow: visible;
138
+ transition: max-height 0.4s ease-out, opacity 0.3s ease-out;
139
+ opacity: 1;
140
+ }
141
+ .severity-content.collapsed {
142
+ max-height: 0;
143
+ overflow: hidden;
144
+ opacity: 0;
145
+ }
146
+ .severity-high { background: #fef2f2; color: #dc2626; border: 1px solid #fecaca; }
147
+ .severity-medium { background: #fffbeb; color: #d97706; border: 1px solid #fde68a; }
148
+ .severity-low { background: #eff6ff; color: #2563eb; border: 1px solid #dbeafe; }
149
+ .risk-group {
150
+ background: white;
151
+ border: 1px solid #e5e7eb;
152
+ border-radius: 8px;
153
+ padding: 24px;
154
+ margin-bottom: 16px;
155
+ }
156
+ .risk-group h3 {
157
+ font-size: 1rem;
158
+ margin-bottom: 12px;
159
+ color: #0f172a;
160
+ font-weight: 600;
161
+ }
162
+ .risk-group .count {
163
+ display: inline-block;
164
+ background: #3b82f6;
165
+ color: white;
166
+ padding: 2px 10px;
167
+ border-radius: 9999px;
168
+ font-size: 0.75rem;
169
+ font-weight: 600;
170
+ margin-left: 8px;
171
+ }
172
+ .risk-group .impact {
173
+ color: #64748b;
174
+ margin-bottom: 16px;
175
+ font-size: 0.875rem;
176
+ line-height: 1.6;
177
+ }
178
+ .risk-group .urls {
179
+ background: #f8fafc;
180
+ border: 1px solid #e2e8f0;
181
+ border-radius: 6px;
182
+ padding: 16px;
183
+ }
184
+ .risk-group .urls h4 {
185
+ font-size: 0.75rem;
186
+ color: #64748b;
187
+ margin-bottom: 12px;
188
+ text-transform: uppercase;
189
+ letter-spacing: 0.05em;
190
+ font-weight: 600;
191
+ }
192
+ .risk-group .urls ul { list-style: none; }
193
+ .risk-group .urls li {
194
+ padding: 10px 12px;
195
+ border-bottom: 1px solid #e2e8f0;
196
+ font-family: 'SF Mono', 'Monaco', 'Cascadia Code', 'Consolas', monospace;
197
+ font-size: 0.8125rem;
198
+ color: #334155;
199
+ background: white;
200
+ margin-bottom: 4px;
201
+ border-radius: 4px;
202
+ word-break: break-all;
203
+ line-height: 1.6;
204
+ }
205
+ .risk-group .urls li:last-child { border-bottom: none; margin-bottom: 0; }
206
+ .risk-group .more {
207
+ color: #3b82f6;
208
+ font-style: italic;
209
+ margin-top: 8px;
210
+ font-size: 0.8125rem;
211
+ }
212
+ .download-btn {
213
+ display: inline-block;
214
+ background: #3b82f6;
215
+ color: white;
216
+ padding: 8px 16px;
217
+ border-radius: 6px;
218
+ text-decoration: none;
219
+ font-size: 0.8125rem;
220
+ font-weight: 500;
221
+ margin-top: 12px;
222
+ cursor: pointer;
223
+ border: none;
224
+ transition: all 0.15s;
225
+ }
226
+ .download-btn:hover {
227
+ background: #2563eb;
228
+ transform: translateY(-1px);
229
+ box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
230
+ }
231
+ .footer {
232
+ background: #f8fafc;
233
+ padding: 24px 40px;
234
+ border-top: 1px solid #e5e7eb;
235
+ text-align: center;
236
+ color: #64748b;
237
+ font-size: 0.8125rem;
238
+ }
239
+ .sitemaps {
240
+ background: white;
241
+ border: 1px solid #e5e7eb;
242
+ border-radius: 8px;
243
+ margin-bottom: 24px;
244
+ overflow: hidden;
245
+ }
246
+ .sitemaps h3 {
247
+ font-size: 1.125rem;
248
+ font-weight: 600;
249
+ padding: 16px 20px;
250
+ margin: 0;
251
+ color: #0f172a;
252
+ background: #f8fafc;
253
+ cursor: pointer;
254
+ user-select: none;
255
+ transition: all 0.15s;
256
+ display: flex;
257
+ align-items: center;
258
+ gap: 10px;
259
+ }
260
+ .sitemaps h3:hover {
261
+ background: #f1f5f9;
262
+ }
263
+ .sitemaps h3::after {
264
+ content: '\u25BC';
265
+ margin-left: auto;
266
+ font-size: 0.8em;
267
+ transition: transform 0.3s ease;
268
+ opacity: 0.7;
269
+ }
270
+ .sitemaps h3.collapsed::after {
271
+ transform: rotate(-90deg);
272
+ }
273
+ .sitemaps-content {
274
+ max-height: none;
275
+ overflow: visible;
276
+ transition: max-height 0.4s ease-out, opacity 0.3s ease-out;
277
+ opacity: 1;
278
+ padding: 20px;
279
+ }
280
+ .sitemaps-content.collapsed {
281
+ max-height: 0;
282
+ overflow: hidden;
283
+ opacity: 0;
284
+ padding: 0 20px;
285
+ }
286
+ .sitemaps ul { list-style: none; }
287
+ .sitemaps li {
288
+ padding: 10px 12px;
289
+ font-family: 'SF Mono', 'Monaco', 'Cascadia Code', 'Consolas', monospace;
290
+ font-size: 0.8125rem;
291
+ color: #475569;
292
+ word-break: break-all;
293
+ line-height: 1.6;
294
+ background: #f8fafc;
295
+ margin-bottom: 4px;
296
+ border-radius: 4px;
297
+ }
298
+ .sitemaps li:last-child { margin-bottom: 0; }
299
+ .errors-section {
300
+ background: #fffbeb;
301
+ border-left: 4px solid #f59e0b;
302
+ padding: 20px;
303
+ margin-bottom: 24px;
304
+ border-radius: 8px;
305
+ border: 1px solid #fde68a;
306
+ }
307
+ .errors-section h3 {
308
+ color: #92400e;
309
+ margin-bottom: 16px;
310
+ font-size: 1.125rem;
311
+ font-weight: 600;
312
+ display: flex;
313
+ align-items: center;
314
+ gap: 8px;
315
+ }
316
+ .errors-section ul {
317
+ list-style: none;
318
+ padding: 0;
319
+ }
320
+ .errors-section li {
321
+ padding: 12px;
322
+ background: white;
323
+ margin-bottom: 8px;
324
+ border-radius: 6px;
325
+ font-family: 'SF Mono', 'Monaco', 'Cascadia Code', 'Consolas', monospace;
326
+ font-size: 0.8125rem;
327
+ color: #78350f;
328
+ word-break: break-all;
329
+ line-height: 1.6;
330
+ border: 1px solid #fde68a;
331
+ }
332
+ .errors-section li:last-child {
333
+ margin-bottom: 0;
334
+ }
335
+ </style>
336
+ </head>
337
+ <body>
338
+ <div class="container">
339
+ <div class="header">
340
+ <h1>Sitemap Analysis</h1>
341
+ <div class="meta">
342
+ <div>${s.baseUrl}</div>
343
+ <div>${new Date(o).toLocaleString()}</div>
344
+ </div>
345
+ </div>
346
+
347
+ <div class="summary">
348
+ <div class="summary-card">
349
+ <div class="label">Sitemaps</div>
350
+ <div class="value">${t.sitemaps.length}</div>
351
+ </div>
352
+ <div class="summary-card">
353
+ <div class="label">URLs Analyzed</div>
354
+ <div class="value">${r.toLocaleString()}</div>
355
+ </div>
356
+ <div class="summary-card">
357
+ <div class="label">Issues Found</div>
358
+ <div class="value" style="color: ${i>0?"#dc2626":"#059669"}">${i}</div>
359
+ </div>
360
+ <div class="summary-card">
361
+ <div class="label">Scan Time</div>
362
+ <div class="value">${(e.metadata.processingTime/1e3).toFixed(1)}s</div>
363
+ </div>
364
+ </div>
365
+
366
+ <div class="content">
367
+ ${n.length>0?`
368
+ <div class="errors-section">
369
+ <h3>Parsing Errors & Warnings (${n.length})</h3>
370
+ <ul>
371
+ ${n.map(u=>`<li>${u.message}</li>`).join(`
372
+ `)}
373
+ </ul>
374
+ </div>
375
+ `:""}
376
+
377
+ ${t.sitemaps.length>0?`
378
+ <div class="sitemaps">
379
+ <h3 class="collapsed" onclick="toggleSection(this)">Sitemaps Discovered (${t.sitemaps.length})</h3>
380
+ <div class="sitemaps-content collapsed">
381
+ <ul>
382
+ ${t.sitemaps.map(u=>`<li>\u2022 ${u}</li>`).join(`
383
+ `)}
384
+ </ul>
385
+ </div>
386
+ </div>
387
+ `:""}
388
+
389
+ ${i===0?`
390
+ <div class="status-clean">
391
+ <h2>No Issues Found</h2>
392
+ <p>All URLs in the sitemap passed validation checks.</p>
393
+ </div>
394
+ `:""}
395
+
396
+ ${m.length>0?`
397
+ <div class="severity-section">
398
+ <h2 class="severity-high" onclick="toggleSection(this)">High Severity (${m.reduce((u,g)=>u+g.count,0)} URLs)</h2>
399
+ <div class="severity-content">
400
+ ${m.map(u=>I(u,c)).join(`
401
+ `)}
402
+ </div>
403
+ </div>
404
+ `:""}
405
+
406
+ ${l.length>0?`
407
+ <div class="severity-section">
408
+ <h2 class="severity-medium" onclick="toggleSection(this)">Medium Severity (${l.reduce((u,g)=>u+g.count,0)} URLs)</h2>
409
+ <div class="severity-content">
410
+ ${l.map(u=>I(u,c)).join(`
411
+ `)}
412
+ </div>
413
+ </div>
414
+ `:""}
415
+
416
+ ${d.length>0?`
417
+ <div class="severity-section">
418
+ <h2 class="severity-low" onclick="toggleSection(this)">Low Severity (${d.reduce((u,g)=>u+g.count,0)} URLs)</h2>
419
+ <div class="severity-content">
420
+ ${d.map(u=>I(u,c)).join(`
421
+ `)}
422
+ </div>
423
+ </div>
424
+ `:""}
425
+ </div>
426
+
427
+ <div class="footer">
428
+ Generated by <strong>sitemap-qa</strong> v${qe}
429
+ </div>
430
+ </div>
431
+
432
+ <script>
433
+ function toggleSection(header) {
434
+ header.classList.toggle('collapsed');
435
+ const content = header.nextElementSibling;
436
+ content.classList.toggle('collapsed');
437
+ }
438
+
439
+ function downloadUrls(categorySlug, encodedUrls) {
440
+ // Decode HTML entities and parse JSON
441
+ const textarea = document.createElement('textarea');
442
+ textarea.innerHTML = encodedUrls;
443
+ const urls = JSON.parse(textarea.value);
444
+
445
+ // Create text content (one URL per line)
446
+ const textContent = urls.join('\\n');
447
+
448
+ // Create blob and download
449
+ const blob = new Blob([textContent], { type: 'text/plain' });
450
+ const url = URL.createObjectURL(blob);
451
+ const a = document.createElement('a');
452
+ a.href = url;
453
+ a.download = categorySlug + '_urls.txt';
454
+ document.body.appendChild(a);
455
+ a.click();
456
+ document.body.removeChild(a);
457
+ URL.revokeObjectURL(url);
458
+ }
459
+ </script>
460
+ </body>
461
+ </html>`}/** I: renders one risk group as an HTML card. e = category insight (reads category, examples, count, allUrls, summary), t = maximum number of sample URLs to list. The snake_case category is title-cased for the heading, the first t entries of e.examples are listed, n = e.count minus the number listed (rendered as "... and n more" when positive), the lower-cased category is passed to the download button as the file-name slug, and e.allUrls is JSON-encoded then HTML-escaped as the button payload. */function I(e,t){let r=e.category.split("_").map(i=>i.charAt(0).toUpperCase()+i.slice(1)).join(" "),s=e.examples.slice(0,t),n=e.count-s.length,a=e.category.toLowerCase(),c=JSON.stringify(e.allUrls),o=ne(c);return`<div class="risk-group">
462
+ <h3>${r} <span class="count">${e.count} URLs</span></h3>
463
+ <div class="impact">${e.summary}</div>
464
+ <div class="urls">
465
+ <h4>Sample URLs</h4>
466
+ <ul>
467
+ ${s.map(i=>`<li>${ne(i)}</li>`).join(`
468
+ `)}
469
+ </ul>
470
+ ${n>0?`<div class="more">... and ${n} more</div>`:""}
471
+ <button class="download-btn" onclick="downloadUrls('${a}', '${o}')">\u{1F4E5} Download All ${e.count} URLs</button>
472
+ </div>
473
+ </div>`}/** ne: escapes &, <, >, " and ' in the string e as HTML entities. The ampersand is replaced first so the entities produced by the later replacements are not escaped again. e must be a string (it calls e.replace). */function ne(e){return e.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;").replace(/'/g,"&#039;")}/** ae: renders the HTML report with je and writes it to the path n as UTF-8. Argument order differs from je: ae's fifth parameter n is the output path, and its sixth parameter a (the error list) is passed to je in fifth position. */async function ae(e,t,r,s,n,a,c={}){let o=je(e,t,r,s,a,c);await ie.promises.writeFile(n,o,"utf-8")}/** le: the "analyze" sub-command. It takes the base URL as its only argument plus the options declared below; the action validates the options with He, loads the merged configuration with G (CLI output format is forwarded as outputFormat) and then runs the analysis and writes the report(s). */var le=new ce.Command("analyze").description("Analyze sitemap for QA issues").argument("<url>","Base URL to analyze").option("--timeout <seconds>","HTTP timeout in seconds","30").option("--no-progress","Disable progress bar").option("--output <format>","Output format: html or json","html").option("--output-dir <path>","Output directory for reports").option("--output-file <path>","Custom output filename").option("--accepted-patterns <patterns>","Comma-separated regex patterns to exclude from risk detection").option("--no-color","Disable ANSI color codes in CLI output").option("--verbose","Enable verbose logging",!1).action(async(e,t)=>{let r;try{He(t),r=await G({...t,baseUrl:e,outputFormat:t.output}),console.log(`
474
+ \u{1F50D} Analyzing ${e}...
475
+ `);let n=await Be(e,r);Ve(n),await D.promises.mkdir(r.outputDir,{recursive:!0});let a=t.outputFile||`sitemap-qa-report-${Date.now()}.html`,c=`${r.outputDir}/${a}`;if(await ae(n.summary,n.discoveryResult,n.totalUrls,r,c,n.errors,{maxUrlsPerGroup:10}),console.log(`
476
+ \u{1F4C4} Full report saved to: ${x.default.cyan(c)}`),t.output==="json"){let i=a.replace(/\.html$/,".json"),m=`${r.outputDir}/${i}`,l=se(n.summary,n.discoveryResult,{totalCount:n.totalUrls,uniqueUrls:[],errors:[]},n.riskGroups,r,n.executionTime,{pretty:!0,indent:2});await D.promises.writeFile(m,l,"utf-8"),console.log(`\u{1F4C4} JSON report saved to: ${x.default.cyan(m)}`)}let o=We(n);process.exit(o)}catch(s){Je(s,r),process.exit(2)}});function He(e){let t=["json","html"];if(!t.includes(e.output))throw new Error(`Invalid output format: ${e.output}. Must be one of: ${t.join(", ")}`);let r=parseInt(e.timeout);if(isNaN(r)||r<=0)throw new Error(`Invalid timeout: ${e.timeout}. Must be a positive number.`)}function Ve(e){console.log("");let t=e.summary.categoryInsights.reduce((r,s)=>r+s.count,0);if(t===0)console.log(x.default.green("\u2705 No issues found - sitemap looks clean!"));else{console.log(x.default.yellow(`\u26A0\uFE0F Found ${t} potentially risky URL(s)`)),console.log("");let{high:r,medium:s,low:n}=e.summary.severityBreakdown;r>0&&console.log(x.default.red(` \u{1F6A8} High severity: ${r} URLs`)),s>0&&console.log(x.default.yellow(` \u26A0\uFE0F Medium severity: ${s} URLs`)),n>0&&console.log(x.default.blue(` \u2139\uFE0F Low severity: ${n} URLs`))}console.log("")}async function Be(e,t){let r=Date.now(),s=[],n=(0,k.default)("Discovering sitemaps...").start(),a=await V(e,t);if(n.succeed(`Found ${a.sitemaps.length} sitemap(s)`),a.accessIssues.length>0){console.warn(`\u26A0\uFE0F Warning: ${a.accessIssues.length} sitemap(s) are access-blocked`);for(let y of a.accessIssues)s.push(new Error(`Access blocked: ${y.url} (${y.statusCode})`))}if(a.sitemaps.length===0)throw new Error(`No sitemaps found at ${e}. 
Tried: /sitemap.xml, /sitemap_index.xml, /robots.txt`);/* Parse stage: extract URLs from every discovered sitemap; parse errors are collected (string errors are wrapped in Error objects) rather than thrown. */let c=(0,k.default)("Parsing sitemaps...").start(),o=await W(a.sitemaps,t);if(c.succeed(`Extracted ${o.allUrls.length.toLocaleString()} URLs`),o.errors.length>0)for(let y of o.errors)typeof y=="string"?s.push(new Error(y)):s.push(y);if(o.allUrls.length===0)throw new Error("No URLs extracted from sitemaps");/* De-duplication stage: l is the number of duplicates removed and only changes the spinner text. */let i=(0,k.default)("Removing duplicates...").start(),m=J(o.allUrls),l=o.allUrls.length-m.uniqueUrls.length;l>0?i.succeed(`${m.uniqueUrls.length.toLocaleString()} unique URLs (removed ${l.toLocaleString()} duplicates)`):i.succeed(`${m.uniqueUrls.length.toLocaleString()} unique URLs`);/* Risk stage: findings from re are grouped by P; g is the total count across all groups. */let d=(0,k.default)("Analyzing for risks...").start(),p=await re(m.uniqueUrls,e,t),u=P(p.findings),g=u.groups.reduce((y,R)=>y+R.count,0);g>0?d.warn(`Found ${g} risky URL(s)`):d.succeed("No risks detected");/* f is the elapsed time in milliseconds since Be started; it is passed to oe as processingTime and returned as executionTime. */let f=Date.now()-r,b=(0,k.default)("Generating report...").start(),h=oe({riskGroups:u.groups,totalUrls:m.uniqueUrls.length,sitemapUrl:e,processingTime:f});return b.succeed("Analysis complete"),{discoveryResult:a,totalUrls:m.uniqueUrls.length,riskGroups:u.groups,summary:h,errors:s,executionTime:f}}/** We: process exit code for a finished analysis: 1 when the severity breakdown has any high-severity finding, otherwise 0. */function We(e){return e.summary.severityBreakdown.high>0?1:0}/** Je: prints a failure report for e to stderr. For Error instances it prints the message, the stack when t?.verbose is set, and a list of suggestions chosen by message text ("No sitemaps found", or "Network" / "timeout"); any other value is printed with String(e). t may be undefined when the failure happened before the config was loaded. */function Je(e,t){console.error(`
477
+ \u274C Analysis failed
478
+ `),e instanceof Error?(console.error(`Error: ${e.message}`),t?.verbose&&e.stack&&(console.error(`
479
+ Stack trace:`),console.error(e.stack)),e.message.includes("No sitemaps found")?(console.error(`
480
+ Suggestions:`),console.error(" \u2022 Verify the base URL is correct"),console.error(" \u2022 Check if the site has a sitemap"),console.error(" \u2022 Ensure the sitemap is publicly accessible")):(e.message.includes("Network")||e.message.includes("timeout"))&&(console.error(`
481
+ Suggestions:`),console.error(" \u2022 Check your internet connection"),console.error(" \u2022 Verify the URL is accessible"),console.error(" \u2022 Try increasing the timeout with --timeout option"))):(console.error("Unknown error occurred"),console.error(String(e)))}var N=new me.Command;N.name("sitemap-qa").version("1.0.0").description("sitemap analysis for QA teams");N.addCommand(le);process.on("unhandledRejection",(e,t)=>{console.error("Unhandled Rejection at:",t,"reason:",e),process.exit(1)});process.on("SIGINT",()=>{console.log(`
482
+ Gracefully shutting down...`),process.exit(0)});process.on("SIGTERM",()=>{console.log(`
483
+ Gracefully shutting down...`),process.exit(0)});N.parse();
484
+ //# sourceMappingURL=index.cjs.map