aeorank 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,2584 @@
1
+ "use strict";
2
// Bundler-emitted interop helpers used to publish this module's bindings
// through a CommonJS-compatible namespace object.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable getter on `target` for every key of `all`;
 * each value of `all` is used as the getter function.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key equal to `except`, are skipped.
 * Enumerability follows the source descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Wraps `mod` in a fresh object flagged with `__esModule: true`. */
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ CRITERION_LABELS: () => CRITERION_LABELS,
24
+ analyzeAllPages: () => analyzeAllPages,
25
+ analyzePage: () => analyzePage,
26
+ audit: () => audit,
27
+ auditSiteFromData: () => auditSiteFromData,
28
+ buildDetailedFindings: () => buildDetailedFindings,
29
+ buildScorecard: () => buildScorecard,
30
+ calculateOverallScore: () => calculateOverallScore,
31
+ classifyRendering: () => classifyRendering,
32
+ detectParkedDomain: () => detectParkedDomain,
33
+ extractContentPagesFromSitemap: () => extractContentPagesFromSitemap,
34
+ extractNavLinks: () => extractNavLinks,
35
+ extractRawDataSummary: () => extractRawDataSummary,
36
+ fetchMultiPageData: () => fetchMultiPageData,
37
+ fetchWithHeadless: () => fetchWithHeadless,
38
+ generateBottomLine: () => generateBottomLine,
39
+ generateOpportunities: () => generateOpportunities,
40
+ generatePitchNumbers: () => generatePitchNumbers,
41
+ generateVerdict: () => generateVerdict,
42
+ isSpaShell: () => isSpaShell,
43
+ prefetchSiteData: () => prefetchSiteData,
44
+ scoreToStatus: () => scoreToStatus
45
+ });
46
+ module.exports = __toCommonJS(index_exports);
47
+
48
+ // src/parked-domain.ts
49
// Relative paths that a parked domain's landing script redirects to.
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];

// Host names (or host + path prefixes) of parking / domain-resale services.
var PARKING_SERVICE_DOMAINS = [
  "sedoparking.com",
  "parkingcrew.net",
  "bodis.com",
  "dsparking.com",
  "hugedomains.com",
  "afternic.com",
  "dan.com",
  "undeveloped.com",
  "domainmarket.com",
  "sav.com",
  "domaincontrol.com",
  "above.com",
  "domainlore.com",
  "domainnamesales.com",
  "brandbucket.com",
  "squadhelp.com",
  "godaddy.com/domainsearch"
];

// Phrases that appear on "this domain is for sale / parked" pages.
var PARKING_TEXT_PATTERNS = [
  /\bbuy this domain\b/i,
  /\bdomain is for sale\b/i,
  /\bthis domain may be for sale\b/i,
  /\bdomain for sale\b/i,
  /\bthis domain name is available\b/i,
  /\bparked by/i,
  /\bthis page is parked/i,
  /\bdomain has expired/i,
  /\bthis domain has been registered/i,
  /\bmake an offer on this domain\b/i,
  /\bget this domain\b/i,
  /\bacquire this domain\b/i
];

/**
 * Looks for a `window.location` redirect to one of PARKING_PATHS.
 *
 * Every redirect statement in the snippet is inspected (not only the first),
 * and the target is compared after removing query/fragment and trailing
 * slashes so that "/lander/" is treated like "/lander".
 *
 * @param {string} bodySnippet - HTML/JS text to scan.
 * @returns {string|null} Reason string, or null when no parking redirect is found.
 */
function detectParkingRedirect(bodySnippet) {
  const redirectPattern = /window\.location\.(?:replace|assign|href)\s*[=(]\s*['"](\/[^'"]*)['"]/gi;
  for (const match of bodySnippet.matchAll(redirectPattern)) {
    const target = match[1];
    const path = target
      .toLowerCase()
      .replace(/[?#].*/, "")
      // Drop trailing slashes but keep a bare "/" intact.
      .replace(/(?!^)\/+$/, "");
    if (PARKING_PATHS.includes(path)) {
      return `js-redirect to ${target}`;
    }
  }
  return null;
}

/**
 * Looks for a reference to a known parking service.
 *
 * A hit only counts when it is not embedded in a longer host label, so
 * "jordan.com" does not match "dan.com" and "dan.community" does not either.
 *
 * @param {string} bodySnippet - HTML text to scan.
 * @returns {string|null} Reason string, or null when no service is referenced.
 */
function detectParkingService(bodySnippet) {
  const lower = bodySnippet.toLowerCase();
  const isLabelChar = (ch) => /[a-z0-9_-]/.test(ch);
  for (const service of PARKING_SERVICE_DOMAINS) {
    let idx = lower.indexOf(service);
    while (idx !== -1) {
      const before = idx === 0 ? "" : lower.charAt(idx - 1);
      const after = lower.charAt(idx + service.length);
      if (!isLabelChar(before) && !isLabelChar(after)) {
        return `parking service: ${service}`;
      }
      idx = lower.indexOf(service, idx + 1);
    }
  }
  return null;
}

/**
 * Looks for a parked-domain phrase from PARKING_TEXT_PATTERNS.
 *
 * @param {string} bodySnippet - HTML text to scan.
 * @returns {string|null} Reason string containing the matched phrase, or null.
 */
function detectParkingText(bodySnippet) {
  for (const pattern of PARKING_TEXT_PATTERNS) {
    // The patterns carry no `g` flag, so exec() is stateless here.
    const hit = pattern.exec(bodySnippet);
    if (hit) {
      return `parking text: ${hit[0]}`;
    }
  }
  return null;
}

/**
 * Decides whether a page body looks like a parked domain.
 *
 * Checks run in order: JS redirect to a parking path, reference to a parking
 * service, then parked-domain wording. The first hit supplies the reason.
 *
 * @param {string} bodySnippet - Leading portion of the page HTML.
 * @returns {{isParked: boolean, reason?: string}} `reason` is present only when parked.
 */
function detectParkedDomain(bodySnippet) {
  if (typeof bodySnippet !== "string" || bodySnippet.length === 0) {
    return { isParked: false };
  }
  const reason =
    detectParkingRedirect(bodySnippet) ??
    detectParkingService(bodySnippet) ??
    detectParkingText(bodySnippet);
  return reason ? { isParked: true, reason } : { isParked: false };
}
120
+
121
+ // src/site-crawler.ts
122
/**
 * Best-effort GET of `url` as text.
 *
 * Follows redirects, sends the bot User-Agent and aborts after 15 seconds.
 * The body is truncated to 500,000 characters.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<{text: string, status: number, finalUrl: string}|null>}
 *   The response summary, or null when the request or body read fails.
 */
async function fetchText(url) {
  const requestOptions = {
    signal: AbortSignal.timeout(15e3),
    headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
    redirect: "follow"
  };
  let response;
  let body;
  try {
    response = await fetch(url, requestOptions);
    body = await response.text();
  } catch {
    // Any network, timeout or decoding failure is reported as "no result".
    return null;
  }
  return { text: body.slice(0, 5e5), status: response.status, finalUrl: response.url };
}
135
/**
 * Reduces a URL (or bare host) to its lower-case host name without scheme,
 * credentials, port, path, query, fragment or a leading "www.".
 *
 * The input is lower-cased first so that "HTTPS://" and "WWW." prefixes are
 * recognised, and the host is cut at the first "/", "?" or "#" so a query
 * string without a path does not leak into the result.
 *
 * @param {string} url - URL or host name.
 * @returns {string} Normalised host name.
 */
function extractDomain(url) {
  return url
    .trim()
    .toLowerCase()
    .replace(/^https?:\/\//, "")
    .replace(/[\/?#].*/, "")
    // Strip "user:password@" credentials if present.
    .replace(/^.*@/, "")
    .replace(/:[0-9]+$/, "")
    .replace(/^www\./, "");
}
138
/**
 * Returns the part of a host name that precedes its public suffix.
 *
 * A small fixed list of two-label suffixes (e.g. "co.uk") is recognised when
 * the host has more than two labels; otherwise the last label is treated as
 * the suffix. A single-label host is returned unchanged.
 *
 * @param {string} domain - Host name such as "shop.example.co.uk".
 * @returns {string} Everything before the suffix, e.g. "shop.example".
 */
function extractBrandName(domain) {
  const labels = domain.split(".");
  if (labels.length <= 1) {
    return domain;
  }
  const compoundSuffixes = ["co.uk", "com.au", "co.jp", "com.br", "co.nz", "co.in"];
  const tail = `${labels[labels.length - 2]}.${labels[labels.length - 1]}`;
  const suffixLabelCount = labels.length > 2 && compoundSuffixes.includes(tail) ? 2 : 1;
  return labels.slice(0, labels.length - suffixLabelCount).join(".");
}
147
/**
 * Reports whether the homepage fetch ended on a different site.
 *
 * The requested domain is lower-cased before its "www." prefix is removed,
 * so "WWW.Example.com" compares equal to "example.com". Two hosts are
 * considered the same site when they are equal, differ only by "www.", or
 * share the same brand name (as computed by extractBrandName).
 *
 * @param {string} originalDomain - Domain that was requested.
 * @param {{finalUrl?: string}|null} homepage - Fetch result for the homepage.
 * @returns {string|null} The foreign host, or null when there was no cross-domain redirect.
 */
function detectCrossDomainRedirect(originalDomain, homepage) {
  if (!homepage?.finalUrl) return null;
  const finalDomain = extractDomain(homepage.finalUrl);
  const cleanOriginal = originalDomain.toLowerCase().replace(/^www\./, "");
  const isSameSite =
    finalDomain === cleanOriginal ||
    finalDomain === `www.${cleanOriginal}` ||
    extractBrandName(finalDomain) === extractBrandName(cleanOriginal);
  return isSameSite ? null : finalDomain;
}
156
/**
 * Detects a JavaScript `window.location` redirect to an absolute http(s) URL
 * on a different site.
 *
 * Only the first such redirect in the snippet is considered. The requested
 * domain is lower-cased before its "www." prefix is removed so that the
 * comparison is case-insensitive.
 *
 * @param {string} bodySnippet - Leading portion of the page HTML.
 * @param {string} originalDomain - Domain that was requested.
 * @returns {string|null} The foreign host, or null when no cross-site JS redirect is found.
 */
function detectJsRedirect(bodySnippet, originalDomain) {
  const redirect = bodySnippet.match(
    /window\.location\.(?:replace|assign|href)\s*[=(]\s*['"]https?:\/\/([^'"]+)['"]/i
  );
  if (!redirect) return null;
  const jsDomain = extractDomain(`https://${redirect[1]}`);
  const cleanOriginal = originalDomain.toLowerCase().replace(/^www\./, "");
  const isSameSite =
    jsDomain === cleanOriginal ||
    jsDomain === `www.${cleanOriginal}` ||
    extractBrandName(jsDomain) === extractBrandName(cleanOriginal);
  return isSameSite ? null : jsDomain;
}
168
/**
 * Tells whether a fetch result is a 200 response whose body starts like an
 * HTML document. Used to spot HTML pages served in place of text files.
 *
 * Only the first 200 characters (after leading whitespace) are examined.
 *
 * @param {{status: number, text: string}|null} result - Fetch result.
 * @returns {boolean} True when the body looks like HTML.
 */
function isHtmlResponse(result) {
  if (!result) return false;
  if (result.status !== 200) return false;
  const opening = result.text.trimStart().slice(0, 200).toLowerCase();
  if (opening.startsWith("<!doctype html")) return true;
  if (opening.startsWith("<html")) return true;
  return /<head[\s>]/i.test(opening);
}
173
/**
 * Fetches everything the audit needs for one domain.
 *
 * Steps, in order:
 *  1. Fetch the homepage over HTTPS, falling back to HTTP. A response counts
 *     as reachable when its status is in [200, 400).
 *  2. Stop early (all secondary fields null) when the site is unreachable,
 *     redirects to another site, or looks like a parked domain.
 *  3. Fetch llms.txt, robots.txt, an FAQ/help page, sitemap.xml and ai.txt in
 *     parallel.
 *  4. Locate an RSS/Atom feed via the homepage `<link>` tag or common paths.
 *  5. Fetch a sample of blog pages listed in the sitemap.
 *
 * The feed URL from the `<link>` tag is resolved with the URL constructor
 * against the homepage's final URL, so relative ("feed.xml"), root-relative
 * and protocol-relative hrefs all resolve correctly.
 *
 * @param {string} domain - Host name to audit (no scheme).
 * @returns {Promise<object>} Object with keys domain, protocol, homepage,
 *   llmsTxt, robotsTxt, faqPage, sitemapXml, rssFeed, aiTxt, redirectedTo,
 *   parkedReason and blogSample.
 */
async function prefetchSiteData(domain) {
  const isReachable = (res) => Boolean(res) && res.status >= 200 && res.status < 400;

  // Single source of truth for the result shape and key order.
  const buildResult = (overrides) => ({
    domain,
    protocol: null,
    homepage: null,
    llmsTxt: null,
    robotsTxt: null,
    faqPage: null,
    sitemapXml: null,
    rssFeed: null,
    aiTxt: null,
    redirectedTo: null,
    parkedReason: null,
    blogSample: [],
    ...overrides
  });

  // Resolves an href taken from HTML against `base`; null when it is not a usable http(s) URL.
  const resolveHttpUrl = (href, base) => {
    try {
      const resolved = new URL(href.trim().replace(/&amp;/gi, "&"), base);
      return resolved.protocol === "http:" || resolved.protocol === "https:" ? resolved.href : null;
    } catch {
      return null;
    }
  };

  let protocol = null;
  let homepage = await fetchText(`https://${domain}`);
  if (isReachable(homepage)) {
    protocol = "https";
  } else {
    homepage = await fetchText(`http://${domain}`);
    if (isReachable(homepage)) {
      protocol = "http";
    }
  }
  if (!protocol) {
    return buildResult({});
  }

  // From here on `homepage` is a reachable, non-null response.
  const leadingHtml = homepage.text.slice(0, 8192);
  const redirectedTo = detectCrossDomainRedirect(domain, homepage) || detectJsRedirect(leadingHtml, domain);
  if (redirectedTo) {
    return buildResult({ protocol, homepage, redirectedTo });
  }
  const parkedResult = detectParkedDomain(leadingHtml);
  if (parkedResult.isParked) {
    return buildResult({ protocol, homepage, parkedReason: parkedResult.reason || "parked" });
  }

  const baseUrl = `${protocol}://${domain}`;
  const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
    fetchText(`${baseUrl}/llms.txt`),
    fetchText(`${baseUrl}/robots.txt`),
    fetchText(`${baseUrl}/faq`).then(async (result) => {
      if (result && result.status === 200) return result;
      // Try the usual alternative locations one at a time; keep the /faq result if none answers 200.
      for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
        const fallback = await fetchText(`${baseUrl}${path}`);
        if (fallback && fallback.status === 200) return fallback;
      }
      return result;
    }),
    fetchText(`${baseUrl}/sitemap.xml`),
    fetchText(`${baseUrl}/ai.txt`)
  ]);

  let rssFeed = null;
  const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
  if (rssLinkMatch) {
    const rssUrl = resolveHttpUrl(rssLinkMatch[1], homepage.finalUrl || `${baseUrl}/`);
    if (rssUrl) {
      rssFeed = await fetchText(rssUrl);
    }
  }
  if (!rssFeed || rssFeed.status !== 200) {
    for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
      rssFeed = await fetchText(`${baseUrl}${path}`);
      if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
      rssFeed = null;
    }
  }

  let blogSample = [];
  if (sitemapXml && sitemapXml.status === 200) {
    let sitemapForBlog = sitemapXml.text;
    // A sitemap index points at child sitemaps; use the selected child when it loads.
    const subSitemapUrl = extractSubSitemapUrl(sitemapForBlog);
    if (subSitemapUrl) {
      const subSitemap = await fetchText(subSitemapUrl);
      if (subSitemap && subSitemap.status === 200) {
        sitemapForBlog = subSitemap.text;
      }
    }
    const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 10);
    if (blogUrls.length > 0) {
      const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
      blogSample = fetched.filter(
        (r) => r !== null && r.status === 200 && r.text.length > 500
      );
      for (const page of blogSample) {
        page.category = "blog";
      }
    }
  }

  homepage.category = "homepage";
  return { domain, protocol, homepage, llmsTxt, robotsTxt, faqPage, sitemapXml, rssFeed, aiTxt, redirectedTo: null, parkedReason: null, blogSample };
}
252
/**
 * Joins the homepage HTML and every sampled blog page's HTML with newlines.
 * A missing homepage contributes an empty string.
 *
 * @param {{homepage?: {text: string}|null, blogSample?: Array<{text: string}>}} data
 * @returns {string} Concatenated HTML.
 */
function getCombinedHtml(data) {
  const homepageHtml = data.homepage?.text || "";
  const blogPages = data.blogSample || [];
  return [homepageHtml, ...blogPages.map((page) => page.text)].join("\n");
}
261
/**
 * Joins the HTML of all sampled blog pages with newlines.
 *
 * @param {{blogSample?: Array<{text: string}>}} data
 * @returns {string} Concatenated blog HTML, or "" when there is no sample.
 */
function getBlogHtml(data) {
  const pages = data.blogSample;
  const hasPages = Boolean(pages) && pages.length !== 0;
  return hasPages ? pages.map((page) => page.text).join("\n") : "";
}
265
/**
 * Scores the site's /llms.txt file (0-10).
 *
 * Scoring: 4 for a valid text file, +2 when it has at least 100 characters,
 * +2 when it contains a markdown heading, +2 when it contains an http(s) URL.
 * A missing file, a non-200 status, or an HTML page served in its place
 * scores 0.
 *
 * A markdown heading means a line starting with one to six "#" characters
 * followed by whitespace and text; a "#" that only appears inside a URL
 * fragment does not count.
 *
 * @param {{llmsTxt: {status: number, text: string}|null, protocol: string, domain: string}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkLlmsTxt(data) {
  const findings = [];
  const result = data.llmsTxt;
  const servedAsHtml = Boolean(result) && isHtmlResponse(result);
  if (!result || result.status !== 200 || servedAsHtml) {
    let statusNote = "connection failed";
    if (result) {
      statusNote = servedAsHtml ? "HTML page served (not a valid text file)" : `HTTP ${result.status}`;
    }
    findings.push({ severity: "critical", detail: `No llms.txt file found at ${data.protocol}://${data.domain}/llms.txt (${statusNote})`, fix: "Create a /llms.txt file that describes your site, services, and key pages in markdown format" });
    return { criterion: "llms_txt", criterion_label: "llms.txt File", score: 0, status: "fail", findings, fix_priority: "P0" };
  }
  const text = result.text;
  let score = 4;
  if (text.length < 100) {
    findings.push({ severity: "medium", detail: `llms.txt exists but is very short (${text.length} characters)`, fix: "Add comprehensive description of your services, team, and key content" });
  } else {
    score += 2;
    findings.push({ severity: "info", detail: `llms.txt file found (${text.length} characters)` });
  }
  // ATX heading at the start of a line (up to three leading spaces are allowed by markdown).
  const hasMarkdownHeading = /^[ \t]{0,3}#{1,6}[ \t]+\S/m.test(text);
  if (hasMarkdownHeading) {
    score += 2;
    findings.push({ severity: "info", detail: "llms.txt uses markdown headings for structure" });
  } else {
    findings.push({ severity: "low", detail: "llms.txt lacks markdown structure", fix: "Add headings (# About, ## Services, etc.) for better LLM parsing" });
  }
  if (/https?:\/\//.test(text)) {
    score += 2;
    findings.push({ severity: "info", detail: "llms.txt includes URLs to key pages" });
  } else {
    findings.push({ severity: "medium", detail: "llms.txt does not link to key pages", fix: "Add URLs to your most important pages (services, about, FAQ)" });
  }
  const passed = score >= 7;
  return { criterion: "llms_txt", criterion_label: "llms.txt File", score: Math.min(10, score), status: passed ? "pass" : "partial", findings, fix_priority: passed ? "P3" : "P0" };
}
295
/**
 * Scores Schema.org JSON-LD usage on the homepage and sampled blog pages (0-10).
 *
 * Scoring: 3 for any JSON-LD block on the homepage, +2 per recognised type
 * (max 4), +2 for Organization/LocalBusiness, +1 for FAQPage, up to +2 for
 * additional types found only on blog pages, +1 for FAQPage found on blog
 * pages when the homepage lacks it. The total is capped at 10.
 *
 * JSON-LD script tags are recognised whether the `type` attribute is
 * double-quoted, single-quoted or unquoted.
 *
 * @param {{homepage: {text: string}|null, blogSample?: Array<{text: string}>}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkSchemaMarkup(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage to check schema markup" });
    return { criterion: "schema_markup", criterion_label: "Schema.org Structured Data", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }
  const extractJsonLdBlocks = (markup) =>
    markup.match(/<script[^>]*type\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi) || [];
  // A type counts as present when its name appears as a quoted JSON string (case-insensitive).
  const mentionsType = (schemaText, type) => schemaText.includes(`"${type}"`);

  const html = data.homepage.text;
  const ldJsonMatches = extractJsonLdBlocks(html);
  let score = 0;
  if (ldJsonMatches.length === 0) {
    findings.push({ severity: "critical", detail: "No JSON-LD structured data found on homepage", fix: 'Add Organization, LocalBusiness, or WebSite schema in a <script type="application/ld+json"> tag' });
    return { criterion: "schema_markup", criterion_label: "Schema.org Structured Data", score: 0, status: "fail", findings, fix_priority: "P1" };
  }
  score += 3;
  findings.push({ severity: "info", detail: `Found ${ldJsonMatches.length} JSON-LD block(s) on homepage` });
  const allSchemaText = ldJsonMatches.join(" ").toLowerCase();
  const schemaTypes = ["organization", "localbusiness", "faqpage", "service", "article", "webpage", "website", "breadcrumblist", "howto", "product"];
  const foundTypes = schemaTypes.filter((type) => mentionsType(allSchemaText, type));
  if (foundTypes.length > 0) {
    score += Math.min(4, foundTypes.length * 2);
    findings.push({ severity: "info", detail: `Schema types found: ${foundTypes.join(", ")}` });
  }
  if (!foundTypes.includes("organization") && !foundTypes.includes("localbusiness")) {
    findings.push({ severity: "high", detail: "Missing Organization or LocalBusiness schema", fix: "Add Organization schema with name, url, logo, contactPoint, and sameAs properties" });
  } else {
    score += 2;
    findings.push({ severity: "info", detail: "Organization or LocalBusiness schema found" });
  }
  if (!foundTypes.includes("faqpage")) {
    findings.push({ severity: "medium", detail: "No FAQPage schema found", fix: "Add FAQPage schema on pages with FAQ content" });
  } else {
    score += 1;
    findings.push({ severity: "info", detail: "FAQPage schema markup present" });
  }
  if (data.blogSample && data.blogSample.length > 0) {
    const blogLdJson = extractJsonLdBlocks(getBlogHtml(data));
    if (blogLdJson.length > 0) {
      const blogSchemaText = blogLdJson.join(" ").toLowerCase();
      // Only types the homepage did not already provide earn extra credit.
      const blogTypes = schemaTypes.filter(
        (t) => mentionsType(blogSchemaText, t) && !foundTypes.includes(t)
      );
      if (blogTypes.length > 0) {
        score += Math.min(2, blogTypes.length);
        findings.push({ severity: "info", detail: `Additional schema types found on blog pages: ${blogTypes.join(", ")}` });
      }
      if (!foundTypes.includes("faqpage") && /faqpage/i.test(blogSchemaText)) {
        score += 1;
        findings.push({ severity: "info", detail: "FAQPage schema found on blog posts" });
      }
    }
  }
  return { criterion: "schema_markup", criterion_label: "Schema.org Structured Data", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P2" : "P1" };
}
354
/**
 * Scores how much of the content is written in question-and-answer form (0-10).
 *
 * Scoring: question-style headings across homepage + blog sample
 * (>=10: +5, >=3: +3, >=1: +1), +3 when an H2/H3 ending in "?" is followed
 * directly by a paragraph, and +2 for exactly one H1 on the homepage
 * (+1 for several).
 *
 * Heading text is whitespace-normalised before classification, so headings
 * whose markup starts with a newline or indentation are still recognised
 * when they begin with a question word.
 *
 * @param {{homepage: {text: string}|null, blogSample?: Array<{text: string}>}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkQAFormat(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "qa_content_format", criterion_label: "Q&A Content Format", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }
  const html = data.homepage.text;
  let score = 0;
  const combinedHtml = getCombinedHtml(data);
  const headingTexts = (combinedHtml.match(/<h[1-6][^>]*>([\s\S]*?)<\/h[1-6]>/gi) || [])
    .map((heading) => heading.replace(/<[^>]*>/g, "").replace(/\s+/g, " ").trim());
  const questionHeadings = headingTexts.filter(
    (text) => text.includes("?") || /^(what|how|why|when|who|where|can|do|does|is|are|should)\s/i.test(text)
  );
  if (questionHeadings.length >= 10) {
    score += 5;
    findings.push({ severity: "info", detail: `Found ${questionHeadings.length} question-format headings` });
  } else if (questionHeadings.length >= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `Found ${questionHeadings.length} question-format headings` });
  } else if (questionHeadings.length >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `Only ${questionHeadings.length} question-format heading(s) found`, fix: "Structure more content as Q&A with question headings (H2/H3) followed by direct answers" });
  } else {
    findings.push({ severity: "high", detail: "No question-format headings found", fix: 'Add Q&A sections with headings like "What is...?", "How does...?" followed by concise answers' });
  }
  // An H2/H3 ending in "?" immediately followed by a paragraph of 20-500+ characters.
  const hasDirectAnswers = /<h[2-3][^>]*>[^<]*\?<\/h[2-3]>\s*<p[^>]*>[\s\S]{20,500}<\/p>/i.test(combinedHtml);
  if (hasDirectAnswers) {
    score += 3;
    findings.push({ severity: "info", detail: "Content uses direct-answer format after question headings" });
  } else {
    findings.push({ severity: "medium", detail: "Content does not follow direct-answer format", fix: "Start each answer paragraph with a concise 1-2 sentence definition before elaborating" });
  }
  const h1Count = (html.match(/<h1[\s>]/gi) || []).length;
  if (h1Count === 1) {
    score += 2;
    findings.push({ severity: "info", detail: "Proper single H1 tag hierarchy" });
  } else if (h1Count === 0) {
    findings.push({ severity: "high", detail: "No H1 tag found", fix: "Add exactly one H1 tag as the main page heading" });
  } else {
    score += 1;
    findings.push({ severity: "medium", detail: `Multiple H1 tags found (${h1Count})`, fix: "Use only one H1 per page; use H2/H3 for subsections" });
  }
  return { criterion: "qa_content_format", criterion_label: "Q&A Content Format", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P2" : "P1" };
}
396
/**
 * Scores how crawlable the homepage HTML is without executing JavaScript (0-10).
 *
 * Scoring: +1 per semantic element kind present (<main>, <article>,
 * <section>), +2 for exactly one H1, +3 for more than 500 characters of
 * visible text, +2 when both <title> and a meta description exist. Sites not
 * served over HTTPS are capped at 3.
 *
 * Visible text is measured after removing <script>/<style> blocks and HTML
 * comments, so inline JavaScript or CSS does not count as readable content.
 * The meta description is recognised with double-quoted, single-quoted or
 * unquoted `name` attributes.
 *
 * @param {{homepage: {text: string}|null, protocol: string}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkCleanHTML(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "clean_html", criterion_label: "Clean, Crawlable HTML", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }
  const html = data.homepage.text;
  let score = 0;
  const httpsAvailable = data.protocol === "https";
  if (httpsAvailable) {
    findings.push({ severity: "info", detail: "Site serves over HTTPS" });
  } else {
    findings.push({ severity: "critical", detail: "Site does not support HTTPS", fix: "Enable HTTPS with a valid SSL certificate. Sites without HTTPS are penalized by AI crawlers." });
  }
  const hasMain = /<main[\s>]/i.test(html);
  const hasArticle = /<article[\s>]/i.test(html);
  const hasSection = /<section[\s>]/i.test(html);
  const semanticCount = [hasMain, hasArticle, hasSection].filter(Boolean).length;
  score += Math.min(3, semanticCount);
  if (semanticCount >= 2) {
    findings.push({ severity: "info", detail: `Uses semantic HTML5 elements: ${[hasMain && "main", hasArticle && "article", hasSection && "section"].filter(Boolean).join(", ")}` });
  } else {
    findings.push({ severity: "medium", detail: "Limited semantic HTML5 usage", fix: "Wrap main content in <main>, use <article> for standalone content, <section> for grouped content" });
  }
  const h1Count = (html.match(/<h1[\s>]/gi) || []).length;
  if (h1Count === 1) {
    score += 2;
    findings.push({ severity: "info", detail: "Single H1 tag found - correct heading hierarchy" });
  } else {
    findings.push({ severity: h1Count === 0 ? "high" : "medium", detail: `${h1Count === 0 ? "No" : "Multiple"} H1 tag(s) found (${h1Count})`, fix: "Use exactly one H1 per page" });
  }
  // Remove non-visible source (scripts, styles, comments) before stripping the remaining tags.
  const textContent = html
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<[^>]*>/g, "")
    .replace(/\s+/g, " ")
    .trim();
  if (textContent.length > 500) {
    score += 3;
    findings.push({ severity: "info", detail: "Page has substantial text content accessible without JavaScript" });
  } else {
    findings.push({ severity: "high", detail: "Very little text content visible in HTML source", fix: "Ensure key content is server-rendered, not loaded via JavaScript only" });
  }
  const hasMetaDesc = /<meta[^>]*name\s*=\s*["']?description["']?[^>]*>/i.test(html);
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
  if (hasMetaDesc && hasTitle) {
    score += 2;
    findings.push({ severity: "info", detail: "Page has title and meta description" });
  } else {
    findings.push({ severity: "medium", detail: `Missing ${!hasTitle ? "title tag" : ""}${!hasTitle && !hasMetaDesc ? " and " : ""}${!hasMetaDesc ? "meta description" : ""}`, fix: 'Add <title> and <meta name="description"> tags' });
  }
  if (!httpsAvailable) {
    score = Math.min(score, 3);
  }
  return { criterion: "clean_html", criterion_label: "Clean, Crawlable HTML", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
447
/**
 * Scores how consistently the homepage identifies the organisation (0-10).
 *
 * Scoring: phone numbers (exactly one: +3, several: +1, none: +1), +2 for a
 * street address, +3 when "organization"/"localbusiness" appears in the
 * markup, +2 for a sameAs reference or a link to a major social network.
 *
 * Phone candidates are trusted outright when the page has a `tel:` link or a
 * schema "telephone" property; otherwise a candidate only counts when a
 * phone-related word occurs within 100 characters of it.
 *
 * Social hosts are matched on a host-label boundary, so domains that merely
 * end in "x.com" (dropbox.com, fedex.com) are not mistaken for x.com.
 *
 * @param {{homepage: {text: string}|null}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkEntityConsistency(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "entity_consistency", criterion_label: "Entity Authority & E-E-A-T", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }
  const html = data.homepage.text;
  const text = html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  let score = 0;

  const hasTelLink = /href="tel:/i.test(html);
  const hasSchemaTelephone = /"telephone"/i.test(html);
  const phoneContextWords = /\b(phone|call|tel:|contact\s*us|fax|dial)\b/i;
  const phonePattern = /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g;
  const phoneCandidates = [...text.matchAll(phonePattern)];
  const hasNearbyPhoneWord = (candidate) => {
    const start = Math.max(0, candidate.index - 100);
    const end = Math.min(text.length, candidate.index + candidate[0].length + 100);
    return phoneContextWords.test(text.slice(start, end));
  };
  const contextValidatedPhones = (hasTelLink || hasSchemaTelephone
    ? phoneCandidates
    : phoneCandidates.filter(hasNearbyPhoneWord)
  ).map((candidate) => candidate[0]);
  // Compare digits only so differently formatted copies of one number collapse.
  const uniquePhones = [...new Set(contextValidatedPhones.map((p) => p.replace(/\D/g, "")))];
  if (uniquePhones.length === 1) {
    score += 3;
    findings.push({ severity: "info", detail: "Single consistent phone number found" });
  } else if (uniquePhones.length > 1) {
    score += 1;
    findings.push({ severity: "medium", detail: `Multiple phone numbers found (${uniquePhones.length})`, fix: "Use one primary phone number consistently across all pages" });
  } else {
    findings.push({ severity: "low", detail: "No phone number found on homepage" });
    score += 1;
  }

  const hasAddress = /\d+\s+\w+\s+(street|st|avenue|ave|road|rd|drive|dr|blvd|boulevard|lane|ln|way|court|ct)/i.test(text);
  if (hasAddress) {
    score += 2;
    findings.push({ severity: "info", detail: "Physical address found on page" });
  }

  const hasOrgSchema = /organization|localbusiness/i.test(html);
  if (hasOrgSchema) {
    score += 3;
    findings.push({ severity: "info", detail: "Organization/LocalBusiness schema reinforces entity identity" });
  } else {
    findings.push({ severity: "high", detail: "No Organization schema to reinforce entity identity", fix: "Add Organization JSON-LD with consistent name, address, phone, and social links" });
  }

  // The lookbehind rejects hits that are the tail of a longer host label.
  const hasSameAs = /sameas|(?<![\w-])(?:linkedin|facebook|twitter|x)\.com/i.test(html);
  if (hasSameAs) {
    score += 2;
    findings.push({ severity: "info", detail: "Social media / sameAs references found" });
  } else {
    findings.push({ severity: "low", detail: "No social media links or sameAs found", fix: "Add sameAs links in Organization schema to social profiles" });
  }
  return { criterion: "entity_consistency", criterion_label: "Entity Authority & E-E-A-T", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
508
/**
 * Scores robots.txt with respect to AI crawlers (0-10).
 *
 * Scoring: a missing/invalid file scores 2. Otherwise 3 for a valid file,
 * +3 when any AI crawler token is mentioned, then -2 when a mentioned
 * crawler is blocked or +2 when none is, and +2 for a Sitemap reference.
 *
 * The file is parsed into groups: consecutive `User-agent` lines share the
 * rules that follow them. A crawler token is blocked when any group naming a
 * user-agent that contains the token has `Disallow: /` without `Allow: /`.
 * This covers product tokens such as "anthropic-ai" and "ChatGPT-User" as
 * well as several user-agents sharing one rule set.
 *
 * @param {{robotsTxt: {status: number, text: string}|null}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkRobotsTxt(data) {
  // Splits lower-cased robots.txt text into [{agents, allowAll, disallowAll}] groups.
  const parseGroups = (robotsText) => {
    const groups = [];
    let current = null;
    let previousWasAgent = false;
    for (const rawLine of robotsText.split(/\r?\n|\r/)) {
      const line = rawLine.replace(/#.*$/, "").trim();
      const separator = line.indexOf(":");
      if (separator === -1) continue;
      const field = line.slice(0, separator).trim();
      const value = line.slice(separator + 1).trim();
      if (field === "user-agent") {
        // A user-agent line that follows a rule line starts a new group.
        if (!current || !previousWasAgent) {
          current = { agents: [], allowAll: false, disallowAll: false };
          groups.push(current);
        }
        current.agents.push(value);
        previousWasAgent = true;
      } else if (field === "allow" || field === "disallow") {
        previousWasAgent = false;
        if (current && value === "/") {
          if (field === "allow") {
            current.allowAll = true;
          } else {
            current.disallowAll = true;
          }
        }
      }
    }
    return groups;
  };

  const findings = [];
  const result = data.robotsTxt;
  if (!result || result.status !== 200 || isHtmlResponse(result)) {
    findings.push({ severity: "high", detail: "No robots.txt file found", fix: "Create a robots.txt that explicitly allows AI crawlers" });
    return { criterion: "robots_txt", criterion_label: "robots.txt for AI Crawlers", score: 2, status: "fail", findings, fix_priority: "P0" };
  }
  const text = result.text.toLowerCase();
  let score = 3;
  const aiCrawlers = ["gptbot", "claudebot", "perplexitybot", "anthropic", "chatgpt"];
  const mentionedCrawlers = aiCrawlers.filter((c) => text.includes(c));
  if (mentionedCrawlers.length > 0) {
    score += 3;
    findings.push({ severity: "info", detail: `AI crawlers mentioned: ${mentionedCrawlers.join(", ")}` });
    const groups = parseGroups(text);
    const blocked = mentionedCrawlers.filter((crawler) =>
      groups.some(
        (group) =>
          group.agents.some((agent) => agent.includes(crawler)) &&
          group.disallowAll &&
          !group.allowAll
      )
    );
    if (blocked.length > 0) {
      score -= 2;
      findings.push({ severity: "critical", detail: `AI crawlers BLOCKED: ${blocked.join(", ")}`, fix: "Change Disallow: / to Allow: / for AI crawler user-agents" });
    } else {
      score += 2;
      findings.push({ severity: "info", detail: "AI crawlers are allowed to index the site" });
    }
  } else {
    findings.push({ severity: "medium", detail: "No explicit AI crawler rules in robots.txt", fix: "Add User-agent rules for GPTBot, ClaudeBot, PerplexityBot with Allow: /" });
  }
  if (text.includes("sitemap:")) {
    score += 2;
    findings.push({ severity: "info", detail: "Sitemap URL referenced in robots.txt" });
  } else {
    findings.push({ severity: "low", detail: "No sitemap reference in robots.txt", fix: "Add Sitemap: https://yoursite.com/sitemap.xml to robots.txt" });
  }
  return { criterion: "robots_txt", criterion_label: "robots.txt for AI Crawlers", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 4 ? "P2" : "P0" };
}
548
/**
 * Scores FAQ coverage across the homepage, the FAQ page and sampled blog
 * pages (0-10).
 *
 * Scoring: +2 for FAQ wording on the homepage, +3 for a dedicated FAQ page
 * (200 response with more than 500 characters), +1 when that page mentions an
 * accordion/toggle pattern, +3 when "FAQPage" and a JSON-LD marker both
 * appear in the checked HTML, +1 for ten or more H2-H4 headings ending in "?".
 *
 * @param {{homepage: {text: string}|null, faqPage: {status: number, text: string}|null, blogSample?: Array<{text: string}>}} data
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 */
function checkFAQSection(data) {
  const findings = [];
  const { homepage, faqPage } = data;
  let score = 0;

  // 1. FAQ wording on the homepage.
  if (homepage && /faq|frequently\s+asked/i.test(homepage.text)) {
    score += 2;
    findings.push({ severity: "info", detail: "FAQ content found on homepage" });
  } else {
    findings.push({ severity: "low", detail: "No FAQ content found on homepage", fix: "Add an FAQ section to your homepage addressing common visitor questions" });
  }

  // 2. Dedicated FAQ page.
  const faqPageIsUsable = faqPage && faqPage.status === 200 && faqPage.text.length > 500;
  if (!faqPageIsUsable) {
    findings.push({ severity: "high", detail: "No dedicated FAQ page found at /faq", fix: "Create a comprehensive FAQ page at /faq covering common questions about your service" });
  } else {
    score += 3;
    findings.push({ severity: "info", detail: "Dedicated FAQ page exists" });
    if (/accordion|toggle|collaps|expand/i.test(faqPage.text)) {
      score += 1;
      findings.push({ severity: "info", detail: "FAQ uses accordion/toggle UI pattern" });
    }
  }

  // 3. FAQPage structured data anywhere in the checked pages.
  const blogHtml = getBlogHtml(data);
  const allHtml = `${homepage?.text || ""}${faqPage?.text || ""}${blogHtml}`;
  const mentionsFaqSchema = (markup) => /faqpage/i.test(markup) && /application\/ld\+json/i.test(markup);
  if (mentionsFaqSchema(allHtml)) {
    score += 3;
    const faqOnBlog = blogHtml && mentionsFaqSchema(blogHtml);
    findings.push({ severity: "info", detail: faqOnBlog ? "FAQPage schema markup found on blog posts" : "FAQPage schema markup found" });
  } else {
    findings.push({ severity: "medium", detail: "No FAQPage schema markup", fix: "Add FAQPage JSON-LD schema to pages with FAQ content" });
  }

  // 4. Volume of question headings.
  const questionHeadings = allHtml.match(/<h[2-4][^>]*>[^<]*\?<\/h[2-4]>/gi) || [];
  const questionCount = questionHeadings.length;
  if (questionCount >= 10) {
    score += 1;
    findings.push({ severity: "info", detail: `${questionCount} question headings found across checked pages` });
  } else if (questionCount >= 5) {
    findings.push({ severity: "low", detail: `Only ${questionCount} question headings found`, fix: "Expand FAQ to cover at least 10-15 common questions" });
  }

  let status = "fail";
  if (score >= 7) {
    status = "pass";
  } else if (score >= 4) {
    status = "partial";
  }
  return { criterion: "faq_section", criterion_label: "Comprehensive FAQ Sections", score: Math.min(10, score), status, findings, fix_priority: score >= 4 ? "P2" : "P1" };
}
589
/**
 * Scores the "original_data" criterion using regex heuristics over the
 * homepage and, where present, the sampled blog posts.
 *
 * Points awarded:
 *  - statistics: +3 when research wording is also present, +1 otherwise
 *  - case studies / testimonials: +3 when a metric sits within 200 characters
 *    of a mention, +1 otherwise
 *  - expert attribution wording (homepage or blog sample): +2
 *  - a link to a blog/articles/insights/guides/resources path: +2
 *  - case studies found only in the blog sample: +1
 * The reported score is capped at 10.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` and the
 *   optional `data.blogSample` array (each entry's `text` is read).
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority` (always "P2" for this criterion).
 */
function checkOriginalData(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };
  const toPlainText = (markup) => markup.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");

  if (!data.homepage) {
    report("critical", "Could not fetch homepage");
    return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const html = data.homepage.text;
  const text = toPlainText(html);
  let points = 0;

  // --- Statistics, optionally backed by research wording ---
  const hasStatistic = /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i.test(text);
  if (!hasStatistic) {
    report("medium", "No proprietary data or statistics found", "Add unique statistics, case study results, or industry data that LLMs would cite as authoritative");
  } else if (/\b(our\s+(?:study|analysis|research|data|survey|findings|report)|we\s+(?:surveyed|analyzed|studied|measured|tracked)|proprietary|methodology|original\s+research)\b/i.test(text)) {
    points += 3;
    report("info", "Proprietary statistics with research context found on homepage");
  } else {
    points += 1;
    report("low", 'Statistics found but without research context (e.g., "500+ clients")', 'Add context about your methodology: "Our analysis of X found..." or "We surveyed Y..."');
  }

  // --- Case studies / testimonials, ideally with a metric close by ---
  const caseStudyPattern = /case\s+stud|testimonial|success\s+stor|client\s+stor/i;
  const mentions = [...text.matchAll(/case\s+stud|testimonial|success\s+stor|client\s+stor/gi)];
  const mentionedOnHomepage = mentions.length > 0;
  if (!mentionedOnHomepage) {
    report("medium", "No case studies or testimonials found", "Add case studies with specific outcomes and metrics");
  } else {
    // Look 200 characters either side of each mention for a %, $ amount or "Nx" multiplier.
    const hasMetricNearby = mentions.some((mention) => {
      const from = Math.max(0, mention.index - 200);
      const to = Math.min(text.length, mention.index + mention[0].length + 200);
      return /\d+%|\$[\d,]+|\d+x\b/i.test(text.slice(from, to));
    });
    if (hasMetricNearby) {
      points += 3;
      report("info", "Case studies or testimonials with specific metrics found");
    } else {
      points += 1;
      report("low", "Case studies or testimonials mentioned but without specific metrics", 'Add measurable outcomes to case studies (e.g., "increased traffic by 45%")');
    }
  }

  // --- Expert attribution (homepage text plus any sampled blog posts) ---
  const hasBlogSample = Boolean(data.blogSample && data.blogSample.length > 0);
  const blogText = hasBlogSample ? data.blogSample.map((post) => toPlainText(post.text)).join(" ") : "";
  const combinedText = hasBlogSample ? text + " " + blogText : text;
  if (/written\s+by|authored\s+by|expert|specialist|board.certified|licensed/i.test(combinedText)) {
    points += 2;
    report("info", "Expert attribution or credentials found");
  } else {
    report("low", "No expert attribution or credentials visible", "Add author bios with credentials to establish E-E-A-T signals");
  }

  // --- Link from the homepage to a content section ---
  if (/href="[^"]*\/(?:blog|articles|insights|guides|resources)\b[^"]*"/i.test(html)) {
    points += 2;
    report("info", "Links to blog/articles section found on site");
  } else {
    report("medium", "No links to blog or articles section found", "Create a content section with expert articles and link to it from your homepage");
  }

  // --- Bonus: case studies that only show up in blog posts ---
  if (hasBlogSample && !mentionedOnHomepage && caseStudyPattern.test(blogText)) {
    points += 1;
    report("info", "Case studies or testimonials found on blog posts");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "original_data", criterion_label: "Original Data & Expert Content", score: Math.min(10, points), status, findings, fix_priority: "P2" };
}
658
/**
 * Scores the "internal_linking" criterion from the homepage HTML.
 *
 * Points awarded: internal link count (+3 for >= 20, +2 for >= 10),
 * breadcrumb markers (+2), a <nav> element (+2), related-content wording (+2)
 * and a <footer> element (+1). The reported score is capped at 10.
 *
 * A link counts as internal when its double-quoted href is root-relative
 * ("/path") or contains `data.domain`. Protocol-relative hrefs ("//host/...")
 * name another host, so they only count when they contain the site's domain.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` and
 *   `data.domain`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkInternalLinking(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "internal_linking", criterion_label: "Internal Linking Architecture", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  const html = data.homepage.text;
  // An empty or missing domain must not match every href via includes("").
  const domain = data.domain ? String(data.domain) : "";
  let score = 0;

  const anchorTags = html.match(/<a[^>]*href="([^"]*)"[^>]*>/gi) || [];
  const internalLinks = anchorTags.filter((tag) => {
    // Case-insensitive so it agrees with the anchor match above (e.g. HREF="...").
    const href = tag.match(/href="([^"]*)"/i)?.[1] || "";
    // "//cdn.example.net/x" starts with "/" but points at another host.
    const isRootRelative = href.startsWith("/") && !href.startsWith("//");
    return isRootRelative || (domain !== "" && href.includes(domain));
  });

  if (internalLinks.length >= 20) {
    score += 3;
    findings.push({ severity: "info", detail: `${internalLinks.length} internal links found on homepage` });
  } else if (internalLinks.length >= 10) {
    score += 2;
    findings.push({ severity: "low", detail: `${internalLinks.length} internal links on homepage`, fix: "Add more internal links to key service/content pages" });
  } else {
    findings.push({ severity: "high", detail: `Only ${internalLinks.length} internal links on homepage`, fix: "Add prominent internal links to service pages, FAQ, blog, and about pages" });
  }

  if (/breadcrumb|aria-label="breadcrumb"/i.test(html)) {
    score += 2;
    findings.push({ severity: "info", detail: "Breadcrumb navigation detected" });
  } else {
    findings.push({ severity: "medium", detail: "No breadcrumb navigation found", fix: "Add breadcrumb navigation with BreadcrumbList schema markup" });
  }

  if (/<nav[\s>]/i.test(html)) {
    score += 2;
    findings.push({ severity: "info", detail: "Semantic <nav> element used for navigation" });
  } else {
    findings.push({ severity: "low", detail: "No semantic <nav> element found", fix: "Wrap navigation menus in <nav> for better AI and accessibility parsing" });
  }

  if (/related|see\s+also|learn\s+more|explore|you\s+may\s+also/i.test(html)) {
    score += 2;
    findings.push({ severity: "info", detail: "Related content or cross-linking sections found" });
  } else {
    findings.push({ severity: "low", detail: "No related content or cross-linking found", fix: 'Add "Related Services" or "Learn More" sections to build topic clusters' });
  }

  if (/<footer[\s>]/i.test(html)) {
    score += 1;
    findings.push({ severity: "info", detail: "Footer element with likely navigation links" });
  } else {
    findings.push({ severity: "low", detail: "No <footer> element found", fix: "Add a <footer> with navigation links, contact info, and site structure" });
  }

  const status = score >= 7 ? "pass" : score >= 4 ? "partial" : "fail";
  return { criterion: "internal_linking", criterion_label: "Internal Linking Architecture", score: Math.min(10, score), status, findings, fix_priority: score >= 4 ? "P2" : "P1" };
}
706
/**
 * Scores the "semantic_html" criterion.
 *
 * The six structural elements (<main>, <article>, <time>, <nav>, <header>,
 * <footer>) are searched in the HTML returned by `getCombinedHtml(data)`;
 * image alt text, the lang attribute and ARIA usage are checked on the
 * homepage only.
 *
 * Points awarded: min(4, floor(found * 0.7)) for structural elements, +2 when
 * at least 80% of <img> tags have non-empty alt text, +2 for a lang
 * attribute, +2 for any role/aria-* attribute. The reported score is capped
 * at 10.
 *
 * The lang check accepts 2-3 letter primary subtags with optional subtags
 * (e.g. "en", "en-US", "zh-Hans-CN") in single or double quotes, so
 * region-qualified pages are no longer reported as missing a lang attribute.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` and is passed
 *   to `getCombinedHtml`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkSemanticHTML(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "semantic_html", criterion_label: "Semantic HTML5 & Accessibility", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  const combinedHtml = getCombinedHtml(data);
  const html = data.homepage.text;
  let score = 0;

  const checks = [
    ["<main>", /<main[\s>]/i, "Wrap primary page content in <main>"],
    ["<article>", /<article[\s>]/i, "Use <article> for standalone content blocks"],
    ["<time>", /<time[\s>]/i, "Use <time> elements for dates"],
    ["<nav>", /<nav[\s>]/i, "Use <nav> for navigation sections"],
    ["<header>", /<header[\s>]/i, "Use <header> for page/section headers"],
    ["<footer>", /<footer[\s>]/i, "Use <footer> for page/section footers"]
  ];
  let found = 0;
  for (const [name, regex, fix] of checks) {
    if (regex.test(combinedHtml)) {
      found++;
    } else {
      findings.push({ severity: "low", detail: `Missing ${name} element`, fix });
    }
  }
  // 6 elements map onto at most 4 points (0, 0, 1, 2, 2, 3, 4 for 0..6 found).
  score += Math.min(4, Math.floor(found * 0.7));
  if (found >= 4) {
    findings.push({ severity: "info", detail: `${found}/6 key semantic HTML5 elements found` });
  }

  const images = html.match(/<img[^>]*>/gi) || [];
  if (images.length > 0) {
    const imagesWithAlt = images.filter((img) => /alt="[^"]+"/i.test(img));
    const ratio = imagesWithAlt.length / images.length;
    if (ratio >= 0.8) {
      score += 2;
      findings.push({ severity: "info", detail: `${Math.round(ratio * 100)}% of images have alt text` });
    } else {
      findings.push({ severity: "medium", detail: `Only ${Math.round(ratio * 100)}% of images have alt text`, fix: "Add descriptive alt text to all images" });
    }
  }

  // Accept language tags with subtags ("en-US") and either quote style.
  if (/lang=(["'])[a-z]{2,3}(?:-[a-z0-9]{1,8})*\1/i.test(html)) {
    score += 2;
    findings.push({ severity: "info", detail: "HTML lang attribute set" });
  } else {
    findings.push({ severity: "medium", detail: "Missing lang attribute on <html> tag", fix: 'Add lang="en" (or appropriate language) to the <html> tag' });
  }

  if (/role="|aria-/i.test(html)) {
    score += 2;
    findings.push({ severity: "info", detail: "ARIA attributes found for accessibility" });
  } else {
    findings.push({ severity: "low", detail: "No ARIA roles or attributes found", fix: "Add ARIA roles and labels to improve accessibility and semantic parsing" });
  }

  const status = score >= 7 ? "pass" : score >= 4 ? "partial" : "fail";
  return { criterion: "semantic_html", criterion_label: "Semantic HTML5 & Accessibility", score: Math.min(10, score), status, findings, fix_priority: score >= 4 ? "P3" : "P2" };
}
758
/**
 * Scores the "content_freshness" criterion from the homepage HTML.
 *
 * Points awarded: datePublished/dateCreated/dateModified tokens (+3),
 * <time> elements (+3 for two or more, +1 for exactly one),
 * article:published_time / article:modified_time tokens (+2), and a mention
 * of the current or previous calendar year (+2). The reported score is
 * capped at 10.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkContentFreshness(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };

  if (!data.homepage) {
    report("critical", "Could not fetch homepage");
    return { criterion: "content_freshness", criterion_label: "Content Freshness Signals", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const html = data.homepage.text;
  let points = 0;

  // Structured-data date properties (dateCreated is reported as datePublished).
  const dateProps = [];
  if (/datePublished|dateCreated/i.test(html)) {
    dateProps.push("datePublished");
  }
  if (/dateModified/i.test(html)) {
    dateProps.push("dateModified");
  }
  if (dateProps.length > 0) {
    points += 3;
    report("info", `JSON-LD date properties found: ${dateProps.join(", ")}`);
  } else {
    report("high", "No JSON-LD date properties (datePublished/dateModified) found", "Add datePublished and dateModified to Article or WebPage schema");
  }

  // <time> elements.
  const timeCount = (html.match(/<time[\s>]/gi) || []).length;
  if (timeCount === 0) {
    report("medium", "No <time> elements found", 'Wrap publication and modification dates in <time datetime="..."> elements');
  } else if (timeCount === 1) {
    points += 1;
    report("low", "Only 1 <time> element found", 'Use <time datetime="..."> for all dates to help AI parsers');
  } else {
    points += 3;
    report("info", `${timeCount} <time> elements found`);
  }

  // Open Graph article date meta tags.
  if (/article:published_time|article:modified_time/i.test(html)) {
    points += 2;
    report("info", "Open Graph article date meta tags found");
  } else {
    report("low", "No article:published_time or article:modified_time meta tags", "Add Open Graph article date meta tags");
  }

  // Any mention of this year or last year, relative to the system clock.
  const thisYear = new Date().getFullYear();
  const lastYear = thisYear - 1;
  if ([thisYear, lastYear].some((year) => html.includes(String(year)))) {
    points += 2;
    report("info", `References to ${thisYear} or ${lastYear} found, suggesting recent content`);
  } else {
    report("low", "No references to recent years found on homepage");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "content_freshness", criterion_label: "Content Freshness Signals", score: Math.min(10, points), status, findings, fix_priority: points >= 7 ? "P3" : "P2" };
}
801
/**
 * Scores the "sitemap_completeness" criterion from the fetched sitemap.xml.
 *
 * A sitemap response with status 200 starts at 2 points. Further points:
 * <urlset>/<sitemapindex> root present (+2), number of <loc> entries
 * (+3 for >= 50, +2 for >= 10, +1 for >= 1), at least one non-empty
 * <lastmod> (+2), and a <sitemapindex> root (+1). The reported score is
 * capped at 10.
 *
 * @param {object} data - Crawl data. Reads `data.sitemapXml.status` and
 *   `data.sitemapXml.text`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkSitemapCompleteness(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };

  const sitemap = data.sitemapXml;
  const fetched = Boolean(sitemap) && sitemap.status === 200;
  if (!fetched) {
    report("critical", "No sitemap.xml found", "Create a sitemap.xml with all indexable pages and submit to search engines");
    return { criterion: "sitemap_completeness", criterion_label: "Sitemap Completeness", score: 0, status: "fail", findings, fix_priority: "P1" };
  }

  const xml = sitemap.text;
  const isIndex = xml.includes("<sitemapindex");
  // Base points for the sitemap being reachable at all.
  let points = 2;

  if (xml.includes("<urlset") || isIndex) {
    points += 2;
    report("info", "Valid sitemap XML structure detected");
  } else {
    report("high", "sitemap.xml does not contain valid XML structure", "Ensure sitemap uses proper <urlset> or <sitemapindex> XML format");
  }

  const urlCount = (xml.match(/<loc>/gi) || []).length;
  if (urlCount >= 10) {
    points += urlCount >= 50 ? 3 : 2;
    report("info", `${urlCount} URLs in sitemap`);
  } else if (urlCount > 0) {
    points += 1;
    report("low", `Only ${urlCount} URL(s) in sitemap`, "Add all important pages to your sitemap");
  }

  const lastmodCount = (xml.match(/<lastmod>([^<]+)<\/lastmod>/gi) || []).length;
  if (lastmodCount > 0) {
    points += 2;
    report("info", `${lastmodCount} URLs have lastmod dates`);
  } else {
    report("medium", "No lastmod dates in sitemap", "Add <lastmod> dates to sitemap entries for freshness signals");
  }

  if (isIndex) {
    points += 1;
    report("info", "Sitemap index found, indicating organized sitemap structure");
  } else {
    report("low", "No sitemap index structure", "Use a <sitemapindex> with multiple child sitemaps for larger sites to improve crawl efficiency");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "sitemap_completeness", criterion_label: "Sitemap Completeness", score: Math.min(10, points), status, findings, fix_priority: points >= 7 ? "P3" : "P1" };
}
842
/**
 * Scores the "rss_feed" criterion.
 *
 * Points awarded: an RSS/Atom <link> tag on the homepage (+3), a fetched
 * feed (status 200) containing an <rss>, <feed> or <channel> marker (+3),
 * and feed items (+4 for five or more <item>/<entry> tags, +2 for one to
 * four). The reported score is capped at 10.
 *
 * Unlike most sibling checks, a missing homepage does not short-circuit; it
 * simply means no link tag can be found.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` (optional)
 *   and `data.rssFeed.status` / `data.rssFeed.text` (optional).
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkRssFeed(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };
  let points = 0;

  const hasRssLink = data.homepage && /<link[^>]*type="application\/(?:rss|atom)\+xml"/i.test(data.homepage.text);
  if (hasRssLink) {
    points += 3;
    report("info", "RSS/Atom feed link tag found in homepage <head>");
  } else {
    report("high", "No RSS/Atom feed link tag in homepage", 'Add <link rel="alternate" type="application/rss+xml" href="/feed"> to your <head>');
  }

  const feed = data.rssFeed;
  const feedFetched = Boolean(feed) && feed.status === 200;
  if (!feedFetched) {
    // Only report a missing feed when the link tag is missing as well.
    if (!hasRssLink) {
      report("medium", "No accessible RSS/Atom feed found", "Create an RSS feed to help AI engines discover and index new content automatically");
    }
  } else {
    const feedText = feed.text;
    const looksLikeFeed = ["<rss", "<feed", "<channel"].some((marker) => feedText.includes(marker));
    if (!looksLikeFeed) {
      report("medium", "Feed URL returned content but not valid RSS/Atom XML", "Ensure your feed outputs valid RSS 2.0 or Atom XML");
    } else {
      points += 3;
      report("info", "Valid RSS/Atom feed content detected");
      const itemCount = (feedText.match(/<item[\s>]|<entry[\s>]/gi) || []).length;
      if (itemCount >= 5) {
        points += 4;
        report("info", `Feed contains ${itemCount} items`);
      } else if (itemCount > 0) {
        points += 2;
        report("low", `Feed contains only ${itemCount} item(s)`, "Publish more content to populate your RSS feed with at least 5 items");
      }
    }
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "rss_feed", criterion_label: "RSS/Atom Feed", score: Math.min(10, points), status, findings, fix_priority: points >= 4 ? "P3" : "P2" };
}
875
/**
 * Scores the "table_list_extractability" criterion over the HTML returned by
 * `getCombinedHtml(data)`.
 *
 * Points awarded: tables (+3 when at least one <table>...</table> contains a
 * <th>, otherwise +1 when any <table> exists), a second table with headers
 * (+1), ordered lists (+2), unordered lists (+2), ten or more <li> items
 * (+1) and definition lists (+1). The reported score is capped at 10.
 *
 * @param {object} data - Crawl data. `data.homepage` must be truthy; the
 *   object is passed to `getCombinedHtml`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkTableListExtractability(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };

  if (!data.homepage) {
    report("critical", "Could not fetch homepage");
    return { criterion: "table_list_extractability", criterion_label: "Table & List Extractability", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const html = getCombinedHtml(data);
  const countMatches = (pattern) => (html.match(pattern) || []).length;
  let points = 0;

  // Tables: opening tags vs. complete tables that contain a <th> cell.
  const tableCount = countMatches(/<table[\s>]/gi);
  const headedTableCount = (html.match(/<table[\s\S]*?<\/table>/gi) || []).filter((table) => /<th[\s>]/i.test(table)).length;
  if (headedTableCount >= 1) {
    points += 3;
    report("info", `${headedTableCount} table(s) with <th> headers found`);
  } else if (tableCount > 0) {
    points += 1;
    report("medium", `${tableCount} table(s) found but without <th> header cells`, "Add <th> header cells to tables for better AI extraction");
  } else {
    report("low", "No HTML tables found", "Use comparison tables with <th> headers for structured data AI engines can extract");
  }

  if (headedTableCount >= 2) {
    points += 1;
    report("info", "Multiple well-structured tables present");
  } else if (headedTableCount === 1) {
    report("low", "Only 1 table with headers found", "Add more comparison or data tables with <th> headers to increase extractable structured content");
  }

  // Ordered lists.
  const orderedCount = countMatches(/<ol[\s>]/gi);
  if (orderedCount >= 1) {
    points += 2;
    report("info", `${orderedCount} ordered list(s) found - good for step-by-step content`);
  } else {
    report("low", "No ordered lists (<ol>) found", "Use <ol> for sequential content (steps, rankings, processes)");
  }

  // Unordered lists.
  const unorderedCount = countMatches(/<ul[\s>]/gi);
  if (unorderedCount >= 1) {
    points += 2;
    report("info", `${unorderedCount} unordered list(s) found`);
  } else {
    report("low", "No unordered lists (<ul>) found", "Use <ul> for feature lists, benefits, and bullet-point content");
  }

  // List item volume: bonus only, no finding when below the threshold.
  const itemCount = countMatches(/<li[\s>]/gi);
  if (itemCount >= 10) {
    points += 1;
    report("info", `${itemCount} list items - substantial extractable content`);
  }

  // Definition lists.
  const definitionListCount = countMatches(/<dl[\s>]/gi);
  if (definitionListCount >= 1) {
    points += 1;
    report("info", `${definitionListCount} definition list(s) found`);
  } else {
    report("low", "No definition lists (<dl>) found", "Use <dl>/<dt>/<dd> for term-definition pairs to improve AI extractability");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "table_list_extractability", criterion_label: "Table & List Extractability", score: Math.min(10, points), status, findings, fix_priority: points >= 7 ? "P3" : "P2" };
}
928
/**
 * Scores the "definition_patterns" criterion over the HTML returned by
 * `getCombinedHtml(data)`.
 *
 * Points awarded: definition-style phrases in the tag-stripped text (+5 for
 * three or more matches, +3 for one or two), at least one such phrase in the
 * first 2000 characters (+2), a <dfn> or <abbr> element (+1), and a <dl>
 * element or the words glossary/definitions/terminology in the markup (+2).
 * The reported score is capped at 10.
 *
 * @param {object} data - Crawl data. `data.homepage` must be truthy; the
 *   object is passed to `getCombinedHtml`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkDefinitionPatterns(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };

  if (!data.homepage) {
    report("critical", "Could not fetch homepage");
    return { criterion: "definition_patterns", criterion_label: "Definition Patterns", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const html = getCombinedHtml(data);
  const text = html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  let points = 0;

  const definitionPatterns = [
    /\b\w[\w\s]{2,30}\bis\s+(?:a|an|the)\s/gi,
    /\b\w[\w\s]{2,30}\bare\s+(?:a|an|the)\s/gi,
    /\brefers?\s+to\b/gi,
    /\bdefined\s+as\b/gi,
    /\bknown\s+as\b/gi,
    /\bmeans?\s+that\b/gi
  ];

  // Total number of matches across every pattern.
  const patternCount = definitionPatterns.reduce((total, pattern) => total + (text.match(pattern) || []).length, 0);
  if (patternCount >= 3) {
    points += 5;
    report("info", `${patternCount} definition-style patterns found (e.g., "X is a...", "refers to", "defined as")`);
  } else if (patternCount >= 1) {
    points += 3;
    report("low", `Only ${patternCount} definition pattern(s) found`, 'Start key descriptions with clear definition patterns like "X is a..." or "X refers to..."');
  } else {
    report("medium", "No definition patterns found", 'Add clear definitions using patterns like "[Term] is [definition]" that AI engines can extract as snippets');
  }

  // search() always scans from the start, so the global flag's lastIndex
  // state on these patterns never comes into play.
  const earlyText = text.slice(0, 2e3);
  const hasEarlyDefinition = definitionPatterns.some((pattern) => earlyText.search(pattern) !== -1);
  if (hasEarlyDefinition) {
    points += 2;
    report("info", "Definition patterns appear early in page content - good for snippet extraction");
  } else {
    report("low", "No definition patterns in the first 2000 characters of content", "Place key definitions early on the page where AI engines prioritize extraction");
  }

  const semanticTags = [];
  if (/<dfn[\s>]/i.test(html)) {
    semanticTags.push("<dfn>");
  }
  if (/<abbr[\s>]/i.test(html)) {
    semanticTags.push("<abbr>");
  }
  if (semanticTags.length > 0) {
    points += 1;
    report("info", `Semantic definition elements found: ${semanticTags.join(", ")}`);
  } else {
    report("low", "No <dfn> or <abbr> elements found", "Use <dfn> for term definitions and <abbr> for abbreviations to help AI parse terminology");
  }

  if (/<dl[\s>]/i.test(html) || /glossary|definitions|terminology/i.test(html)) {
    points += 2;
    report("info", "Glossary or definition list structure detected");
  } else {
    report("low", "No glossary or definition list found", "Add a glossary section using <dl>/<dt>/<dd> for key industry terms");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "definition_patterns", criterion_label: "Definition Patterns", score: Math.min(10, points), status, findings, fix_priority: points >= 7 ? "P3" : "P2" };
}
987
/**
 * Scores the "direct_answer_density" criterion.
 *
 * Question/answer pairs and paragraph lengths are measured on the HTML
 * returned by `getCombinedHtml(data)`; direct-answer openers are counted on
 * the homepage text only.
 *
 * Points awarded: h2-h4 headings ending in "?" immediately followed by a <p>
 * (+6 for three or more, +3 for one or two), paragraphs of 40-150 words
 * (+2 for three or more, +1 for one or two), and two or more direct openers
 * such as "yes", "no" or "in short" (+2). The reported score is capped at 10.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` and is passed
 *   to `getCombinedHtml`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkDirectAnswerDensity(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };

  if (!data.homepage) {
    report("critical", "Could not fetch homepage");
    return { criterion: "direct_answer_density", criterion_label: "Direct Answer Paragraphs", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const html = getCombinedHtml(data);
  let points = 0;

  // Question heading directly followed by a paragraph.
  const pairCount = (html.match(/<h[2-4][^>]*>[^<]*\?<\/h[2-4]>\s*<p[^>]*>/gi) || []).length;
  if (pairCount === 0) {
    report("high", "No direct question-answer pairs found", 'Structure content with question headings (e.g., "What is X?") immediately followed by a concise answer paragraph');
  } else if (pairCount < 3) {
    points += 3;
    report("low", `${pairCount} question-answer pair(s) found`, "Add more question headings (H2/H3) immediately followed by concise answer paragraphs");
  } else {
    points += 6;
    report("info", `${pairCount} question-answer pairs found (question heading + direct answer paragraph)`);
  }

  // Paragraphs whose tag-stripped word count falls in the 40-150 range.
  const isSnippetSized = (paragraph) => {
    const words = paragraph.replace(/<[^>]*>/g, "").trim().split(/\s+/).length;
    return words >= 40 && words <= 150;
  };
  const snippetCount = (html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || []).filter(isSnippetSized).length;
  if (snippetCount === 0) {
    report("medium", "No paragraphs in the optimal snippet zone (40-150 words)", "Write self-contained paragraphs of 40-150 words that directly answer common questions");
  } else if (snippetCount < 3) {
    points += 1;
    report("low", `Only ${snippetCount} paragraph(s) in optimal snippet length`, "Write more paragraphs in the 40-150 word range for AI snippet extraction");
  } else {
    points += 2;
    report("info", `${snippetCount} paragraphs in snippet zone (40-150 words) - ideal for AI extraction`);
  }

  // Direct-answer openers, counted on the homepage only.
  const homepageText = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  const openerCount = (homepageText.match(/\b(yes|no|the answer is|in short|simply put|to summarize)\b/gi) || []).length;
  if (openerCount >= 2) {
    points += 2;
    report("info", 'Direct answer openers found (e.g., "Yes,", "In short,")');
  } else {
    report("low", "Few or no direct answer openers found", 'Start answers with direct phrases like "Yes,", "No,", "In short," to signal definitive answers to AI engines');
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "direct_answer_density", criterion_label: "Direct Answer Paragraphs", score: Math.min(10, points), status, findings, fix_priority: points >= 7 ? "P2" : "P1" };
}
1031
/**
 * Scores the "content_licensing" criterion.
 *
 * Points awarded: an ai.txt response with status 200, more than 20
 * characters and not flagged by `isHtmlResponse` (+4); policy/licensing
 * wording in the tag-stripped homepage text (+2); license/copyright tokens
 * together with an application/ld+json marker in the homepage HTML (+2); and
 * TDM or Creative Commons references in the homepage HTML or ai.txt text
 * (+2). The reported score is capped at 10.
 *
 * A missing homepage is treated as empty HTML rather than short-circuiting.
 *
 * @param {object} data - Crawl data. Reads `data.aiTxt.status`,
 *   `data.aiTxt.text` and `data.homepage.text` (all optional).
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkContentLicensing(data) {
  const findings = [];
  const report = (severity, detail, fix) => {
    findings.push(fix === undefined ? { severity, detail } : { severity, detail, fix });
  };
  let points = 0;

  // ai.txt must be a real, non-trivial text response.
  const aiTxt = data.aiTxt;
  const hasAiTxt = aiTxt && aiTxt.status === 200 && aiTxt.text.length > 20 && !isHtmlResponse(aiTxt);
  if (hasAiTxt) {
    points += 4;
    report("info", `ai.txt file found (${aiTxt.text.length} characters)`);
  } else {
    report("high", "No ai.txt file found", "Create /ai.txt to declare your AI usage policy and content permissions for AI crawlers");
  }

  const html = data.homepage?.text || "";
  const visibleText = html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");

  // Policy wording in the visible text.
  if (/content\s+policy|terms\s+of\s+use|usage\s+rights|permission|copyright\s+policy|licensing|creative\s+commons/i.test(visibleText)) {
    points += 2;
    report("info", "Content policy or licensing language found on page");
  } else {
    report("low", "No content policy or licensing language visible", "Add clear content usage terms or licensing information");
  }

  // License/copyright tokens alongside a JSON-LD marker.
  const mentionsLicense = /license|copyrightHolder|copyrightYear/i.test(html);
  if (mentionsLicense && /application\/ld\+json/i.test(html)) {
    points += 2;
    report("info", "License or copyright properties found in schema markup");
  } else {
    report("low", "No license or copyright properties in schema", "Add license, copyrightHolder, and copyrightYear to your schema markup");
  }

  // TDM / Creative Commons references in either the page or ai.txt.
  const licensingCorpus = html + (aiTxt?.text || "");
  if (/tdm|text\s+and\s+data\s+mining|creative\s+commons|CC\s+BY|creativecommons\.org/i.test(licensingCorpus)) {
    points += 2;
    report("info", "TDM or Creative Commons licensing references found");
  } else {
    report("low", "No TDM or Creative Commons licensing references found", "Add Text and Data Mining (TDM) permissions or Creative Commons licensing to signal AI-friendly content use");
  }

  const status = points >= 7 ? "pass" : points >= 4 ? "partial" : "fail";
  return { criterion: "content_licensing", criterion_label: "Content Licensing & AI Permissions", score: Math.min(10, points), status, findings, fix_priority: points >= 4 ? "P3" : "P2" };
}
1066
/**
 * Scores the "author_schema_depth" criterion.
 *
 * Schema and text signals are searched in the HTML returned by
 * `getCombinedHtml(data)`; the author class / rel="author" markup checks run
 * on the homepage only.
 *
 * Points awarded: a `"@type": "Person"` entry (+3), credential tokens such
 * as jobTitle/knowsAbout (+2), a sameAs token when Person schema is present
 * (+2), a visible byline (+2) and an <address> element (+1). The reported
 * score is capped at 10.
 *
 * Byline detection: the phrases "written by" / "author(ed) by" are matched
 * case-insensitively, but the "by Firstname L..." form is matched
 * case-sensitively. Under a case-insensitive flag the capital-letter classes
 * would also match lowercase, so any phrase like "powered by the best" would
 * count as a byline.
 *
 * @param {object} data - Crawl data. Reads `data.homepage.text` and is passed
 *   to `getCombinedHtml`.
 * @returns {object} Result with `criterion`, `criterion_label`, `score`,
 *   `status`, `findings` and `fix_priority`.
 */
function checkAuthorSchemaDepth(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "author_schema_depth", criterion_label: "Author & Expert Schema", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  const combinedHtml = getCombinedHtml(data);
  const html = data.homepage.text;
  const text = combinedHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  let score = 0;

  const hasPersonSchema = /"@type"\s*:\s*"Person"/i.test(combinedHtml);
  if (hasPersonSchema) {
    score += 3;
    findings.push({ severity: "info", detail: "Person schema found in JSON-LD" });
  } else {
    findings.push({ severity: "medium", detail: "No Person schema found", fix: "Add Person schema for authors with name, jobTitle, knowsAbout, and sameAs properties" });
  }

  if (/jobTitle|knowsAbout|expertise|hasCredential/i.test(combinedHtml)) {
    score += 2;
    findings.push({ severity: "info", detail: "Author credential properties found (jobTitle/knowsAbout)" });
  } else {
    findings.push({ severity: "low", detail: "No jobTitle or knowsAbout in author schema", fix: "Add jobTitle and knowsAbout to Person schema to establish expertise" });
  }

  // sameAs only counts as an author signal when a Person entry exists.
  if (hasPersonSchema && /sameAs/i.test(combinedHtml)) {
    score += 2;
    findings.push({ severity: "info", detail: "Author sameAs social profile links found" });
  } else {
    findings.push({ severity: "low", detail: "No sameAs links to author social profiles", fix: "Add sameAs URLs (LinkedIn, GitHub) to Person schema to strengthen entity connections" });
  }

  const hasAttributionPhrase = /written\s+by|authored?\s+by/i.test(text);
  // Case-sensitive on purpose: requires capitalized name parts after "by"/"By".
  const hasNamedByline = /\b[Bb]y\s+[A-Z][a-z]+\s+[A-Z]/.test(text);
  const hasAuthorMarkup = /class="[^"]*author[^"]*"/i.test(html) || /rel="author"/i.test(html);
  if (hasAttributionPhrase || hasNamedByline || hasAuthorMarkup) {
    score += 2;
    findings.push({ severity: "info", detail: "Visible author byline or attribution found" });
  } else {
    findings.push({ severity: "medium", detail: "No visible author byline found", fix: "Add visible author names with credentials to establish E-E-A-T" });
  }

  if (/<address[\s>]/i.test(combinedHtml)) {
    score += 1;
    findings.push({ severity: "info", detail: "<address> element found for contact information" });
  } else {
    findings.push({ severity: "low", detail: "No <address> element found for contact information", fix: "Add an <address> element with contact details to reinforce entity identity" });
  }

  const status = score >= 7 ? "pass" : score >= 4 ? "partial" : "fail";
  return { criterion: "author_schema_depth", criterion_label: "Author & Expert Schema", score: Math.min(10, score), status, findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
1112
/**
 * Scores how many concrete, citable facts the homepage text contains.
 *
 * Points awarded (capped at 10):
 *   - up to 5 for quantitative data points (percent/currency suffixed numbers
 *     and "N users"-style count phrases),
 *   - up to 2 for distinct year references,
 *   - 2 for at least one source attribution phrase,
 *   - 1 for at least two number+unit measurements.
 *
 * @param {object} data - Site data; only `data.homepage.text` is read.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkFactDensity(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  // Strip tags and collapse whitespace so the patterns below run on prose only.
  const text = data.homepage.text.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  let score = 0;
  const dataPoints = text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD|\s*EUR)/g) || [];
  const countPhrases = text.match(/\d+(?:,\d{3})*\+?\s+(?:users?|clients?|customers?|companies|businesses|patients?|members?|employees?|projects?|downloads?)/gi) || [];
  const totalDataPoints = dataPoints.length + countPhrases.length;
  if (totalDataPoints >= 6) {
    score += 5;
    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found on homepage` });
  } else if (totalDataPoints >= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `${totalDataPoints} quantitative data points found` });
  } else if (totalDataPoints >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `Only ${totalDataPoints} quantitative data point(s) found`, fix: "Add more specific numbers, percentages, and metrics to strengthen credibility" });
  } else {
    findings.push({ severity: "high", detail: "No quantitative data points found", fix: "Add specific statistics (percentages, counts, comparisons) that AI engines can cite" });
  }
  // The digit lookarounds keep a year from being carved out of a longer
  // number (e.g. "120000" must not be read as the year "2000").
  const yearRefs = text.match(/(?<!\d)(?:19|20)\d{2}(?!\d)/g) || [];
  const uniqueYears = [...new Set(yearRefs)];
  if (uniqueYears.length >= 2) {
    score += 2;
    findings.push({ severity: "info", detail: `${uniqueYears.length} different year references found - suggests dated, verifiable claims` });
  } else if (uniqueYears.length === 1) {
    score += 1;
    findings.push({ severity: "low", detail: "Only 1 year reference found on page", fix: "Add more dated references and timestamps to demonstrate current, verifiable information" });
  } else {
    findings.push({ severity: "low", detail: "No year references found on page", fix: "Include specific years and dates to provide verifiable, time-anchored facts" });
  }
  const attributions = text.match(/according\s+to|source:|study\s+(?:by|from)|research\s+(?:by|from|shows)|data\s+from|report\s+(?:by|from)|published\s+(?:by|in)/gi) || [];
  if (attributions.length >= 1) {
    score += 2;
    findings.push({ severity: "info", detail: `${attributions.length} source attribution(s) found (e.g., "according to", "study by")` });
  } else {
    findings.push({ severity: "low", detail: "No source attributions found", fix: 'Add citations like "According to [source]" or "Research from [org] shows" for credibility' });
  }
  // The trailing \b requires the unit to be a whole word, so "2 weekly" is
  // not counted as a "week" measurement.
  const units = text.match(/\d+\s*(?:hours?|minutes?|days?|weeks?|months?|years?|miles?|km|lbs?|kg|mg|sq\s*ft|acres?|gallons?|liters?)\b/gi) || [];
  if (units.length >= 2) {
    score += 1;
    findings.push({ severity: "info", detail: `${units.length} measurement units found (hours, miles, etc.) - adds factual precision` });
  } else {
    findings.push({ severity: "low", detail: "Few or no units of measurement found", fix: "Include specific measurements (hours, miles, sq ft, etc.) to add factual precision AI engines can extract" });
  }
  return { criterion: "fact_density", criterion_label: "Fact & Data Density", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
1162
/**
 * Scores the homepage's canonical link tag.
 *
 * Points awarded (clamped to 0..10): 4 for having a canonical href, 3 when it
 * resolves to this site's host, 2 when it starts with https://, 1 for exactly
 * one canonical tag, and -1 when several canonical tags are present.
 *
 * Canonical tags are recognised with either double- or single-quoted
 * attributes, in any attribute order. The same-site test compares parsed
 * hostnames (ignoring a leading "www." and allowing subdomains) rather than
 * searching for the domain anywhere in the URL string.
 *
 * @param {object} data - Site data; reads `data.homepage.text`, `data.domain`
 *   and (optionally) `data.protocol`.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkCanonicalUrl(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "canonical_url", criterion_label: "Canonical URL Strategy", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }
  // True when `target` resolves (relative to the site root) to the site's own
  // host or one of its subdomains.
  const pointsToSite = (target) => {
    if (target.trim() === "") return false;
    try {
      const base = new URL(`${data.protocol || "https"}://${data.domain}/`);
      const siteHost = base.hostname.toLowerCase().replace(/^www\./, "");
      const targetHost = new URL(target, base).hostname.toLowerCase().replace(/^www\./, "");
      return targetHost === siteHost || targetHost.endsWith(`.${siteHost}`);
    } catch {
      // Unparseable domain or href: cannot be confirmed as self-referencing.
      return false;
    }
  };
  const html = data.homepage.text;
  let score = 0;
  const allCanonicals = html.match(/<link[^>]*(?:rel="canonical"|rel='canonical')[^>]*>/gi) || [];
  // Use the first canonical tag that actually carries an href.
  let canonicalUrl = null;
  for (const tag of allCanonicals) {
    const hrefMatch = tag.match(/\shref\s*=\s*(?:"([^"]*)"|'([^']*)')/i);
    if (hrefMatch) {
      canonicalUrl = hrefMatch[1] !== undefined ? hrefMatch[1] : hrefMatch[2];
      break;
    }
  }
  if (canonicalUrl !== null) {
    score += 4;
    findings.push({ severity: "info", detail: `Canonical URL found: ${canonicalUrl.slice(0, 80)}` });
    if (pointsToSite(canonicalUrl)) {
      score += 3;
      findings.push({ severity: "info", detail: "Canonical URL is self-referencing (points to same domain)" });
    } else {
      findings.push({ severity: "medium", detail: "Canonical URL points to a different domain", fix: "Ensure canonical URL points to the authoritative version of this page" });
    }
    if (canonicalUrl.startsWith("https://")) {
      score += 2;
      findings.push({ severity: "info", detail: "Canonical URL uses HTTPS" });
    } else {
      findings.push({ severity: "medium", detail: "Canonical URL does not use HTTPS", fix: "Update canonical URL to use https://" });
    }
  } else {
    findings.push({ severity: "high", detail: "No canonical URL tag found", fix: 'Add <link rel="canonical" href="https://yoursite.com/page"> to prevent duplicate content issues' });
  }
  if (allCanonicals.length > 1) {
    score -= 1;
    findings.push({ severity: "high", detail: `${allCanonicals.length} canonical tags found - must have exactly one`, fix: "Remove duplicate canonical tags, keeping only one per page" });
  } else if (allCanonicals.length === 1) {
    score += 1;
    findings.push({ severity: "info", detail: "Single canonical tag present (no duplicates)" });
  }
  return { criterion: "canonical_url", criterion_label: "Canonical URL Strategy", score: Math.max(0, Math.min(10, score)), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
1200
/**
 * Summarises the <lastmod> dates found in a sitemap.
 *
 * @param {string} sitemapText - Raw sitemap XML.
 * @param {Date} [now] - Reference time for the 90-day window. Defaults to the
 *   current time; anything that is not a valid Date is ignored, so existing
 *   single-argument callers are unaffected.
 * @returns {{recentCount: number, isUniform: boolean, uniformDetail?: string,
 *   totalWithDates: number, distinctRecentDays: number}}
 *   `recentCount` is the number of parseable dates within 90 days of `now`,
 *   `distinctRecentDays` the number of different UTC days among them,
 *   `totalWithDates` the number of parseable dates overall, and `isUniform`
 *   is true when at least 5 dates exist and more than 80% share one day
 *   (in which case `uniformDetail` describes that day).
 */
function countRecentSitemapDates(sitemapText, now) {
  const lastmodMatches = sitemapText.match(/<lastmod>([^<]+)<\/lastmod>/gi) || [];
  if (lastmodMatches.length === 0) {
    return { recentCount: 0, isUniform: false, totalWithDates: 0, distinctRecentDays: 0 };
  }
  const reference = now instanceof Date && !Number.isNaN(now.getTime()) ? now : new Date();
  const cutoffMs = reference.getTime() - 90 * 24 * 60 * 60 * 1e3;
  const dayCounts = new Map();
  const recentDays = new Set();
  let recentCount = 0;
  let totalWithDates = 0;
  // Track the busiest day while counting so no second pass (and no
  // Math.max over a possibly empty list) is needed.
  let maxDayCount = 0;
  let topDay = "";
  for (const match of lastmodMatches) {
    const dateStr = match.replace(/<\/?lastmod>/gi, "").trim();
    const date = new Date(dateStr);
    if (Number.isNaN(date.getTime())) continue;
    const dayKey = date.toISOString().split("T")[0];
    const dayTotal = (dayCounts.get(dayKey) || 0) + 1;
    dayCounts.set(dayKey, dayTotal);
    totalWithDates++;
    if (dayTotal > maxDayCount) {
      maxDayCount = dayTotal;
      topDay = dayKey;
    }
    if (date.getTime() >= cutoffMs) {
      recentCount++;
      recentDays.add(dayKey);
    }
  }
  const isUniform = totalWithDates >= 5 && maxDayCount / totalWithDates > 0.8;
  let uniformDetail;
  if (isUniform) {
    uniformDetail = `${maxDayCount} of ${totalWithDates} URLs share lastmod date ${topDay} - likely auto-generated by build system`;
  }
  return {
    recentCount,
    isUniform,
    uniformDetail,
    totalWithDates,
    distinctRecentDays: recentDays.size
  };
}
1237
// Path segments that indicate blog/article-style content. The trailing \b
// applies to every alternative, so each one must spell out a complete word;
// "case-stud" alone could never match "/case-study" or "/case-studies".
var BLOG_PATH_PATTERNS = /\/(?:blog|articles?|insights?|guides?|resources?|news|posts?|learn|help|how-?to|tutorials?|case-stud(?:y|ies)|whitepapers?)\b/i;
// Path segments for taxonomy, feed, commerce, auth and asset URLs that are
// never treated as content pages.
var EXCLUDE_PATH_PATTERNS = /\/(?:tag|category|author|page|feed|wp-content|wp-admin|wp-json|cart|checkout|login|search|api|static|assets|_next)\b/i;
1239
/**
 * Picks the most recently modified content URLs from a sitemap.
 *
 * A <url> entry qualifies when its host equals `domain` (ignoring "www."),
 * it is not the root path, it does not match EXCLUDE_PATH_PATTERNS, and it
 * either matches BLOG_PATH_PATTERNS or has at least two path segments.
 * Entries are ordered by <lastmod> (newest first, undated entries last).
 *
 * <loc>/<lastmod> values may be plain text (XML entities such as &amp; are
 * decoded) or wrapped in a CDATA section (taken literally).
 *
 * @param {string} sitemapText - Raw sitemap XML.
 * @param {string} domain - Site host, with or without a leading "www.".
 * @param {number} [limit=5] - Maximum number of URLs to return.
 * @returns {string[]} Qualifying URLs, newest first.
 */
function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 5) {
  // &amp; is decoded last so "&amp;lt;" stays "&lt;" instead of becoming "<".
  const decodeXml = (value) => value.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&amp;/g, "&");
  const locPattern = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+))\s*<\/loc>/i;
  const lastmodPattern = /<lastmod>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+))\s*<\/lastmod>/i;
  // Returns the trimmed element value, or "" when the element is absent.
  const readValue = (block, pattern) => {
    const found = block.match(pattern);
    if (!found) return "";
    return (found[1] !== undefined ? found[1] : decodeXml(found[2])).trim();
  };
  const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
  const candidates = [];
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  for (const block of urlBlocks) {
    const url = readValue(block, locPattern);
    if (!url) continue;
    try {
      const parsed = new URL(url);
      const urlDomain = parsed.hostname.replace(/^www\./, "").toLowerCase();
      if (urlDomain !== cleanDomain) continue;
      if (parsed.pathname === "/" || parsed.pathname === "") continue;
      const path = parsed.pathname.toLowerCase();
      if (EXCLUDE_PATH_PATTERNS.test(path)) continue;
      const segments = path.split("/").filter(Boolean);
      const isBlogPath = BLOG_PATH_PATTERNS.test(path);
      const isDeepPath = segments.length >= 2;
      if (!isBlogPath && !isDeepPath) continue;
    } catch {
      // Not an absolute, parseable URL: skip the entry.
      continue;
    }
    candidates.push({ url, lastmod: readValue(block, lastmodPattern) });
  }
  candidates.sort((a, b) => {
    if (a.lastmod && b.lastmod) return b.lastmod.localeCompare(a.lastmod);
    if (a.lastmod) return -1;
    if (b.lastmod) return 1;
    return 0;
  });
  return candidates.slice(0, limit).map((c) => c.url);
}
1273
/**
 * Chooses one child sitemap from a sitemap index.
 *
 * Returns null unless the text contains "<sitemapindex". Otherwise returns
 * the first child URL containing "post", "blog" or "article", falling back
 * to the first child URL. <loc> values may be plain text (XML entities such
 * as &amp; are decoded) or wrapped in a CDATA section (taken literally).
 *
 * @param {string} sitemapText - Raw sitemap or sitemap-index XML.
 * @returns {string|null} The selected child sitemap URL, or null.
 */
function extractSubSitemapUrl(sitemapText) {
  if (!sitemapText.includes("<sitemapindex")) return null;
  // &amp; is decoded last so "&amp;lt;" stays "&lt;" instead of becoming "<".
  const decodeXml = (value) => value.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&amp;/g, "&");
  const locPattern = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+))\s*<\/loc>/i;
  const blocks = sitemapText.match(/<sitemap>[\s\S]*?<\/sitemap>/gi) || [];
  const urls = [];
  for (const block of blocks) {
    const found = block.match(locPattern);
    if (!found) continue;
    const url = (found[1] !== undefined ? found[1] : decodeXml(found[2])).trim();
    if (url) urls.push(url);
  }
  const preferred = urls.find((u) => /post|blog|article/i.test(u));
  return preferred || urls[0] || null;
}
1284
/**
 * Scores publishing cadence from the sitemap's lastmod dates.
 *
 * Returns 0 when no sitemap with status 200 is available and 2 when the
 * sitemap carries no lastmod dates. Otherwise starts at 2 and adds 8/5/3/1
 * for at least 20/10/5/1 recent items. When the dates look uniform (as
 * reported by countRecentSitemapDates) distinct recent days are counted
 * instead of recent pages and a warning finding is added.
 *
 * @param {object} data - Site data; reads `data.sitemapXml`.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkContentVelocity(data) {
  const findings = [];
  const sitemap = data.sitemapXml;
  const sitemapUsable = Boolean(sitemap) && sitemap.status === 200;
  if (!sitemapUsable) {
    findings.push({ severity: "medium", detail: "No sitemap available to assess content velocity", fix: "Create a sitemap.xml with lastmod dates to signal content publishing frequency" });
    return { criterion: "content_velocity", criterion_label: "Content Publishing Velocity", score: 0, status: "fail", findings, fix_priority: "P2" };
  }
  const dates = countRecentSitemapDates(sitemap.text);
  if (dates.totalWithDates === 0) {
    findings.push({ severity: "medium", detail: "No lastmod dates in sitemap", fix: "Add lastmod dates to sitemap entries to signal content freshness" });
    return { criterion: "content_velocity", criterion_label: "Content Publishing Velocity", score: 2, status: "fail", findings, fix_priority: "P2" };
  }
  // Base credit for having lastmod dates at all.
  let score = 2;
  findings.push({ severity: "info", detail: `${dates.totalWithDates} pages have lastmod dates` });
  const uniform = dates.isUniform;
  const recentItems = uniform ? dates.distinctRecentDays : dates.recentCount;
  if (uniform) {
    findings.push({ severity: "medium", detail: dates.uniformDetail, fix: "Set genuine lastmod dates per page reflecting actual content changes, not build timestamps" });
  }
  const unit = uniform ? "distinct dates" : "pages updated";
  // Tiers are checked from the highest threshold down; the first hit wins.
  const tiers = [
    { min: 20, points: 8, detail: `${recentItems} ${unit} in last 90 days - excellent content velocity` },
    { min: 10, points: 5, detail: `${recentItems} ${unit} in last 90 days - good velocity` },
    { min: 5, points: 3, detail: `${recentItems} ${unit} in last 90 days` }
  ];
  const tier = tiers.find((candidate) => recentItems >= candidate.min);
  if (tier) {
    score += tier.points;
    findings.push({ severity: "info", detail: tier.detail });
  } else if (recentItems >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `Only ${recentItems} ${uniform ? "distinct date(s)" : "page(s) updated"} in last 90 days`, fix: "Publish or update content more frequently to signal active maintenance" });
  } else {
    findings.push({ severity: "medium", detail: "No pages updated in the last 90 days", fix: "Update existing content and publish new pages regularly" });
  }
  const passed = score >= 7;
  return { criterion: "content_velocity", criterion_label: "Content Publishing Velocity", score: Math.min(10, score), status: passed ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: passed ? "P3" : "P2" };
}
1320
/**
 * Scores the breadth and depth of JSON-LD markup in the combined HTML.
 *
 * Up to 2 points each (max 10) for: number of unique property names,
 * Organization-style properties present, Article-style properties present,
 * use of "@id", and number of distinct schema types. Property and type
 * checks are plain case-insensitive text searches over the JSON-LD script
 * blocks, not a JSON parse.
 *
 * JSON-LD scripts are recognised whether the `type` attribute is written
 * with double quotes, single quotes or no quotes.
 *
 * @param {object} data - Site data; requires `data.homepage` and is passed
 *   to getCombinedHtml().
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkSchemaCoverage(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "schema_coverage", criterion_label: "Schema Coverage & Depth", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  const combinedHtml = getCombinedHtml(data);
  const ldJsonMatches = combinedHtml.match(/<script[^>]*type\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi) || [];
  let score = 0;
  if (ldJsonMatches.length === 0) {
    findings.push({ severity: "critical", detail: "No JSON-LD found - cannot assess schema coverage", fix: "Add JSON-LD schema markup to improve AI engine understanding" });
    return { criterion: "schema_coverage", criterion_label: "Schema Coverage & Depth", score: 0, status: "fail", findings, fix_priority: "P1" };
  }
  const allSchemaText = ldJsonMatches.join(" ");
  const allSchemaLower = allSchemaText.toLowerCase();
  const propertyMatches = allSchemaText.match(/"[a-zA-Z@]+"\s*:/g) || [];
  const uniqueProperties = new Set(propertyMatches.map((p) => p.replace(/[":\s]/g, "").toLowerCase()));
  // The >=15 and >=5 tiers award the same points and differ only in wording.
  if (uniqueProperties.size >= 15) {
    score += 2;
    findings.push({ severity: "info", detail: `${uniqueProperties.size} unique schema properties used - rich schema depth` });
  } else if (uniqueProperties.size >= 5) {
    score += 2;
    findings.push({ severity: "info", detail: `${uniqueProperties.size} unique schema properties found` });
  } else {
    score += 1;
    findings.push({ severity: "low", detail: `Only ${uniqueProperties.size} schema properties`, fix: "Add more properties to your schema types for richer AI understanding" });
  }
  const orgProps = ["name", "url", "logo", "contactpoint", "sameas", "address", "telephone", "description", "founder", "foundingdate"];
  const orgPropsFound = orgProps.filter((p) => allSchemaLower.includes(`"${p}"`));
  if (orgPropsFound.length >= 5) {
    score += 2;
    findings.push({ severity: "info", detail: `Organization schema has ${orgPropsFound.length}/10 key properties` });
  } else if (orgPropsFound.length >= 3) {
    score += 1;
    findings.push({ severity: "low", detail: `Organization schema has only ${orgPropsFound.length}/10 key properties`, fix: "Add more Organization properties: logo, contactPoint, sameAs, address" });
  } else {
    findings.push({ severity: "medium", detail: `Organization schema has only ${orgPropsFound.length} key properties`, fix: "Add essential Organization properties: name, url, logo, contactPoint, sameAs, address, telephone" });
  }
  const articleProps = ["headline", "datepublished", "datemodified", "author", "image", "description", "publisher"];
  const articlePropsFound = articleProps.filter((p) => allSchemaLower.includes(`"${p}"`));
  if (articlePropsFound.length >= 4) {
    score += 2;
    findings.push({ severity: "info", detail: `Article schema has ${articlePropsFound.length}/7 key properties` });
  } else if (articlePropsFound.length >= 2) {
    score += 1;
    findings.push({ severity: "low", detail: `Article schema has only ${articlePropsFound.length}/7 key properties`, fix: "Add headline, datePublished, dateModified, author, image, and publisher to Article schema" });
  } else {
    findings.push({ severity: "medium", detail: "Article schema missing or has fewer than 2 key properties", fix: "Add Article schema with headline, datePublished, author, and publisher properties" });
  }
  const hasIdLinking = /"@id"\s*:/i.test(allSchemaText);
  if (hasIdLinking) {
    score += 2;
    findings.push({ severity: "info", detail: "@id linking found - schema types are connected in a graph" });
  } else {
    findings.push({ severity: "low", detail: "No @id linking between schema types", fix: "Use @id references to connect schema types (e.g., article.publisher -> organization)" });
  }
  const schemaTypes = ["organization", "localbusiness", "faqpage", "service", "article", "webpage", "website", "breadcrumblist", "howto", "product", "person", "event", "offer", "review", "aboutpage"];
  const foundTypes = schemaTypes.filter((t) => allSchemaLower.includes(`"${t}"`));
  if (foundTypes.length >= 3) {
    score += 2;
    findings.push({ severity: "info", detail: `${foundTypes.length} distinct schema types used: ${foundTypes.join(", ")}` });
  } else if (foundTypes.length >= 2) {
    score += 1;
    findings.push({ severity: "low", detail: `Only ${foundTypes.length} distinct schema types used`, fix: "Add more schema types (FAQPage, BreadcrumbList, Service) for comprehensive AI understanding" });
  } else {
    findings.push({ severity: "medium", detail: `Only ${foundTypes.length} schema type(s) found - limited coverage`, fix: "Add multiple schema types (Organization, WebSite, FAQPage, BreadcrumbList) for comprehensive AI understanding" });
  }
  return { criterion: "schema_coverage", criterion_label: "Schema Coverage & Depth", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
1390
/**
 * Scores use of Speakable markup in JSON-LD.
 *
 * 4 points when a "speakable" property or SpeakableSpecification type is
 * present in the JSON-LD blocks of the combined HTML, 3 more when a
 * cssSelector or xpath target is given, and 3 more when the sampled blog
 * pages also mention speakable markup (max 10).
 *
 * JSON-LD scripts are recognised whether the `type` attribute is written
 * with double quotes, single quotes or no quotes.
 *
 * @param {object} data - Site data; requires `data.homepage`, optionally
 *   reads `data.blogSample`, and is passed to getCombinedHtml().
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkSpeakableSchema(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "speakable_schema", criterion_label: "Speakable Schema", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }
  const combinedHtml = getCombinedHtml(data);
  const ldJsonMatches = combinedHtml.match(/<script[^>]*type\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi) || [];
  let score = 0;
  if (ldJsonMatches.length === 0) {
    findings.push({ severity: "critical", detail: "No JSON-LD found - cannot assess speakable schema", fix: "Add JSON-LD schema markup with SpeakableSpecification to indicate voice-readable content sections" });
    return { criterion: "speakable_schema", criterion_label: "Speakable Schema", score: 0, status: "fail", findings, fix_priority: "P2" };
  }
  const allSchemaText = ldJsonMatches.join(" ");
  const allSchemaLower = allSchemaText.toLowerCase();
  const hasSpeakableType = /speakablespecification/i.test(allSchemaLower);
  const hasSpeakableProperty = /"speakable"\s*:/i.test(allSchemaText);
  const hasSpeakable = hasSpeakableType || hasSpeakableProperty;
  if (!hasSpeakable) {
    findings.push({ severity: "medium", detail: "No SpeakableSpecification schema found - voice assistants cannot identify readable sections", fix: "Add SpeakableSpecification schema with cssSelector or xpath targeting key content sections (headlines, summaries, FAQ answers)" });
    return { criterion: "speakable_schema", criterion_label: "Speakable Schema", score: 0, status: "fail", findings, fix_priority: "P2" };
  }
  score += 4;
  findings.push({ severity: "info", detail: "SpeakableSpecification schema detected - voice assistants can identify readable content" });
  const hasCssSelector = /"cssselector"/i.test(allSchemaLower);
  const hasXpath = /"xpath"/i.test(allSchemaLower);
  if (hasCssSelector || hasXpath) {
    score += 3;
    const targetType = hasCssSelector && hasXpath ? "cssSelector and xpath" : hasCssSelector ? "cssSelector" : "xpath";
    findings.push({ severity: "info", detail: `Speakable uses ${targetType} targeting for precise content selection` });
  } else {
    findings.push({ severity: "low", detail: "Speakable schema lacks cssSelector or xpath targeting", fix: 'Add cssSelector (e.g., ".article-headline, .article-summary") or xpath to precisely target speakable sections' });
  }
  if (data.blogSample && data.blogSample.length > 0) {
    // The blog check searches the raw page HTML, not only its JSON-LD blocks.
    const blogHtml = data.blogSample.map((p) => p.text).join("\n");
    const blogHasSpeakable = /speakablespecification/i.test(blogHtml) || /"speakable"\s*:/i.test(blogHtml);
    if (blogHasSpeakable) {
      score += 3;
      findings.push({ severity: "info", detail: "Speakable schema also found in blog/content pages - comprehensive voice coverage" });
    } else {
      findings.push({ severity: "low", detail: "Speakable schema only on homepage, not found in blog/content pages", fix: "Add SpeakableSpecification to article pages to make blog content voice-assistant readable" });
    }
  } else {
    findings.push({ severity: "info", detail: "No blog pages sampled - blog speakable coverage not assessed" });
  }
  return { criterion: "speakable_schema", criterion_label: "Speakable Schema", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
1437
/**
 * Builds a flat summary of raw signals found in the fetched site data.
 *
 * Every value is derived with regular-expression or substring checks over
 * the homepage HTML (and its tag-stripped text), robots.txt, the sitemap and
 * the optional blog sample. Nothing is parsed as HTML, XML or JSON. A missing
 * homepage is treated as empty HTML.
 *
 * `has_lang_attr` accepts any quoted language tag of the form
 * `xx`, `xxx` or `xx-Subtag...` (e.g. lang="en-US"), and does not treat
 * `hreflang` as a document language declaration.
 *
 * @param {object} data - Site data; reads `domain`, `protocol`, `homepage`,
 *   `robotsTxt`, `llmsTxt`, `faqPage`, `sitemapXml`, `rssFeed`, `aiTxt` and
 *   `blogSample`.
 * @returns {object} Summary object with the keys listed in the return
 *   statement below.
 */
function extractRawDataSummary(data) {
  const html = data.homepage?.text || "";
  const text = html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  const ldJsonMatches = html.match(/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi) || [];
  // Joined once and reused by every schema-related field below.
  const rawSchemaText = ldJsonMatches.join(" ");
  const allSchemaText = rawSchemaText.toLowerCase();
  const schemaTypes = ["organization", "localbusiness", "faqpage", "service", "article", "webpage", "website", "breadcrumblist", "howto", "product"];
  const foundSchemaTypes = schemaTypes.filter((t) => allSchemaText.includes(`"${t}"`));

  const linkMatches = html.match(/<a[^>]*href="([^"]*)"[^>]*>/gi) || [];
  const hrefOf = (tag) => tag.match(/href="([^"]*)"/)?.[1] || "";
  const internalLinks = linkMatches.filter((l) => {
    const href = hrefOf(l);
    return href.startsWith("/") || href.includes(data.domain);
  });
  const externalLinks = linkMatches.filter((l) => {
    const href = hrefOf(l);
    return href.startsWith("http") && !href.includes(data.domain);
  });

  const robotsRaw = data.robotsTxt?.text || "";
  const robotsText = robotsRaw.toLowerCase();
  const aiCrawlers = ["gptbot", "claudebot", "perplexitybot", "anthropic", "chatgpt"];
  const mentionedCrawlers = aiCrawlers.filter((c) => robotsText.includes(c));
  // A crawler counts as blocked when its own user-agent section has
  // "Disallow: /" and no "Allow: /".
  const blockedCrawlers = mentionedCrawlers.filter((c) => {
    const sectionRegex = new RegExp(`user-agent:\\s*${c}[^\\S\\n]*\\n([\\s\\S]*?)(?=user-agent:|$)`, "i");
    const match = sectionRegex.exec(robotsRaw);
    if (!match) return false;
    const section = match[1];
    if (/^allow:\s*\/\s*$/im.test(section)) return false;
    return /^disallow:\s*\/\s*$/im.test(section);
  });

  const headingTexts = (source) => (source.match(/<h[1-6][^>]*>([\s\S]*?)<\/h[1-6]>/gi) || []).map((h) => h.replace(/<[^>]*>/g, ""));
  const isQuestionHeading = (h) => h.includes("?") || /^(what|how|why|when|who|where|can|do|does|is|are|should)\s/i.test(h);
  const questionHeadings = headingTexts(html).filter(isQuestionHeading);
  const h1Count = (html.match(/<h1[\s>]/gi) || []).length;
  const images = html.match(/<img[^>]*>/gi) || [];
  const imagesWithAlt = images.filter((img) => /alt="[^"]+"/i.test(img));
  const semanticChecks = ["main", "article", "nav", "header", "footer", "section", "time"];
  const foundElements = semanticChecks.filter((el) => new RegExp(`<${el}[\\s>]`, "i").test(html));

  // A phone-shaped number only counts when the page also signals a phone
  // context (tel: link, "telephone" property, or a phone-related word).
  const hasPhoneNumber = /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/.test(text);
  const hasPhone = hasPhoneNumber && (/href="tel:/i.test(html) || /"telephone"/i.test(html) || /\b(phone|call|tel:|contact\s*us|fax|dial)\b/i.test(text));

  const sitemapDates = countRecentSitemapDates(data.sitemapXml?.text || "");

  // Blog sample HTML is concatenated once and reused by the three blog fields.
  const hasBlogSample = Boolean(data.blogSample) && data.blogSample.length > 0;
  const blogHtml = hasBlogSample ? data.blogSample.map((p) => p.text).join("\n") : "";
  let blogSchemaTypes = [];
  let blogQuestionHeadings = 0;
  let blogFaqSchemaFound = false;
  if (hasBlogSample) {
    const blogLd = blogHtml.match(/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi) || [];
    const blogSchema = blogLd.join(" ").toLowerCase();
    const types = ["organization", "localbusiness", "faqpage", "service", "article", "webpage", "website", "breadcrumblist", "howto", "product", "person"];
    blogSchemaTypes = types.filter((t) => blogSchema.includes(`"${t}"`));
    blogQuestionHeadings = headingTexts(blogHtml).filter(isQuestionHeading).length;
    blogFaqSchemaFound = /faqpage/i.test(blogHtml) && /application\/ld\+json/i.test(blogHtml);
  }

  return {
    domain: data.domain,
    protocol: data.protocol,
    homepage_length: html.length,
    homepage_text_length: text.trim().length,
    has_https: data.protocol === "https",
    llms_txt_status: data.llmsTxt && !isHtmlResponse(data.llmsTxt) ? data.llmsTxt.status : null,
    llms_txt_length: data.llmsTxt?.status === 200 && !isHtmlResponse(data.llmsTxt) ? data.llmsTxt.text.length : 0,
    robots_txt_status: data.robotsTxt && !isHtmlResponse(data.robotsTxt) ? data.robotsTxt.status : null,
    robots_txt_snippet: robotsRaw.slice(0, 500),
    robots_txt_ai_crawlers: mentionedCrawlers,
    robots_txt_blocked_crawlers: blockedCrawlers,
    schema_types_found: foundSchemaTypes,
    schema_block_count: ldJsonMatches.length,
    faq_page_status: data.faqPage?.status ?? null,
    faq_page_length: data.faqPage?.status === 200 ? data.faqPage.text.length : 0,
    sitemap_status: data.sitemapXml?.status ?? null,
    internal_link_count: internalLinks.length,
    external_link_count: externalLinks.length,
    question_headings_count: questionHeadings.length,
    h1_count: h1Count,
    has_meta_description: /<meta[^>]*name="description"[^>]*>/i.test(html),
    has_title: /<title[^>]*>[^<]+<\/title>/i.test(html),
    has_phone: hasPhone,
    has_address: /\d+\s+\w+\s+(street|st|avenue|ave|road|rd|drive|dr|blvd|boulevard|lane|ln|way|court|ct)/i.test(text),
    has_org_schema: /organization|localbusiness/i.test(html),
    has_social_links: /sameas|linkedin\.com|facebook\.com|twitter\.com|x\.com/i.test(html),
    semantic_elements_found: foundElements,
    img_count: images.length,
    img_with_alt_count: imagesWithAlt.length,
    has_lang_attr: /\blang=(["'])[a-z]{2,3}(?:-[a-z0-9]+)*\1/i.test(html),
    has_aria: /role="|aria-/i.test(html),
    has_breadcrumbs: /breadcrumb|aria-label="breadcrumb"/i.test(html),
    has_nav: /<nav[\s>]/i.test(html),
    has_footer: /<footer[\s>]/i.test(html),
    has_case_studies: /case\s+stud|testimonial|success\s+stor|client\s+stor/i.test(text),
    has_statistics: /\d+%|\d+\s*(patients|clients|customers|cases|years|professionals|specialists|companies|users|businesses|domains|audits)/i.test(text),
    has_expert_attribution: /written\s+by|authored\s+by|expert|specialist|board.certified|licensed/i.test(text),
    has_blog_section: /href="[^"]*\/(?:blog|articles|insights|guides|resources)\b[^"]*"/i.test(html),
    // New criteria fields
    has_date_modified_schema: /dateModified/i.test(html),
    time_element_count: (html.match(/<time[\s>]/gi) || []).length,
    sitemap_url_count: (data.sitemapXml?.text?.match(/<loc>/gi) || []).length,
    has_rss_feed: !!(data.rssFeed && data.rssFeed.status === 200 && !isHtmlResponse(data.rssFeed)),
    table_count: (html.match(/<table[\s>]/gi) || []).length,
    ordered_list_count: (html.match(/<ol[\s>]/gi) || []).length,
    unordered_list_count: (html.match(/<ul[\s>]/gi) || []).length,
    definition_pattern_count: (text.match(/\brefers?\s+to\b|\bdefined\s+as\b|\bknown\s+as\b/gi) || []).length,
    has_ai_txt: !!(data.aiTxt && data.aiTxt.status === 200 && !isHtmlResponse(data.aiTxt)),
    has_person_schema: /"@type"\s*:\s*"Person"/i.test(html),
    fact_data_point_count: (text.match(/\d+(?:\.\d+)?(?:\s*%|\s*\$|\s*USD)/g) || []).length,
    has_canonical: /<link[^>]*rel="canonical"/i.test(html),
    has_license_schema: /license|copyrightHolder/i.test(html) && /application\/ld\+json/i.test(html),
    sitemap_recent_lastmod_count: sitemapDates.isUniform ? sitemapDates.distinctRecentDays : sitemapDates.recentCount,
    // Speakable schema fields
    has_speakable_schema: /speakablespecification/i.test(rawSchemaText) || /"speakable"\s*:/i.test(rawSchemaText),
    speakable_selector_count: (rawSchemaText.match(/"cssselector"|"xpath"/gi) || []).length,
    // Blog sample fields
    blog_sample_count: data.blogSample?.length ?? 0,
    blog_sample_urls: data.blogSample?.map((p) => p.finalUrl || "").filter(Boolean) ?? [],
    blog_sample_schema_types: blogSchemaTypes,
    blog_sample_question_headings: blogQuestionHeadings,
    blog_sample_faq_schema_found: blogFaqSchemaFound
  };
}
1561
/**
 * Runs every criterion check against already-fetched site data.
 *
 * @param {object} data - Site data passed unchanged to each check.
 * @returns {object[]} One result per check, in the order listed below.
 */
function auditSiteFromData(data) {
  // Order matters: results are returned in exactly this sequence.
  const checks = [
    checkLlmsTxt,
    checkSchemaMarkup,
    checkQAFormat,
    checkCleanHTML,
    checkEntityConsistency,
    checkRobotsTxt,
    checkFAQSection,
    checkOriginalData,
    checkInternalLinking,
    checkSemanticHTML,
    checkContentFreshness,
    checkSitemapCompleteness,
    checkRssFeed,
    checkTableListExtractability,
    checkDefinitionPatterns,
    checkDirectAnswerDensity,
    checkContentLicensing,
    checkAuthorSchemaDepth,
    checkFactDensity,
    checkCanonicalUrl,
    checkContentVelocity,
    checkSchemaCoverage,
    checkSpeakableSchema
  ];
  const results = [];
  for (const runCheck of checks) {
    results.push(runCheck(data));
  }
  return results;
}
1588
+
1589
+ // src/scoring.ts
1590
// Relative weight of each criterion in the overall score, keyed by criterion
// id. The weights do not need to sum to 1: calculateOverallScore divides by
// the total weight of the criteria it is given.
var WEIGHTS = {
  // Original 10
  llms_txt: 0.1,
  schema_markup: 0.15,
  qa_content_format: 0.15,
  clean_html: 0.1,
  entity_consistency: 0.1,
  robots_txt: 0.05,
  faq_section: 0.1,
  original_data: 0.1,
  internal_linking: 0.1,
  semantic_html: 0.05,
  // New 13
  content_freshness: 0.07,
  sitemap_completeness: 0.05,
  rss_feed: 0.03,
  table_list_extractability: 0.07,
  definition_patterns: 0.04,
  direct_answer_density: 0.07,
  content_licensing: 0.04,
  author_schema_depth: 0.04,
  fact_density: 0.05,
  canonical_url: 0.04,
  content_velocity: 0.03,
  schema_coverage: 0.03,
  speakable_schema: 0.03
};
1617
/**
 * Computes the weighted overall score on a 0-100 scale.
 *
 * Each criterion score (0-10) is scaled to 0-100 and weighted by WEIGHTS;
 * criteria missing from WEIGHTS fall back to a weight of 0.1. The weighted
 * sum is divided by the total weight actually used, then rounded.
 *
 * @param {Iterable<{criterion: string, score: number}>} criteria
 * @returns {number} Rounded score, or 0 when no weight was accumulated.
 */
function calculateOverallScore(criteria) {
  let weightTotal = 0;
  let weightedTotal = 0;
  for (const { criterion, score } of criteria) {
    const criterionWeight = WEIGHTS[criterion] ?? 0.1;
    weightedTotal += score / 10 * criterionWeight * 100;
    weightTotal += criterionWeight;
  }
  return weightTotal === 0 ? 0 : Math.round(weightedTotal / weightTotal);
}
1628
+
1629
+ // src/headless-fetch.ts
1630
/**
 * Patterns whose presence in raw HTML hints at a client-rendered (SPA) page.
 * isSpaShell only consults them when the page has little visible text.
 */
var SPA_INDICATORS = [
  // Empty (or self-closing) framework mount node: root, app, __next, __nuxt, __vue.
  /<div\s+id=["'](root|app|__next|__nuxt|__vue)["'][^>]*(?:\/>|>\s*<\/div>)/i,

  // Global data markers emitted by Next.js and Nuxt.
  /__NEXT_DATA__/,
  /__NUXT__/,

  // Hashed bundle file names in the Create React App and Vite layouts.
  /src=["'][^"']*\/static\/js\/main\.[a-f0-9]+\.js["']/i,
  /src=["'][^"']*\/assets\/index-[a-f0-9]+\.js["']/i,

  // React root attribute.
  /data-reactroot/i,

  // Angular version attribute.
  /ng-version/i,

  // <noscript> notice telling the visitor that JavaScript is required.
  /<noscript>[^<]*(?:javascript|enable\s+js|requires?\s+javascript)[^<]*<\/noscript>/i
];
1646
/**
 * Decides whether raw HTML looks like an empty single-page-app shell.
 *
 * A page qualifies when its visible text (scripts, styles and tags removed,
 * whitespace collapsed) is shorter than 500 characters AND the raw HTML
 * matches at least one SPA_INDICATORS pattern.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean}
 */
function isSpaShell(html) {
  const withoutCode = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "");
  const visibleText = withoutCode.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
  const hasLittleText = visibleText.length < 500;
  // Indicators are tested against the untouched HTML, scripts included.
  return hasLittleText && SPA_INDICATORS.some((indicator) => indicator.test(html));
}
1651
/**
 * Classifies how a page is rendered and, for SPA shells, guesses the framework.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{method: string, framework: (string|null)}} method is "server"
 *   when the page is not an SPA shell, otherwise "client-spa"; framework is
 *   the first matching signature's name, or null when none matches.
 */
function classifyRendering(html) {
  if (!isSpaShell(html)) {
    return { method: "server", framework: null };
  }
  // Checked top to bottom: specific framework markers come before the
  // generic mount-node and bundle-name heuristics.
  const signatures = [
    { pattern: /__NEXT_DATA__/, framework: "next" },
    { pattern: /__NUXT__/, framework: "nuxt" },
    { pattern: /<div\s+id=["']__vue["']/i, framework: "vue" },
    { pattern: /ng-version/i, framework: "angular" },
    { pattern: /data-reactroot/i, framework: "react" },
    { pattern: /<div\s+id=["'](root|app)["'][^>]*(?:\/>|>\s*<\/div>)/i, framework: "react" },
    { pattern: /src=["'][^"']*\/static\/js\/main\.[a-f0-9]+\.js["']/i, framework: "react" },
    { pattern: /src=["'][^"']*\/assets\/index-[a-f0-9]+\.js["']/i, framework: "vite" }
  ];
  const hit = signatures.find((signature) => signature.pattern.test(html));
  return { method: "client-spa", framework: hit ? hit.framework : null };
}
1668
/**
 * Renders a URL in headless Chromium via puppeteer and returns the resulting HTML.
 *
 * puppeteer is loaded with a dynamic import; if that import fails, or anything
 * goes wrong while launching, navigating or reading the page, the function
 * resolves to null instead of throwing.
 *
 * @param {string} url - Page to load.
 * @param {{timeout?: number}} [options] - Navigation timeout in ms (default 25000).
 * @returns {Promise<{text: string, status: number, finalUrl: string}|null>}
 *   text is capped at 500000 characters; status is always reported as 200
 *   because the navigation response is not inspected.
 */
async function fetchWithHeadless(url, options) {
  let puppeteer;
  try {
    // The specifier is held in a variable before being handed to import().
    const mod = "puppeteer";
    puppeteer = await import(
      /* @vite-ignore */
      mod
    );
  } catch {
    return null;
  }

  const navigationTimeout = options?.timeout ?? 25e3;
  const skippedResourceTypes = ["image", "font", "media", "stylesheet"];
  const launchArgs = [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--single-process"
  ];

  let browser = null;
  try {
    browser = await puppeteer.launch({ headless: true, args: launchArgs });
    const page = await browser.newPage();

    // Requests for the resource types listed above are aborted; all others continue.
    await page.setRequestInterception(true);
    page.on("request", (request) => {
      if (skippedResourceTypes.includes(request.resourceType())) {
        request.abort();
      } else {
        request.continue();
      }
    });

    await page.setUserAgent("AEO-Visibility-Bot/1.0");
    await page.goto(url, { waitUntil: "networkidle2", timeout: navigationTimeout });

    // Best effort: wait up to 5s for some body text; a timeout here is ignored.
    try {
      await page.waitForFunction(
        'document.body && document.body.innerText && document.body.innerText.replace(/\\s+/g, " ").trim().length > 100',
        { timeout: 5e3 }
      );
    } catch {
    }

    const renderedHtml = await page.content();
    const finalUrl = page.url();
    return { text: renderedHtml.slice(0, 5e5), status: 200, finalUrl };
  } catch {
    return null;
  } finally {
    // Close the browser if it was launched; a failure to close is ignored.
    if (browser) {
      try {
        await browser.close();
      } catch {
      }
    }
  }
}
1729
+
1730
+ // src/scorecard-builder.ts
1731
/**
 * Maps a check's criterion_label to the label shown in reports.
 * Most entries map to themselves; the few that differ are flagged below.
 */
var CRITERION_LABELS = {
  "llms.txt File": "llms.txt File",
  "Schema.org Structured Data": "Schema.org Structured Data",
  "Q&A Content Format": "Q&A Content Format",
  "Clean, Crawlable HTML": "Clean, Crawlable HTML",
  // Renamed for display.
  "Entity Authority & E-E-A-T": "Entity Authority & NAP Consistency",
  "robots.txt for AI Crawlers": "robots.txt for AI Crawlers",
  // Renamed for display.
  "Comprehensive FAQ Sections": "Comprehensive FAQ Section",
  // Renamed for display.
  "Original Data & Expert Content": "Original Data & Expert Analysis",
  // Renamed for display.
  "Internal Linking Architecture": "Internal Linking Structure",
  "Semantic HTML5 & Accessibility": "Semantic HTML5 & Accessibility",
  "Content Freshness Signals": "Content Freshness Signals",
  "Sitemap Completeness": "Sitemap Completeness",
  "RSS/Atom Feed": "RSS/Atom Feed",
  "Table & List Extractability": "Table & List Extractability",
  "Definition Patterns": "Definition Patterns",
  "Direct Answer Paragraphs": "Direct Answer Paragraphs",
  "Content Licensing & AI Permissions": "Content Licensing & AI Permissions",
  "Author & Expert Schema": "Author & Expert Schema",
  "Fact & Data Density": "Fact & Data Density",
  "Canonical URL Strategy": "Canonical URL Strategy",
  "Content Publishing Velocity": "Content Publishing Velocity",
  "Schema Coverage & Depth": "Schema Coverage & Depth",
  "Speakable Schema": "Speakable Schema"
};
1756
/**
 * Converts a criterion score into its status label.
 *
 * Exact scores 0-3, 6 and 7 have dedicated labels. Any other value that is
 * <= 5 is "PARTIAL"; everything else is "STRONG".
 *
 * @param {number} score
 * @returns {string}
 */
function scoreToStatus(score) {
  switch (score) {
    case 0:
      return "MISSING";
    case 1:
      return "NEARLY EMPTY";
    case 2:
      return "POOR";
    case 3:
      return "WEAK";
    case 6:
      return "MODERATE";
    case 7:
      return "GOOD";
    default:
      return score <= 5 ? "PARTIAL" : "STRONG";
  }
}
1766
/**
 * Translates an internal finding severity into its report severity label.
 *
 * @param {string} severity - "critical", "high", "medium", "low" or "info".
 * @returns {string} The mapped label; unrecognized values yield "PARTIAL".
 */
function mapFindingSeverity(severity) {
  const reportLabels = new Map([
    ["critical", "CRITICAL"],
    ["high", "MISSING"],
    ["medium", "ADD"],
    ["low", "PARTIAL"],
    ["info", "WORKING"]
  ]);
  return reportLabels.get(severity) ?? "PARTIAL";
}
1782
/**
 * Picks the report "type" of a finding from its severity.
 *
 * @param {string} severity - Internal severity of the finding.
 * @param {boolean} hasFix - Whether the finding carries a suggested fix.
 * @returns {string} "Good", "Critical" or "Missing" for info/critical/high;
 *   otherwise "Issue" when a fix exists and "Note" when it does not.
 */
function mapFindingType(severity, hasFix) {
  switch (severity) {
    case "info":
      return "Good";
    case "critical":
      return "Critical";
    case "high":
      return "Missing";
    default:
      return hasFix ? "Issue" : "Note";
  }
}
1789
/**
 * Builds one scorecard row per criterion result.
 *
 * @param {Array<{criterion_label: string, score: number, findings: Array<{detail: string}>}>} results
 * @returns {Array<{id: number, criterion: string, score: number, status: string, keyFindings: string}>}
 *   id is the 1-based position; keyFindings joins the first three finding
 *   details with ". " and ends with a period when there is at least one.
 */
function buildScorecard(results) {
  return results.map((result, index) => {
    const headlineDetails = result.findings.slice(0, 3).map((finding) => finding.detail);
    let keyFindings = headlineDetails.join(". ");
    // Close the sentence unless the last detail already ends with a period.
    if (headlineDetails.length > 0 && !headlineDetails[headlineDetails.length - 1].endsWith(".")) {
      keyFindings += ".";
    }
    return {
      id: index + 1,
      criterion: CRITERION_LABELS[result.criterion_label] || result.criterion_label,
      score: result.score,
      status: scoreToStatus(result.score),
      keyFindings
    };
  });
}
1807
/**
 * Builds the detailed findings section: one entry per criterion result.
 *
 * Each finding's description is its detail, followed by ". <fix>" when a fix
 * is present. Findings whose description repeats an earlier one are dropped.
 * When fewer than two findings remain, one generic closing finding is
 * appended, chosen by whether the score is at least 7.
 *
 * @param {Array<{criterion_label: string, score: number, findings: Array<{severity: string, detail: string, fix?: string}>}>} results
 * @returns {Array<{id: number, name: string, findings: Array<{type: string, description: string, severity: string}>}>}
 */
function buildDetailedFindings(results) {
  return results.map((result, index) => {
    const label = CRITERION_LABELS[result.criterion_label] || result.criterion_label;
    const seenDescriptions = new Set();
    const findings = [];

    for (const { severity, detail, fix } of result.findings) {
      const description = fix ? `${detail}. ${fix}` : detail;
      if (seenDescriptions.has(description)) continue;
      seenDescriptions.add(description);
      findings.push({
        type: mapFindingType(severity, !!fix),
        description,
        severity: mapFindingSeverity(severity)
      });
    }

    if (findings.length < 2) {
      const filler = result.score >= 7
        ? { type: "Good", description: `${label} is well-implemented for AI engine visibility.`, severity: "WORKING" }
        : { type: "Note", description: `${label} needs improvement - review specific issues above.`, severity: "PARTIAL" };
      findings.push(filler);
    }

    return { id: index + 1, name: label, findings };
  });
}
1837
+
1838
+ // src/narrative-generator.ts
1839
/**
 * Per-criterion weights used when ranking improvement opportunities,
 * keyed by criterion id. generateOpportunities falls back to 0.05 for
 * criteria that are not listed here.
 */
var CRITERION_WEIGHTS = {
  // Heaviest criteria (0.15)
  llms_txt: 0.1,
  schema_markup: 0.15,
  qa_content_format: 0.15,

  clean_html: 0.1,
  entity_consistency: 0.1,
  robots_txt: 0.05,
  faq_section: 0.1,
  original_data: 0.1,
  internal_linking: 0.1,
  semantic_html: 0.05,

  content_freshness: 0.07,
  sitemap_completeness: 0.05,
  rss_feed: 0.03,
  table_list_extractability: 0.07,
  definition_patterns: 0.04,
  direct_answer_density: 0.07,
  content_licensing: 0.04,
  author_schema_depth: 0.04,
  fact_density: 0.05,
  canonical_url: 0.04,
  content_velocity: 0.03,
  schema_coverage: 0.03,
  speakable_schema: 0.03
};
1864
/**
 * Recommendation copy for each criterion, keyed by criterion id.
 * Every entry has a short action name, an effort level ("Low", "Medium" or
 * "High") and a description that is copied into generated opportunities.
 */
var OPPORTUNITY_TEMPLATES = {
  llms_txt: {
    name: "Create llms.txt File",
    effort: "Low",
    description: "Add a /llms.txt file that describes your site, core services, and key pages in markdown format. This helps AI engines like ChatGPT and Claude understand your site structure and content offerings."
  },
  schema_markup: {
    name: "Add Schema.org Structured Data",
    effort: "Medium",
    description: "Implement JSON-LD structured data (Organization, Service, Product, FAQPage) on key pages. Schema markup helps AI engines extract and cite your content accurately."
  },
  qa_content_format: {
    name: "Restructure Content as Q&A",
    effort: "Medium",
    description: 'Add question-based headings (H2/H3) throughout your content. Use "What is...", "How does...", "Why should..." patterns that match how users query AI assistants.'
  },
  clean_html: {
    name: "Fix HTML Structure & Enable HTTPS",
    effort: "Medium",
    description: "Ensure clean, well-structured HTML with proper meta tags, semantic elements, and HTTPS. Clean HTML makes your content more parseable by AI crawlers."
  },
  entity_consistency: {
    name: "Strengthen Entity Authority (NAP)",
    effort: "Low",
    description: "Add Organization schema with consistent name, address, phone (NAP). Include sameAs links to social profiles and authoritative directories to strengthen entity recognition."
  },
  robots_txt: {
    name: "Configure robots.txt for AI Crawlers",
    effort: "Low",
    description: "Update robots.txt to explicitly allow AI crawlers (GPTBot, ClaudeBot, PerplexityBot). Add a Sitemap directive to help crawlers discover your content."
  },
  faq_section: {
    name: "Build Comprehensive FAQ Section",
    effort: "Medium",
    description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
  },
  original_data: {
    name: "Add Original Data & Case Studies",
    effort: "High",
    description: "Publish original research, statistics, case studies, or proprietary data that AI engines can cite. Unique data points make your content a primary source rather than a derivative one."
  },
  internal_linking: {
    name: "Improve Internal Linking Architecture",
    effort: "Medium",
    description: "Strengthen internal linking with descriptive anchor text between related pages. Add breadcrumb navigation and ensure every key page is reachable within 3 clicks from the homepage."
  },
  semantic_html: {
    name: "Implement Semantic HTML5",
    effort: "Low",
    description: "Use semantic HTML5 elements (main, article, nav, header, footer, section) to give AI parsers clear content structure. Add lang attribute and ARIA labels for accessibility."
  },
  content_freshness: {
    name: "Add Content Freshness Signals",
    effort: "Low",
    description: "Include dateModified schema, visible last-updated dates, and time elements on content pages. Fresh content signals help AI engines prioritize your pages over stale alternatives."
  },
  sitemap_completeness: {
    name: "Create Complete Sitemap",
    effort: "Low",
    description: "Generate a comprehensive sitemap.xml with lastmod dates for all important pages. A complete sitemap ensures AI crawlers can discover and prioritize your full content library."
  },
  rss_feed: {
    name: "Deploy RSS/Atom Feed",
    effort: "Low",
    description: "Add an RSS or Atom feed linked from your homepage. Feeds signal active content publishing and give AI engines a structured way to track your latest content."
  },
  table_list_extractability: {
    name: "Add Structured Tables & Lists",
    effort: "Medium",
    description: "Use HTML tables for comparison data and ordered/unordered lists for features, steps, and specifications. Structured data formats are directly extractable by AI engines for answers."
  },
  definition_patterns: {
    name: "Add Definition-Style Content",
    effort: "Low",
    description: 'Include clear definition patterns ("X refers to...", "X is defined as...") for key terms and concepts. Definition-style content is highly citable by AI engines answering "what is" queries.'
  },
  direct_answer_density: {
    name: "Add Direct Answer Paragraphs",
    effort: "Medium",
    description: 'Write concise, standalone answer paragraphs (2-3 sentences) immediately after question headings. These "snippet-ready" paragraphs are ideal for AI engine citations.'
  },
  content_licensing: {
    name: "Add Content Licensing & ai.txt",
    effort: "Low",
    description: "Create an /ai.txt file specifying AI usage permissions and add license schema to your structured data. Clear licensing signals help AI engines understand how they can use your content."
  },
  author_schema_depth: {
    name: "Enhance Author & Expert Schema",
    effort: "Low",
    description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
  },
  fact_density: {
    name: "Increase Fact & Data Density",
    effort: "Medium",
    description: "Add specific numbers, percentages, statistics, and data points throughout your content. Fact-dense content gives AI engines concrete data to cite rather than vague claims."
  },
  canonical_url: {
    name: "Fix Canonical URL Strategy",
    effort: "Low",
    description: 'Add rel="canonical" tags to all pages pointing to the preferred URL version. Canonical URLs prevent duplicate content confusion and consolidate AI engine citations to a single authoritative URL.'
  },
  content_velocity: {
    name: "Increase Publishing Frequency",
    effort: "High",
    description: "Establish a regular content publishing cadence with dated entries in your sitemap. Consistent publishing signals to AI engines that your site is an active, current information source."
  },
  schema_coverage: {
    name: "Deepen Schema Coverage",
    effort: "Medium",
    description: "Extend structured data beyond the homepage to inner pages (articles, services, products). Consistent schema coverage across your site helps AI engines understand your full content depth."
  },
  speakable_schema: {
    name: "Add Speakable Schema",
    effort: "Low",
    description: "Add SpeakableSpecification schema with CSS selectors pointing to key content sections. This tells voice assistants and AI engines which parts of your page are most suitable for spoken answers."
  }
};
1981
/**
 * Labels the impact of fixing a criterion.
 *
 * The impact score is (10 - score) * weight * 100. Low-effort items with an
 * impact score of at least 3 are always "QUICK WIN"; everything else is
 * bucketed by the thresholds in the tier table below.
 *
 * @param {number} score - Current criterion score (0-10).
 * @param {number} weight - Criterion weight.
 * @param {string} effort - "Low", "Medium" or "High".
 * @returns {string} "QUICK WIN", "CRITICAL", "HIGH", "CORE AEO", "MEDIUM" or "LOW".
 */
function calculateImpact(score, weight, effort) {
  const impactScore = (10 - score) * weight * 100;
  if (effort === "Low" && impactScore >= 3) return "QUICK WIN";
  // Ordered from the highest threshold down; the first tier reached wins.
  const tiers = [
    [12, "CRITICAL"],
    [8, "HIGH"],
    [5, "CORE AEO"],
    [3, "MEDIUM"]
  ];
  const reached = tiers.find(([minimum]) => impactScore >= minimum);
  return reached ? reached[1] : "LOW";
}
1990
/**
 * Writes the short verdict paragraph for a report.
 *
 * The paragraph starts with a sentence chosen by score band, then names up to
 * three strengths (scorecard score >= 8) and up to three gaps (score <= 4),
 * and ends with notes about missing HTTPS and headless rendering when the
 * corresponding rawData flags call for them.
 *
 * @param {number} score - Overall score (0-100).
 * @param {Array<{criterion: string, score: number}>} scorecard
 * @param {{has_https: *, rendered_with_headless: *}} rawData
 * @param {string} domain - Only used in the lowest score band's sentence.
 * @returns {string} Sentences joined with single spaces.
 */
function generateVerdict(score, scorecard, rawData, domain) {
  const openingSentence = () => {
    if (score >= 86) return `Excellent AEO implementation scoring ${score}/100.`;
    if (score >= 71) return `Strong AEO fundamentals scoring ${score}/100 with room for optimization.`;
    if (score >= 56) return `Moderate AEO readiness at ${score}/100 with significant gaps to address.`;
    if (score >= 41) return `Below-average AEO readiness at ${score}/100 - multiple areas need attention.`;
    return `Critical AEO gaps at ${score}/100 - ${domain} is largely invisible to AI engines.`;
  };
  // filter() copies, so sorting never reorders the caller's scorecard.
  const topThreeNames = (keep, order) =>
    scorecard.filter(keep).sort(order).slice(0, 3).map((entry) => entry.criterion);

  const strengthNames = topThreeNames((entry) => entry.score >= 8, (a, b) => b.score - a.score);
  const gapNames = topThreeNames((entry) => entry.score <= 4, (a, b) => a.score - b.score);

  const sentences = [openingSentence()];
  if (strengthNames.length > 0) {
    sentences.push(`Key strengths include ${formatList(strengthNames)}.`);
  }
  if (gapNames.length > 0) {
    sentences.push(`Priority gaps: ${formatList(gapNames)}.`);
  }
  if (!rawData.has_https) {
    sentences.push("HTTPS is not enabled, which caps several criteria scores and reduces AI crawler trust.");
  }
  if (rawData.rendered_with_headless) {
    sentences.push("Note: this site uses client-side JavaScript rendering. AI crawlers see an empty page shell instead of content, which is the primary factor limiting the score.");
  }
  return sentences.join(" ");
}
2022
/**
 * Produces up to ten improvement opportunities, highest impact first.
 *
 * Criteria scoring above 7, and criteria without an OPPORTUNITY_TEMPLATES
 * entry, are left out. The rest are ranked by (10 - score) * weight * 100,
 * where weight comes from CRITERION_WEIGHTS (default 0.05).
 *
 * @param {Array} scorecard - Accepted for signature compatibility; not read.
 * @param {Array<{criterion: string, score: number}>} criterionResults
 * @returns {Array<{id: number, name: string, description: string, effort: string, impact: string}>}
 */
function generateOpportunities(scorecard, criterionResults) {
  const ranked = [];
  for (const { criterion, score } of criterionResults) {
    if (score > 7) continue;
    const template = OPPORTUNITY_TEMPLATES[criterion];
    if (!template) continue;
    const weight = CRITERION_WEIGHTS[criterion] ?? 0.05;
    ranked.push({
      impactScore: (10 - score) * weight * 100,
      impact: calculateImpact(score, weight, template.effort),
      template
    });
  }
  ranked.sort((left, right) => right.impactScore - left.impactScore);
  return ranked.slice(0, 10).map(({ template, impact }, position) => ({
    id: position + 1,
    name: template.name,
    description: template.description,
    effort: template.effort,
    impact
  }));
}
2050
/**
 * Builds the list of headline metrics ("pitch numbers") for a report.
 *
 * Metrics are emitted in a fixed order: an optional rendering warning, the
 * AEO score, schema types, AI crawler access, sitemap size, internal links,
 * an optional question-heading count, and the number of passing criteria.
 *
 * The "Criteria Passing" metric uses the actual scorecard length as its
 * denominator. Its 18 and 12 thresholds were written for a 23-criterion
 * scorecard, so they are scaled proportionally for other sizes; a
 * 23-entry scorecard produces exactly the same text as before.
 *
 * @param {number} score - Overall score (0-100).
 * @param {object} rawData - Raw data summary; reads rendered_with_headless,
 *   schema_types_found, robots_txt_ai_crawlers, robots_txt_blocked_crawlers,
 *   sitemap_url_count, internal_link_count, question_headings_count and
 *   blog_sample_question_headings.
 * @param {Array<{score: number}>} scorecard - Rows with a score of 7 or more count as passing.
 * @returns {Array<{metric: string, value: string, significance: string}>}
 */
function generatePitchNumbers(score, rawData, scorecard) {
  // Scorecard size the 18/12 passing thresholds were calibrated against.
  const REFERENCE_CRITERIA = 23;
  const metrics = [];
  if (rawData.rendered_with_headless) {
    metrics.push({
      metric: "Rendering Method",
      value: "Client-Side Only",
      significance: "AI crawlers see empty HTML. All content loads via JavaScript, making this site invisible to ChatGPT, Claude, and Perplexity."
    });
  }
  metrics.push({
    metric: "AEO Score",
    value: `${score}/100`,
    significance: score >= 70 ? "Above average AI engine visibility" : score >= 50 ? "Moderate AI visibility with clear improvement paths" : "Below average - significant optimization needed"
  });
  const schemaCount = rawData.schema_types_found.length;
  metrics.push({
    metric: "Schema Types",
    value: `${schemaCount} found`,
    significance: schemaCount >= 4 ? "Rich structured data helps AI engines parse content" : schemaCount >= 1 ? "Basic schema present but more types would improve AI extraction" : "No structured data - AI engines cannot reliably extract content"
  });
  const aiCrawlerCount = rawData.robots_txt_ai_crawlers.length;
  const blockedCount = rawData.robots_txt_blocked_crawlers.length;
  metrics.push({
    metric: "AI Crawler Access",
    value: blockedCount > 0 ? `${blockedCount} blocked` : aiCrawlerCount > 0 ? `${aiCrawlerCount} configured` : "Not configured",
    significance: blockedCount > 0 ? "Active AI crawlers are blocked from accessing content" : aiCrawlerCount > 0 ? "robots.txt explicitly addresses AI crawler access" : "No AI-specific crawler directives in robots.txt"
  });
  const sitemapUrls = rawData.sitemap_url_count;
  metrics.push({
    metric: "Sitemap URLs",
    value: sitemapUrls > 0 ? `${sitemapUrls} pages` : "No sitemap",
    significance: sitemapUrls >= 50 ? "Comprehensive content library discoverable by AI crawlers" : sitemapUrls >= 10 ? "Moderate content footprint in sitemap" : sitemapUrls > 0 ? "Small sitemap - expanding content improves AI coverage" : "No sitemap means AI crawlers must discover pages via links only"
  });
  const linkCount = rawData.internal_link_count;
  metrics.push({
    metric: "Internal Links",
    value: `${linkCount} links`,
    significance: linkCount >= 30 ? "Strong internal linking supports AI content discovery" : linkCount >= 10 ? "Moderate linking - adding more cross-references improves navigability" : "Weak internal linking limits AI crawler depth"
  });
  const questionCount = rawData.question_headings_count + (rawData.blog_sample_question_headings || 0);
  if (questionCount > 0) {
    metrics.push({
      metric: "Question Headings",
      value: `${questionCount} found`,
      significance: "Question-based headings match how users query AI assistants"
    });
  }
  const passing = scorecard.filter((s) => s.score >= 7).length;
  const total = scorecard.length;
  // Integer cross-multiplication keeps the comparison exact:
  // passing / total >= 18 / 23  <=>  passing * 23 >= 18 * total.
  const excellentCoverage = total > 0 && passing * REFERENCE_CRITERIA >= 18 * total;
  const goodCoverage = total > 0 && passing * REFERENCE_CRITERIA >= 12 * total;
  metrics.push({
    metric: "Criteria Passing",
    value: `${passing}/${total}`,
    significance: excellentCoverage ? "Excellent coverage across AEO dimensions" : goodCoverage ? "Good foundation with room to improve remaining criteria" : `${total - passing} criteria need attention for full AI visibility`
  });
  return metrics;
}
2105
/**
 * Writes the closing "bottom line" summary for a report.
 *
 * The wording depends on the score band (>= 86, >= 71, >= 56, >= 41, lower)
 * and may name up to three quick wins, high-impact items, or top
 * opportunities depending on the band.
 *
 * @param {number} score - Overall score (0-100).
 * @param {Array<{name: string, impact: string}>} opportunities - Ranked opportunities.
 * @param {Array<{score: number}>} scorecard - Rows with a score of 7 or more count as passing.
 * @param {string} domain - Domain named at the start of the summary.
 * @returns {string}
 */
function generateBottomLine(score, opportunities, scorecard, domain) {
  const firstThreeNames = (items) => items.slice(0, 3).map((item) => item.name).join(", ");
  const quickWins = opportunities.filter((o) => o.impact === "QUICK WIN");
  const highImpact = opportunities.filter((o) => o.impact === "CRITICAL" || o.impact === "HIGH");
  const passing = scorecard.filter((row) => row.score >= 7).length;
  const total = scorecard.length;

  if (score >= 86) {
    return `${domain} demonstrates excellent AI engine optimization with ${passing}/${total} criteria at good or strong levels. Focus on maintaining content freshness and expanding structured data coverage to stay ahead.`;
  }

  // Remaining bands are assembled from sentences joined by single spaces.
  const sentences = [];
  if (score >= 71) {
    sentences.push(`${domain} has a solid AEO foundation with ${passing}/${total} criteria passing.`);
    if (quickWins.length > 0) {
      sentences.push(`${quickWins.length} quick wins available: ${firstThreeNames(quickWins)}.`);
    }
    if (highImpact.length > 0) {
      sentences.push(`Address ${highImpact.length} high-impact opportunities to push the score above 85.`);
    }
  } else if (score >= 56) {
    sentences.push(`${domain} has moderate AI visibility with ${passing}/${total} criteria passing. ${opportunities.length} improvement opportunities identified.`);
    if (quickWins.length > 0) {
      sentences.push(`Start with quick wins: ${firstThreeNames(quickWins)}.`);
    }
  } else if (score >= 41) {
    sentences.push(`${domain} needs significant AEO work with only ${passing}/${total} criteria passing.`);
    if (highImpact.length > 0) {
      sentences.push(`Priority: ${firstThreeNames(highImpact)}.`);
    }
    sentences.push(`Implementing the top ${Math.min(5, opportunities.length)} recommendations could improve the score by 15-25 points.`);
  } else {
    sentences.push(`${domain} is largely invisible to AI engines with only ${passing}/${total} criteria passing. Fundamental AEO infrastructure is missing.`);
    if (opportunities.length > 0) {
      sentences.push(`Start with: ${firstThreeNames(opportunities)}.`);
    }
    sentences.push(`A comprehensive AEO implementation could transform AI visibility from near-zero to competitive.`);
  }
  return sentences.join(" ");
}
2141
/**
 * Joins items into an English list with an Oxford comma.
 *
 * @param {Array<string>} items
 * @returns {string} "" for no items, the item itself for one, "a and b" for
 *   two, and "a, b, and c" style for three or more.
 */
function formatList(items) {
  switch (items.length) {
    case 0:
      return "";
    case 1:
      return items[0];
    case 2:
      return `${items[0]} and ${items[1]}`;
    default: {
      const leading = items.slice(0, -1).join(", ");
      const last = items[items.length - 1];
      return `${leading}, and ${last}`;
    }
  }
}
2147
+
2148
+ // src/multi-page-fetcher.ts
2149
/**
 * Fetches a page over HTTP and returns its body when it looks usable.
 *
 * @param {string} url - URL to request; redirects are followed.
 * @param {number} [timeoutMs=10000] - Abort the request after this many ms.
 * @returns {Promise<{text: string, status: number, finalUrl: string}|null>}
 *   null when the request fails, the status is not 200, or the body is
 *   shorter than 200 characters. text is capped at 500000 characters.
 */
async function fetchPage(url, timeoutMs = 1e4) {
  try {
    const response = await fetch(url, {
      redirect: "follow",
      headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
      signal: AbortSignal.timeout(timeoutMs)
    });
    if (response.status !== 200) return null;
    const body = await response.text();
    return body.length < 200
      ? null
      : { text: body.slice(0, 5e5), status: response.status, finalUrl: response.url };
  } catch {
    // Network errors and timeouts are reported as "no page".
    return null;
  }
}
2164
/**
 * Common URL paths for each kind of standard page, keyed by page category.
 * fetchMultiPageData matches navigation links against every variant and
 * falls back to the first variant when no link matches.
 */
var PAGE_VARIANTS = {
  about: [
    "/about",
    "/about-us",
    "/company",
    "/who-we-are"
  ],
  pricing: [
    "/pricing",
    "/plans",
    "/packages"
  ],
  services: [
    "/services",
    "/features",
    "/solutions",
    "/products",
    "/what-we-do"
  ],
  contact: [
    "/contact",
    "/contact-us",
    "/get-in-touch"
  ],
  team: [
    "/team",
    "/our-team",
    "/authors",
    "/people",
    "/leadership"
  ],
  resources: [
    "/resources",
    "/resource-center",
    "/library"
  ],
  docs: [
    "/docs",
    "/documentation",
    "/help",
    "/help-center",
    "/support"
  ],
  cases: [
    "/case-studies",
    "/customers",
    "/success-stories",
    "/testimonials"
  ]
};
2174
/**
 * Collects the same-site page paths linked from the <nav> blocks of a page.
 *
 * Accepted hrefs are root-relative paths ("/about"), absolute http(s) URLs
 * and protocol-relative URLs ("//host/path") whose host matches `domain`
 * (ignoring a leading "www."). Links containing a fragment, the homepage,
 * static assets, and app/auth/commerce prefixes are skipped. Returned paths
 * have no trailing slash and no query string.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} domain - Site domain, with or without a leading "www.".
 * @returns {string[]} Unique paths in first-seen order.
 */
function extractNavLinks(html, domain) {
  const navBlocks = html.match(/<nav[\s\S]*?<\/nav>/gi) || [];
  const navHtml = navBlocks.join("\n");
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  const paths = /* @__PURE__ */ new Set();
  // Accept both quoting styles; single-quoted hrefs used to be missed.
  const hrefPattern = /href=(?:"([^"]*)"|'([^']*)')/gi;
  for (const match of navHtml.matchAll(hrefPattern)) {
    const href = match[1] ?? match[2];
    // Empty hrefs and anything carrying a fragment are not page links.
    if (!href || href.includes("#")) continue;
    let path;
    if (href.startsWith("//") || href.startsWith("http")) {
      // Protocol-relative URLs name a host, so they must pass the same
      // domain check as absolute URLs instead of being read as a path.
      let url;
      try {
        url = new URL(href.startsWith("//") ? `https:${href}` : href);
      } catch {
        continue;
      }
      const linkDomain = url.hostname.replace(/^www\./, "").toLowerCase();
      if (linkDomain !== cleanDomain) continue;
      path = url.pathname;
    } else if (href.startsWith("/")) {
      // Drop the query string so relative links match absolute ones.
      path = href.split("?")[0];
    } else {
      continue;
    }
    path = path.replace(/\/+$/, "") || "/";
    if (path === "/") continue;
    if (/\.(js|css|png|jpg|svg|ico|pdf|xml|txt)$/i.test(path)) continue;
    if (/^\/(api|wp-|static|assets|_next|auth|login|signup|cart|checkout)\b/i.test(path)) continue;
    paths.add(path);
  }
  return Array.from(paths);
}
2207
/**
 * Picks a spread of content-page URLs from a sitemap.
 *
 * Only <url> entries on `domain` (ignoring a leading "www.") are considered.
 * The homepage, paths matching the skip list (blog, docs, pricing, ...), and
 * paths deeper than three segments are excluded. When more candidates remain
 * than `limit`, an evenly spaced sample running from the first to the last
 * candidate is returned.
 *
 * @param {string} sitemapText - Raw sitemap XML.
 * @param {string} domain - Site domain, with or without a leading "www.".
 * @param {number} [limit=6] - Maximum number of URLs to return.
 * @returns {string[]} Up to `limit` URLs, in sitemap order.
 */
function extractContentPagesFromSitemap(sitemapText, domain, limit = 6) {
  const urlBlocks = sitemapText.match(/<url>([\s\S]*?)<\/url>/gi) || [];
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
  const candidates = [];
  const skipPatterns = /\/(?:blog|articles?|posts?|news|tag|category|author|feed|faq|about|pricing|contact|team|resources?|docs?|documentation|help|support|case-studies|customers|testimonials|sitemap|wp-|api|login|cart|checkout|search)\b/i;
  for (const block of urlBlocks) {
    const locMatch = block.match(/<loc>([^<]+)<\/loc>/i);
    if (!locMatch) continue;
    const url = locMatch[1].trim();
    try {
      const parsed = new URL(url);
      const urlDomain = parsed.hostname.replace(/^www\./, "").toLowerCase();
      if (urlDomain !== cleanDomain) continue;
      if (parsed.pathname === "/" || parsed.pathname === "") continue;
      const path = parsed.pathname.toLowerCase();
      if (skipPatterns.test(path)) continue;
      const segments = path.split("/").filter(Boolean);
      if (segments.length < 1 || segments.length > 3) continue;
      candidates.push(url);
    } catch {
      // A <loc> value that does not parse as a URL is skipped.
      continue;
    }
  }
  if (candidates.length <= limit) return candidates;
  // Whole-number sample size: a fractional limit would index past the end.
  const sampleSize = Math.max(0, Math.floor(limit));
  // A single-item sample has no spacing to compute; dividing by
  // (sampleSize - 1) would give a NaN index and an undefined entry.
  if (sampleSize === 1) return [candidates[0]];
  const result = [];
  for (let i = 0; i < sampleSize; i++) {
    const index = Math.round(i * (candidates.length - 1) / (sampleSize - 1));
    result.push(candidates[index]);
  }
  return result;
}
2238
/**
 * Fetches additional pages for a site and appends them to siteData.blogSample.
 *
 * One URL is queued per PAGE_VARIANTS category: the first homepage nav link
 * matching a variant, or the category's first variant when none matches.
 * Content URLs sampled from the sitemap are queued as well (when the sitemap
 * status is 200). URLs already present (the base URL or a blogSample
 * finalUrl) are not queued. All queued URLs are fetched in parallel; pages
 * with more than 500 characters of text are tagged with their category and
 * pushed onto siteData.blogSample.
 *
 * @param {object} siteData - Reads protocol, domain, homepage, sitemapXml and
 *   blogSample; blogSample is created if missing and is mutated.
 * @param {{timeoutMs?: number}} [options] - Per-request timeout (default 10000 ms).
 * @returns {Promise<number>} Number of pages added (0 when protocol or homepage is missing).
 */
async function fetchMultiPageData(siteData, options) {
  if (!siteData.protocol || !siteData.homepage) return 0;

  const timeoutMs = options?.timeoutMs ?? 1e4;
  const origin = `${siteData.protocol}://${siteData.domain}`;

  const knownUrls = /* @__PURE__ */ new Set([origin + "/", origin]);
  for (const page of siteData.blogSample || []) {
    if (page.finalUrl) knownUrls.add(page.finalUrl);
  }

  // url -> category; a Map keeps first-insertion order for the fetch list.
  const queue = /* @__PURE__ */ new Map();
  const enqueue = (url, category) => {
    if (!knownUrls.has(url)) queue.set(url, category);
  };

  const navPaths = extractNavLinks(siteData.homepage.text, siteData.domain);
  for (const [category, variants] of Object.entries(PAGE_VARIANTS)) {
    const navMatch = navPaths.find((navPath) => {
      const lowered = navPath.toLowerCase();
      return variants.some((variant) => lowered === variant || lowered.startsWith(variant + "/"));
    });
    enqueue(`${origin}${navMatch || variants[0]}`, category);
  }

  if (siteData.sitemapXml && siteData.sitemapXml.status === 200) {
    const contentUrls = extractContentPagesFromSitemap(siteData.sitemapXml.text, siteData.domain, 6);
    for (const url of contentUrls) {
      enqueue(url, "content");
    }
  }

  const entries = Array.from(queue.entries());
  if (entries.length === 0) return 0;

  const pages = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
  if (!siteData.blogSample) siteData.blogSample = [];

  let added = 0;
  pages.forEach((page, position) => {
    if (page && page.text.length > 500) {
      page.category = entries[position][1];
      siteData.blogSample.push(page);
      added++;
    }
  });
  return added;
}
2289
+
2290
+ // src/page-analyzer.ts
2291
/**
 * Returns the text of the first <title> element, whitespace-normalized.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string} The trimmed title with runs of whitespace collapsed to a
 *   single space, or "" when there is no <title> element.
 */
function extractTitle(html) {
  const found = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!found) return "";
  const rawTitle = found[1];
  return rawTitle.replace(/\s+/g, " ").trim();
}
2295
/**
 * Reduces HTML to its visible text.
 *
 * Script and style elements are removed together with their contents, every
 * remaining tag becomes a space, and whitespace is collapsed and trimmed.
 *
 * @param {string} html - Raw HTML.
 * @returns {string}
 */
function getTextContent(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
  const withoutTags = withoutStyles.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
2298
/**
 * Counts whitespace-separated words.
 *
 * @param {string} text
 * @returns {number} Number of non-whitespace runs; 0 for empty or missing text.
 */
function countWords(text) {
  if (!text) return 0;
  const words = text.match(/\S+/g);
  return words ? words.length : 0;
}
2302
/**
 * Reports a missing or empty <title> element.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{check: string, label: string, severity: string}|null}
 *   An error-level issue, or null when a non-empty title exists.
 */
function checkMissingTitle(html) {
  const titleIssue = (label) => ({ check: "missing-title", label, severity: "error" });
  if (!/<title[^>]*>[\s\S]*?<\/title>/i.test(html)) {
    return titleIssue("Missing <title> tag");
  }
  // The element exists; it still fails when its text is blank.
  if (!extractTitle(html)) {
    return titleIssue("Empty <title> tag");
  }
  return null;
}
2313
/**
 * Reports a missing (or empty) meta description.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{check: string, label: string, severity: string}|null}
 *   An error-level issue, or null when a meta description with non-empty
 *   content is present.
 */
function checkMissingMetaDescription(html) {
  // The name and content attributes may appear in either order.
  const descriptionPatterns = [
    /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i,
    /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i
  ];
  if (descriptionPatterns.some((pattern) => pattern.test(html))) {
    return null;
  }
  return { check: "missing-meta-description", label: "Missing meta description", severity: "error" };
}
2320
/**
 * Report a page that contains no <h1> opening tag.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   An "error" finding when no <h1> is found; otherwise null.
 */
function checkNoH1(html) {
  const openings = html.match(/<h1[\s>]/gi);
  const h1Count = openings ? openings.length : 0;
  return h1Count === 0
    ? { check: "no-h1", label: "No <h1> tag", severity: "error" }
    : null;
}
2327
/**
 * Report a page that contains more than one <h1> opening tag.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding whose label includes the count; null for zero or one <h1>.
 */
function checkMultipleH1(html) {
  const openings = html.match(/<h1[\s>]/gi);
  if (!openings || openings.length <= 1) {
    return null;
  }
  const total = openings.length;
  return { check: "multiple-h1", label: `Multiple <h1> tags (${total})`, severity: "warning" };
}
2334
/**
 * Report a page with no JSON-LD <script> element.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding when no `application/ld+json` script tag matches;
 *   otherwise null.
 */
function checkNoSchema(html) {
  const ldJsonTag = /<script[^>]*type=["']application\/ld\+json["'][^>]*>/i;
  return ldJsonTag.test(html)
    ? null
    : { check: "no-schema", label: "No JSON-LD structured data", severity: "warning" };
}
2341
/**
 * Report a page with no canonical <link> element.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding when no `rel="canonical"` link tag matches; otherwise null.
 */
function checkMissingCanonical(html) {
  const canonicalTag = /<link[^>]*rel=["']canonical["'][^>]*>/i;
  return canonicalTag.test(html)
    ? null
    : { check: "missing-canonical", label: "Missing canonical link", severity: "warning" };
}
2348
/**
 * Report a page with no Open Graph <meta> tag.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding when no meta tag has a `property` starting with "og:";
 *   otherwise null.
 */
function checkMissingOgTags(html) {
  const ogMeta = /<meta\s[^>]*property=["']og:/i;
  return ogMeta.test(html)
    ? null
    : { check: "missing-og-tags", label: "No Open Graph tags", severity: "warning" };
}
2355
/**
 * Report a page whose word count is below 300.
 *
 * @param {number} wordCount - Number of words on the page.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding whose label includes the count; null at 300 words or more.
 */
function checkThinContent(wordCount) {
  return wordCount < 300
    ? { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" }
    : null;
}
2361
/**
 * Report <img> tags that carry no quoted `alt` attribute.
 *
 * An explicitly empty alt (`alt=""`) is accepted; only tags with no quoted
 * alt attribute at all are counted.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding with the number of offending images, or null when
 *   there are no images or every image has an alt attribute.
 */
function checkImagesMissingAlt(html) {
  const tags = html.match(/<img\s[^>]*>/gi) || [];
  if (tags.length === 0) {
    return null;
  }
  const carriesAlt = (tag) =>
    /\salt=["'][^"']+["']/i.test(tag) || /\salt=["']["']/i.test(tag);
  const missing = tags.filter((tag) => !carriesAlt(tag)).length;
  if (missing === 0) {
    return null;
  }
  const plural = missing > 1 ? "s" : "";
  return {
    check: "images-missing-alt",
    label: `${missing} image${plural} missing alt text`,
    severity: "warning"
  };
}
2379
/**
 * Report a page that links to no other URL on its own site.
 *
 * Each anchor href is resolved against the page URL, so root-relative
 * (`/about`), document-relative (`pricing.html`), protocol-relative and
 * absolute links are all handled, including links that carry a `#fragment`.
 * A link is internal when it resolves to an http(s) URL whose hostname equals
 * the page hostname, ignoring case and a leading "www.". Empty hrefs and pure
 * in-page fragments (`#top`) are not counted.
 *
 * @param {string} html - Raw HTML markup.
 * @param {string} url - Absolute URL of the page being checked.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "warning" finding when no internal link is found; null when at least
 *   one exists or when `url` cannot be parsed.
 */
function checkNoInternalLinks(html, url) {
  const normalizeHost = (hostname) => hostname.replace(/^www\./, "").toLowerCase();
  let pageUrl;
  try {
    pageUrl = new URL(url);
  } catch {
    // Without a valid page URL there is nothing to compare against.
    return null;
  }
  const domain = normalizeHost(pageUrl.hostname);
  // Require whitespace before `href` so attributes such as `data-href` are not mistaken for it.
  const anchorHref = /<a\s(?:[^>]*?\s)?href\s*=\s*["']([^"']*)["']/gi;
  let internalCount = 0;
  for (const match of html.matchAll(anchorHref)) {
    const href = match[1].trim();
    if (href === "" || href.startsWith("#")) continue;
    let target;
    try {
      target = new URL(href, pageUrl);
    } catch {
      // Malformed href: it cannot be an internal link, so skip it.
      continue;
    }
    // Drops mailto:, tel:, javascript: and other non-web schemes.
    if (target.protocol !== "http:" && target.protocol !== "https:") continue;
    if (normalizeHost(target.hostname) === domain) internalCount++;
  }
  if (internalCount === 0) {
    return { check: "no-internal-links", label: "No internal links found", severity: "warning" };
  }
  return null;
}
2407
/**
 * Report the JSON-LD types declared on a page as a strength.
 *
 * Every `application/ld+json` script block is scanned textually for `"@type"`
 * entries. Both a single string value (`"@type": "Article"`) and an array of
 * strings (`"@type": ["Article", "NewsArticle"]`) are recognised. Types are
 * de-duplicated and listed in order of first appearance.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   An "info" finding listing the types, or null when there is no JSON-LD
 *   block or no `@type` value is found.
 */
function checkHasStructuredData(html) {
  const ldBlocks = html.match(/<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi) || [];
  if (ldBlocks.length === 0) return null;
  const types = /* @__PURE__ */ new Set();
  for (const block of ldBlocks) {
    const content = block.replace(/<\/?script[^>]*>/gi, "");
    // Group 1 captures a scalar type; group 2 captures the inside of an array of types.
    for (const entry of content.matchAll(/"@type"\s*:\s*(?:"([^"]+)"|\[([^\]]*)\])/g)) {
      if (entry[1] !== undefined) {
        types.add(entry[1]);
        continue;
      }
      for (const item of entry[2].matchAll(/"([^"]+)"/g)) {
        types.add(item[1]);
      }
    }
  }
  if (types.size > 0) {
    return {
      check: "has-structured-data",
      label: `JSON-LD: ${Array.from(types).join(", ")}`,
      severity: "info"
    };
  }
  return null;
}
2428
/**
 * Report h2-h4 headings that are phrased as questions, as a strength.
 *
 * A heading counts when its tag-stripped, trimmed text ends with "?" or
 * starts with one of a fixed list of question words (case-insensitive).
 *
 * @param {string} html - Raw HTML markup.
 * @returns {{check: string, label: string, severity: string} | null}
 *   An "info" finding with the number of matching headings, or null when
 *   there are none.
 */
function checkHasQuestionHeadings(html) {
  const headingBlocks = html.match(/<h[2-4][^>]*>[\s\S]*?<\/h[2-4]>/gi) || [];
  const readsAsQuestion = (text) =>
    /\?$/.test(text) ||
    /^(what|how|why|when|where|who|which|can|do|does|is|are|should|will)\b/i.test(text);
  const questionCount = headingBlocks
    .map((block) => block.replace(/<[^>]*>/g, "").trim())
    .filter(readsAsQuestion).length;
  if (questionCount === 0) {
    return null;
  }
  const plural = questionCount > 1 ? "s" : "";
  return {
    check: "has-question-headings",
    label: `${questionCount} question-format heading${plural}`,
    severity: "info"
  };
}
2446
/**
 * Run every per-page check against one HTML document.
 *
 * @param {string} html - Raw HTML markup of the page.
 * @param {string} url - URL of the page; echoed back and used by the
 *   internal-link check.
 * @param {string} category - Page category; echoed back unchanged.
 * @returns {{url: string, title: string, category: string, wordCount: number,
 *   issues: object[], strengths: object[]}}
 *   The page summary. `issues` and `strengths` hold the non-null findings in
 *   the order the checks are listed below.
 */
function analyzePage(html, url, category) {
  const title = extractTitle(html);
  const wordCount = countWords(getTextContent(html));
  // Each check returns a finding object or null; keep only the findings.
  const issues = [
    checkMissingTitle(html),
    checkMissingMetaDescription(html),
    checkNoH1(html),
    checkMultipleH1(html),
    checkNoSchema(html),
    checkMissingCanonical(html),
    checkMissingOgTags(html),
    checkThinContent(wordCount),
    checkImagesMissingAlt(html),
    checkNoInternalLinks(html, url)
  ].filter(Boolean);
  const strengths = [
    checkHasStructuredData(html),
    checkHasQuestionHeadings(html)
  ].filter(Boolean);
  return { url, title, category, wordCount, issues, strengths };
}
2476
/**
 * Analyze the homepage and every sampled page held in `siteData`.
 *
 * @param {object} siteData - Fetched site data. Reads `homepage`,
 *   `blogSample`, `protocol` and `domain`.
 * @returns {object[]} One `analyzePage` result per page, homepage first.
 */
function analyzeAllPages(siteData) {
  const reviews = [];
  const home = siteData.homepage;
  if (home) {
    // The homepage URL is rebuilt from protocol and domain.
    const homeUrl = `${siteData.protocol}://${siteData.domain}`;
    reviews.push(analyzePage(home.text, homeUrl, home.category || "homepage"));
  }
  const sampled = siteData.blogSample;
  if (sampled) {
    for (const page of sampled) {
      const pageUrl = page.finalUrl || "unknown";
      const pageCategory = page.category || "content";
      reviews.push(analyzePage(page.text, pageUrl, pageCategory));
    }
  }
  return reviews;
}
2490
+
2491
+ // src/audit.ts
2492
/**
 * Measure the length of the visible text in an HTML string.
 *
 * <script> and <style> elements are removed with their contents, remaining
 * tags become spaces, and whitespace is collapsed and trimmed before measuring.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {number} Character length of the resulting text.
 */
function getTextLength(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
  const visibleText = withoutStyles.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
  return visibleText.length;
}
2495
/**
 * Audit a domain end to end and assemble the report object.
 *
 * Steps, in order: prefetch site data; reject unreachable, redirected and
 * parked domains; optionally re-render an SPA-shell homepage (and then the
 * /faq page) with the headless fetcher, keeping a rendered page only when it
 * has more text than the raw one; optionally fetch additional pages; then
 * score and build the report sections.
 *
 * @param {string} domain - Domain to audit.
 * @param {{noHeadless?: boolean, noMultiPage?: boolean}} [options] -
 *   `noHeadless` skips the headless re-render; `noMultiPage` skips the
 *   additional page fetch.
 * @returns {Promise<object>} The report. `renderedWithHeadless: true` is
 *   present only when the headless-rendered homepage was used.
 * @throws {Error} When the prefetched data has no protocol, has
 *   `redirectedTo` set, or has `parkedReason` set.
 */
async function audit(domain, options) {
  const startedAt = Date.now();
  const siteData = await prefetchSiteData(domain);
  if (!siteData.protocol) {
    throw new Error(`Could not connect to ${domain} (no HTTPS or HTTP response)`);
  }
  if (siteData.redirectedTo) {
    throw new Error(`${domain} redirects to ${siteData.redirectedTo} (hijacked domain)`);
  }
  if (siteData.parkedReason) {
    throw new Error(`${domain} is a parked/lost domain (${siteData.parkedReason})`);
  }

  let renderedWithHeadless = false;
  const headlessAllowed = !options?.noHeadless;
  if (headlessAllowed && siteData.homepage && isSpaShell(siteData.homepage.text)) {
    const rawHomeLength = getTextLength(siteData.homepage.text);
    const renderedHome = await fetchWithHeadless(`${siteData.protocol}://${domain}`);
    // Keep the rendered homepage only if it exposes more text than the raw HTML.
    if (renderedHome && getTextLength(renderedHome.text) > rawHomeLength) {
      siteData.homepage = renderedHome;
      renderedWithHeadless = true;
    }
    // The FAQ page is re-rendered only after the homepage render was adopted.
    if (renderedWithHeadless && siteData.faqPage && isSpaShell(siteData.faqPage.text)) {
      const renderedFaq = await fetchWithHeadless(`${siteData.protocol}://${domain}/faq`);
      if (renderedFaq && getTextLength(renderedFaq.text) > getTextLength(siteData.faqPage.text)) {
        siteData.faqPage = renderedFaq;
      }
    }
  }

  if (!options?.noMultiPage) {
    await fetchMultiPageData(siteData);
  }

  const results = auditSiteFromData(siteData);
  const overallScore = calculateOverallScore(results);
  const rawData = extractRawDataSummary(siteData);
  if (renderedWithHeadless) {
    rawData.rendered_with_headless = true;
  }
  const scorecard = buildScorecard(results);
  const detailedFindings = buildDetailedFindings(results);
  const verdict = generateVerdict(overallScore, scorecard, rawData, domain);
  const opportunities = generateOpportunities(scorecard, results);
  const pitchNumbers = generatePitchNumbers(overallScore, rawData, scorecard);
  const bottomLine = generateBottomLine(overallScore, opportunities, scorecard, domain);
  const pagesReviewed = analyzeAllPages(siteData);
  // Elapsed seconds, rounded to one decimal place.
  const elapsed = Math.round((Date.now() - startedAt) / 100) / 10;

  const report = {
    site: domain,
    auditDate: (/* @__PURE__ */ new Date()).toLocaleDateString("en-US", { year: "numeric", month: "long", day: "numeric" }),
    auditor: "AEORank",
    engine: "instant",
    overallScore,
    verdict,
    scorecard,
    detailedFindings,
    opportunities,
    pitchNumbers,
    bottomLine,
    pagesReviewed,
    elapsed
  };
  if (renderedWithHeadless) {
    report.renderedWithHeadless = true;
  }
  return report;
}
2559
+ // Annotate the CommonJS export names for ESM import in node. The `0 &&` guard short-circuits, so this assignment never executes; it only lists the export names statically.
2560
+ 0 && (module.exports = {
2561
+ CRITERION_LABELS,
2562
+ analyzeAllPages,
2563
+ analyzePage,
2564
+ audit,
2565
+ auditSiteFromData,
2566
+ buildDetailedFindings,
2567
+ buildScorecard,
2568
+ calculateOverallScore,
2569
+ classifyRendering,
2570
+ detectParkedDomain,
2571
+ extractContentPagesFromSitemap,
2572
+ extractNavLinks,
2573
+ extractRawDataSummary,
2574
+ fetchMultiPageData,
2575
+ fetchWithHeadless,
2576
+ generateBottomLine,
2577
+ generateOpportunities,
2578
+ generatePitchNumbers,
2579
+ generateVerdict,
2580
+ isSpaShell,
2581
+ prefetchSiteData,
2582
+ scoreToStatus
2583
+ });
2584
+ //# sourceMappingURL=index.cjs.map