aeorank 3.1.1 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,4 +1,10 @@
1
1
  #!/usr/bin/env node
2
+ import {
3
+ isSafeFetchTarget,
4
+ isSafePublicUrl,
5
+ normalizeHostname,
6
+ safeFetch
7
+ } from "./chunk-PYV5JVTC.js";
2
8
 
3
9
  // src/cli.ts
4
10
  import { writeFileSync } from "fs";
@@ -145,19 +151,181 @@ function shingleJaccardSimilarity(a, b) {
145
151
  return union === 0 ? 0 : intersection / union;
146
152
  }
147
153
 
154
+ // src/helpful-content.ts
155
/**
 * Clamp a number from above.
 *
 * @param {number} value - Candidate value.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  return Math.min(max, value);
}
158
/**
 * Clamp a number from below.
 *
 * @param {number} value - Candidate value.
 * @param {number} min - Lower bound.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  return Math.max(min, value);
}
161
/**
 * Count how many times `pattern` occurs in `text`.
 *
 * `String.prototype.match` only returns one entry per occurrence when the
 * pattern is global; for a non-global RegExp it returns the first match plus
 * its capture groups, so the array length is not a match count. A non-global
 * RegExp is therefore promoted to a global copy before counting.
 *
 * @param {string | null | undefined} text - Text to search.
 * @param {RegExp | string} pattern - Pattern to count.
 * @returns {number} Number of occurrences (0 when there are none or `text` is nullish).
 */
function countMatches(text, pattern) {
  if (text == null) return 0;
  const counter =
    pattern instanceof RegExp && !pattern.global
      ? new RegExp(pattern.source, `${pattern.flags}g`)
      : pattern;
  return text.match(counter)?.length ?? 0;
}
164
/**
 * Replace every `<script>…</script>` and `<style>…</style>` element with a space.
 *
 * End tags may carry whitespace before `>` (e.g. `</script >`), so the closing
 * pattern accepts anything up to the next `>`. The `\b` after the tag name
 * keeps longer tag names that merely start with "script"/"style" untouched.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} HTML without script and style elements.
 */
function stripScriptsAndStyles(html) {
  return html
    .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, " ")
    .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, " ");
}
167
/**
 * Reduce HTML to plain text: script/style elements are removed, remaining
 * tags are replaced by spaces, and whitespace runs are collapsed.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Trimmed, single-spaced text.
 */
function getTextContent(html) {
  const withoutCode = stripScriptsAndStyles(html);
  const withoutTags = withoutCode.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
170
/**
 * Return the markup inside the `<body>` element.
 *
 * The `</body>` end tag may legally be omitted; in that case everything after
 * the opening `<body>` tag is returned rather than the whole document, so
 * head content does not leak into body-level checks. When there is no
 * `<body>` tag at all, the input is returned unchanged.
 *
 * @param {string} html - Raw HTML document or fragment.
 * @returns {string} Body markup, or `html` itself when no `<body>` tag exists.
 */
function getBodyHtml(html) {
  const open = /<body\b[^>]*>/i.exec(html);
  if (!open) return html;
  const afterOpen = html.slice(open.index + open[0].length);
  const closeAt = afterOpen.search(/<\/body\s*>/i);
  return closeAt === -1 ? afterOpen : afterOpen.slice(0, closeAt);
}
174
/**
 * Extract the plain text of the first `<p>` element in the document body.
 *
 * The opening-tag pattern requires the tag name to be exactly `p` (followed by
 * `>` or whitespace + attributes); a bare `<p[^>]*>` would also match
 * `<pre>`, `<picture>`, `<path>` and similar tags.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Single-spaced paragraph text, or "" when no paragraph is found.
 */
function getFirstParagraphText(html) {
  const firstPara = /<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/i.exec(getBodyHtml(html));
  if (!firstPara) return "";
  return firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
178
/**
 * Return the first `count` whitespace-separated words of `text`, joined by
 * single spaces.
 *
 * Empty tokens are dropped before slicing, so leading whitespace does not
 * consume one of the `count` slots.
 *
 * @param {string} text - Source text.
 * @param {number} count - Maximum number of words to keep.
 * @returns {string} The leading words, single-spaced.
 */
function firstNWords(text, count) {
  return text.split(/\s+/).filter(Boolean).slice(0, count).join(" ");
}
181
/**
 * Extract the plain text of the first `<h1>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Single-spaced heading text, or "" when there is no `<h1>`.
 */
function getH1Text(html) {
  const match = /<h1\b[^>]*>([\s\S]*?)<\/h1>/i.exec(html);
  if (!match) return "";
  // Nested markup (e.g. <em>, <span>) is flattened to spaces, then collapsed.
  return match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
185
/**
 * Extract the plain text of the first `<title>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Single-spaced title text, or "" when there is no `<title>`.
 */
function getTitleText(html) {
  const match = /<title\b[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (!match) return "";
  return match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
189
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - Text to measure; falsy input counts as zero words.
 * @returns {number} Number of non-empty tokens.
 */
function wordCount(text) {
  if (!text) return 0;
  return text.split(/\s+/).filter(Boolean).length;
}
192
/**
 * Heuristically decide whether a page is editorial/article-like content.
 *
 * Signals are summed and the page qualifies at two or more:
 *   +2  URL path segment such as /blog, /guides, /case-studies, /faq
 *   +1  an <article> element
 *   +1  at least two <h2>/<h3> headings
 *   +1  500 or more words of visible text
 *   +1  a <time> element or datePublished/dateModified in the markup
 *   +1  byline wording ("written by", "reviewed by", …) in the text
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL; skipped when falsy.
 * @returns {boolean} True when at least two signal points are present.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  // Singular/plural forms are spelled out because the trailing \b would
  // otherwise reject "case-study", "case-studies", "tutorials", "faqs", etc.
  const contentPath =
    /\/(?:blog|articles?|guides?|docs|learn|help|news|insights|resources|how-to|tutorials?|case-stud(?:y|ies)|whitepapers?|faqs?)\b/i;

  let signals = 0;
  if (url && contentPath.test(url)) signals += 2;
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wordCount(text) >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
206
/**
 * Decide whether readers would expect this page to disclose a methodology
 * (reviews, comparisons, benchmarks, studies, reports, …).
 *
 * Checked in order: title + H1 wording, URL wording, then methodology
 * vocabulary in the visible text. Title and URL terms are matched as whole
 * words so that "latest" does not count as "test", "canvas" as "vs", or
 * "asbestos" as "best". Body text is only extracted if the cheaper title and
 * URL checks fail.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL.
 * @returns {boolean} True when the page looks like methodology-bearing content.
 */
function expectsMethodology(html, url) {
  const titleTerms =
    /\b(?:reviews?|reviewed|compare[sd]?|comparisons?|vs\.?|best|benchmark(?:s|ed|ing)?|stud(?:y|ies)|analys[ei]s|surveys?|reports?|research|tested|tests?|testing|methodolog(?:y|ies))\b/i;
  // URLs use "-", "_", "/" and "." as separators; "_" is a word character for
  // \b, so the boundaries are expressed explicitly on the lowercased URL.
  const urlTerms =
    /(?:^|[^a-z0-9])(?:reviews?|compare|comparisons?|benchmarks?|stud(?:y|ies)|analys[ei]s|surveys?|research|reports?|best)(?![a-z0-9])/;

  const title = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  if (titleTerms.test(title)) return true;

  const urlText = (url || "").toLowerCase();
  if (urlTerms.test(urlText)) return true;

  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(getTextContent(html));
}
218
/**
 * Check that the topic promised by the title/H1 actually appears early in
 * the page copy.
 *
 * Keywords are the distinct words of 5+ characters from the title and H1
 * (minus a small stop list). The `<title>` and `<h1>` elements are removed
 * from the markup before the first 250 words are extracted; otherwise the
 * keywords would always be found in their own source. Keywords are split on
 * any non-letter/non-digit so trailing punctuation ("networking.") does not
 * prevent a match.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean} True when at least min(2, keywordCount) keywords occur
 *   in the early copy; false when no usable keywords exist.
 */
function titleAndBodyAlign(html) {
  const stopWords = new Set([
    "about", "guide", "complete", "ultimate", "best", "learn", "understand",
    "what", "when", "where", "which", "their", "there", "these", "those",
  ]);
  const topic = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const uniqueKeywords = [
    ...new Set(topic.split(/[^\p{L}\p{N}]+/u).filter((w) => w.length >= 5 && !stopWords.has(w))),
  ];
  if (uniqueKeywords.length === 0) return false;

  const copyOnly = html
    .replace(/<title\b[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1\b[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const text = firstNWords(getTextContent(copyOnly), 250).toLowerCase();

  const hits = uniqueKeywords.filter((w) => text.includes(w)).length;
  return hits >= Math.min(2, uniqueKeywords.length);
}
228
// Heuristic vocabulary for the helpful-content scorers.
// Patterns carrying the `g` flag are meant for counting via String.prototype.match;
// calling .test()/.exec() on them directly would be stateful through lastIndex.
// Apostrophes accept both the straight (') and typographic (’) forms, since
// published copy commonly uses the latter ("In today’s…", "Here’s what to do").

// Boilerplate ways to open a page (anchored to the start of the paragraph).
var GENERIC_OPENERS = /^(?:in today['’]?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you['’]?re looking|in the modern|in the digital age)/i;
// Decision- and action-oriented phrasing.
var PRACTICAL_LANGUAGE = /\b(?:here['’]?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
// Wording that weighs pros against cons.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
// Filler phrases.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
// Calls to action.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
// First-person verbs of direct work ("we tested", "I measured", …).
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
// Situational context around that work (durations, cohorts, settings).
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
// Mentions of original artifacts.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
// Admissions of limits, failures, and lessons learned.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
// Anchor tags whose href contains an author/team-style path segment.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
// Visible byline wording.
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
// Author-bio and credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
// Process-disclosure vocabulary.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
// Concrete methodology details.
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
242
/**
 * Score (0–10) how directly a page serves the visitor's task rather than
 * padding around it.
 *
 * Empty pages score 0; short pages that are not content-like score a neutral
 * 5. Otherwise the base is 3 (content-like) or 5, adjusted by:
 *   first paragraph: +2 when it avoids a generic opener, −2 when it uses one
 *   practical language: +2 in the first 300 words, else +1 for 2+ anywhere
 *   trade-off language: +2 for 2+ matches, +1 for one
 *   +1 title/H1 keywords found in the copy, +1 summary/next-step wording
 *   calls to action in the first 1800 chars of body markup: −2 for 3+, −1 for 2
 *   fluff phrases: −2 for 3+, −1 for 1–2
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;
  let score = contentLike ? 3 : 5;

  // The opener either earns or costs two points; no paragraph means neither.
  const firstPara = getFirstParagraphText(html);
  if (firstPara) {
    score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;
  }

  const earlyText = firstNWords(text, 300);
  if (countMatches(earlyText, PRACTICAL_LANGUAGE) >= 1) score += 2;
  else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) score += 1;

  const tradeoffCount = countMatches(text, TRADEOFF_LANGUAGE);
  if (tradeoffCount >= 2) score += 2;
  else if (tradeoffCount >= 1) score += 1;

  if (titleAndBodyAlign(html)) score += 1;
  // Accept straight and typographic apostrophes in "here's".
  if (/\b(?:bottom line|key takeaway|here['’]?s the short answer|next steps?)\b/i.test(text)) score += 1;

  // CTAs are counted on raw body markup so button/link labels are included.
  const earlyBodyHtml = getBodyHtml(html).slice(0, 1800);
  const earlyCtas = countMatches(earlyBodyHtml, EARLY_CTA_PATTERN);
  if (earlyCtas >= 3) score -= 2;
  else if (earlyCtas >= 2) score -= 1;

  const fluffCount = countMatches(text, FLUFF_LANGUAGE);
  if (fluffCount >= 3) score -= 2;
  else if (fluffCount >= 1) score -= 1;

  return floor(cap(score, 10), 0);
}
269
/**
 * Score (0–10) how much evidence of direct use, testing, or observation a
 * page shows.
 *
 * Empty pages score 0. The base is 2 for content-like pages and 5 otherwise,
 * then:
 *   first-person action verbs: +4 for 3+, +2 for 1–2
 *   experience context:        +2 for 2+, +1 for one
 *   artifacts (wording plus <figure>/<figcaption> tags): +2 for 3+, +1 for 1–2
 *   limitation language:       +2 for 2+, +1 for one
 *   −1 when manufacturer/vendor description, specification, or copy is mentioned
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  let score = isContentLikePage(html, url) ? 2 : 5;

  const actionCount = countMatches(text, FIRST_HAND_ACTIONS);
  if (actionCount >= 3) score += 4;
  else if (actionCount >= 1) score += 2;

  const contextCount = countMatches(text, EXPERIENCE_CONTEXT);
  if (contextCount >= 2) score += 2;
  else if (contextCount >= 1) score += 1;

  // Figure markup is counted on the raw HTML because tags are absent from `text`.
  const artifactCount =
    countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
  if (artifactCount >= 3) score += 2;
  else if (artifactCount >= 1) score += 1;

  const limitationCount = countMatches(text, LIMITATION_LANGUAGE);
  if (limitationCount >= 2) score += 2;
  else if (limitationCount >= 1) score += 1;

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) score -= 1;

  return floor(cap(score, 10), 0);
}
289
/**
 * Score (0–10) how clearly a page shows who created or reviewed it.
 *
 * Empty pages score 0 and pages that are not content-like get a neutral 5.
 * Content-like pages start at 0 and earn:
 *   +3 byline (visible wording, an "author" class, or rel="author")
 *   +2 link to an author/team-style URL
 *   +2 author-bio or credential wording
 *   +1 reviewer/editor attribution
 *   +2 a JSON-LD style "@type": "Person" entry
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const hasByline =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);
  const hasPersonSchema = /"@type"\s*:\s*"Person"/i.test(html);

  let score = 0;
  if (hasByline) score += 3;
  if (AUTHOR_LINK_PATTERN.test(html)) score += 2;
  if (AUTHOR_BIO_PATTERN.test(text)) score += 2;
  if (/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text)) score += 1;
  if (hasPersonSchema) score += 2;

  return floor(cap(score, 10), 0);
}
304
/**
 * Score (0–10) how openly a page explains how its content was produced,
 * tested, or reviewed.
 *
 * Empty pages score 0. Pages where a methodology is expected start at 2; all
 * other pages start at 5. (The previous nested ternary yielded 5 for both
 * content-like and non-content-like pages, so that lookup was dropped.) Then:
 *   methodology terms:  +3 for 2+, +2 for one
 *   methodology detail: +3 for 3+, +2 for two, +1 for one
 *   +1 quantified process wording ("tested 12", "over 3 weeks", …)
 *   +1 a <figure> or <table> alongside at least one methodology term
 *   +1 AI-assistance or human-review disclosure
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, forwarded to the expectation check.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  let score = expectsMethodology(html, url) ? 2 : 5;

  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;

  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;

  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;

  return floor(cap(score, 10), 0);
}
322
+
148
323
  // src/site-crawler.ts
149
- async function fetchText(url) {
150
- try {
151
- const res = await fetch(url, {
152
- signal: AbortSignal.timeout(15e3),
153
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
154
- redirect: "follow"
155
- });
156
- const text = await res.text();
157
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
158
- } catch {
159
- return null;
160
- }
324
+ async function fetchText(url, expectedDomain) {
325
+ const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
326
+ if (!res) return null;
327
+ const text = await res.text();
328
+ return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
161
329
  }
162
330
  function extractDomain(url) {
163
331
  return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
@@ -198,13 +366,16 @@ function isHtmlResponse(result) {
198
366
  return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
199
367
  }
200
368
  async function prefetchSiteData(domain) {
369
+ if (!await isSafeFetchTarget(`https://${domain}`)) {
370
+ return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
371
+ }
201
372
  let protocol = null;
202
373
  let homepage = null;
203
- homepage = await fetchText(`https://${domain}`);
374
+ homepage = await fetchText(`https://${domain}`, domain);
204
375
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
205
376
  protocol = "https";
206
377
  } else {
207
- homepage = await fetchText(`http://${domain}`);
378
+ homepage = await fetchText(`http://${domain}`, domain);
208
379
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
209
380
  protocol = "http";
210
381
  }
@@ -224,38 +395,38 @@ async function prefetchSiteData(domain) {
224
395
  }
225
396
  const baseUrl = `${protocol}://${domain}`;
226
397
  const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
227
- fetchText(`${baseUrl}/llms.txt`),
228
- fetchText(`${baseUrl}/robots.txt`),
229
- fetchText(`${baseUrl}/faq`).then(async (result) => {
398
+ fetchText(`${baseUrl}/llms.txt`, domain),
399
+ fetchText(`${baseUrl}/robots.txt`, domain),
400
+ fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
230
401
  if (result && result.status === 200) return result;
231
402
  for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
232
- const fallback = await fetchText(`${baseUrl}${path}`);
403
+ const fallback = await fetchText(`${baseUrl}${path}`, domain);
233
404
  if (fallback && fallback.status === 200) return fallback;
234
405
  }
235
406
  return result;
236
407
  }),
237
- fetchText(`${baseUrl}/sitemap.xml`),
238
- fetchText(`${baseUrl}/ai.txt`)
408
+ fetchText(`${baseUrl}/sitemap.xml`, domain),
409
+ fetchText(`${baseUrl}/ai.txt`, domain)
239
410
  ]);
240
411
  let rssFeed = null;
241
412
  if (homepage) {
242
413
  const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
243
414
  if (rssLinkMatch) {
244
415
  const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
245
- rssFeed = await fetchText(rssUrl);
416
+ rssFeed = await fetchText(rssUrl, domain);
246
417
  }
247
418
  if (!rssFeed || rssFeed.status !== 200) {
248
419
  for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
249
- rssFeed = await fetchText(`${baseUrl}${path}`);
420
+ rssFeed = await fetchText(`${baseUrl}${path}`, domain);
250
421
  if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
251
422
  rssFeed = null;
252
423
  }
253
424
  }
254
425
  }
255
426
  if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
256
- const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
427
+ const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
257
428
  if (subUrls.length > 0) {
258
- const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
429
+ const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
259
430
  for (const sub of subResults) {
260
431
  if (sub && sub.status === 200) {
261
432
  sitemapXml.text += "\n" + sub.text;
@@ -268,7 +439,7 @@ async function prefetchSiteData(domain) {
268
439
  const sitemapForBlog = sitemapXml.text;
269
440
  const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
270
441
  if (blogUrls.length > 0) {
271
- const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
442
+ const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
272
443
  blogSample = fetched.filter(
273
444
  (r) => r !== null && r.status === 200 && r.text.length > 500
274
445
  );
@@ -1050,8 +1221,8 @@ function checkDirectAnswerDensity(data) {
1050
1221
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
1051
1222
  const snippetZoneParagraphs = paragraphs.filter((p) => {
1052
1223
  const text2 = p.replace(/<[^>]*>/g, "").trim();
1053
- const wordCount = text2.split(/\s+/).length;
1054
- return wordCount >= 40 && wordCount <= 150;
1224
+ const wordCount2 = text2.split(/\s+/).length;
1225
+ return wordCount2 >= 40 && wordCount2 <= 150;
1055
1226
  });
1056
1227
  if (snippetZoneParagraphs.length >= 3) {
1057
1228
  score += 2;
@@ -1319,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
1319
1490
  });
1320
1491
  return candidates.slice(0, limit).map((c) => c.url);
1321
1492
  }
1322
- function extractAllSubSitemapUrls(sitemapText, limit = 5) {
1493
+ function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
1323
1494
  if (!sitemapText.includes("<sitemapindex")) return [];
1495
+ const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
1496
+ const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
1324
1497
  const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
1325
1498
  const urls = sitemapLocs.map((block) => {
1326
1499
  const match = block.match(/<loc>([^<]+)<\/loc>/i);
1327
1500
  return match ? match[1].trim() : "";
1328
- }).filter(Boolean);
1501
+ }).filter((url) => !!url && isSafePublicUrl(url, domain));
1329
1502
  const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
1330
1503
  const rest = urls.filter((u) => !preferred.includes(u));
1331
1504
  return [...preferred, ...rest].slice(0, limit);
@@ -2199,6 +2372,123 @@ function checkContentDepth(data, topicCoherenceScore) {
2199
2372
  }
2200
2373
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2201
2374
  }
2375
/**
 * Run a per-page scorer over the homepage and every sampled blog page.
 *
 * Each page is scored with its `finalUrl`; when that is missing the site root
 * (`<protocol>://<domain>/`, defaulting to https) is used instead. Missing
 * homepage, missing `blogSample`, and null sample entries are skipped.
 *
 * @param {{domain: string, protocol?: string|null, homepage?: {text: string, finalUrl?: string}|null, blogSample?: Array<{text: string, finalUrl?: string}>}} data
 *   Prefetched site data.
 * @param {(html: string, url: string) => number} scorer - Page scoring function.
 * @returns {Array<{url: string, score: number}>} One entry per scored page, homepage first.
 */
function scoreSampledPages(data, scorer) {
  const fallbackUrl = `${data.protocol || "https"}://${data.domain}/`;

  const sampled = [];
  if (data.homepage) sampled.push(data.homepage);
  if (Array.isArray(data.blogSample)) sampled.push(...data.blogSample.filter(Boolean));

  return sampled.map((page) => {
    const url = page.finalUrl || fallbackUrl;
    return { url, score: scorer(page.text, url) };
  });
}
2389
/**
 * Summarize per-page scores.
 *
 * @param {Array<{score: number}>} pageScores - Entries produced by a page scorer.
 * @returns {{total: number, average: number, strong: Array, weak: Array}}
 *   `total` pages, rounded mean `average` (0 for an empty list), pages scoring
 *   8 or more (`strong`), and pages scoring 4 or less (`weak`).
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const sum = pageScores.reduce((acc, page) => acc + page.score, 0);
  return {
    total,
    average: total > 0 ? Math.round(sum / total) : 0,
    strong: pageScores.filter((page) => page.score >= 8),
    weak: pageScores.filter((page) => page.score <= 4),
  };
}
2396
/**
 * Audit criterion "helpful_purpose_alignment": average the helpful-purpose
 * score over the homepage and sampled pages and turn it into findings.
 *
 * Without a homepage the result is score 0 / status "not_found" / priority P1.
 * Otherwise the status is "pass" at an average of 7+, "partial" at 4–6 and
 * "fail" below; priority is P3 on pass and P1 otherwise.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`, …).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkHelpfulPurposeAlignment(data) {
  const criterion = "helpful_purpose_alignment";
  const criterion_label = "Helpful Purpose Alignment";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const pageScores = scoreSampledPages(data, scoreHelpfulPurposeAlignment);
  const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);

  if (average >= 8) {
    findings.push({ severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` });
  } else if (average >= 5) {
    findings.push({ severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" });
  } else {
    findings.push({ severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler",
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P1" };
}
2420
/**
 * Audit criterion "first_hand_experience_signals": average the first-hand
 * experience score over the homepage and sampled pages and turn it into
 * findings.
 *
 * Without a homepage the result is score 0 / status "not_found" / priority P2.
 * Otherwise the status is "pass" at an average of 7+, "partial" at 4–6 and
 * "fail" below; priority is P3 on pass and P2 otherwise.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`, …).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkFirstHandExperienceSignals(data) {
  const criterion = "first_hand_experience_signals";
  const criterion_label = "First-Hand Experience Signals";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const pageScores = scoreSampledPages(data, scoreFirstHandExperienceSignals);
  const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);

  if (average >= 8) {
    findings.push({ severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` });
  } else if (average >= 5) {
    findings.push({ severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" });
  } else {
    findings.push({ severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice",
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2444
/**
 * Audit criterion "creator_transparency": average the creator-transparency
 * score over the homepage and sampled pages and turn it into findings.
 *
 * Without a homepage the result is score 0 / status "not_found" / priority P2.
 * Otherwise the status is "pass" at an average of 7+, "partial" at 4–6 and
 * "fail" below; priority is P3 on pass and P2 otherwise.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`, …).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkCreatorTransparency(data) {
  const criterion = "creator_transparency";
  const criterion_label = "Creator Transparency";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const pageScores = scoreSampledPages(data, scoreCreatorTransparency);
  const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);

  if (average >= 8) {
    findings.push({ severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` });
  } else if (average >= 5) {
    findings.push({ severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" });
  } else {
    findings.push({ severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages",
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2468
/**
 * Audit criterion "methodology_transparency": average the
 * methodology-transparency score over the homepage and sampled pages and turn
 * it into findings.
 *
 * Without a homepage the result is score 0 / status "not_found" / priority P2.
 * Otherwise the status is "pass" at an average of 7+, "partial" at 4–6 and
 * "fail" below; priority is P3 on pass and P2 otherwise.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`, …).
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: Array<object>, fix_priority: string}}
 */
function checkMethodologyTransparency(data) {
  const criterion = "methodology_transparency";
  const criterion_label = "Methodology Transparency";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const pageScores = scoreSampledPages(data, scoreMethodologyTransparency);
  const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);

  if (average >= 8) {
    findings.push({ severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` });
  } else if (average >= 5) {
    findings.push({ severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' });
  } else {
    findings.push({ severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes",
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2202
2492
  function checkCitationReadyWriting(data) {
2203
2493
  const findings = [];
2204
2494
  if (!data.homepage) {
@@ -2294,8 +2584,8 @@ function checkAnswerFirstPlacement(data) {
2294
2584
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
2295
2585
  for (const p of earlyParagraphs) {
2296
2586
  const pText = p.replace(/<[^>]*>/g, "").trim();
2297
- const wordCount = pText.split(/\s+/).length;
2298
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
2587
+ const wordCount2 = pText.split(/\s+/).length;
2588
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
2299
2589
  shortAnswerCount++;
2300
2590
  break;
2301
2591
  }
@@ -2837,14 +3127,19 @@ function auditSiteFromData(data) {
2837
3127
  checkVisibleDateSignal(data),
2838
3128
  topicCoherence,
2839
3129
  checkContentDepth(data, topicCoherence.score),
2840
- // V2 criteria (#29-#34)
3130
+ // Helpful-content criteria (#29-#32)
3131
+ checkHelpfulPurposeAlignment(data),
3132
+ checkFirstHandExperienceSignals(data),
3133
+ checkCreatorTransparency(data),
3134
+ checkMethodologyTransparency(data),
3135
+ // V2 criteria (#33-#38)
2841
3136
  checkCitationReadyWriting(data),
2842
3137
  checkAnswerFirstPlacement(data),
2843
3138
  checkEvidencePackaging(data),
2844
3139
  checkEntityDisambiguation(data),
2845
3140
  checkExtractionFriction(data),
2846
3141
  checkImageContextAI(data),
2847
- // V3 criteria (#35-#36)
3142
+ // V3 criteria (#39-#40)
2848
3143
  checkDuplicateContent(data),
2849
3144
  checkCrossPageDuplication(data)
2850
3145
  ];
@@ -2870,6 +3165,10 @@ var WEIGHTS = {
2870
3165
  // Relevance to actual AI queries
2871
3166
  faq_section: 0.03,
2872
3167
  // Structured Q&A pairs
3168
+ helpful_purpose_alignment: 0.03,
3169
+ // Visitor-helpful vs search-first framing
3170
+ first_hand_experience_signals: 0.03,
3171
+ // Evidence of real use or observation
2873
3172
  // ─── Content Organization (~30%) ──────────────────────────────────────────
2874
3173
  // HOW easily AI engines can extract and trust your content.
2875
3174
  entity_consistency: 0.05,
@@ -2884,9 +3183,13 @@ var WEIGHTS = {
2884
3183
  // Expert attribution
2885
3184
  table_list_extractability: 0.03,
2886
3185
  // Extractable structured data
2887
- definition_patterns: 0.02,
3186
+ creator_transparency: 0.02,
3187
+ // Visible author/reviewer clarity
3188
+ methodology_transparency: 0.02,
3189
+ // Process disclosure
3190
+ definition_patterns: 0.015,
2888
3191
  // Clear definitions
2889
- visible_date_signal: 0.02,
3192
+ visible_date_signal: 0.015,
2890
3193
  // Publication date trust
2891
3194
  semantic_html: 0.02,
2892
3195
  // Clean semantic structure
@@ -2895,15 +3198,15 @@ var WEIGHTS = {
2895
3198
  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
2896
3199
  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
2897
3200
  content_cannibalization: 0.02,
2898
- llms_txt: 0.02,
2899
- robots_txt: 0.02,
3201
+ llms_txt: 0.01,
3202
+ robots_txt: 0.01,
2900
3203
  content_velocity: 0.02,
2901
- content_licensing: 0.02,
3204
+ content_licensing: 0.01,
2902
3205
  sitemap_completeness: 0.01,
2903
- canonical_url: 0.01,
2904
- rss_feed: 0.01,
2905
- schema_coverage: 0.01,
2906
- speakable_schema: 0.01,
3206
+ canonical_url: 5e-3,
3207
+ rss_feed: 5e-3,
3208
+ schema_coverage: 5e-3,
3209
+ speakable_schema: 5e-3,
2907
3210
  // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
2908
3211
  // Citation quality, evidence packaging, and extraction friction.
2909
3212
  citation_ready_writing: 0.04,
@@ -2916,7 +3219,7 @@ var WEIGHTS = {
2916
3219
  // Clear entity boundaries
2917
3220
  extraction_friction: 0.02,
2918
3221
  // Sentence length, voice, jargon
2919
- image_context_ai: 0.01,
3222
+ image_context_ai: 5e-3,
2920
3223
  // Figure/figcaption, alt text quality
2921
3224
  // ─── V3 Criteria ────────────────────────────────────────────────────────
2922
3225
  duplicate_content: 0.05,
@@ -2936,8 +3239,8 @@ function calculateOverallScore(criteria) {
2936
3239
  let score = Math.round(weightedSum / totalWeight);
2937
3240
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
2938
3241
  if (coherence && coherence.score < 6) {
2939
- const cap2 = 35 + coherence.score * 5;
2940
- score = Math.min(score, cap2);
3242
+ const cap3 = 35 + coherence.score * 5;
3243
+ score = Math.min(score, cap3);
2941
3244
  }
2942
3245
  return score;
2943
3246
  }
@@ -2965,6 +3268,13 @@ function isSpaShell(html) {
2965
3268
  return SPA_INDICATORS.some((pattern) => pattern.test(html));
2966
3269
  }
2967
3270
  async function fetchWithHeadless(url, options) {
3271
+ let expectedDomain;
3272
+ try {
3273
+ expectedDomain = normalizeHostname(new URL(url).hostname);
3274
+ } catch {
3275
+ return null;
3276
+ }
3277
+ if (!await isSafeFetchTarget(url, expectedDomain)) return null;
2968
3278
  let puppeteer;
2969
3279
  try {
2970
3280
  const mod = "puppeteer";
@@ -2991,12 +3301,28 @@ async function fetchWithHeadless(url, options) {
2991
3301
  const page = await browser.newPage();
2992
3302
  await page.setRequestInterception(true);
2993
3303
  page.on("request", (req) => {
2994
- const type = req.resourceType();
2995
- if (["image", "font", "media", "stylesheet"].includes(type)) {
2996
- req.abort();
2997
- } else {
2998
- req.continue();
2999
- }
3304
+ void (async () => {
3305
+ const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
3306
+ if (alreadyHandled) return;
3307
+ if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
3308
+ try {
3309
+ if (!req.isInterceptResolutionHandled?.()) await req.abort();
3310
+ } catch {
3311
+ }
3312
+ return;
3313
+ }
3314
+ const type = req.resourceType();
3315
+ try {
3316
+ if (!req.isInterceptResolutionHandled?.()) {
3317
+ if (["image", "font", "media", "stylesheet"].includes(type)) {
3318
+ await req.abort();
3319
+ } else {
3320
+ await req.continue();
3321
+ }
3322
+ }
3323
+ } catch {
3324
+ }
3325
+ })();
3000
3326
  });
3001
3327
  await page.setUserAgent("AEO-Visibility-Bot/1.0");
3002
3328
  await page.goto(url, { waitUntil: "networkidle2", timeout });
@@ -3009,6 +3335,7 @@ async function fetchWithHeadless(url, options) {
3009
3335
  }
3010
3336
  const html = await page.content();
3011
3337
  const finalUrl = page.url();
3338
+ if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
3012
3339
  return {
3013
3340
  text: html.slice(0, 5e5),
3014
3341
  status: 200,
@@ -3036,6 +3363,8 @@ var PILLARS = {
3036
3363
  "citation_ready_writing",
3037
3364
  "answer_first_placement",
3038
3365
  "evidence_packaging",
3366
+ "helpful_purpose_alignment",
3367
+ "first_hand_experience_signals",
3039
3368
  "duplicate_content",
3040
3369
  "cross_page_duplication"
3041
3370
  ],
@@ -3053,7 +3382,9 @@ var PILLARS = {
3053
3382
  "internal_linking",
3054
3383
  "content_freshness",
3055
3384
  "author_schema_depth",
3056
- "schema_markup"
3385
+ "schema_markup",
3386
+ "creator_transparency",
3387
+ "methodology_transparency"
3057
3388
  ],
3058
3389
  "Technical Foundation": [
3059
3390
  "semantic_html",
@@ -3083,6 +3414,8 @@ var CLIENT_NAMES = {
3083
3414
  citation_ready_writing: "Citation-Ready Writing",
3084
3415
  answer_first_placement: "Answer-First Placement",
3085
3416
  evidence_packaging: "Evidence Packaging",
3417
+ helpful_purpose_alignment: "Helpful Purpose Alignment",
3418
+ first_hand_experience_signals: "First-Hand Experience Signals",
3086
3419
  direct_answer_density: "Direct Answer Density",
3087
3420
  qa_content_format: "Q&A Content Format",
3088
3421
  query_answer_alignment: "Query-Answer Alignment",
@@ -3095,6 +3428,8 @@ var CLIENT_NAMES = {
3095
3428
  content_freshness: "Content Freshness",
3096
3429
  author_schema_depth: "Author & Expert Schema",
3097
3430
  schema_markup: "Schema Markup",
3431
+ creator_transparency: "Creator Transparency",
3432
+ methodology_transparency: "Methodology Transparency",
3098
3433
  semantic_html: "Semantic HTML",
3099
3434
  clean_html: "Clean HTML",
3100
3435
  visible_date_signal: "Visible Date Signal",
@@ -3121,6 +3456,8 @@ var PILLAR_WEIGHTS = {
3121
3456
  citation_ready_writing: 0.04,
3122
3457
  answer_first_placement: 0.03,
3123
3458
  evidence_packaging: 0.03,
3459
+ helpful_purpose_alignment: 0.03,
3460
+ first_hand_experience_signals: 0.03,
3124
3461
  duplicate_content: 0.05,
3125
3462
  cross_page_duplication: 0.03,
3126
3463
  direct_answer_density: 0.05,
@@ -3128,28 +3465,30 @@ var PILLAR_WEIGHTS = {
3128
3465
  query_answer_alignment: 0.04,
3129
3466
  faq_section: 0.03,
3130
3467
  table_list_extractability: 0.03,
3131
- definition_patterns: 0.02,
3468
+ definition_patterns: 0.015,
3132
3469
  entity_disambiguation: 0.02,
3133
3470
  entity_consistency: 0.05,
3134
3471
  internal_linking: 0.04,
3135
3472
  content_freshness: 0.04,
3136
3473
  author_schema_depth: 0.03,
3137
3474
  schema_markup: 0.03,
3475
+ creator_transparency: 0.02,
3476
+ methodology_transparency: 0.02,
3138
3477
  semantic_html: 0.02,
3139
3478
  clean_html: 0.02,
3140
- visible_date_signal: 0.02,
3479
+ visible_date_signal: 0.015,
3141
3480
  extraction_friction: 0.02,
3142
- image_context_ai: 0.01,
3143
- schema_coverage: 0.01,
3144
- speakable_schema: 0.01,
3481
+ image_context_ai: 5e-3,
3482
+ schema_coverage: 5e-3,
3483
+ speakable_schema: 5e-3,
3145
3484
  content_cannibalization: 0.02,
3146
- llms_txt: 0.02,
3147
- robots_txt: 0.02,
3485
+ llms_txt: 0.01,
3486
+ robots_txt: 0.01,
3148
3487
  content_velocity: 0.02,
3149
- content_licensing: 0.02,
3150
- canonical_url: 0.01,
3488
+ content_licensing: 0.01,
3489
+ canonical_url: 5e-3,
3151
3490
  sitemap_completeness: 0.01,
3152
- rss_feed: 0.01
3491
+ rss_feed: 5e-3
3153
3492
  };
3154
3493
  var CRITERION_EFFORT = {
3155
3494
  topic_coherence: "High",
@@ -3159,6 +3498,8 @@ var CRITERION_EFFORT = {
3159
3498
  citation_ready_writing: "Medium",
3160
3499
  answer_first_placement: "Medium",
3161
3500
  evidence_packaging: "Medium",
3501
+ helpful_purpose_alignment: "Medium",
3502
+ first_hand_experience_signals: "Medium",
3162
3503
  duplicate_content: "Medium",
3163
3504
  cross_page_duplication: "Medium",
3164
3505
  direct_answer_density: "Medium",
@@ -3173,6 +3514,8 @@ var CRITERION_EFFORT = {
3173
3514
  content_freshness: "Low",
3174
3515
  author_schema_depth: "Low",
3175
3516
  schema_markup: "Medium",
3517
+ creator_transparency: "Low",
3518
+ methodology_transparency: "Low",
3176
3519
  semantic_html: "Low",
3177
3520
  clean_html: "Medium",
3178
3521
  visible_date_signal: "Low",
@@ -3197,6 +3540,8 @@ var FIX_DESCRIPTIONS = {
3197
3540
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
3198
3541
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
3199
3542
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
3543
+ helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
3544
+ first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
3200
3545
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
3201
3546
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
3202
3547
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
@@ -3209,6 +3554,8 @@ var FIX_DESCRIPTIONS = {
3209
3554
  content_freshness: "Add dateModified schema and visible last-updated dates.",
3210
3555
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
3211
3556
  schema_markup: "Implement JSON-LD structured data on key pages.",
3557
+ creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
3558
+ methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
3212
3559
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
3213
3560
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
3214
3561
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
@@ -3308,6 +3655,10 @@ var CRITERION_LABELS = {
3308
3655
  "Visible Date Signal": "Visible Date Signal",
3309
3656
  "Topic Coherence": "Topic Coherence",
3310
3657
  "Content Depth": "Content Depth",
3658
+ "Helpful Purpose Alignment": "Helpful Purpose Alignment",
3659
+ "First-Hand Experience Signals": "First-Hand Experience Signals",
3660
+ "Creator Transparency": "Creator Transparency",
3661
+ "Methodology Transparency": "Methodology Transparency",
3311
3662
  "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
3312
3663
  "Answer-First Placement": "Answer-First Placement",
3313
3664
  "Evidence Packaging": "Evidence Packaging",
@@ -3413,6 +3764,8 @@ var CRITERION_WEIGHTS = {
3413
3764
  qa_content_format: 0.04,
3414
3765
  query_answer_alignment: 0.04,
3415
3766
  faq_section: 0.03,
3767
+ helpful_purpose_alignment: 0.03,
3768
+ first_hand_experience_signals: 0.03,
3416
3769
  // Content Organization (~30%)
3417
3770
  entity_consistency: 0.05,
3418
3771
  internal_linking: 0.04,
@@ -3420,28 +3773,30 @@ var CRITERION_WEIGHTS = {
3420
3773
  schema_markup: 0.03,
3421
3774
  author_schema_depth: 0.03,
3422
3775
  table_list_extractability: 0.03,
3423
- definition_patterns: 0.02,
3424
- visible_date_signal: 0.02,
3776
+ creator_transparency: 0.02,
3777
+ methodology_transparency: 0.02,
3778
+ definition_patterns: 0.015,
3779
+ visible_date_signal: 0.015,
3425
3780
  semantic_html: 0.02,
3426
3781
  clean_html: 0.02,
3427
3782
  // Technical Plumbing (~15%)
3428
3783
  content_cannibalization: 0.02,
3429
- llms_txt: 0.02,
3430
- robots_txt: 0.02,
3784
+ llms_txt: 0.01,
3785
+ robots_txt: 0.01,
3431
3786
  content_velocity: 0.02,
3432
- content_licensing: 0.02,
3787
+ content_licensing: 0.01,
3433
3788
  sitemap_completeness: 0.01,
3434
- canonical_url: 0.01,
3435
- rss_feed: 0.01,
3436
- schema_coverage: 0.01,
3437
- speakable_schema: 0.01,
3789
+ canonical_url: 5e-3,
3790
+ rss_feed: 5e-3,
3791
+ schema_coverage: 5e-3,
3792
+ speakable_schema: 5e-3,
3438
3793
  // V2 Criteria (~15%)
3439
3794
  citation_ready_writing: 0.04,
3440
3795
  answer_first_placement: 0.03,
3441
3796
  evidence_packaging: 0.03,
3442
3797
  entity_disambiguation: 0.02,
3443
3798
  extraction_friction: 0.02,
3444
- image_context_ai: 0.01,
3799
+ image_context_ai: 5e-3,
3445
3800
  // V3 Criteria
3446
3801
  duplicate_content: 0.05,
3447
3802
  cross_page_duplication: 0.03
@@ -3482,6 +3837,16 @@ var OPPORTUNITY_TEMPLATES = {
3482
3837
  effort: "Medium",
3483
3838
  description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
3484
3839
  },
3840
+ helpful_purpose_alignment: {
3841
+ name: "Improve Helpful Purpose Alignment",
3842
+ effort: "Medium",
3843
+ description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
3844
+ },
3845
+ first_hand_experience_signals: {
3846
+ name: "Add First-Hand Experience Signals",
3847
+ effort: "Medium",
3848
+ description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
3849
+ },
3485
3850
  original_data: {
3486
3851
  name: "Add Original Data & Case Studies",
3487
3852
  effort: "High",
@@ -3537,6 +3902,16 @@ var OPPORTUNITY_TEMPLATES = {
3537
3902
  effort: "Low",
3538
3903
  description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
3539
3904
  },
3905
+ creator_transparency: {
3906
+ name: "Improve Creator Transparency",
3907
+ effort: "Low",
3908
+ description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
3909
+ },
3910
+ methodology_transparency: {
3911
+ name: "Add Methodology Transparency",
3912
+ effort: "Low",
3913
+ description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
3914
+ },
3540
3915
  fact_density: {
3541
3916
  name: "Increase Fact & Data Density",
3542
3917
  effort: "Medium",
@@ -3796,20 +4171,12 @@ function formatList(items) {
3796
4171
  }
3797
4172
 
3798
4173
  // src/multi-page-fetcher.ts
3799
- async function fetchPage(url, timeoutMs = 1e4) {
3800
- try {
3801
- const res = await fetch(url, {
3802
- signal: AbortSignal.timeout(timeoutMs),
3803
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
3804
- redirect: "follow"
3805
- });
3806
- if (res.status !== 200) return null;
3807
- const text = await res.text();
3808
- if (text.length < 200) return null;
3809
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
3810
- } catch {
3811
- return null;
3812
- }
4174
+ async function fetchPage(url, domain, timeoutMs = 1e4) {
4175
+ const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
4176
+ if (!res || res.status !== 200) return null;
4177
+ const text = await res.text();
4178
+ if (text.length < 200) return null;
4179
+ return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
3813
4180
  }
3814
4181
  var PAGE_VARIANTS = {
3815
4182
  about: ["/about", "/about-us", "/company", "/who-we-are"],
@@ -3965,7 +4332,7 @@ async function fetchMultiPageData(siteData, options) {
3965
4332
  }
3966
4333
  const entries = Array.from(urlsToFetch.entries());
3967
4334
  if (entries.length === 0) return 0;
3968
- const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
4335
+ const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
3969
4336
  if (!siteData.blogSample) siteData.blogSample = [];
3970
4337
  let added = 0;
3971
4338
  for (let i = 0; i < results.length; i++) {
@@ -3992,19 +4359,23 @@ var PAGE_CRITERIA = {
3992
4359
  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
3993
4360
  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
3994
4361
  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
3995
- definition_patterns: { weight: 0.02, label: "Definition Patterns" },
3996
- visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
4362
+ definition_patterns: { weight: 0.015, label: "Definition Patterns" },
4363
+ visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
3997
4364
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
3998
4365
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
3999
4366
  // Technical Plumbing
4000
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
4367
+ canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
4001
4368
  // V2 Criteria
4002
4369
  citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
4003
4370
  answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
4004
4371
  evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
4372
+ helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
4373
+ first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
4005
4374
  entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
4006
4375
  extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
4007
- image_context_ai: { weight: 0.01, label: "Image Context for AI" },
4376
+ creator_transparency: { weight: 0.02, label: "Creator Transparency" },
4377
+ methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
4378
+ image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
4008
4379
  duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
4009
4380
  };
4010
4381
  function extractJsonLdBlocks(html) {
@@ -4027,7 +4398,7 @@ function extractTypesFromJsonLd(blocks) {
4027
4398
  }
4028
4399
  return types;
4029
4400
  }
4030
- function getTextContent(html) {
4401
+ function getTextContent2(html) {
4031
4402
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4032
4403
  }
4033
4404
  function extractQuestionHeadings2(html) {
@@ -4055,7 +4426,7 @@ function countAnsweredQuestions(html) {
4055
4426
  }
4056
4427
  return { total: questions.length, answered };
4057
4428
  }
4058
- function cap(value, max) {
4429
+ function cap2(value, max) {
4059
4430
  return Math.min(value, max);
4060
4431
  }
4061
4432
  function scoreSchemaMarkup(html) {
@@ -4081,10 +4452,10 @@ function scoreSchemaMarkup(html) {
4081
4452
  for (const t of types) {
4082
4453
  if (knownTypes.includes(t)) knownCount++;
4083
4454
  }
4084
- score += cap(knownCount * 2, 4);
4455
+ score += cap2(knownCount * 2, 4);
4085
4456
  if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
4086
4457
  if (types.has("FAQPage")) score += 1;
4087
- return cap(score, 10);
4458
+ return cap2(score, 10);
4088
4459
  }
4089
4460
  function scoreQAFormat(html) {
4090
4461
  const questions = extractQuestionHeadings2(html);
@@ -4096,7 +4467,7 @@ function scoreQAFormat(html) {
4096
4467
  if (answered >= 1) score += 3;
4097
4468
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4098
4469
  if (h1Matches.length === 1) score += 2;
4099
- return cap(score, 10);
4470
+ return cap2(score, 10);
4100
4471
  }
4101
4472
  function scoreCleanHtml(html) {
4102
4473
  let score = 0;
@@ -4105,15 +4476,15 @@ function scoreCleanHtml(html) {
4105
4476
  for (const tag of semantics) {
4106
4477
  if (html.toLowerCase().includes(tag)) semCount++;
4107
4478
  }
4108
- score += cap(semCount, 3);
4479
+ score += cap2(semCount, 3);
4109
4480
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4110
4481
  if (h1Matches.length === 1) score += 2;
4111
- const text = getTextContent(html);
4482
+ const text = getTextContent2(html);
4112
4483
  if (text.length > 500) score += 3;
4113
4484
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
4114
4485
  const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
4115
4486
  if (hasTitle && hasDesc) score += 2;
4116
- return cap(score, 10);
4487
+ return cap2(score, 10);
4117
4488
  }
4118
4489
  function scoreFaqSection(html) {
4119
4490
  let score = 0;
@@ -4125,11 +4496,11 @@ function scoreFaqSection(html) {
4125
4496
  const questions = extractQuestionHeadings2(html);
4126
4497
  if (questions.length >= 10) score += 1;
4127
4498
  if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
4128
- return cap(score, 10);
4499
+ return cap2(score, 10);
4129
4500
  }
4130
4501
  function scoreOriginalData(html) {
4131
4502
  let score = 0;
4132
- const text = getTextContent(html);
4503
+ const text = getTextContent2(html);
4133
4504
  if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
4134
4505
  score += 3;
4135
4506
  } else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
@@ -4146,7 +4517,7 @@ function scoreOriginalData(html) {
4146
4517
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
4147
4518
  score += 2;
4148
4519
  }
4149
- return cap(score, 10);
4520
+ return cap2(score, 10);
4150
4521
  }
4151
4522
  function scoreQueryAnswerAlignment(html) {
4152
4523
  const { total, answered } = countAnsweredQuestions(html);
@@ -4169,7 +4540,7 @@ function scoreContentFreshness(html) {
4169
4540
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4170
4541
  const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
4171
4542
  if (yearPattern.test(html)) score += 2;
4172
- return cap(score, 10);
4543
+ return cap2(score, 10);
4173
4544
  }
4174
4545
  function scoreTableListExtractability(html) {
4175
4546
  let score = 0;
@@ -4182,7 +4553,7 @@ function scoreTableListExtractability(html) {
4182
4553
  const listItems = html.match(/<li[\s>]/gi) || [];
4183
4554
  if (listItems.length >= 10) score += 1;
4184
4555
  if (/<dl[\s>]/i.test(html)) score += 1;
4185
- return cap(score, 10);
4556
+ return cap2(score, 10);
4186
4557
  }
4187
4558
  function scoreDirectAnswerDensity(html) {
4188
4559
  let score = 0;
@@ -4198,9 +4569,9 @@ function scoreDirectAnswerDensity(html) {
4198
4569
  }
4199
4570
  if (snippetCount >= 3) score += 2;
4200
4571
  else if (snippetCount >= 1) score += 1;
4201
- const directOpeners = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4572
+ const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4202
4573
  if (directOpeners.length >= 2) score += 2;
4203
- return cap(score, 10);
4574
+ return cap2(score, 10);
4204
4575
  }
4205
4576
  function scoreSemanticHtml(html) {
4206
4577
  let score = 0;
@@ -4210,7 +4581,7 @@ function scoreSemanticHtml(html) {
4210
4581
  for (const el of elements) {
4211
4582
  if (lowerHtml.includes(el)) count++;
4212
4583
  }
4213
- score += cap(Math.floor(count * 0.7), 4);
4584
+ score += cap2(Math.floor(count * 0.7), 4);
4214
4585
  const imgTags = html.match(/<img\s[^>]*>/gi) || [];
4215
4586
  if (imgTags.length > 0) {
4216
4587
  let withAlt = 0;
@@ -4221,11 +4592,11 @@ function scoreSemanticHtml(html) {
4221
4592
  }
4222
4593
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
4223
4594
  if (/\baria-/i.test(html)) score += 2;
4224
- return cap(score, 10);
4595
+ return cap2(score, 10);
4225
4596
  }
4226
4597
  function scoreFactDensity(html) {
4227
4598
  let score = 0;
4228
- const text = getTextContent(html);
4599
+ const text = getTextContent2(html);
4229
4600
  const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
4230
4601
  if (numericPatterns.length >= 6) score += 5;
4231
4602
  else if (numericPatterns.length >= 3) score += 3;
@@ -4238,11 +4609,11 @@ function scoreFactDensity(html) {
4238
4609
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
4239
4610
  const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
4240
4611
  if (units.length >= 2) score += 1;
4241
- return cap(score, 10);
4612
+ return cap2(score, 10);
4242
4613
  }
4243
4614
  function scoreDefinitionPatterns(html) {
4244
4615
  let score = 0;
4245
- const text = getTextContent(html);
4616
+ const text = getTextContent2(html);
4246
4617
  const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
4247
4618
  if (defPatterns.length >= 3) score += 5;
4248
4619
  else if (defPatterns.length >= 1) score += 3;
@@ -4250,7 +4621,7 @@ function scoreDefinitionPatterns(html) {
4250
4621
  if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
4251
4622
  if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
4252
4623
  if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
4253
- return cap(score, 10);
4624
+ return cap2(score, 10);
4254
4625
  }
4255
4626
  function scoreCanonicalUrl(html, url) {
4256
4627
  let score = 0;
@@ -4271,7 +4642,7 @@ function scoreCanonicalUrl(html, url) {
4271
4642
  if (canonicalHref.startsWith("https://")) score += 2;
4272
4643
  const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
4273
4644
  if (allCanonicals.length === 1) score += 1;
4274
- return cap(score, 10);
4645
+ return cap2(score, 10);
4275
4646
  }
4276
4647
  function scoreVisibleDateSignal(html) {
4277
4648
  let score = 0;
@@ -4290,11 +4661,11 @@ function scoreVisibleDateSignal(html) {
4290
4661
  } catch {
4291
4662
  }
4292
4663
  }
4293
- return cap(score, 10);
4664
+ return cap2(score, 10);
4294
4665
  }
4295
4666
  function scoreCitationReadyWriting(html) {
4296
4667
  let score = 0;
4297
- const text = getTextContent(html);
4668
+ const text = getTextContent2(html);
4298
4669
  const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
4299
4670
  if (defSentences.length >= 3) score += 3;
4300
4671
  else if (defSentences.length >= 1) score += 1;
@@ -4323,7 +4694,7 @@ function scoreCitationReadyWriting(html) {
4323
4694
  );
4324
4695
  if (quotableLines.length >= 2) score += 2;
4325
4696
  else if (quotableLines.length >= 1) score += 1;
4326
- return cap(score, 10);
4697
+ return cap2(score, 10);
4327
4698
  }
4328
4699
  function scoreAnswerFirstPlacement(html) {
4329
4700
  let score = 0;
@@ -4334,8 +4705,8 @@ function scoreAnswerFirstPlacement(html) {
4334
4705
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4335
4706
  for (const p of earlyParagraphs) {
4336
4707
  const pText = p.replace(/<[^>]*>/g, "").trim();
4337
- const wordCount = pText.split(/\s+/).length;
4338
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
4708
+ const wordCount2 = pText.split(/\s+/).length;
4709
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4339
4710
  score += 4;
4340
4711
  break;
4341
4712
  }
@@ -4356,11 +4727,11 @@ function scoreAnswerFirstPlacement(html) {
4356
4727
  score += 3;
4357
4728
  }
4358
4729
  }
4359
- return cap(score, 10);
4730
+ return cap2(score, 10);
4360
4731
  }
4361
4732
  function scoreEvidencePackaging(html) {
4362
4733
  let score = 0;
4363
- const text = getTextContent(html);
4734
+ const text = getTextContent2(html);
4364
4735
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4365
4736
  let inlineCitations = 0;
4366
4737
  for (const p of paragraphs) {
@@ -4378,11 +4749,11 @@ function scoreEvidencePackaging(html) {
4378
4749
  const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
4379
4750
  if (sourcedStats.length >= 2) score += 2;
4380
4751
  else if (sourcedStats.length >= 1) score += 1;
4381
- return cap(score, 10);
4752
+ return cap2(score, 10);
4382
4753
  }
4383
4754
  function scoreEntityDisambiguation(html) {
4384
4755
  let score = 0;
4385
- const text = getTextContent(html);
4756
+ const text = getTextContent2(html);
4386
4757
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
4387
4758
  if (!h1Match) return 3;
4388
4759
  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
@@ -4400,11 +4771,11 @@ function scoreEntityDisambiguation(html) {
4400
4771
  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
4401
4772
  score += 3;
4402
4773
  }
4403
- return cap(score, 10);
4774
+ return cap2(score, 10);
4404
4775
  }
4405
4776
  function scoreExtractionFriction(html) {
4406
4777
  let score = 0;
4407
- const text = getTextContent(html);
4778
+ const text = getTextContent2(html);
4408
4779
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
4409
4780
  const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
4410
4781
  if (avgLen > 0 && avgLen < 20) score += 3;
@@ -4427,7 +4798,7 @@ function scoreExtractionFriction(html) {
4427
4798
  if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
4428
4799
  score = Math.max(0, score - 2);
4429
4800
  }
4430
- return cap(score, 10);
4801
+ return cap2(score, 10);
4431
4802
  }
4432
4803
  function scoreImageContextAI(html) {
4433
4804
  let score = 0;
@@ -4452,7 +4823,7 @@ function scoreImageContextAI(html) {
4452
4823
  else if (goodAltCount > 0) score += 1;
4453
4824
  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
4454
4825
  if (contextualImages.length > 0) score += 3;
4455
- return cap(score, 10);
4826
+ return cap2(score, 10);
4456
4827
  }
4457
4828
  function scoreDuplicateContent(html) {
4458
4829
  return scoreDuplicateContentDetailed(html).score;
@@ -4514,8 +4885,12 @@ var SCORING_FUNCTIONS = {
4514
4885
  citation_ready_writing: scoreCitationReadyWriting,
4515
4886
  answer_first_placement: scoreAnswerFirstPlacement,
4516
4887
  evidence_packaging: scoreEvidencePackaging,
4888
+ helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
4889
+ first_hand_experience_signals: scoreFirstHandExperienceSignals,
4517
4890
  entity_disambiguation: scoreEntityDisambiguation,
4518
4891
  extraction_friction: scoreExtractionFriction,
4892
+ creator_transparency: scoreCreatorTransparency,
4893
+ methodology_transparency: scoreMethodologyTransparency,
4519
4894
  image_context_ai: scoreImageContextAI,
4520
4895
  duplicate_content: scoreDuplicateContent
4521
4896
  };
@@ -4546,7 +4921,7 @@ function extractTitle(html) {
4546
4921
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4547
4922
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4548
4923
  }
4549
- function getTextContent2(html) {
4924
+ function getTextContent3(html) {
4550
4925
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4551
4926
  }
4552
4927
  function countWords2(text) {
@@ -4606,9 +4981,9 @@ function checkMissingOgTags(html) {
4606
4981
  }
4607
4982
  return null;
4608
4983
  }
4609
- function checkThinContent(wordCount) {
4610
- if (wordCount < 300) {
4611
- return { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" };
4984
+ function checkThinContent(wordCount2) {
4985
+ if (wordCount2 < 300) {
4986
+ return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
4612
4987
  }
4613
4988
  return null;
4614
4989
  }
@@ -4705,15 +5080,15 @@ function checkNoAnswerBlock(html) {
4705
5080
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4706
5081
  for (const p of earlyParagraphs) {
4707
5082
  const pText = p.replace(/<[^>]*>/g, "").trim();
4708
- const wordCount = pText.split(/\s+/).length;
4709
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5083
+ const wordCount2 = pText.split(/\s+/).length;
5084
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4710
5085
  return null;
4711
5086
  }
4712
5087
  }
4713
5088
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
4714
5089
  }
4715
5090
  function checkNoEvidence(html, url) {
4716
- const text = getTextContent2(html);
5091
+ const text = getTextContent3(html);
4717
5092
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4718
5093
  let inlineCitations = 0;
4719
5094
  for (const p of paragraphs) {
@@ -4727,7 +5102,7 @@ function checkNoEvidence(html, url) {
4727
5102
  return null;
4728
5103
  }
4729
5104
  function checkHasCitationReadyContent(html) {
4730
- const text = getTextContent2(html);
5105
+ const text = getTextContent3(html);
4731
5106
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
4732
5107
  let quotable = 0;
4733
5108
  for (const s of sentences) {
@@ -4752,8 +5127,8 @@ function checkDuplicateContentBlocks(html) {
4752
5127
  }
4753
5128
  function analyzePage(html, url, category) {
4754
5129
  const title = extractTitle(html);
4755
- const textContent = getTextContent2(html);
4756
- const wordCount = countWords2(textContent);
5130
+ const textContent = getTextContent3(html);
5131
+ const wordCount2 = countWords2(textContent);
4757
5132
  const issues = [];
4758
5133
  const strengths = [];
4759
5134
  const issueChecks = [
@@ -4764,7 +5139,7 @@ function analyzePage(html, url, category) {
4764
5139
  checkNoSchema(html),
4765
5140
  checkMissingCanonical(html),
4766
5141
  checkMissingOgTags(html),
4767
- checkThinContent(wordCount),
5142
+ checkThinContent(wordCount2),
4768
5143
  checkImagesMissingAlt(html),
4769
5144
  checkNoInternalLinks(html, url),
4770
5145
  checkNoAnswerBlock(html),
@@ -4783,7 +5158,7 @@ function analyzePage(html, url, category) {
4783
5158
  if (result) strengths.push(result);
4784
5159
  }
4785
5160
  const { aeoScore, criterionScores } = scorePage(html, url);
4786
- return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
5161
+ return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
4787
5162
  }
4788
5163
  function analyzeAllPages(siteData) {
4789
5164
  const reviews = [];
@@ -4805,6 +5180,10 @@ function getTextLength(html) {
4805
5180
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
4806
5181
  }
4807
5182
  async function audit(domain, options) {
5183
+ const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
5184
+ if (!await isSafeFetchTarget(normalizedTarget)) {
5185
+ throw new Error(`Refusing to audit private or local address: ${domain}`);
5186
+ }
4808
5187
  const startTime = Date.now();
4809
5188
  let renderedWithHeadless = false;
4810
5189
  const siteData = await prefetchSiteData(domain);
@@ -4837,7 +5216,7 @@ async function audit(domain, options) {
4837
5216
  }
4838
5217
  }
4839
5218
  if (options?.fullCrawl) {
4840
- const { crawlFullSite } = await import("./full-site-crawler-OBECS7AT.js");
5219
+ const { crawlFullSite } = await import("./full-site-crawler-W3WSE6WT.js");
4841
5220
  const crawlResult = await crawlFullSite(siteData, {
4842
5221
  maxPages: options.maxPages ?? 200,
4843
5222
  concurrency: options.concurrency ?? 5
@@ -5266,7 +5645,7 @@ function generateComparisonHtmlReport(result) {
5266
5645
  }
5267
5646
 
5268
5647
  // src/cli.ts
5269
- var VERSION = "3.0.0";
5648
+ var VERSION = "3.2.1";
5270
5649
  function printHelp() {
5271
5650
  console.log(`
5272
5651
  aeorank - AI Engine Optimization audit