aeorank 3.1.1 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -2,8 +2,12 @@ import {
2
2
  crawlFullSite,
3
3
  extractAllUrlsFromSitemap,
4
4
  extractInternalLinks,
5
- inferCategory
6
- } from "./chunk-RYV25AUV.js";
5
+ inferCategory,
6
+ isSafeFetchTarget,
7
+ isSafePublicUrl,
8
+ normalizeHostname,
9
+ safeFetch
10
+ } from "./chunk-DW7MPQ4X.js";
7
11
 
8
12
  // src/parked-domain.ts
9
13
  var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
@@ -147,19 +151,181 @@ function shingleJaccardSimilarity(a, b) {
147
151
  return union === 0 ? 0 : intersection / union;
148
152
  }
149
153
 
154
+ // src/helpful-content.ts
155
/**
 * Clamp a number from above.
 *
 * @param {number} value - Candidate value.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  const bounded = Math.min(max, value);
  return bounded;
}
158
/**
 * Clamp a number from below.
 *
 * @param {number} value - Candidate value.
 * @param {number} min - Lower bound.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  const bounded = Math.max(min, value);
  return bounded;
}
161
/**
 * Count how many times `pattern` occurs in `text`.
 *
 * `String.prototype.match` only returns one entry per occurrence when the
 * regex carries the `g` flag; without it the result is the first match plus
 * its capture groups, so its length is not a count. The pattern is therefore
 * upgraded to a global one when needed.
 *
 * @param {string|null|undefined} text - Text to scan; nullish counts as zero.
 * @param {RegExp|string} pattern - Pattern to count (strings are compiled as regex source).
 * @returns {number} Number of non-overlapping matches.
 */
function countMatches(text, pattern) {
  if (text == null) return 0;
  let globalPattern;
  if (pattern instanceof RegExp) {
    globalPattern = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
  } else {
    globalPattern = new RegExp(pattern, "g");
  }
  return text.match(globalPattern)?.length ?? 0;
}
164
/**
 * Replace every `<script>…</script>` and `<style>…</style>` element
 * (tags and contents) with a single space.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} HTML without script/style elements.
 */
function stripScriptsAndStyles(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, " ");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
  return withoutStyles;
}
167
/**
 * Reduce HTML to plain text: script/style elements are dropped, remaining
 * tags become spaces, and whitespace runs collapse to one space.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Trimmed, whitespace-normalized text.
 */
function getTextContent(html) {
  const markup = stripScriptsAndStyles(html);
  const untagged = markup.replace(/<[^>]*>/g, " ");
  const collapsed = untagged.replace(/\s+/g, " ");
  return collapsed.trim();
}
170
/**
 * Return the markup between the first `<body …>` tag and the first
 * following `</body>`; when no such pair exists the input is returned as is.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Body inner HTML, or the original string.
 */
function getBodyHtml(html) {
  const captured = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (!captured) {
    return html;
  }
  return captured[1];
}
174
/**
 * Extract the plain text of the first `<p>` element inside the body.
 *
 * The opening-tag pattern requires whitespace or `>` right after `<p`, so
 * elements whose names merely start with "p" (`<pre>`, `<picture>`,
 * `<path>`, …) are not mistaken for a paragraph.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Whitespace-normalized paragraph text, or "" when no paragraph exists.
 */
function getFirstParagraphText(html) {
  const firstPara = getBodyHtml(html).match(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/i);
  if (!firstPara) return "";
  return firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
178
/**
 * Return the first `count` whitespace-separated words of `text`, joined by
 * single spaces.
 *
 * The text is trimmed first so leading whitespace does not produce an empty
 * "word", and a negative count is treated as zero instead of slicing from
 * the end.
 *
 * @param {string} text - Source text.
 * @param {number} count - Maximum number of words to keep.
 * @returns {string} The leading words, or "" when there are none.
 */
function firstNWords(text, count) {
  const limit = Math.max(0, count);
  return text.trim().split(/\s+/).slice(0, limit).join(" ");
}
181
/**
 * Plain text of the first `<h1>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Whitespace-normalized heading text, or "" when there is no `<h1>`.
 */
function getH1Text(html) {
  const heading = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!heading) {
    return "";
  }
  const untagged = heading[1].replace(/<[^>]*>/g, " ");
  return untagged.replace(/\s+/g, " ").trim();
}
185
/**
 * Plain text of the first `<title>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Whitespace-normalized title text, or "" when there is no `<title>`.
 */
function getTitleText(html) {
  const title = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!title) {
    return "";
  }
  const untagged = title[1].replace(/<[^>]*>/g, " ");
  return untagged.replace(/\s+/g, " ").trim();
}
189
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - Text to measure; any falsy value counts as zero words.
 * @returns {number} Number of non-empty tokens.
 */
function wordCount(text) {
  if (!text) {
    return 0;
  }
  let total = 0;
  for (const token of text.split(/\s+/)) {
    if (token) total += 1;
  }
  return total;
}
192
/**
 * Heuristically decide whether a page looks like editorial content
 * (article, guide, docs, …) rather than a landing or utility page.
 *
 * Signals, each adding to a running tally:
 *   - URL path segment from a known content section: +2
 *   - an `<article>` element: +1
 *   - at least two `<h2>`/`<h3>` headings: +1
 *   - 500 or more words of text: +1
 *   - a `<time>` element or datePublished/dateModified marker: +1
 *   - byline-style wording in the text: +1
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL; skipped when empty.
 * @returns {boolean} True when the tally reaches 2.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const wc = wordCount(text);
  let signals = 0;
  // The alternation is followed by `\b`, so a bare stem such as `case-stud`
  // could never match "/case-study" or "/case-studies" (the next character is
  // a word character). Stems are spelled out with their real endings, and the
  // plural forms of tutorial/whitepaper/faq are accepted for the same reason.
  if (url && /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorials?|case-stud(?:y|ies)|whitepapers?|faqs?)\b/i.test(url)) {
    signals += 2;
  }
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wc >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
206
/**
 * Decide whether readers would reasonably expect this page to disclose a
 * methodology (reviews, comparisons, studies, benchmarks, …).
 *
 * Checked in order: title + H1 wording, then URL wording, then explicit
 * methodology phrases in the page text.
 *
 * Keywords are matched as whole words. Matching bare substrings produced
 * false positives such as "latest"/"contest" (contains "test"), "canvas"
 * (contains "vs") or a "bestbuy" host name (contains "best").
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL.
 * @returns {boolean} True when methodology disclosure is expected.
 */
function expectsMethodology(html, url) {
  const title = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  if (/\b(?:(?:review|compare|comparison|benchmark|survey|report|test)(?:s|d|ed|ing)?|stud(?:y|ies)|analys[ei]s|research|best|methodology|vs\.?)\b/i.test(title)) {
    return true;
  }
  const urlText = (url || "").toLowerCase();
  // Letter/digit lookarounds instead of `\b` so that "_"-separated slugs
  // (e.g. "/best_of/") still count as separate words.
  if (/(?<![a-z0-9])(?:(?:review|compare|comparison|benchmark|survey|report)s?|stud(?:y|ies)|analys[ei]s|research|best)(?![a-z0-9])/i.test(urlText)) {
    return true;
  }
  const text = getTextContent(html);
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
}
218
/**
 * Check whether the topic announced by the `<title>` and `<h1>` actually
 * shows up early in the page body.
 *
 * Keywords are the title/H1 words of five or more characters, minus a small
 * stop list. The page aligns when at least two distinct keywords (or the
 * only one, if there is just one) occur in the first 250 words of body text.
 *
 * The body text is taken with the `<title>` and `<h1>` elements removed.
 * Comparing against text that still contains them makes the check trivially
 * true, because the keywords always find themselves.
 *
 * @param {string} html - Raw page HTML.
 * @returns {boolean} True when title/H1 keywords appear in the early body text.
 */
function titleAndBodyAlign(html) {
  const topic = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const keywords = topic
    .split(/[\s|:()\-/]+/)
    // Drop punctuation stuck to a word ("laptops," / "guide?") so it cannot block a match.
    .map((w) => w.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
    .filter((w) => w.length >= 5 && !/^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(w));
  const uniqueKeywords = [...new Set(keywords)];
  if (uniqueKeywords.length === 0) return false;
  // getBodyHtml falls back to the whole document when there is no <body>,
  // so <title> is stripped explicitly as well.
  const bodyOnly = getBodyHtml(html)
    .replace(/<title[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const text = firstNWords(getTextContent(bodyOnly), 250).toLowerCase();
  const required = Math.min(2, uniqueKeywords.length);
  return uniqueKeywords.filter((w) => text.includes(w)).length >= required;
}
228
// Phrase lexicons for the helpful-content scorers. Patterns carrying the `g`
// flag are counted with countMatches(); the others are used with .test().

// Boilerplate intro phrasing, anchored to the start of the first paragraph.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;
// Decision- and action-oriented wording.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
// Wording that weighs pros against cons.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
// Filler phrases; matches reduce the purpose-alignment score.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
// Calls to action; counted within the first 1800 characters of body HTML.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
// First-person statements of having done the work ("we tested", "I measured", ...).
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
// Phrases that situate a claim in real practice, durations, or sample counts.
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
// Mentions of original evidence (screenshots, benchmarks, walkthroughs, ...).
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
// Admissions of limits, failures, or lessons learned.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
// Anchor tags whose href contains an author/team/people/staff/reviewer path segment.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
// Visible byline wording ("written by", "reviewed by", ...).
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
// Author-bio or credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
// Explicit process/methodology disclosures.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
// Concrete methodology details (sample size, criteria, timeframe, ...).
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
242
/**
 * Score (0-10) how directly a page serves the visitor's task instead of
 * padding, generic intros, or early sales prompts.
 *
 * Pages with no text score 0. Short pages (< 250 words) that do not look
 * like content get a neutral 5. Otherwise the score starts at 3 for
 * content-like pages (5 for others) and is adjusted by the bonuses and
 * penalties below before being clamped to 0..10.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  const intro = getFirstParagraphText(html);
  const opening = firstNWords(text, 300);
  const leadHtml = getBodyHtml(html).slice(0, 1800);

  let total = contentLike ? 3 : 5;

  // A specific first paragraph earns +2; a boilerplate opener costs 2.
  if (intro) {
    total += GENERIC_OPENERS.test(intro) ? -2 : 2;
  }

  // Practical guidance counts more when it appears within the first 300 words.
  if (countMatches(opening, PRACTICAL_LANGUAGE) >= 1) {
    total += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    total += 1;
  }

  const tradeoffs = countMatches(text, TRADEOFF_LANGUAGE);
  total += tradeoffs >= 2 ? 2 : tradeoffs >= 1 ? 1 : 0;

  if (titleAndBodyAlign(html)) total += 1;
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) total += 1;

  // Calls to action are counted in the first 1800 characters of body markup.
  const ctas = countMatches(leadHtml, EARLY_CTA_PATTERN);
  total -= ctas >= 3 ? 2 : ctas >= 2 ? 1 : 0;

  const fluff = countMatches(text, FLUFF_LANGUAGE);
  total -= fluff >= 3 ? 2 : fluff >= 1 ? 1 : 0;

  return floor(cap(total, 10), 0);
}
269
/**
 * Score (0-10) how much evidence of direct use, testing, or observation a
 * page shows.
 *
 * Pages with no text score 0. The baseline is 2 for content-like pages and
 * 5 otherwise; tiered bonuses are added for first-person actions, practice
 * context, evidence artifacts, and stated limitations, and one point is
 * removed when the text mentions manufacturer/vendor copy.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  let total = isContentLikePage(html, url) ? 2 : 5;

  const actions = countMatches(text, FIRST_HAND_ACTIONS);
  if (actions >= 3) {
    total += 4;
  } else if (actions >= 1) {
    total += 2;
  }

  const contexts = countMatches(text, EXPERIENCE_CONTEXT);
  total += contexts >= 2 ? 2 : contexts >= 1 ? 1 : 0;

  // Artifact mentions in the text plus <figure>/<figcaption> tags in the markup.
  const artifacts = countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
  total += artifacts >= 3 ? 2 : artifacts >= 1 ? 1 : 0;

  const limitations = countMatches(text, LIMITATION_LANGUAGE);
  total += limitations >= 2 ? 2 : limitations >= 1 ? 1 : 0;

  const quotesVendorCopy = /\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text);
  if (quotesVendorCopy) total -= 1;

  return floor(cap(total, 10), 0);
}
289
/**
 * Score (0-10) how clearly a page shows who wrote or reviewed it.
 *
 * Pages with no text score 0; pages that do not look like content get a
 * neutral 5. Content-like pages start at 0 and earn points per signal.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const bylineVisible =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);

  // [signal present, points awarded]
  const signals = [
    [bylineVisible, 3],
    [AUTHOR_LINK_PATTERN.test(html), 2],
    [AUTHOR_BIO_PATTERN.test(text), 2],
    [/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text), 1],
    [/"@type"\s*:\s*"Person"/i.test(html), 2]
  ];

  let total = 0;
  for (const [present, points] of signals) {
    if (present) total += points;
  }
  return floor(cap(total, 10), 0);
}
304
/**
 * Score (0-10) how openly a page explains how its content was tested,
 * researched, reviewed, or updated.
 *
 * Pages with no text score 0. Pages where a methodology is expected start
 * at 2 and have to earn their score; every other page starts at a neutral 5.
 * The baseline never depended on whether the page is content-like (both
 * branches of the earlier ternary were 5), so that lookup is not performed.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, forwarded to expectsMethodology.
 * @returns {number} Score between 0 and 10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  const expected = expectsMethodology(html, url);
  let score = expected ? 2 : 5;
  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;
  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;
  // Quantified process statements ("tested 12", "across 40", "over 3 weeks", "using 5 tools").
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  // Figures or tables only count when the page also mentions a methodology term.
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
  return floor(cap(score, 10), 0);
}
322
+
150
323
  // src/site-crawler.ts
151
- async function fetchText(url) {
152
- try {
153
- const res = await fetch(url, {
154
- signal: AbortSignal.timeout(15e3),
155
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
156
- redirect: "follow"
157
- });
158
- const text = await res.text();
159
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
160
- } catch {
161
- return null;
162
- }
324
/**
 * Fetch a URL through safeFetch and return its body as text.
 *
 * This is a best-effort helper: callers fan it out with Promise.all and
 * treat `null` as "not available". Any failure, including a body that
 * cannot be read after the headers arrived, therefore resolves to `null`
 * rather than rejecting, which is the contract the previous fetch-based
 * version had.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {string} [expectedDomain] - Passed to safeFetch as `expectedDomain`.
 *   NOTE(review): safeFetch is defined in another chunk; its exact checks are not visible here.
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   Body truncated to 500,000 characters, HTTP status and final URL, or null on failure.
 */
async function fetchText(url, expectedDomain) {
  try {
    const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
    if (!res) return null;
    const text = await res.text();
    return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
  } catch {
    // Deliberately swallowed: network and body-read errors mean "resource unavailable".
    return null;
  }
}
164
330
  function extractDomain(url) {
165
331
  return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
@@ -200,13 +366,16 @@ function isHtmlResponse(result) {
200
366
  return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
201
367
  }
202
368
  async function prefetchSiteData(domain) {
369
+ if (!await isSafeFetchTarget(`https://${domain}`)) {
370
+ return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
371
+ }
203
372
  let protocol = null;
204
373
  let homepage = null;
205
- homepage = await fetchText(`https://${domain}`);
374
+ homepage = await fetchText(`https://${domain}`, domain);
206
375
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
207
376
  protocol = "https";
208
377
  } else {
209
- homepage = await fetchText(`http://${domain}`);
378
+ homepage = await fetchText(`http://${domain}`, domain);
210
379
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
211
380
  protocol = "http";
212
381
  }
@@ -226,38 +395,38 @@ async function prefetchSiteData(domain) {
226
395
  }
227
396
  const baseUrl = `${protocol}://${domain}`;
228
397
  const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
229
- fetchText(`${baseUrl}/llms.txt`),
230
- fetchText(`${baseUrl}/robots.txt`),
231
- fetchText(`${baseUrl}/faq`).then(async (result) => {
398
+ fetchText(`${baseUrl}/llms.txt`, domain),
399
+ fetchText(`${baseUrl}/robots.txt`, domain),
400
+ fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
232
401
  if (result && result.status === 200) return result;
233
402
  for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
234
- const fallback = await fetchText(`${baseUrl}${path}`);
403
+ const fallback = await fetchText(`${baseUrl}${path}`, domain);
235
404
  if (fallback && fallback.status === 200) return fallback;
236
405
  }
237
406
  return result;
238
407
  }),
239
- fetchText(`${baseUrl}/sitemap.xml`),
240
- fetchText(`${baseUrl}/ai.txt`)
408
+ fetchText(`${baseUrl}/sitemap.xml`, domain),
409
+ fetchText(`${baseUrl}/ai.txt`, domain)
241
410
  ]);
242
411
  let rssFeed = null;
243
412
  if (homepage) {
244
413
  const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
245
414
  if (rssLinkMatch) {
246
415
  const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
247
- rssFeed = await fetchText(rssUrl);
416
+ rssFeed = await fetchText(rssUrl, domain);
248
417
  }
249
418
  if (!rssFeed || rssFeed.status !== 200) {
250
419
  for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
251
- rssFeed = await fetchText(`${baseUrl}${path}`);
420
+ rssFeed = await fetchText(`${baseUrl}${path}`, domain);
252
421
  if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
253
422
  rssFeed = null;
254
423
  }
255
424
  }
256
425
  }
257
426
  if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
258
- const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
427
+ const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
259
428
  if (subUrls.length > 0) {
260
- const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
429
+ const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
261
430
  for (const sub of subResults) {
262
431
  if (sub && sub.status === 200) {
263
432
  sitemapXml.text += "\n" + sub.text;
@@ -270,7 +439,7 @@ async function prefetchSiteData(domain) {
270
439
  const sitemapForBlog = sitemapXml.text;
271
440
  const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
272
441
  if (blogUrls.length > 0) {
273
- const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
442
+ const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
274
443
  blogSample = fetched.filter(
275
444
  (r) => r !== null && r.status === 200 && r.text.length > 500
276
445
  );
@@ -1052,8 +1221,8 @@ function checkDirectAnswerDensity(data) {
1052
1221
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
1053
1222
  const snippetZoneParagraphs = paragraphs.filter((p) => {
1054
1223
  const text2 = p.replace(/<[^>]*>/g, "").trim();
1055
- const wordCount = text2.split(/\s+/).length;
1056
- return wordCount >= 40 && wordCount <= 150;
1224
+ const wordCount2 = text2.split(/\s+/).length;
1225
+ return wordCount2 >= 40 && wordCount2 <= 150;
1057
1226
  });
1058
1227
  if (snippetZoneParagraphs.length >= 3) {
1059
1228
  score += 2;
@@ -1321,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
1321
1490
  });
1322
1491
  return candidates.slice(0, limit).map((c) => c.url);
1323
1492
  }
1324
- function extractAllSubSitemapUrls(sitemapText, limit = 5) {
1493
+ function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
1325
1494
  if (!sitemapText.includes("<sitemapindex")) return [];
1495
+ const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
1496
+ const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
1326
1497
  const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
1327
1498
  const urls = sitemapLocs.map((block) => {
1328
1499
  const match = block.match(/<loc>([^<]+)<\/loc>/i);
1329
1500
  return match ? match[1].trim() : "";
1330
- }).filter(Boolean);
1501
+ }).filter((url) => !!url && isSafePublicUrl(url, domain));
1331
1502
  const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
1332
1503
  const rest = urls.filter((u) => !preferred.includes(u));
1333
1504
  return [...preferred, ...rest].slice(0, limit);
@@ -2201,6 +2372,123 @@ function checkContentDepth(data, topicCoherenceScore) {
2201
2372
  }
2202
2373
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2203
2374
  }
2375
/**
 * Run a per-page scorer over the homepage and every sampled blog page.
 *
 * Each page is scored with its own `finalUrl`; when that is missing, the
 * site root built from `data.protocol` (default "https") and `data.domain`
 * is used instead.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`, `protocol`, `domain`).
 * @param {(html: string, url: string) => number} scorer - Scoring function applied to each page.
 * @returns {{url: string, score: number}[]} One entry per scored page, homepage first.
 */
function scoreSampledPages(data, scorer) {
  const rootUrl = () => (data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`);
  const scoreOne = (page) => {
    const url = page.finalUrl || rootUrl();
    return { url, score: scorer(page.text, url) };
  };

  const sampled = [];
  if (data.homepage) sampled.push(data.homepage);
  if (data.blogSample) sampled.push(...data.blogSample);
  return sampled.map((page) => scoreOne(page));
}
2389
/**
 * Summarize a list of per-page scores.
 *
 * @param {{score: number}[]} pageScores - Scored pages.
 * @returns {{total: number, average: number, strong: object[], weak: object[]}}
 *   Page count, rounded mean score (0 for an empty list), pages scoring 8 or
 *   more, and pages scoring 4 or less.
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let sum = 0;
  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }
  const average = total > 0 ? Math.round(sum / total) : 0;
  return { total, average, strong, weak };
}
2396
/**
 * Build the "Helpful Purpose Alignment" criterion result from the sampled
 * pages (homepage plus blog sample).
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Score is the rounded average page score; status is "not_found" without a
 *   homepage, otherwise "pass" (>= 7), "partial" (>= 4) or "fail".
 */
function checkHelpfulPurposeAlignment(data) {
  const criterion = "helpful_purpose_alignment";
  const criterion_label = "Helpful Purpose Alignment";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" };
  } else {
    headline = { severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P1" };
}
2420
/**
 * Build the "First-Hand Experience Signals" criterion result from the
 * sampled pages (homepage plus blog sample).
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Score is the rounded average page score; status is "not_found" without a
 *   homepage, otherwise "pass" (>= 7), "partial" (>= 4) or "fail".
 */
function checkFirstHandExperienceSignals(data) {
  const criterion = "first_hand_experience_signals";
  const criterion_label = "First-Hand Experience Signals";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" };
  } else {
    headline = { severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2444
/**
 * Build the "Creator Transparency" criterion result from the sampled pages
 * (homepage plus blog sample).
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Score is the rounded average page score; status is "not_found" without a
 *   homepage, otherwise "pass" (>= 7), "partial" (>= 4) or "fail".
 */
function checkCreatorTransparency(data) {
  const criterion = "creator_transparency";
  const criterion_label = "Creator Transparency";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" };
  } else {
    headline = { severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2468
/**
 * Build the "Methodology Transparency" criterion result from the sampled
 * pages (homepage plus blog sample).
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Score is the rounded average page score; status is "not_found" without a
 *   homepage, otherwise "pass" (>= 7), "partial" (>= 4) or "fail".
 */
function checkMethodologyTransparency(data) {
  const criterion = "methodology_transparency";
  const criterion_label = "Methodology Transparency";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' };
  } else {
    headline = { severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2204
2492
  function checkCitationReadyWriting(data) {
2205
2493
  const findings = [];
2206
2494
  if (!data.homepage) {
@@ -2296,8 +2584,8 @@ function checkAnswerFirstPlacement(data) {
2296
2584
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
2297
2585
  for (const p of earlyParagraphs) {
2298
2586
  const pText = p.replace(/<[^>]*>/g, "").trim();
2299
- const wordCount = pText.split(/\s+/).length;
2300
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
2587
+ const wordCount2 = pText.split(/\s+/).length;
2588
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
2301
2589
  shortAnswerCount++;
2302
2590
  break;
2303
2591
  }
@@ -2839,20 +3127,29 @@ function auditSiteFromData(data) {
2839
3127
  checkVisibleDateSignal(data),
2840
3128
  topicCoherence,
2841
3129
  checkContentDepth(data, topicCoherence.score),
2842
- // V2 criteria (#29-#34)
3130
+ // Helpful-content criteria (#29-#32)
3131
+ checkHelpfulPurposeAlignment(data),
3132
+ checkFirstHandExperienceSignals(data),
3133
+ checkCreatorTransparency(data),
3134
+ checkMethodologyTransparency(data),
3135
+ // V2 criteria (#33-#38)
2843
3136
  checkCitationReadyWriting(data),
2844
3137
  checkAnswerFirstPlacement(data),
2845
3138
  checkEvidencePackaging(data),
2846
3139
  checkEntityDisambiguation(data),
2847
3140
  checkExtractionFriction(data),
2848
3141
  checkImageContextAI(data),
2849
- // V3 criteria (#35-#36)
3142
+ // V3 criteria (#39-#40)
2850
3143
  checkDuplicateContent(data),
2851
3144
  checkCrossPageDuplication(data)
2852
3145
  ];
2853
3146
  }
2854
3147
  async function auditSite(targetUrl) {
2855
- const url = new URL(targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`);
3148
+ const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
3149
+ if (!await isSafeFetchTarget(normalizedTarget)) {
3150
+ throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
3151
+ }
3152
+ const url = new URL(normalizedTarget);
2856
3153
  const domain = url.hostname.replace(/^www\./, "");
2857
3154
  const data = await prefetchSiteData(domain);
2858
3155
  return auditSiteFromData(data);
@@ -2878,6 +3175,10 @@ var WEIGHTS = {
2878
3175
  // Relevance to actual AI queries
2879
3176
  faq_section: 0.03,
2880
3177
  // Structured Q&A pairs
3178
+ helpful_purpose_alignment: 0.03,
3179
+ // Visitor-helpful vs search-first framing
3180
+ first_hand_experience_signals: 0.03,
3181
+ // Evidence of real use or observation
2881
3182
  // ─── Content Organization (~30%) ──────────────────────────────────────────
2882
3183
  // HOW easily AI engines can extract and trust your content.
2883
3184
  entity_consistency: 0.05,
@@ -2892,9 +3193,13 @@ var WEIGHTS = {
2892
3193
  // Expert attribution
2893
3194
  table_list_extractability: 0.03,
2894
3195
  // Extractable structured data
2895
- definition_patterns: 0.02,
3196
+ creator_transparency: 0.02,
3197
+ // Visible author/reviewer clarity
3198
+ methodology_transparency: 0.02,
3199
+ // Process disclosure
3200
+ definition_patterns: 0.015,
2896
3201
  // Clear definitions
2897
- visible_date_signal: 0.02,
3202
+ visible_date_signal: 0.015,
2898
3203
  // Publication date trust
2899
3204
  semantic_html: 0.02,
2900
3205
  // Clean semantic structure
@@ -2903,15 +3208,15 @@ var WEIGHTS = {
2903
3208
  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
2904
3209
  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
2905
3210
  content_cannibalization: 0.02,
2906
- llms_txt: 0.02,
2907
- robots_txt: 0.02,
3211
+ llms_txt: 0.01,
3212
+ robots_txt: 0.01,
2908
3213
  content_velocity: 0.02,
2909
- content_licensing: 0.02,
3214
+ content_licensing: 0.01,
2910
3215
  sitemap_completeness: 0.01,
2911
- canonical_url: 0.01,
2912
- rss_feed: 0.01,
2913
- schema_coverage: 0.01,
2914
- speakable_schema: 0.01,
3216
+ canonical_url: 5e-3,
3217
+ rss_feed: 5e-3,
3218
+ schema_coverage: 5e-3,
3219
+ speakable_schema: 5e-3,
2915
3220
  // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
2916
3221
  // Citation quality, evidence packaging, and extraction friction.
2917
3222
  citation_ready_writing: 0.04,
@@ -2924,7 +3229,7 @@ var WEIGHTS = {
2924
3229
  // Clear entity boundaries
2925
3230
  extraction_friction: 0.02,
2926
3231
  // Sentence length, voice, jargon
2927
- image_context_ai: 0.01,
3232
+ image_context_ai: 5e-3,
2928
3233
  // Figure/figcaption, alt text quality
2929
3234
  // ─── V3 Criteria ────────────────────────────────────────────────────────
2930
3235
  duplicate_content: 0.05,
@@ -2944,8 +3249,8 @@ function calculateOverallScore(criteria) {
2944
3249
  let score = Math.round(weightedSum / totalWeight);
2945
3250
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
2946
3251
  if (coherence && coherence.score < 6) {
2947
- const cap2 = 35 + coherence.score * 5;
2948
- score = Math.min(score, cap2);
3252
+ const cap3 = 35 + coherence.score * 5;
3253
+ score = Math.min(score, cap3);
2949
3254
  }
2950
3255
  return score;
2951
3256
  }
@@ -2960,6 +3265,8 @@ var PILLARS = {
2960
3265
  "citation_ready_writing",
2961
3266
  "answer_first_placement",
2962
3267
  "evidence_packaging",
3268
+ "helpful_purpose_alignment",
3269
+ "first_hand_experience_signals",
2963
3270
  "duplicate_content",
2964
3271
  "cross_page_duplication"
2965
3272
  ],
@@ -2977,7 +3284,9 @@ var PILLARS = {
2977
3284
  "internal_linking",
2978
3285
  "content_freshness",
2979
3286
  "author_schema_depth",
2980
- "schema_markup"
3287
+ "schema_markup",
3288
+ "creator_transparency",
3289
+ "methodology_transparency"
2981
3290
  ],
2982
3291
  "Technical Foundation": [
2983
3292
  "semantic_html",
@@ -3007,6 +3316,8 @@ var CLIENT_NAMES = {
3007
3316
  citation_ready_writing: "Citation-Ready Writing",
3008
3317
  answer_first_placement: "Answer-First Placement",
3009
3318
  evidence_packaging: "Evidence Packaging",
3319
+ helpful_purpose_alignment: "Helpful Purpose Alignment",
3320
+ first_hand_experience_signals: "First-Hand Experience Signals",
3010
3321
  direct_answer_density: "Direct Answer Density",
3011
3322
  qa_content_format: "Q&A Content Format",
3012
3323
  query_answer_alignment: "Query-Answer Alignment",
@@ -3019,6 +3330,8 @@ var CLIENT_NAMES = {
3019
3330
  content_freshness: "Content Freshness",
3020
3331
  author_schema_depth: "Author & Expert Schema",
3021
3332
  schema_markup: "Schema Markup",
3333
+ creator_transparency: "Creator Transparency",
3334
+ methodology_transparency: "Methodology Transparency",
3022
3335
  semantic_html: "Semantic HTML",
3023
3336
  clean_html: "Clean HTML",
3024
3337
  visible_date_signal: "Visible Date Signal",
@@ -3045,6 +3358,8 @@ var PILLAR_WEIGHTS = {
3045
3358
  citation_ready_writing: 0.04,
3046
3359
  answer_first_placement: 0.03,
3047
3360
  evidence_packaging: 0.03,
3361
+ helpful_purpose_alignment: 0.03,
3362
+ first_hand_experience_signals: 0.03,
3048
3363
  duplicate_content: 0.05,
3049
3364
  cross_page_duplication: 0.03,
3050
3365
  direct_answer_density: 0.05,
@@ -3052,28 +3367,30 @@ var PILLAR_WEIGHTS = {
3052
3367
  query_answer_alignment: 0.04,
3053
3368
  faq_section: 0.03,
3054
3369
  table_list_extractability: 0.03,
3055
- definition_patterns: 0.02,
3370
+ definition_patterns: 0.015,
3056
3371
  entity_disambiguation: 0.02,
3057
3372
  entity_consistency: 0.05,
3058
3373
  internal_linking: 0.04,
3059
3374
  content_freshness: 0.04,
3060
3375
  author_schema_depth: 0.03,
3061
3376
  schema_markup: 0.03,
3377
+ creator_transparency: 0.02,
3378
+ methodology_transparency: 0.02,
3062
3379
  semantic_html: 0.02,
3063
3380
  clean_html: 0.02,
3064
- visible_date_signal: 0.02,
3381
+ visible_date_signal: 0.015,
3065
3382
  extraction_friction: 0.02,
3066
- image_context_ai: 0.01,
3067
- schema_coverage: 0.01,
3068
- speakable_schema: 0.01,
3383
+ image_context_ai: 5e-3,
3384
+ schema_coverage: 5e-3,
3385
+ speakable_schema: 5e-3,
3069
3386
  content_cannibalization: 0.02,
3070
- llms_txt: 0.02,
3071
- robots_txt: 0.02,
3387
+ llms_txt: 0.01,
3388
+ robots_txt: 0.01,
3072
3389
  content_velocity: 0.02,
3073
- content_licensing: 0.02,
3074
- canonical_url: 0.01,
3390
+ content_licensing: 0.01,
3391
+ canonical_url: 5e-3,
3075
3392
  sitemap_completeness: 0.01,
3076
- rss_feed: 0.01
3393
+ rss_feed: 5e-3
3077
3394
  };
3078
3395
  var CRITERION_EFFORT = {
3079
3396
  topic_coherence: "High",
@@ -3083,6 +3400,8 @@ var CRITERION_EFFORT = {
3083
3400
  citation_ready_writing: "Medium",
3084
3401
  answer_first_placement: "Medium",
3085
3402
  evidence_packaging: "Medium",
3403
+ helpful_purpose_alignment: "Medium",
3404
+ first_hand_experience_signals: "Medium",
3086
3405
  duplicate_content: "Medium",
3087
3406
  cross_page_duplication: "Medium",
3088
3407
  direct_answer_density: "Medium",
@@ -3097,6 +3416,8 @@ var CRITERION_EFFORT = {
3097
3416
  content_freshness: "Low",
3098
3417
  author_schema_depth: "Low",
3099
3418
  schema_markup: "Medium",
3419
+ creator_transparency: "Low",
3420
+ methodology_transparency: "Low",
3100
3421
  semantic_html: "Low",
3101
3422
  clean_html: "Medium",
3102
3423
  visible_date_signal: "Low",
@@ -3121,6 +3442,8 @@ var FIX_DESCRIPTIONS = {
3121
3442
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
3122
3443
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
3123
3444
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
3445
+ helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
3446
+ first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
3124
3447
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
3125
3448
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
3126
3449
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
@@ -3133,6 +3456,8 @@ var FIX_DESCRIPTIONS = {
3133
3456
  content_freshness: "Add dateModified schema and visible last-updated dates.",
3134
3457
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
3135
3458
  schema_markup: "Implement JSON-LD structured data on key pages.",
3459
+ creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
3460
+ methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
3136
3461
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
3137
3462
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
3138
3463
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
@@ -3232,6 +3557,10 @@ var CRITERION_LABELS = {
3232
3557
  "Visible Date Signal": "Visible Date Signal",
3233
3558
  "Topic Coherence": "Topic Coherence",
3234
3559
  "Content Depth": "Content Depth",
3560
+ "Helpful Purpose Alignment": "Helpful Purpose Alignment",
3561
+ "First-Hand Experience Signals": "First-Hand Experience Signals",
3562
+ "Creator Transparency": "Creator Transparency",
3563
+ "Methodology Transparency": "Methodology Transparency",
3235
3564
  "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
3236
3565
  "Answer-First Placement": "Answer-First Placement",
3237
3566
  "Evidence Packaging": "Evidence Packaging",
@@ -3337,6 +3666,8 @@ var CRITERION_WEIGHTS = {
3337
3666
  qa_content_format: 0.04,
3338
3667
  query_answer_alignment: 0.04,
3339
3668
  faq_section: 0.03,
3669
+ helpful_purpose_alignment: 0.03,
3670
+ first_hand_experience_signals: 0.03,
3340
3671
  // Content Organization (~30%)
3341
3672
  entity_consistency: 0.05,
3342
3673
  internal_linking: 0.04,
@@ -3344,28 +3675,30 @@ var CRITERION_WEIGHTS = {
3344
3675
  schema_markup: 0.03,
3345
3676
  author_schema_depth: 0.03,
3346
3677
  table_list_extractability: 0.03,
3347
- definition_patterns: 0.02,
3348
- visible_date_signal: 0.02,
3678
+ creator_transparency: 0.02,
3679
+ methodology_transparency: 0.02,
3680
+ definition_patterns: 0.015,
3681
+ visible_date_signal: 0.015,
3349
3682
  semantic_html: 0.02,
3350
3683
  clean_html: 0.02,
3351
3684
  // Technical Plumbing (~15%)
3352
3685
  content_cannibalization: 0.02,
3353
- llms_txt: 0.02,
3354
- robots_txt: 0.02,
3686
+ llms_txt: 0.01,
3687
+ robots_txt: 0.01,
3355
3688
  content_velocity: 0.02,
3356
- content_licensing: 0.02,
3689
+ content_licensing: 0.01,
3357
3690
  sitemap_completeness: 0.01,
3358
- canonical_url: 0.01,
3359
- rss_feed: 0.01,
3360
- schema_coverage: 0.01,
3361
- speakable_schema: 0.01,
3691
+ canonical_url: 5e-3,
3692
+ rss_feed: 5e-3,
3693
+ schema_coverage: 5e-3,
3694
+ speakable_schema: 5e-3,
3362
3695
  // V2 Criteria (~15%)
3363
3696
  citation_ready_writing: 0.04,
3364
3697
  answer_first_placement: 0.03,
3365
3698
  evidence_packaging: 0.03,
3366
3699
  entity_disambiguation: 0.02,
3367
3700
  extraction_friction: 0.02,
3368
- image_context_ai: 0.01,
3701
+ image_context_ai: 5e-3,
3369
3702
  // V3 Criteria
3370
3703
  duplicate_content: 0.05,
3371
3704
  cross_page_duplication: 0.03
@@ -3406,6 +3739,16 @@ var OPPORTUNITY_TEMPLATES = {
3406
3739
  effort: "Medium",
3407
3740
  description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
3408
3741
  },
3742
+ helpful_purpose_alignment: {
3743
+ name: "Improve Helpful Purpose Alignment",
3744
+ effort: "Medium",
3745
+ description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
3746
+ },
3747
+ first_hand_experience_signals: {
3748
+ name: "Add First-Hand Experience Signals",
3749
+ effort: "Medium",
3750
+ description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
3751
+ },
3409
3752
  original_data: {
3410
3753
  name: "Add Original Data & Case Studies",
3411
3754
  effort: "High",
@@ -3461,6 +3804,16 @@ var OPPORTUNITY_TEMPLATES = {
3461
3804
  effort: "Low",
3462
3805
  description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
3463
3806
  },
3807
+ creator_transparency: {
3808
+ name: "Improve Creator Transparency",
3809
+ effort: "Low",
3810
+ description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
3811
+ },
3812
+ methodology_transparency: {
3813
+ name: "Add Methodology Transparency",
3814
+ effort: "Low",
3815
+ description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
3816
+ },
3464
3817
  fact_density: {
3465
3818
  name: "Increase Fact & Data Density",
3466
3819
  effort: "Medium",
@@ -3720,20 +4073,12 @@ function formatList(items) {
3720
4073
  }
3721
4074
 
3722
4075
  // src/multi-page-fetcher.ts
3723
- async function fetchPage(url, timeoutMs = 1e4) {
3724
- try {
3725
- const res = await fetch(url, {
3726
- signal: AbortSignal.timeout(timeoutMs),
3727
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
3728
- redirect: "follow"
3729
- });
3730
- if (res.status !== 200) return null;
3731
- const text = await res.text();
3732
- if (text.length < 200) return null;
3733
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
3734
- } catch {
3735
- return null;
3736
- }
4076
+ async function fetchPage(url, domain, timeoutMs = 1e4) {
4077
+ const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
4078
+ if (!res || res.status !== 200) return null;
4079
+ const text = await res.text();
4080
+ if (text.length < 200) return null;
4081
+ return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
3737
4082
  }
3738
4083
  var PAGE_VARIANTS = {
3739
4084
  about: ["/about", "/about-us", "/company", "/who-we-are"],
@@ -3889,7 +4234,7 @@ async function fetchMultiPageData(siteData, options) {
3889
4234
  }
3890
4235
  const entries = Array.from(urlsToFetch.entries());
3891
4236
  if (entries.length === 0) return 0;
3892
- const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
4237
+ const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
3893
4238
  if (!siteData.blogSample) siteData.blogSample = [];
3894
4239
  let added = 0;
3895
4240
  for (let i = 0; i < results.length; i++) {
@@ -3916,19 +4261,23 @@ var PAGE_CRITERIA = {
3916
4261
  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
3917
4262
  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
3918
4263
  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
3919
- definition_patterns: { weight: 0.02, label: "Definition Patterns" },
3920
- visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
4264
+ definition_patterns: { weight: 0.015, label: "Definition Patterns" },
4265
+ visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
3921
4266
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
3922
4267
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
3923
4268
  // Technical Plumbing
3924
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
4269
+ canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
3925
4270
  // V2 Criteria
3926
4271
  citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
3927
4272
  answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
3928
4273
  evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
4274
+ helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
4275
+ first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
3929
4276
  entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
3930
4277
  extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
3931
- image_context_ai: { weight: 0.01, label: "Image Context for AI" },
4278
+ creator_transparency: { weight: 0.02, label: "Creator Transparency" },
4279
+ methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
4280
+ image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
3932
4281
  duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
3933
4282
  };
3934
4283
  function extractJsonLdBlocks(html) {
@@ -3951,7 +4300,7 @@ function extractTypesFromJsonLd(blocks) {
3951
4300
  }
3952
4301
  return types;
3953
4302
  }
3954
- function getTextContent(html) {
4303
+ function getTextContent2(html) {
3955
4304
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
3956
4305
  }
3957
4306
  function extractQuestionHeadings2(html) {
@@ -3979,7 +4328,7 @@ function countAnsweredQuestions(html) {
3979
4328
  }
3980
4329
  return { total: questions.length, answered };
3981
4330
  }
3982
- function cap(value, max) {
4331
+ function cap2(value, max) {
3983
4332
  return Math.min(value, max);
3984
4333
  }
3985
4334
  function scoreSchemaMarkup(html) {
@@ -4005,10 +4354,10 @@ function scoreSchemaMarkup(html) {
4005
4354
  for (const t of types) {
4006
4355
  if (knownTypes.includes(t)) knownCount++;
4007
4356
  }
4008
- score += cap(knownCount * 2, 4);
4357
+ score += cap2(knownCount * 2, 4);
4009
4358
  if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
4010
4359
  if (types.has("FAQPage")) score += 1;
4011
- return cap(score, 10);
4360
+ return cap2(score, 10);
4012
4361
  }
4013
4362
  function scoreQAFormat(html) {
4014
4363
  const questions = extractQuestionHeadings2(html);
@@ -4020,7 +4369,7 @@ function scoreQAFormat(html) {
4020
4369
  if (answered >= 1) score += 3;
4021
4370
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4022
4371
  if (h1Matches.length === 1) score += 2;
4023
- return cap(score, 10);
4372
+ return cap2(score, 10);
4024
4373
  }
4025
4374
  function scoreCleanHtml(html) {
4026
4375
  let score = 0;
@@ -4029,15 +4378,15 @@ function scoreCleanHtml(html) {
4029
4378
  for (const tag of semantics) {
4030
4379
  if (html.toLowerCase().includes(tag)) semCount++;
4031
4380
  }
4032
- score += cap(semCount, 3);
4381
+ score += cap2(semCount, 3);
4033
4382
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4034
4383
  if (h1Matches.length === 1) score += 2;
4035
- const text = getTextContent(html);
4384
+ const text = getTextContent2(html);
4036
4385
  if (text.length > 500) score += 3;
4037
4386
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
4038
4387
  const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
4039
4388
  if (hasTitle && hasDesc) score += 2;
4040
- return cap(score, 10);
4389
+ return cap2(score, 10);
4041
4390
  }
4042
4391
  function scoreFaqSection(html) {
4043
4392
  let score = 0;
@@ -4049,11 +4398,11 @@ function scoreFaqSection(html) {
4049
4398
  const questions = extractQuestionHeadings2(html);
4050
4399
  if (questions.length >= 10) score += 1;
4051
4400
  if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
4052
- return cap(score, 10);
4401
+ return cap2(score, 10);
4053
4402
  }
4054
4403
  function scoreOriginalData(html) {
4055
4404
  let score = 0;
4056
- const text = getTextContent(html);
4405
+ const text = getTextContent2(html);
4057
4406
  if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
4058
4407
  score += 3;
4059
4408
  } else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
@@ -4070,7 +4419,7 @@ function scoreOriginalData(html) {
4070
4419
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
4071
4420
  score += 2;
4072
4421
  }
4073
- return cap(score, 10);
4422
+ return cap2(score, 10);
4074
4423
  }
4075
4424
  function scoreQueryAnswerAlignment(html) {
4076
4425
  const { total, answered } = countAnsweredQuestions(html);
@@ -4093,7 +4442,7 @@ function scoreContentFreshness(html) {
4093
4442
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4094
4443
  const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
4095
4444
  if (yearPattern.test(html)) score += 2;
4096
- return cap(score, 10);
4445
+ return cap2(score, 10);
4097
4446
  }
4098
4447
  function scoreTableListExtractability(html) {
4099
4448
  let score = 0;
@@ -4106,7 +4455,7 @@ function scoreTableListExtractability(html) {
4106
4455
  const listItems = html.match(/<li[\s>]/gi) || [];
4107
4456
  if (listItems.length >= 10) score += 1;
4108
4457
  if (/<dl[\s>]/i.test(html)) score += 1;
4109
- return cap(score, 10);
4458
+ return cap2(score, 10);
4110
4459
  }
4111
4460
  function scoreDirectAnswerDensity(html) {
4112
4461
  let score = 0;
@@ -4122,9 +4471,9 @@ function scoreDirectAnswerDensity(html) {
4122
4471
  }
4123
4472
  if (snippetCount >= 3) score += 2;
4124
4473
  else if (snippetCount >= 1) score += 1;
4125
- const directOpeners = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4474
+ const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4126
4475
  if (directOpeners.length >= 2) score += 2;
4127
- return cap(score, 10);
4476
+ return cap2(score, 10);
4128
4477
  }
4129
4478
  function scoreSemanticHtml(html) {
4130
4479
  let score = 0;
@@ -4134,7 +4483,7 @@ function scoreSemanticHtml(html) {
4134
4483
  for (const el of elements) {
4135
4484
  if (lowerHtml.includes(el)) count++;
4136
4485
  }
4137
- score += cap(Math.floor(count * 0.7), 4);
4486
+ score += cap2(Math.floor(count * 0.7), 4);
4138
4487
  const imgTags = html.match(/<img\s[^>]*>/gi) || [];
4139
4488
  if (imgTags.length > 0) {
4140
4489
  let withAlt = 0;
@@ -4145,11 +4494,11 @@ function scoreSemanticHtml(html) {
4145
4494
  }
4146
4495
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
4147
4496
  if (/\baria-/i.test(html)) score += 2;
4148
- return cap(score, 10);
4497
+ return cap2(score, 10);
4149
4498
  }
4150
4499
  function scoreFactDensity(html) {
4151
4500
  let score = 0;
4152
- const text = getTextContent(html);
4501
+ const text = getTextContent2(html);
4153
4502
  const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
4154
4503
  if (numericPatterns.length >= 6) score += 5;
4155
4504
  else if (numericPatterns.length >= 3) score += 3;
@@ -4162,11 +4511,11 @@ function scoreFactDensity(html) {
4162
4511
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
4163
4512
  const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
4164
4513
  if (units.length >= 2) score += 1;
4165
- return cap(score, 10);
4514
+ return cap2(score, 10);
4166
4515
  }
4167
4516
  function scoreDefinitionPatterns(html) {
4168
4517
  let score = 0;
4169
- const text = getTextContent(html);
4518
+ const text = getTextContent2(html);
4170
4519
  const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
4171
4520
  if (defPatterns.length >= 3) score += 5;
4172
4521
  else if (defPatterns.length >= 1) score += 3;
@@ -4174,7 +4523,7 @@ function scoreDefinitionPatterns(html) {
4174
4523
  if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
4175
4524
  if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
4176
4525
  if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
4177
- return cap(score, 10);
4526
+ return cap2(score, 10);
4178
4527
  }
4179
4528
  function scoreCanonicalUrl(html, url) {
4180
4529
  let score = 0;
@@ -4195,7 +4544,7 @@ function scoreCanonicalUrl(html, url) {
4195
4544
  if (canonicalHref.startsWith("https://")) score += 2;
4196
4545
  const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
4197
4546
  if (allCanonicals.length === 1) score += 1;
4198
- return cap(score, 10);
4547
+ return cap2(score, 10);
4199
4548
  }
4200
4549
  function scoreVisibleDateSignal(html) {
4201
4550
  let score = 0;
@@ -4214,11 +4563,11 @@ function scoreVisibleDateSignal(html) {
4214
4563
  } catch {
4215
4564
  }
4216
4565
  }
4217
- return cap(score, 10);
4566
+ return cap2(score, 10);
4218
4567
  }
4219
4568
  function scoreCitationReadyWriting(html) {
4220
4569
  let score = 0;
4221
- const text = getTextContent(html);
4570
+ const text = getTextContent2(html);
4222
4571
  const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
4223
4572
  if (defSentences.length >= 3) score += 3;
4224
4573
  else if (defSentences.length >= 1) score += 1;
@@ -4247,7 +4596,7 @@ function scoreCitationReadyWriting(html) {
4247
4596
  );
4248
4597
  if (quotableLines.length >= 2) score += 2;
4249
4598
  else if (quotableLines.length >= 1) score += 1;
4250
- return cap(score, 10);
4599
+ return cap2(score, 10);
4251
4600
  }
4252
4601
  function scoreAnswerFirstPlacement(html) {
4253
4602
  let score = 0;
@@ -4258,8 +4607,8 @@ function scoreAnswerFirstPlacement(html) {
4258
4607
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4259
4608
  for (const p of earlyParagraphs) {
4260
4609
  const pText = p.replace(/<[^>]*>/g, "").trim();
4261
- const wordCount = pText.split(/\s+/).length;
4262
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
4610
+ const wordCount2 = pText.split(/\s+/).length;
4611
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4263
4612
  score += 4;
4264
4613
  break;
4265
4614
  }
@@ -4280,11 +4629,11 @@ function scoreAnswerFirstPlacement(html) {
4280
4629
  score += 3;
4281
4630
  }
4282
4631
  }
4283
- return cap(score, 10);
4632
+ return cap2(score, 10);
4284
4633
  }
4285
4634
  function scoreEvidencePackaging(html) {
4286
4635
  let score = 0;
4287
- const text = getTextContent(html);
4636
+ const text = getTextContent2(html);
4288
4637
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4289
4638
  let inlineCitations = 0;
4290
4639
  for (const p of paragraphs) {
@@ -4302,11 +4651,11 @@ function scoreEvidencePackaging(html) {
4302
4651
  const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
4303
4652
  if (sourcedStats.length >= 2) score += 2;
4304
4653
  else if (sourcedStats.length >= 1) score += 1;
4305
- return cap(score, 10);
4654
+ return cap2(score, 10);
4306
4655
  }
4307
4656
  function scoreEntityDisambiguation(html) {
4308
4657
  let score = 0;
4309
- const text = getTextContent(html);
4658
+ const text = getTextContent2(html);
4310
4659
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
4311
4660
  if (!h1Match) return 3;
4312
4661
  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
@@ -4324,11 +4673,11 @@ function scoreEntityDisambiguation(html) {
4324
4673
  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
4325
4674
  score += 3;
4326
4675
  }
4327
- return cap(score, 10);
4676
+ return cap2(score, 10);
4328
4677
  }
4329
4678
  function scoreExtractionFriction(html) {
4330
4679
  let score = 0;
4331
- const text = getTextContent(html);
4680
+ const text = getTextContent2(html);
4332
4681
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
4333
4682
  const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
4334
4683
  if (avgLen > 0 && avgLen < 20) score += 3;
@@ -4351,7 +4700,7 @@ function scoreExtractionFriction(html) {
4351
4700
  if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
4352
4701
  score = Math.max(0, score - 2);
4353
4702
  }
4354
- return cap(score, 10);
4703
+ return cap2(score, 10);
4355
4704
  }
4356
4705
  function scoreImageContextAI(html) {
4357
4706
  let score = 0;
@@ -4376,7 +4725,7 @@ function scoreImageContextAI(html) {
4376
4725
  else if (goodAltCount > 0) score += 1;
4377
4726
  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
4378
4727
  if (contextualImages.length > 0) score += 3;
4379
- return cap(score, 10);
4728
+ return cap2(score, 10);
4380
4729
  }
4381
4730
  function scoreDuplicateContent(html) {
4382
4731
  return scoreDuplicateContentDetailed(html).score;
@@ -4438,8 +4787,12 @@ var SCORING_FUNCTIONS = {
4438
4787
  citation_ready_writing: scoreCitationReadyWriting,
4439
4788
  answer_first_placement: scoreAnswerFirstPlacement,
4440
4789
  evidence_packaging: scoreEvidencePackaging,
4790
+ helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
4791
+ first_hand_experience_signals: scoreFirstHandExperienceSignals,
4441
4792
  entity_disambiguation: scoreEntityDisambiguation,
4442
4793
  extraction_friction: scoreExtractionFriction,
4794
+ creator_transparency: scoreCreatorTransparency,
4795
+ methodology_transparency: scoreMethodologyTransparency,
4443
4796
  image_context_ai: scoreImageContextAI,
4444
4797
  duplicate_content: scoreDuplicateContent
4445
4798
  };
@@ -4484,7 +4837,7 @@ function extractTitle(html) {
4484
4837
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4485
4838
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4486
4839
  }
4487
- function getTextContent2(html) {
4840
+ function getTextContent3(html) {
4488
4841
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4489
4842
  }
4490
4843
  function countWords2(text) {
@@ -4544,9 +4897,9 @@ function checkMissingOgTags(html) {
4544
4897
  }
4545
4898
  return null;
4546
4899
  }
4547
- function checkThinContent(wordCount) {
4548
- if (wordCount < 300) {
4549
- return { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" };
4900
+ function checkThinContent(wordCount2) {
4901
+ if (wordCount2 < 300) {
4902
+ return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
4550
4903
  }
4551
4904
  return null;
4552
4905
  }
@@ -4643,15 +4996,15 @@ function checkNoAnswerBlock(html) {
4643
4996
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4644
4997
  for (const p of earlyParagraphs) {
4645
4998
  const pText = p.replace(/<[^>]*>/g, "").trim();
4646
- const wordCount = pText.split(/\s+/).length;
4647
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
4999
+ const wordCount2 = pText.split(/\s+/).length;
5000
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4648
5001
  return null;
4649
5002
  }
4650
5003
  }
4651
5004
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
4652
5005
  }
4653
5006
  function checkNoEvidence(html, url) {
4654
- const text = getTextContent2(html);
5007
+ const text = getTextContent3(html);
4655
5008
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4656
5009
  let inlineCitations = 0;
4657
5010
  for (const p of paragraphs) {
@@ -4665,7 +5018,7 @@ function checkNoEvidence(html, url) {
4665
5018
  return null;
4666
5019
  }
4667
5020
  function checkHasCitationReadyContent(html) {
4668
- const text = getTextContent2(html);
5021
+ const text = getTextContent3(html);
4669
5022
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
4670
5023
  let quotable = 0;
4671
5024
  for (const s of sentences) {
@@ -4690,8 +5043,8 @@ function checkDuplicateContentBlocks(html) {
4690
5043
  }
4691
5044
  function analyzePage(html, url, category) {
4692
5045
  const title = extractTitle(html);
4693
- const textContent = getTextContent2(html);
4694
- const wordCount = countWords2(textContent);
5046
+ const textContent = getTextContent3(html);
5047
+ const wordCount2 = countWords2(textContent);
4695
5048
  const issues = [];
4696
5049
  const strengths = [];
4697
5050
  const issueChecks = [
@@ -4702,7 +5055,7 @@ function analyzePage(html, url, category) {
4702
5055
  checkNoSchema(html),
4703
5056
  checkMissingCanonical(html),
4704
5057
  checkMissingOgTags(html),
4705
- checkThinContent(wordCount),
5058
+ checkThinContent(wordCount2),
4706
5059
  checkImagesMissingAlt(html),
4707
5060
  checkNoInternalLinks(html, url),
4708
5061
  checkNoAnswerBlock(html),
@@ -4721,7 +5074,7 @@ function analyzePage(html, url, category) {
4721
5074
  if (result) strengths.push(result);
4722
5075
  }
4723
5076
  const { aeoScore, criterionScores } = scorePage(html, url);
4724
- return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
5077
+ return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
4725
5078
  }
4726
5079
  function analyzeAllPages(siteData) {
4727
5080
  const reviews = [];
@@ -4760,7 +5113,7 @@ function extractTitle2(html) {
4760
5113
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4761
5114
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4762
5115
  }
4763
- function getTextContent3(html) {
5116
+ function getTextContent4(html) {
4764
5117
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4765
5118
  }
4766
5119
  function countWords3(text) {
@@ -4910,12 +5263,12 @@ function buildLinkGraph(pages, domain, homepageUrl) {
4910
5263
  const norm = normalizeUrl(url);
4911
5264
  if (nodes.has(norm)) continue;
4912
5265
  const title = extractTitle2(page.text);
4913
- const text = getTextContent3(page.text);
4914
- const wordCount = countWords3(text);
5266
+ const text = getTextContent4(page.text);
5267
+ const wordCount2 = countWords3(text);
4915
5268
  nodes.set(norm, {
4916
5269
  url: norm,
4917
5270
  title,
4918
- wordCount,
5271
+ wordCount: wordCount2,
4919
5272
  category: page.category || "content",
4920
5273
  inDegree: 0,
4921
5274
  outDegree: 0,
@@ -4983,6 +5336,8 @@ var CRITERION_WEIGHTS2 = {
4983
5336
  qa_content_format: 0.04,
4984
5337
  query_answer_alignment: 0.04,
4985
5338
  faq_section: 0.03,
5339
+ helpful_purpose_alignment: 0.03,
5340
+ first_hand_experience_signals: 0.03,
4986
5341
  // Content Organization (~30%)
4987
5342
  entity_consistency: 0.05,
4988
5343
  internal_linking: 0.04,
@@ -4990,30 +5345,32 @@ var CRITERION_WEIGHTS2 = {
4990
5345
  schema_markup: 0.03,
4991
5346
  author_schema_depth: 0.03,
4992
5347
  table_list_extractability: 0.03,
4993
- definition_patterns: 0.02,
4994
- visible_date_signal: 0.02,
5348
+ creator_transparency: 0.02,
5349
+ methodology_transparency: 0.02,
5350
+ definition_patterns: 0.015,
5351
+ visible_date_signal: 0.015,
4995
5352
  semantic_html: 0.02,
4996
5353
  clean_html: 0.02,
4997
5354
  // Technical Plumbing (~15%)
4998
5355
  content_cannibalization: 0.02,
4999
5356
  duplicate_content: 0.05,
5000
5357
  cross_page_duplication: 0.03,
5001
- llms_txt: 0.02,
5002
- robots_txt: 0.02,
5358
+ llms_txt: 0.01,
5359
+ robots_txt: 0.01,
5003
5360
  content_velocity: 0.02,
5004
- content_licensing: 0.02,
5361
+ content_licensing: 0.01,
5005
5362
  sitemap_completeness: 0.01,
5006
- canonical_url: 0.01,
5007
- rss_feed: 0.01,
5008
- schema_coverage: 0.01,
5009
- speakable_schema: 0.01,
5363
+ canonical_url: 5e-3,
5364
+ rss_feed: 5e-3,
5365
+ schema_coverage: 5e-3,
5366
+ speakable_schema: 5e-3,
5010
5367
  // V2 Criteria (~15%)
5011
5368
  citation_ready_writing: 0.04,
5012
5369
  answer_first_placement: 0.03,
5013
5370
  evidence_packaging: 0.03,
5014
5371
  entity_disambiguation: 0.02,
5015
5372
  extraction_friction: 0.02,
5016
- image_context_ai: 0.01
5373
+ image_context_ai: 5e-3
5017
5374
  };
5018
5375
  var PHASE_CONFIG = [
5019
5376
  {
@@ -5043,6 +5400,8 @@ var PHASE_CONFIG = [
5043
5400
  "answer_first_placement",
5044
5401
  "evidence_packaging",
5045
5402
  "entity_disambiguation",
5403
+ "helpful_purpose_alignment",
5404
+ "first_hand_experience_signals",
5046
5405
  "duplicate_content",
5047
5406
  "cross_page_duplication"
5048
5407
  ]
@@ -5056,6 +5415,8 @@ var PHASE_CONFIG = [
5056
5415
  "schema_coverage",
5057
5416
  "speakable_schema",
5058
5417
  "author_schema_depth",
5418
+ "creator_transparency",
5419
+ "methodology_transparency",
5059
5420
  "content_licensing",
5060
5421
  "entity_consistency",
5061
5422
  "semantic_html",
@@ -5078,7 +5439,7 @@ function impactFromScore(score) {
5078
5439
  }
5079
5440
  function effortForCriterion(criterion, score) {
5080
5441
  const trivialCriteria = ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"];
5081
- const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"];
5442
+ const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "creator_transparency", "methodology_transparency", "semantic_html", "definition_patterns", "content_freshness"];
5082
5443
  const highCriteria = ["original_data", "content_velocity", "content_cannibalization"];
5083
5444
  if (trivialCriteria.includes(criterion)) return score <= 3 ? "low" : "trivial";
5084
5445
  if (lowCriteria.includes(criterion)) return score <= 3 ? "medium" : "low";
@@ -5345,6 +5706,58 @@ Sitemap: https://example.com/sitemap.xml`,
5345
5706
  pageCount: affected?.length
5346
5707
  }];
5347
5708
  },
5709
+ helpful_purpose_alignment: (c, pages) => {
5710
+ if (c.score >= 10) return [];
5711
+ const impact = impactFromScore(c.score);
5712
+ const effort = effortForCriterion("helpful_purpose_alignment", c.score);
5713
+ const affected = getAffectedPages("helpful_purpose_alignment", pages);
5714
+ return [{
5715
+ id: "fix-helpful-purpose-alignment",
5716
+ criterion: c.criterion_label,
5717
+ criterionId: c.criterion,
5718
+ title: "Make pages solve the user task faster",
5719
+ description: "Reduce search-first filler and rewrite pages so the promised task is resolved quickly with concrete guidance, tradeoffs, and next steps.",
5720
+ impact,
5721
+ effort,
5722
+ impactScore: 0,
5723
+ category: "content",
5724
+ steps: [
5725
+ "Rewrite first paragraphs to answer the user need within the first 150-300 words",
5726
+ 'Remove generic intros like "In this guide" and broad filler that could fit any topic',
5727
+ "Add concrete decision help: tradeoffs, risks, constraints, and next steps",
5728
+ "Move aggressive CTAs below the first useful answer block"
5729
+ ],
5730
+ successCriteria: "Pages lead with task-solving guidance instead of generic search-first framing",
5731
+ affectedPages: affected,
5732
+ pageCount: affected?.length
5733
+ }];
5734
+ },
5735
+ first_hand_experience_signals: (c, pages) => {
5736
+ if (c.score >= 10) return [];
5737
+ const impact = impactFromScore(c.score);
5738
+ const effort = effortForCriterion("first_hand_experience_signals", c.score);
5739
+ const affected = getAffectedPages("first_hand_experience_signals", pages);
5740
+ return [{
5741
+ id: "fix-first-hand-experience",
5742
+ criterion: c.criterion_label,
5743
+ criterionId: c.criterion,
5744
+ title: "Add first-hand experience signals",
5745
+ description: "Show real use, testing, implementation, or lived experience instead of relying on generic summary content.",
5746
+ impact,
5747
+ effort,
5748
+ impactScore: 0,
5749
+ category: "content",
5750
+ steps: [
5751
+ "Add specific observations from real use, testing, or implementation",
5752
+ "Document limitations, edge cases, or lessons learned in practice",
5753
+ "Include screenshots, photos, before/after metrics, or original artifacts where relevant",
5754
+ "Rewrite generic sections to reflect direct experience with the subject matter"
5755
+ ],
5756
+ successCriteria: "Key pages contain credible signs of direct use or observation, not just generic advice",
5757
+ affectedPages: affected,
5758
+ pageCount: affected?.length
5759
+ }];
5760
+ },
5348
5761
  original_data: (c, pages) => {
5349
5762
  if (c.score >= 10) return [];
5350
5763
  const impact = impactFromScore(c.score);
@@ -5711,6 +6124,58 @@ Summarization: yes`,
5711
6124
  successCriteria: "Articles have Person schema for authors with credentials"
5712
6125
  }];
5713
6126
  },
6127
+ creator_transparency: (c, pages) => {
6128
+ if (c.score >= 10) return [];
6129
+ const impact = impactFromScore(c.score);
6130
+ const effort = effortForCriterion("creator_transparency", c.score);
6131
+ const affected = getAffectedPages("creator_transparency", pages);
6132
+ return [{
6133
+ id: "fix-creator-transparency",
6134
+ criterion: c.criterion_label,
6135
+ criterionId: c.criterion,
6136
+ title: "Make content creators clearly visible",
6137
+ description: "Add visible bylines, author pages, and reviewer/editor attribution so readers can clearly tell who created the content.",
6138
+ impact,
6139
+ effort,
6140
+ impactScore: 0,
6141
+ category: "trust",
6142
+ steps: [
6143
+ "Add visible bylines to article-like pages where readers expect them",
6144
+ "Link author names to author pages with role, expertise area, and relevant background",
6145
+ "Add reviewer or editor attribution on sensitive or expert content",
6146
+ "Keep visible creator identity consistent with schema markup"
6147
+ ],
6148
+ successCriteria: "Article-like pages have clear visible bylines and linked creator context",
6149
+ affectedPages: affected,
6150
+ pageCount: affected?.length
6151
+ }];
6152
+ },
6153
+ methodology_transparency: (c, pages) => {
6154
+ if (c.score >= 10) return [];
6155
+ const impact = impactFromScore(c.score);
6156
+ const effort = effortForCriterion("methodology_transparency", c.score);
6157
+ const affected = getAffectedPages("methodology_transparency", pages);
6158
+ return [{
6159
+ id: "fix-methodology-transparency",
6160
+ criterion: c.criterion_label,
6161
+ criterionId: c.criterion,
6162
+ title: "Explain how content was tested or reviewed",
6163
+ description: "Add methodology, criteria, testing, review, or update-process details where users would expect them.",
6164
+ impact,
6165
+ effort,
6166
+ impactScore: 0,
6167
+ category: "trust",
6168
+ steps: [
6169
+ 'Add a "How we tested", "Methodology", or review-process section where relevant',
6170
+ "Document criteria, tools used, sample size, timeframe, or update policy",
6171
+ "Disclose AI assistance when a reasonable reader would expect that context",
6172
+ "Support methodology notes with screenshots, tables, or process artifacts when possible"
6173
+ ],
6174
+ successCriteria: "Review, comparison, and research-style pages explain how conclusions were produced",
6175
+ affectedPages: affected,
6176
+ pageCount: affected?.length
6177
+ }];
6178
+ },
5714
6179
  fact_density: (c, pages) => {
5715
6180
  if (c.score >= 10) return [];
5716
6181
  const impact = impactFromScore(c.score);
@@ -6356,6 +6821,13 @@ function isSpaShell(html) {
6356
6821
  return SPA_INDICATORS.some((pattern) => pattern.test(html));
6357
6822
  }
6358
6823
  async function fetchWithHeadless(url, options) {
6824
+ let expectedDomain;
6825
+ try {
6826
+ expectedDomain = normalizeHostname(new URL(url).hostname);
6827
+ } catch {
6828
+ return null;
6829
+ }
6830
+ if (!await isSafeFetchTarget(url, expectedDomain)) return null;
6359
6831
  let puppeteer;
6360
6832
  try {
6361
6833
  const mod = "puppeteer";
@@ -6382,12 +6854,28 @@ async function fetchWithHeadless(url, options) {
6382
6854
  const page = await browser.newPage();
6383
6855
  await page.setRequestInterception(true);
6384
6856
  page.on("request", (req) => {
6385
- const type = req.resourceType();
6386
- if (["image", "font", "media", "stylesheet"].includes(type)) {
6387
- req.abort();
6388
- } else {
6389
- req.continue();
6390
- }
6857
+ void (async () => {
6858
+ const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
6859
+ if (alreadyHandled) return;
6860
+ if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
6861
+ try {
6862
+ if (!req.isInterceptResolutionHandled?.()) await req.abort();
6863
+ } catch {
6864
+ }
6865
+ return;
6866
+ }
6867
+ const type = req.resourceType();
6868
+ try {
6869
+ if (!req.isInterceptResolutionHandled?.()) {
6870
+ if (["image", "font", "media", "stylesheet"].includes(type)) {
6871
+ await req.abort();
6872
+ } else {
6873
+ await req.continue();
6874
+ }
6875
+ }
6876
+ } catch {
6877
+ }
6878
+ })();
6391
6879
  });
6392
6880
  await page.setUserAgent("AEO-Visibility-Bot/1.0");
6393
6881
  await page.goto(url, { waitUntil: "networkidle2", timeout });
@@ -6400,6 +6888,7 @@ async function fetchWithHeadless(url, options) {
6400
6888
  }
6401
6889
  const html = await page.content();
6402
6890
  const finalUrl = page.url();
6891
+ if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
6403
6892
  return {
6404
6893
  text: html.slice(0, 5e5),
6405
6894
  status: 200,
@@ -6422,6 +6911,10 @@ function getTextLength(html) {
6422
6911
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
6423
6912
  }
6424
6913
  async function audit(domain, options) {
6914
+ const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
6915
+ if (!await isSafeFetchTarget(normalizedTarget)) {
6916
+ throw new Error(`Refusing to audit private or local address: ${domain}`);
6917
+ }
6425
6918
  const startTime = Date.now();
6426
6919
  let renderedWithHeadless = false;
6427
6920
  const siteData = await prefetchSiteData(domain);
@@ -6454,7 +6947,7 @@ async function audit(domain, options) {
6454
6947
  }
6455
6948
  }
6456
6949
  if (options?.fullCrawl) {
6457
- const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-TQ35TB2X.js");
6950
+ const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-HAF2X2X3.js");
6458
6951
  const crawlResult = await crawlFullSite2(siteData, {
6459
6952
  maxPages: options.maxPages ?? 200,
6460
6953
  concurrency: options.concurrency ?? 5