aeorank 3.1.1 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -20,6 +20,182 @@ var __copyProps = (to, from, except, desc) => {
20
20
  };
21
21
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
22
22
 
23
+ // src/network-guard.ts
24
/**
 * Remove a single leading "[" and a single trailing "]" from a hostname.
 * Input without brackets is returned unchanged.
 *
 * @param {string} hostname - Hostname, possibly a bracketed IPv6 literal.
 * @returns {string} The hostname without the surrounding brackets.
 */
function stripIpv6Brackets(hostname) {
  const withoutOpening = hostname.replace(/^\[/, "");
  const withoutClosing = withoutOpening.replace(/\]$/, "");
  return withoutClosing;
}
27
/**
 * Canonicalise a hostname for comparison against blocklists and site domains.
 *
 * Whitespace and letter case are normalised BEFORE the "www." prefix is
 * removed, so inputs such as "WWW.Example.com" or "  www.example.com" lose
 * the prefix as well. A single trailing root dot ("localhost.") is dropped so
 * fully-qualified spellings compare equal to their usual form.
 *
 * @param {string} hostname - Raw hostname; null/undefined is treated as "".
 * @returns {string} Lower-cased hostname without IPv6 brackets, a leading
 *   "www." or a trailing dot.
 */
function normalizeHostname(hostname) {
  const cleaned = String(hostname ?? "").trim().toLowerCase();
  return stripIpv6Brackets(cleaned).replace(/^www\./, "").replace(/\.$/, "");
}
30
/**
 * Parse a dotted-decimal IPv4 string into its four octets.
 *
 * @param {string} hostname - Candidate address such as "192.168.0.1".
 * @returns {number[] | null} Four integers in 0-255, or null when the input
 *   is not four dot-separated groups of 1-3 digits or an octet exceeds 255.
 */
function parseIpv4(hostname) {
  const looksDotted = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname);
  if (!looksDotted) {
    return null;
  }
  const octets = [];
  for (const piece of hostname.split(".")) {
    const value = Number(piece);
    if (!Number.isInteger(value) || value < 0 || value > 255) {
      return null;
    }
    octets.push(value);
  }
  return octets;
}
35
/**
 * Decide whether an IPv4 address (as octets) falls in one of the ranges this
 * module refuses to fetch from.
 *
 * @param {number[]} parts - Octets as returned by parseIpv4.
 * @returns {boolean} True when the address matches a blocked range.
 */
function isPrivateOrReservedIpv4(parts) {
  const [first, second, third] = parts;
  // Everything from 224.0.0.0 upward is blocked.
  if (first >= 224) return true;
  switch (first) {
    case 0:
    case 10:
    case 127:
      return true;
    case 100:
      return second >= 64 && second <= 127;
    case 169:
      return second === 254;
    case 172:
      return second >= 16 && second <= 31;
    case 192:
      return second === 0 || second === 168 || (second === 88 && third === 99);
    case 198:
      return second === 18 || second === 19 || (second === 51 && third === 100);
    case 203:
      return second === 0 && third === 113;
    default:
      return false;
  }
}
50
/**
 * Decide whether an IPv6 literal points at an address this module refuses to
 * fetch from.
 *
 * Blocked: "::" and "::1", literals starting with "fc"/"fd", literals matching
 * /^fe[89ab]/, multicast ("ffxx:"), the "2001:db8" prefix, and IPv4-mapped or
 * IPv4-compatible addresses whose embedded IPv4 address is itself blocked.
 *
 * Embedded IPv4 addresses are recognised both in dotted form
 * ("::ffff:127.0.0.1") and in hexadecimal form ("::ffff:7f00:1"). The hex form
 * matters because the WHATWG URL parser serialises the former as the latter,
 * so a dotted-only check never sees the dotted spelling after `new URL()`.
 *
 * @param {string} hostname - Hostname or address, optionally in brackets.
 * @returns {boolean} True when the value is an IPv6 literal in a blocked
 *   range; false for non-IPv6 input (no ":" present) or any other IPv6 address.
 */
function isBlockedIpv6(hostname) {
  const host = stripIpv6Brackets(hostname).toLowerCase();
  if (!host.includes(":")) return false;
  if (host === "::" || host === "::1") return true;
  if (host.startsWith("fc") || host.startsWith("fd")) return true;
  if (/^fe[89ab]/.test(host)) return true;
  // Multicast, mirroring the IPv4 rule that blocks 224.0.0.0 and above.
  if (/^ff[0-9a-f]{2}:/.test(host)) return true;
  if (host.startsWith("2001:db8")) return true;

  // Dotted tail: "::ffff:a.b.c.d" (mapped) or "::a.b.c.d" (compatible).
  const dotted = host.match(/^::(?:ffff:)?(\d{1,3}(?:\.\d{1,3}){3})$/);
  if (dotted) {
    const parts = parseIpv4(dotted[1]);
    // A dotted tail that is not a valid IPv4 address is treated as blocked.
    return parts ? isPrivateOrReservedIpv4(parts) : true;
  }

  // Hex tail: the same addresses as serialised by URL parsers, e.g. "::ffff:7f00:1".
  const hex = host.match(/^::(?:ffff:)?([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
  if (hex) {
    const high = Number.parseInt(hex[1], 16);
    const low = Number.parseInt(hex[2], 16);
    return isPrivateOrReservedIpv4([high >> 8, high & 255, low >> 8, low & 255]);
  }
  return false;
}
64
/**
 * Decide whether a hostname must never be fetched.
 *
 * A hostname is blocked when it normalises to an empty string, is "localhost"
 * or "metadata.google.internal", ends with one of LOCAL_HOST_SUFFIXES, is an
 * IPv4 literal rejected by isPrivateOrReservedIpv4, or is an IPv6 literal
 * rejected by isBlockedIpv6.
 *
 * @param {string} hostname - Hostname as taken from a parsed URL.
 * @returns {boolean} True when the hostname is blocked.
 */
function isBlockedHostname(hostname) {
  const host = normalizeHostname(hostname);
  if (!host) {
    return true;
  }
  const isNamedInternalHost = host === "localhost" || host === "metadata.google.internal";
  if (isNamedInternalHost) {
    return true;
  }
  for (const suffix of LOCAL_HOST_SUFFIXES) {
    if (host.endsWith(suffix)) {
      return true;
    }
  }
  const octets = parseIpv4(host);
  if (octets) {
    // An IPv4 literal is judged solely by its range; the IPv6 check is skipped.
    return isPrivateOrReservedIpv4(octets);
  }
  return isBlockedIpv6(host) ? true : false;
}
74
/**
 * Check whether a hostname is the given domain or one of its subdomains,
 * comparing the normalised forms of both.
 *
 * @param {string} hostname - Hostname under test.
 * @param {string} domain - Site domain the hostname must belong to.
 * @returns {boolean} True on an exact match or when hostname ends with
 *   "." + domain; false when either value normalises to an empty string.
 */
function isSameSiteHost(hostname, domain) {
  const candidate = normalizeHostname(hostname);
  const site = normalizeHostname(domain);
  if (candidate && site) {
    if (candidate === site) {
      return true;
    }
    return candidate.endsWith(`.${site}`);
  }
  return false;
}
80
/**
 * Synchronous URL screening: accepts only http(s) URLs whose host is not
 * blocked and, when expectedDomain is given, belongs to that site.
 *
 * @param {string} url - Absolute URL to validate.
 * @param {string} [expectedDomain] - When truthy, the URL host must be this
 *   domain or one of its subdomains.
 * @returns {boolean} True when the URL passes every check; false otherwise,
 *   including when the URL cannot be parsed or any check throws.
 */
function isSafePublicUrl(url, expectedDomain) {
  try {
    const { protocol, hostname } = new URL(url);
    const isWebScheme = protocol === "http:" || protocol === "https:";
    if (!isWebScheme) {
      return false;
    }
    if (isBlockedHostname(hostname)) {
      return false;
    }
    // The same-site rule applies only when a domain was supplied.
    const isOffSite = Boolean(expectedDomain) && !isSameSiteHost(hostname, expectedDomain);
    return !isOffSite;
  } catch {
    return false;
  }
}
91
/**
 * Detect a Node.js runtime by checking for a global `process` that exposes
 * `process.versions.node`.
 *
 * @returns {boolean} True when that value is present and truthy.
 */
function isNodeRuntime() {
  if (typeof process === "undefined") {
    return false;
  }
  return Boolean(process.versions?.node);
}
94
/**
 * Report whether fetch can be driven with manual redirect handling.
 * Currently this is exactly the Node.js runtime check.
 *
 * @returns {boolean} The result of isNodeRuntime().
 */
function canUseManualRedirects() {
  const runningOnNode = isNodeRuntime();
  return runningOnNode;
}
97
/**
 * Check whether an HTTP status code is one of the redirect codes this module
 * follows: 301, 302, 303, 307 or 308.
 *
 * @param {number} status - HTTP response status.
 * @returns {boolean} True for the five codes above (strict comparison).
 */
function isRedirectStatus(status) {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
100
/**
 * Tell whether a hostname is an IP literal rather than a DNS name: either a
 * valid dotted IPv4 address or anything containing ":" once brackets are
 * removed.
 *
 * @param {string} hostname - Hostname, optionally a bracketed IPv6 literal.
 * @returns {boolean} True for IPv4/IPv6 literals.
 */
function isIpLiteral(hostname) {
  const bare = stripIpv6Brackets(hostname);
  if (parseIpv4(bare)) {
    return true;
  }
  return bare.includes(":");
}
104
/**
 * Classify an address string (as produced by a DNS lookup) as blocked or not.
 * IPv4 addresses are judged by isPrivateOrReservedIpv4; everything else is
 * passed to isBlockedIpv6.
 *
 * @param {string} address - IPv4 or IPv6 address string.
 * @returns {boolean} True when the address is in a blocked range.
 */
function isBlockedResolvedAddress(address) {
  const candidate = stripIpv6Brackets(address).toLowerCase();
  const octets = parseIpv4(candidate);
  return octets ? isPrivateOrReservedIpv4(octets) : isBlockedIpv6(candidate);
}
110
/**
 * Obtain the function used to resolve hostnames.
 *
 * Returns dnsLookupOverride when it is set. Otherwise, on Node.js, loads
 * "node:dns/promises" dynamically and returns a wrapper that calls
 * `lookup(hostname, { all: true, verbatim: true })`.
 *
 * @returns {Promise<((hostname: string) => Promise<Array<{address: string}>>) | null>}
 *   The lookup function, or null outside Node.js or when the module cannot
 *   be imported.
 */
async function loadDnsLookup() {
  if (dnsLookupOverride) {
    return dnsLookupOverride;
  }
  if (!isNodeRuntime()) {
    return null;
  }
  // The specifier sits in a variable (with the vite-ignore hint) exactly as in
  // the original, presumably so bundlers leave the import dynamic.
  const moduleName = "node:dns/promises";
  let dnsModule;
  try {
    dnsModule = await import(
      /* @vite-ignore */
      moduleName
    );
  } catch {
    return null;
  }
  return async (hostname) => dnsModule.lookup(hostname, { all: true, verbatim: true });
}
124
/**
 * Resolve a hostname and verify that every address it maps to is public.
 *
 * Empty hostnames and IP literals return true without a lookup, as does a
 * runtime with no lookup function available. Verdicts are memoised per
 * normalised hostname in dnsSafetyCache, including failures.
 *
 * @param {string} hostname - Hostname to resolve.
 * @returns {Promise<boolean>} True when the host is considered safe; false
 *   when resolution fails, yields no records, or any record is blocked.
 */
async function isDnsResolvedHostSafe(hostname) {
  const host = normalizeHostname(hostname);
  if (!host || isIpLiteral(host)) {
    return true;
  }
  const remembered = dnsSafetyCache.get(host);
  if (remembered !== void 0) {
    return remembered;
  }
  const resolve = await loadDnsLookup();
  if (!resolve) {
    return true;
  }
  let verdict;
  try {
    const records = await resolve(host);
    const hasBlockedRecord = records.some((record) => isBlockedResolvedAddress(record.address));
    verdict = records.length > 0 && !hasBlockedRecord;
  } catch {
    // A failed lookup counts as unsafe.
    verdict = false;
  }
  dnsSafetyCache.set(host, verdict);
  return verdict;
}
141
/**
 * Full pre-flight check for a URL: the synchronous rules of isSafePublicUrl
 * followed by the DNS resolution check of isDnsResolvedHostSafe.
 *
 * @param {string} url - Absolute URL about to be fetched.
 * @param {string} [expectedDomain] - Optional site the URL must belong to.
 * @returns {Promise<boolean>} True only when both checks pass; false when
 *   either fails or throws.
 */
async function isSafeFetchTarget(url, expectedDomain) {
  const passesUrlRules = isSafePublicUrl(url, expectedDomain);
  if (!passesUrlRules) {
    return false;
  }
  try {
    const { hostname } = new URL(url);
    const resolvesSafely = await isDnsResolvedHostSafe(hostname);
    return resolvesSafely;
  } catch {
    return false;
  }
}
150
/**
 * Best-effort release of a response body that will not be read, so the
 * underlying connection is not left waiting for garbage collection.
 */
async function discardResponseBody(res) {
  try {
    await res.body?.cancel();
  } catch {
    // Nothing useful can be done if cancellation itself fails.
  }
}

/**
 * Fetch a URL after validating it, and every redirect hop, with
 * isSafeFetchTarget.
 *
 * When canUseManualRedirects() is true, redirects are followed by hand so each
 * Location target is validated before it is requested. Otherwise fetch runs
 * with `redirect: "error"`, so a redirect makes the request fail.
 *
 * Bodies of responses that are not handed back to the caller (intermediate
 * redirects and responses whose final URL fails validation) are cancelled.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=15000] - Per-request timeout.
 * @param {string} [options.userAgent] - User-Agent header; defaults to DEFAULT_USER_AGENT.
 * @param {number} [options.maxRedirects=5] - Maximum redirect hops to follow.
 * @param {string|null} [options.expectedDomain=null] - Site every hop must stay on.
 * @returns {Promise<Response|null>} The final response, or null when a target
 *   is unsafe, the request fails, or the redirect chain is invalid or too long.
 */
async function safeFetch(url, options = {}) {
  const timeoutMs = options.timeoutMs ?? 15e3;
  const userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
  const maxRedirects = options.maxRedirects ?? 5;
  const expectedDomain = options.expectedDomain ?? null;
  const manualRedirects = canUseManualRedirects();
  const redirectMode = manualRedirects ? "manual" : "error";
  let currentUrl = url;
  for (let redirects = 0; redirects <= maxRedirects; redirects++) {
    if (!(await isSafeFetchTarget(currentUrl, expectedDomain))) return null;
    let res;
    try {
      res = await fetch(currentUrl, {
        signal: AbortSignal.timeout(timeoutMs),
        headers: { "User-Agent": userAgent },
        redirect: redirectMode
      });
    } catch {
      return null;
    }
    if (manualRedirects && isRedirectStatus(res.status)) {
      const location = res.headers.get("location");
      // The redirect body is never used; release it before the next hop.
      await discardResponseBody(res);
      if (redirects === maxRedirects || !location) return null;
      try {
        currentUrl = new URL(location, currentUrl).toString();
      } catch {
        return null;
      }
      continue;
    }
    // Re-validate the URL the response reports before handing it back.
    if (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) return res;
    await discardResponseBody(res);
    return null;
  }
  return null;
}
188
// Module-level state for src/network-guard.ts. The bindings are declared here
// and receive their values inside the init_network_guard initializer below.
var DEFAULT_USER_AGENT, LOCAL_HOST_SUFFIXES, dnsSafetyCache, dnsLookupOverride;
// NOTE(review): __esm is defined outside this view; presumably the bundler's
// lazy module-init wrapper that runs the body once — confirm.
var init_network_guard = __esm({
  "src/network-guard.ts"() {
    "use strict";
    // User-Agent header safeFetch sends when the caller supplies none.
    DEFAULT_USER_AGENT = "AEO-Visibility-Bot/1.0";
    // Hostname suffixes that isBlockedHostname rejects.
    LOCAL_HOST_SUFFIXES = [".localhost", ".local", ".localdomain", ".internal", ".home.arpa", ".test"];
    // hostname -> boolean verdict cache read and written by isDnsResolvedHostSafe.
    dnsSafetyCache = /* @__PURE__ */ new Map();
    // When set to a function, loadDnsLookup returns it instead of node:dns/promises.
    dnsLookupOverride = null;
  }
});
198
+
23
199
  // src/full-site-crawler.ts
24
200
  var full_site_crawler_exports = {};
25
201
  __export(full_site_crawler_exports, {
@@ -74,33 +250,17 @@ function isDisallowedByRobots(urlPath, rules) {
74
250
  if (longestAllow === 0 && longestDisallow === 0) return false;
75
251
  return longestDisallow > longestAllow;
76
252
  }
77
- async function fetchPage2(url, timeoutMs = 1e4) {
78
- try {
79
- const res = await fetch(url, {
80
- signal: AbortSignal.timeout(timeoutMs),
81
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
82
- redirect: "follow"
83
- });
84
- if (res.status !== 200) return null;
85
- const text = await res.text();
86
- if (text.length < 200) return null;
87
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
88
- } catch {
89
- return null;
90
- }
253
/**
 * Fetch one crawlable page through safeFetch, restricted to the given site.
 *
 * Never rejects: a failure while reading the body (for example the timeout
 * firing mid-download) yields null, so one bad page cannot reject the
 * caller's Promise.all batch.
 *
 * @param {string} url - Page URL.
 * @param {string} domain - Site the URL and all redirects must stay on.
 * @param {number} [timeoutMs=10000] - Request timeout.
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   The page (text capped at 500,000 characters), or null when the fetch is
 *   refused or fails, the status is not 200, or the body is shorter than 200
 *   characters.
 */
async function fetchPage2(url, domain, timeoutMs = 1e4) {
  const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
  if (!res || res.status !== 200) return null;
  let text;
  try {
    text = await res.text();
  } catch {
    // Body read failed (abort, reset, decoding error): treat as "no page".
    return null;
  }
  if (text.length < 200) return null;
  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
}
92
- async function fetchSitemapXml(url, timeoutMs = 1e4) {
93
- try {
94
- const res = await fetch(url, {
95
- signal: AbortSignal.timeout(timeoutMs),
96
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
97
- redirect: "follow"
98
- });
99
- if (res.status !== 200) return null;
100
- return await res.text();
101
- } catch {
102
- return null;
103
- }
260
/**
 * Fetch a sitemap document through safeFetch, restricted to the given site.
 *
 * Never rejects: a failure while reading the body yields null, so one broken
 * sub-sitemap cannot reject the caller's Promise.all.
 *
 * @param {string} url - Sitemap URL.
 * @param {string} domain - Site the URL and all redirects must stay on.
 * @param {number} [timeoutMs=10000] - Request timeout.
 * @returns {Promise<string|null>} The response text, or null when the fetch
 *   is refused or fails, the status is not 200, or the body cannot be read.
 */
async function fetchSitemapXml(url, domain, timeoutMs = 1e4) {
  const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
  if (!res || res.status !== 200) return null;
  try {
    return await res.text();
  } catch {
    // Body read failed (abort, reset, decoding error): treat as "no sitemap".
    return null;
  }
}
105
265
  async function extractAllUrlsFromSitemap(sitemapText, domain, timeoutMs = 1e4) {
106
266
  const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
@@ -110,9 +270,9 @@ async function extractAllUrlsFromSitemap(sitemapText, domain, timeoutMs = 1e4) {
110
270
  const subUrls = [];
111
271
  for (const block of subSitemapLocs) {
112
272
  const locMatch = block.match(/<loc>([^<]+)<\/loc>/i);
113
- if (locMatch) subUrls.push(locMatch[1].trim());
273
+ if (locMatch && isSafePublicUrl(locMatch[1].trim(), cleanDomain)) subUrls.push(locMatch[1].trim());
114
274
  }
115
- const fetches = subUrls.slice(0, 10).map((u) => fetchSitemapXml(u, timeoutMs));
275
+ const fetches = subUrls.slice(0, 10).map((u) => fetchSitemapXml(u, cleanDomain, timeoutMs));
116
276
  const results = await Promise.all(fetches);
117
277
  for (const text of results) {
118
278
  if (text) {
@@ -258,7 +418,7 @@ async function crawlFullSite(siteData, options) {
258
418
  batch.push(url);
259
419
  }
260
420
  if (batch.length === 0) continue;
261
- const fetchResults = await Promise.all(batch.map((url) => fetchPage2(url, timeoutMs)));
421
+ const fetchResults = await Promise.all(batch.map((url) => fetchPage2(url, siteData.domain, timeoutMs)));
262
422
  const batchResults = [];
263
423
  for (let i = 0; i < fetchResults.length; i++) {
264
424
  const result = fetchResults[i];
@@ -312,6 +472,7 @@ var RESOURCE_EXTENSIONS, SKIP_PATH_PATTERNS, SKIP_URL_PATTERNS, CATEGORY_PATTERN
312
472
  var init_full_site_crawler = __esm({
313
473
  "src/full-site-crawler.ts"() {
314
474
  "use strict";
475
+ init_network_guard();
315
476
  RESOURCE_EXTENSIONS = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|woff|woff2|ttf|eot|mp4|mp3|webp|avif|zip|gz|tar|json)$/i;
316
477
  SKIP_PATH_PATTERNS = /^\/(api|wp-admin|wp-json|wp-content|wp-includes|static|assets|_next|auth|login|signup|cart|checkout|admin|feed|xmlrpc|tag|tags|author|authors|category|categories|attachment|embed|trackback|comments|search|print|amp)\b/i;
317
478
  SKIP_URL_PATTERNS = /\/page\/\d+\/?$|[?&](s|replytocom|p|preview|share|like|amp)=/i;
@@ -531,19 +692,182 @@ function shingleJaccardSimilarity(a, b) {
531
692
  return union === 0 ? 0 : intersection / union;
532
693
  }
533
694
 
695
+ // src/helpful-content.ts
696
/**
 * Clamp a value from above.
 *
 * @param {number} value - Value to limit.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of value and max.
 */
function cap(value, max) {
  const limited = Math.min(max, value);
  return limited;
}
699
/**
 * Clamp a value from below. Despite the name, this does not round.
 *
 * @param {number} value - Value to limit.
 * @param {number} min - Lower bound.
 * @returns {number} The larger of value and min.
 */
function floor(value, min) {
  const raised = Math.max(min, value);
  return raised;
}
702
/**
 * Count the entries String.prototype.match returns for a pattern.
 * With a global (`g`) regex this is the number of matches in the text.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern to apply.
 * @returns {number} Length of the match result, or 0 when nothing matches.
 */
function countMatches(text, pattern) {
  const hits = text.match(pattern);
  if (hits === null) {
    return 0;
  }
  return hits.length;
}
705
/**
 * Replace every <script>…</script> and <style>…</style> element in an HTML
 * string with a single space.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} HTML without script and style elements.
 */
function stripScriptsAndStyles(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, " ");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
  return withoutStyles;
}
708
/**
 * Reduce HTML to plain text: script/style elements are removed, every
 * remaining tag becomes a space, whitespace runs collapse to one space and
 * the result is trimmed.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Text content.
 */
function getTextContent(html) {
  const visibleMarkup = stripScriptsAndStyles(html);
  const tagsRemoved = visibleMarkup.replace(/<[^>]*>/g, " ");
  const collapsed = tagsRemoved.replace(/\s+/g, " ");
  return collapsed.trim();
}
711
/**
 * Extract the inner HTML of the first <body> element.
 *
 * @param {string} html - Raw HTML document or fragment.
 * @returns {string} Content between <body …> and </body>, or the input
 *   unchanged when no body element is found.
 */
function getBodyHtml(html) {
  const found = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (!found) {
    return html;
  }
  return found[1];
}
715
/**
 * Return the plain text of the first <p> element inside the document body.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Paragraph text with tags removed and whitespace
 *   collapsed, or "" when the body has no paragraph.
 */
function getFirstParagraphText(html) {
  const body = getBodyHtml(html);
  const paragraph = body.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
  if (!paragraph) {
    return "";
  }
  const inner = paragraph[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
719
/**
 * Keep only the first `count` whitespace-separated tokens of a text.
 *
 * @param {string} text - Source text.
 * @param {number} count - Number of tokens to keep.
 * @returns {string} The kept tokens joined with single spaces.
 */
function firstNWords(text, count) {
  const tokens = text.split(/\s+/);
  const head = tokens.slice(0, count);
  return head.join(" ");
}
722
/**
 * Return the plain text of the first <h1> element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Heading text with nested tags removed and whitespace
 *   collapsed, or "" when there is no <h1>.
 */
function getH1Text(html) {
  const heading = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!heading) {
    return "";
  }
  const inner = heading[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
726
/**
 * Return the plain text of the first <title> element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Title text with tags removed and whitespace collapsed,
 *   or "" when there is no <title>.
 */
function getTitleText(html) {
  const titleTag = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!titleTag) {
    return "";
  }
  const inner = titleTag[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
730
/**
 * Count the non-empty whitespace-separated tokens in a text.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Token count; 0 for empty or otherwise falsy input.
 */
function wordCount(text) {
  if (!text) {
    return 0;
  }
  let total = 0;
  for (const piece of text.split(/\s+/)) {
    if (piece) {
      total += 1;
    }
  }
  return total;
}
733
/**
 * Heuristically decide whether a page is editorial content (article, guide,
 * documentation) as opposed to e.g. a landing page.
 *
 * Signals: an editorial-looking URL path (weight 2); an <article> element; at
 * least two h2/h3 headings; 500+ words of text; a <time> element or
 * datePublished/dateModified markers; byline wording in the text (weight 1
 * each). Two or more points qualify the page.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {boolean} True when the weighted signals total at least 2.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const words = wordCount(text);
  const urlLooksEditorial = Boolean(url) && /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorial|case-stud|whitepaper|faq)\b/i.test(url);
  const subheadings = html.match(/<h[2-3][^>]*>/gi) || [];
  const weightedSignals = [
    [2, urlLooksEditorial],
    [1, /<article[\s>]/i.test(html)],
    [1, subheadings.length >= 2],
    [1, words >= 500],
    [1, /<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)],
    [1, /written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)]
  ];
  let signals = 0;
  for (const [weight, present] of weightedSignals) {
    if (present) {
      signals += weight;
    }
  }
  return signals >= 2;
}
747
/**
 * Decide whether a page is the kind of content for which readers would
 * expect a methodology explanation (reviews, comparisons, studies, …).
 *
 * The title plus H1, then the URL, then the full text are tested in that
 * order against keyword patterns; the first hit wins.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {boolean} True when any of the three sources matches.
 */
function expectsMethodology(html, url) {
  const text = getTextContent(html);
  const headline = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const address = (url || "").toLowerCase();
  const headlineHint = /(?:review|compare|comparison|vs\.?|best|benchmark|study|analysis|survey|report|research|tested|test|methodology)/i.test(headline);
  if (headlineHint) {
    return true;
  }
  const addressHint = /(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|best)/i.test(address);
  if (addressHint) {
    return true;
  }
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
}
759
/**
 * Check that the opening of the page talks about what the title and H1
 * promise.
 *
 * Keywords are the distinct lower-cased tokens of title + H1 that are at
 * least five characters long and not in a small stop list. The page aligns
 * when at least min(2, keyword count) of them occur in the first 250 words of
 * the page text.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean} True when enough keywords appear early in the text;
 *   false when there are no usable keywords.
 */
function titleAndBodyAlign(html) {
  const h1 = getH1Text(html);
  const title = getTitleText(html);
  const opening = firstNWords(getTextContent(html), 250).toLowerCase();
  const tokens = `${title} ${h1}`.toLowerCase().split(/[\s|:()\-/]+/);
  const distinctKeywords = new Set();
  for (const token of tokens) {
    const isStopWord = /^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(token);
    if (token.length >= 5 && !isStopWord) {
      distinctKeywords.add(token);
    }
  }
  if (distinctKeywords.size === 0) {
    return false;
  }
  let found = 0;
  for (const keyword of distinctKeywords) {
    if (opening.includes(keyword)) {
      found += 1;
    }
  }
  return found >= Math.min(2, distinctKeywords.size);
}
769
// Phrase patterns used by the helpful-content scorers below. Patterns with the
// `g` flag are passed to countMatches; the others are used with .test().

// Boilerplate openings; tested against the first paragraph by scoreHelpfulPurposeAlignment.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;
// Decision-oriented, actionable phrasing.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
// Words signalling that trade-offs or caveats are discussed.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
// Filler phrases; matches lower the purpose-alignment score.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
// Calls to action; counted in the first 1800 characters of body HTML.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
// First-person statements of hands-on work ("we tested", "I measured", …).
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
// Context phrases that situate an experience (duration, population, setting).
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
// Mentions of evidence artifacts such as screenshots or benchmarks.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
// Admissions of limits, failures or lessons learned.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
// Anchor tags whose href contains an author/team-style path segment.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
// Byline wording ("written by", "reviewed by", …).
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
// Author-bio or credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
// Terms indicating that a methodology or process is described.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
// Concrete methodological details (sample sizes, criteria, dates, …).
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
783
/**
 * Score (0-10) how well a page serves the visitor's task rather than filler.
 *
 * Pages with no text score 0. Short pages (< 250 words) that are not
 * content-like get a neutral 5. Otherwise the score starts at 3 (content-like)
 * or 5 and is adjusted for: a non-generic vs. generic first paragraph (+2/-2),
 * practical language, trade-off language, title/body alignment, summary
 * phrases, early calls to action and fluff phrases.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  const firstPara = getFirstParagraphText(html);
  const earlyText = firstNWords(text, 300);
  const earlyBodyHtml = getBodyHtml(html).slice(0, 1800);
  let score = contentLike ? 3 : 5;

  // Opening paragraph: reward a specific opener, penalise a boilerplate one.
  if (firstPara) {
    score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;
  }

  // Practical guidance counts more when it appears early.
  if (countMatches(earlyText, PRACTICAL_LANGUAGE) >= 1) {
    score += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    score += 1;
  }

  const tradeoffCount = countMatches(text, TRADEOFF_LANGUAGE);
  if (tradeoffCount >= 2) {
    score += 2;
  } else if (tradeoffCount >= 1) {
    score += 1;
  }

  if (titleAndBodyAlign(html)) {
    score += 1;
  }
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) {
    score += 1;
  }

  // Penalties: several calls to action near the top, and filler phrasing.
  const earlyCtas = countMatches(earlyBodyHtml, EARLY_CTA_PATTERN);
  if (earlyCtas >= 3) {
    score -= 2;
  } else if (earlyCtas >= 2) {
    score -= 1;
  }

  const fluffCount = countMatches(text, FLUFF_LANGUAGE);
  if (fluffCount >= 3) {
    score -= 2;
  } else if (fluffCount >= 1) {
    score -= 1;
  }

  return floor(cap(score, 10), 0);
}
810
/**
 * Score (0-10) the evidence of first-hand experience on a page.
 *
 * Pages with no text score 0. The score starts at 2 for content-like pages
 * and 5 otherwise, then gains points for first-person action statements,
 * experience context, artifacts (keywords plus <figure>/<figcaption> tags)
 * and limitation language, and loses 1 for manufacturer/vendor copy wording.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // Two-tier bonus: strongPts at or above strongAt, weakPts for any hit, else 0.
  const tiered = (count, strongAt, strongPts, weakPts) => {
    if (count >= strongAt) return strongPts;
    return count >= 1 ? weakPts : 0;
  };

  const contentLike = isContentLikePage(html, url);
  let score = contentLike ? 2 : 5;

  const actionCount = countMatches(text, FIRST_HAND_ACTIONS);
  score += tiered(actionCount, 3, 4, 2);

  const contextCount = countMatches(text, EXPERIENCE_CONTEXT);
  score += tiered(contextCount, 2, 2, 1);

  const artifactCount = countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
  score += tiered(artifactCount, 3, 2, 1);

  const limitationCount = countMatches(text, LIMITATION_LANGUAGE);
  score += tiered(limitationCount, 2, 2, 1);

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) {
    score -= 1;
  }
  return floor(cap(score, 10), 0);
}
830
/**
 * Score (0-10) how clearly a page identifies who created or reviewed it.
 *
 * Pages with no text score 0; pages that are not content-like get a neutral
 * 5. Content-like pages earn points for a byline (+3), a link to an
 * author/team page (+2), author-bio wording (+2), reviewer/editor wording
 * (+1) and Person structured data (+2).
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const bylineFound = BYLINE_PATTERN.test(text) || /class=["'][^"']*author[^"']*["']/i.test(html) || /rel=["']author["']/i.test(html);
  const personSchemaFound = /"@type"\s*:\s*"Person"/i.test(html);
  const awards = [
    [3, bylineFound],
    [2, AUTHOR_LINK_PATTERN.test(html)],
    [2, AUTHOR_BIO_PATTERN.test(text)],
    [1, /\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text)],
    [2, personSchemaFound]
  ];
  const score = awards.reduce((sum, [points, earned]) => (earned ? sum + points : sum), 0);
  return floor(cap(score, 10), 0);
}
845
/**
 * Score (0-10) how transparently a page explains its methodology.
 *
 * Pages with no text score 0. Pages where a methodology is expected (see
 * expectsMethodology) start at 2 and must earn their score; all other pages
 * start at a neutral 5. Points are added for methodology terms, concrete
 * methodological details, quantified statements, figures/tables accompanying
 * a methodology mention, and disclosure of AI assistance or human review.
 *
 * The starting score depends only on `expected`: the original
 * `expected ? 2 : contentLike ? 5 : 5` yielded 5 on both inner branches, so
 * the content-likeness pass could not affect the result and is dropped.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  const expected = expectsMethodology(html, url);
  let score = expected ? 2 : 5;
  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;
  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  // Figures or tables only count alongside an explicit methodology mention.
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
  return floor(cap(score, 10), 0);
}
863
+
534
864
  // src/site-crawler.ts
535
- async function fetchText(url) {
536
- try {
537
- const res = await fetch(url, {
538
- signal: AbortSignal.timeout(15e3),
539
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
540
- redirect: "follow"
541
- });
542
- const text = await res.text();
543
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
544
- } catch {
545
- return null;
546
- }
865
+ init_network_guard();
866
/**
 * Fetch a URL through safeFetch (15 s timeout) and return its text.
 *
 * Never rejects: a failure while reading the body (for example the timeout
 * firing mid-download) yields null, so callers that run many fetches in
 * Promise.all are not rejected by a single bad response.
 *
 * @param {string} url - URL to fetch.
 * @param {string} [expectedDomain] - Site the URL and redirects must stay on.
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   Text capped at 500,000 characters with the response status and URL (any
 *   status is passed through), or null when the fetch is refused or fails.
 */
async function fetchText(url, expectedDomain) {
  const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
  if (!res) return null;
  let text;
  try {
    text = await res.text();
  } catch {
    // Body read failed (abort, reset, decoding error): treat as a failed fetch.
    return null;
  }
  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
}
548
872
  function extractDomain(url) {
549
873
  return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
@@ -584,13 +908,16 @@ function isHtmlResponse(result) {
584
908
  return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
585
909
  }
586
910
  async function prefetchSiteData(domain) {
911
+ if (!await isSafeFetchTarget(`https://${domain}`)) {
912
+ return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
913
+ }
587
914
  let protocol = null;
588
915
  let homepage = null;
589
- homepage = await fetchText(`https://${domain}`);
916
+ homepage = await fetchText(`https://${domain}`, domain);
590
917
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
591
918
  protocol = "https";
592
919
  } else {
593
- homepage = await fetchText(`http://${domain}`);
920
+ homepage = await fetchText(`http://${domain}`, domain);
594
921
  if (homepage && homepage.status >= 200 && homepage.status < 400) {
595
922
  protocol = "http";
596
923
  }
@@ -610,38 +937,38 @@ async function prefetchSiteData(domain) {
610
937
  }
611
938
  const baseUrl = `${protocol}://${domain}`;
612
939
  const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
613
- fetchText(`${baseUrl}/llms.txt`),
614
- fetchText(`${baseUrl}/robots.txt`),
615
- fetchText(`${baseUrl}/faq`).then(async (result) => {
940
+ fetchText(`${baseUrl}/llms.txt`, domain),
941
+ fetchText(`${baseUrl}/robots.txt`, domain),
942
+ fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
616
943
  if (result && result.status === 200) return result;
617
944
  for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
618
- const fallback = await fetchText(`${baseUrl}${path}`);
945
+ const fallback = await fetchText(`${baseUrl}${path}`, domain);
619
946
  if (fallback && fallback.status === 200) return fallback;
620
947
  }
621
948
  return result;
622
949
  }),
623
- fetchText(`${baseUrl}/sitemap.xml`),
624
- fetchText(`${baseUrl}/ai.txt`)
950
+ fetchText(`${baseUrl}/sitemap.xml`, domain),
951
+ fetchText(`${baseUrl}/ai.txt`, domain)
625
952
  ]);
626
953
  let rssFeed = null;
627
954
  if (homepage) {
628
955
  const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
629
956
  if (rssLinkMatch) {
630
957
  const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
631
- rssFeed = await fetchText(rssUrl);
958
+ rssFeed = await fetchText(rssUrl, domain);
632
959
  }
633
960
  if (!rssFeed || rssFeed.status !== 200) {
634
961
  for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
635
- rssFeed = await fetchText(`${baseUrl}${path}`);
962
+ rssFeed = await fetchText(`${baseUrl}${path}`, domain);
636
963
  if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
637
964
  rssFeed = null;
638
965
  }
639
966
  }
640
967
  }
641
968
  if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
642
- const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
969
+ const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
643
970
  if (subUrls.length > 0) {
644
- const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
971
+ const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
645
972
  for (const sub of subResults) {
646
973
  if (sub && sub.status === 200) {
647
974
  sitemapXml.text += "\n" + sub.text;
@@ -654,7 +981,7 @@ async function prefetchSiteData(domain) {
654
981
  const sitemapForBlog = sitemapXml.text;
655
982
  const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
656
983
  if (blogUrls.length > 0) {
657
- const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
984
+ const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
658
985
  blogSample = fetched.filter(
659
986
  (r) => r !== null && r.status === 200 && r.text.length > 500
660
987
  );
@@ -1436,8 +1763,8 @@ function checkDirectAnswerDensity(data) {
1436
1763
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
1437
1764
  const snippetZoneParagraphs = paragraphs.filter((p) => {
1438
1765
  const text2 = p.replace(/<[^>]*>/g, "").trim();
1439
- const wordCount = text2.split(/\s+/).length;
1440
- return wordCount >= 40 && wordCount <= 150;
1766
+ const wordCount2 = text2.split(/\s+/).length;
1767
+ return wordCount2 >= 40 && wordCount2 <= 150;
1441
1768
  });
1442
1769
  if (snippetZoneParagraphs.length >= 3) {
1443
1770
  score += 2;
@@ -1705,13 +2032,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
1705
2032
  });
1706
2033
  return candidates.slice(0, limit).map((c) => c.url);
1707
2034
  }
1708
- function extractAllSubSitemapUrls(sitemapText, limit = 5) {
2035
+ function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
1709
2036
  if (!sitemapText.includes("<sitemapindex")) return [];
2037
+ const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
2038
+ const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
1710
2039
  const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
1711
2040
  const urls = sitemapLocs.map((block) => {
1712
2041
  const match = block.match(/<loc>([^<]+)<\/loc>/i);
1713
2042
  return match ? match[1].trim() : "";
1714
- }).filter(Boolean);
2043
+ }).filter((url) => !!url && isSafePublicUrl(url, domain));
1715
2044
  const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
1716
2045
  const rest = urls.filter((u) => !preferred.includes(u));
1717
2046
  return [...preferred, ...rest].slice(0, limit);
@@ -2585,6 +2914,123 @@ function checkContentDepth(data, topicCoherenceScore) {
2585
2914
  }
2586
2915
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2587
2916
  }
2917
/**
 * Run a per-page scorer over the homepage and every page in the blog sample.
 *
 * @param {object} data - Site data; reads `homepage`, `blogSample`, `protocol`
 *   and `domain`.
 * @param {(text: string, url: string) => number} scorer - Called once per
 *   sampled page with the page text and the URL attributed to that page.
 * @returns {{url: string, score: *}[]} One entry per sampled page, homepage
 *   first (when present), then blog pages in their original order.
 */
function scoreSampledPages(data, scorer) {
  // Pages without a `finalUrl` are attributed to the site root, using the
  // recorded protocol when there is one and https otherwise.
  const siteRootUrl = () =>
    data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`;

  const scorePage = (page) => {
    const url = page.finalUrl || siteRootUrl();
    return { url, score: scorer(page.text, url) };
  };

  const sampled = data.homepage ? [scorePage(data.homepage)] : [];
  if (data.blogSample) {
    for (const blogPage of data.blogSample) {
      sampled.push(scorePage(blogPage));
    }
  }
  return sampled;
}
2931
/**
 * Aggregate per-page scores into a rounded average plus strong/weak buckets.
 *
 * @param {{score: number}[]} pageScores - Per-page score entries.
 * @returns {{total: number, average: number, strong: object[], weak: object[]}}
 *   `total` is the number of entries, `average` is the mean rounded to the
 *   nearest integer (0 for an empty list), `strong` holds entries scoring
 *   8 or more and `weak` holds entries scoring 4 or less.
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let runningSum = 0;

  pageScores.forEach((entry) => {
    runningSum = runningSum + entry.score;
    if (entry.score >= 8) strong.push(entry);
    if (entry.score <= 4) weak.push(entry);
  });

  // Guard the division so an empty sample reports 0 rather than NaN.
  const average = total > 0 ? Math.round(runningSum / total) : 0;
  return { total, average, strong, weak };
}
2938
/**
 * Audit criterion: how well the sampled pages serve the visitor's task rather
 * than search-first filler.
 *
 * Scores the homepage and blog sample with `scoreHelpfulPurposeAlignment`,
 * averages the results, and reports one headline finding chosen by the average
 * (>= 8 info, >= 5 low, otherwise medium) plus an extra finding when any page
 * falls in the weak bucket.
 *
 * @param {object} data - Site data; a missing `homepage` yields a
 *   `not_found` result with score 0.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkHelpfulPurposeAlignment(data) {
  const criterion = "helpful_purpose_alignment";
  const criterion_label = "Helpful Purpose Alignment";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = {
      severity: "info",
      detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler`
    };
  } else if (average >= 5) {
    headline = {
      severity: "low",
      detail: `${strong.length}/${total} pages clearly lead with useful guidance`,
      fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster"
    };
  } else {
    headline = {
      severity: "medium",
      detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`,
      fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer"
    };
  }
  findings.push(headline);

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
    });
  }

  // Pass at 7+, partial at 4-6, fail below that; only a pass drops to P3.
  const passed = average >= 7;
  let status = "fail";
  if (passed) {
    status = "pass";
  } else if (average >= 4) {
    status = "partial";
  }
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P1" };
}
2962
/**
 * Audit criterion: whether sampled pages show evidence of direct use, testing,
 * or observation.
 *
 * Scores the homepage and blog sample with `scoreFirstHandExperienceSignals`,
 * averages the results, and reports one headline finding chosen by the average
 * (>= 8 info, >= 5 low, otherwise medium) plus an extra finding when any page
 * falls in the weak bucket.
 *
 * @param {object} data - Site data; a missing `homepage` yields a
 *   `not_found` result with score 0.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkFirstHandExperienceSignals(data) {
  const criterion = "first_hand_experience_signals";
  const criterion_label = "First-Hand Experience Signals";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = {
      severity: "info",
      detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation`
    };
  } else if (average >= 5) {
    headline = {
      severity: "low",
      detail: `Moderate experiential depth across ${total} sampled pages`,
      fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant"
    };
  } else {
    headline = {
      severity: "medium",
      detail: "Little first-hand experience is visible in sampled content",
      fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries"
    };
  }
  findings.push(headline);

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
    });
  }

  // Pass at 7+, partial at 4-6, fail below that; only a pass drops to P3.
  const passed = average >= 7;
  let status = "fail";
  if (passed) {
    status = "pass";
  } else if (average >= 4) {
    status = "partial";
  }
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2986
/**
 * Audit criterion: whether sampled pages make visible who created the content.
 *
 * Scores the homepage and blog sample with `scoreCreatorTransparency`,
 * averages the results, and reports one headline finding chosen by the average
 * (>= 8 info, >= 5 low, otherwise medium) plus an extra finding when any page
 * falls in the weak bucket.
 *
 * @param {object} data - Site data; a missing `homepage` yields a
 *   `not_found` result with score 0.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkCreatorTransparency(data) {
  const criterion = "creator_transparency";
  const criterion_label = "Creator Transparency";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = {
      severity: "info",
      detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution`
    };
  } else if (average >= 5) {
    headline = {
      severity: "low",
      detail: "Visible authorship is present on some content but inconsistent",
      fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them"
    };
  } else {
    headline = {
      severity: "medium",
      detail: "Creator visibility is weak on content-like pages",
      fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone"
    };
  }
  findings.push(headline);

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
    });
  }

  // Pass at 7+, partial at 4-6, fail below that; only a pass drops to P3.
  const passed = average >= 7;
  let status = "fail";
  if (passed) {
    status = "pass";
  } else if (average >= 4) {
    status = "partial";
  }
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
3010
/**
 * Audit criterion: whether sampled pages explain how their content was
 * tested, researched, reviewed, or updated.
 *
 * Scores the homepage and blog sample with `scoreMethodologyTransparency`,
 * averages the results, and reports one headline finding chosen by the average
 * (>= 8 info, >= 5 low, otherwise medium) plus an extra finding when any page
 * falls in the weak bucket.
 *
 * @param {object} data - Site data; a missing `homepage` yields a
 *   `not_found` result with score 0.
 * @returns {object} Criterion result with `criterion`, `criterion_label`,
 *   `score`, `status`, `findings` and `fix_priority`.
 */
function checkMethodologyTransparency(data) {
  const criterion = "methodology_transparency";
  const criterion_label = "Methodology Transparency";
  const findings = [];

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));
  const { total, average, strong, weak } = summary;

  let headline;
  if (average >= 8) {
    headline = {
      severity: "info",
      detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated`
    };
  } else if (average >= 5) {
    headline = {
      severity: "low",
      detail: "Some process transparency exists, but it is inconsistent",
      fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them'
    };
  } else {
    headline = {
      severity: "medium",
      detail: "Little content-production or review transparency is visible",
      fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions"
    };
  }
  findings.push(headline);

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
    });
  }

  // Pass at 7+, partial at 4-6, fail below that; only a pass drops to P3.
  const passed = average >= 7;
  let status = "fail";
  if (passed) {
    status = "pass";
  } else if (average >= 4) {
    status = "partial";
  }
  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2588
3034
  function checkCitationReadyWriting(data) {
2589
3035
  const findings = [];
2590
3036
  if (!data.homepage) {
@@ -2680,8 +3126,8 @@ function checkAnswerFirstPlacement(data) {
2680
3126
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
2681
3127
  for (const p of earlyParagraphs) {
2682
3128
  const pText = p.replace(/<[^>]*>/g, "").trim();
2683
- const wordCount = pText.split(/\s+/).length;
2684
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
3129
+ const wordCount2 = pText.split(/\s+/).length;
3130
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
2685
3131
  shortAnswerCount++;
2686
3132
  break;
2687
3133
  }
@@ -3223,20 +3669,29 @@ function auditSiteFromData(data) {
3223
3669
  checkVisibleDateSignal(data),
3224
3670
  topicCoherence,
3225
3671
  checkContentDepth(data, topicCoherence.score),
3226
- // V2 criteria (#29-#34)
3672
+ // Helpful-content criteria (#29-#32)
3673
+ checkHelpfulPurposeAlignment(data),
3674
+ checkFirstHandExperienceSignals(data),
3675
+ checkCreatorTransparency(data),
3676
+ checkMethodologyTransparency(data),
3677
+ // V2 criteria (#33-#38)
3227
3678
  checkCitationReadyWriting(data),
3228
3679
  checkAnswerFirstPlacement(data),
3229
3680
  checkEvidencePackaging(data),
3230
3681
  checkEntityDisambiguation(data),
3231
3682
  checkExtractionFriction(data),
3232
3683
  checkImageContextAI(data),
3233
- // V3 criteria (#35-#36)
3684
+ // V3 criteria (#39-#40)
3234
3685
  checkDuplicateContent(data),
3235
3686
  checkCrossPageDuplication(data)
3236
3687
  ];
3237
3688
  }
3238
3689
  async function auditSite(targetUrl) {
3239
- const url = new URL(targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`);
3690
+ const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
3691
+ if (!await isSafeFetchTarget(normalizedTarget)) {
3692
+ throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
3693
+ }
3694
+ const url = new URL(normalizedTarget);
3240
3695
  const domain = url.hostname.replace(/^www\./, "");
3241
3696
  const data = await prefetchSiteData(domain);
3242
3697
  return auditSiteFromData(data);
@@ -3262,6 +3717,10 @@ var WEIGHTS = {
3262
3717
  // Relevance to actual AI queries
3263
3718
  faq_section: 0.03,
3264
3719
  // Structured Q&A pairs
3720
+ helpful_purpose_alignment: 0.03,
3721
+ // Visitor-helpful vs search-first framing
3722
+ first_hand_experience_signals: 0.03,
3723
+ // Evidence of real use or observation
3265
3724
  // ─── Content Organization (~30%) ──────────────────────────────────────────
3266
3725
  // HOW easily AI engines can extract and trust your content.
3267
3726
  entity_consistency: 0.05,
@@ -3276,9 +3735,13 @@ var WEIGHTS = {
3276
3735
  // Expert attribution
3277
3736
  table_list_extractability: 0.03,
3278
3737
  // Extractable structured data
3279
- definition_patterns: 0.02,
3738
+ creator_transparency: 0.02,
3739
+ // Visible author/reviewer clarity
3740
+ methodology_transparency: 0.02,
3741
+ // Process disclosure
3742
+ definition_patterns: 0.015,
3280
3743
  // Clear definitions
3281
- visible_date_signal: 0.02,
3744
+ visible_date_signal: 0.015,
3282
3745
  // Publication date trust
3283
3746
  semantic_html: 0.02,
3284
3747
  // Clean semantic structure
@@ -3287,15 +3750,15 @@ var WEIGHTS = {
3287
3750
  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
3288
3751
  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
3289
3752
  content_cannibalization: 0.02,
3290
- llms_txt: 0.02,
3291
- robots_txt: 0.02,
3753
+ llms_txt: 0.01,
3754
+ robots_txt: 0.01,
3292
3755
  content_velocity: 0.02,
3293
- content_licensing: 0.02,
3756
+ content_licensing: 0.01,
3294
3757
  sitemap_completeness: 0.01,
3295
- canonical_url: 0.01,
3296
- rss_feed: 0.01,
3297
- schema_coverage: 0.01,
3298
- speakable_schema: 0.01,
3758
+ canonical_url: 5e-3,
3759
+ rss_feed: 5e-3,
3760
+ schema_coverage: 5e-3,
3761
+ speakable_schema: 5e-3,
3299
3762
  // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
3300
3763
  // Citation quality, evidence packaging, and extraction friction.
3301
3764
  citation_ready_writing: 0.04,
@@ -3308,7 +3771,7 @@ var WEIGHTS = {
3308
3771
  // Clear entity boundaries
3309
3772
  extraction_friction: 0.02,
3310
3773
  // Sentence length, voice, jargon
3311
- image_context_ai: 0.01,
3774
+ image_context_ai: 5e-3,
3312
3775
  // Figure/figcaption, alt text quality
3313
3776
  // ─── V3 Criteria ────────────────────────────────────────────────────────
3314
3777
  duplicate_content: 0.05,
@@ -3328,13 +3791,14 @@ function calculateOverallScore(criteria) {
3328
3791
  let score = Math.round(weightedSum / totalWeight);
3329
3792
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
3330
3793
  if (coherence && coherence.score < 6) {
3331
- const cap2 = 35 + coherence.score * 5;
3332
- score = Math.min(score, cap2);
3794
+ const cap3 = 35 + coherence.score * 5;
3795
+ score = Math.min(score, cap3);
3333
3796
  }
3334
3797
  return score;
3335
3798
  }
3336
3799
 
3337
3800
  // src/headless-fetch.ts
3801
+ init_network_guard();
3338
3802
  var SPA_INDICATORS = [
3339
3803
  // Root mount points (empty or nearly empty, including self-closing)
3340
3804
  /<div\s+id=["'](root|app|__next|__nuxt|__vue)["'][^>]*(?:\/>|>\s*<\/div>)/i,
@@ -3374,6 +3838,13 @@ function classifyRendering(html) {
3374
3838
  return { method: "client-spa", framework: null };
3375
3839
  }
3376
3840
  async function fetchWithHeadless(url, options) {
3841
+ let expectedDomain;
3842
+ try {
3843
+ expectedDomain = normalizeHostname(new URL(url).hostname);
3844
+ } catch {
3845
+ return null;
3846
+ }
3847
+ if (!await isSafeFetchTarget(url, expectedDomain)) return null;
3377
3848
  let puppeteer;
3378
3849
  try {
3379
3850
  const mod = "puppeteer";
@@ -3400,12 +3871,28 @@ async function fetchWithHeadless(url, options) {
3400
3871
  const page = await browser.newPage();
3401
3872
  await page.setRequestInterception(true);
3402
3873
  page.on("request", (req) => {
3403
- const type = req.resourceType();
3404
- if (["image", "font", "media", "stylesheet"].includes(type)) {
3405
- req.abort();
3406
- } else {
3407
- req.continue();
3408
- }
3874
+ void (async () => {
3875
+ const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
3876
+ if (alreadyHandled) return;
3877
+ if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
3878
+ try {
3879
+ if (!req.isInterceptResolutionHandled?.()) await req.abort();
3880
+ } catch {
3881
+ }
3882
+ return;
3883
+ }
3884
+ const type = req.resourceType();
3885
+ try {
3886
+ if (!req.isInterceptResolutionHandled?.()) {
3887
+ if (["image", "font", "media", "stylesheet"].includes(type)) {
3888
+ await req.abort();
3889
+ } else {
3890
+ await req.continue();
3891
+ }
3892
+ }
3893
+ } catch {
3894
+ }
3895
+ })();
3409
3896
  });
3410
3897
  await page.setUserAgent("AEO-Visibility-Bot/1.0");
3411
3898
  await page.goto(url, { waitUntil: "networkidle2", timeout });
@@ -3418,6 +3905,7 @@ async function fetchWithHeadless(url, options) {
3418
3905
  }
3419
3906
  const html = await page.content();
3420
3907
  const finalUrl = page.url();
3908
+ if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
3421
3909
  return {
3422
3910
  text: html.slice(0, 5e5),
3423
3911
  status: 200,
@@ -3445,6 +3933,8 @@ var PILLARS = {
3445
3933
  "citation_ready_writing",
3446
3934
  "answer_first_placement",
3447
3935
  "evidence_packaging",
3936
+ "helpful_purpose_alignment",
3937
+ "first_hand_experience_signals",
3448
3938
  "duplicate_content",
3449
3939
  "cross_page_duplication"
3450
3940
  ],
@@ -3462,7 +3952,9 @@ var PILLARS = {
3462
3952
  "internal_linking",
3463
3953
  "content_freshness",
3464
3954
  "author_schema_depth",
3465
- "schema_markup"
3955
+ "schema_markup",
3956
+ "creator_transparency",
3957
+ "methodology_transparency"
3466
3958
  ],
3467
3959
  "Technical Foundation": [
3468
3960
  "semantic_html",
@@ -3492,6 +3984,8 @@ var CLIENT_NAMES = {
3492
3984
  citation_ready_writing: "Citation-Ready Writing",
3493
3985
  answer_first_placement: "Answer-First Placement",
3494
3986
  evidence_packaging: "Evidence Packaging",
3987
+ helpful_purpose_alignment: "Helpful Purpose Alignment",
3988
+ first_hand_experience_signals: "First-Hand Experience Signals",
3495
3989
  direct_answer_density: "Direct Answer Density",
3496
3990
  qa_content_format: "Q&A Content Format",
3497
3991
  query_answer_alignment: "Query-Answer Alignment",
@@ -3504,6 +3998,8 @@ var CLIENT_NAMES = {
3504
3998
  content_freshness: "Content Freshness",
3505
3999
  author_schema_depth: "Author & Expert Schema",
3506
4000
  schema_markup: "Schema Markup",
4001
+ creator_transparency: "Creator Transparency",
4002
+ methodology_transparency: "Methodology Transparency",
3507
4003
  semantic_html: "Semantic HTML",
3508
4004
  clean_html: "Clean HTML",
3509
4005
  visible_date_signal: "Visible Date Signal",
@@ -3530,6 +4026,8 @@ var PILLAR_WEIGHTS = {
3530
4026
  citation_ready_writing: 0.04,
3531
4027
  answer_first_placement: 0.03,
3532
4028
  evidence_packaging: 0.03,
4029
+ helpful_purpose_alignment: 0.03,
4030
+ first_hand_experience_signals: 0.03,
3533
4031
  duplicate_content: 0.05,
3534
4032
  cross_page_duplication: 0.03,
3535
4033
  direct_answer_density: 0.05,
@@ -3537,28 +4035,30 @@ var PILLAR_WEIGHTS = {
3537
4035
  query_answer_alignment: 0.04,
3538
4036
  faq_section: 0.03,
3539
4037
  table_list_extractability: 0.03,
3540
- definition_patterns: 0.02,
4038
+ definition_patterns: 0.015,
3541
4039
  entity_disambiguation: 0.02,
3542
4040
  entity_consistency: 0.05,
3543
4041
  internal_linking: 0.04,
3544
4042
  content_freshness: 0.04,
3545
4043
  author_schema_depth: 0.03,
3546
4044
  schema_markup: 0.03,
4045
+ creator_transparency: 0.02,
4046
+ methodology_transparency: 0.02,
3547
4047
  semantic_html: 0.02,
3548
4048
  clean_html: 0.02,
3549
- visible_date_signal: 0.02,
4049
+ visible_date_signal: 0.015,
3550
4050
  extraction_friction: 0.02,
3551
- image_context_ai: 0.01,
3552
- schema_coverage: 0.01,
3553
- speakable_schema: 0.01,
4051
+ image_context_ai: 5e-3,
4052
+ schema_coverage: 5e-3,
4053
+ speakable_schema: 5e-3,
3554
4054
  content_cannibalization: 0.02,
3555
- llms_txt: 0.02,
3556
- robots_txt: 0.02,
4055
+ llms_txt: 0.01,
4056
+ robots_txt: 0.01,
3557
4057
  content_velocity: 0.02,
3558
- content_licensing: 0.02,
3559
- canonical_url: 0.01,
4058
+ content_licensing: 0.01,
4059
+ canonical_url: 5e-3,
3560
4060
  sitemap_completeness: 0.01,
3561
- rss_feed: 0.01
4061
+ rss_feed: 5e-3
3562
4062
  };
3563
4063
  var CRITERION_EFFORT = {
3564
4064
  topic_coherence: "High",
@@ -3568,6 +4068,8 @@ var CRITERION_EFFORT = {
3568
4068
  citation_ready_writing: "Medium",
3569
4069
  answer_first_placement: "Medium",
3570
4070
  evidence_packaging: "Medium",
4071
+ helpful_purpose_alignment: "Medium",
4072
+ first_hand_experience_signals: "Medium",
3571
4073
  duplicate_content: "Medium",
3572
4074
  cross_page_duplication: "Medium",
3573
4075
  direct_answer_density: "Medium",
@@ -3582,6 +4084,8 @@ var CRITERION_EFFORT = {
3582
4084
  content_freshness: "Low",
3583
4085
  author_schema_depth: "Low",
3584
4086
  schema_markup: "Medium",
4087
+ creator_transparency: "Low",
4088
+ methodology_transparency: "Low",
3585
4089
  semantic_html: "Low",
3586
4090
  clean_html: "Medium",
3587
4091
  visible_date_signal: "Low",
@@ -3606,6 +4110,8 @@ var FIX_DESCRIPTIONS = {
3606
4110
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
3607
4111
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
3608
4112
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
4113
+ helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
4114
+ first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
3609
4115
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
3610
4116
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
3611
4117
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
@@ -3618,6 +4124,8 @@ var FIX_DESCRIPTIONS = {
3618
4124
  content_freshness: "Add dateModified schema and visible last-updated dates.",
3619
4125
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
3620
4126
  schema_markup: "Implement JSON-LD structured data on key pages.",
4127
+ creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
4128
+ methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
3621
4129
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
3622
4130
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
3623
4131
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
@@ -3717,6 +4225,10 @@ var CRITERION_LABELS = {
3717
4225
  "Visible Date Signal": "Visible Date Signal",
3718
4226
  "Topic Coherence": "Topic Coherence",
3719
4227
  "Content Depth": "Content Depth",
4228
+ "Helpful Purpose Alignment": "Helpful Purpose Alignment",
4229
+ "First-Hand Experience Signals": "First-Hand Experience Signals",
4230
+ "Creator Transparency": "Creator Transparency",
4231
+ "Methodology Transparency": "Methodology Transparency",
3720
4232
  "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
3721
4233
  "Answer-First Placement": "Answer-First Placement",
3722
4234
  "Evidence Packaging": "Evidence Packaging",
@@ -3822,6 +4334,8 @@ var CRITERION_WEIGHTS = {
3822
4334
  qa_content_format: 0.04,
3823
4335
  query_answer_alignment: 0.04,
3824
4336
  faq_section: 0.03,
4337
+ helpful_purpose_alignment: 0.03,
4338
+ first_hand_experience_signals: 0.03,
3825
4339
  // Content Organization (~30%)
3826
4340
  entity_consistency: 0.05,
3827
4341
  internal_linking: 0.04,
@@ -3829,28 +4343,30 @@ var CRITERION_WEIGHTS = {
3829
4343
  schema_markup: 0.03,
3830
4344
  author_schema_depth: 0.03,
3831
4345
  table_list_extractability: 0.03,
3832
- definition_patterns: 0.02,
3833
- visible_date_signal: 0.02,
4346
+ creator_transparency: 0.02,
4347
+ methodology_transparency: 0.02,
4348
+ definition_patterns: 0.015,
4349
+ visible_date_signal: 0.015,
3834
4350
  semantic_html: 0.02,
3835
4351
  clean_html: 0.02,
3836
4352
  // Technical Plumbing (~15%)
3837
4353
  content_cannibalization: 0.02,
3838
- llms_txt: 0.02,
3839
- robots_txt: 0.02,
4354
+ llms_txt: 0.01,
4355
+ robots_txt: 0.01,
3840
4356
  content_velocity: 0.02,
3841
- content_licensing: 0.02,
4357
+ content_licensing: 0.01,
3842
4358
  sitemap_completeness: 0.01,
3843
- canonical_url: 0.01,
3844
- rss_feed: 0.01,
3845
- schema_coverage: 0.01,
3846
- speakable_schema: 0.01,
4359
+ canonical_url: 5e-3,
4360
+ rss_feed: 5e-3,
4361
+ schema_coverage: 5e-3,
4362
+ speakable_schema: 5e-3,
3847
4363
  // V2 Criteria (~15%)
3848
4364
  citation_ready_writing: 0.04,
3849
4365
  answer_first_placement: 0.03,
3850
4366
  evidence_packaging: 0.03,
3851
4367
  entity_disambiguation: 0.02,
3852
4368
  extraction_friction: 0.02,
3853
- image_context_ai: 0.01,
4369
+ image_context_ai: 5e-3,
3854
4370
  // V3 Criteria
3855
4371
  duplicate_content: 0.05,
3856
4372
  cross_page_duplication: 0.03
@@ -3891,6 +4407,16 @@ var OPPORTUNITY_TEMPLATES = {
3891
4407
  effort: "Medium",
3892
4408
  description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
3893
4409
  },
4410
+ helpful_purpose_alignment: {
4411
+ name: "Improve Helpful Purpose Alignment",
4412
+ effort: "Medium",
4413
+ description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
4414
+ },
4415
+ first_hand_experience_signals: {
4416
+ name: "Add First-Hand Experience Signals",
4417
+ effort: "Medium",
4418
+ description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
4419
+ },
3894
4420
  original_data: {
3895
4421
  name: "Add Original Data & Case Studies",
3896
4422
  effort: "High",
@@ -3946,6 +4472,16 @@ var OPPORTUNITY_TEMPLATES = {
3946
4472
  effort: "Low",
3947
4473
  description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
3948
4474
  },
4475
+ creator_transparency: {
4476
+ name: "Improve Creator Transparency",
4477
+ effort: "Low",
4478
+ description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
4479
+ },
4480
+ methodology_transparency: {
4481
+ name: "Add Methodology Transparency",
4482
+ effort: "Low",
4483
+ description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
4484
+ },
3949
4485
  fact_density: {
3950
4486
  name: "Increase Fact & Data Density",
3951
4487
  effort: "Medium",
@@ -4205,20 +4741,13 @@ function formatList(items) {
4205
4741
  }
4206
4742
 
4207
4743
  // src/multi-page-fetcher.ts
4208
- async function fetchPage(url, timeoutMs = 1e4) {
4209
- try {
4210
- const res = await fetch(url, {
4211
- signal: AbortSignal.timeout(timeoutMs),
4212
- headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
4213
- redirect: "follow"
4214
- });
4215
- if (res.status !== 200) return null;
4216
- const text = await res.text();
4217
- if (text.length < 200) return null;
4218
- return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
4219
- } catch {
4220
- return null;
4221
- }
4744
+ init_network_guard();
4745
/**
 * Fetch one page through the network guard and return a trimmed snapshot of it.
 *
 * @param {string} url - Page URL to fetch.
 * @param {string} domain - Passed to `safeFetch` as `expectedDomain`.
 * @param {number} [timeoutMs=1e4] - Passed to `safeFetch` as `timeoutMs`.
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   The first 500,000 characters of the body with status and final URL, or
 *   `null` when the fetch yields no response, the status is not 200, the body
 *   is shorter than 200 characters, or anything throws along the way.
 */
async function fetchPage(url, domain, timeoutMs = 1e4) {
  // The whole sequence stays inside try/catch, as the previous fetch-based
  // implementation did: reading the body can still reject after headers have
  // arrived, and callers run many of these under Promise.all, where a single
  // rejection would discard every other page in the batch.
  try {
    const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
    if (!res || res.status !== 200) return null;
    const text = await res.text();
    if (text.length < 200) return null;
    return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
  } catch {
    return null;
  }
}
4223
4752
  var PAGE_VARIANTS = {
4224
4753
  about: ["/about", "/about-us", "/company", "/who-we-are"],
@@ -4374,7 +4903,7 @@ async function fetchMultiPageData(siteData, options) {
4374
4903
  }
4375
4904
  const entries = Array.from(urlsToFetch.entries());
4376
4905
  if (entries.length === 0) return 0;
4377
- const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
4906
+ const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
4378
4907
  if (!siteData.blogSample) siteData.blogSample = [];
4379
4908
  let added = 0;
4380
4909
  for (let i = 0; i < results.length; i++) {
@@ -4401,19 +4930,23 @@ var PAGE_CRITERIA = {
4401
4930
  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
4402
4931
  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
4403
4932
  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
4404
- definition_patterns: { weight: 0.02, label: "Definition Patterns" },
4405
- visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
4933
+ definition_patterns: { weight: 0.015, label: "Definition Patterns" },
4934
+ visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
4406
4935
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
4407
4936
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
4408
4937
  // Technical Plumbing
4409
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
4938
+ canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
4410
4939
  // V2 Criteria
4411
4940
  citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
4412
4941
  answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
4413
4942
  evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
4943
+ helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
4944
+ first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
4414
4945
  entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
4415
4946
  extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
4416
- image_context_ai: { weight: 0.01, label: "Image Context for AI" },
4947
+ creator_transparency: { weight: 0.02, label: "Creator Transparency" },
4948
+ methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
4949
+ image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
4417
4950
  duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
4418
4951
  };
4419
4952
  function extractJsonLdBlocks(html) {
@@ -4436,7 +4969,7 @@ function extractTypesFromJsonLd(blocks) {
4436
4969
  }
4437
4970
  return types;
4438
4971
  }
4439
- function getTextContent(html) {
4972
/**
 * Reduce an HTML string to its visible text.
 *
 * Drops `<script>` and `<style>` elements together with their contents,
 * replaces every remaining tag with a space, collapses whitespace runs to a
 * single space, and trims the ends.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Whitespace-normalized text content.
 */
function getTextContent2(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
  const tagsAsSpaces = withoutStyles.replace(/<[^>]*>/g, " ");
  const collapsed = tagsAsSpaces.replace(/\s+/g, " ");
  return collapsed.trim();
}
4442
4975
  function extractQuestionHeadings2(html) {
@@ -4464,7 +4997,7 @@ function countAnsweredQuestions(html) {
4464
4997
  }
4465
4998
  return { total: questions.length, answered };
4466
4999
  }
4467
- function cap(value, max) {
5000
/**
 * Clamp a score to an upper bound.
 *
 * @param {number} value - Score to clamp.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap2(value, max) {
  const capped = Math.min(value, max);
  return capped;
}
4470
5003
  function scoreSchemaMarkup(html) {
@@ -4490,10 +5023,10 @@ function scoreSchemaMarkup(html) {
4490
5023
  for (const t of types) {
4491
5024
  if (knownTypes.includes(t)) knownCount++;
4492
5025
  }
4493
- score += cap(knownCount * 2, 4);
5026
+ score += cap2(knownCount * 2, 4);
4494
5027
  if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
4495
5028
  if (types.has("FAQPage")) score += 1;
4496
- return cap(score, 10);
5029
+ return cap2(score, 10);
4497
5030
  }
4498
5031
  function scoreQAFormat(html) {
4499
5032
  const questions = extractQuestionHeadings2(html);
@@ -4505,7 +5038,7 @@ function scoreQAFormat(html) {
4505
5038
  if (answered >= 1) score += 3;
4506
5039
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4507
5040
  if (h1Matches.length === 1) score += 2;
4508
- return cap(score, 10);
5041
+ return cap2(score, 10);
4509
5042
  }
4510
5043
  function scoreCleanHtml(html) {
4511
5044
  let score = 0;
@@ -4514,15 +5047,15 @@ function scoreCleanHtml(html) {
4514
5047
  for (const tag of semantics) {
4515
5048
  if (html.toLowerCase().includes(tag)) semCount++;
4516
5049
  }
4517
- score += cap(semCount, 3);
5050
+ score += cap2(semCount, 3);
4518
5051
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4519
5052
  if (h1Matches.length === 1) score += 2;
4520
- const text = getTextContent(html);
5053
+ const text = getTextContent2(html);
4521
5054
  if (text.length > 500) score += 3;
4522
5055
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
4523
5056
  const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
4524
5057
  if (hasTitle && hasDesc) score += 2;
4525
- return cap(score, 10);
5058
+ return cap2(score, 10);
4526
5059
  }
4527
5060
  function scoreFaqSection(html) {
4528
5061
  let score = 0;
@@ -4534,11 +5067,11 @@ function scoreFaqSection(html) {
4534
5067
  const questions = extractQuestionHeadings2(html);
4535
5068
  if (questions.length >= 10) score += 1;
4536
5069
  if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
4537
- return cap(score, 10);
5070
+ return cap2(score, 10);
4538
5071
  }
4539
5072
  function scoreOriginalData(html) {
4540
5073
  let score = 0;
4541
- const text = getTextContent(html);
5074
+ const text = getTextContent2(html);
4542
5075
  if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
4543
5076
  score += 3;
4544
5077
  } else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
@@ -4555,7 +5088,7 @@ function scoreOriginalData(html) {
4555
5088
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
4556
5089
  score += 2;
4557
5090
  }
4558
- return cap(score, 10);
5091
+ return cap2(score, 10);
4559
5092
  }
4560
5093
  function scoreQueryAnswerAlignment(html) {
4561
5094
  const { total, answered } = countAnsweredQuestions(html);
@@ -4578,7 +5111,7 @@ function scoreContentFreshness(html) {
4578
5111
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4579
5112
  const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
4580
5113
  if (yearPattern.test(html)) score += 2;
4581
- return cap(score, 10);
5114
+ return cap2(score, 10);
4582
5115
  }
4583
5116
  function scoreTableListExtractability(html) {
4584
5117
  let score = 0;
@@ -4591,7 +5124,7 @@ function scoreTableListExtractability(html) {
4591
5124
  const listItems = html.match(/<li[\s>]/gi) || [];
4592
5125
  if (listItems.length >= 10) score += 1;
4593
5126
  if (/<dl[\s>]/i.test(html)) score += 1;
4594
- return cap(score, 10);
5127
+ return cap2(score, 10);
4595
5128
  }
4596
5129
  function scoreDirectAnswerDensity(html) {
4597
5130
  let score = 0;
@@ -4607,9 +5140,9 @@ function scoreDirectAnswerDensity(html) {
4607
5140
  }
4608
5141
  if (snippetCount >= 3) score += 2;
4609
5142
  else if (snippetCount >= 1) score += 1;
4610
- const directOpeners = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
5143
+ const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4611
5144
  if (directOpeners.length >= 2) score += 2;
4612
- return cap(score, 10);
5145
+ return cap2(score, 10);
4613
5146
  }
4614
5147
  function scoreSemanticHtml(html) {
4615
5148
  let score = 0;
@@ -4619,7 +5152,7 @@ function scoreSemanticHtml(html) {
4619
5152
  for (const el of elements) {
4620
5153
  if (lowerHtml.includes(el)) count++;
4621
5154
  }
4622
- score += cap(Math.floor(count * 0.7), 4);
5155
+ score += cap2(Math.floor(count * 0.7), 4);
4623
5156
  const imgTags = html.match(/<img\s[^>]*>/gi) || [];
4624
5157
  if (imgTags.length > 0) {
4625
5158
  let withAlt = 0;
@@ -4630,11 +5163,11 @@ function scoreSemanticHtml(html) {
4630
5163
  }
4631
5164
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
4632
5165
  if (/\baria-/i.test(html)) score += 2;
4633
- return cap(score, 10);
5166
+ return cap2(score, 10);
4634
5167
  }
4635
5168
  function scoreFactDensity(html) {
4636
5169
  let score = 0;
4637
- const text = getTextContent(html);
5170
+ const text = getTextContent2(html);
4638
5171
  const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
4639
5172
  if (numericPatterns.length >= 6) score += 5;
4640
5173
  else if (numericPatterns.length >= 3) score += 3;
@@ -4647,11 +5180,11 @@ function scoreFactDensity(html) {
4647
5180
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
4648
5181
  const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
4649
5182
  if (units.length >= 2) score += 1;
4650
- return cap(score, 10);
5183
+ return cap2(score, 10);
4651
5184
  }
4652
5185
  function scoreDefinitionPatterns(html) {
4653
5186
  let score = 0;
4654
- const text = getTextContent(html);
5187
+ const text = getTextContent2(html);
4655
5188
  const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
4656
5189
  if (defPatterns.length >= 3) score += 5;
4657
5190
  else if (defPatterns.length >= 1) score += 3;
@@ -4659,7 +5192,7 @@ function scoreDefinitionPatterns(html) {
4659
5192
  if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
4660
5193
  if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
4661
5194
  if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
4662
- return cap(score, 10);
5195
+ return cap2(score, 10);
4663
5196
  }
4664
5197
  function scoreCanonicalUrl(html, url) {
4665
5198
  let score = 0;
@@ -4680,7 +5213,7 @@ function scoreCanonicalUrl(html, url) {
4680
5213
  if (canonicalHref.startsWith("https://")) score += 2;
4681
5214
  const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
4682
5215
  if (allCanonicals.length === 1) score += 1;
4683
- return cap(score, 10);
5216
+ return cap2(score, 10);
4684
5217
  }
4685
5218
  function scoreVisibleDateSignal(html) {
4686
5219
  let score = 0;
@@ -4699,11 +5232,11 @@ function scoreVisibleDateSignal(html) {
4699
5232
  } catch {
4700
5233
  }
4701
5234
  }
4702
- return cap(score, 10);
5235
+ return cap2(score, 10);
4703
5236
  }
4704
5237
  function scoreCitationReadyWriting(html) {
4705
5238
  let score = 0;
4706
- const text = getTextContent(html);
5239
+ const text = getTextContent2(html);
4707
5240
  const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
4708
5241
  if (defSentences.length >= 3) score += 3;
4709
5242
  else if (defSentences.length >= 1) score += 1;
@@ -4732,7 +5265,7 @@ function scoreCitationReadyWriting(html) {
4732
5265
  );
4733
5266
  if (quotableLines.length >= 2) score += 2;
4734
5267
  else if (quotableLines.length >= 1) score += 1;
4735
- return cap(score, 10);
5268
+ return cap2(score, 10);
4736
5269
  }
4737
5270
  function scoreAnswerFirstPlacement(html) {
4738
5271
  let score = 0;
@@ -4743,8 +5276,8 @@ function scoreAnswerFirstPlacement(html) {
4743
5276
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4744
5277
  for (const p of earlyParagraphs) {
4745
5278
  const pText = p.replace(/<[^>]*>/g, "").trim();
4746
- const wordCount = pText.split(/\s+/).length;
4747
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5279
+ const wordCount2 = pText.split(/\s+/).length;
5280
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4748
5281
  score += 4;
4749
5282
  break;
4750
5283
  }
@@ -4765,11 +5298,11 @@ function scoreAnswerFirstPlacement(html) {
4765
5298
  score += 3;
4766
5299
  }
4767
5300
  }
4768
- return cap(score, 10);
5301
+ return cap2(score, 10);
4769
5302
  }
4770
5303
  function scoreEvidencePackaging(html) {
4771
5304
  let score = 0;
4772
- const text = getTextContent(html);
5305
+ const text = getTextContent2(html);
4773
5306
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4774
5307
  let inlineCitations = 0;
4775
5308
  for (const p of paragraphs) {
@@ -4787,11 +5320,11 @@ function scoreEvidencePackaging(html) {
4787
5320
  const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
4788
5321
  if (sourcedStats.length >= 2) score += 2;
4789
5322
  else if (sourcedStats.length >= 1) score += 1;
4790
- return cap(score, 10);
5323
+ return cap2(score, 10);
4791
5324
  }
4792
5325
  function scoreEntityDisambiguation(html) {
4793
5326
  let score = 0;
4794
- const text = getTextContent(html);
5327
+ const text = getTextContent2(html);
4795
5328
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
4796
5329
  if (!h1Match) return 3;
4797
5330
  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
@@ -4809,11 +5342,11 @@ function scoreEntityDisambiguation(html) {
4809
5342
  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
4810
5343
  score += 3;
4811
5344
  }
4812
- return cap(score, 10);
5345
+ return cap2(score, 10);
4813
5346
  }
4814
5347
  function scoreExtractionFriction(html) {
4815
5348
  let score = 0;
4816
- const text = getTextContent(html);
5349
+ const text = getTextContent2(html);
4817
5350
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
4818
5351
  const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
4819
5352
  if (avgLen > 0 && avgLen < 20) score += 3;
@@ -4836,7 +5369,7 @@ function scoreExtractionFriction(html) {
4836
5369
  if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
4837
5370
  score = Math.max(0, score - 2);
4838
5371
  }
4839
- return cap(score, 10);
5372
+ return cap2(score, 10);
4840
5373
  }
4841
5374
  function scoreImageContextAI(html) {
4842
5375
  let score = 0;
@@ -4861,7 +5394,7 @@ function scoreImageContextAI(html) {
4861
5394
  else if (goodAltCount > 0) score += 1;
4862
5395
  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
4863
5396
  if (contextualImages.length > 0) score += 3;
4864
- return cap(score, 10);
5397
+ return cap2(score, 10);
4865
5398
  }
4866
5399
  function scoreDuplicateContent(html) {
4867
5400
  return scoreDuplicateContentDetailed(html).score;
@@ -4923,8 +5456,12 @@ var SCORING_FUNCTIONS = {
4923
5456
  citation_ready_writing: scoreCitationReadyWriting,
4924
5457
  answer_first_placement: scoreAnswerFirstPlacement,
4925
5458
  evidence_packaging: scoreEvidencePackaging,
5459
+ helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
5460
+ first_hand_experience_signals: scoreFirstHandExperienceSignals,
4926
5461
  entity_disambiguation: scoreEntityDisambiguation,
4927
5462
  extraction_friction: scoreExtractionFriction,
5463
+ creator_transparency: scoreCreatorTransparency,
5464
+ methodology_transparency: scoreMethodologyTransparency,
4928
5465
  image_context_ai: scoreImageContextAI,
4929
5466
  duplicate_content: scoreDuplicateContent
4930
5467
  };
@@ -4969,7 +5506,7 @@ function extractTitle(html) {
4969
5506
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4970
5507
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4971
5508
  }
4972
// Reduces an HTML string to plain text. In order: drops <script>...</script> and
// <style>...</style> blocks (case-insensitive, non-greedy), replaces every remaining
// tag with a single space, collapses whitespace runs to one space, and trims the ends.
// HTML entities are left as-is; nothing in this chain decodes them.
// This hunk only renames the helper from `getTextContent2` to `getTextContent3`.
- function getTextContent2(html) {
5509
+ function getTextContent3(html) {
4973
5510
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4974
5511
  }
4975
5512
  function countWords2(text) {
@@ -5029,9 +5566,9 @@ function checkMissingOgTags(html) {
5029
5566
  }
5030
5567
  return null;
5031
5568
  }
5032
// Thin-content check: when the supplied word count is below 300, returns an issue
// object { check: "thin-content", label, severity: "warning" } whose label embeds the
// count; otherwise returns null.
// This hunk only renames the parameter `wordCount` to `wordCount2`; the 300-word
// threshold, the label template and the returned shape are unchanged.
- function checkThinContent(wordCount) {
5033
- if (wordCount < 300) {
5034
- return { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" };
5569
+ function checkThinContent(wordCount2) {
5570
+ if (wordCount2 < 300) {
5571
+ return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
5035
5572
  }
5036
5573
  return null;
5037
5574
  }
@@ -5128,15 +5665,15 @@ function checkNoAnswerBlock(html) {
5128
5665
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
5129
5666
  for (const p of earlyParagraphs) {
5130
5667
  const pText = p.replace(/<[^>]*>/g, "").trim();
5131
- const wordCount = pText.split(/\s+/).length;
5132
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5668
+ const wordCount2 = pText.split(/\s+/).length;
5669
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
5133
5670
  return null;
5134
5671
  }
5135
5672
  }
5136
5673
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
5137
5674
  }
5138
5675
  function checkNoEvidence(html, url) {
5139
- const text = getTextContent2(html);
5676
+ const text = getTextContent3(html);
5140
5677
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
5141
5678
  let inlineCitations = 0;
5142
5679
  for (const p of paragraphs) {
@@ -5150,7 +5687,7 @@ function checkNoEvidence(html, url) {
5150
5687
  return null;
5151
5688
  }
5152
5689
  function checkHasCitationReadyContent(html) {
5153
- const text = getTextContent2(html);
5690
+ const text = getTextContent3(html);
5154
5691
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
5155
5692
  let quotable = 0;
5156
5693
  for (const s of sentences) {
@@ -5175,8 +5712,8 @@ function checkDuplicateContentBlocks(html) {
5175
5712
  }
5176
5713
  function analyzePage(html, url, category) {
5177
5714
  const title = extractTitle(html);
5178
- const textContent = getTextContent2(html);
5179
- const wordCount = countWords2(textContent);
5715
+ const textContent = getTextContent3(html);
5716
+ const wordCount2 = countWords2(textContent);
5180
5717
  const issues = [];
5181
5718
  const strengths = [];
5182
5719
  const issueChecks = [
@@ -5187,7 +5724,7 @@ function analyzePage(html, url, category) {
5187
5724
  checkNoSchema(html),
5188
5725
  checkMissingCanonical(html),
5189
5726
  checkMissingOgTags(html),
5190
- checkThinContent(wordCount),
5727
+ checkThinContent(wordCount2),
5191
5728
  checkImagesMissingAlt(html),
5192
5729
  checkNoInternalLinks(html, url),
5193
5730
  checkNoAnswerBlock(html),
@@ -5206,7 +5743,7 @@ function analyzePage(html, url, category) {
5206
5743
  if (result) strengths.push(result);
5207
5744
  }
5208
5745
  const { aeoScore, criterionScores } = scorePage(html, url);
5209
- return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
5746
+ return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
5210
5747
  }
5211
5748
  function analyzeAllPages(siteData) {
5212
5749
  const reviews = [];
@@ -5224,10 +5761,15 @@ function analyzeAllPages(siteData) {
5224
5761
  }
5225
5762
 
5226
5763
  // src/audit.ts
5764
+ init_network_guard();
5227
5765
  function getTextLength(html) {
5228
5766
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
5229
5767
  }
5230
5768
  async function audit(domain, options) {
5769
+ const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
5770
+ if (!await isSafeFetchTarget(normalizedTarget)) {
5771
+ throw new Error(`Refusing to audit private or local address: ${domain}`);
5772
+ }
5231
5773
  const startTime = Date.now();
5232
5774
  let renderedWithHeadless = false;
5233
5775
  const siteData = await prefetchSiteData(domain);
@@ -5338,7 +5880,7 @@ function extractTitle2(html) {
5338
5880
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
5339
5881
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
5340
5882
  }
5341
// Converts an HTML string to plain text: removes <script>...</script> and
// <style>...</style> blocks (case-insensitive, non-greedy), swaps each remaining tag
// for a space, squeezes whitespace runs down to one space, then trims.
// Entities such as &amp; are not decoded by this chain.
// This hunk only renames the helper from `getTextContent3` to `getTextContent4`.
- function getTextContent3(html) {
5883
+ function getTextContent4(html) {
5342
5884
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
5343
5885
  }
5344
5886
  function countWords3(text) {
@@ -5488,12 +6030,12 @@ function buildLinkGraph(pages, domain, homepageUrl) {
5488
6030
  const norm = normalizeUrl2(url);
5489
6031
  if (nodes.has(norm)) continue;
5490
6032
  const title = extractTitle2(page.text);
5491
- const text = getTextContent3(page.text);
5492
- const wordCount = countWords3(text);
6033
+ const text = getTextContent4(page.text);
6034
+ const wordCount2 = countWords3(text);
5493
6035
  nodes.set(norm, {
5494
6036
  url: norm,
5495
6037
  title,
5496
- wordCount,
6038
+ wordCount: wordCount2,
5497
6039
  category: page.category || "content",
5498
6040
  inDegree: 0,
5499
6041
  outDegree: 0,
@@ -5561,6 +6103,8 @@ var CRITERION_WEIGHTS2 = {
5561
6103
  qa_content_format: 0.04,
5562
6104
  query_answer_alignment: 0.04,
5563
6105
  faq_section: 0.03,
6106
+ helpful_purpose_alignment: 0.03,
6107
+ first_hand_experience_signals: 0.03,
5564
6108
  // Content Organization (~30%)
5565
6109
  entity_consistency: 0.05,
5566
6110
  internal_linking: 0.04,
@@ -5568,30 +6112,32 @@ var CRITERION_WEIGHTS2 = {
5568
6112
  schema_markup: 0.03,
5569
6113
  author_schema_depth: 0.03,
5570
6114
  table_list_extractability: 0.03,
5571
- definition_patterns: 0.02,
5572
- visible_date_signal: 0.02,
6115
+ creator_transparency: 0.02,
6116
+ methodology_transparency: 0.02,
6117
+ definition_patterns: 0.015,
6118
+ visible_date_signal: 0.015,
5573
6119
  semantic_html: 0.02,
5574
6120
  clean_html: 0.02,
5575
6121
  // Technical Plumbing (~15%)
5576
6122
  content_cannibalization: 0.02,
5577
6123
  duplicate_content: 0.05,
5578
6124
  cross_page_duplication: 0.03,
5579
- llms_txt: 0.02,
5580
- robots_txt: 0.02,
6125
+ llms_txt: 0.01,
6126
+ robots_txt: 0.01,
5581
6127
  content_velocity: 0.02,
5582
- content_licensing: 0.02,
6128
+ content_licensing: 0.01,
5583
6129
  sitemap_completeness: 0.01,
5584
- canonical_url: 0.01,
5585
- rss_feed: 0.01,
5586
- schema_coverage: 0.01,
5587
- speakable_schema: 0.01,
6130
+ canonical_url: 5e-3,
6131
+ rss_feed: 5e-3,
6132
+ schema_coverage: 5e-3,
6133
+ speakable_schema: 5e-3,
5588
6134
  // V2 Criteria (~15%)
5589
6135
  citation_ready_writing: 0.04,
5590
6136
  answer_first_placement: 0.03,
5591
6137
  evidence_packaging: 0.03,
5592
6138
  entity_disambiguation: 0.02,
5593
6139
  extraction_friction: 0.02,
5594
- image_context_ai: 0.01
6140
+ image_context_ai: 5e-3
5595
6141
  };
5596
6142
  var PHASE_CONFIG = [
5597
6143
  {
@@ -5621,6 +6167,8 @@ var PHASE_CONFIG = [
5621
6167
  "answer_first_placement",
5622
6168
  "evidence_packaging",
5623
6169
  "entity_disambiguation",
6170
+ "helpful_purpose_alignment",
6171
+ "first_hand_experience_signals",
5624
6172
  "duplicate_content",
5625
6173
  "cross_page_duplication"
5626
6174
  ]
@@ -5634,6 +6182,8 @@ var PHASE_CONFIG = [
5634
6182
  "schema_coverage",
5635
6183
  "speakable_schema",
5636
6184
  "author_schema_depth",
6185
+ "creator_transparency",
6186
+ "methodology_transparency",
5637
6187
  "content_licensing",
5638
6188
  "entity_consistency",
5639
6189
  "semantic_html",
@@ -5656,7 +6206,7 @@ function impactFromScore(score) {
5656
6206
  }
5657
6207
  function effortForCriterion(criterion, score) {
5658
6208
  const trivialCriteria = ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"];
5659
- const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"];
6209
+ const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "creator_transparency", "methodology_transparency", "semantic_html", "definition_patterns", "content_freshness"];
5660
6210
  const highCriteria = ["original_data", "content_velocity", "content_cannibalization"];
5661
6211
  if (trivialCriteria.includes(criterion)) return score <= 3 ? "low" : "trivial";
5662
6212
  if (lowCriteria.includes(criterion)) return score <= 3 ? "medium" : "low";
@@ -5923,6 +6473,58 @@ Sitemap: https://example.com/sitemap.xml`,
5923
6473
  pageCount: affected?.length
5924
6474
  }];
5925
6475
  },
6476
// Recommendation builder for the `helpful_purpose_alignment` criterion (entry added in this version).
// Params: `c`     - criterion result; this code reads c.score, c.criterion and c.criterion_label.
//         `pages` - forwarded unchanged to getAffectedPages().
// Returns [] when c.score >= 10, otherwise a one-element array holding the
// "fix-helpful-purpose-alignment" recommendation (category "content", four steps).
// impact and effort come from impactFromScore() and effortForCriterion(); impactScore is
// hard-coded to 0 here (presumably filled in later by the caller - not visible in this hunk).
// pageCount uses `?.`, so it is undefined when getAffectedPages() returns null/undefined.
+ helpful_purpose_alignment: (c, pages) => {
6477
+ if (c.score >= 10) return [];
6478
+ const impact = impactFromScore(c.score);
6479
+ const effort = effortForCriterion("helpful_purpose_alignment", c.score);
6480
+ const affected = getAffectedPages("helpful_purpose_alignment", pages);
6481
+ return [{
6482
+ id: "fix-helpful-purpose-alignment",
6483
+ criterion: c.criterion_label,
6484
+ criterionId: c.criterion,
6485
+ title: "Make pages solve the user task faster",
6486
+ description: "Reduce search-first filler and rewrite pages so the promised task is resolved quickly with concrete guidance, tradeoffs, and next steps.",
6487
+ impact,
6488
+ effort,
6489
+ impactScore: 0,
6490
+ category: "content",
6491
+ steps: [
6492
+ "Rewrite first paragraphs to answer the user need within the first 150-300 words",
6493
+ 'Remove generic intros like "In this guide" and broad filler that could fit any topic',
6494
+ "Add concrete decision help: tradeoffs, risks, constraints, and next steps",
6495
+ "Move aggressive CTAs below the first useful answer block"
6496
+ ],
6497
+ successCriteria: "Pages lead with task-solving guidance instead of generic search-first framing",
6498
+ affectedPages: affected,
6499
+ pageCount: affected?.length
6500
+ }];
6501
+ },
6502
// Recommendation builder for the `first_hand_experience_signals` criterion (entry added in this version).
// Params: `c`     - criterion result; this code reads c.score, c.criterion and c.criterion_label.
//         `pages` - forwarded unchanged to getAffectedPages().
// Returns [] when c.score >= 10, otherwise a one-element array holding the
// "fix-first-hand-experience" recommendation (category "content", four steps).
// impact and effort come from impactFromScore() and effortForCriterion(); impactScore is
// hard-coded to 0 here (presumably filled in later by the caller - not visible in this hunk).
// pageCount uses `?.`, so it is undefined when getAffectedPages() returns null/undefined.
+ first_hand_experience_signals: (c, pages) => {
6503
+ if (c.score >= 10) return [];
6504
+ const impact = impactFromScore(c.score);
6505
+ const effort = effortForCriterion("first_hand_experience_signals", c.score);
6506
+ const affected = getAffectedPages("first_hand_experience_signals", pages);
6507
+ return [{
6508
+ id: "fix-first-hand-experience",
6509
+ criterion: c.criterion_label,
6510
+ criterionId: c.criterion,
6511
+ title: "Add first-hand experience signals",
6512
+ description: "Show real use, testing, implementation, or lived experience instead of relying on generic summary content.",
6513
+ impact,
6514
+ effort,
6515
+ impactScore: 0,
6516
+ category: "content",
6517
+ steps: [
6518
+ "Add specific observations from real use, testing, or implementation",
6519
+ "Document limitations, edge cases, or lessons learned in practice",
6520
+ "Include screenshots, photos, before/after metrics, or original artifacts where relevant",
6521
+ "Rewrite generic sections to reflect direct experience with the subject matter"
6522
+ ],
6523
+ successCriteria: "Key pages contain credible signs of direct use or observation, not just generic advice",
6524
+ affectedPages: affected,
6525
+ pageCount: affected?.length
6526
+ }];
6527
+ },
5926
6528
  original_data: (c, pages) => {
5927
6529
  if (c.score >= 10) return [];
5928
6530
  const impact = impactFromScore(c.score);
@@ -6289,6 +6891,58 @@ Summarization: yes`,
6289
6891
  successCriteria: "Articles have Person schema for authors with credentials"
6290
6892
  }];
6291
6893
  },
6894
// Recommendation builder for the `creator_transparency` criterion (entry added in this version).
// Params: `c`     - criterion result; this code reads c.score, c.criterion and c.criterion_label.
//         `pages` - forwarded unchanged to getAffectedPages().
// Returns [] when c.score >= 10, otherwise a one-element array holding the
// "fix-creator-transparency" recommendation (category "trust", four steps).
// impact and effort come from impactFromScore() and effortForCriterion(); impactScore is
// hard-coded to 0 here (presumably filled in later by the caller - not visible in this hunk).
// pageCount uses `?.`, so it is undefined when getAffectedPages() returns null/undefined.
+ creator_transparency: (c, pages) => {
6895
+ if (c.score >= 10) return [];
6896
+ const impact = impactFromScore(c.score);
6897
+ const effort = effortForCriterion("creator_transparency", c.score);
6898
+ const affected = getAffectedPages("creator_transparency", pages);
6899
+ return [{
6900
+ id: "fix-creator-transparency",
6901
+ criterion: c.criterion_label,
6902
+ criterionId: c.criterion,
6903
+ title: "Make content creators clearly visible",
6904
+ description: "Add visible bylines, author pages, and reviewer/editor attribution so readers can clearly tell who created the content.",
6905
+ impact,
6906
+ effort,
6907
+ impactScore: 0,
6908
+ category: "trust",
6909
+ steps: [
6910
+ "Add visible bylines to article-like pages where readers expect them",
6911
+ "Link author names to author pages with role, expertise area, and relevant background",
6912
+ "Add reviewer or editor attribution on sensitive or expert content",
6913
+ "Keep visible creator identity consistent with schema markup"
6914
+ ],
6915
+ successCriteria: "Article-like pages have clear visible bylines and linked creator context",
6916
+ affectedPages: affected,
6917
+ pageCount: affected?.length
6918
+ }];
6919
+ },
6920
// Recommendation builder for the `methodology_transparency` criterion (entry added in this version).
// Params: `c`     - criterion result; this code reads c.score, c.criterion and c.criterion_label.
//         `pages` - forwarded unchanged to getAffectedPages().
// Returns [] when c.score >= 10, otherwise a one-element array holding the
// "fix-methodology-transparency" recommendation (category "trust", four steps).
// impact and effort come from impactFromScore() and effortForCriterion(); impactScore is
// hard-coded to 0 here (presumably filled in later by the caller - not visible in this hunk).
// pageCount uses `?.`, so it is undefined when getAffectedPages() returns null/undefined.
+ methodology_transparency: (c, pages) => {
6921
+ if (c.score >= 10) return [];
6922
+ const impact = impactFromScore(c.score);
6923
+ const effort = effortForCriterion("methodology_transparency", c.score);
6924
+ const affected = getAffectedPages("methodology_transparency", pages);
6925
+ return [{
6926
+ id: "fix-methodology-transparency",
6927
+ criterion: c.criterion_label,
6928
+ criterionId: c.criterion,
6929
+ title: "Explain how content was tested or reviewed",
6930
+ description: "Add methodology, criteria, testing, review, or update-process details where users would expect them.",
6931
+ impact,
6932
+ effort,
6933
+ impactScore: 0,
6934
+ category: "trust",
6935
+ steps: [
6936
+ 'Add a "How we tested", "Methodology", or review-process section where relevant',
6937
+ "Document criteria, tools used, sample size, timeframe, or update policy",
6938
+ "Disclose AI assistance when a reasonable reader would expect that context",
6939
+ "Support methodology notes with screenshots, tables, or process artifacts when possible"
6940
+ ],
6941
+ successCriteria: "Review, comparison, and research-style pages explain how conclusions were produced",
6942
+ affectedPages: affected,
6943
+ pageCount: affected?.length
6944
+ }];
6945
+ },
6292
6946
  fact_density: (c, pages) => {
6293
6947
  if (c.score >= 10) return [];
6294
6948
  const impact = impactFromScore(c.score);