aeorank 3.1.1 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/browser.d.ts +4 -4
- package/dist/browser.js +650 -157
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
- package/dist/chunk-DW7MPQ4X.js.map +1 -0
- package/dist/chunk-PYV5JVTC.js +179 -0
- package/dist/chunk-PYV5JVTC.js.map +1 -0
- package/dist/cli.js +519 -140
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
- package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
- package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
- package/dist/index.cjs +837 -183
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +650 -157
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-RYV25AUV.js.map +0 -1
- package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
- package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0
package/dist/browser.js
CHANGED
|
@@ -2,8 +2,12 @@ import {
|
|
|
2
2
|
crawlFullSite,
|
|
3
3
|
extractAllUrlsFromSitemap,
|
|
4
4
|
extractInternalLinks,
|
|
5
|
-
inferCategory
|
|
6
|
-
|
|
5
|
+
inferCategory,
|
|
6
|
+
isSafeFetchTarget,
|
|
7
|
+
isSafePublicUrl,
|
|
8
|
+
normalizeHostname,
|
|
9
|
+
safeFetch
|
|
10
|
+
} from "./chunk-DW7MPQ4X.js";
|
|
7
11
|
|
|
8
12
|
// src/parked-domain.ts
|
|
9
13
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
@@ -147,19 +151,181 @@ function shingleJaccardSimilarity(a, b) {
|
|
|
147
151
|
return union === 0 ? 0 : intersection / union;
|
|
148
152
|
}
|
|
149
153
|
|
|
154
|
+
// src/helpful-content.ts
|
|
155
|
+
/**
 * Limit a number from above.
 *
 * @param {number} value - Number to limit.
 * @param {number} max - Largest value that may be returned.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  const limited = Math.min(max, value);
  return limited;
}
|
|
158
|
+
/**
 * Limit a number from below.
 *
 * @param {number} value - Number to limit.
 * @param {number} min - Smallest value that may be returned.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  const raised = Math.max(min, value);
  return raised;
}
|
|
161
|
+
/**
 * Count how many times `pattern` occurs in `text`.
 *
 * `String.prototype.match` only returns one entry per occurrence when the
 * regex is global; for a non-global regex it returns the first match followed
 * by its capture groups, so the array length is not an occurrence count.
 * Non-global RegExp inputs are therefore re-created with the `g` flag.
 *
 * @param {string} text - Text to search.
 * @param {RegExp|string} pattern - Pattern to count. Non-RegExp values are
 *   passed to `match` unchanged.
 * @returns {number} Number of matches, or 0 when there are none.
 */
function countMatches(text, pattern) {
  let effective = pattern;
  if (pattern instanceof RegExp && !pattern.global) {
    effective = new RegExp(pattern.source, `${pattern.flags}g`);
  }
  return text.match(effective)?.length ?? 0;
}
|
|
164
|
+
/**
 * Replace every `<script>…</script>` and `<style>…</style>` element with a
 * single space so their contents are not mistaken for page text.
 *
 * The closing-tag patterns accept whitespace or other characters before `>`
 * (e.g. `</script >`), which browsers treat as a valid end tag. The `\b`
 * keeps the opening pattern from matching longer tag names that merely start
 * with "script" or "style".
 *
 * @param {string} html - Raw HTML.
 * @returns {string} HTML with script and style elements blanked out.
 */
function stripScriptsAndStyles(html) {
  return html
    .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, " ")
    .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, " ");
}
|
|
167
|
+
/**
 * Reduce an HTML document to its visible text.
 *
 * Script and style elements are removed first, every remaining tag becomes a
 * space, whitespace runs collapse to one space, and the result is trimmed.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Whitespace-normalised text.
 */
function getTextContent(html) {
  const withoutCode = stripScriptsAndStyles(html);
  const withoutTags = withoutCode.replace(/<[^>]*>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
170
|
+
/**
 * Return the markup inside the document's `<body>` element.
 *
 * When there is no `<body>` tag the input is returned unchanged. When the
 * opening tag exists but the closing `</body>` is missing (for example
 * because the fetched text was truncated), everything after the opening tag
 * is returned instead of falling back to the whole document, so `<head>`
 * content is not treated as body markup.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Body markup, or `html` itself when no `<body>` tag exists.
 */
function getBodyHtml(html) {
  const openTag = html.match(/<body\b[^>]*>/i);
  if (!openTag) return html;
  const start = openTag.index + openTag[0].length;
  const rest = html.slice(start);
  // Stop at the first closing tag, matching the original lazy capture.
  const closeOffset = rest.search(/<\/body\s*>/i);
  return closeOffset === -1 ? rest : rest.slice(0, closeOffset);
}
|
|
174
|
+
/**
 * Extract the text of the first `<p>` element in the document body.
 *
 * The opening-tag pattern requires `>` or whitespace right after `<p`, so
 * other elements whose names begin with "p" (`<pre>`, `<picture>`, `<path>`)
 * are not mistaken for a paragraph.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Tag-free, whitespace-normalised paragraph text, or "" when
 *   no paragraph is found.
 */
function getFirstParagraphText(html) {
  const firstPara = getBodyHtml(html).match(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/i);
  if (!firstPara) return "";
  return firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
178
|
+
/**
 * Keep only the leading whitespace-separated tokens of a string.
 *
 * @param {string} text - Text to shorten.
 * @param {number} count - Maximum number of tokens to keep.
 * @returns {string} The first `count` tokens joined by single spaces.
 */
function firstNWords(text, count) {
  const tokens = text.split(/\s+/);
  const kept = tokens.slice(0, count);
  return kept.join(" ");
}
|
|
181
|
+
/**
 * Extract the text of the first `<h1>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Heading text with nested tags removed and whitespace
 *   collapsed, or "" when there is no `<h1>`.
 */
function getH1Text(html) {
  const heading = /<h1[^>]*>([\s\S]*?)<\/h1>/i.exec(html);
  if (!heading) {
    return "";
  }
  const inner = heading[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
185
|
+
/**
 * Extract the text of the first `<title>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Title text with nested tags removed and whitespace
 *   collapsed, or "" when there is no `<title>`.
 */
function getTitleText(html) {
  const titleTag = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (!titleTag) {
    return "";
  }
  const inner = titleTag[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
189
|
+
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - Text to measure; any falsy value counts as empty.
 * @returns {number} Number of non-empty tokens.
 */
function wordCount(text) {
  if (!text) {
    return 0;
  }
  const tokens = text.split(/\s+/).filter(Boolean);
  return tokens.length;
}
|
|
192
|
+
/**
 * Heuristically decide whether a page looks like editorial or long-form
 * content rather than, say, a landing page.
 *
 * Signals are summed and the page counts as content-like at two or more:
 *  - URL path contains a content section name (2 points)
 *  - an `<article>` element is present
 *  - at least two `<h2>`/`<h3>` headings
 *  - at least 500 words of text
 *  - a `<time>` element or `datePublished`/`dateModified` marker
 *  - byline-style wording in the text
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL; skipped when falsy.
 * @returns {boolean} True when at least two signal points are found.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const wc = wordCount(text);
  let signals = 0;
  // Plural and "-y"/"-ies" endings are spelled out because the trailing \b
  // rejects any continuation of the word: "/case-stud" followed by \b can
  // never match "/case-study", and "/tutorial\b" misses "/tutorials".
  if (url && /\/(?:blogs?|articles?|guides?|docs|learn|help|news|insights|resources|how-to|tutorials?|case-stud(?:y|ies)|whitepapers?|faqs?)\b/i.test(url)) {
    signals += 2;
  }
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wc >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
|
|
206
|
+
/**
 * Decide whether readers would reasonably expect this page to explain its
 * methodology (reviews, comparisons, studies, benchmarks, …).
 *
 * Checks, in order: the combined `<title>` + `<h1>` text, the URL, and
 * finally explicit methodology wording in the page text.
 *
 * The short tokens "vs", "best" and "test" are matched as whole words only.
 * Unanchored, they fired on unrelated words such as "latest", "contest",
 * "testimonials", "bestseller" and "devs", and on host names such as
 * "bestbuy". The longer tokens stay prefix/substring matches as before.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL; treated as "" when falsy.
 * @returns {boolean} True when a methodology disclosure would be expected.
 */
function expectsMethodology(html, url) {
  const title = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  if (/(?:review|compare|comparison|\bvs\b|\bbest\b|benchmark|study|analysis|survey|report|research|tested|\btest(?:s|ing)?\b|methodology)/i.test(title)) {
    return true;
  }
  const urlText = (url || "").toLowerCase();
  // "_" is a word character, so letter look-arounds are used instead of \b to
  // treat "best_of" and "best-of" alike.
  if (/(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|(?<![a-z])best(?![a-z]))/i.test(urlText)) {
    return true;
  }
  // Text extraction is the most expensive step, so it runs only when the
  // cheaper title/URL checks were inconclusive.
  const text = getTextContent(html);
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
}
|
|
218
|
+
/**
 * Check whether the topic promised by the `<title>`/`<h1>` is actually
 * picked up early in the page copy.
 *
 * Keywords are the distinct title/h1 tokens of five or more characters that
 * are not generic filler words. The page aligns when at least two of them
 * (or the only one, if there is just one) occur in the first 250 words.
 *
 * The compared text is taken from the body with `<title>` and `<h1>`
 * elements removed. Otherwise the title and heading text themselves are part
 * of the searched text and the check succeeds trivially.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean} True when enough title keywords appear in the early copy;
 *   false when no usable keywords exist.
 */
function titleAndBodyAlign(html) {
  const h1 = getH1Text(html);
  const title = getTitleText(html);
  const copyHtml = getBodyHtml(html)
    .replace(/<title[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const text = firstNWords(getTextContent(copyHtml), 250).toLowerCase();
  const topic = `${title} ${h1}`.toLowerCase();
  const keywords = topic.split(/[\s|:()\-/]+/).filter((w) => w.length >= 5 && !/^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(w));
  const uniqueKeywords = [...new Set(keywords)];
  if (uniqueKeywords.length === 0) return false;
  return uniqueKeywords.filter((w) => text.includes(w)).length >= Math.min(2, uniqueKeywords.length);
}
|
|
228
|
+
// Phrase lexicons used by the helpful-content scorers below.
// Patterns with the `g` flag are meant for counting via String.prototype.match;
// patterns without it are used with RegExp.prototype.test.
// Apostrophe-optional phrases also accept the typographic apostrophe (U+2019),
// which is what published HTML usually contains ("today’s", "here’s").

// Boilerplate first-sentence openers (anchored to the start of the text).
var GENERIC_OPENERS = /^(?:in today['\u2019]?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you['\u2019]?re looking|in the modern|in the digital age)/i;
// Wording that gives the reader a concrete decision or next action.
var PRACTICAL_LANGUAGE = /\b(?:here['\u2019]?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
// Wording that acknowledges trade-offs, risks or caveats.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
// Stock filler phrases.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
// Call-to-action phrases, counted near the top of the body markup.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
// First-person statements of having done something.
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
// Context phrases that situate a claim in the author's own practice.
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
// Mentions of original supporting material.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
// Admissions of limits, failures or lessons learned.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
// Anchor tags whose href points at an author/team style path.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
// "written by" style bylines.
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
// Author-bio or credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
// Explicit methodology / process disclosure terms.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
// Concrete methodology details.
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
|
|
242
|
+
/**
 * Score (0–10) how directly a page serves the reader's task instead of
 * leading with filler.
 *
 * Pages without text score 0. Short pages (< 250 words) that are not
 * content-like get a neutral 5. Otherwise the base is 3 for content-like
 * pages and 5 for others, adjusted by:
 *  - first paragraph: +2 when it avoids a generic opener, -2 when it uses one
 *  - practical wording: +2 if within the first 300 words, else +1 for two or
 *    more occurrences anywhere
 *  - trade-off wording: +2 for two or more, +1 for one
 *  - title/body alignment: +1
 *  - summary or next-step wording: +1
 *  - call-to-action phrases in the first 1800 characters of body markup:
 *    -2 for three or more, -1 for two
 *  - filler phrases: -2 for three or more, -1 for one or two
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, forwarded to the content-likeness check.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  // Two-step ladder: `highPts` at or above `highAt`, `lowPts` at or above
  // `lowAt`, otherwise nothing.
  const ladder = (count, highAt, highPts, lowAt, lowPts) => {
    if (count >= highAt) return highPts;
    if (count >= lowAt) return lowPts;
    return 0;
  };

  const firstPara = getFirstParagraphText(html);
  const earlyText = firstNWords(text, 300);
  const earlyBodyHtml = getBodyHtml(html).slice(0, 1800);

  let score = contentLike ? 3 : 5;

  // A present first paragraph is either rewarded or penalised, never both.
  if (firstPara) {
    score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;
  }

  if (countMatches(earlyText, PRACTICAL_LANGUAGE) >= 1) {
    score += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    score += 1;
  }

  score += ladder(countMatches(text, TRADEOFF_LANGUAGE), 2, 2, 1, 1);

  if (titleAndBodyAlign(html)) {
    score += 1;
  }
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) {
    score += 1;
  }

  score += ladder(countMatches(earlyBodyHtml, EARLY_CTA_PATTERN), 3, -2, 2, -1);
  score += ladder(countMatches(text, FLUFF_LANGUAGE), 3, -2, 1, -1);

  return floor(cap(score, 10), 0);
}
|
|
269
|
+
/**
 * Score (0–10) how much evidence of direct use, testing or observation a
 * page shows.
 *
 * Pages without text score 0. The base is 2 for content-like pages and 5 for
 * others, adjusted by:
 *  - first-person action statements: +4 for three or more, +2 for one or two
 *  - practice/context phrases: +2 for two or more, +1 for one
 *  - artifact mentions plus `<figure>`/`<figcaption>` tags: +2 for three or
 *    more, +1 for one or two
 *  - limitation wording: +2 for two or more, +1 for one
 *  - manufacturer/vendor copy wording: -1
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, forwarded to the content-likeness check.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // `highPts` at or above `highAt`, `lowPts` for any other non-zero count.
  const ladder = (count, highAt, highPts, lowPts) => {
    if (count >= highAt) return highPts;
    return count >= 1 ? lowPts : 0;
  };

  let score = isContentLikePage(html, url) ? 2 : 5;

  score += ladder(countMatches(text, FIRST_HAND_ACTIONS), 3, 4, 2);
  score += ladder(countMatches(text, EXPERIENCE_CONTEXT), 2, 2, 1);

  const artifactMentions = countMatches(text, EXPERIENCE_ARTIFACTS);
  const figureTags = countMatches(html, /<figure|<figcaption/gi);
  score += ladder(artifactMentions + figureTags, 3, 2, 1);

  score += ladder(countMatches(text, LIMITATION_LANGUAGE), 2, 2, 1);

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) {
    score -= 1;
  }

  return floor(cap(score, 10), 0);
}
|
|
289
|
+
/**
 * Score (0–10) how visibly a page identifies who wrote or reviewed it.
 *
 * Pages without text score 0; pages that are not content-like get a neutral
 * 5. For content-like pages the score starts at 0 and adds:
 *  - +3 byline wording, an element class containing "author", or rel="author"
 *  - +2 a link to an author/team style path
 *  - +2 author-bio or credential wording
 *  - +1 reviewer/editor attribution
 *  - +2 a JSON-LD style `"@type": "Person"` marker
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, forwarded to the content-likeness check.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const hasByline =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);

  // Each entry pairs "signal present?" with the points it is worth.
  const signals = [
    [hasByline, 3],
    [AUTHOR_LINK_PATTERN.test(html), 2],
    [AUTHOR_BIO_PATTERN.test(text), 2],
    [/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text), 1],
    [/"@type"\s*:\s*"Person"/i.test(html), 2]
  ];

  let score = 0;
  for (const [present, points] of signals) {
    if (present) score += points;
  }
  return floor(cap(score, 10), 0);
}
|
|
304
|
+
/**
 * Score (0–10) how well a page discloses how its content was produced,
 * tested or reviewed.
 *
 * Pages without text score 0. The base is 2 when a methodology disclosure is
 * expected for this kind of page and a neutral 5 otherwise. (The original
 * also computed content-likeness, but both branches of that check yielded 5,
 * so the redundant call was removed.) The base is adjusted by:
 *  - methodology terms: +3 for two or more, +2 for one
 *  - methodology details: +3 for three or more, +2 for two, +1 for one
 *  - a quantified process statement ("tested 12", "over 3 weeks", …): +1
 *  - a `<figure>` or `<table>` together with a methodology term: +1
 *  - AI-assistance or human-review disclosure: +1
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [url] - Page URL, forwarded to the expectation check.
 * @returns {number} Score clamped to the range 0–10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  const expected = expectsMethodology(html, url);
  let score = expected ? 2 : 5;
  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;
  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
  return floor(cap(score, 10), 0);
}
|
|
322
|
+
|
|
150
323
|
// src/site-crawler.ts
|
|
151
|
-
async function fetchText(url) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
redirect: "follow"
|
|
157
|
-
});
|
|
158
|
-
const text = await res.text();
|
|
159
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
160
|
-
} catch {
|
|
161
|
-
return null;
|
|
162
|
-
}
|
|
324
|
+
/**
 * Fetch a URL through `safeFetch` and return its (size-limited) body text.
 *
 * Reading the body can still reject after the response headers arrived (for
 * example when the connection is aborted mid-stream). The previous
 * implementation wrapped the whole request in try/catch and returned null;
 * that guard is restored for the body read so one failed page cannot reject
 * the `Promise.all` batches built on this helper.
 *
 * @param {string} url - URL to fetch.
 * @param {string} [expectedDomain] - Passed through to `safeFetch` as
 *   `expectedDomain`. NOTE(review): presumably restricts the request to that
 *   domain — confirm against `safeFetch`.
 * @returns {Promise<{text: string, status: number, finalUrl: string}|null>}
 *   The first 500,000 characters of the body with status and final URL, or
 *   null when `safeFetch` yields nothing or the body cannot be read.
 */
async function fetchText(url, expectedDomain) {
  const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
  if (!res) return null;
  let text;
  try {
    text = await res.text();
  } catch {
    // Treat an unreadable body like a failed request.
    return null;
  }
  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
}
|
|
164
330
|
function extractDomain(url) {
|
|
165
331
|
return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
|
|
@@ -200,13 +366,16 @@ function isHtmlResponse(result) {
|
|
|
200
366
|
return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
|
|
201
367
|
}
|
|
202
368
|
async function prefetchSiteData(domain) {
|
|
369
|
+
if (!await isSafeFetchTarget(`https://${domain}`)) {
|
|
370
|
+
return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
|
|
371
|
+
}
|
|
203
372
|
let protocol = null;
|
|
204
373
|
let homepage = null;
|
|
205
|
-
homepage = await fetchText(`https://${domain}
|
|
374
|
+
homepage = await fetchText(`https://${domain}`, domain);
|
|
206
375
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
207
376
|
protocol = "https";
|
|
208
377
|
} else {
|
|
209
|
-
homepage = await fetchText(`http://${domain}
|
|
378
|
+
homepage = await fetchText(`http://${domain}`, domain);
|
|
210
379
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
211
380
|
protocol = "http";
|
|
212
381
|
}
|
|
@@ -226,38 +395,38 @@ async function prefetchSiteData(domain) {
|
|
|
226
395
|
}
|
|
227
396
|
const baseUrl = `${protocol}://${domain}`;
|
|
228
397
|
const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
|
|
229
|
-
fetchText(`${baseUrl}/llms.txt
|
|
230
|
-
fetchText(`${baseUrl}/robots.txt
|
|
231
|
-
fetchText(`${baseUrl}/faq
|
|
398
|
+
fetchText(`${baseUrl}/llms.txt`, domain),
|
|
399
|
+
fetchText(`${baseUrl}/robots.txt`, domain),
|
|
400
|
+
fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
|
|
232
401
|
if (result && result.status === 200) return result;
|
|
233
402
|
for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
|
|
234
|
-
const fallback = await fetchText(`${baseUrl}${path}
|
|
403
|
+
const fallback = await fetchText(`${baseUrl}${path}`, domain);
|
|
235
404
|
if (fallback && fallback.status === 200) return fallback;
|
|
236
405
|
}
|
|
237
406
|
return result;
|
|
238
407
|
}),
|
|
239
|
-
fetchText(`${baseUrl}/sitemap.xml
|
|
240
|
-
fetchText(`${baseUrl}/ai.txt
|
|
408
|
+
fetchText(`${baseUrl}/sitemap.xml`, domain),
|
|
409
|
+
fetchText(`${baseUrl}/ai.txt`, domain)
|
|
241
410
|
]);
|
|
242
411
|
let rssFeed = null;
|
|
243
412
|
if (homepage) {
|
|
244
413
|
const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
|
|
245
414
|
if (rssLinkMatch) {
|
|
246
415
|
const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
|
|
247
|
-
rssFeed = await fetchText(rssUrl);
|
|
416
|
+
rssFeed = await fetchText(rssUrl, domain);
|
|
248
417
|
}
|
|
249
418
|
if (!rssFeed || rssFeed.status !== 200) {
|
|
250
419
|
for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
|
|
251
|
-
rssFeed = await fetchText(`${baseUrl}${path}
|
|
420
|
+
rssFeed = await fetchText(`${baseUrl}${path}`, domain);
|
|
252
421
|
if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
|
|
253
422
|
rssFeed = null;
|
|
254
423
|
}
|
|
255
424
|
}
|
|
256
425
|
}
|
|
257
426
|
if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
|
|
258
|
-
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
|
|
427
|
+
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
|
|
259
428
|
if (subUrls.length > 0) {
|
|
260
|
-
const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
|
|
429
|
+
const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
|
|
261
430
|
for (const sub of subResults) {
|
|
262
431
|
if (sub && sub.status === 200) {
|
|
263
432
|
sitemapXml.text += "\n" + sub.text;
|
|
@@ -270,7 +439,7 @@ async function prefetchSiteData(domain) {
|
|
|
270
439
|
const sitemapForBlog = sitemapXml.text;
|
|
271
440
|
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
272
441
|
if (blogUrls.length > 0) {
|
|
273
|
-
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
442
|
+
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
|
|
274
443
|
blogSample = fetched.filter(
|
|
275
444
|
(r) => r !== null && r.status === 200 && r.text.length > 500
|
|
276
445
|
);
|
|
@@ -1052,8 +1221,8 @@ function checkDirectAnswerDensity(data) {
|
|
|
1052
1221
|
const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
1053
1222
|
const snippetZoneParagraphs = paragraphs.filter((p) => {
|
|
1054
1223
|
const text2 = p.replace(/<[^>]*>/g, "").trim();
|
|
1055
|
-
const
|
|
1056
|
-
return
|
|
1224
|
+
const wordCount2 = text2.split(/\s+/).length;
|
|
1225
|
+
return wordCount2 >= 40 && wordCount2 <= 150;
|
|
1057
1226
|
});
|
|
1058
1227
|
if (snippetZoneParagraphs.length >= 3) {
|
|
1059
1228
|
score += 2;
|
|
@@ -1321,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
|
1321
1490
|
});
|
|
1322
1491
|
return candidates.slice(0, limit).map((c) => c.url);
|
|
1323
1492
|
}
|
|
1324
|
-
function extractAllSubSitemapUrls(sitemapText,
|
|
1493
|
+
function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
|
|
1325
1494
|
if (!sitemapText.includes("<sitemapindex")) return [];
|
|
1495
|
+
const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
|
|
1496
|
+
const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
|
|
1326
1497
|
const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
|
|
1327
1498
|
const urls = sitemapLocs.map((block) => {
|
|
1328
1499
|
const match = block.match(/<loc>([^<]+)<\/loc>/i);
|
|
1329
1500
|
return match ? match[1].trim() : "";
|
|
1330
|
-
}).filter(
|
|
1501
|
+
}).filter((url) => !!url && isSafePublicUrl(url, domain));
|
|
1331
1502
|
const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
|
|
1332
1503
|
const rest = urls.filter((u) => !preferred.includes(u));
|
|
1333
1504
|
return [...preferred, ...rest].slice(0, limit);
|
|
@@ -2201,6 +2372,123 @@ function checkContentDepth(data, topicCoherenceScore) {
|
|
|
2201
2372
|
}
|
|
2202
2373
|
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2203
2374
|
}
|
|
2375
|
+
/**
 * Apply a per-page scorer to the homepage and every sampled blog page.
 *
 * Each page is scored with its own `finalUrl`; when that is missing the site
 * root (`<protocol>://<domain>/`, defaulting to https) is used instead.
 *
 * @param {object} data - Prefetched site data (`homepage`, `blogSample`,
 *   `protocol`, `domain`).
 * @param {(html: string, url: string) => number} scorer - Page scoring function.
 * @returns {{url: string, score: number}[]} One entry per scored page,
 *   homepage first.
 */
function scoreSampledPages(data, scorer) {
  const resolveUrl = (page) => {
    if (page.finalUrl) return page.finalUrl;
    return data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`;
  };
  const sampled = [];
  if (data.homepage) sampled.push(data.homepage);
  if (data.blogSample) sampled.push(...data.blogSample);
  return sampled.map((page) => {
    const url = resolveUrl(page);
    return { url, score: scorer(page.text, url) };
  });
}
|
|
2389
|
+
/**
 * Aggregate per-page scores into the figures used by the helpful-content
 * checks.
 *
 * @param {{url: string, score: number}[]} pageScores - Scored pages.
 * @returns {{total: number, average: number, strong: object[], weak: object[]}}
 *   `total` pages, rounded mean `average` (0 when empty), pages scoring 8 or
 *   more (`strong`) and pages scoring 4 or less (`weak`).
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let sum = 0;
  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }
  let average = 0;
  if (total > 0) {
    average = Math.round(sum / total);
  }
  return { total, average, strong, weak };
}
|
|
2396
|
+
/**
 * Criterion check: do the sampled pages lead with useful guidance rather
 * than filler?
 *
 * Scores the homepage and blog sample with `scoreHelpfulPurposeAlignment`
 * and reports the rounded average. Without a homepage the criterion is
 * reported as "not_found" with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {object} Criterion result (`criterion`, `criterion_label`, `score`,
 *   `status`, `findings`, `fix_priority`).
 */
function checkHelpfulPurposeAlignment(data) {
  const findings = [];
  const result = (score, status, fix_priority) => ({
    criterion: "helpful_purpose_alignment",
    criterion_label: "Helpful Purpose Alignment",
    score,
    status,
    findings,
    fix_priority
  });

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return result(0, "not_found", "P1");
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));
  const { total, average, strong, weak } = summary;

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler`
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: `${strong.length}/${total} pages clearly lead with useful guidance`,
      fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster"
    });
  } else {
    findings.push({
      severity: "medium",
      detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`,
      fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer"
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";
  return result(average, status, average >= 7 ? "P3" : "P1");
}
|
|
2420
|
+
/**
 * Criterion check: do the sampled pages show evidence of direct use, testing
 * or observation?
 *
 * Scores the homepage and blog sample with `scoreFirstHandExperienceSignals`
 * and reports the rounded average. Without a homepage the criterion is
 * reported as "not_found" with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {object} Criterion result (`criterion`, `criterion_label`, `score`,
 *   `status`, `findings`, `fix_priority`).
 */
function checkFirstHandExperienceSignals(data) {
  const findings = [];
  const result = (score, status, fix_priority) => ({
    criterion: "first_hand_experience_signals",
    criterion_label: "First-Hand Experience Signals",
    score,
    status,
    findings,
    fix_priority
  });

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return result(0, "not_found", "P2");
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));
  const { total, average, strong, weak } = summary;

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation`
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: `Moderate experiential depth across ${total} sampled pages`,
      fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant"
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Little first-hand experience is visible in sampled content",
      fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries"
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";
  return result(average, status, average >= 7 ? "P3" : "P2");
}
|
|
2444
|
+
/**
 * Criterion check: do the sampled pages visibly identify their authors or
 * reviewers?
 *
 * Scores the homepage and blog sample with `scoreCreatorTransparency` and
 * reports the rounded average. Without a homepage the criterion is reported
 * as "not_found" with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {object} Criterion result (`criterion`, `criterion_label`, `score`,
 *   `status`, `findings`, `fix_priority`).
 */
function checkCreatorTransparency(data) {
  const findings = [];
  const result = (score, status, fix_priority) => ({
    criterion: "creator_transparency",
    criterion_label: "Creator Transparency",
    score,
    status,
    findings,
    fix_priority
  });

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return result(0, "not_found", "P2");
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));
  const { total, average, strong, weak } = summary;

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution`
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: "Visible authorship is present on some content but inconsistent",
      fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them"
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Creator visibility is weak on content-like pages",
      fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone"
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";
  return result(average, status, average >= 7 ? "P3" : "P2");
}
|
|
2468
|
+
/**
 * Criterion check: do the sampled pages explain how their content was
 * researched, tested, reviewed or updated?
 *
 * Scores the homepage and blog sample with `scoreMethodologyTransparency`
 * and reports the rounded average. Without a homepage the criterion is
 * reported as "not_found" with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {object} Criterion result (`criterion`, `criterion_label`, `score`,
 *   `status`, `findings`, `fix_priority`).
 */
function checkMethodologyTransparency(data) {
  const findings = [];
  const result = (score, status, fix_priority) => ({
    criterion: "methodology_transparency",
    criterion_label: "Methodology Transparency",
    score,
    status,
    findings,
    fix_priority
  });

  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return result(0, "not_found", "P2");
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));
  const { total, average, strong, weak } = summary;

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated`
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: "Some process transparency exists, but it is inconsistent",
      fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them'
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Little content-production or review transparency is visible",
      fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions"
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";
  return result(average, status, average >= 7 ? "P3" : "P2");
}
|
|
2204
2492
|
function checkCitationReadyWriting(data) {
|
|
2205
2493
|
const findings = [];
|
|
2206
2494
|
if (!data.homepage) {
|
|
@@ -2296,8 +2584,8 @@ function checkAnswerFirstPlacement(data) {
|
|
|
2296
2584
|
const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
|
|
2297
2585
|
for (const p of earlyParagraphs) {
|
|
2298
2586
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
2299
|
-
const
|
|
2300
|
-
if (
|
|
2587
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
2588
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
2301
2589
|
shortAnswerCount++;
|
|
2302
2590
|
break;
|
|
2303
2591
|
}
|
|
@@ -2839,20 +3127,29 @@ function auditSiteFromData(data) {
|
|
|
2839
3127
|
checkVisibleDateSignal(data),
|
|
2840
3128
|
topicCoherence,
|
|
2841
3129
|
checkContentDepth(data, topicCoherence.score),
|
|
2842
|
-
//
|
|
3130
|
+
// Helpful-content criteria (#29-#32)
|
|
3131
|
+
checkHelpfulPurposeAlignment(data),
|
|
3132
|
+
checkFirstHandExperienceSignals(data),
|
|
3133
|
+
checkCreatorTransparency(data),
|
|
3134
|
+
checkMethodologyTransparency(data),
|
|
3135
|
+
// V2 criteria (#33-#38)
|
|
2843
3136
|
checkCitationReadyWriting(data),
|
|
2844
3137
|
checkAnswerFirstPlacement(data),
|
|
2845
3138
|
checkEvidencePackaging(data),
|
|
2846
3139
|
checkEntityDisambiguation(data),
|
|
2847
3140
|
checkExtractionFriction(data),
|
|
2848
3141
|
checkImageContextAI(data),
|
|
2849
|
-
// V3 criteria (#
|
|
3142
|
+
// V3 criteria (#39-#40)
|
|
2850
3143
|
checkDuplicateContent(data),
|
|
2851
3144
|
checkCrossPageDuplication(data)
|
|
2852
3145
|
];
|
|
2853
3146
|
}
|
|
2854
3147
|
async function auditSite(targetUrl) {
|
|
2855
|
-
const
|
|
3148
|
+
const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
|
|
3149
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
3150
|
+
throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
|
|
3151
|
+
}
|
|
3152
|
+
const url = new URL(normalizedTarget);
|
|
2856
3153
|
const domain = url.hostname.replace(/^www\./, "");
|
|
2857
3154
|
const data = await prefetchSiteData(domain);
|
|
2858
3155
|
return auditSiteFromData(data);
|
|
@@ -2878,6 +3175,10 @@ var WEIGHTS = {
|
|
|
2878
3175
|
// Relevance to actual AI queries
|
|
2879
3176
|
faq_section: 0.03,
|
|
2880
3177
|
// Structured Q&A pairs
|
|
3178
|
+
helpful_purpose_alignment: 0.03,
|
|
3179
|
+
// Visitor-helpful vs search-first framing
|
|
3180
|
+
first_hand_experience_signals: 0.03,
|
|
3181
|
+
// Evidence of real use or observation
|
|
2881
3182
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2882
3183
|
// HOW easily AI engines can extract and trust your content.
|
|
2883
3184
|
entity_consistency: 0.05,
|
|
@@ -2892,9 +3193,13 @@ var WEIGHTS = {
|
|
|
2892
3193
|
// Expert attribution
|
|
2893
3194
|
table_list_extractability: 0.03,
|
|
2894
3195
|
// Extractable structured data
|
|
2895
|
-
|
|
3196
|
+
creator_transparency: 0.02,
|
|
3197
|
+
// Visible author/reviewer clarity
|
|
3198
|
+
methodology_transparency: 0.02,
|
|
3199
|
+
// Process disclosure
|
|
3200
|
+
definition_patterns: 0.015,
|
|
2896
3201
|
// Clear definitions
|
|
2897
|
-
visible_date_signal: 0.
|
|
3202
|
+
visible_date_signal: 0.015,
|
|
2898
3203
|
// Publication date trust
|
|
2899
3204
|
semantic_html: 0.02,
|
|
2900
3205
|
// Clean semantic structure
|
|
@@ -2903,15 +3208,15 @@ var WEIGHTS = {
|
|
|
2903
3208
|
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2904
3209
|
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2905
3210
|
content_cannibalization: 0.02,
|
|
2906
|
-
llms_txt: 0.
|
|
2907
|
-
robots_txt: 0.
|
|
3211
|
+
llms_txt: 0.01,
|
|
3212
|
+
robots_txt: 0.01,
|
|
2908
3213
|
content_velocity: 0.02,
|
|
2909
|
-
content_licensing: 0.
|
|
3214
|
+
content_licensing: 0.01,
|
|
2910
3215
|
sitemap_completeness: 0.01,
|
|
2911
|
-
canonical_url:
|
|
2912
|
-
rss_feed:
|
|
2913
|
-
schema_coverage:
|
|
2914
|
-
speakable_schema:
|
|
3216
|
+
canonical_url: 5e-3,
|
|
3217
|
+
rss_feed: 5e-3,
|
|
3218
|
+
schema_coverage: 5e-3,
|
|
3219
|
+
speakable_schema: 5e-3,
|
|
2915
3220
|
// ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
|
|
2916
3221
|
// Citation quality, evidence packaging, and extraction friction.
|
|
2917
3222
|
citation_ready_writing: 0.04,
|
|
@@ -2924,7 +3229,7 @@ var WEIGHTS = {
|
|
|
2924
3229
|
// Clear entity boundaries
|
|
2925
3230
|
extraction_friction: 0.02,
|
|
2926
3231
|
// Sentence length, voice, jargon
|
|
2927
|
-
image_context_ai:
|
|
3232
|
+
image_context_ai: 5e-3,
|
|
2928
3233
|
// Figure/figcaption, alt text quality
|
|
2929
3234
|
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2930
3235
|
duplicate_content: 0.05,
|
|
@@ -2944,8 +3249,8 @@ function calculateOverallScore(criteria) {
|
|
|
2944
3249
|
let score = Math.round(weightedSum / totalWeight);
|
|
2945
3250
|
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2946
3251
|
if (coherence && coherence.score < 6) {
|
|
2947
|
-
const
|
|
2948
|
-
score = Math.min(score,
|
|
3252
|
+
const cap3 = 35 + coherence.score * 5;
|
|
3253
|
+
score = Math.min(score, cap3);
|
|
2949
3254
|
}
|
|
2950
3255
|
return score;
|
|
2951
3256
|
}
|
|
@@ -2960,6 +3265,8 @@ var PILLARS = {
|
|
|
2960
3265
|
"citation_ready_writing",
|
|
2961
3266
|
"answer_first_placement",
|
|
2962
3267
|
"evidence_packaging",
|
|
3268
|
+
"helpful_purpose_alignment",
|
|
3269
|
+
"first_hand_experience_signals",
|
|
2963
3270
|
"duplicate_content",
|
|
2964
3271
|
"cross_page_duplication"
|
|
2965
3272
|
],
|
|
@@ -2977,7 +3284,9 @@ var PILLARS = {
|
|
|
2977
3284
|
"internal_linking",
|
|
2978
3285
|
"content_freshness",
|
|
2979
3286
|
"author_schema_depth",
|
|
2980
|
-
"schema_markup"
|
|
3287
|
+
"schema_markup",
|
|
3288
|
+
"creator_transparency",
|
|
3289
|
+
"methodology_transparency"
|
|
2981
3290
|
],
|
|
2982
3291
|
"Technical Foundation": [
|
|
2983
3292
|
"semantic_html",
|
|
@@ -3007,6 +3316,8 @@ var CLIENT_NAMES = {
|
|
|
3007
3316
|
citation_ready_writing: "Citation-Ready Writing",
|
|
3008
3317
|
answer_first_placement: "Answer-First Placement",
|
|
3009
3318
|
evidence_packaging: "Evidence Packaging",
|
|
3319
|
+
helpful_purpose_alignment: "Helpful Purpose Alignment",
|
|
3320
|
+
first_hand_experience_signals: "First-Hand Experience Signals",
|
|
3010
3321
|
direct_answer_density: "Direct Answer Density",
|
|
3011
3322
|
qa_content_format: "Q&A Content Format",
|
|
3012
3323
|
query_answer_alignment: "Query-Answer Alignment",
|
|
@@ -3019,6 +3330,8 @@ var CLIENT_NAMES = {
|
|
|
3019
3330
|
content_freshness: "Content Freshness",
|
|
3020
3331
|
author_schema_depth: "Author & Expert Schema",
|
|
3021
3332
|
schema_markup: "Schema Markup",
|
|
3333
|
+
creator_transparency: "Creator Transparency",
|
|
3334
|
+
methodology_transparency: "Methodology Transparency",
|
|
3022
3335
|
semantic_html: "Semantic HTML",
|
|
3023
3336
|
clean_html: "Clean HTML",
|
|
3024
3337
|
visible_date_signal: "Visible Date Signal",
|
|
@@ -3045,6 +3358,8 @@ var PILLAR_WEIGHTS = {
|
|
|
3045
3358
|
citation_ready_writing: 0.04,
|
|
3046
3359
|
answer_first_placement: 0.03,
|
|
3047
3360
|
evidence_packaging: 0.03,
|
|
3361
|
+
helpful_purpose_alignment: 0.03,
|
|
3362
|
+
first_hand_experience_signals: 0.03,
|
|
3048
3363
|
duplicate_content: 0.05,
|
|
3049
3364
|
cross_page_duplication: 0.03,
|
|
3050
3365
|
direct_answer_density: 0.05,
|
|
@@ -3052,28 +3367,30 @@ var PILLAR_WEIGHTS = {
|
|
|
3052
3367
|
query_answer_alignment: 0.04,
|
|
3053
3368
|
faq_section: 0.03,
|
|
3054
3369
|
table_list_extractability: 0.03,
|
|
3055
|
-
definition_patterns: 0.
|
|
3370
|
+
definition_patterns: 0.015,
|
|
3056
3371
|
entity_disambiguation: 0.02,
|
|
3057
3372
|
entity_consistency: 0.05,
|
|
3058
3373
|
internal_linking: 0.04,
|
|
3059
3374
|
content_freshness: 0.04,
|
|
3060
3375
|
author_schema_depth: 0.03,
|
|
3061
3376
|
schema_markup: 0.03,
|
|
3377
|
+
creator_transparency: 0.02,
|
|
3378
|
+
methodology_transparency: 0.02,
|
|
3062
3379
|
semantic_html: 0.02,
|
|
3063
3380
|
clean_html: 0.02,
|
|
3064
|
-
visible_date_signal: 0.
|
|
3381
|
+
visible_date_signal: 0.015,
|
|
3065
3382
|
extraction_friction: 0.02,
|
|
3066
|
-
image_context_ai:
|
|
3067
|
-
schema_coverage:
|
|
3068
|
-
speakable_schema:
|
|
3383
|
+
image_context_ai: 5e-3,
|
|
3384
|
+
schema_coverage: 5e-3,
|
|
3385
|
+
speakable_schema: 5e-3,
|
|
3069
3386
|
content_cannibalization: 0.02,
|
|
3070
|
-
llms_txt: 0.
|
|
3071
|
-
robots_txt: 0.
|
|
3387
|
+
llms_txt: 0.01,
|
|
3388
|
+
robots_txt: 0.01,
|
|
3072
3389
|
content_velocity: 0.02,
|
|
3073
|
-
content_licensing: 0.
|
|
3074
|
-
canonical_url:
|
|
3390
|
+
content_licensing: 0.01,
|
|
3391
|
+
canonical_url: 5e-3,
|
|
3075
3392
|
sitemap_completeness: 0.01,
|
|
3076
|
-
rss_feed:
|
|
3393
|
+
rss_feed: 5e-3
|
|
3077
3394
|
};
|
|
3078
3395
|
var CRITERION_EFFORT = {
|
|
3079
3396
|
topic_coherence: "High",
|
|
@@ -3083,6 +3400,8 @@ var CRITERION_EFFORT = {
|
|
|
3083
3400
|
citation_ready_writing: "Medium",
|
|
3084
3401
|
answer_first_placement: "Medium",
|
|
3085
3402
|
evidence_packaging: "Medium",
|
|
3403
|
+
helpful_purpose_alignment: "Medium",
|
|
3404
|
+
first_hand_experience_signals: "Medium",
|
|
3086
3405
|
duplicate_content: "Medium",
|
|
3087
3406
|
cross_page_duplication: "Medium",
|
|
3088
3407
|
direct_answer_density: "Medium",
|
|
@@ -3097,6 +3416,8 @@ var CRITERION_EFFORT = {
|
|
|
3097
3416
|
content_freshness: "Low",
|
|
3098
3417
|
author_schema_depth: "Low",
|
|
3099
3418
|
schema_markup: "Medium",
|
|
3419
|
+
creator_transparency: "Low",
|
|
3420
|
+
methodology_transparency: "Low",
|
|
3100
3421
|
semantic_html: "Low",
|
|
3101
3422
|
clean_html: "Medium",
|
|
3102
3423
|
visible_date_signal: "Low",
|
|
@@ -3121,6 +3442,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3121
3442
|
citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
|
|
3122
3443
|
answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
|
|
3123
3444
|
evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
|
|
3445
|
+
helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
|
|
3446
|
+
first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
|
|
3124
3447
|
direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
|
|
3125
3448
|
qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
|
|
3126
3449
|
query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
|
|
@@ -3133,6 +3456,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3133
3456
|
content_freshness: "Add dateModified schema and visible last-updated dates.",
|
|
3134
3457
|
author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
|
|
3135
3458
|
schema_markup: "Implement JSON-LD structured data on key pages.",
|
|
3459
|
+
creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
|
|
3460
|
+
methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
|
|
3136
3461
|
semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
|
|
3137
3462
|
clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
|
|
3138
3463
|
visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
|
|
@@ -3232,6 +3557,10 @@ var CRITERION_LABELS = {
|
|
|
3232
3557
|
"Visible Date Signal": "Visible Date Signal",
|
|
3233
3558
|
"Topic Coherence": "Topic Coherence",
|
|
3234
3559
|
"Content Depth": "Content Depth",
|
|
3560
|
+
"Helpful Purpose Alignment": "Helpful Purpose Alignment",
|
|
3561
|
+
"First-Hand Experience Signals": "First-Hand Experience Signals",
|
|
3562
|
+
"Creator Transparency": "Creator Transparency",
|
|
3563
|
+
"Methodology Transparency": "Methodology Transparency",
|
|
3235
3564
|
"Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
|
|
3236
3565
|
"Answer-First Placement": "Answer-First Placement",
|
|
3237
3566
|
"Evidence Packaging": "Evidence Packaging",
|
|
@@ -3337,6 +3666,8 @@ var CRITERION_WEIGHTS = {
|
|
|
3337
3666
|
qa_content_format: 0.04,
|
|
3338
3667
|
query_answer_alignment: 0.04,
|
|
3339
3668
|
faq_section: 0.03,
|
|
3669
|
+
helpful_purpose_alignment: 0.03,
|
|
3670
|
+
first_hand_experience_signals: 0.03,
|
|
3340
3671
|
// Content Organization (~30%)
|
|
3341
3672
|
entity_consistency: 0.05,
|
|
3342
3673
|
internal_linking: 0.04,
|
|
@@ -3344,28 +3675,30 @@ var CRITERION_WEIGHTS = {
|
|
|
3344
3675
|
schema_markup: 0.03,
|
|
3345
3676
|
author_schema_depth: 0.03,
|
|
3346
3677
|
table_list_extractability: 0.03,
|
|
3347
|
-
|
|
3348
|
-
|
|
3678
|
+
creator_transparency: 0.02,
|
|
3679
|
+
methodology_transparency: 0.02,
|
|
3680
|
+
definition_patterns: 0.015,
|
|
3681
|
+
visible_date_signal: 0.015,
|
|
3349
3682
|
semantic_html: 0.02,
|
|
3350
3683
|
clean_html: 0.02,
|
|
3351
3684
|
// Technical Plumbing (~15%)
|
|
3352
3685
|
content_cannibalization: 0.02,
|
|
3353
|
-
llms_txt: 0.
|
|
3354
|
-
robots_txt: 0.
|
|
3686
|
+
llms_txt: 0.01,
|
|
3687
|
+
robots_txt: 0.01,
|
|
3355
3688
|
content_velocity: 0.02,
|
|
3356
|
-
content_licensing: 0.
|
|
3689
|
+
content_licensing: 0.01,
|
|
3357
3690
|
sitemap_completeness: 0.01,
|
|
3358
|
-
canonical_url:
|
|
3359
|
-
rss_feed:
|
|
3360
|
-
schema_coverage:
|
|
3361
|
-
speakable_schema:
|
|
3691
|
+
canonical_url: 5e-3,
|
|
3692
|
+
rss_feed: 5e-3,
|
|
3693
|
+
schema_coverage: 5e-3,
|
|
3694
|
+
speakable_schema: 5e-3,
|
|
3362
3695
|
// V2 Criteria (~15%)
|
|
3363
3696
|
citation_ready_writing: 0.04,
|
|
3364
3697
|
answer_first_placement: 0.03,
|
|
3365
3698
|
evidence_packaging: 0.03,
|
|
3366
3699
|
entity_disambiguation: 0.02,
|
|
3367
3700
|
extraction_friction: 0.02,
|
|
3368
|
-
image_context_ai:
|
|
3701
|
+
image_context_ai: 5e-3,
|
|
3369
3702
|
// V3 Criteria
|
|
3370
3703
|
duplicate_content: 0.05,
|
|
3371
3704
|
cross_page_duplication: 0.03
|
|
@@ -3406,6 +3739,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3406
3739
|
effort: "Medium",
|
|
3407
3740
|
description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
|
|
3408
3741
|
},
|
|
3742
|
+
helpful_purpose_alignment: {
|
|
3743
|
+
name: "Improve Helpful Purpose Alignment",
|
|
3744
|
+
effort: "Medium",
|
|
3745
|
+
description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
|
|
3746
|
+
},
|
|
3747
|
+
first_hand_experience_signals: {
|
|
3748
|
+
name: "Add First-Hand Experience Signals",
|
|
3749
|
+
effort: "Medium",
|
|
3750
|
+
description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
|
|
3751
|
+
},
|
|
3409
3752
|
original_data: {
|
|
3410
3753
|
name: "Add Original Data & Case Studies",
|
|
3411
3754
|
effort: "High",
|
|
@@ -3461,6 +3804,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3461
3804
|
effort: "Low",
|
|
3462
3805
|
description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
|
|
3463
3806
|
},
|
|
3807
|
+
creator_transparency: {
|
|
3808
|
+
name: "Improve Creator Transparency",
|
|
3809
|
+
effort: "Low",
|
|
3810
|
+
description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
|
|
3811
|
+
},
|
|
3812
|
+
methodology_transparency: {
|
|
3813
|
+
name: "Add Methodology Transparency",
|
|
3814
|
+
effort: "Low",
|
|
3815
|
+
description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
|
|
3816
|
+
},
|
|
3464
3817
|
fact_density: {
|
|
3465
3818
|
name: "Increase Fact & Data Density",
|
|
3466
3819
|
effort: "Medium",
|
|
@@ -3720,20 +4073,12 @@ function formatList(items) {
|
|
|
3720
4073
|
}
|
|
3721
4074
|
|
|
3722
4075
|
// src/multi-page-fetcher.ts
|
|
3723
|
-
async function fetchPage(url, timeoutMs = 1e4) {
|
|
3724
|
-
|
|
3725
|
-
|
|
3726
|
-
|
|
3727
|
-
|
|
3728
|
-
|
|
3729
|
-
});
|
|
3730
|
-
if (res.status !== 200) return null;
|
|
3731
|
-
const text = await res.text();
|
|
3732
|
-
if (text.length < 200) return null;
|
|
3733
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3734
|
-
} catch {
|
|
3735
|
-
return null;
|
|
3736
|
-
}
|
|
4076
|
+
async function fetchPage(url, domain, timeoutMs = 1e4) {
|
|
4077
|
+
const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
|
|
4078
|
+
if (!res || res.status !== 200) return null;
|
|
4079
|
+
const text = await res.text();
|
|
4080
|
+
if (text.length < 200) return null;
|
|
4081
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3737
4082
|
}
|
|
3738
4083
|
var PAGE_VARIANTS = {
|
|
3739
4084
|
about: ["/about", "/about-us", "/company", "/who-we-are"],
|
|
@@ -3889,7 +4234,7 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
3889
4234
|
}
|
|
3890
4235
|
const entries = Array.from(urlsToFetch.entries());
|
|
3891
4236
|
if (entries.length === 0) return 0;
|
|
3892
|
-
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|
|
4237
|
+
const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
|
|
3893
4238
|
if (!siteData.blogSample) siteData.blogSample = [];
|
|
3894
4239
|
let added = 0;
|
|
3895
4240
|
for (let i = 0; i < results.length; i++) {
|
|
@@ -3916,19 +4261,23 @@ var PAGE_CRITERIA = {
|
|
|
3916
4261
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3917
4262
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
3918
4263
|
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
3919
|
-
definition_patterns: { weight: 0.
|
|
3920
|
-
visible_date_signal: { weight: 0.
|
|
4264
|
+
definition_patterns: { weight: 0.015, label: "Definition Patterns" },
|
|
4265
|
+
visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
|
|
3921
4266
|
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
3922
4267
|
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
3923
4268
|
// Technical Plumbing
|
|
3924
|
-
canonical_url: { weight:
|
|
4269
|
+
canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
|
|
3925
4270
|
// V2 Criteria
|
|
3926
4271
|
citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
|
|
3927
4272
|
answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
|
|
3928
4273
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
4274
|
+
helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
|
|
4275
|
+
first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
|
|
3929
4276
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
3930
4277
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
3931
|
-
|
|
4278
|
+
creator_transparency: { weight: 0.02, label: "Creator Transparency" },
|
|
4279
|
+
methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
|
|
4280
|
+
image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
|
|
3932
4281
|
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
3933
4282
|
};
|
|
3934
4283
|
function extractJsonLdBlocks(html) {
|
|
@@ -3951,7 +4300,7 @@ function extractTypesFromJsonLd(blocks) {
|
|
|
3951
4300
|
}
|
|
3952
4301
|
return types;
|
|
3953
4302
|
}
|
|
3954
|
-
function
|
|
4303
|
+
function getTextContent2(html) {
|
|
3955
4304
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
3956
4305
|
}
|
|
3957
4306
|
function extractQuestionHeadings2(html) {
|
|
@@ -3979,7 +4328,7 @@ function countAnsweredQuestions(html) {
|
|
|
3979
4328
|
}
|
|
3980
4329
|
return { total: questions.length, answered };
|
|
3981
4330
|
}
|
|
3982
|
-
function
|
|
4331
|
+
function cap2(value, max) {
|
|
3983
4332
|
return Math.min(value, max);
|
|
3984
4333
|
}
|
|
3985
4334
|
function scoreSchemaMarkup(html) {
|
|
@@ -4005,10 +4354,10 @@ function scoreSchemaMarkup(html) {
|
|
|
4005
4354
|
for (const t of types) {
|
|
4006
4355
|
if (knownTypes.includes(t)) knownCount++;
|
|
4007
4356
|
}
|
|
4008
|
-
score +=
|
|
4357
|
+
score += cap2(knownCount * 2, 4);
|
|
4009
4358
|
if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
|
|
4010
4359
|
if (types.has("FAQPage")) score += 1;
|
|
4011
|
-
return
|
|
4360
|
+
return cap2(score, 10);
|
|
4012
4361
|
}
|
|
4013
4362
|
function scoreQAFormat(html) {
|
|
4014
4363
|
const questions = extractQuestionHeadings2(html);
|
|
@@ -4020,7 +4369,7 @@ function scoreQAFormat(html) {
|
|
|
4020
4369
|
if (answered >= 1) score += 3;
|
|
4021
4370
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4022
4371
|
if (h1Matches.length === 1) score += 2;
|
|
4023
|
-
return
|
|
4372
|
+
return cap2(score, 10);
|
|
4024
4373
|
}
|
|
4025
4374
|
function scoreCleanHtml(html) {
|
|
4026
4375
|
let score = 0;
|
|
@@ -4029,15 +4378,15 @@ function scoreCleanHtml(html) {
|
|
|
4029
4378
|
for (const tag of semantics) {
|
|
4030
4379
|
if (html.toLowerCase().includes(tag)) semCount++;
|
|
4031
4380
|
}
|
|
4032
|
-
score +=
|
|
4381
|
+
score += cap2(semCount, 3);
|
|
4033
4382
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4034
4383
|
if (h1Matches.length === 1) score += 2;
|
|
4035
|
-
const text =
|
|
4384
|
+
const text = getTextContent2(html);
|
|
4036
4385
|
if (text.length > 500) score += 3;
|
|
4037
4386
|
const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
|
|
4038
4387
|
const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
|
|
4039
4388
|
if (hasTitle && hasDesc) score += 2;
|
|
4040
|
-
return
|
|
4389
|
+
return cap2(score, 10);
|
|
4041
4390
|
}
|
|
4042
4391
|
function scoreFaqSection(html) {
|
|
4043
4392
|
let score = 0;
|
|
@@ -4049,11 +4398,11 @@ function scoreFaqSection(html) {
|
|
|
4049
4398
|
const questions = extractQuestionHeadings2(html);
|
|
4050
4399
|
if (questions.length >= 10) score += 1;
|
|
4051
4400
|
if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
|
|
4052
|
-
return
|
|
4401
|
+
return cap2(score, 10);
|
|
4053
4402
|
}
|
|
4054
4403
|
function scoreOriginalData(html) {
|
|
4055
4404
|
let score = 0;
|
|
4056
|
-
const text =
|
|
4405
|
+
const text = getTextContent2(html);
|
|
4057
4406
|
if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
|
|
4058
4407
|
score += 3;
|
|
4059
4408
|
} else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
|
|
@@ -4070,7 +4419,7 @@ function scoreOriginalData(html) {
|
|
|
4070
4419
|
if (/href=["'][^"']*\/blog\b/i.test(html)) {
|
|
4071
4420
|
score += 2;
|
|
4072
4421
|
}
|
|
4073
|
-
return
|
|
4422
|
+
return cap2(score, 10);
|
|
4074
4423
|
}
|
|
4075
4424
|
function scoreQueryAnswerAlignment(html) {
|
|
4076
4425
|
const { total, answered } = countAnsweredQuestions(html);
|
|
@@ -4093,7 +4442,7 @@ function scoreContentFreshness(html) {
|
|
|
4093
4442
|
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
4094
4443
|
const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
|
|
4095
4444
|
if (yearPattern.test(html)) score += 2;
|
|
4096
|
-
return
|
|
4445
|
+
return cap2(score, 10);
|
|
4097
4446
|
}
|
|
4098
4447
|
function scoreTableListExtractability(html) {
|
|
4099
4448
|
let score = 0;
|
|
@@ -4106,7 +4455,7 @@ function scoreTableListExtractability(html) {
|
|
|
4106
4455
|
const listItems = html.match(/<li[\s>]/gi) || [];
|
|
4107
4456
|
if (listItems.length >= 10) score += 1;
|
|
4108
4457
|
if (/<dl[\s>]/i.test(html)) score += 1;
|
|
4109
|
-
return
|
|
4458
|
+
return cap2(score, 10);
|
|
4110
4459
|
}
|
|
4111
4460
|
function scoreDirectAnswerDensity(html) {
|
|
4112
4461
|
let score = 0;
|
|
@@ -4122,9 +4471,9 @@ function scoreDirectAnswerDensity(html) {
|
|
|
4122
4471
|
}
|
|
4123
4472
|
if (snippetCount >= 3) score += 2;
|
|
4124
4473
|
else if (snippetCount >= 1) score += 1;
|
|
4125
|
-
const directOpeners =
|
|
4474
|
+
const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
|
|
4126
4475
|
if (directOpeners.length >= 2) score += 2;
|
|
4127
|
-
return
|
|
4476
|
+
return cap2(score, 10);
|
|
4128
4477
|
}
|
|
4129
4478
|
function scoreSemanticHtml(html) {
|
|
4130
4479
|
let score = 0;
|
|
@@ -4134,7 +4483,7 @@ function scoreSemanticHtml(html) {
|
|
|
4134
4483
|
for (const el of elements) {
|
|
4135
4484
|
if (lowerHtml.includes(el)) count++;
|
|
4136
4485
|
}
|
|
4137
|
-
score +=
|
|
4486
|
+
score += cap2(Math.floor(count * 0.7), 4);
|
|
4138
4487
|
const imgTags = html.match(/<img\s[^>]*>/gi) || [];
|
|
4139
4488
|
if (imgTags.length > 0) {
|
|
4140
4489
|
let withAlt = 0;
|
|
@@ -4145,11 +4494,11 @@ function scoreSemanticHtml(html) {
|
|
|
4145
4494
|
}
|
|
4146
4495
|
if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
|
|
4147
4496
|
if (/\baria-/i.test(html)) score += 2;
|
|
4148
|
-
return
|
|
4497
|
+
return cap2(score, 10);
|
|
4149
4498
|
}
|
|
4150
4499
|
function scoreFactDensity(html) {
|
|
4151
4500
|
let score = 0;
|
|
4152
|
-
const text =
|
|
4501
|
+
const text = getTextContent2(html);
|
|
4153
4502
|
const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
|
|
4154
4503
|
if (numericPatterns.length >= 6) score += 5;
|
|
4155
4504
|
else if (numericPatterns.length >= 3) score += 3;
|
|
@@ -4162,11 +4511,11 @@ function scoreFactDensity(html) {
|
|
|
4162
4511
|
if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
|
|
4163
4512
|
const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
|
|
4164
4513
|
if (units.length >= 2) score += 1;
|
|
4165
|
-
return
|
|
4514
|
+
return cap2(score, 10);
|
|
4166
4515
|
}
|
|
4167
4516
|
function scoreDefinitionPatterns(html) {
|
|
4168
4517
|
let score = 0;
|
|
4169
|
-
const text =
|
|
4518
|
+
const text = getTextContent2(html);
|
|
4170
4519
|
const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
|
|
4171
4520
|
if (defPatterns.length >= 3) score += 5;
|
|
4172
4521
|
else if (defPatterns.length >= 1) score += 3;
|
|
@@ -4174,7 +4523,7 @@ function scoreDefinitionPatterns(html) {
|
|
|
4174
4523
|
if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
|
|
4175
4524
|
if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
|
|
4176
4525
|
if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
|
|
4177
|
-
return
|
|
4526
|
+
return cap2(score, 10);
|
|
4178
4527
|
}
|
|
4179
4528
|
function scoreCanonicalUrl(html, url) {
|
|
4180
4529
|
let score = 0;
|
|
@@ -4195,7 +4544,7 @@ function scoreCanonicalUrl(html, url) {
|
|
|
4195
4544
|
if (canonicalHref.startsWith("https://")) score += 2;
|
|
4196
4545
|
const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
|
|
4197
4546
|
if (allCanonicals.length === 1) score += 1;
|
|
4198
|
-
return
|
|
4547
|
+
return cap2(score, 10);
|
|
4199
4548
|
}
|
|
4200
4549
|
function scoreVisibleDateSignal(html) {
|
|
4201
4550
|
let score = 0;
|
|
@@ -4214,11 +4563,11 @@ function scoreVisibleDateSignal(html) {
|
|
|
4214
4563
|
} catch {
|
|
4215
4564
|
}
|
|
4216
4565
|
}
|
|
4217
|
-
return
|
|
4566
|
+
return cap2(score, 10);
|
|
4218
4567
|
}
|
|
4219
4568
|
function scoreCitationReadyWriting(html) {
|
|
4220
4569
|
let score = 0;
|
|
4221
|
-
const text =
|
|
4570
|
+
const text = getTextContent2(html);
|
|
4222
4571
|
const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
|
|
4223
4572
|
if (defSentences.length >= 3) score += 3;
|
|
4224
4573
|
else if (defSentences.length >= 1) score += 1;
|
|
@@ -4247,7 +4596,7 @@ function scoreCitationReadyWriting(html) {
|
|
|
4247
4596
|
);
|
|
4248
4597
|
if (quotableLines.length >= 2) score += 2;
|
|
4249
4598
|
else if (quotableLines.length >= 1) score += 1;
|
|
4250
|
-
return
|
|
4599
|
+
return cap2(score, 10);
|
|
4251
4600
|
}
|
|
4252
4601
|
function scoreAnswerFirstPlacement(html) {
|
|
4253
4602
|
let score = 0;
|
|
@@ -4258,8 +4607,8 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4258
4607
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4259
4608
|
for (const p of earlyParagraphs) {
|
|
4260
4609
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4261
|
-
const
|
|
4262
|
-
if (
|
|
4610
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
4611
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4263
4612
|
score += 4;
|
|
4264
4613
|
break;
|
|
4265
4614
|
}
|
|
@@ -4280,11 +4629,11 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4280
4629
|
score += 3;
|
|
4281
4630
|
}
|
|
4282
4631
|
}
|
|
4283
|
-
return
|
|
4632
|
+
return cap2(score, 10);
|
|
4284
4633
|
}
|
|
4285
4634
|
function scoreEvidencePackaging(html) {
|
|
4286
4635
|
let score = 0;
|
|
4287
|
-
const text =
|
|
4636
|
+
const text = getTextContent2(html);
|
|
4288
4637
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4289
4638
|
let inlineCitations = 0;
|
|
4290
4639
|
for (const p of paragraphs) {
|
|
@@ -4302,11 +4651,11 @@ function scoreEvidencePackaging(html) {
|
|
|
4302
4651
|
const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
|
|
4303
4652
|
if (sourcedStats.length >= 2) score += 2;
|
|
4304
4653
|
else if (sourcedStats.length >= 1) score += 1;
|
|
4305
|
-
return
|
|
4654
|
+
return cap2(score, 10);
|
|
4306
4655
|
}
|
|
4307
4656
|
function scoreEntityDisambiguation(html) {
|
|
4308
4657
|
let score = 0;
|
|
4309
|
-
const text =
|
|
4658
|
+
const text = getTextContent2(html);
|
|
4310
4659
|
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
4311
4660
|
if (!h1Match) return 3;
|
|
4312
4661
|
const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
|
|
@@ -4324,11 +4673,11 @@ function scoreEntityDisambiguation(html) {
|
|
|
4324
4673
|
if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
|
|
4325
4674
|
score += 3;
|
|
4326
4675
|
}
|
|
4327
|
-
return
|
|
4676
|
+
return cap2(score, 10);
|
|
4328
4677
|
}
|
|
4329
4678
|
function scoreExtractionFriction(html) {
|
|
4330
4679
|
let score = 0;
|
|
4331
|
-
const text =
|
|
4680
|
+
const text = getTextContent2(html);
|
|
4332
4681
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
4333
4682
|
const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
|
|
4334
4683
|
if (avgLen > 0 && avgLen < 20) score += 3;
|
|
@@ -4351,7 +4700,7 @@ function scoreExtractionFriction(html) {
|
|
|
4351
4700
|
if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
|
|
4352
4701
|
score = Math.max(0, score - 2);
|
|
4353
4702
|
}
|
|
4354
|
-
return
|
|
4703
|
+
return cap2(score, 10);
|
|
4355
4704
|
}
|
|
4356
4705
|
function scoreImageContextAI(html) {
|
|
4357
4706
|
let score = 0;
|
|
@@ -4376,7 +4725,7 @@ function scoreImageContextAI(html) {
|
|
|
4376
4725
|
else if (goodAltCount > 0) score += 1;
|
|
4377
4726
|
const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
|
|
4378
4727
|
if (contextualImages.length > 0) score += 3;
|
|
4379
|
-
return
|
|
4728
|
+
return cap2(score, 10);
|
|
4380
4729
|
}
|
|
4381
4730
|
function scoreDuplicateContent(html) {
|
|
4382
4731
|
return scoreDuplicateContentDetailed(html).score;
|
|
@@ -4438,8 +4787,12 @@ var SCORING_FUNCTIONS = {
|
|
|
4438
4787
|
citation_ready_writing: scoreCitationReadyWriting,
|
|
4439
4788
|
answer_first_placement: scoreAnswerFirstPlacement,
|
|
4440
4789
|
evidence_packaging: scoreEvidencePackaging,
|
|
4790
|
+
helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
|
|
4791
|
+
first_hand_experience_signals: scoreFirstHandExperienceSignals,
|
|
4441
4792
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4442
4793
|
extraction_friction: scoreExtractionFriction,
|
|
4794
|
+
creator_transparency: scoreCreatorTransparency,
|
|
4795
|
+
methodology_transparency: scoreMethodologyTransparency,
|
|
4443
4796
|
image_context_ai: scoreImageContextAI,
|
|
4444
4797
|
duplicate_content: scoreDuplicateContent
|
|
4445
4798
|
};
|
|
@@ -4484,7 +4837,7 @@ function extractTitle(html) {
|
|
|
4484
4837
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4485
4838
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4486
4839
|
}
|
|
4487
|
-
function
|
|
4840
|
+
function getTextContent3(html) {
|
|
4488
4841
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4489
4842
|
}
|
|
4490
4843
|
function countWords2(text) {
|
|
@@ -4544,9 +4897,9 @@ function checkMissingOgTags(html) {
|
|
|
4544
4897
|
}
|
|
4545
4898
|
return null;
|
|
4546
4899
|
}
|
|
4547
|
-
function checkThinContent(
|
|
4548
|
-
if (
|
|
4549
|
-
return { check: "thin-content", label: `Thin content (${
|
|
4900
|
+
function checkThinContent(wordCount2) {
|
|
4901
|
+
if (wordCount2 < 300) {
|
|
4902
|
+
return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
|
|
4550
4903
|
}
|
|
4551
4904
|
return null;
|
|
4552
4905
|
}
|
|
@@ -4643,15 +4996,15 @@ function checkNoAnswerBlock(html) {
|
|
|
4643
4996
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4644
4997
|
for (const p of earlyParagraphs) {
|
|
4645
4998
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4646
|
-
const
|
|
4647
|
-
if (
|
|
4999
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
5000
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4648
5001
|
return null;
|
|
4649
5002
|
}
|
|
4650
5003
|
}
|
|
4651
5004
|
return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
|
|
4652
5005
|
}
|
|
4653
5006
|
function checkNoEvidence(html, url) {
|
|
4654
|
-
const text =
|
|
5007
|
+
const text = getTextContent3(html);
|
|
4655
5008
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4656
5009
|
let inlineCitations = 0;
|
|
4657
5010
|
for (const p of paragraphs) {
|
|
@@ -4665,7 +5018,7 @@ function checkNoEvidence(html, url) {
|
|
|
4665
5018
|
return null;
|
|
4666
5019
|
}
|
|
4667
5020
|
function checkHasCitationReadyContent(html) {
|
|
4668
|
-
const text =
|
|
5021
|
+
const text = getTextContent3(html);
|
|
4669
5022
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
|
|
4670
5023
|
let quotable = 0;
|
|
4671
5024
|
for (const s of sentences) {
|
|
@@ -4690,8 +5043,8 @@ function checkDuplicateContentBlocks(html) {
|
|
|
4690
5043
|
}
|
|
4691
5044
|
function analyzePage(html, url, category) {
|
|
4692
5045
|
const title = extractTitle(html);
|
|
4693
|
-
const textContent =
|
|
4694
|
-
const
|
|
5046
|
+
const textContent = getTextContent3(html);
|
|
5047
|
+
const wordCount2 = countWords2(textContent);
|
|
4695
5048
|
const issues = [];
|
|
4696
5049
|
const strengths = [];
|
|
4697
5050
|
const issueChecks = [
|
|
@@ -4702,7 +5055,7 @@ function analyzePage(html, url, category) {
|
|
|
4702
5055
|
checkNoSchema(html),
|
|
4703
5056
|
checkMissingCanonical(html),
|
|
4704
5057
|
checkMissingOgTags(html),
|
|
4705
|
-
checkThinContent(
|
|
5058
|
+
checkThinContent(wordCount2),
|
|
4706
5059
|
checkImagesMissingAlt(html),
|
|
4707
5060
|
checkNoInternalLinks(html, url),
|
|
4708
5061
|
checkNoAnswerBlock(html),
|
|
@@ -4721,7 +5074,7 @@ function analyzePage(html, url, category) {
|
|
|
4721
5074
|
if (result) strengths.push(result);
|
|
4722
5075
|
}
|
|
4723
5076
|
const { aeoScore, criterionScores } = scorePage(html, url);
|
|
4724
|
-
return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
|
|
5077
|
+
return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
|
|
4725
5078
|
}
|
|
4726
5079
|
function analyzeAllPages(siteData) {
|
|
4727
5080
|
const reviews = [];
|
|
@@ -4760,7 +5113,7 @@ function extractTitle2(html) {
|
|
|
4760
5113
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4761
5114
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4762
5115
|
}
|
|
4763
|
-
function
|
|
5116
|
+
function getTextContent4(html) {
|
|
4764
5117
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4765
5118
|
}
|
|
4766
5119
|
function countWords3(text) {
|
|
@@ -4910,12 +5263,12 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
4910
5263
|
const norm = normalizeUrl(url);
|
|
4911
5264
|
if (nodes.has(norm)) continue;
|
|
4912
5265
|
const title = extractTitle2(page.text);
|
|
4913
|
-
const text =
|
|
4914
|
-
const
|
|
5266
|
+
const text = getTextContent4(page.text);
|
|
5267
|
+
const wordCount2 = countWords3(text);
|
|
4915
5268
|
nodes.set(norm, {
|
|
4916
5269
|
url: norm,
|
|
4917
5270
|
title,
|
|
4918
|
-
wordCount,
|
|
5271
|
+
wordCount: wordCount2,
|
|
4919
5272
|
category: page.category || "content",
|
|
4920
5273
|
inDegree: 0,
|
|
4921
5274
|
outDegree: 0,
|
|
@@ -4983,6 +5336,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4983
5336
|
qa_content_format: 0.04,
|
|
4984
5337
|
query_answer_alignment: 0.04,
|
|
4985
5338
|
faq_section: 0.03,
|
|
5339
|
+
helpful_purpose_alignment: 0.03,
|
|
5340
|
+
first_hand_experience_signals: 0.03,
|
|
4986
5341
|
// Content Organization (~30%)
|
|
4987
5342
|
entity_consistency: 0.05,
|
|
4988
5343
|
internal_linking: 0.04,
|
|
@@ -4990,30 +5345,32 @@ var CRITERION_WEIGHTS2 = {
|
|
|
4990
5345
|
schema_markup: 0.03,
|
|
4991
5346
|
author_schema_depth: 0.03,
|
|
4992
5347
|
table_list_extractability: 0.03,
|
|
4993
|
-
|
|
4994
|
-
|
|
5348
|
+
creator_transparency: 0.02,
|
|
5349
|
+
methodology_transparency: 0.02,
|
|
5350
|
+
definition_patterns: 0.015,
|
|
5351
|
+
visible_date_signal: 0.015,
|
|
4995
5352
|
semantic_html: 0.02,
|
|
4996
5353
|
clean_html: 0.02,
|
|
4997
5354
|
// Technical Plumbing (~15%)
|
|
4998
5355
|
content_cannibalization: 0.02,
|
|
4999
5356
|
duplicate_content: 0.05,
|
|
5000
5357
|
cross_page_duplication: 0.03,
|
|
5001
|
-
llms_txt: 0.
|
|
5002
|
-
robots_txt: 0.
|
|
5358
|
+
llms_txt: 0.01,
|
|
5359
|
+
robots_txt: 0.01,
|
|
5003
5360
|
content_velocity: 0.02,
|
|
5004
|
-
content_licensing: 0.
|
|
5361
|
+
content_licensing: 0.01,
|
|
5005
5362
|
sitemap_completeness: 0.01,
|
|
5006
|
-
canonical_url:
|
|
5007
|
-
rss_feed:
|
|
5008
|
-
schema_coverage:
|
|
5009
|
-
speakable_schema:
|
|
5363
|
+
canonical_url: 5e-3,
|
|
5364
|
+
rss_feed: 5e-3,
|
|
5365
|
+
schema_coverage: 5e-3,
|
|
5366
|
+
speakable_schema: 5e-3,
|
|
5010
5367
|
// V2 Criteria (~15%)
|
|
5011
5368
|
citation_ready_writing: 0.04,
|
|
5012
5369
|
answer_first_placement: 0.03,
|
|
5013
5370
|
evidence_packaging: 0.03,
|
|
5014
5371
|
entity_disambiguation: 0.02,
|
|
5015
5372
|
extraction_friction: 0.02,
|
|
5016
|
-
image_context_ai:
|
|
5373
|
+
image_context_ai: 5e-3
|
|
5017
5374
|
};
|
|
5018
5375
|
var PHASE_CONFIG = [
|
|
5019
5376
|
{
|
|
@@ -5043,6 +5400,8 @@ var PHASE_CONFIG = [
|
|
|
5043
5400
|
"answer_first_placement",
|
|
5044
5401
|
"evidence_packaging",
|
|
5045
5402
|
"entity_disambiguation",
|
|
5403
|
+
"helpful_purpose_alignment",
|
|
5404
|
+
"first_hand_experience_signals",
|
|
5046
5405
|
"duplicate_content",
|
|
5047
5406
|
"cross_page_duplication"
|
|
5048
5407
|
]
|
|
@@ -5056,6 +5415,8 @@ var PHASE_CONFIG = [
|
|
|
5056
5415
|
"schema_coverage",
|
|
5057
5416
|
"speakable_schema",
|
|
5058
5417
|
"author_schema_depth",
|
|
5418
|
+
"creator_transparency",
|
|
5419
|
+
"methodology_transparency",
|
|
5059
5420
|
"content_licensing",
|
|
5060
5421
|
"entity_consistency",
|
|
5061
5422
|
"semantic_html",
|
|
@@ -5078,7 +5439,7 @@ function impactFromScore(score) {
|
|
|
5078
5439
|
}
|
|
5079
5440
|
function effortForCriterion(criterion, score) {
|
|
5080
5441
|
const trivialCriteria = ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"];
|
|
5081
|
-
const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"];
|
|
5442
|
+
const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "creator_transparency", "methodology_transparency", "semantic_html", "definition_patterns", "content_freshness"];
|
|
5082
5443
|
const highCriteria = ["original_data", "content_velocity", "content_cannibalization"];
|
|
5083
5444
|
if (trivialCriteria.includes(criterion)) return score <= 3 ? "low" : "trivial";
|
|
5084
5445
|
if (lowCriteria.includes(criterion)) return score <= 3 ? "medium" : "low";
|
|
@@ -5345,6 +5706,58 @@ Sitemap: https://example.com/sitemap.xml`,
|
|
|
5345
5706
|
pageCount: affected?.length
|
|
5346
5707
|
}];
|
|
5347
5708
|
},
|
|
5709
|
+
helpful_purpose_alignment: (c, pages) => {
|
|
5710
|
+
if (c.score >= 10) return [];
|
|
5711
|
+
const impact = impactFromScore(c.score);
|
|
5712
|
+
const effort = effortForCriterion("helpful_purpose_alignment", c.score);
|
|
5713
|
+
const affected = getAffectedPages("helpful_purpose_alignment", pages);
|
|
5714
|
+
return [{
|
|
5715
|
+
id: "fix-helpful-purpose-alignment",
|
|
5716
|
+
criterion: c.criterion_label,
|
|
5717
|
+
criterionId: c.criterion,
|
|
5718
|
+
title: "Make pages solve the user task faster",
|
|
5719
|
+
description: "Reduce search-first filler and rewrite pages so the promised task is resolved quickly with concrete guidance, tradeoffs, and next steps.",
|
|
5720
|
+
impact,
|
|
5721
|
+
effort,
|
|
5722
|
+
impactScore: 0,
|
|
5723
|
+
category: "content",
|
|
5724
|
+
steps: [
|
|
5725
|
+
"Rewrite first paragraphs to answer the user need within the first 150-300 words",
|
|
5726
|
+
'Remove generic intros like "In this guide" and broad filler that could fit any topic',
|
|
5727
|
+
"Add concrete decision help: tradeoffs, risks, constraints, and next steps",
|
|
5728
|
+
"Move aggressive CTAs below the first useful answer block"
|
|
5729
|
+
],
|
|
5730
|
+
successCriteria: "Pages lead with task-solving guidance instead of generic search-first framing",
|
|
5731
|
+
affectedPages: affected,
|
|
5732
|
+
pageCount: affected?.length
|
|
5733
|
+
}];
|
|
5734
|
+
},
|
|
5735
|
+
first_hand_experience_signals: (c, pages) => {
|
|
5736
|
+
if (c.score >= 10) return [];
|
|
5737
|
+
const impact = impactFromScore(c.score);
|
|
5738
|
+
const effort = effortForCriterion("first_hand_experience_signals", c.score);
|
|
5739
|
+
const affected = getAffectedPages("first_hand_experience_signals", pages);
|
|
5740
|
+
return [{
|
|
5741
|
+
id: "fix-first-hand-experience",
|
|
5742
|
+
criterion: c.criterion_label,
|
|
5743
|
+
criterionId: c.criterion,
|
|
5744
|
+
title: "Add first-hand experience signals",
|
|
5745
|
+
description: "Show real use, testing, implementation, or lived experience instead of relying on generic summary content.",
|
|
5746
|
+
impact,
|
|
5747
|
+
effort,
|
|
5748
|
+
impactScore: 0,
|
|
5749
|
+
category: "content",
|
|
5750
|
+
steps: [
|
|
5751
|
+
"Add specific observations from real use, testing, or implementation",
|
|
5752
|
+
"Document limitations, edge cases, or lessons learned in practice",
|
|
5753
|
+
"Include screenshots, photos, before/after metrics, or original artifacts where relevant",
|
|
5754
|
+
"Rewrite generic sections to reflect direct experience with the subject matter"
|
|
5755
|
+
],
|
|
5756
|
+
successCriteria: "Key pages contain credible signs of direct use or observation, not just generic advice",
|
|
5757
|
+
affectedPages: affected,
|
|
5758
|
+
pageCount: affected?.length
|
|
5759
|
+
}];
|
|
5760
|
+
},
|
|
5348
5761
|
original_data: (c, pages) => {
|
|
5349
5762
|
if (c.score >= 10) return [];
|
|
5350
5763
|
const impact = impactFromScore(c.score);
|
|
@@ -5711,6 +6124,58 @@ Summarization: yes`,
|
|
|
5711
6124
|
successCriteria: "Articles have Person schema for authors with credentials"
|
|
5712
6125
|
}];
|
|
5713
6126
|
},
|
|
6127
|
+
creator_transparency: (c, pages) => {
|
|
6128
|
+
if (c.score >= 10) return [];
|
|
6129
|
+
const impact = impactFromScore(c.score);
|
|
6130
|
+
const effort = effortForCriterion("creator_transparency", c.score);
|
|
6131
|
+
const affected = getAffectedPages("creator_transparency", pages);
|
|
6132
|
+
return [{
|
|
6133
|
+
id: "fix-creator-transparency",
|
|
6134
|
+
criterion: c.criterion_label,
|
|
6135
|
+
criterionId: c.criterion,
|
|
6136
|
+
title: "Make content creators clearly visible",
|
|
6137
|
+
description: "Add visible bylines, author pages, and reviewer/editor attribution so readers can clearly tell who created the content.",
|
|
6138
|
+
impact,
|
|
6139
|
+
effort,
|
|
6140
|
+
impactScore: 0,
|
|
6141
|
+
category: "trust",
|
|
6142
|
+
steps: [
|
|
6143
|
+
"Add visible bylines to article-like pages where readers expect them",
|
|
6144
|
+
"Link author names to author pages with role, expertise area, and relevant background",
|
|
6145
|
+
"Add reviewer or editor attribution on sensitive or expert content",
|
|
6146
|
+
"Keep visible creator identity consistent with schema markup"
|
|
6147
|
+
],
|
|
6148
|
+
successCriteria: "Article-like pages have clear visible bylines and linked creator context",
|
|
6149
|
+
affectedPages: affected,
|
|
6150
|
+
pageCount: affected?.length
|
|
6151
|
+
}];
|
|
6152
|
+
},
|
|
6153
|
+
methodology_transparency: (c, pages) => {
|
|
6154
|
+
if (c.score >= 10) return [];
|
|
6155
|
+
const impact = impactFromScore(c.score);
|
|
6156
|
+
const effort = effortForCriterion("methodology_transparency", c.score);
|
|
6157
|
+
const affected = getAffectedPages("methodology_transparency", pages);
|
|
6158
|
+
return [{
|
|
6159
|
+
id: "fix-methodology-transparency",
|
|
6160
|
+
criterion: c.criterion_label,
|
|
6161
|
+
criterionId: c.criterion,
|
|
6162
|
+
title: "Explain how content was tested or reviewed",
|
|
6163
|
+
description: "Add methodology, criteria, testing, review, or update-process details where users would expect them.",
|
|
6164
|
+
impact,
|
|
6165
|
+
effort,
|
|
6166
|
+
impactScore: 0,
|
|
6167
|
+
category: "trust",
|
|
6168
|
+
steps: [
|
|
6169
|
+
'Add a "How we tested", "Methodology", or review-process section where relevant',
|
|
6170
|
+
"Document criteria, tools used, sample size, timeframe, or update policy",
|
|
6171
|
+
"Disclose AI assistance when a reasonable reader would expect that context",
|
|
6172
|
+
"Support methodology notes with screenshots, tables, or process artifacts when possible"
|
|
6173
|
+
],
|
|
6174
|
+
successCriteria: "Review, comparison, and research-style pages explain how conclusions were produced",
|
|
6175
|
+
affectedPages: affected,
|
|
6176
|
+
pageCount: affected?.length
|
|
6177
|
+
}];
|
|
6178
|
+
},
|
|
5714
6179
|
fact_density: (c, pages) => {
|
|
5715
6180
|
if (c.score >= 10) return [];
|
|
5716
6181
|
const impact = impactFromScore(c.score);
|
|
@@ -6356,6 +6821,13 @@ function isSpaShell(html) {
|
|
|
6356
6821
|
return SPA_INDICATORS.some((pattern) => pattern.test(html));
|
|
6357
6822
|
}
|
|
6358
6823
|
async function fetchWithHeadless(url, options) {
|
|
6824
|
+
let expectedDomain;
|
|
6825
|
+
try {
|
|
6826
|
+
expectedDomain = normalizeHostname(new URL(url).hostname);
|
|
6827
|
+
} catch {
|
|
6828
|
+
return null;
|
|
6829
|
+
}
|
|
6830
|
+
if (!await isSafeFetchTarget(url, expectedDomain)) return null;
|
|
6359
6831
|
let puppeteer;
|
|
6360
6832
|
try {
|
|
6361
6833
|
const mod = "puppeteer";
|
|
@@ -6382,12 +6854,28 @@ async function fetchWithHeadless(url, options) {
|
|
|
6382
6854
|
const page = await browser.newPage();
|
|
6383
6855
|
await page.setRequestInterception(true);
|
|
6384
6856
|
page.on("request", (req) => {
|
|
6385
|
-
|
|
6386
|
-
|
|
6387
|
-
|
|
6388
|
-
|
|
6389
|
-
|
|
6390
|
-
|
|
6857
|
+
void (async () => {
|
|
6858
|
+
const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
|
|
6859
|
+
if (alreadyHandled) return;
|
|
6860
|
+
if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
|
|
6861
|
+
try {
|
|
6862
|
+
if (!req.isInterceptResolutionHandled?.()) await req.abort();
|
|
6863
|
+
} catch {
|
|
6864
|
+
}
|
|
6865
|
+
return;
|
|
6866
|
+
}
|
|
6867
|
+
const type = req.resourceType();
|
|
6868
|
+
try {
|
|
6869
|
+
if (!req.isInterceptResolutionHandled?.()) {
|
|
6870
|
+
if (["image", "font", "media", "stylesheet"].includes(type)) {
|
|
6871
|
+
await req.abort();
|
|
6872
|
+
} else {
|
|
6873
|
+
await req.continue();
|
|
6874
|
+
}
|
|
6875
|
+
}
|
|
6876
|
+
} catch {
|
|
6877
|
+
}
|
|
6878
|
+
})();
|
|
6391
6879
|
});
|
|
6392
6880
|
await page.setUserAgent("AEO-Visibility-Bot/1.0");
|
|
6393
6881
|
await page.goto(url, { waitUntil: "networkidle2", timeout });
|
|
@@ -6400,6 +6888,7 @@ async function fetchWithHeadless(url, options) {
|
|
|
6400
6888
|
}
|
|
6401
6889
|
const html = await page.content();
|
|
6402
6890
|
const finalUrl = page.url();
|
|
6891
|
+
if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
|
|
6403
6892
|
return {
|
|
6404
6893
|
text: html.slice(0, 5e5),
|
|
6405
6894
|
status: 200,
|
|
@@ -6422,6 +6911,10 @@ function getTextLength(html) {
|
|
|
6422
6911
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
|
|
6423
6912
|
}
|
|
6424
6913
|
async function audit(domain, options) {
|
|
6914
|
+
const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
|
|
6915
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
6916
|
+
throw new Error(`Refusing to audit private or local address: ${domain}`);
|
|
6917
|
+
}
|
|
6425
6918
|
const startTime = Date.now();
|
|
6426
6919
|
let renderedWithHeadless = false;
|
|
6427
6920
|
const siteData = await prefetchSiteData(domain);
|
|
@@ -6454,7 +6947,7 @@ async function audit(domain, options) {
|
|
|
6454
6947
|
}
|
|
6455
6948
|
}
|
|
6456
6949
|
if (options?.fullCrawl) {
|
|
6457
|
-
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-
|
|
6950
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-HAF2X2X3.js");
|
|
6458
6951
|
const crawlResult = await crawlFullSite2(siteData, {
|
|
6459
6952
|
maxPages: options.maxPages ?? 200,
|
|
6460
6953
|
concurrency: options.concurrency ?? 5
|