aeorank 3.1.1 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/browser.d.ts +4 -4
- package/dist/browser.js +650 -157
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
- package/dist/chunk-DW7MPQ4X.js.map +1 -0
- package/dist/chunk-PYV5JVTC.js +179 -0
- package/dist/chunk-PYV5JVTC.js.map +1 -0
- package/dist/cli.js +519 -140
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
- package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
- package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
- package/dist/index.cjs +837 -183
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +650 -157
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-RYV25AUV.js.map +0 -1
- package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
- /package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -2,8 +2,12 @@ import {
|
|
|
2
2
|
crawlFullSite,
|
|
3
3
|
extractAllUrlsFromSitemap,
|
|
4
4
|
extractInternalLinks,
|
|
5
|
-
inferCategory
|
|
6
|
-
|
|
5
|
+
inferCategory,
|
|
6
|
+
isSafeFetchTarget,
|
|
7
|
+
isSafePublicUrl,
|
|
8
|
+
normalizeHostname,
|
|
9
|
+
safeFetch
|
|
10
|
+
} from "./chunk-DW7MPQ4X.js";
|
|
7
11
|
|
|
8
12
|
// src/parked-domain.ts
|
|
9
13
|
var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
|
|
@@ -147,19 +151,181 @@ function shingleJaccardSimilarity(a, b) {
|
|
|
147
151
|
return union === 0 ? 0 : intersection / union;
|
|
148
152
|
}
|
|
149
153
|
|
|
154
|
+
// src/helpful-content.ts
|
|
155
|
+
/**
 * Limits a number from above.
 *
 * @param {number} value - Number to limit.
 * @param {number} max - Largest value the result may take.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  const upperBounded = Math.min(max, value);
  return upperBounded;
}
|
|
158
|
+
/**
 * Limits a number from below.
 *
 * @param {number} value - Number to limit.
 * @param {number} min - Smallest value the result may take.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  const lowerBounded = Math.max(min, value);
  return lowerBounded;
}
|
|
161
|
+
/**
 * Counts how many times `pattern` matches inside `text`.
 *
 * The count is the length of the array returned by `String.prototype.match`,
 * so callers that want every occurrence must pass a pattern with the `g` flag.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern to look for.
 * @returns {number} Length of the match array, or 0 when nothing matched.
 */
function countMatches(text, pattern) {
  const found = text.match(pattern);
  const total = found?.length;
  return total ?? 0;
}
|
|
164
|
+
/**
 * Removes `<script>…</script>` and `<style>…</style>` elements from markup,
 * leaving a single space where each element used to be.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Markup without script and style elements.
 */
function stripScriptsAndStyles(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, " ");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
  return withoutStyles;
}
|
|
167
|
+
/**
 * Reduces markup to its text: script/style elements are dropped, every
 * remaining tag becomes a space, whitespace runs collapse to one space and
 * the result is trimmed.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Whitespace-normalized text.
 */
function getTextContent(html) {
  const withoutCode = stripScriptsAndStyles(html);
  const withoutTags = withoutCode.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
|
|
170
|
+
/**
 * Extracts the markup between the first `<body …>` tag and the first
 * following `</body>`.
 *
 * @param {string} html - Raw markup.
 * @returns {string} The body's inner markup, or `html` unchanged when no
 *   body element is found.
 */
function getBodyHtml(html) {
  const found = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (!found) {
    return html;
  }
  return found[1];
}
|
|
174
|
+
/**
 * Returns the text of the first `<p>` element inside the page body.
 *
 * Tags nested in the paragraph are replaced by spaces and whitespace is
 * collapsed and trimmed.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Paragraph text, or "" when no paragraph is found.
 */
function getFirstParagraphText(html) {
  const paragraph = getBodyHtml(html).match(/<p[^>]*>([\s\S]*?)<\/p>/i);
  if (!paragraph) {
    return "";
  }
  const inner = paragraph[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
178
|
+
/**
 * Keeps the first `count` whitespace-separated tokens of `text`, joined by
 * single spaces.
 *
 * Splitting is done on whitespace runs without trimming first, so leading
 * whitespace yields an empty first token that counts toward `count`.
 *
 * @param {string} text - Source text.
 * @param {number} count - Maximum number of tokens to keep.
 * @returns {string} The leading tokens joined by spaces.
 */
function firstNWords(text, count) {
  const tokens = text.split(/\s+/);
  const leading = tokens.slice(0, count);
  return leading.join(" ");
}
|
|
181
|
+
/**
 * Returns the text of the first `<h1>` element in the markup.
 *
 * Nested tags are replaced by spaces, then whitespace is collapsed and trimmed.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Heading text, or "" when there is no `<h1>`.
 */
function getH1Text(html) {
  const heading = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!heading) {
    return "";
  }
  const inner = heading[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
185
|
+
/**
 * Returns the text of the first `<title>` element in the markup.
 *
 * Nested tags are replaced by spaces, then whitespace is collapsed and trimmed.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Title text, or "" when there is no `<title>`.
 */
function getTitleText(html) {
  const titleTag = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!titleTag) {
    return "";
  }
  const inner = titleTag[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
189
|
+
/**
 * Counts the whitespace-separated words in `text`.
 *
 * @param {string} text - Text to measure; any falsy value counts as empty.
 * @returns {number} Number of non-empty tokens.
 */
function wordCount(text) {
  if (!text) {
    return 0;
  }
  const words = text.split(/\s+/).filter((token) => token.length > 0);
  return words.length;
}
|
|
192
|
+
/**
 * Heuristically decides whether a page looks like editorial content
 * (article, guide, documentation) rather than a landing or utility page.
 *
 * Signals are tallied and the page qualifies at two or more:
 *   - URL path contains a content-section segment (worth 2)
 *   - an `<article>` element is present
 *   - at least two `<h2>`/`<h3>` headings
 *   - at least 500 words of text
 *   - a `<time>` element or a datePublished/dateModified marker
 *   - byline-style wording in the text
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL; the URL signal is skipped when falsy.
 * @returns {boolean} True when at least two signals are present.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const wc = wordCount(text);
  let signals = 0;
  // The alternation ends in `\b`, so a bare stem such as "case-stud" could
  // never match "/case-study" or "/case-studies" (there is no word boundary
  // inside the word). Spell out both endings so case-study URLs count.
  if (url && /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorial|case-stud(?:y|ies)|whitepaper|faq)\b/i.test(url)) {
    signals += 2;
  }
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wc >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
|
|
206
|
+
/**
 * Decides whether readers would reasonably expect a page to disclose how its
 * conclusions were reached (reviews, comparisons, studies, benchmarks…).
 *
 * Checks, in order: the combined `<title>` + `<h1>` text, the URL, and finally
 * the page text for explicit methodology wording.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL; treated as "" when falsy.
 * @returns {boolean} True when any of the three checks matches.
 */
function expectsMethodology(html, url) {
  const headline = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  // Anchor each keyword at a word start: without the boundary "test" matched
  // inside "latest"/"contest" and marked ordinary pages as methodology pages.
  // Suffixes stay open so "reviews", "compared" and "tested" still match.
  if (/\b(?:review|compare|comparison|vs\.?|best|benchmark|study|analysis|survey|report|research|tested|test|methodology)/i.test(headline)) {
    return true;
  }
  const urlText = (url || "").toLowerCase();
  if (/(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|best)/i.test(urlText)) {
    return true;
  }
  // Text extraction is only needed when neither the headline nor the URL matched.
  const text = getTextContent(html);
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
}
|
|
218
|
+
/**
 * Checks whether the opening of the page talks about what its title and H1
 * announce.
 *
 * Keywords are the distinct lower-cased tokens of "<title> <h1>" that are at
 * least five characters long and not on a small stop list. The page aligns
 * when at least two of them (or the only one, if there is just one) occur in
 * the first 250 words of the page text.
 *
 * @param {string} html - Raw page markup.
 * @returns {boolean} False when no keyword can be derived or too few appear.
 */
function titleAndBodyAlign(html) {
  const stopWords = /^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i;
  const heading = getH1Text(html);
  const pageTitle = getTitleText(html);
  const opening = firstNWords(getTextContent(html), 250).toLowerCase();

  const keywordSet = new Set();
  for (const token of `${pageTitle} ${heading}`.toLowerCase().split(/[\s|:()\-/]+/)) {
    if (token.length >= 5 && !stopWords.test(token)) {
      keywordSet.add(token);
    }
  }
  if (keywordSet.size === 0) {
    return false;
  }

  let hits = 0;
  for (const keyword of keywordSet) {
    if (opening.includes(keyword)) {
      hits += 1;
    }
  }
  return hits >= Math.min(2, keywordSet.size);
}
|
|
228
|
+
// Phrase patterns used by the helpful-content scorers.
// Patterns that are counted with `countMatches` carry the `g` flag so every
// occurrence is reported; patterns that are only probed with `.test()` omit it.

// Stock opening phrases, anchored at the start of the first paragraph.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;

// Wording that tells the reader what to do or choose.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;

// Wording that weighs pros against cons.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;

// Filler phrases that are penalized.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;

// Calls to action; counted in the first part of the body markup.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;

// "we/I + verb" statements describing work done first-hand.
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;

// Phrases that place a statement in concrete practice (durations, cohorts, settings).
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;

// Mentions of original evidence such as screenshots or benchmarks.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;

// Admissions of limits, failures or surprises.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;

// Anchor tags whose href contains an author/team style path segment.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;

// "written by" style bylines.
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;

// Author bio and credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;

// Explicit process or methodology disclosures.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;

// Specific details that back up a methodology claim.
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
|
|
242
|
+
/**
 * Scores (0-10) how directly a page serves the visitor's task instead of
 * padding, generic framing or early sales prompts.
 *
 * Pages with no text score 0. Short pages (< 250 words) that do not look like
 * content get a neutral 5. Otherwise the score starts at 3 for content-like
 * pages (5 for the rest) and is adjusted by the signals below, then clamped.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  const firstPara = getFirstParagraphText(html);
  const earlyText = firstNWords(text, 300);
  const earlyBodyHtml = getBodyHtml(html).slice(0, 1800);

  let score = contentLike ? 3 : 5;

  // Opening paragraph: reward a specific opener, penalize a stock one.
  if (firstPara) {
    score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;
  }

  // Practical guidance counts most when it shows up in the first 300 words.
  if (countMatches(earlyText, PRACTICAL_LANGUAGE) >= 1) {
    score += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    score += 1;
  }

  // Trade-off wording: one point per occurrence, at most two.
  score += Math.min(countMatches(text, TRADEOFF_LANGUAGE), 2);

  if (titleAndBodyAlign(html)) score += 1;
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) score += 1;

  // Calls to action near the top of the body markup.
  const earlyCtas = countMatches(earlyBodyHtml, EARLY_CTA_PATTERN);
  if (earlyCtas >= 3) {
    score -= 2;
  } else if (earlyCtas >= 2) {
    score -= 1;
  }

  // Filler phrases anywhere in the text.
  const fluffCount = countMatches(text, FLUFF_LANGUAGE);
  if (fluffCount >= 3) {
    score -= 2;
  } else if (fluffCount >= 1) {
    score -= 1;
  }

  return floor(cap(score, 10), 0);
}
|
|
269
|
+
/**
 * Scores (0-10) how much evidence of direct use, testing or observation a
 * page shows.
 *
 * Pages with no text score 0. Content-like pages start at 2, other pages at
 * 5; points are added for first-hand statements, practical context, evidence
 * artifacts and acknowledged limitations, and one point is removed for
 * manufacturer/vendor copy wording. The total is clamped to 0-10.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  let score = isContentLikePage(html, url) ? 2 : 5;

  // "we tested…" style statements: 1-2 earn two points, 3+ earn four.
  const actions = countMatches(text, FIRST_HAND_ACTIONS);
  score += actions >= 3 ? 4 : actions >= 1 ? 2 : 0;

  // Practical context: one point per occurrence, at most two.
  score += Math.min(countMatches(text, EXPERIENCE_CONTEXT), 2);

  // Evidence artifacts named in the text plus figure elements in the markup.
  const artifacts = countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
  score += artifacts >= 3 ? 2 : artifacts >= 1 ? 1 : 0;

  // Acknowledged limitations: one point per occurrence, at most two.
  score += Math.min(countMatches(text, LIMITATION_LANGUAGE), 2);

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) {
    score -= 1;
  }

  return floor(cap(score, 10), 0);
}
|
|
289
|
+
/**
 * Scores (0-10) how clearly a page shows who created or reviewed it.
 *
 * Pages with no text score 0 and pages that do not look like content get a
 * neutral 5. Content-like pages start at 0 and collect points for each
 * attribution signal found; the total is clamped to 0-10.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL, forwarded to the content-page heuristic.
 * @returns {number} Score between 0 and 10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const bylineVisible =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);

  // Each entry pairs "was the signal found?" with the points it is worth.
  const signals = [
    [bylineVisible, 3],
    [AUTHOR_LINK_PATTERN.test(html), 2],
    [AUTHOR_BIO_PATTERN.test(text), 2],
    [/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text), 1],
    [/"@type"\s*:\s*"Person"/i.test(html), 2],
  ];

  let score = 0;
  for (const [present, points] of signals) {
    if (present) score += points;
  }
  return floor(cap(score, 10), 0);
}
|
|
304
|
+
/**
 * Scores (0-10) how openly a page explains how its content was tested,
 * researched, reviewed or updated.
 *
 * Pages with no text score 0. Pages on which methodology is expected
 * (see `expectsMethodology`) start at 2 and every other page starts at 5;
 * points are then added for methodology wording, supporting detail and
 * quantified or disclosed process. The total is clamped to 0-10.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL, forwarded to `expectsMethodology`.
 * @returns {number} Score between 0 and 10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // The previous `expected ? 2 : contentLike ? 5 : 5` had two identical
  // branches, so the content-page check could never change the result and
  // its extra pass over the markup has been dropped.
  let score = expectsMethodology(html, url) ? 2 : 5;

  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;

  // Supporting detail: one point per occurrence, at most three.
  score += Math.min(countMatches(text, METHODOLOGY_DETAIL), 3);

  // Quantified process statements ("tested 12", "over 3 weeks", …).
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  // Figures or tables only count alongside methodology wording.
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;

  return floor(cap(score, 10), 0);
}
|
|
322
|
+
|
|
150
323
|
// src/site-crawler.ts
|
|
151
|
-
async function fetchText(url) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
redirect: "follow"
|
|
157
|
-
});
|
|
158
|
-
const text = await res.text();
|
|
159
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
160
|
-
} catch {
|
|
161
|
-
return null;
|
|
162
|
-
}
|
|
324
|
+
/**
 * Downloads a URL through `safeFetch` and returns its body as text.
 *
 * Never rejects: any failure while fetching or while reading the body yields
 * null, matching the null-on-error behaviour this helper had before it was
 * moved onto `safeFetch`. Callers fan out with `Promise.all`, where a single
 * rejected body read would otherwise abort the whole batch.
 *
 * @param {string} url - Address to download.
 * @param {string} [expectedDomain] - Passed through to `safeFetch` as
 *   `expectedDomain` (presumably used to constrain the target host; see
 *   `safeFetch` for the exact rule).
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   Body truncated to 500,000 characters with the response status and URL,
 *   or null when `safeFetch` returns nothing or an error occurs.
 */
async function fetchText(url, expectedDomain) {
  try {
    const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
    if (!res) return null;
    const body = await res.text();
    return { text: body.slice(0, 5e5), status: res.status, finalUrl: res.url };
  } catch {
    // Best-effort crawl: an unreadable page is treated the same as a missing one.
    return null;
  }
}
|
|
164
330
|
function extractDomain(url) {
|
|
165
331
|
return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
|
|
@@ -200,13 +366,16 @@ function isHtmlResponse(result) {
|
|
|
200
366
|
return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
|
|
201
367
|
}
|
|
202
368
|
async function prefetchSiteData(domain) {
|
|
369
|
+
if (!await isSafeFetchTarget(`https://${domain}`)) {
|
|
370
|
+
return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
|
|
371
|
+
}
|
|
203
372
|
let protocol = null;
|
|
204
373
|
let homepage = null;
|
|
205
|
-
homepage = await fetchText(`https://${domain}
|
|
374
|
+
homepage = await fetchText(`https://${domain}`, domain);
|
|
206
375
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
207
376
|
protocol = "https";
|
|
208
377
|
} else {
|
|
209
|
-
homepage = await fetchText(`http://${domain}
|
|
378
|
+
homepage = await fetchText(`http://${domain}`, domain);
|
|
210
379
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
211
380
|
protocol = "http";
|
|
212
381
|
}
|
|
@@ -226,38 +395,38 @@ async function prefetchSiteData(domain) {
|
|
|
226
395
|
}
|
|
227
396
|
const baseUrl = `${protocol}://${domain}`;
|
|
228
397
|
const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
|
|
229
|
-
fetchText(`${baseUrl}/llms.txt
|
|
230
|
-
fetchText(`${baseUrl}/robots.txt
|
|
231
|
-
fetchText(`${baseUrl}/faq
|
|
398
|
+
fetchText(`${baseUrl}/llms.txt`, domain),
|
|
399
|
+
fetchText(`${baseUrl}/robots.txt`, domain),
|
|
400
|
+
fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
|
|
232
401
|
if (result && result.status === 200) return result;
|
|
233
402
|
for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
|
|
234
|
-
const fallback = await fetchText(`${baseUrl}${path}
|
|
403
|
+
const fallback = await fetchText(`${baseUrl}${path}`, domain);
|
|
235
404
|
if (fallback && fallback.status === 200) return fallback;
|
|
236
405
|
}
|
|
237
406
|
return result;
|
|
238
407
|
}),
|
|
239
|
-
fetchText(`${baseUrl}/sitemap.xml
|
|
240
|
-
fetchText(`${baseUrl}/ai.txt
|
|
408
|
+
fetchText(`${baseUrl}/sitemap.xml`, domain),
|
|
409
|
+
fetchText(`${baseUrl}/ai.txt`, domain)
|
|
241
410
|
]);
|
|
242
411
|
let rssFeed = null;
|
|
243
412
|
if (homepage) {
|
|
244
413
|
const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
|
|
245
414
|
if (rssLinkMatch) {
|
|
246
415
|
const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
|
|
247
|
-
rssFeed = await fetchText(rssUrl);
|
|
416
|
+
rssFeed = await fetchText(rssUrl, domain);
|
|
248
417
|
}
|
|
249
418
|
if (!rssFeed || rssFeed.status !== 200) {
|
|
250
419
|
for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
|
|
251
|
-
rssFeed = await fetchText(`${baseUrl}${path}
|
|
420
|
+
rssFeed = await fetchText(`${baseUrl}${path}`, domain);
|
|
252
421
|
if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
|
|
253
422
|
rssFeed = null;
|
|
254
423
|
}
|
|
255
424
|
}
|
|
256
425
|
}
|
|
257
426
|
if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
|
|
258
|
-
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
|
|
427
|
+
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
|
|
259
428
|
if (subUrls.length > 0) {
|
|
260
|
-
const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
|
|
429
|
+
const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
|
|
261
430
|
for (const sub of subResults) {
|
|
262
431
|
if (sub && sub.status === 200) {
|
|
263
432
|
sitemapXml.text += "\n" + sub.text;
|
|
@@ -270,7 +439,7 @@ async function prefetchSiteData(domain) {
|
|
|
270
439
|
const sitemapForBlog = sitemapXml.text;
|
|
271
440
|
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
272
441
|
if (blogUrls.length > 0) {
|
|
273
|
-
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
442
|
+
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
|
|
274
443
|
blogSample = fetched.filter(
|
|
275
444
|
(r) => r !== null && r.status === 200 && r.text.length > 500
|
|
276
445
|
);
|
|
@@ -1052,8 +1221,8 @@ function checkDirectAnswerDensity(data) {
|
|
|
1052
1221
|
const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
1053
1222
|
const snippetZoneParagraphs = paragraphs.filter((p) => {
|
|
1054
1223
|
const text2 = p.replace(/<[^>]*>/g, "").trim();
|
|
1055
|
-
const
|
|
1056
|
-
return
|
|
1224
|
+
const wordCount2 = text2.split(/\s+/).length;
|
|
1225
|
+
return wordCount2 >= 40 && wordCount2 <= 150;
|
|
1057
1226
|
});
|
|
1058
1227
|
if (snippetZoneParagraphs.length >= 3) {
|
|
1059
1228
|
score += 2;
|
|
@@ -1321,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
|
1321
1490
|
});
|
|
1322
1491
|
return candidates.slice(0, limit).map((c) => c.url);
|
|
1323
1492
|
}
|
|
1324
|
-
function extractAllSubSitemapUrls(sitemapText,
|
|
1493
|
+
function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
|
|
1325
1494
|
if (!sitemapText.includes("<sitemapindex")) return [];
|
|
1495
|
+
const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
|
|
1496
|
+
const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
|
|
1326
1497
|
const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
|
|
1327
1498
|
const urls = sitemapLocs.map((block) => {
|
|
1328
1499
|
const match = block.match(/<loc>([^<]+)<\/loc>/i);
|
|
1329
1500
|
return match ? match[1].trim() : "";
|
|
1330
|
-
}).filter(
|
|
1501
|
+
}).filter((url) => !!url && isSafePublicUrl(url, domain));
|
|
1331
1502
|
const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
|
|
1332
1503
|
const rest = urls.filter((u) => !preferred.includes(u));
|
|
1333
1504
|
return [...preferred, ...rest].slice(0, limit);
|
|
@@ -2201,6 +2372,123 @@ function checkContentDepth(data, topicCoherenceScore) {
|
|
|
2201
2372
|
}
|
|
2202
2373
|
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2203
2374
|
}
|
|
2375
|
+
/**
 * Applies a page scorer to the homepage and to every sampled blog page.
 *
 * Each page is scored with its own `finalUrl`; when that is missing the site
 * root (`<protocol>://<domain>/`, defaulting to https) is used instead.
 *
 * @param {object} data - Prefetched site data; reads `homepage`, `blogSample`,
 *   `protocol` and `domain`.
 * @param {(html: string, url: string) => number} scorer - Scoring function.
 * @returns {Array<{url: string, score: number}>} One entry per scored page,
 *   homepage first.
 */
function scoreSampledPages(data, scorer) {
  const siteRoot = () => (data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`);
  const scorePage = (page) => {
    const url = page.finalUrl || siteRoot();
    return { url, score: scorer(page.text, url) };
  };

  const results = [];
  if (data.homepage) {
    results.push(scorePage(data.homepage));
  }
  if (data.blogSample) {
    for (const post of data.blogSample) {
      results.push(scorePage(post));
    }
  }
  return results;
}
|
|
2389
|
+
/**
 * Summarizes per-page scores.
 *
 * @param {Array<{score: number}>} pageScores - Scored pages.
 * @returns {{total: number, average: number, strong: Array, weak: Array}}
 *   `total` is the page count, `average` the rounded mean (0 for an empty
 *   list), `strong` the pages scoring 8 or more and `weak` those scoring 4
 *   or less.
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let sum = 0;

  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }

  const average = total > 0 ? Math.round(sum / total) : 0;
  return { total, average, strong, weak };
}
|
|
2396
|
+
/**
 * Audit criterion "Helpful Purpose Alignment".
 *
 * Scores the homepage and sampled blog pages with
 * `scoreHelpfulPurposeAlignment` and reports the rounded average as the
 * criterion score. Without a homepage the criterion is reported as
 * `not_found` with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: Array<object>, fix_priority: string}}
 */
function checkHelpfulPurposeAlignment(data) {
  if (!data.homepage) {
    return {
      criterion: "helpful_purpose_alignment",
      criterion_label: "Helpful Purpose Alignment",
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P1"
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));
  const { total, average, strong, weak } = summary;

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" };
  } else {
    headline = { severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return {
    criterion: "helpful_purpose_alignment",
    criterion_label: "Helpful Purpose Alignment",
    score: average,
    status,
    findings,
    fix_priority: passed ? "P3" : "P1"
  };
}
|
|
2420
|
+
/**
 * Audit criterion "First-Hand Experience Signals".
 *
 * Scores the homepage and sampled blog pages with
 * `scoreFirstHandExperienceSignals` and reports the rounded average as the
 * criterion score. Without a homepage the criterion is reported as
 * `not_found` with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: Array<object>, fix_priority: string}}
 */
function checkFirstHandExperienceSignals(data) {
  if (!data.homepage) {
    return {
      criterion: "first_hand_experience_signals",
      criterion_label: "First-Hand Experience Signals",
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2"
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));
  const { total, average, strong, weak } = summary;

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" };
  } else {
    headline = { severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return {
    criterion: "first_hand_experience_signals",
    criterion_label: "First-Hand Experience Signals",
    score: average,
    status,
    findings,
    fix_priority: passed ? "P3" : "P2"
  };
}
|
|
2444
|
+
/**
 * Audit criterion "Creator Transparency".
 *
 * Scores the homepage and sampled blog pages with `scoreCreatorTransparency`
 * and reports the rounded average as the criterion score. Without a homepage
 * the criterion is reported as `not_found` with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: Array<object>, fix_priority: string}}
 */
function checkCreatorTransparency(data) {
  if (!data.homepage) {
    return {
      criterion: "creator_transparency",
      criterion_label: "Creator Transparency",
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2"
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));
  const { total, average, strong, weak } = summary;

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" };
  } else {
    headline = { severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return {
    criterion: "creator_transparency",
    criterion_label: "Creator Transparency",
    score: average,
    status,
    findings,
    fix_priority: passed ? "P3" : "P2"
  };
}
|
|
2468
|
+
/**
 * Audit criterion "Methodology Transparency".
 *
 * Scores the homepage and sampled blog pages with
 * `scoreMethodologyTransparency` and reports the rounded average as the
 * criterion score. Without a homepage the criterion is reported as
 * `not_found` with score 0.
 *
 * @param {object} data - Prefetched site data.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: Array<object>, fix_priority: string}}
 */
function checkMethodologyTransparency(data) {
  if (!data.homepage) {
    return {
      criterion: "methodology_transparency",
      criterion_label: "Methodology Transparency",
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2"
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));
  const { total, average, strong, weak } = summary;

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' };
  } else {
    headline = { severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
    });
  }

  const passed = average >= 7;
  const status = passed ? "pass" : average >= 4 ? "partial" : "fail";
  return {
    criterion: "methodology_transparency",
    criterion_label: "Methodology Transparency",
    score: average,
    status,
    findings,
    fix_priority: passed ? "P3" : "P2"
  };
}
|
|
2204
2492
|
function checkCitationReadyWriting(data) {
|
|
2205
2493
|
const findings = [];
|
|
2206
2494
|
if (!data.homepage) {
|
|
@@ -2296,8 +2584,8 @@ function checkAnswerFirstPlacement(data) {
|
|
|
2296
2584
|
const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
|
|
2297
2585
|
for (const p of earlyParagraphs) {
|
|
2298
2586
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
2299
|
-
const
|
|
2300
|
-
if (
|
|
2587
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
2588
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
2301
2589
|
shortAnswerCount++;
|
|
2302
2590
|
break;
|
|
2303
2591
|
}
|
|
@@ -2839,20 +3127,29 @@ function auditSiteFromData(data) {
|
|
|
2839
3127
|
checkVisibleDateSignal(data),
|
|
2840
3128
|
topicCoherence,
|
|
2841
3129
|
checkContentDepth(data, topicCoherence.score),
|
|
2842
|
-
//
|
|
3130
|
+
// Helpful-content criteria (#29-#32)
|
|
3131
|
+
checkHelpfulPurposeAlignment(data),
|
|
3132
|
+
checkFirstHandExperienceSignals(data),
|
|
3133
|
+
checkCreatorTransparency(data),
|
|
3134
|
+
checkMethodologyTransparency(data),
|
|
3135
|
+
// V2 criteria (#33-#38)
|
|
2843
3136
|
checkCitationReadyWriting(data),
|
|
2844
3137
|
checkAnswerFirstPlacement(data),
|
|
2845
3138
|
checkEvidencePackaging(data),
|
|
2846
3139
|
checkEntityDisambiguation(data),
|
|
2847
3140
|
checkExtractionFriction(data),
|
|
2848
3141
|
checkImageContextAI(data),
|
|
2849
|
-
// V3 criteria (#
|
|
3142
|
+
// V3 criteria (#39-#40)
|
|
2850
3143
|
checkDuplicateContent(data),
|
|
2851
3144
|
checkCrossPageDuplication(data)
|
|
2852
3145
|
];
|
|
2853
3146
|
}
|
|
2854
3147
|
async function auditSite(targetUrl) {
|
|
2855
|
-
const
|
|
3148
|
+
const normalizedTarget = targetUrl.startsWith("http") ? targetUrl : `https://${targetUrl}`;
|
|
3149
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
3150
|
+
throw new Error(`Refusing to audit private or local address: ${targetUrl}`);
|
|
3151
|
+
}
|
|
3152
|
+
const url = new URL(normalizedTarget);
|
|
2856
3153
|
const domain = url.hostname.replace(/^www\./, "");
|
|
2857
3154
|
const data = await prefetchSiteData(domain);
|
|
2858
3155
|
return auditSiteFromData(data);
|
|
@@ -2878,6 +3175,10 @@ var WEIGHTS = {
|
|
|
2878
3175
|
// Relevance to actual AI queries
|
|
2879
3176
|
faq_section: 0.03,
|
|
2880
3177
|
// Structured Q&A pairs
|
|
3178
|
+
helpful_purpose_alignment: 0.03,
|
|
3179
|
+
// Visitor-helpful vs search-first framing
|
|
3180
|
+
first_hand_experience_signals: 0.03,
|
|
3181
|
+
// Evidence of real use or observation
|
|
2881
3182
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2882
3183
|
// HOW easily AI engines can extract and trust your content.
|
|
2883
3184
|
entity_consistency: 0.05,
|
|
@@ -2892,9 +3193,13 @@ var WEIGHTS = {
|
|
|
2892
3193
|
// Expert attribution
|
|
2893
3194
|
table_list_extractability: 0.03,
|
|
2894
3195
|
// Extractable structured data
|
|
2895
|
-
|
|
3196
|
+
creator_transparency: 0.02,
|
|
3197
|
+
// Visible author/reviewer clarity
|
|
3198
|
+
methodology_transparency: 0.02,
|
|
3199
|
+
// Process disclosure
|
|
3200
|
+
definition_patterns: 0.015,
|
|
2896
3201
|
// Clear definitions
|
|
2897
|
-
visible_date_signal: 0.
|
|
3202
|
+
visible_date_signal: 0.015,
|
|
2898
3203
|
// Publication date trust
|
|
2899
3204
|
semantic_html: 0.02,
|
|
2900
3205
|
// Clean semantic structure
|
|
@@ -2903,15 +3208,15 @@ var WEIGHTS = {
|
|
|
2903
3208
|
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2904
3209
|
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2905
3210
|
content_cannibalization: 0.02,
|
|
2906
|
-
llms_txt: 0.
|
|
2907
|
-
robots_txt: 0.
|
|
3211
|
+
llms_txt: 0.01,
|
|
3212
|
+
robots_txt: 0.01,
|
|
2908
3213
|
content_velocity: 0.02,
|
|
2909
|
-
content_licensing: 0.
|
|
3214
|
+
content_licensing: 0.01,
|
|
2910
3215
|
sitemap_completeness: 0.01,
|
|
2911
|
-
canonical_url:
|
|
2912
|
-
rss_feed:
|
|
2913
|
-
schema_coverage:
|
|
2914
|
-
speakable_schema:
|
|
3216
|
+
canonical_url: 5e-3,
|
|
3217
|
+
rss_feed: 5e-3,
|
|
3218
|
+
schema_coverage: 5e-3,
|
|
3219
|
+
speakable_schema: 5e-3,
|
|
2915
3220
|
// ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
|
|
2916
3221
|
// Citation quality, evidence packaging, and extraction friction.
|
|
2917
3222
|
citation_ready_writing: 0.04,
|
|
@@ -2924,7 +3229,7 @@ var WEIGHTS = {
|
|
|
2924
3229
|
// Clear entity boundaries
|
|
2925
3230
|
extraction_friction: 0.02,
|
|
2926
3231
|
// Sentence length, voice, jargon
|
|
2927
|
-
image_context_ai:
|
|
3232
|
+
image_context_ai: 5e-3,
|
|
2928
3233
|
// Figure/figcaption, alt text quality
|
|
2929
3234
|
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2930
3235
|
duplicate_content: 0.05,
|
|
@@ -2944,8 +3249,8 @@ function calculateOverallScore(criteria) {
|
|
|
2944
3249
|
let score = Math.round(weightedSum / totalWeight);
|
|
2945
3250
|
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2946
3251
|
if (coherence && coherence.score < 6) {
|
|
2947
|
-
const
|
|
2948
|
-
score = Math.min(score,
|
|
3252
|
+
const cap3 = 35 + coherence.score * 5;
|
|
3253
|
+
score = Math.min(score, cap3);
|
|
2949
3254
|
}
|
|
2950
3255
|
return score;
|
|
2951
3256
|
}
|
|
@@ -2990,6 +3295,13 @@ function classifyRendering(html) {
|
|
|
2990
3295
|
return { method: "client-spa", framework: null };
|
|
2991
3296
|
}
|
|
2992
3297
|
async function fetchWithHeadless(url, options) {
|
|
3298
|
+
let expectedDomain;
|
|
3299
|
+
try {
|
|
3300
|
+
expectedDomain = normalizeHostname(new URL(url).hostname);
|
|
3301
|
+
} catch {
|
|
3302
|
+
return null;
|
|
3303
|
+
}
|
|
3304
|
+
if (!await isSafeFetchTarget(url, expectedDomain)) return null;
|
|
2993
3305
|
let puppeteer;
|
|
2994
3306
|
try {
|
|
2995
3307
|
const mod = "puppeteer";
|
|
@@ -3016,12 +3328,28 @@ async function fetchWithHeadless(url, options) {
|
|
|
3016
3328
|
const page = await browser.newPage();
|
|
3017
3329
|
await page.setRequestInterception(true);
|
|
3018
3330
|
page.on("request", (req) => {
|
|
3019
|
-
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
|
|
3331
|
+
void (async () => {
|
|
3332
|
+
const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
|
|
3333
|
+
if (alreadyHandled) return;
|
|
3334
|
+
if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
|
|
3335
|
+
try {
|
|
3336
|
+
if (!req.isInterceptResolutionHandled?.()) await req.abort();
|
|
3337
|
+
} catch {
|
|
3338
|
+
}
|
|
3339
|
+
return;
|
|
3340
|
+
}
|
|
3341
|
+
const type = req.resourceType();
|
|
3342
|
+
try {
|
|
3343
|
+
if (!req.isInterceptResolutionHandled?.()) {
|
|
3344
|
+
if (["image", "font", "media", "stylesheet"].includes(type)) {
|
|
3345
|
+
await req.abort();
|
|
3346
|
+
} else {
|
|
3347
|
+
await req.continue();
|
|
3348
|
+
}
|
|
3349
|
+
}
|
|
3350
|
+
} catch {
|
|
3351
|
+
}
|
|
3352
|
+
})();
|
|
3025
3353
|
});
|
|
3026
3354
|
await page.setUserAgent("AEO-Visibility-Bot/1.0");
|
|
3027
3355
|
await page.goto(url, { waitUntil: "networkidle2", timeout });
|
|
@@ -3034,6 +3362,7 @@ async function fetchWithHeadless(url, options) {
|
|
|
3034
3362
|
}
|
|
3035
3363
|
const html = await page.content();
|
|
3036
3364
|
const finalUrl = page.url();
|
|
3365
|
+
if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
|
|
3037
3366
|
return {
|
|
3038
3367
|
text: html.slice(0, 5e5),
|
|
3039
3368
|
status: 200,
|
|
@@ -3061,6 +3390,8 @@ var PILLARS = {
|
|
|
3061
3390
|
"citation_ready_writing",
|
|
3062
3391
|
"answer_first_placement",
|
|
3063
3392
|
"evidence_packaging",
|
|
3393
|
+
"helpful_purpose_alignment",
|
|
3394
|
+
"first_hand_experience_signals",
|
|
3064
3395
|
"duplicate_content",
|
|
3065
3396
|
"cross_page_duplication"
|
|
3066
3397
|
],
|
|
@@ -3078,7 +3409,9 @@ var PILLARS = {
|
|
|
3078
3409
|
"internal_linking",
|
|
3079
3410
|
"content_freshness",
|
|
3080
3411
|
"author_schema_depth",
|
|
3081
|
-
"schema_markup"
|
|
3412
|
+
"schema_markup",
|
|
3413
|
+
"creator_transparency",
|
|
3414
|
+
"methodology_transparency"
|
|
3082
3415
|
],
|
|
3083
3416
|
"Technical Foundation": [
|
|
3084
3417
|
"semantic_html",
|
|
@@ -3108,6 +3441,8 @@ var CLIENT_NAMES = {
|
|
|
3108
3441
|
citation_ready_writing: "Citation-Ready Writing",
|
|
3109
3442
|
answer_first_placement: "Answer-First Placement",
|
|
3110
3443
|
evidence_packaging: "Evidence Packaging",
|
|
3444
|
+
helpful_purpose_alignment: "Helpful Purpose Alignment",
|
|
3445
|
+
first_hand_experience_signals: "First-Hand Experience Signals",
|
|
3111
3446
|
direct_answer_density: "Direct Answer Density",
|
|
3112
3447
|
qa_content_format: "Q&A Content Format",
|
|
3113
3448
|
query_answer_alignment: "Query-Answer Alignment",
|
|
@@ -3120,6 +3455,8 @@ var CLIENT_NAMES = {
|
|
|
3120
3455
|
content_freshness: "Content Freshness",
|
|
3121
3456
|
author_schema_depth: "Author & Expert Schema",
|
|
3122
3457
|
schema_markup: "Schema Markup",
|
|
3458
|
+
creator_transparency: "Creator Transparency",
|
|
3459
|
+
methodology_transparency: "Methodology Transparency",
|
|
3123
3460
|
semantic_html: "Semantic HTML",
|
|
3124
3461
|
clean_html: "Clean HTML",
|
|
3125
3462
|
visible_date_signal: "Visible Date Signal",
|
|
@@ -3146,6 +3483,8 @@ var PILLAR_WEIGHTS = {
|
|
|
3146
3483
|
citation_ready_writing: 0.04,
|
|
3147
3484
|
answer_first_placement: 0.03,
|
|
3148
3485
|
evidence_packaging: 0.03,
|
|
3486
|
+
helpful_purpose_alignment: 0.03,
|
|
3487
|
+
first_hand_experience_signals: 0.03,
|
|
3149
3488
|
duplicate_content: 0.05,
|
|
3150
3489
|
cross_page_duplication: 0.03,
|
|
3151
3490
|
direct_answer_density: 0.05,
|
|
@@ -3153,28 +3492,30 @@ var PILLAR_WEIGHTS = {
|
|
|
3153
3492
|
query_answer_alignment: 0.04,
|
|
3154
3493
|
faq_section: 0.03,
|
|
3155
3494
|
table_list_extractability: 0.03,
|
|
3156
|
-
definition_patterns: 0.
|
|
3495
|
+
definition_patterns: 0.015,
|
|
3157
3496
|
entity_disambiguation: 0.02,
|
|
3158
3497
|
entity_consistency: 0.05,
|
|
3159
3498
|
internal_linking: 0.04,
|
|
3160
3499
|
content_freshness: 0.04,
|
|
3161
3500
|
author_schema_depth: 0.03,
|
|
3162
3501
|
schema_markup: 0.03,
|
|
3502
|
+
creator_transparency: 0.02,
|
|
3503
|
+
methodology_transparency: 0.02,
|
|
3163
3504
|
semantic_html: 0.02,
|
|
3164
3505
|
clean_html: 0.02,
|
|
3165
|
-
visible_date_signal: 0.
|
|
3506
|
+
visible_date_signal: 0.015,
|
|
3166
3507
|
extraction_friction: 0.02,
|
|
3167
|
-
image_context_ai:
|
|
3168
|
-
schema_coverage:
|
|
3169
|
-
speakable_schema:
|
|
3508
|
+
image_context_ai: 5e-3,
|
|
3509
|
+
schema_coverage: 5e-3,
|
|
3510
|
+
speakable_schema: 5e-3,
|
|
3170
3511
|
content_cannibalization: 0.02,
|
|
3171
|
-
llms_txt: 0.
|
|
3172
|
-
robots_txt: 0.
|
|
3512
|
+
llms_txt: 0.01,
|
|
3513
|
+
robots_txt: 0.01,
|
|
3173
3514
|
content_velocity: 0.02,
|
|
3174
|
-
content_licensing: 0.
|
|
3175
|
-
canonical_url:
|
|
3515
|
+
content_licensing: 0.01,
|
|
3516
|
+
canonical_url: 5e-3,
|
|
3176
3517
|
sitemap_completeness: 0.01,
|
|
3177
|
-
rss_feed:
|
|
3518
|
+
rss_feed: 5e-3
|
|
3178
3519
|
};
|
|
3179
3520
|
var CRITERION_EFFORT = {
|
|
3180
3521
|
topic_coherence: "High",
|
|
@@ -3184,6 +3525,8 @@ var CRITERION_EFFORT = {
|
|
|
3184
3525
|
citation_ready_writing: "Medium",
|
|
3185
3526
|
answer_first_placement: "Medium",
|
|
3186
3527
|
evidence_packaging: "Medium",
|
|
3528
|
+
helpful_purpose_alignment: "Medium",
|
|
3529
|
+
first_hand_experience_signals: "Medium",
|
|
3187
3530
|
duplicate_content: "Medium",
|
|
3188
3531
|
cross_page_duplication: "Medium",
|
|
3189
3532
|
direct_answer_density: "Medium",
|
|
@@ -3198,6 +3541,8 @@ var CRITERION_EFFORT = {
|
|
|
3198
3541
|
content_freshness: "Low",
|
|
3199
3542
|
author_schema_depth: "Low",
|
|
3200
3543
|
schema_markup: "Medium",
|
|
3544
|
+
creator_transparency: "Low",
|
|
3545
|
+
methodology_transparency: "Low",
|
|
3201
3546
|
semantic_html: "Low",
|
|
3202
3547
|
clean_html: "Medium",
|
|
3203
3548
|
visible_date_signal: "Low",
|
|
@@ -3222,6 +3567,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3222
3567
|
citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
|
|
3223
3568
|
answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
|
|
3224
3569
|
evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
|
|
3570
|
+
helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
|
|
3571
|
+
first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
|
|
3225
3572
|
direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
|
|
3226
3573
|
qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
|
|
3227
3574
|
query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
|
|
@@ -3234,6 +3581,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3234
3581
|
content_freshness: "Add dateModified schema and visible last-updated dates.",
|
|
3235
3582
|
author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
|
|
3236
3583
|
schema_markup: "Implement JSON-LD structured data on key pages.",
|
|
3584
|
+
creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
|
|
3585
|
+
methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
|
|
3237
3586
|
semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
|
|
3238
3587
|
clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
|
|
3239
3588
|
visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
|
|
@@ -3333,6 +3682,10 @@ var CRITERION_LABELS = {
|
|
|
3333
3682
|
"Visible Date Signal": "Visible Date Signal",
|
|
3334
3683
|
"Topic Coherence": "Topic Coherence",
|
|
3335
3684
|
"Content Depth": "Content Depth",
|
|
3685
|
+
"Helpful Purpose Alignment": "Helpful Purpose Alignment",
|
|
3686
|
+
"First-Hand Experience Signals": "First-Hand Experience Signals",
|
|
3687
|
+
"Creator Transparency": "Creator Transparency",
|
|
3688
|
+
"Methodology Transparency": "Methodology Transparency",
|
|
3336
3689
|
"Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
|
|
3337
3690
|
"Answer-First Placement": "Answer-First Placement",
|
|
3338
3691
|
"Evidence Packaging": "Evidence Packaging",
|
|
@@ -3438,6 +3791,8 @@ var CRITERION_WEIGHTS = {
|
|
|
3438
3791
|
qa_content_format: 0.04,
|
|
3439
3792
|
query_answer_alignment: 0.04,
|
|
3440
3793
|
faq_section: 0.03,
|
|
3794
|
+
helpful_purpose_alignment: 0.03,
|
|
3795
|
+
first_hand_experience_signals: 0.03,
|
|
3441
3796
|
// Content Organization (~30%)
|
|
3442
3797
|
entity_consistency: 0.05,
|
|
3443
3798
|
internal_linking: 0.04,
|
|
@@ -3445,28 +3800,30 @@ var CRITERION_WEIGHTS = {
|
|
|
3445
3800
|
schema_markup: 0.03,
|
|
3446
3801
|
author_schema_depth: 0.03,
|
|
3447
3802
|
table_list_extractability: 0.03,
|
|
3448
|
-
|
|
3449
|
-
|
|
3803
|
+
creator_transparency: 0.02,
|
|
3804
|
+
methodology_transparency: 0.02,
|
|
3805
|
+
definition_patterns: 0.015,
|
|
3806
|
+
visible_date_signal: 0.015,
|
|
3450
3807
|
semantic_html: 0.02,
|
|
3451
3808
|
clean_html: 0.02,
|
|
3452
3809
|
// Technical Plumbing (~15%)
|
|
3453
3810
|
content_cannibalization: 0.02,
|
|
3454
|
-
llms_txt: 0.
|
|
3455
|
-
robots_txt: 0.
|
|
3811
|
+
llms_txt: 0.01,
|
|
3812
|
+
robots_txt: 0.01,
|
|
3456
3813
|
content_velocity: 0.02,
|
|
3457
|
-
content_licensing: 0.
|
|
3814
|
+
content_licensing: 0.01,
|
|
3458
3815
|
sitemap_completeness: 0.01,
|
|
3459
|
-
canonical_url:
|
|
3460
|
-
rss_feed:
|
|
3461
|
-
schema_coverage:
|
|
3462
|
-
speakable_schema:
|
|
3816
|
+
canonical_url: 5e-3,
|
|
3817
|
+
rss_feed: 5e-3,
|
|
3818
|
+
schema_coverage: 5e-3,
|
|
3819
|
+
speakable_schema: 5e-3,
|
|
3463
3820
|
// V2 Criteria (~15%)
|
|
3464
3821
|
citation_ready_writing: 0.04,
|
|
3465
3822
|
answer_first_placement: 0.03,
|
|
3466
3823
|
evidence_packaging: 0.03,
|
|
3467
3824
|
entity_disambiguation: 0.02,
|
|
3468
3825
|
extraction_friction: 0.02,
|
|
3469
|
-
image_context_ai:
|
|
3826
|
+
image_context_ai: 5e-3,
|
|
3470
3827
|
// V3 Criteria
|
|
3471
3828
|
duplicate_content: 0.05,
|
|
3472
3829
|
cross_page_duplication: 0.03
|
|
@@ -3507,6 +3864,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3507
3864
|
effort: "Medium",
|
|
3508
3865
|
description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
|
|
3509
3866
|
},
|
|
3867
|
+
helpful_purpose_alignment: {
|
|
3868
|
+
name: "Improve Helpful Purpose Alignment",
|
|
3869
|
+
effort: "Medium",
|
|
3870
|
+
description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
|
|
3871
|
+
},
|
|
3872
|
+
first_hand_experience_signals: {
|
|
3873
|
+
name: "Add First-Hand Experience Signals",
|
|
3874
|
+
effort: "Medium",
|
|
3875
|
+
description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
|
|
3876
|
+
},
|
|
3510
3877
|
original_data: {
|
|
3511
3878
|
name: "Add Original Data & Case Studies",
|
|
3512
3879
|
effort: "High",
|
|
@@ -3562,6 +3929,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3562
3929
|
effort: "Low",
|
|
3563
3930
|
description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
|
|
3564
3931
|
},
|
|
3932
|
+
creator_transparency: {
|
|
3933
|
+
name: "Improve Creator Transparency",
|
|
3934
|
+
effort: "Low",
|
|
3935
|
+
description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
|
|
3936
|
+
},
|
|
3937
|
+
methodology_transparency: {
|
|
3938
|
+
name: "Add Methodology Transparency",
|
|
3939
|
+
effort: "Low",
|
|
3940
|
+
description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
|
|
3941
|
+
},
|
|
3565
3942
|
fact_density: {
|
|
3566
3943
|
name: "Increase Fact & Data Density",
|
|
3567
3944
|
effort: "Medium",
|
|
@@ -3821,20 +4198,12 @@ function formatList(items) {
|
|
|
3821
4198
|
}
|
|
3822
4199
|
|
|
3823
4200
|
// src/multi-page-fetcher.ts
|
|
3824
|
-
async function fetchPage(url, timeoutMs = 1e4) {
|
|
3825
|
-
|
|
3826
|
-
|
|
3827
|
-
|
|
3828
|
-
|
|
3829
|
-
|
|
3830
|
-
});
|
|
3831
|
-
if (res.status !== 200) return null;
|
|
3832
|
-
const text = await res.text();
|
|
3833
|
-
if (text.length < 200) return null;
|
|
3834
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3835
|
-
} catch {
|
|
3836
|
-
return null;
|
|
3837
|
-
}
|
|
4201
|
+
async function fetchPage(url, domain, timeoutMs = 1e4) {
|
|
4202
|
+
const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
|
|
4203
|
+
if (!res || res.status !== 200) return null;
|
|
4204
|
+
const text = await res.text();
|
|
4205
|
+
if (text.length < 200) return null;
|
|
4206
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3838
4207
|
}
|
|
3839
4208
|
var PAGE_VARIANTS = {
|
|
3840
4209
|
about: ["/about", "/about-us", "/company", "/who-we-are"],
|
|
@@ -3990,7 +4359,7 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
3990
4359
|
}
|
|
3991
4360
|
const entries = Array.from(urlsToFetch.entries());
|
|
3992
4361
|
if (entries.length === 0) return 0;
|
|
3993
|
-
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|
|
4362
|
+
const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
|
|
3994
4363
|
if (!siteData.blogSample) siteData.blogSample = [];
|
|
3995
4364
|
let added = 0;
|
|
3996
4365
|
for (let i = 0; i < results.length; i++) {
|
|
@@ -4017,19 +4386,23 @@ var PAGE_CRITERIA = {
|
|
|
4017
4386
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
4018
4387
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
4019
4388
|
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
4020
|
-
definition_patterns: { weight: 0.
|
|
4021
|
-
visible_date_signal: { weight: 0.
|
|
4389
|
+
definition_patterns: { weight: 0.015, label: "Definition Patterns" },
|
|
4390
|
+
visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
|
|
4022
4391
|
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
4023
4392
|
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
4024
4393
|
// Technical Plumbing
|
|
4025
|
-
canonical_url: { weight:
|
|
4394
|
+
canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
|
|
4026
4395
|
// V2 Criteria
|
|
4027
4396
|
citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
|
|
4028
4397
|
answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
|
|
4029
4398
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
4399
|
+
helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
|
|
4400
|
+
first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
|
|
4030
4401
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
4031
4402
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
4032
|
-
|
|
4403
|
+
creator_transparency: { weight: 0.02, label: "Creator Transparency" },
|
|
4404
|
+
methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
|
|
4405
|
+
image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
|
|
4033
4406
|
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
4034
4407
|
};
|
|
4035
4408
|
function extractJsonLdBlocks(html) {
|
|
@@ -4052,7 +4425,7 @@ function extractTypesFromJsonLd(blocks) {
|
|
|
4052
4425
|
}
|
|
4053
4426
|
return types;
|
|
4054
4427
|
}
|
|
4055
|
-
function
|
|
4428
|
+
function getTextContent2(html) {
|
|
4056
4429
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4057
4430
|
}
|
|
4058
4431
|
function extractQuestionHeadings2(html) {
|
|
@@ -4080,7 +4453,7 @@ function countAnsweredQuestions(html) {
|
|
|
4080
4453
|
}
|
|
4081
4454
|
return { total: questions.length, answered };
|
|
4082
4455
|
}
|
|
4083
|
-
function
|
|
4456
|
+
function cap2(value, max) {
|
|
4084
4457
|
return Math.min(value, max);
|
|
4085
4458
|
}
|
|
4086
4459
|
function scoreSchemaMarkup(html) {
|
|
@@ -4106,10 +4479,10 @@ function scoreSchemaMarkup(html) {
|
|
|
4106
4479
|
for (const t of types) {
|
|
4107
4480
|
if (knownTypes.includes(t)) knownCount++;
|
|
4108
4481
|
}
|
|
4109
|
-
score +=
|
|
4482
|
+
score += cap2(knownCount * 2, 4);
|
|
4110
4483
|
if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
|
|
4111
4484
|
if (types.has("FAQPage")) score += 1;
|
|
4112
|
-
return
|
|
4485
|
+
return cap2(score, 10);
|
|
4113
4486
|
}
|
|
4114
4487
|
function scoreQAFormat(html) {
|
|
4115
4488
|
const questions = extractQuestionHeadings2(html);
|
|
@@ -4121,7 +4494,7 @@ function scoreQAFormat(html) {
|
|
|
4121
4494
|
if (answered >= 1) score += 3;
|
|
4122
4495
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4123
4496
|
if (h1Matches.length === 1) score += 2;
|
|
4124
|
-
return
|
|
4497
|
+
return cap2(score, 10);
|
|
4125
4498
|
}
|
|
4126
4499
|
function scoreCleanHtml(html) {
|
|
4127
4500
|
let score = 0;
|
|
@@ -4130,15 +4503,15 @@ function scoreCleanHtml(html) {
|
|
|
4130
4503
|
for (const tag of semantics) {
|
|
4131
4504
|
if (html.toLowerCase().includes(tag)) semCount++;
|
|
4132
4505
|
}
|
|
4133
|
-
score +=
|
|
4506
|
+
score += cap2(semCount, 3);
|
|
4134
4507
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4135
4508
|
if (h1Matches.length === 1) score += 2;
|
|
4136
|
-
const text =
|
|
4509
|
+
const text = getTextContent2(html);
|
|
4137
4510
|
if (text.length > 500) score += 3;
|
|
4138
4511
|
const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
|
|
4139
4512
|
const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
|
|
4140
4513
|
if (hasTitle && hasDesc) score += 2;
|
|
4141
|
-
return
|
|
4514
|
+
return cap2(score, 10);
|
|
4142
4515
|
}
|
|
4143
4516
|
function scoreFaqSection(html) {
|
|
4144
4517
|
let score = 0;
|
|
@@ -4150,11 +4523,11 @@ function scoreFaqSection(html) {
|
|
|
4150
4523
|
const questions = extractQuestionHeadings2(html);
|
|
4151
4524
|
if (questions.length >= 10) score += 1;
|
|
4152
4525
|
if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
|
|
4153
|
-
return
|
|
4526
|
+
return cap2(score, 10);
|
|
4154
4527
|
}
|
|
4155
4528
|
function scoreOriginalData(html) {
|
|
4156
4529
|
let score = 0;
|
|
4157
|
-
const text =
|
|
4530
|
+
const text = getTextContent2(html);
|
|
4158
4531
|
if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
|
|
4159
4532
|
score += 3;
|
|
4160
4533
|
} else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
|
|
@@ -4171,7 +4544,7 @@ function scoreOriginalData(html) {
|
|
|
4171
4544
|
if (/href=["'][^"']*\/blog\b/i.test(html)) {
|
|
4172
4545
|
score += 2;
|
|
4173
4546
|
}
|
|
4174
|
-
return
|
|
4547
|
+
return cap2(score, 10);
|
|
4175
4548
|
}
|
|
4176
4549
|
function scoreQueryAnswerAlignment(html) {
|
|
4177
4550
|
const { total, answered } = countAnsweredQuestions(html);
|
|
@@ -4194,7 +4567,7 @@ function scoreContentFreshness(html) {
|
|
|
4194
4567
|
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
4195
4568
|
const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
|
|
4196
4569
|
if (yearPattern.test(html)) score += 2;
|
|
4197
|
-
return
|
|
4570
|
+
return cap2(score, 10);
|
|
4198
4571
|
}
|
|
4199
4572
|
function scoreTableListExtractability(html) {
|
|
4200
4573
|
let score = 0;
|
|
@@ -4207,7 +4580,7 @@ function scoreTableListExtractability(html) {
|
|
|
4207
4580
|
const listItems = html.match(/<li[\s>]/gi) || [];
|
|
4208
4581
|
if (listItems.length >= 10) score += 1;
|
|
4209
4582
|
if (/<dl[\s>]/i.test(html)) score += 1;
|
|
4210
|
-
return
|
|
4583
|
+
return cap2(score, 10);
|
|
4211
4584
|
}
|
|
4212
4585
|
function scoreDirectAnswerDensity(html) {
|
|
4213
4586
|
let score = 0;
|
|
@@ -4223,9 +4596,9 @@ function scoreDirectAnswerDensity(html) {
|
|
|
4223
4596
|
}
|
|
4224
4597
|
if (snippetCount >= 3) score += 2;
|
|
4225
4598
|
else if (snippetCount >= 1) score += 1;
|
|
4226
|
-
const directOpeners =
|
|
4599
|
+
const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
|
|
4227
4600
|
if (directOpeners.length >= 2) score += 2;
|
|
4228
|
-
return
|
|
4601
|
+
return cap2(score, 10);
|
|
4229
4602
|
}
|
|
4230
4603
|
function scoreSemanticHtml(html) {
|
|
4231
4604
|
let score = 0;
|
|
@@ -4235,7 +4608,7 @@ function scoreSemanticHtml(html) {
|
|
|
4235
4608
|
for (const el of elements) {
|
|
4236
4609
|
if (lowerHtml.includes(el)) count++;
|
|
4237
4610
|
}
|
|
4238
|
-
score +=
|
|
4611
|
+
score += cap2(Math.floor(count * 0.7), 4);
|
|
4239
4612
|
const imgTags = html.match(/<img\s[^>]*>/gi) || [];
|
|
4240
4613
|
if (imgTags.length > 0) {
|
|
4241
4614
|
let withAlt = 0;
|
|
@@ -4246,11 +4619,11 @@ function scoreSemanticHtml(html) {
|
|
|
4246
4619
|
}
|
|
4247
4620
|
if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
|
|
4248
4621
|
if (/\baria-/i.test(html)) score += 2;
|
|
4249
|
-
return
|
|
4622
|
+
return cap2(score, 10);
|
|
4250
4623
|
}
|
|
4251
4624
|
function scoreFactDensity(html) {
|
|
4252
4625
|
let score = 0;
|
|
4253
|
-
const text =
|
|
4626
|
+
const text = getTextContent2(html);
|
|
4254
4627
|
const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
|
|
4255
4628
|
if (numericPatterns.length >= 6) score += 5;
|
|
4256
4629
|
else if (numericPatterns.length >= 3) score += 3;
|
|
@@ -4263,11 +4636,11 @@ function scoreFactDensity(html) {
|
|
|
4263
4636
|
if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
|
|
4264
4637
|
const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
|
|
4265
4638
|
if (units.length >= 2) score += 1;
|
|
4266
|
-
return
|
|
4639
|
+
return cap2(score, 10);
|
|
4267
4640
|
}
|
|
4268
4641
|
function scoreDefinitionPatterns(html) {
|
|
4269
4642
|
let score = 0;
|
|
4270
|
-
const text =
|
|
4643
|
+
const text = getTextContent2(html);
|
|
4271
4644
|
const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
|
|
4272
4645
|
if (defPatterns.length >= 3) score += 5;
|
|
4273
4646
|
else if (defPatterns.length >= 1) score += 3;
|
|
@@ -4275,7 +4648,7 @@ function scoreDefinitionPatterns(html) {
|
|
|
4275
4648
|
if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
|
|
4276
4649
|
if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
|
|
4277
4650
|
if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
|
|
4278
|
-
return
|
|
4651
|
+
return cap2(score, 10);
|
|
4279
4652
|
}
|
|
4280
4653
|
function scoreCanonicalUrl(html, url) {
|
|
4281
4654
|
let score = 0;
|
|
@@ -4296,7 +4669,7 @@ function scoreCanonicalUrl(html, url) {
|
|
|
4296
4669
|
if (canonicalHref.startsWith("https://")) score += 2;
|
|
4297
4670
|
const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
|
|
4298
4671
|
if (allCanonicals.length === 1) score += 1;
|
|
4299
|
-
return
|
|
4672
|
+
return cap2(score, 10);
|
|
4300
4673
|
}
|
|
4301
4674
|
function scoreVisibleDateSignal(html) {
|
|
4302
4675
|
let score = 0;
|
|
@@ -4315,11 +4688,11 @@ function scoreVisibleDateSignal(html) {
|
|
|
4315
4688
|
} catch {
|
|
4316
4689
|
}
|
|
4317
4690
|
}
|
|
4318
|
-
return
|
|
4691
|
+
return cap2(score, 10);
|
|
4319
4692
|
}
|
|
4320
4693
|
function scoreCitationReadyWriting(html) {
|
|
4321
4694
|
let score = 0;
|
|
4322
|
-
const text =
|
|
4695
|
+
const text = getTextContent2(html);
|
|
4323
4696
|
const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
|
|
4324
4697
|
if (defSentences.length >= 3) score += 3;
|
|
4325
4698
|
else if (defSentences.length >= 1) score += 1;
|
|
@@ -4348,7 +4721,7 @@ function scoreCitationReadyWriting(html) {
|
|
|
4348
4721
|
);
|
|
4349
4722
|
if (quotableLines.length >= 2) score += 2;
|
|
4350
4723
|
else if (quotableLines.length >= 1) score += 1;
|
|
4351
|
-
return
|
|
4724
|
+
return cap2(score, 10);
|
|
4352
4725
|
}
|
|
4353
4726
|
function scoreAnswerFirstPlacement(html) {
|
|
4354
4727
|
let score = 0;
|
|
@@ -4359,8 +4732,8 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4359
4732
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4360
4733
|
for (const p of earlyParagraphs) {
|
|
4361
4734
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4362
|
-
const
|
|
4363
|
-
if (
|
|
4735
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
4736
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4364
4737
|
score += 4;
|
|
4365
4738
|
break;
|
|
4366
4739
|
}
|
|
@@ -4381,11 +4754,11 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4381
4754
|
score += 3;
|
|
4382
4755
|
}
|
|
4383
4756
|
}
|
|
4384
|
-
return
|
|
4757
|
+
return cap2(score, 10);
|
|
4385
4758
|
}
|
|
4386
4759
|
function scoreEvidencePackaging(html) {
|
|
4387
4760
|
let score = 0;
|
|
4388
|
-
const text =
|
|
4761
|
+
const text = getTextContent2(html);
|
|
4389
4762
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4390
4763
|
let inlineCitations = 0;
|
|
4391
4764
|
for (const p of paragraphs) {
|
|
@@ -4403,11 +4776,11 @@ function scoreEvidencePackaging(html) {
|
|
|
4403
4776
|
const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
|
|
4404
4777
|
if (sourcedStats.length >= 2) score += 2;
|
|
4405
4778
|
else if (sourcedStats.length >= 1) score += 1;
|
|
4406
|
-
return
|
|
4779
|
+
return cap2(score, 10);
|
|
4407
4780
|
}
|
|
4408
4781
|
function scoreEntityDisambiguation(html) {
|
|
4409
4782
|
let score = 0;
|
|
4410
|
-
const text =
|
|
4783
|
+
const text = getTextContent2(html);
|
|
4411
4784
|
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
4412
4785
|
if (!h1Match) return 3;
|
|
4413
4786
|
const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
|
|
@@ -4425,11 +4798,11 @@ function scoreEntityDisambiguation(html) {
|
|
|
4425
4798
|
if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
|
|
4426
4799
|
score += 3;
|
|
4427
4800
|
}
|
|
4428
|
-
return
|
|
4801
|
+
return cap2(score, 10);
|
|
4429
4802
|
}
|
|
4430
4803
|
function scoreExtractionFriction(html) {
|
|
4431
4804
|
let score = 0;
|
|
4432
|
-
const text =
|
|
4805
|
+
const text = getTextContent2(html);
|
|
4433
4806
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
4434
4807
|
const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
|
|
4435
4808
|
if (avgLen > 0 && avgLen < 20) score += 3;
|
|
@@ -4452,7 +4825,7 @@ function scoreExtractionFriction(html) {
|
|
|
4452
4825
|
if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
|
|
4453
4826
|
score = Math.max(0, score - 2);
|
|
4454
4827
|
}
|
|
4455
|
-
return
|
|
4828
|
+
return cap2(score, 10);
|
|
4456
4829
|
}
|
|
4457
4830
|
function scoreImageContextAI(html) {
|
|
4458
4831
|
let score = 0;
|
|
@@ -4477,7 +4850,7 @@ function scoreImageContextAI(html) {
|
|
|
4477
4850
|
else if (goodAltCount > 0) score += 1;
|
|
4478
4851
|
const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
|
|
4479
4852
|
if (contextualImages.length > 0) score += 3;
|
|
4480
|
-
return
|
|
4853
|
+
return cap2(score, 10);
|
|
4481
4854
|
}
|
|
4482
4855
|
function scoreDuplicateContent(html) {
|
|
4483
4856
|
return scoreDuplicateContentDetailed(html).score;
|
|
@@ -4539,8 +4912,12 @@ var SCORING_FUNCTIONS = {
|
|
|
4539
4912
|
citation_ready_writing: scoreCitationReadyWriting,
|
|
4540
4913
|
answer_first_placement: scoreAnswerFirstPlacement,
|
|
4541
4914
|
evidence_packaging: scoreEvidencePackaging,
|
|
4915
|
+
helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
|
|
4916
|
+
first_hand_experience_signals: scoreFirstHandExperienceSignals,
|
|
4542
4917
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4543
4918
|
extraction_friction: scoreExtractionFriction,
|
|
4919
|
+
creator_transparency: scoreCreatorTransparency,
|
|
4920
|
+
methodology_transparency: scoreMethodologyTransparency,
|
|
4544
4921
|
image_context_ai: scoreImageContextAI,
|
|
4545
4922
|
duplicate_content: scoreDuplicateContent
|
|
4546
4923
|
};
|
|
@@ -4585,7 +4962,7 @@ function extractTitle(html) {
|
|
|
4585
4962
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4586
4963
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4587
4964
|
}
|
|
4588
|
-
function
|
|
4965
|
+
function getTextContent3(html) {
|
|
4589
4966
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4590
4967
|
}
|
|
4591
4968
|
function countWords2(text) {
|
|
@@ -4645,9 +5022,9 @@ function checkMissingOgTags(html) {
|
|
|
4645
5022
|
}
|
|
4646
5023
|
return null;
|
|
4647
5024
|
}
|
|
4648
|
-
function checkThinContent(
|
|
4649
|
-
if (
|
|
4650
|
-
return { check: "thin-content", label: `Thin content (${
|
|
5025
|
+
function checkThinContent(wordCount2) {
|
|
5026
|
+
if (wordCount2 < 300) {
|
|
5027
|
+
return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
|
|
4651
5028
|
}
|
|
4652
5029
|
return null;
|
|
4653
5030
|
}
|
|
@@ -4744,15 +5121,15 @@ function checkNoAnswerBlock(html) {
|
|
|
4744
5121
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4745
5122
|
for (const p of earlyParagraphs) {
|
|
4746
5123
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4747
|
-
const
|
|
4748
|
-
if (
|
|
5124
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
5125
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4749
5126
|
return null;
|
|
4750
5127
|
}
|
|
4751
5128
|
}
|
|
4752
5129
|
return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
|
|
4753
5130
|
}
|
|
4754
5131
|
function checkNoEvidence(html, url) {
|
|
4755
|
-
const text =
|
|
5132
|
+
const text = getTextContent3(html);
|
|
4756
5133
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4757
5134
|
let inlineCitations = 0;
|
|
4758
5135
|
for (const p of paragraphs) {
|
|
@@ -4766,7 +5143,7 @@ function checkNoEvidence(html, url) {
|
|
|
4766
5143
|
return null;
|
|
4767
5144
|
}
|
|
4768
5145
|
function checkHasCitationReadyContent(html) {
|
|
4769
|
-
const text =
|
|
5146
|
+
const text = getTextContent3(html);
|
|
4770
5147
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
|
|
4771
5148
|
let quotable = 0;
|
|
4772
5149
|
for (const s of sentences) {
|
|
@@ -4791,8 +5168,8 @@ function checkDuplicateContentBlocks(html) {
|
|
|
4791
5168
|
}
|
|
4792
5169
|
function analyzePage(html, url, category) {
|
|
4793
5170
|
const title = extractTitle(html);
|
|
4794
|
-
const textContent =
|
|
4795
|
-
const
|
|
5171
|
+
const textContent = getTextContent3(html);
|
|
5172
|
+
const wordCount2 = countWords2(textContent);
|
|
4796
5173
|
const issues = [];
|
|
4797
5174
|
const strengths = [];
|
|
4798
5175
|
const issueChecks = [
|
|
@@ -4803,7 +5180,7 @@ function analyzePage(html, url, category) {
|
|
|
4803
5180
|
checkNoSchema(html),
|
|
4804
5181
|
checkMissingCanonical(html),
|
|
4805
5182
|
checkMissingOgTags(html),
|
|
4806
|
-
checkThinContent(
|
|
5183
|
+
checkThinContent(wordCount2),
|
|
4807
5184
|
checkImagesMissingAlt(html),
|
|
4808
5185
|
checkNoInternalLinks(html, url),
|
|
4809
5186
|
checkNoAnswerBlock(html),
|
|
@@ -4822,7 +5199,7 @@ function analyzePage(html, url, category) {
|
|
|
4822
5199
|
if (result) strengths.push(result);
|
|
4823
5200
|
}
|
|
4824
5201
|
const { aeoScore, criterionScores } = scorePage(html, url);
|
|
4825
|
-
return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
|
|
5202
|
+
return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
|
|
4826
5203
|
}
|
|
4827
5204
|
function analyzeAllPages(siteData) {
|
|
4828
5205
|
const reviews = [];
|
|
@@ -4844,6 +5221,10 @@ function getTextLength(html) {
|
|
|
4844
5221
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
|
|
4845
5222
|
}
|
|
4846
5223
|
async function audit(domain, options) {
|
|
5224
|
+
const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
|
|
5225
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
5226
|
+
throw new Error(`Refusing to audit private or local address: ${domain}`);
|
|
5227
|
+
}
|
|
4847
5228
|
const startTime = Date.now();
|
|
4848
5229
|
let renderedWithHeadless = false;
|
|
4849
5230
|
const siteData = await prefetchSiteData(domain);
|
|
@@ -4876,7 +5257,7 @@ async function audit(domain, options) {
|
|
|
4876
5257
|
}
|
|
4877
5258
|
}
|
|
4878
5259
|
if (options?.fullCrawl) {
|
|
4879
|
-
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-
|
|
5260
|
+
const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-HAF2X2X3.js");
|
|
4880
5261
|
const crawlResult = await crawlFullSite2(siteData, {
|
|
4881
5262
|
maxPages: options.maxPages ?? 200,
|
|
4882
5263
|
concurrency: options.concurrency ?? 5
|
|
@@ -4951,7 +5332,7 @@ function extractTitle2(html) {
|
|
|
4951
5332
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4952
5333
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4953
5334
|
}
|
|
4954
|
-
function
|
|
5335
|
+
function getTextContent4(html) {
|
|
4955
5336
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4956
5337
|
}
|
|
4957
5338
|
function countWords3(text) {
|
|
@@ -5101,12 +5482,12 @@ function buildLinkGraph(pages, domain, homepageUrl) {
|
|
|
5101
5482
|
const norm = normalizeUrl(url);
|
|
5102
5483
|
if (nodes.has(norm)) continue;
|
|
5103
5484
|
const title = extractTitle2(page.text);
|
|
5104
|
-
const text =
|
|
5105
|
-
const
|
|
5485
|
+
const text = getTextContent4(page.text);
|
|
5486
|
+
const wordCount2 = countWords3(text);
|
|
5106
5487
|
nodes.set(norm, {
|
|
5107
5488
|
url: norm,
|
|
5108
5489
|
title,
|
|
5109
|
-
wordCount,
|
|
5490
|
+
wordCount: wordCount2,
|
|
5110
5491
|
category: page.category || "content",
|
|
5111
5492
|
inDegree: 0,
|
|
5112
5493
|
outDegree: 0,
|
|
@@ -5174,6 +5555,8 @@ var CRITERION_WEIGHTS2 = {
|
|
|
5174
5555
|
qa_content_format: 0.04,
|
|
5175
5556
|
query_answer_alignment: 0.04,
|
|
5176
5557
|
faq_section: 0.03,
|
|
5558
|
+
helpful_purpose_alignment: 0.03,
|
|
5559
|
+
first_hand_experience_signals: 0.03,
|
|
5177
5560
|
// Content Organization (~30%)
|
|
5178
5561
|
entity_consistency: 0.05,
|
|
5179
5562
|
internal_linking: 0.04,
|
|
@@ -5181,30 +5564,32 @@ var CRITERION_WEIGHTS2 = {
|
|
|
5181
5564
|
schema_markup: 0.03,
|
|
5182
5565
|
author_schema_depth: 0.03,
|
|
5183
5566
|
table_list_extractability: 0.03,
|
|
5184
|
-
|
|
5185
|
-
|
|
5567
|
+
creator_transparency: 0.02,
|
|
5568
|
+
methodology_transparency: 0.02,
|
|
5569
|
+
definition_patterns: 0.015,
|
|
5570
|
+
visible_date_signal: 0.015,
|
|
5186
5571
|
semantic_html: 0.02,
|
|
5187
5572
|
clean_html: 0.02,
|
|
5188
5573
|
// Technical Plumbing (~15%)
|
|
5189
5574
|
content_cannibalization: 0.02,
|
|
5190
5575
|
duplicate_content: 0.05,
|
|
5191
5576
|
cross_page_duplication: 0.03,
|
|
5192
|
-
llms_txt: 0.
|
|
5193
|
-
robots_txt: 0.
|
|
5577
|
+
llms_txt: 0.01,
|
|
5578
|
+
robots_txt: 0.01,
|
|
5194
5579
|
content_velocity: 0.02,
|
|
5195
|
-
content_licensing: 0.
|
|
5580
|
+
content_licensing: 0.01,
|
|
5196
5581
|
sitemap_completeness: 0.01,
|
|
5197
|
-
canonical_url:
|
|
5198
|
-
rss_feed:
|
|
5199
|
-
schema_coverage:
|
|
5200
|
-
speakable_schema:
|
|
5582
|
+
canonical_url: 5e-3,
|
|
5583
|
+
rss_feed: 5e-3,
|
|
5584
|
+
schema_coverage: 5e-3,
|
|
5585
|
+
speakable_schema: 5e-3,
|
|
5201
5586
|
// V2 Criteria (~15%)
|
|
5202
5587
|
citation_ready_writing: 0.04,
|
|
5203
5588
|
answer_first_placement: 0.03,
|
|
5204
5589
|
evidence_packaging: 0.03,
|
|
5205
5590
|
entity_disambiguation: 0.02,
|
|
5206
5591
|
extraction_friction: 0.02,
|
|
5207
|
-
image_context_ai:
|
|
5592
|
+
image_context_ai: 5e-3
|
|
5208
5593
|
};
|
|
5209
5594
|
var PHASE_CONFIG = [
|
|
5210
5595
|
{
|
|
@@ -5234,6 +5619,8 @@ var PHASE_CONFIG = [
|
|
|
5234
5619
|
"answer_first_placement",
|
|
5235
5620
|
"evidence_packaging",
|
|
5236
5621
|
"entity_disambiguation",
|
|
5622
|
+
"helpful_purpose_alignment",
|
|
5623
|
+
"first_hand_experience_signals",
|
|
5237
5624
|
"duplicate_content",
|
|
5238
5625
|
"cross_page_duplication"
|
|
5239
5626
|
]
|
|
@@ -5247,6 +5634,8 @@ var PHASE_CONFIG = [
|
|
|
5247
5634
|
"schema_coverage",
|
|
5248
5635
|
"speakable_schema",
|
|
5249
5636
|
"author_schema_depth",
|
|
5637
|
+
"creator_transparency",
|
|
5638
|
+
"methodology_transparency",
|
|
5250
5639
|
"content_licensing",
|
|
5251
5640
|
"entity_consistency",
|
|
5252
5641
|
"semantic_html",
|
|
@@ -5269,7 +5658,7 @@ function impactFromScore(score) {
|
|
|
5269
5658
|
}
|
|
5270
5659
|
function effortForCriterion(criterion, score) {
|
|
5271
5660
|
const trivialCriteria = ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"];
|
|
5272
|
-
const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"];
|
|
5661
|
+
const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "creator_transparency", "methodology_transparency", "semantic_html", "definition_patterns", "content_freshness"];
|
|
5273
5662
|
const highCriteria = ["original_data", "content_velocity", "content_cannibalization"];
|
|
5274
5663
|
if (trivialCriteria.includes(criterion)) return score <= 3 ? "low" : "trivial";
|
|
5275
5664
|
if (lowCriteria.includes(criterion)) return score <= 3 ? "medium" : "low";
|
|
@@ -5536,6 +5925,58 @@ Sitemap: https://example.com/sitemap.xml`,
|
|
|
5536
5925
|
pageCount: affected?.length
|
|
5537
5926
|
}];
|
|
5538
5927
|
},
|
|
5928
|
+
helpful_purpose_alignment: (c, pages) => {
|
|
5929
|
+
if (c.score >= 10) return [];
|
|
5930
|
+
const impact = impactFromScore(c.score);
|
|
5931
|
+
const effort = effortForCriterion("helpful_purpose_alignment", c.score);
|
|
5932
|
+
const affected = getAffectedPages("helpful_purpose_alignment", pages);
|
|
5933
|
+
return [{
|
|
5934
|
+
id: "fix-helpful-purpose-alignment",
|
|
5935
|
+
criterion: c.criterion_label,
|
|
5936
|
+
criterionId: c.criterion,
|
|
5937
|
+
title: "Make pages solve the user task faster",
|
|
5938
|
+
description: "Reduce search-first filler and rewrite pages so the promised task is resolved quickly with concrete guidance, tradeoffs, and next steps.",
|
|
5939
|
+
impact,
|
|
5940
|
+
effort,
|
|
5941
|
+
impactScore: 0,
|
|
5942
|
+
category: "content",
|
|
5943
|
+
steps: [
|
|
5944
|
+
"Rewrite first paragraphs to answer the user need within the first 150-300 words",
|
|
5945
|
+
'Remove generic intros like "In this guide" and broad filler that could fit any topic',
|
|
5946
|
+
"Add concrete decision help: tradeoffs, risks, constraints, and next steps",
|
|
5947
|
+
"Move aggressive CTAs below the first useful answer block"
|
|
5948
|
+
],
|
|
5949
|
+
successCriteria: "Pages lead with task-solving guidance instead of generic search-first framing",
|
|
5950
|
+
affectedPages: affected,
|
|
5951
|
+
pageCount: affected?.length
|
|
5952
|
+
}];
|
|
5953
|
+
},
|
|
5954
|
+
first_hand_experience_signals: (c, pages) => {
|
|
5955
|
+
if (c.score >= 10) return [];
|
|
5956
|
+
const impact = impactFromScore(c.score);
|
|
5957
|
+
const effort = effortForCriterion("first_hand_experience_signals", c.score);
|
|
5958
|
+
const affected = getAffectedPages("first_hand_experience_signals", pages);
|
|
5959
|
+
return [{
|
|
5960
|
+
id: "fix-first-hand-experience",
|
|
5961
|
+
criterion: c.criterion_label,
|
|
5962
|
+
criterionId: c.criterion,
|
|
5963
|
+
title: "Add first-hand experience signals",
|
|
5964
|
+
description: "Show real use, testing, implementation, or lived experience instead of relying on generic summary content.",
|
|
5965
|
+
impact,
|
|
5966
|
+
effort,
|
|
5967
|
+
impactScore: 0,
|
|
5968
|
+
category: "content",
|
|
5969
|
+
steps: [
|
|
5970
|
+
"Add specific observations from real use, testing, or implementation",
|
|
5971
|
+
"Document limitations, edge cases, or lessons learned in practice",
|
|
5972
|
+
"Include screenshots, photos, before/after metrics, or original artifacts where relevant",
|
|
5973
|
+
"Rewrite generic sections to reflect direct experience with the subject matter"
|
|
5974
|
+
],
|
|
5975
|
+
successCriteria: "Key pages contain credible signs of direct use or observation, not just generic advice",
|
|
5976
|
+
affectedPages: affected,
|
|
5977
|
+
pageCount: affected?.length
|
|
5978
|
+
}];
|
|
5979
|
+
},
|
|
5539
5980
|
original_data: (c, pages) => {
|
|
5540
5981
|
if (c.score >= 10) return [];
|
|
5541
5982
|
const impact = impactFromScore(c.score);
|
|
@@ -5902,6 +6343,58 @@ Summarization: yes`,
|
|
|
5902
6343
|
successCriteria: "Articles have Person schema for authors with credentials"
|
|
5903
6344
|
}];
|
|
5904
6345
|
},
|
|
6346
|
+
creator_transparency: (c, pages) => {
|
|
6347
|
+
if (c.score >= 10) return [];
|
|
6348
|
+
const impact = impactFromScore(c.score);
|
|
6349
|
+
const effort = effortForCriterion("creator_transparency", c.score);
|
|
6350
|
+
const affected = getAffectedPages("creator_transparency", pages);
|
|
6351
|
+
return [{
|
|
6352
|
+
id: "fix-creator-transparency",
|
|
6353
|
+
criterion: c.criterion_label,
|
|
6354
|
+
criterionId: c.criterion,
|
|
6355
|
+
title: "Make content creators clearly visible",
|
|
6356
|
+
description: "Add visible bylines, author pages, and reviewer/editor attribution so readers can clearly tell who created the content.",
|
|
6357
|
+
impact,
|
|
6358
|
+
effort,
|
|
6359
|
+
impactScore: 0,
|
|
6360
|
+
category: "trust",
|
|
6361
|
+
steps: [
|
|
6362
|
+
"Add visible bylines to article-like pages where readers expect them",
|
|
6363
|
+
"Link author names to author pages with role, expertise area, and relevant background",
|
|
6364
|
+
"Add reviewer or editor attribution on sensitive or expert content",
|
|
6365
|
+
"Keep visible creator identity consistent with schema markup"
|
|
6366
|
+
],
|
|
6367
|
+
successCriteria: "Article-like pages have clear visible bylines and linked creator context",
|
|
6368
|
+
affectedPages: affected,
|
|
6369
|
+
pageCount: affected?.length
|
|
6370
|
+
}];
|
|
6371
|
+
},
|
|
6372
|
+
methodology_transparency: (c, pages) => {
|
|
6373
|
+
if (c.score >= 10) return [];
|
|
6374
|
+
const impact = impactFromScore(c.score);
|
|
6375
|
+
const effort = effortForCriterion("methodology_transparency", c.score);
|
|
6376
|
+
const affected = getAffectedPages("methodology_transparency", pages);
|
|
6377
|
+
return [{
|
|
6378
|
+
id: "fix-methodology-transparency",
|
|
6379
|
+
criterion: c.criterion_label,
|
|
6380
|
+
criterionId: c.criterion,
|
|
6381
|
+
title: "Explain how content was tested or reviewed",
|
|
6382
|
+
description: "Add methodology, criteria, testing, review, or update-process details where users would expect them.",
|
|
6383
|
+
impact,
|
|
6384
|
+
effort,
|
|
6385
|
+
impactScore: 0,
|
|
6386
|
+
category: "trust",
|
|
6387
|
+
steps: [
|
|
6388
|
+
'Add a "How we tested", "Methodology", or review-process section where relevant',
|
|
6389
|
+
"Document criteria, tools used, sample size, timeframe, or update policy",
|
|
6390
|
+
"Disclose AI assistance when a reasonable reader would expect that context",
|
|
6391
|
+
"Support methodology notes with screenshots, tables, or process artifacts when possible"
|
|
6392
|
+
],
|
|
6393
|
+
successCriteria: "Review, comparison, and research-style pages explain how conclusions were produced",
|
|
6394
|
+
affectedPages: affected,
|
|
6395
|
+
pageCount: affected?.length
|
|
6396
|
+
}];
|
|
6397
|
+
},
|
|
5905
6398
|
fact_density: (c, pages) => {
|
|
5906
6399
|
if (c.score >= 10) return [];
|
|
5907
6400
|
const impact = impactFromScore(c.score);
|