aeorank 3.1.1 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/browser.d.ts +4 -4
- package/dist/browser.js +650 -157
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
- package/dist/chunk-DW7MPQ4X.js.map +1 -0
- package/dist/chunk-PYV5JVTC.js +179 -0
- package/dist/chunk-PYV5JVTC.js.map +1 -0
- package/dist/cli.js +519 -140
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
- package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
- package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
- package/dist/index.cjs +837 -183
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +650 -157
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-RYV25AUV.js.map +0 -1
- package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
- /package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
isSafeFetchTarget,
|
|
4
|
+
isSafePublicUrl,
|
|
5
|
+
normalizeHostname,
|
|
6
|
+
safeFetch
|
|
7
|
+
} from "./chunk-PYV5JVTC.js";
|
|
2
8
|
|
|
3
9
|
// src/cli.ts
|
|
4
10
|
import { writeFileSync } from "fs";
|
|
@@ -145,19 +151,181 @@ function shingleJaccardSimilarity(a, b) {
|
|
|
145
151
|
return union === 0 ? 0 : intersection / union;
|
|
146
152
|
}
|
|
147
153
|
|
|
154
|
+
// src/helpful-content.ts
|
|
155
|
+
function cap(value, max) {
|
|
156
|
+
return Math.min(max, value);
|
|
157
|
+
}
|
|
158
|
+
function floor(value, min) {
|
|
159
|
+
return Math.max(min, value);
|
|
160
|
+
}
|
|
161
|
+
function countMatches(text, pattern) {
|
|
162
|
+
return text.match(pattern)?.length ?? 0;
|
|
163
|
+
}
|
|
164
|
+
function stripScriptsAndStyles(html) {
|
|
165
|
+
return html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ");
|
|
166
|
+
}
|
|
167
|
+
function getTextContent(html) {
|
|
168
|
+
return stripScriptsAndStyles(html).replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
169
|
+
}
|
|
170
|
+
function getBodyHtml(html) {
|
|
171
|
+
const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
|
|
172
|
+
return bodyMatch ? bodyMatch[1] : html;
|
|
173
|
+
}
|
|
174
|
+
function getFirstParagraphText(html) {
|
|
175
|
+
const firstPara = getBodyHtml(html).match(/<p[^>]*>([\s\S]*?)<\/p>/i);
|
|
176
|
+
return firstPara ? firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim() : "";
|
|
177
|
+
}
|
|
178
|
+
function firstNWords(text, count) {
|
|
179
|
+
return text.split(/\s+/).slice(0, count).join(" ");
|
|
180
|
+
}
|
|
181
|
+
function getH1Text(html) {
|
|
182
|
+
const match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
183
|
+
return match ? match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim() : "";
|
|
184
|
+
}
|
|
185
|
+
function getTitleText(html) {
|
|
186
|
+
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
187
|
+
return match ? match[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim() : "";
|
|
188
|
+
}
|
|
189
|
+
function wordCount(text) {
|
|
190
|
+
return text ? text.split(/\s+/).filter(Boolean).length : 0;
|
|
191
|
+
}
|
|
192
|
+
function isContentLikePage(html, url) {
|
|
193
|
+
const text = getTextContent(html);
|
|
194
|
+
const wc = wordCount(text);
|
|
195
|
+
let signals = 0;
|
|
196
|
+
if (url && /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorial|case-stud|whitepaper|faq)\b/i.test(url)) {
|
|
197
|
+
signals += 2;
|
|
198
|
+
}
|
|
199
|
+
if (/<article[\s>]/i.test(html)) signals += 1;
|
|
200
|
+
if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
|
|
201
|
+
if (wc >= 500) signals += 1;
|
|
202
|
+
if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
|
|
203
|
+
if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
|
|
204
|
+
return signals >= 2;
|
|
205
|
+
}
|
|
206
|
+
function expectsMethodology(html, url) {
|
|
207
|
+
const text = getTextContent(html);
|
|
208
|
+
const title = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
|
|
209
|
+
const urlText = (url || "").toLowerCase();
|
|
210
|
+
if (/(?:review|compare|comparison|vs\.?|best|benchmark|study|analysis|survey|report|research|tested|test|methodology)/i.test(title)) {
|
|
211
|
+
return true;
|
|
212
|
+
}
|
|
213
|
+
if (/(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|best)/i.test(urlText)) {
|
|
214
|
+
return true;
|
|
215
|
+
}
|
|
216
|
+
return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
|
|
217
|
+
}
|
|
218
|
+
function titleAndBodyAlign(html) {
|
|
219
|
+
const h1 = getH1Text(html);
|
|
220
|
+
const title = getTitleText(html);
|
|
221
|
+
const text = firstNWords(getTextContent(html), 250).toLowerCase();
|
|
222
|
+
const topic = `${title} ${h1}`.toLowerCase();
|
|
223
|
+
const keywords = topic.split(/[\s|:()\-/]+/).filter((w) => w.length >= 5 && !/^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(w));
|
|
224
|
+
const uniqueKeywords = [...new Set(keywords)];
|
|
225
|
+
if (uniqueKeywords.length === 0) return false;
|
|
226
|
+
return uniqueKeywords.filter((w) => text.includes(w)).length >= Math.min(2, uniqueKeywords.length);
|
|
227
|
+
}
|
|
228
|
+
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;
|
|
229
|
+
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
|
|
230
|
+
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
|
|
231
|
+
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
|
|
232
|
+
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
|
|
233
|
+
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
|
|
234
|
+
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
|
|
235
|
+
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
|
|
236
|
+
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
|
|
237
|
+
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
|
|
238
|
+
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
|
|
239
|
+
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
|
|
240
|
+
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
|
|
241
|
+
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
|
|
242
|
+
function scoreHelpfulPurposeAlignment(html, url) {
|
|
243
|
+
const text = getTextContent(html);
|
|
244
|
+
if (!text) return 0;
|
|
245
|
+
const contentLike = isContentLikePage(html, url);
|
|
246
|
+
if (!contentLike && wordCount(text) < 250) return 5;
|
|
247
|
+
let score = contentLike ? 3 : 5;
|
|
248
|
+
const firstPara = getFirstParagraphText(html);
|
|
249
|
+
const earlyText = firstNWords(text, 300);
|
|
250
|
+
const bodyHtml = getBodyHtml(html);
|
|
251
|
+
if (firstPara && !GENERIC_OPENERS.test(firstPara)) score += 2;
|
|
252
|
+
if (countMatches(earlyText, PRACTICAL_LANGUAGE) >= 1) score += 2;
|
|
253
|
+
else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) score += 1;
|
|
254
|
+
const tradeoffCount = countMatches(text, TRADEOFF_LANGUAGE);
|
|
255
|
+
if (tradeoffCount >= 2) score += 2;
|
|
256
|
+
else if (tradeoffCount >= 1) score += 1;
|
|
257
|
+
if (titleAndBodyAlign(html)) score += 1;
|
|
258
|
+
if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) score += 1;
|
|
259
|
+
if (firstPara && GENERIC_OPENERS.test(firstPara)) score -= 2;
|
|
260
|
+
const earlyBodyHtml = bodyHtml.slice(0, 1800);
|
|
261
|
+
const earlyCtas = countMatches(earlyBodyHtml, EARLY_CTA_PATTERN);
|
|
262
|
+
if (earlyCtas >= 3) score -= 2;
|
|
263
|
+
else if (earlyCtas >= 2) score -= 1;
|
|
264
|
+
const fluffCount = countMatches(text, FLUFF_LANGUAGE);
|
|
265
|
+
if (fluffCount >= 3) score -= 2;
|
|
266
|
+
else if (fluffCount >= 1) score -= 1;
|
|
267
|
+
return floor(cap(score, 10), 0);
|
|
268
|
+
}
|
|
269
|
+
function scoreFirstHandExperienceSignals(html, url) {
|
|
270
|
+
const text = getTextContent(html);
|
|
271
|
+
if (!text) return 0;
|
|
272
|
+
const contentLike = isContentLikePage(html, url);
|
|
273
|
+
let score = contentLike ? 2 : 5;
|
|
274
|
+
const actionCount = countMatches(text, FIRST_HAND_ACTIONS);
|
|
275
|
+
if (actionCount >= 3) score += 4;
|
|
276
|
+
else if (actionCount >= 1) score += 2;
|
|
277
|
+
const contextCount = countMatches(text, EXPERIENCE_CONTEXT);
|
|
278
|
+
if (contextCount >= 2) score += 2;
|
|
279
|
+
else if (contextCount >= 1) score += 1;
|
|
280
|
+
const artifactCount = countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
|
|
281
|
+
if (artifactCount >= 3) score += 2;
|
|
282
|
+
else if (artifactCount >= 1) score += 1;
|
|
283
|
+
const limitationCount = countMatches(text, LIMITATION_LANGUAGE);
|
|
284
|
+
if (limitationCount >= 2) score += 2;
|
|
285
|
+
else if (limitationCount >= 1) score += 1;
|
|
286
|
+
if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) score -= 1;
|
|
287
|
+
return floor(cap(score, 10), 0);
|
|
288
|
+
}
|
|
289
|
+
function scoreCreatorTransparency(html, url) {
|
|
290
|
+
const text = getTextContent(html);
|
|
291
|
+
if (!text) return 0;
|
|
292
|
+
const contentLike = isContentLikePage(html, url);
|
|
293
|
+
if (!contentLike) return 5;
|
|
294
|
+
let score = 0;
|
|
295
|
+
const hasByline = BYLINE_PATTERN.test(text) || /class=["'][^"']*author[^"']*["']/i.test(html) || /rel=["']author["']/i.test(html);
|
|
296
|
+
const hasPersonSchema = /"@type"\s*:\s*"Person"/i.test(html);
|
|
297
|
+
if (hasByline) score += 3;
|
|
298
|
+
if (AUTHOR_LINK_PATTERN.test(html)) score += 2;
|
|
299
|
+
if (AUTHOR_BIO_PATTERN.test(text)) score += 2;
|
|
300
|
+
if (/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text)) score += 1;
|
|
301
|
+
if (hasPersonSchema) score += 2;
|
|
302
|
+
return floor(cap(score, 10), 0);
|
|
303
|
+
}
|
|
304
|
+
function scoreMethodologyTransparency(html, url) {
|
|
305
|
+
const text = getTextContent(html);
|
|
306
|
+
if (!text) return 0;
|
|
307
|
+
const contentLike = isContentLikePage(html, url);
|
|
308
|
+
const expected = expectsMethodology(html, url);
|
|
309
|
+
let score = expected ? 2 : contentLike ? 5 : 5;
|
|
310
|
+
const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
|
|
311
|
+
if (methodologyCount >= 2) score += 3;
|
|
312
|
+
else if (methodologyCount >= 1) score += 2;
|
|
313
|
+
const detailCount = countMatches(text, METHODOLOGY_DETAIL);
|
|
314
|
+
if (detailCount >= 3) score += 3;
|
|
315
|
+
else if (detailCount >= 2) score += 2;
|
|
316
|
+
else if (detailCount >= 1) score += 1;
|
|
317
|
+
if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
|
|
318
|
+
if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
|
|
319
|
+
if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
|
|
320
|
+
return floor(cap(score, 10), 0);
|
|
321
|
+
}
|
|
322
|
+
|
|
148
323
|
// src/site-crawler.ts
|
|
149
|
-
async function fetchText(url) {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
redirect: "follow"
|
|
155
|
-
});
|
|
156
|
-
const text = await res.text();
|
|
157
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
158
|
-
} catch {
|
|
159
|
-
return null;
|
|
160
|
-
}
|
|
324
|
+
async function fetchText(url, expectedDomain) {
|
|
325
|
+
const res = await safeFetch(url, { timeoutMs: 15e3, expectedDomain });
|
|
326
|
+
if (!res) return null;
|
|
327
|
+
const text = await res.text();
|
|
328
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
161
329
|
}
|
|
162
330
|
function extractDomain(url) {
|
|
163
331
|
return url.replace(/^https?:\/\//, "").replace(/\/.*/, "").replace(/:[0-9]+$/, "").replace(/^www\./, "").toLowerCase();
|
|
@@ -198,13 +366,16 @@ function isHtmlResponse(result) {
|
|
|
198
366
|
return trimmed.startsWith("<!doctype html") || trimmed.startsWith("<html") || /<head[\s>]/i.test(trimmed);
|
|
199
367
|
}
|
|
200
368
|
async function prefetchSiteData(domain) {
|
|
369
|
+
if (!await isSafeFetchTarget(`https://${domain}`)) {
|
|
370
|
+
return { domain, protocol: null, homepage: null, llmsTxt: null, robotsTxt: null, faqPage: null, sitemapXml: null, rssFeed: null, aiTxt: null, redirectedTo: null, parkedReason: null, blogSample: [] };
|
|
371
|
+
}
|
|
201
372
|
let protocol = null;
|
|
202
373
|
let homepage = null;
|
|
203
|
-
homepage = await fetchText(`https://${domain}
|
|
374
|
+
homepage = await fetchText(`https://${domain}`, domain);
|
|
204
375
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
205
376
|
protocol = "https";
|
|
206
377
|
} else {
|
|
207
|
-
homepage = await fetchText(`http://${domain}
|
|
378
|
+
homepage = await fetchText(`http://${domain}`, domain);
|
|
208
379
|
if (homepage && homepage.status >= 200 && homepage.status < 400) {
|
|
209
380
|
protocol = "http";
|
|
210
381
|
}
|
|
@@ -224,38 +395,38 @@ async function prefetchSiteData(domain) {
|
|
|
224
395
|
}
|
|
225
396
|
const baseUrl = `${protocol}://${domain}`;
|
|
226
397
|
const [llmsTxt, robotsTxt, faqPage, sitemapXml, aiTxt] = await Promise.all([
|
|
227
|
-
fetchText(`${baseUrl}/llms.txt
|
|
228
|
-
fetchText(`${baseUrl}/robots.txt
|
|
229
|
-
fetchText(`${baseUrl}/faq
|
|
398
|
+
fetchText(`${baseUrl}/llms.txt`, domain),
|
|
399
|
+
fetchText(`${baseUrl}/robots.txt`, domain),
|
|
400
|
+
fetchText(`${baseUrl}/faq`, domain).then(async (result) => {
|
|
230
401
|
if (result && result.status === 200) return result;
|
|
231
402
|
for (const path of ["/frequently-asked-questions", "/help", "/support", "/help-center"]) {
|
|
232
|
-
const fallback = await fetchText(`${baseUrl}${path}
|
|
403
|
+
const fallback = await fetchText(`${baseUrl}${path}`, domain);
|
|
233
404
|
if (fallback && fallback.status === 200) return fallback;
|
|
234
405
|
}
|
|
235
406
|
return result;
|
|
236
407
|
}),
|
|
237
|
-
fetchText(`${baseUrl}/sitemap.xml
|
|
238
|
-
fetchText(`${baseUrl}/ai.txt
|
|
408
|
+
fetchText(`${baseUrl}/sitemap.xml`, domain),
|
|
409
|
+
fetchText(`${baseUrl}/ai.txt`, domain)
|
|
239
410
|
]);
|
|
240
411
|
let rssFeed = null;
|
|
241
412
|
if (homepage) {
|
|
242
413
|
const rssLinkMatch = homepage.text.match(/<link[^>]*type="application\/(?:rss|atom)\+xml"[^>]*href="([^"]*)"[^>]*>/i);
|
|
243
414
|
if (rssLinkMatch) {
|
|
244
415
|
const rssUrl = rssLinkMatch[1].startsWith("http") ? rssLinkMatch[1] : `${baseUrl}${rssLinkMatch[1]}`;
|
|
245
|
-
rssFeed = await fetchText(rssUrl);
|
|
416
|
+
rssFeed = await fetchText(rssUrl, domain);
|
|
246
417
|
}
|
|
247
418
|
if (!rssFeed || rssFeed.status !== 200) {
|
|
248
419
|
for (const path of ["/feed", "/rss.xml", "/feed.xml"]) {
|
|
249
|
-
rssFeed = await fetchText(`${baseUrl}${path}
|
|
420
|
+
rssFeed = await fetchText(`${baseUrl}${path}`, domain);
|
|
250
421
|
if (rssFeed && rssFeed.status === 200 && (rssFeed.text.includes("<rss") || rssFeed.text.includes("<feed") || rssFeed.text.includes("<channel"))) break;
|
|
251
422
|
rssFeed = null;
|
|
252
423
|
}
|
|
253
424
|
}
|
|
254
425
|
}
|
|
255
426
|
if (sitemapXml && sitemapXml.status === 200 && sitemapXml.text.includes("<sitemapindex")) {
|
|
256
|
-
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, 5);
|
|
427
|
+
const subUrls = extractAllSubSitemapUrls(sitemapXml.text, domain, 5);
|
|
257
428
|
if (subUrls.length > 0) {
|
|
258
|
-
const subResults = await Promise.all(subUrls.map((u) => fetchText(u)));
|
|
429
|
+
const subResults = await Promise.all(subUrls.map((u) => fetchText(u, domain)));
|
|
259
430
|
for (const sub of subResults) {
|
|
260
431
|
if (sub && sub.status === 200) {
|
|
261
432
|
sitemapXml.text += "\n" + sub.text;
|
|
@@ -268,7 +439,7 @@ async function prefetchSiteData(domain) {
|
|
|
268
439
|
const sitemapForBlog = sitemapXml.text;
|
|
269
440
|
const blogUrls = extractBlogUrlsFromSitemap(sitemapForBlog, domain, 50);
|
|
270
441
|
if (blogUrls.length > 0) {
|
|
271
|
-
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url)));
|
|
442
|
+
const fetched = await Promise.all(blogUrls.map((url) => fetchText(url, domain)));
|
|
272
443
|
blogSample = fetched.filter(
|
|
273
444
|
(r) => r !== null && r.status === 200 && r.text.length > 500
|
|
274
445
|
);
|
|
@@ -1050,8 +1221,8 @@ function checkDirectAnswerDensity(data) {
|
|
|
1050
1221
|
const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
1051
1222
|
const snippetZoneParagraphs = paragraphs.filter((p) => {
|
|
1052
1223
|
const text2 = p.replace(/<[^>]*>/g, "").trim();
|
|
1053
|
-
const
|
|
1054
|
-
return
|
|
1224
|
+
const wordCount2 = text2.split(/\s+/).length;
|
|
1225
|
+
return wordCount2 >= 40 && wordCount2 <= 150;
|
|
1055
1226
|
});
|
|
1056
1227
|
if (snippetZoneParagraphs.length >= 3) {
|
|
1057
1228
|
score += 2;
|
|
@@ -1319,13 +1490,15 @@ function extractBlogUrlsFromSitemap(sitemapText, domain, limit = 50) {
|
|
|
1319
1490
|
});
|
|
1320
1491
|
return candidates.slice(0, limit).map((c) => c.url);
|
|
1321
1492
|
}
|
|
1322
|
-
function extractAllSubSitemapUrls(sitemapText,
|
|
1493
|
+
function extractAllSubSitemapUrls(sitemapText, domainOrLimit, maybeLimit = 5) {
|
|
1323
1494
|
if (!sitemapText.includes("<sitemapindex")) return [];
|
|
1495
|
+
const domain = typeof domainOrLimit === "string" ? domainOrLimit : void 0;
|
|
1496
|
+
const limit = typeof domainOrLimit === "number" ? domainOrLimit : maybeLimit;
|
|
1324
1497
|
const sitemapLocs = sitemapText.match(/<sitemap>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi) || [];
|
|
1325
1498
|
const urls = sitemapLocs.map((block) => {
|
|
1326
1499
|
const match = block.match(/<loc>([^<]+)<\/loc>/i);
|
|
1327
1500
|
return match ? match[1].trim() : "";
|
|
1328
|
-
}).filter(
|
|
1501
|
+
}).filter((url) => !!url && isSafePublicUrl(url, domain));
|
|
1329
1502
|
const preferred = urls.filter((u) => /post|blog|article|page/i.test(u));
|
|
1330
1503
|
const rest = urls.filter((u) => !preferred.includes(u));
|
|
1331
1504
|
return [...preferred, ...rest].slice(0, limit);
|
|
@@ -2199,6 +2372,123 @@ function checkContentDepth(data, topicCoherenceScore) {
|
|
|
2199
2372
|
}
|
|
2200
2373
|
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2201
2374
|
}
|
|
2375
|
+
function scoreSampledPages(data, scorer) {
|
|
2376
|
+
const pages = [];
|
|
2377
|
+
if (data.homepage) {
|
|
2378
|
+
const url = data.homepage.finalUrl || (data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`);
|
|
2379
|
+
pages.push({ url, score: scorer(data.homepage.text, url) });
|
|
2380
|
+
}
|
|
2381
|
+
if (data.blogSample) {
|
|
2382
|
+
for (const page of data.blogSample) {
|
|
2383
|
+
const url = page.finalUrl || (data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`);
|
|
2384
|
+
pages.push({ url, score: scorer(page.text, url) });
|
|
2385
|
+
}
|
|
2386
|
+
}
|
|
2387
|
+
return pages;
|
|
2388
|
+
}
|
|
2389
|
+
function summarizeHelpfulScores(pageScores) {
|
|
2390
|
+
const total = pageScores.length;
|
|
2391
|
+
const average = total > 0 ? Math.round(pageScores.reduce((sum, p) => sum + p.score, 0) / total) : 0;
|
|
2392
|
+
const strong = pageScores.filter((p) => p.score >= 8);
|
|
2393
|
+
const weak = pageScores.filter((p) => p.score <= 4);
|
|
2394
|
+
return { total, average, strong, weak };
|
|
2395
|
+
}
|
|
2396
|
+
function checkHelpfulPurposeAlignment(data) {
|
|
2397
|
+
const findings = [];
|
|
2398
|
+
if (!data.homepage) {
|
|
2399
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
2400
|
+
return { criterion: "helpful_purpose_alignment", criterion_label: "Helpful Purpose Alignment", score: 0, status: "not_found", findings, fix_priority: "P1" };
|
|
2401
|
+
}
|
|
2402
|
+
const pageScores = scoreSampledPages(data, scoreHelpfulPurposeAlignment);
|
|
2403
|
+
const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);
|
|
2404
|
+
if (average >= 8) {
|
|
2405
|
+
findings.push({ severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` });
|
|
2406
|
+
} else if (average >= 5) {
|
|
2407
|
+
findings.push({ severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" });
|
|
2408
|
+
} else {
|
|
2409
|
+
findings.push({ severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" });
|
|
2410
|
+
}
|
|
2411
|
+
if (weak.length > 0) {
|
|
2412
|
+
findings.push({
|
|
2413
|
+
severity: "low",
|
|
2414
|
+
detail: `${weak.length} page(s) read as weakly task-focused`,
|
|
2415
|
+
fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
|
|
2416
|
+
});
|
|
2417
|
+
}
|
|
2418
|
+
return { criterion: "helpful_purpose_alignment", criterion_label: "Helpful Purpose Alignment", score: average, status: average >= 7 ? "pass" : average >= 4 ? "partial" : "fail", findings, fix_priority: average >= 7 ? "P3" : "P1" };
|
|
2419
|
+
}
|
|
2420
|
+
function checkFirstHandExperienceSignals(data) {
|
|
2421
|
+
const findings = [];
|
|
2422
|
+
if (!data.homepage) {
|
|
2423
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
2424
|
+
return { criterion: "first_hand_experience_signals", criterion_label: "First-Hand Experience Signals", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
2425
|
+
}
|
|
2426
|
+
const pageScores = scoreSampledPages(data, scoreFirstHandExperienceSignals);
|
|
2427
|
+
const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);
|
|
2428
|
+
if (average >= 8) {
|
|
2429
|
+
findings.push({ severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` });
|
|
2430
|
+
} else if (average >= 5) {
|
|
2431
|
+
findings.push({ severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" });
|
|
2432
|
+
} else {
|
|
2433
|
+
findings.push({ severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" });
|
|
2434
|
+
}
|
|
2435
|
+
if (weak.length > 0) {
|
|
2436
|
+
findings.push({
|
|
2437
|
+
severity: "low",
|
|
2438
|
+
detail: `${weak.length} page(s) appear generic or second-hand`,
|
|
2439
|
+
fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
|
|
2440
|
+
});
|
|
2441
|
+
}
|
|
2442
|
+
return { criterion: "first_hand_experience_signals", criterion_label: "First-Hand Experience Signals", score: average, status: average >= 7 ? "pass" : average >= 4 ? "partial" : "fail", findings, fix_priority: average >= 7 ? "P3" : "P2" };
|
|
2443
|
+
}
|
|
2444
|
+
function checkCreatorTransparency(data) {
|
|
2445
|
+
const findings = [];
|
|
2446
|
+
if (!data.homepage) {
|
|
2447
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
2448
|
+
return { criterion: "creator_transparency", criterion_label: "Creator Transparency", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
2449
|
+
}
|
|
2450
|
+
const pageScores = scoreSampledPages(data, scoreCreatorTransparency);
|
|
2451
|
+
const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);
|
|
2452
|
+
if (average >= 8) {
|
|
2453
|
+
findings.push({ severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` });
|
|
2454
|
+
} else if (average >= 5) {
|
|
2455
|
+
findings.push({ severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" });
|
|
2456
|
+
} else {
|
|
2457
|
+
findings.push({ severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" });
|
|
2458
|
+
}
|
|
2459
|
+
if (weak.length > 0) {
|
|
2460
|
+
findings.push({
|
|
2461
|
+
severity: "low",
|
|
2462
|
+
detail: `${weak.length} page(s) look article-like but expose little visible author context`,
|
|
2463
|
+
fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
|
|
2464
|
+
});
|
|
2465
|
+
}
|
|
2466
|
+
return { criterion: "creator_transparency", criterion_label: "Creator Transparency", score: average, status: average >= 7 ? "pass" : average >= 4 ? "partial" : "fail", findings, fix_priority: average >= 7 ? "P3" : "P2" };
|
|
2467
|
+
}
|
|
2468
|
+
function checkMethodologyTransparency(data) {
|
|
2469
|
+
const findings = [];
|
|
2470
|
+
if (!data.homepage) {
|
|
2471
|
+
findings.push({ severity: "critical", detail: "Could not fetch homepage" });
|
|
2472
|
+
return { criterion: "methodology_transparency", criterion_label: "Methodology Transparency", score: 0, status: "not_found", findings, fix_priority: "P2" };
|
|
2473
|
+
}
|
|
2474
|
+
const pageScores = scoreSampledPages(data, scoreMethodologyTransparency);
|
|
2475
|
+
const { total, average, strong, weak } = summarizeHelpfulScores(pageScores);
|
|
2476
|
+
if (average >= 8) {
|
|
2477
|
+
findings.push({ severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` });
|
|
2478
|
+
} else if (average >= 5) {
|
|
2479
|
+
findings.push({ severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' });
|
|
2480
|
+
} else {
|
|
2481
|
+
findings.push({ severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" });
|
|
2482
|
+
}
|
|
2483
|
+
if (weak.length > 0) {
|
|
2484
|
+
findings.push({
|
|
2485
|
+
severity: "low",
|
|
2486
|
+
detail: `${weak.length} page(s) lack visible methodology or review context`,
|
|
2487
|
+
fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
|
|
2488
|
+
});
|
|
2489
|
+
}
|
|
2490
|
+
return { criterion: "methodology_transparency", criterion_label: "Methodology Transparency", score: average, status: average >= 7 ? "pass" : average >= 4 ? "partial" : "fail", findings, fix_priority: average >= 7 ? "P3" : "P2" };
|
|
2491
|
+
}
|
|
2202
2492
|
function checkCitationReadyWriting(data) {
|
|
2203
2493
|
const findings = [];
|
|
2204
2494
|
if (!data.homepage) {
|
|
@@ -2294,8 +2584,8 @@ function checkAnswerFirstPlacement(data) {
|
|
|
2294
2584
|
const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
|
|
2295
2585
|
for (const p of earlyParagraphs) {
|
|
2296
2586
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
2297
|
-
const
|
|
2298
|
-
if (
|
|
2587
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
2588
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
2299
2589
|
shortAnswerCount++;
|
|
2300
2590
|
break;
|
|
2301
2591
|
}
|
|
@@ -2837,14 +3127,19 @@ function auditSiteFromData(data) {
|
|
|
2837
3127
|
checkVisibleDateSignal(data),
|
|
2838
3128
|
topicCoherence,
|
|
2839
3129
|
checkContentDepth(data, topicCoherence.score),
|
|
2840
|
-
//
|
|
3130
|
+
// Helpful-content criteria (#29-#32)
|
|
3131
|
+
checkHelpfulPurposeAlignment(data),
|
|
3132
|
+
checkFirstHandExperienceSignals(data),
|
|
3133
|
+
checkCreatorTransparency(data),
|
|
3134
|
+
checkMethodologyTransparency(data),
|
|
3135
|
+
// V2 criteria (#33-#38)
|
|
2841
3136
|
checkCitationReadyWriting(data),
|
|
2842
3137
|
checkAnswerFirstPlacement(data),
|
|
2843
3138
|
checkEvidencePackaging(data),
|
|
2844
3139
|
checkEntityDisambiguation(data),
|
|
2845
3140
|
checkExtractionFriction(data),
|
|
2846
3141
|
checkImageContextAI(data),
|
|
2847
|
-
// V3 criteria (#
|
|
3142
|
+
// V3 criteria (#39-#40)
|
|
2848
3143
|
checkDuplicateContent(data),
|
|
2849
3144
|
checkCrossPageDuplication(data)
|
|
2850
3145
|
];
|
|
@@ -2870,6 +3165,10 @@ var WEIGHTS = {
|
|
|
2870
3165
|
// Relevance to actual AI queries
|
|
2871
3166
|
faq_section: 0.03,
|
|
2872
3167
|
// Structured Q&A pairs
|
|
3168
|
+
helpful_purpose_alignment: 0.03,
|
|
3169
|
+
// Visitor-helpful vs search-first framing
|
|
3170
|
+
first_hand_experience_signals: 0.03,
|
|
3171
|
+
// Evidence of real use or observation
|
|
2873
3172
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2874
3173
|
// HOW easily AI engines can extract and trust your content.
|
|
2875
3174
|
entity_consistency: 0.05,
|
|
@@ -2884,9 +3183,13 @@ var WEIGHTS = {
|
|
|
2884
3183
|
// Expert attribution
|
|
2885
3184
|
table_list_extractability: 0.03,
|
|
2886
3185
|
// Extractable structured data
|
|
2887
|
-
|
|
3186
|
+
creator_transparency: 0.02,
|
|
3187
|
+
// Visible author/reviewer clarity
|
|
3188
|
+
methodology_transparency: 0.02,
|
|
3189
|
+
// Process disclosure
|
|
3190
|
+
definition_patterns: 0.015,
|
|
2888
3191
|
// Clear definitions
|
|
2889
|
-
visible_date_signal: 0.
|
|
3192
|
+
visible_date_signal: 0.015,
|
|
2890
3193
|
// Publication date trust
|
|
2891
3194
|
semantic_html: 0.02,
|
|
2892
3195
|
// Clean semantic structure
|
|
@@ -2895,15 +3198,15 @@ var WEIGHTS = {
|
|
|
2895
3198
|
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2896
3199
|
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2897
3200
|
content_cannibalization: 0.02,
|
|
2898
|
-
llms_txt: 0.
|
|
2899
|
-
robots_txt: 0.
|
|
3201
|
+
llms_txt: 0.01,
|
|
3202
|
+
robots_txt: 0.01,
|
|
2900
3203
|
content_velocity: 0.02,
|
|
2901
|
-
content_licensing: 0.
|
|
3204
|
+
content_licensing: 0.01,
|
|
2902
3205
|
sitemap_completeness: 0.01,
|
|
2903
|
-
canonical_url:
|
|
2904
|
-
rss_feed:
|
|
2905
|
-
schema_coverage:
|
|
2906
|
-
speakable_schema:
|
|
3206
|
+
canonical_url: 5e-3,
|
|
3207
|
+
rss_feed: 5e-3,
|
|
3208
|
+
schema_coverage: 5e-3,
|
|
3209
|
+
speakable_schema: 5e-3,
|
|
2907
3210
|
// ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
|
|
2908
3211
|
// Citation quality, evidence packaging, and extraction friction.
|
|
2909
3212
|
citation_ready_writing: 0.04,
|
|
@@ -2916,7 +3219,7 @@ var WEIGHTS = {
|
|
|
2916
3219
|
// Clear entity boundaries
|
|
2917
3220
|
extraction_friction: 0.02,
|
|
2918
3221
|
// Sentence length, voice, jargon
|
|
2919
|
-
image_context_ai:
|
|
3222
|
+
image_context_ai: 5e-3,
|
|
2920
3223
|
// Figure/figcaption, alt text quality
|
|
2921
3224
|
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2922
3225
|
duplicate_content: 0.05,
|
|
@@ -2936,8 +3239,8 @@ function calculateOverallScore(criteria) {
|
|
|
2936
3239
|
let score = Math.round(weightedSum / totalWeight);
|
|
2937
3240
|
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2938
3241
|
if (coherence && coherence.score < 6) {
|
|
2939
|
-
const
|
|
2940
|
-
score = Math.min(score,
|
|
3242
|
+
const cap3 = 35 + coherence.score * 5;
|
|
3243
|
+
score = Math.min(score, cap3);
|
|
2941
3244
|
}
|
|
2942
3245
|
return score;
|
|
2943
3246
|
}
|
|
@@ -2965,6 +3268,13 @@ function isSpaShell(html) {
|
|
|
2965
3268
|
return SPA_INDICATORS.some((pattern) => pattern.test(html));
|
|
2966
3269
|
}
|
|
2967
3270
|
async function fetchWithHeadless(url, options) {
|
|
3271
|
+
let expectedDomain;
|
|
3272
|
+
try {
|
|
3273
|
+
expectedDomain = normalizeHostname(new URL(url).hostname);
|
|
3274
|
+
} catch {
|
|
3275
|
+
return null;
|
|
3276
|
+
}
|
|
3277
|
+
if (!await isSafeFetchTarget(url, expectedDomain)) return null;
|
|
2968
3278
|
let puppeteer;
|
|
2969
3279
|
try {
|
|
2970
3280
|
const mod = "puppeteer";
|
|
@@ -2991,12 +3301,28 @@ async function fetchWithHeadless(url, options) {
|
|
|
2991
3301
|
const page = await browser.newPage();
|
|
2992
3302
|
await page.setRequestInterception(true);
|
|
2993
3303
|
page.on("request", (req) => {
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3304
|
+
void (async () => {
|
|
3305
|
+
const alreadyHandled = typeof req.isInterceptResolutionHandled === "function" ? req.isInterceptResolutionHandled() : false;
|
|
3306
|
+
if (alreadyHandled) return;
|
|
3307
|
+
if (!await isSafeFetchTarget(req.url(), expectedDomain)) {
|
|
3308
|
+
try {
|
|
3309
|
+
if (!req.isInterceptResolutionHandled?.()) await req.abort();
|
|
3310
|
+
} catch {
|
|
3311
|
+
}
|
|
3312
|
+
return;
|
|
3313
|
+
}
|
|
3314
|
+
const type = req.resourceType();
|
|
3315
|
+
try {
|
|
3316
|
+
if (!req.isInterceptResolutionHandled?.()) {
|
|
3317
|
+
if (["image", "font", "media", "stylesheet"].includes(type)) {
|
|
3318
|
+
await req.abort();
|
|
3319
|
+
} else {
|
|
3320
|
+
await req.continue();
|
|
3321
|
+
}
|
|
3322
|
+
}
|
|
3323
|
+
} catch {
|
|
3324
|
+
}
|
|
3325
|
+
})();
|
|
3000
3326
|
});
|
|
3001
3327
|
await page.setUserAgent("AEO-Visibility-Bot/1.0");
|
|
3002
3328
|
await page.goto(url, { waitUntil: "networkidle2", timeout });
|
|
@@ -3009,6 +3335,7 @@ async function fetchWithHeadless(url, options) {
|
|
|
3009
3335
|
}
|
|
3010
3336
|
const html = await page.content();
|
|
3011
3337
|
const finalUrl = page.url();
|
|
3338
|
+
if (!await isSafeFetchTarget(finalUrl, expectedDomain)) return null;
|
|
3012
3339
|
return {
|
|
3013
3340
|
text: html.slice(0, 5e5),
|
|
3014
3341
|
status: 200,
|
|
@@ -3036,6 +3363,8 @@ var PILLARS = {
|
|
|
3036
3363
|
"citation_ready_writing",
|
|
3037
3364
|
"answer_first_placement",
|
|
3038
3365
|
"evidence_packaging",
|
|
3366
|
+
"helpful_purpose_alignment",
|
|
3367
|
+
"first_hand_experience_signals",
|
|
3039
3368
|
"duplicate_content",
|
|
3040
3369
|
"cross_page_duplication"
|
|
3041
3370
|
],
|
|
@@ -3053,7 +3382,9 @@ var PILLARS = {
|
|
|
3053
3382
|
"internal_linking",
|
|
3054
3383
|
"content_freshness",
|
|
3055
3384
|
"author_schema_depth",
|
|
3056
|
-
"schema_markup"
|
|
3385
|
+
"schema_markup",
|
|
3386
|
+
"creator_transparency",
|
|
3387
|
+
"methodology_transparency"
|
|
3057
3388
|
],
|
|
3058
3389
|
"Technical Foundation": [
|
|
3059
3390
|
"semantic_html",
|
|
@@ -3083,6 +3414,8 @@ var CLIENT_NAMES = {
|
|
|
3083
3414
|
citation_ready_writing: "Citation-Ready Writing",
|
|
3084
3415
|
answer_first_placement: "Answer-First Placement",
|
|
3085
3416
|
evidence_packaging: "Evidence Packaging",
|
|
3417
|
+
helpful_purpose_alignment: "Helpful Purpose Alignment",
|
|
3418
|
+
first_hand_experience_signals: "First-Hand Experience Signals",
|
|
3086
3419
|
direct_answer_density: "Direct Answer Density",
|
|
3087
3420
|
qa_content_format: "Q&A Content Format",
|
|
3088
3421
|
query_answer_alignment: "Query-Answer Alignment",
|
|
@@ -3095,6 +3428,8 @@ var CLIENT_NAMES = {
|
|
|
3095
3428
|
content_freshness: "Content Freshness",
|
|
3096
3429
|
author_schema_depth: "Author & Expert Schema",
|
|
3097
3430
|
schema_markup: "Schema Markup",
|
|
3431
|
+
creator_transparency: "Creator Transparency",
|
|
3432
|
+
methodology_transparency: "Methodology Transparency",
|
|
3098
3433
|
semantic_html: "Semantic HTML",
|
|
3099
3434
|
clean_html: "Clean HTML",
|
|
3100
3435
|
visible_date_signal: "Visible Date Signal",
|
|
@@ -3121,6 +3456,8 @@ var PILLAR_WEIGHTS = {
|
|
|
3121
3456
|
citation_ready_writing: 0.04,
|
|
3122
3457
|
answer_first_placement: 0.03,
|
|
3123
3458
|
evidence_packaging: 0.03,
|
|
3459
|
+
helpful_purpose_alignment: 0.03,
|
|
3460
|
+
first_hand_experience_signals: 0.03,
|
|
3124
3461
|
duplicate_content: 0.05,
|
|
3125
3462
|
cross_page_duplication: 0.03,
|
|
3126
3463
|
direct_answer_density: 0.05,
|
|
@@ -3128,28 +3465,30 @@ var PILLAR_WEIGHTS = {
|
|
|
3128
3465
|
query_answer_alignment: 0.04,
|
|
3129
3466
|
faq_section: 0.03,
|
|
3130
3467
|
table_list_extractability: 0.03,
|
|
3131
|
-
definition_patterns: 0.
|
|
3468
|
+
definition_patterns: 0.015,
|
|
3132
3469
|
entity_disambiguation: 0.02,
|
|
3133
3470
|
entity_consistency: 0.05,
|
|
3134
3471
|
internal_linking: 0.04,
|
|
3135
3472
|
content_freshness: 0.04,
|
|
3136
3473
|
author_schema_depth: 0.03,
|
|
3137
3474
|
schema_markup: 0.03,
|
|
3475
|
+
creator_transparency: 0.02,
|
|
3476
|
+
methodology_transparency: 0.02,
|
|
3138
3477
|
semantic_html: 0.02,
|
|
3139
3478
|
clean_html: 0.02,
|
|
3140
|
-
visible_date_signal: 0.
|
|
3479
|
+
visible_date_signal: 0.015,
|
|
3141
3480
|
extraction_friction: 0.02,
|
|
3142
|
-
image_context_ai:
|
|
3143
|
-
schema_coverage:
|
|
3144
|
-
speakable_schema:
|
|
3481
|
+
image_context_ai: 5e-3,
|
|
3482
|
+
schema_coverage: 5e-3,
|
|
3483
|
+
speakable_schema: 5e-3,
|
|
3145
3484
|
content_cannibalization: 0.02,
|
|
3146
|
-
llms_txt: 0.
|
|
3147
|
-
robots_txt: 0.
|
|
3485
|
+
llms_txt: 0.01,
|
|
3486
|
+
robots_txt: 0.01,
|
|
3148
3487
|
content_velocity: 0.02,
|
|
3149
|
-
content_licensing: 0.
|
|
3150
|
-
canonical_url:
|
|
3488
|
+
content_licensing: 0.01,
|
|
3489
|
+
canonical_url: 5e-3,
|
|
3151
3490
|
sitemap_completeness: 0.01,
|
|
3152
|
-
rss_feed:
|
|
3491
|
+
rss_feed: 5e-3
|
|
3153
3492
|
};
|
|
3154
3493
|
var CRITERION_EFFORT = {
|
|
3155
3494
|
topic_coherence: "High",
|
|
@@ -3159,6 +3498,8 @@ var CRITERION_EFFORT = {
|
|
|
3159
3498
|
citation_ready_writing: "Medium",
|
|
3160
3499
|
answer_first_placement: "Medium",
|
|
3161
3500
|
evidence_packaging: "Medium",
|
|
3501
|
+
helpful_purpose_alignment: "Medium",
|
|
3502
|
+
first_hand_experience_signals: "Medium",
|
|
3162
3503
|
duplicate_content: "Medium",
|
|
3163
3504
|
cross_page_duplication: "Medium",
|
|
3164
3505
|
direct_answer_density: "Medium",
|
|
@@ -3173,6 +3514,8 @@ var CRITERION_EFFORT = {
|
|
|
3173
3514
|
content_freshness: "Low",
|
|
3174
3515
|
author_schema_depth: "Low",
|
|
3175
3516
|
schema_markup: "Medium",
|
|
3517
|
+
creator_transparency: "Low",
|
|
3518
|
+
methodology_transparency: "Low",
|
|
3176
3519
|
semantic_html: "Low",
|
|
3177
3520
|
clean_html: "Medium",
|
|
3178
3521
|
visible_date_signal: "Low",
|
|
@@ -3197,6 +3540,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3197
3540
|
citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
|
|
3198
3541
|
answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
|
|
3199
3542
|
evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
|
|
3543
|
+
helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
|
|
3544
|
+
first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
|
|
3200
3545
|
direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
|
|
3201
3546
|
qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
|
|
3202
3547
|
query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
|
|
@@ -3209,6 +3554,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3209
3554
|
content_freshness: "Add dateModified schema and visible last-updated dates.",
|
|
3210
3555
|
author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
|
|
3211
3556
|
schema_markup: "Implement JSON-LD structured data on key pages.",
|
|
3557
|
+
creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
|
|
3558
|
+
methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
|
|
3212
3559
|
semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
|
|
3213
3560
|
clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
|
|
3214
3561
|
visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
|
|
@@ -3308,6 +3655,10 @@ var CRITERION_LABELS = {
|
|
|
3308
3655
|
"Visible Date Signal": "Visible Date Signal",
|
|
3309
3656
|
"Topic Coherence": "Topic Coherence",
|
|
3310
3657
|
"Content Depth": "Content Depth",
|
|
3658
|
+
"Helpful Purpose Alignment": "Helpful Purpose Alignment",
|
|
3659
|
+
"First-Hand Experience Signals": "First-Hand Experience Signals",
|
|
3660
|
+
"Creator Transparency": "Creator Transparency",
|
|
3661
|
+
"Methodology Transparency": "Methodology Transparency",
|
|
3311
3662
|
"Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
|
|
3312
3663
|
"Answer-First Placement": "Answer-First Placement",
|
|
3313
3664
|
"Evidence Packaging": "Evidence Packaging",
|
|
@@ -3413,6 +3764,8 @@ var CRITERION_WEIGHTS = {
|
|
|
3413
3764
|
qa_content_format: 0.04,
|
|
3414
3765
|
query_answer_alignment: 0.04,
|
|
3415
3766
|
faq_section: 0.03,
|
|
3767
|
+
helpful_purpose_alignment: 0.03,
|
|
3768
|
+
first_hand_experience_signals: 0.03,
|
|
3416
3769
|
// Content Organization (~30%)
|
|
3417
3770
|
entity_consistency: 0.05,
|
|
3418
3771
|
internal_linking: 0.04,
|
|
@@ -3420,28 +3773,30 @@ var CRITERION_WEIGHTS = {
|
|
|
3420
3773
|
schema_markup: 0.03,
|
|
3421
3774
|
author_schema_depth: 0.03,
|
|
3422
3775
|
table_list_extractability: 0.03,
|
|
3423
|
-
|
|
3424
|
-
|
|
3776
|
+
creator_transparency: 0.02,
|
|
3777
|
+
methodology_transparency: 0.02,
|
|
3778
|
+
definition_patterns: 0.015,
|
|
3779
|
+
visible_date_signal: 0.015,
|
|
3425
3780
|
semantic_html: 0.02,
|
|
3426
3781
|
clean_html: 0.02,
|
|
3427
3782
|
// Technical Plumbing (~15%)
|
|
3428
3783
|
content_cannibalization: 0.02,
|
|
3429
|
-
llms_txt: 0.
|
|
3430
|
-
robots_txt: 0.
|
|
3784
|
+
llms_txt: 0.01,
|
|
3785
|
+
robots_txt: 0.01,
|
|
3431
3786
|
content_velocity: 0.02,
|
|
3432
|
-
content_licensing: 0.
|
|
3787
|
+
content_licensing: 0.01,
|
|
3433
3788
|
sitemap_completeness: 0.01,
|
|
3434
|
-
canonical_url:
|
|
3435
|
-
rss_feed:
|
|
3436
|
-
schema_coverage:
|
|
3437
|
-
speakable_schema:
|
|
3789
|
+
canonical_url: 5e-3,
|
|
3790
|
+
rss_feed: 5e-3,
|
|
3791
|
+
schema_coverage: 5e-3,
|
|
3792
|
+
speakable_schema: 5e-3,
|
|
3438
3793
|
// V2 Criteria (~15%)
|
|
3439
3794
|
citation_ready_writing: 0.04,
|
|
3440
3795
|
answer_first_placement: 0.03,
|
|
3441
3796
|
evidence_packaging: 0.03,
|
|
3442
3797
|
entity_disambiguation: 0.02,
|
|
3443
3798
|
extraction_friction: 0.02,
|
|
3444
|
-
image_context_ai:
|
|
3799
|
+
image_context_ai: 5e-3,
|
|
3445
3800
|
// V3 Criteria
|
|
3446
3801
|
duplicate_content: 0.05,
|
|
3447
3802
|
cross_page_duplication: 0.03
|
|
@@ -3482,6 +3837,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3482
3837
|
effort: "Medium",
|
|
3483
3838
|
description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
|
|
3484
3839
|
},
|
|
3840
|
+
helpful_purpose_alignment: {
|
|
3841
|
+
name: "Improve Helpful Purpose Alignment",
|
|
3842
|
+
effort: "Medium",
|
|
3843
|
+
description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
|
|
3844
|
+
},
|
|
3845
|
+
first_hand_experience_signals: {
|
|
3846
|
+
name: "Add First-Hand Experience Signals",
|
|
3847
|
+
effort: "Medium",
|
|
3848
|
+
description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
|
|
3849
|
+
},
|
|
3485
3850
|
original_data: {
|
|
3486
3851
|
name: "Add Original Data & Case Studies",
|
|
3487
3852
|
effort: "High",
|
|
@@ -3537,6 +3902,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3537
3902
|
effort: "Low",
|
|
3538
3903
|
description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
|
|
3539
3904
|
},
|
|
3905
|
+
creator_transparency: {
|
|
3906
|
+
name: "Improve Creator Transparency",
|
|
3907
|
+
effort: "Low",
|
|
3908
|
+
description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
|
|
3909
|
+
},
|
|
3910
|
+
methodology_transparency: {
|
|
3911
|
+
name: "Add Methodology Transparency",
|
|
3912
|
+
effort: "Low",
|
|
3913
|
+
description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
|
|
3914
|
+
},
|
|
3540
3915
|
fact_density: {
|
|
3541
3916
|
name: "Increase Fact & Data Density",
|
|
3542
3917
|
effort: "Medium",
|
|
@@ -3796,20 +4171,12 @@ function formatList(items) {
|
|
|
3796
4171
|
}
|
|
3797
4172
|
|
|
3798
4173
|
// src/multi-page-fetcher.ts
|
|
3799
|
-
async function fetchPage(url, timeoutMs = 1e4) {
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
});
|
|
3806
|
-
if (res.status !== 200) return null;
|
|
3807
|
-
const text = await res.text();
|
|
3808
|
-
if (text.length < 200) return null;
|
|
3809
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3810
|
-
} catch {
|
|
3811
|
-
return null;
|
|
3812
|
-
}
|
|
4174
|
+
async function fetchPage(url, domain, timeoutMs = 1e4) {
|
|
4175
|
+
const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
|
|
4176
|
+
if (!res || res.status !== 200) return null;
|
|
4177
|
+
const text = await res.text();
|
|
4178
|
+
if (text.length < 200) return null;
|
|
4179
|
+
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
3813
4180
|
}
|
|
3814
4181
|
var PAGE_VARIANTS = {
|
|
3815
4182
|
about: ["/about", "/about-us", "/company", "/who-we-are"],
|
|
@@ -3965,7 +4332,7 @@ async function fetchMultiPageData(siteData, options) {
|
|
|
3965
4332
|
}
|
|
3966
4333
|
const entries = Array.from(urlsToFetch.entries());
|
|
3967
4334
|
if (entries.length === 0) return 0;
|
|
3968
|
-
const results = await Promise.all(entries.map(([url]) => fetchPage(url, timeoutMs)));
|
|
4335
|
+
const results = await Promise.all(entries.map(([url]) => fetchPage(url, siteData.domain, timeoutMs)));
|
|
3969
4336
|
if (!siteData.blogSample) siteData.blogSample = [];
|
|
3970
4337
|
let added = 0;
|
|
3971
4338
|
for (let i = 0; i < results.length; i++) {
|
|
@@ -3992,19 +4359,23 @@ var PAGE_CRITERIA = {
|
|
|
3992
4359
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3993
4360
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
3994
4361
|
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
3995
|
-
definition_patterns: { weight: 0.
|
|
3996
|
-
visible_date_signal: { weight: 0.
|
|
4362
|
+
definition_patterns: { weight: 0.015, label: "Definition Patterns" },
|
|
4363
|
+
visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
|
|
3997
4364
|
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
3998
4365
|
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
3999
4366
|
// Technical Plumbing
|
|
4000
|
-
canonical_url: { weight:
|
|
4367
|
+
canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
|
|
4001
4368
|
// V2 Criteria
|
|
4002
4369
|
citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
|
|
4003
4370
|
answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
|
|
4004
4371
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
4372
|
+
helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
|
|
4373
|
+
first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
|
|
4005
4374
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
4006
4375
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
4007
|
-
|
|
4376
|
+
creator_transparency: { weight: 0.02, label: "Creator Transparency" },
|
|
4377
|
+
methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
|
|
4378
|
+
image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
|
|
4008
4379
|
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
4009
4380
|
};
|
|
4010
4381
|
function extractJsonLdBlocks(html) {
|
|
@@ -4027,7 +4398,7 @@ function extractTypesFromJsonLd(blocks) {
|
|
|
4027
4398
|
}
|
|
4028
4399
|
return types;
|
|
4029
4400
|
}
|
|
4030
|
-
function
|
|
4401
|
+
function getTextContent2(html) {
|
|
4031
4402
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4032
4403
|
}
|
|
4033
4404
|
function extractQuestionHeadings2(html) {
|
|
@@ -4055,7 +4426,7 @@ function countAnsweredQuestions(html) {
|
|
|
4055
4426
|
}
|
|
4056
4427
|
return { total: questions.length, answered };
|
|
4057
4428
|
}
|
|
4058
|
-
function
|
|
4429
|
+
function cap2(value, max) {
|
|
4059
4430
|
return Math.min(value, max);
|
|
4060
4431
|
}
|
|
4061
4432
|
function scoreSchemaMarkup(html) {
|
|
@@ -4081,10 +4452,10 @@ function scoreSchemaMarkup(html) {
|
|
|
4081
4452
|
for (const t of types) {
|
|
4082
4453
|
if (knownTypes.includes(t)) knownCount++;
|
|
4083
4454
|
}
|
|
4084
|
-
score +=
|
|
4455
|
+
score += cap2(knownCount * 2, 4);
|
|
4085
4456
|
if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
|
|
4086
4457
|
if (types.has("FAQPage")) score += 1;
|
|
4087
|
-
return
|
|
4458
|
+
return cap2(score, 10);
|
|
4088
4459
|
}
|
|
4089
4460
|
function scoreQAFormat(html) {
|
|
4090
4461
|
const questions = extractQuestionHeadings2(html);
|
|
@@ -4096,7 +4467,7 @@ function scoreQAFormat(html) {
|
|
|
4096
4467
|
if (answered >= 1) score += 3;
|
|
4097
4468
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4098
4469
|
if (h1Matches.length === 1) score += 2;
|
|
4099
|
-
return
|
|
4470
|
+
return cap2(score, 10);
|
|
4100
4471
|
}
|
|
4101
4472
|
function scoreCleanHtml(html) {
|
|
4102
4473
|
let score = 0;
|
|
@@ -4105,15 +4476,15 @@ function scoreCleanHtml(html) {
|
|
|
4105
4476
|
for (const tag of semantics) {
|
|
4106
4477
|
if (html.toLowerCase().includes(tag)) semCount++;
|
|
4107
4478
|
}
|
|
4108
|
-
score +=
|
|
4479
|
+
score += cap2(semCount, 3);
|
|
4109
4480
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4110
4481
|
if (h1Matches.length === 1) score += 2;
|
|
4111
|
-
const text =
|
|
4482
|
+
const text = getTextContent2(html);
|
|
4112
4483
|
if (text.length > 500) score += 3;
|
|
4113
4484
|
const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
|
|
4114
4485
|
const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
|
|
4115
4486
|
if (hasTitle && hasDesc) score += 2;
|
|
4116
|
-
return
|
|
4487
|
+
return cap2(score, 10);
|
|
4117
4488
|
}
|
|
4118
4489
|
function scoreFaqSection(html) {
|
|
4119
4490
|
let score = 0;
|
|
@@ -4125,11 +4496,11 @@ function scoreFaqSection(html) {
|
|
|
4125
4496
|
const questions = extractQuestionHeadings2(html);
|
|
4126
4497
|
if (questions.length >= 10) score += 1;
|
|
4127
4498
|
if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
|
|
4128
|
-
return
|
|
4499
|
+
return cap2(score, 10);
|
|
4129
4500
|
}
|
|
4130
4501
|
function scoreOriginalData(html) {
|
|
4131
4502
|
let score = 0;
|
|
4132
|
-
const text =
|
|
4503
|
+
const text = getTextContent2(html);
|
|
4133
4504
|
if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
|
|
4134
4505
|
score += 3;
|
|
4135
4506
|
} else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
|
|
@@ -4146,7 +4517,7 @@ function scoreOriginalData(html) {
|
|
|
4146
4517
|
if (/href=["'][^"']*\/blog\b/i.test(html)) {
|
|
4147
4518
|
score += 2;
|
|
4148
4519
|
}
|
|
4149
|
-
return
|
|
4520
|
+
return cap2(score, 10);
|
|
4150
4521
|
}
|
|
4151
4522
|
function scoreQueryAnswerAlignment(html) {
|
|
4152
4523
|
const { total, answered } = countAnsweredQuestions(html);
|
|
@@ -4169,7 +4540,7 @@ function scoreContentFreshness(html) {
|
|
|
4169
4540
|
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
4170
4541
|
const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
|
|
4171
4542
|
if (yearPattern.test(html)) score += 2;
|
|
4172
|
-
return
|
|
4543
|
+
return cap2(score, 10);
|
|
4173
4544
|
}
|
|
4174
4545
|
function scoreTableListExtractability(html) {
|
|
4175
4546
|
let score = 0;
|
|
@@ -4182,7 +4553,7 @@ function scoreTableListExtractability(html) {
|
|
|
4182
4553
|
const listItems = html.match(/<li[\s>]/gi) || [];
|
|
4183
4554
|
if (listItems.length >= 10) score += 1;
|
|
4184
4555
|
if (/<dl[\s>]/i.test(html)) score += 1;
|
|
4185
|
-
return
|
|
4556
|
+
return cap2(score, 10);
|
|
4186
4557
|
}
|
|
4187
4558
|
function scoreDirectAnswerDensity(html) {
|
|
4188
4559
|
let score = 0;
|
|
@@ -4198,9 +4569,9 @@ function scoreDirectAnswerDensity(html) {
|
|
|
4198
4569
|
}
|
|
4199
4570
|
if (snippetCount >= 3) score += 2;
|
|
4200
4571
|
else if (snippetCount >= 1) score += 1;
|
|
4201
|
-
const directOpeners =
|
|
4572
|
+
const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
|
|
4202
4573
|
if (directOpeners.length >= 2) score += 2;
|
|
4203
|
-
return
|
|
4574
|
+
return cap2(score, 10);
|
|
4204
4575
|
}
|
|
4205
4576
|
function scoreSemanticHtml(html) {
|
|
4206
4577
|
let score = 0;
|
|
@@ -4210,7 +4581,7 @@ function scoreSemanticHtml(html) {
|
|
|
4210
4581
|
for (const el of elements) {
|
|
4211
4582
|
if (lowerHtml.includes(el)) count++;
|
|
4212
4583
|
}
|
|
4213
|
-
score +=
|
|
4584
|
+
score += cap2(Math.floor(count * 0.7), 4);
|
|
4214
4585
|
const imgTags = html.match(/<img\s[^>]*>/gi) || [];
|
|
4215
4586
|
if (imgTags.length > 0) {
|
|
4216
4587
|
let withAlt = 0;
|
|
@@ -4221,11 +4592,11 @@ function scoreSemanticHtml(html) {
|
|
|
4221
4592
|
}
|
|
4222
4593
|
if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
|
|
4223
4594
|
if (/\baria-/i.test(html)) score += 2;
|
|
4224
|
-
return
|
|
4595
|
+
return cap2(score, 10);
|
|
4225
4596
|
}
|
|
4226
4597
|
function scoreFactDensity(html) {
|
|
4227
4598
|
let score = 0;
|
|
4228
|
-
const text =
|
|
4599
|
+
const text = getTextContent2(html);
|
|
4229
4600
|
const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
|
|
4230
4601
|
if (numericPatterns.length >= 6) score += 5;
|
|
4231
4602
|
else if (numericPatterns.length >= 3) score += 3;
|
|
@@ -4238,11 +4609,11 @@ function scoreFactDensity(html) {
|
|
|
4238
4609
|
if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
|
|
4239
4610
|
const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
|
|
4240
4611
|
if (units.length >= 2) score += 1;
|
|
4241
|
-
return
|
|
4612
|
+
return cap2(score, 10);
|
|
4242
4613
|
}
|
|
4243
4614
|
function scoreDefinitionPatterns(html) {
|
|
4244
4615
|
let score = 0;
|
|
4245
|
-
const text =
|
|
4616
|
+
const text = getTextContent2(html);
|
|
4246
4617
|
const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
|
|
4247
4618
|
if (defPatterns.length >= 3) score += 5;
|
|
4248
4619
|
else if (defPatterns.length >= 1) score += 3;
|
|
@@ -4250,7 +4621,7 @@ function scoreDefinitionPatterns(html) {
|
|
|
4250
4621
|
if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
|
|
4251
4622
|
if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
|
|
4252
4623
|
if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
|
|
4253
|
-
return
|
|
4624
|
+
return cap2(score, 10);
|
|
4254
4625
|
}
|
|
4255
4626
|
function scoreCanonicalUrl(html, url) {
|
|
4256
4627
|
let score = 0;
|
|
@@ -4271,7 +4642,7 @@ function scoreCanonicalUrl(html, url) {
|
|
|
4271
4642
|
if (canonicalHref.startsWith("https://")) score += 2;
|
|
4272
4643
|
const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
|
|
4273
4644
|
if (allCanonicals.length === 1) score += 1;
|
|
4274
|
-
return
|
|
4645
|
+
return cap2(score, 10);
|
|
4275
4646
|
}
|
|
4276
4647
|
function scoreVisibleDateSignal(html) {
|
|
4277
4648
|
let score = 0;
|
|
@@ -4290,11 +4661,11 @@ function scoreVisibleDateSignal(html) {
|
|
|
4290
4661
|
} catch {
|
|
4291
4662
|
}
|
|
4292
4663
|
}
|
|
4293
|
-
return
|
|
4664
|
+
return cap2(score, 10);
|
|
4294
4665
|
}
|
|
4295
4666
|
function scoreCitationReadyWriting(html) {
|
|
4296
4667
|
let score = 0;
|
|
4297
|
-
const text =
|
|
4668
|
+
const text = getTextContent2(html);
|
|
4298
4669
|
const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
|
|
4299
4670
|
if (defSentences.length >= 3) score += 3;
|
|
4300
4671
|
else if (defSentences.length >= 1) score += 1;
|
|
@@ -4323,7 +4694,7 @@ function scoreCitationReadyWriting(html) {
|
|
|
4323
4694
|
);
|
|
4324
4695
|
if (quotableLines.length >= 2) score += 2;
|
|
4325
4696
|
else if (quotableLines.length >= 1) score += 1;
|
|
4326
|
-
return
|
|
4697
|
+
return cap2(score, 10);
|
|
4327
4698
|
}
|
|
4328
4699
|
function scoreAnswerFirstPlacement(html) {
|
|
4329
4700
|
let score = 0;
|
|
@@ -4334,8 +4705,8 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4334
4705
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4335
4706
|
for (const p of earlyParagraphs) {
|
|
4336
4707
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4337
|
-
const
|
|
4338
|
-
if (
|
|
4708
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
4709
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4339
4710
|
score += 4;
|
|
4340
4711
|
break;
|
|
4341
4712
|
}
|
|
@@ -4356,11 +4727,11 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4356
4727
|
score += 3;
|
|
4357
4728
|
}
|
|
4358
4729
|
}
|
|
4359
|
-
return
|
|
4730
|
+
return cap2(score, 10);
|
|
4360
4731
|
}
|
|
4361
4732
|
function scoreEvidencePackaging(html) {
|
|
4362
4733
|
let score = 0;
|
|
4363
|
-
const text =
|
|
4734
|
+
const text = getTextContent2(html);
|
|
4364
4735
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4365
4736
|
let inlineCitations = 0;
|
|
4366
4737
|
for (const p of paragraphs) {
|
|
@@ -4378,11 +4749,11 @@ function scoreEvidencePackaging(html) {
|
|
|
4378
4749
|
const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
|
|
4379
4750
|
if (sourcedStats.length >= 2) score += 2;
|
|
4380
4751
|
else if (sourcedStats.length >= 1) score += 1;
|
|
4381
|
-
return
|
|
4752
|
+
return cap2(score, 10);
|
|
4382
4753
|
}
|
|
4383
4754
|
function scoreEntityDisambiguation(html) {
|
|
4384
4755
|
let score = 0;
|
|
4385
|
-
const text =
|
|
4756
|
+
const text = getTextContent2(html);
|
|
4386
4757
|
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
4387
4758
|
if (!h1Match) return 3;
|
|
4388
4759
|
const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
|
|
@@ -4400,11 +4771,11 @@ function scoreEntityDisambiguation(html) {
|
|
|
4400
4771
|
if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
|
|
4401
4772
|
score += 3;
|
|
4402
4773
|
}
|
|
4403
|
-
return
|
|
4774
|
+
return cap2(score, 10);
|
|
4404
4775
|
}
|
|
4405
4776
|
function scoreExtractionFriction(html) {
|
|
4406
4777
|
let score = 0;
|
|
4407
|
-
const text =
|
|
4778
|
+
const text = getTextContent2(html);
|
|
4408
4779
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
4409
4780
|
const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
|
|
4410
4781
|
if (avgLen > 0 && avgLen < 20) score += 3;
|
|
@@ -4427,7 +4798,7 @@ function scoreExtractionFriction(html) {
|
|
|
4427
4798
|
if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
|
|
4428
4799
|
score = Math.max(0, score - 2);
|
|
4429
4800
|
}
|
|
4430
|
-
return
|
|
4801
|
+
return cap2(score, 10);
|
|
4431
4802
|
}
|
|
4432
4803
|
function scoreImageContextAI(html) {
|
|
4433
4804
|
let score = 0;
|
|
@@ -4452,7 +4823,7 @@ function scoreImageContextAI(html) {
|
|
|
4452
4823
|
else if (goodAltCount > 0) score += 1;
|
|
4453
4824
|
const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
|
|
4454
4825
|
if (contextualImages.length > 0) score += 3;
|
|
4455
|
-
return
|
|
4826
|
+
return cap2(score, 10);
|
|
4456
4827
|
}
|
|
4457
4828
|
function scoreDuplicateContent(html) {
|
|
4458
4829
|
return scoreDuplicateContentDetailed(html).score;
|
|
@@ -4514,8 +4885,12 @@ var SCORING_FUNCTIONS = {
|
|
|
4514
4885
|
citation_ready_writing: scoreCitationReadyWriting,
|
|
4515
4886
|
answer_first_placement: scoreAnswerFirstPlacement,
|
|
4516
4887
|
evidence_packaging: scoreEvidencePackaging,
|
|
4888
|
+
helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
|
|
4889
|
+
first_hand_experience_signals: scoreFirstHandExperienceSignals,
|
|
4517
4890
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4518
4891
|
extraction_friction: scoreExtractionFriction,
|
|
4892
|
+
creator_transparency: scoreCreatorTransparency,
|
|
4893
|
+
methodology_transparency: scoreMethodologyTransparency,
|
|
4519
4894
|
image_context_ai: scoreImageContextAI,
|
|
4520
4895
|
duplicate_content: scoreDuplicateContent
|
|
4521
4896
|
};
|
|
@@ -4546,7 +4921,7 @@ function extractTitle(html) {
|
|
|
4546
4921
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4547
4922
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4548
4923
|
}
|
|
4549
|
-
function
|
|
4924
|
+
function getTextContent3(html) {
|
|
4550
4925
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4551
4926
|
}
|
|
4552
4927
|
function countWords2(text) {
|
|
@@ -4606,9 +4981,9 @@ function checkMissingOgTags(html) {
|
|
|
4606
4981
|
}
|
|
4607
4982
|
return null;
|
|
4608
4983
|
}
|
|
4609
|
-
function checkThinContent(
|
|
4610
|
-
if (
|
|
4611
|
-
return { check: "thin-content", label: `Thin content (${
|
|
4984
|
+
function checkThinContent(wordCount2) {
|
|
4985
|
+
if (wordCount2 < 300) {
|
|
4986
|
+
return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
|
|
4612
4987
|
}
|
|
4613
4988
|
return null;
|
|
4614
4989
|
}
|
|
@@ -4705,15 +5080,15 @@ function checkNoAnswerBlock(html) {
|
|
|
4705
5080
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4706
5081
|
for (const p of earlyParagraphs) {
|
|
4707
5082
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4708
|
-
const
|
|
4709
|
-
if (
|
|
5083
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
5084
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4710
5085
|
return null;
|
|
4711
5086
|
}
|
|
4712
5087
|
}
|
|
4713
5088
|
return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
|
|
4714
5089
|
}
|
|
4715
5090
|
function checkNoEvidence(html, url) {
|
|
4716
|
-
const text =
|
|
5091
|
+
const text = getTextContent3(html);
|
|
4717
5092
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4718
5093
|
let inlineCitations = 0;
|
|
4719
5094
|
for (const p of paragraphs) {
|
|
@@ -4727,7 +5102,7 @@ function checkNoEvidence(html, url) {
|
|
|
4727
5102
|
return null;
|
|
4728
5103
|
}
|
|
4729
5104
|
function checkHasCitationReadyContent(html) {
|
|
4730
|
-
const text =
|
|
5105
|
+
const text = getTextContent3(html);
|
|
4731
5106
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
|
|
4732
5107
|
let quotable = 0;
|
|
4733
5108
|
for (const s of sentences) {
|
|
@@ -4752,8 +5127,8 @@ function checkDuplicateContentBlocks(html) {
|
|
|
4752
5127
|
}
|
|
4753
5128
|
function analyzePage(html, url, category) {
|
|
4754
5129
|
const title = extractTitle(html);
|
|
4755
|
-
const textContent =
|
|
4756
|
-
const
|
|
5130
|
+
const textContent = getTextContent3(html);
|
|
5131
|
+
const wordCount2 = countWords2(textContent);
|
|
4757
5132
|
const issues = [];
|
|
4758
5133
|
const strengths = [];
|
|
4759
5134
|
const issueChecks = [
|
|
@@ -4764,7 +5139,7 @@ function analyzePage(html, url, category) {
|
|
|
4764
5139
|
checkNoSchema(html),
|
|
4765
5140
|
checkMissingCanonical(html),
|
|
4766
5141
|
checkMissingOgTags(html),
|
|
4767
|
-
checkThinContent(
|
|
5142
|
+
checkThinContent(wordCount2),
|
|
4768
5143
|
checkImagesMissingAlt(html),
|
|
4769
5144
|
checkNoInternalLinks(html, url),
|
|
4770
5145
|
checkNoAnswerBlock(html),
|
|
@@ -4783,7 +5158,7 @@ function analyzePage(html, url, category) {
|
|
|
4783
5158
|
if (result) strengths.push(result);
|
|
4784
5159
|
}
|
|
4785
5160
|
const { aeoScore, criterionScores } = scorePage(html, url);
|
|
4786
|
-
return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
|
|
5161
|
+
return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
|
|
4787
5162
|
}
|
|
4788
5163
|
function analyzeAllPages(siteData) {
|
|
4789
5164
|
const reviews = [];
|
|
@@ -4805,6 +5180,10 @@ function getTextLength(html) {
|
|
|
4805
5180
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim().length;
|
|
4806
5181
|
}
|
|
4807
5182
|
async function audit(domain, options) {
|
|
5183
|
+
const normalizedTarget = domain.startsWith("http") ? domain : `https://${domain}`;
|
|
5184
|
+
if (!await isSafeFetchTarget(normalizedTarget)) {
|
|
5185
|
+
throw new Error(`Refusing to audit private or local address: ${domain}`);
|
|
5186
|
+
}
|
|
4808
5187
|
const startTime = Date.now();
|
|
4809
5188
|
let renderedWithHeadless = false;
|
|
4810
5189
|
const siteData = await prefetchSiteData(domain);
|
|
@@ -4837,7 +5216,7 @@ async function audit(domain, options) {
|
|
|
4837
5216
|
}
|
|
4838
5217
|
}
|
|
4839
5218
|
if (options?.fullCrawl) {
|
|
4840
|
-
const { crawlFullSite } = await import("./full-site-crawler-
|
|
5219
|
+
const { crawlFullSite } = await import("./full-site-crawler-W3WSE6WT.js");
|
|
4841
5220
|
const crawlResult = await crawlFullSite(siteData, {
|
|
4842
5221
|
maxPages: options.maxPages ?? 200,
|
|
4843
5222
|
concurrency: options.concurrency ?? 5
|
|
@@ -5266,7 +5645,7 @@ function generateComparisonHtmlReport(result) {
|
|
|
5266
5645
|
}
|
|
5267
5646
|
|
|
5268
5647
|
// src/cli.ts
|
|
5269
|
-
var VERSION = "3.
|
|
5648
|
+
var VERSION = "3.2.1";
|
|
5270
5649
|
function printHelp() {
|
|
5271
5650
|
console.log(`
|
|
5272
5651
|
aeorank - AI Engine Optimization audit
|