aeorank 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -44
- package/dist/browser.d.ts +4 -4
- package/dist/browser.js +648 -202
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +521 -187
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +648 -202
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +648 -202
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -76,6 +76,244 @@ function detectParkedDomain(bodySnippet) {
|
|
|
76
76
|
return { isParked: false };
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
// src/duplicate-content.ts
|
|
80
|
+
// Call-to-action, social-sharing, and legal phrases (case-insensitive, whole-word).
// isBoilerplateParagraph tests paragraphs of fewer than 20 words against this.
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
|
|
81
|
+
// createParagraph discards any paragraph with fewer tokens than this.
var MIN_SUBSTANTIVE_WORDS = 15;
|
|
82
|
+
// Largest token count at which isMetadataParagraph still treats a "Label: value" paragraph as metadata.
var MAX_METADATA_WORDS = 24;
|
|
83
|
+
// Largest number of tokens allowed in the label (the text before the first colon) in isMetadataParagraph.
var MAX_METADATA_LABEL_WORDS = 4;
|
|
84
|
+
/**
 * Reduce an HTML fragment to lowercase plain text for duplicate detection.
 *
 * Tags and character references are each replaced by a single space, runs of
 * whitespace are collapsed, and the result is trimmed and lowercased.
 *
 * @param {string} htmlFragment - Raw HTML, typically one `<p>…</p>` element.
 * @returns {string} Normalized plain text.
 */
function normalizeParagraphText(htmlFragment) {
  return htmlFragment
    .replace(/<[^>]*>/g, " ")
    // `#?` makes numeric references (`&#39;`, `&#x27;`) match as well as named
    // ones (`&amp;`); `#` is not a `\w` character, so `&\w+;` alone skipped them
    // and left the raw reference inside the token stream.
    .replace(/&#?\w+;/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
|
|
87
|
+
/**
 * Split text on whitespace and strip leading/trailing characters that are not
 * ASCII letters or digits from each piece. Pieces that end up empty are dropped.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Cleaned tokens in their original order.
 */
function tokenize(text) {
  const tokens = [];
  for (const piece of text.split(/\s+/)) {
    const trimmed = piece.replace(/^[^a-z0-9]+|[^a-z0-9]+$/gi, "");
    if (trimmed.length > 0) tokens.push(trimmed);
  }
  return tokens;
}
|
|
90
|
+
/**
 * Decide whether a paragraph is boilerplate rather than content.
 *
 * A paragraph counts as boilerplate when it is under 20 words and contains a
 * BOILERPLATE_PATTERNS phrase, or when it is under 30 words and mentions
 * cookie/GDPR/consent/opt-out wording.
 *
 * @param {string} text - Normalized paragraph text.
 * @param {number} words - Token count of the paragraph.
 * @returns {boolean} True when the paragraph should be ignored.
 */
function isBoilerplateParagraph(text, words) {
  const isShortPromo = words < 20 && BOILERPLATE_PATTERNS.test(text);
  if (isShortPromo) return true;
  const mentionsConsent = /\b(cookie|gdpr|consent|opt.out)\b/i.test(text);
  return mentionsConsent && words < 30;
}
|
|
95
|
+
/**
 * Detect short "Label: value" paragraphs.
 *
 * The text must start with 1-60 non-colon characters followed by a colon and
 * whitespace. The label must hold between 1 and MAX_METADATA_LABEL_WORDS
 * tokens and the whole paragraph at most MAX_METADATA_WORDS tokens.
 *
 * @param {string} text - Normalized paragraph text.
 * @param {number} words - Token count of the paragraph.
 * @returns {boolean} True when the paragraph looks like a metadata line.
 */
function isMetadataParagraph(text, words) {
  const hit = text.match(/^([^:]{1,60}):\s+/);
  if (!hit) return false;
  const labelSize = tokenize(hit[1]).length;
  const labelIsShort = labelSize > 0 && labelSize <= MAX_METADATA_LABEL_WORDS;
  return labelIsShort && words <= MAX_METADATA_WORDS;
}
|
|
101
|
+
/**
 * Build the set of n-token shingles (sliding windows joined by single spaces).
 *
 * @param {string[]} words - Token list.
 * @param {number} [n=4] - Window size in tokens.
 * @returns {Set<string>} Distinct shingles; empty when there are fewer than n tokens.
 */
function buildShinglesFromTokens(words, n = 4) {
  const windowCount = words.length - n + 1;
  const shingles = new Set();
  let start = 0;
  while (start < windowCount) {
    const window = words.slice(start, start + n);
    shingles.add(window.join(" "));
    start += 1;
  }
  return shingles;
}
|
|
108
|
+
/**
 * Turn a raw paragraph fragment into a comparable record, or reject it.
 *
 * Rejected (null) when the paragraph has fewer than MIN_SUBSTANTIVE_WORDS
 * tokens, is boilerplate, is a metadata line, or yields fewer than 3 shingles.
 *
 * @param {string} htmlFragment - Raw HTML of one paragraph.
 * @returns {{text: string, shingles: Set<string>} | null} Normalized text with its shingle set, or null.
 */
function createParagraph(htmlFragment) {
  const text = normalizeParagraphText(htmlFragment);
  const tokens = tokenize(text);
  const tokenCount = tokens.length;
  const rejected =
    tokenCount < MIN_SUBSTANTIVE_WORDS ||
    isBoilerplateParagraph(text, tokenCount) ||
    isMetadataParagraph(text, tokenCount);
  if (rejected) return null;
  const shingles = buildShinglesFromTokens(tokens);
  return shingles.size < 3 ? null : { text, shingles };
}
|
|
118
|
+
/**
 * Remove elements that do not carry page content: script, style, nav, header,
 * footer, and noscript in a first pass, then aside in a second pass.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} HTML with those elements (and their contents) deleted.
 */
function stripNonContentHtml(html) {
  const chromeElements = /<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi;
  const asideElements = /<aside\b[^>]*>[\s\S]*?<\/aside>/gi;
  const withoutChrome = html.replace(chromeElements, "");
  return withoutChrome.replace(asideElements, "");
}
|
|
121
|
+
/**
 * Collect every substantive `<p>` paragraph of a page for duplicate checks.
 *
 * Non-content elements are stripped first; each paragraph is then passed
 * through createParagraph and dropped when that returns null.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{text: string, shingles: Set<string>}>} Accepted paragraphs in document order.
 */
function extractDuplicateContentParagraphs(html) {
  const paragraphTags = stripNonContentHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi);
  if (!paragraphTags) return [];
  const kept = [];
  for (const tag of paragraphTags) {
    const paragraph = createParagraph(tag);
    if (paragraph !== null) kept.push(paragraph);
  }
  return kept;
}
|
|
126
|
+
/**
 * Split a page into sections at each `<h2>`/`<h3>` and collect the substantive
 * paragraphs of every section.
 *
 * Text before the first heading is labelled "(intro)". Sections without any
 * accepted paragraph are omitted.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function extractDuplicateContentSections(html) {
  // The lookahead keeps each heading at the start of the chunk it introduces.
  const chunks = stripNonContentHtml(html).split(/(?=<h[23]\b[^>]*>)/i);
  return chunks
    .map((chunk) => {
      const headingHit = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
      const rawParagraphs = chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) ?? [];
      return {
        heading: headingHit ? headingHit[1].replace(/<[^>]*>/g, "").trim() : "(intro)",
        paragraphs: rawParagraphs.map(createParagraph).filter((entry) => entry !== null),
      };
    })
    .filter((section) => section.paragraphs.length > 0);
}
|
|
138
|
+
/**
 * Jaccard similarity of two shingle sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set<string>} a - First shingle set.
 * @param {Set<string>} b - Second shingle set.
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function shingleJaccardSimilarity(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  const shared = [...a].filter((shingle) => b.has(shingle)).length;
  const combined = a.size + b.size - shared;
  if (combined === 0) return 0;
  return shared / combined;
}
|
|
147
|
+
|
|
148
|
+
// src/helpful-content.ts
|
|
149
|
+
/**
 * Bound a value from above.
 *
 * @param {number} value - Value to limit.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  const upperBounded = Math.min(max, value);
  return upperBounded;
}
|
|
152
|
+
/**
 * Bound a value from below (this is a lower clamp, not Math.floor rounding).
 *
 * @param {number} value - Value to limit.
 * @param {number} min - Lower bound.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  const lowerBounded = Math.max(min, value);
  return lowerBounded;
}
|
|
155
|
+
/**
 * Count the non-overlapping occurrences of a pattern in a string.
 *
 * The pattern is always applied globally. With a non-global regex,
 * `String.prototype.match` returns the first match plus its capture groups,
 * so the array length would be "1 + number of groups" rather than a count.
 * A non-global pattern is therefore copied with the `g` flag added; the
 * caller's regex object is never modified.
 *
 * @param {string} text - Text to search.
 * @param {RegExp|string} pattern - Pattern to count; strings are compiled as regex source.
 * @returns {number} Number of matches, 0 when there are none.
 */
function countMatches(text, pattern) {
  const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern);
  const globalRegex = regex.global ? regex : new RegExp(regex.source, `${regex.flags}g`);
  return text.match(globalRegex)?.length ?? 0;
}
|
|
158
|
+
/**
 * Replace every `<script>…</script>` and `<style>…</style>` element with a
 * single space so their contents never count as page text.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} HTML without script and style elements.
 */
function stripScriptsAndStyles(html) {
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, " ");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
  return withoutStyles;
}
|
|
161
|
+
/**
 * Extract the text of a page: scripts and styles are removed, all remaining
 * tags become spaces, whitespace is collapsed, and the result is trimmed.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Plain text with single spaces between words.
 */
function getTextContent(html) {
  const markupOnly = stripScriptsAndStyles(html);
  const untagged = markupOnly.replace(/<[^>]*>/g, " ");
  return untagged.replace(/\s+/g, " ").trim();
}
|
|
164
|
+
/**
 * Return the inner HTML of the first `<body>…</body>` element.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Body contents, or the unchanged input when no complete body element is found.
 */
function getBodyHtml(html) {
  const found = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (!found) return html;
  return found[1];
}
|
|
168
|
+
/**
 * Return the plain text of the first `<p>` element inside the page body.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Paragraph text with tags removed and whitespace collapsed, or "" when there is no paragraph.
 */
function getFirstParagraphText(html) {
  // `\b` after the tag name keeps `<pre>`, `<path>`, `<picture>`, `<param>` and
  // similar tags from being mistaken for a paragraph opener; without it the
  // "first paragraph" could start at an SVG `<path>` and run to the first `</p>`.
  // extractDuplicateContentParagraphs already matches paragraphs with `<p\b`.
  const firstPara = getBodyHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/i);
  if (!firstPara) return "";
  return firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
172
|
+
/**
 * Keep only the first `count` whitespace-separated words of a text.
 *
 * @param {string} text - Source text.
 * @param {number} count - Maximum number of words to keep.
 * @returns {string} The leading words joined by single spaces.
 */
function firstNWords(text, count) {
  const pieces = text.split(/\s+/);
  const leading = pieces.slice(0, count);
  return leading.join(" ");
}
|
|
175
|
+
/**
 * Return the plain text of the first `<h1>` element.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Heading text with inner tags removed and whitespace collapsed, or "" when absent.
 */
function getH1Text(html) {
  const hit = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!hit) return "";
  const inner = hit[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
179
|
+
/**
 * Return the plain text of the first `<title>` element.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Title text with inner tags removed and whitespace collapsed, or "" when absent.
 */
function getTitleText(html) {
  const hit = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!hit) return "";
  const inner = hit[1];
  return inner.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
|
|
183
|
+
/**
 * Count whitespace-separated words.
 *
 * @param {string} text - Text to measure; any falsy value counts as zero words.
 * @returns {number} Number of non-empty words.
 */
function wordCount(text) {
  if (!text) return 0;
  let total = 0;
  for (const piece of text.split(/\s+/)) {
    if (piece) total += 1;
  }
  return total;
}
|
|
186
|
+
/**
 * Heuristically decide whether a page is editorial/article-style content.
 *
 * Signals: a content-style URL segment (+2), an `<article>` element, at least
 * two `<h2>`/`<h3>` headings, 500 or more words, a `<time>` element or
 * datePublished/dateModified marker, and byline wording (+1 each).
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL; the URL signal is skipped when falsy.
 * @returns {boolean} True when the signals add up to 2 or more.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const wc = wordCount(text);
  let signals = 0;
  // The trailing `\b` needs a complete word before it. The former `case-stud`
  // stem could never match "/case-study" or "/case-studies" (a letter follows
  // the stem, so there is no boundary). Plural forms of tutorial, whitepaper,
  // and faq are accepted the same way article/guide already were.
  if (url && /\/(?:blog|articles?|guides?|docs|learn|help|news|insights|resources|how-to|tutorials?|case-stud(?:y|ies)|whitepapers?|faqs?)\b/i.test(url)) {
    signals += 2;
  }
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wc >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
|
|
200
|
+
/**
 * Decide whether readers would expect this page to disclose a methodology.
 *
 * True when the title/H1 or the URL contains review, comparison, or research
 * wording (substring match), or when the page text mentions methodology-style
 * phrases as whole words.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL; treated as "" when falsy.
 * @returns {boolean} True when a methodology disclosure is expected.
 */
function expectsMethodology(html, url) {
  const headline = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const headlineSignals = /(?:review|compare|comparison|vs\.?|best|benchmark|study|analysis|survey|report|research|tested|test|methodology)/i;
  if (headlineSignals.test(headline)) return true;

  const loweredUrl = (url || "").toLowerCase();
  const urlSignals = /(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|best)/i;
  if (urlSignals.test(loweredUrl)) return true;

  // The full text is only needed when neither the headline nor the URL decided.
  const bodySignals = /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i;
  return bodySignals.test(getTextContent(html));
}
|
|
212
|
+
/**
 * Check that the topic promised by the `<title>` and `<h1>` shows up early in
 * the page body.
 *
 * Keywords are the distinct title/H1 words of 5+ characters, minus a small
 * stop list. The page aligns when at least min(2, keyword count) of them occur
 * in the first 250 words of body text.
 *
 * @param {string} html - Raw page HTML.
 * @returns {boolean} True when enough headline keywords appear in the early body text.
 */
function titleAndBodyAlign(html) {
  const topic = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const keywords = topic.split(/[\s|:()\-/]+/).filter((w) => w.length >= 5 && !/^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(w));
  const uniqueKeywords = [...new Set(keywords)];
  if (uniqueKeywords.length === 0) return false;
  // Compare against body copy only. The title and H1 are removed first:
  // otherwise every keyword is found in the very text it was taken from and
  // the check passes for almost any page that has a title.
  const bodyWithoutHeadline = getBodyHtml(html)
    .replace(/<title[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const text = firstNWords(getTextContent(bodyWithoutHeadline), 250).toLowerCase();
  const required = Math.min(2, uniqueKeywords.length);
  return uniqueKeywords.filter((w) => text.includes(w)).length >= required;
}
|
|
222
|
+
// Stock opening phrases, anchored at the start of the string. Non-global, so it is
// safe to reuse with .test(); scoreHelpfulPurposeAlignment applies it to the first paragraph.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;
|
|
223
|
+
// Action-oriented advice phrases; counted (global flag) by scoreHelpfulPurposeAlignment.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
|
|
224
|
+
// Words that signal trade-offs or caveats; counted (global flag) by scoreHelpfulPurposeAlignment.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
|
|
225
|
+
// Filler phrases; each occurrence counts toward the fluff penalty in scoreHelpfulPurposeAlignment.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
|
|
226
|
+
// Call-to-action phrases; scoreHelpfulPurposeAlignment counts them in the first 1800 characters of body HTML.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
|
|
227
|
+
// "We/I" followed by a hands-on verb; counted by scoreFirstHandExperienceSignals.
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
|
|
228
|
+
// Phrases that place a claim in a concrete setting or give a duration/sample size with a number;
// counted by scoreFirstHandExperienceSignals.
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
|
|
229
|
+
// Mentions of original evidence; scoreFirstHandExperienceSignals adds these to its <figure>/<figcaption> count.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
|
|
230
|
+
// Wording about limits, failures, and lessons learned; counted by scoreFirstHandExperienceSignals.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;
|
|
231
|
+
// An <a> tag whose href contains an author/team/people/staff/reviewer path segment.
// Non-global; tested against raw HTML by scoreCreatorTransparency.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
|
|
232
|
+
// Byline wording such as "written by" or "reviewed by". Non-global; tested against page text
// by scoreCreatorTransparency.
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
|
|
233
|
+
// Author-bio and credential wording. Non-global; tested against page text by scoreCreatorTransparency.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;
|
|
234
|
+
// Phrases that announce a methodology, review process, or update disclosure;
// counted by scoreMethodologyTransparency.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
|
|
235
|
+
// Specific process details (sample, criteria, tools, dates); counted by scoreMethodologyTransparency.
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
|
|
236
|
+
/**
 * Score (0-10) how directly a page serves the visitor's task.
 *
 * Pages with no text score 0. Short pages (under 250 words) that are not
 * content-like get a neutral 5. Otherwise the score starts at 3 (content-like)
 * or 5 and is adjusted by rewards (specific opening, practical language,
 * trade-off language, title/body alignment, takeaway phrases) and penalties
 * (generic opener, early calls to action, filler phrases).
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  const firstPara = getFirstParagraphText(html);
  const genericOpening = firstPara ? GENERIC_OPENERS.test(firstPara) : false;
  const tradeoffCount = countMatches(text, TRADEOFF_LANGUAGE);
  const fluffCount = countMatches(text, FLUFF_LANGUAGE);
  const earlyCtas = countMatches(getBodyHtml(html).slice(0, 1800), EARLY_CTA_PATTERN);

  // Practical wording is worth more when it appears within the first 300 words.
  let practicalBonus = 0;
  if (countMatches(firstNWords(text, 300), PRACTICAL_LANGUAGE) >= 1) {
    practicalBonus = 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    practicalBonus = 1;
  }

  let score = contentLike ? 3 : 5;

  // Rewards.
  if (firstPara && !genericOpening) score += 2;
  score += practicalBonus;
  score += tradeoffCount >= 2 ? 2 : tradeoffCount >= 1 ? 1 : 0;
  if (titleAndBodyAlign(html)) score += 1;
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) score += 1;

  // Penalties.
  if (genericOpening) score -= 2;
  score -= earlyCtas >= 3 ? 2 : earlyCtas >= 2 ? 1 : 0;
  score -= fluffCount >= 3 ? 2 : fluffCount >= 1 ? 1 : 0;

  return floor(cap(score, 10), 0);
}
|
|
263
|
+
/**
 * Score (0-10) how much evidence of direct use, testing, or observation a page shows.
 *
 * Pages with no text score 0. The score starts at 2 for content-like pages and
 * 5 otherwise, then gains tiered points for first-hand action verbs, concrete
 * context, evidence artifacts (including `<figure>`/`<figcaption>` tags), and
 * limitation wording. Manufacturer/vendor copy wording costs one point.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // Two-level bonus: `topPoints` at `topAt` or more hits, `basePoints` for at least one hit.
  const tier = (count, topAt, topPoints, basePoints) => {
    if (count >= topAt) return topPoints;
    return count >= 1 ? basePoints : 0;
  };

  let score = isContentLikePage(html, url) ? 2 : 5;
  score += tier(countMatches(text, FIRST_HAND_ACTIONS), 3, 4, 2);
  score += tier(countMatches(text, EXPERIENCE_CONTEXT), 2, 2, 1);
  const artifactCount = countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi);
  score += tier(artifactCount, 3, 2, 1);
  score += tier(countMatches(text, LIMITATION_LANGUAGE), 2, 2, 1);

  const repeatsVendorCopy = /\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text);
  if (repeatsVendorCopy) score -= 1;

  return floor(cap(score, 10), 0);
}
|
|
283
|
+
/**
 * Score (0-10) how clearly a page shows who created or reviewed it.
 *
 * Pages with no text score 0 and pages that are not content-like get a neutral
 * 5. Content-like pages earn points for a byline, an author link, author-bio
 * wording, reviewer/editor wording, and Person structured data.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, used for content-type detection.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const bylineVisible =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);

  // Each entry pairs "is the signal present?" with the points it is worth.
  const signals = [
    [bylineVisible, 3],
    [AUTHOR_LINK_PATTERN.test(html), 2],
    [AUTHOR_BIO_PATTERN.test(text), 2],
    [/\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text), 1],
    [/"@type"\s*:\s*"Person"/i.test(html), 2],
  ];
  const score = signals.reduce((sum, [present, points]) => (present ? sum + points : sum), 0);

  return floor(cap(score, 10), 0);
}
|
|
298
|
+
/**
 * Score (0-10) how openly a page explains how its content was produced.
 *
 * Pages with no text score 0. Pages where a methodology is expected (see
 * expectsMethodology) start at 2 and every other page starts at a neutral 5.
 * Points are added for methodology terms, process details, quantified
 * testing statements, supporting figures/tables, and AI/editor disclosures.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [url] - Page URL, passed to expectsMethodology.
 * @returns {number} Score clamped to the range 0-10.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  // The baseline was previously `expected ? 2 : contentLike ? 5 : 5`; both
  // inner branches were 5, so the content-type lookup had no effect and is
  // dropped. NOTE(review): if content-like pages were meant to start at a
  // different baseline, restore the distinction here.
  let score = expectsMethodology(html, url) ? 2 : 5;
  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;
  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;
  // Quantified statements such as "tested 12", "across 40", "over 6 weeks".
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  // A figure or table only counts when the page also names its methodology.
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
  return floor(cap(score, 10), 0);
}
|
|
316
|
+
|
|
79
317
|
// src/site-crawler.ts
|
|
80
318
|
async function fetchText(url) {
|
|
81
319
|
try {
|
|
@@ -981,8 +1219,8 @@ function checkDirectAnswerDensity(data) {
|
|
|
981
1219
|
const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
982
1220
|
const snippetZoneParagraphs = paragraphs.filter((p) => {
|
|
983
1221
|
const text2 = p.replace(/<[^>]*>/g, "").trim();
|
|
984
|
-
const
|
|
985
|
-
return
|
|
1222
|
+
const wordCount2 = text2.split(/\s+/).length;
|
|
1223
|
+
return wordCount2 >= 40 && wordCount2 <= 150;
|
|
986
1224
|
});
|
|
987
1225
|
if (snippetZoneParagraphs.length >= 3) {
|
|
988
1226
|
score += 2;
|
|
@@ -2130,6 +2368,123 @@ function checkContentDepth(data, topicCoherenceScore) {
|
|
|
2130
2368
|
}
|
|
2131
2369
|
return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
|
|
2132
2370
|
}
|
|
2371
|
+
/**
 * Run a page scorer over the homepage and every sampled blog page.
 *
 * A page without `finalUrl` is reported under the site root URL, built from
 * `data.protocol` (default "https") and `data.domain`.
 *
 * @param {object} data - Crawl result; reads `homepage`, `blogSample`, `protocol`, and `domain`.
 * @param {(html: string, url: string) => number} scorer - Called with each page's `text` and its URL.
 * @returns {Array<{url: string, score: number}>} One entry per page, homepage first.
 */
function scoreSampledPages(data, scorer) {
  const rootUrl = data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`;
  const sampled = [];
  if (data.homepage) sampled.push(data.homepage);
  if (data.blogSample) sampled.push(...data.blogSample);
  return sampled.map((page) => {
    const url = page.finalUrl || rootUrl;
    return { url, score: scorer(page.text, url) };
  });
}
|
|
2385
|
+
/**
 * Summarize per-page scores.
 *
 * @param {Array<{url: string, score: number}>} pageScores - Scored pages.
 * @returns {{total: number, average: number, strong: Array, weak: Array}}
 *   `total` is the page count, `average` the rounded mean (0 for no pages),
 *   `strong` the pages scoring 8 or more, and `weak` those scoring 4 or less.
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let sum = 0;
  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }
  const average = total > 0 ? Math.round(sum / total) : 0;
  return { total, average, strong, weak };
}
|
|
2392
|
+
/**
 * Audit criterion "Helpful Purpose Alignment": averages
 * scoreHelpfulPurposeAlignment over the sampled pages and reports findings.
 *
 * @param {object} data - Crawl result; must contain `homepage` for the check to run.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Status is "not_found" without a homepage, otherwise "pass" (7+), "partial" (4-6), or "fail".
 */
function checkHelpfulPurposeAlignment(data) {
  const criterion = "helpful_purpose_alignment";
  const criterion_label = "Helpful Purpose Alignment";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" };
  } else {
    headline = { severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" };
  }
  const findings = [headline];

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler",
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P1" };
}
|
|
2416
|
+
/**
 * Audit criterion "First-Hand Experience Signals": averages
 * scoreFirstHandExperienceSignals over the sampled pages and reports findings.
 *
 * @param {object} data - Crawl result; must contain `homepage` for the check to run.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Status is "not_found" without a homepage, otherwise "pass" (7+), "partial" (4-6), or "fail".
 */
function checkFirstHandExperienceSignals(data) {
  const criterion = "first_hand_experience_signals";
  const criterion_label = "First-Hand Experience Signals";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" };
  } else {
    headline = { severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" };
  }
  const findings = [headline];

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice",
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
|
|
2440
|
+
/**
 * Audit criterion "Creator Transparency": averages scoreCreatorTransparency
 * over the sampled pages and reports findings.
 *
 * @param {object} data - Crawl result; must contain `homepage` for the check to run.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Status is "not_found" without a homepage, otherwise "pass" (7+), "partial" (4-6), or "fail".
 */
function checkCreatorTransparency(data) {
  const criterion = "creator_transparency";
  const criterion_label = "Creator Transparency";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" };
  } else {
    headline = { severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" };
  }
  const findings = [headline];

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages",
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
|
|
2464
|
+
/**
 * Audit criterion "Methodology Transparency": averages
 * scoreMethodologyTransparency over the sampled pages and reports findings.
 *
 * @param {object} data - Crawl result; must contain `homepage` for the check to run.
 * @returns {{criterion: string, criterion_label: string, score: number, status: string, findings: object[], fix_priority: string}}
 *   Status is "not_found" without a homepage, otherwise "pass" (7+), "partial" (4-6), or "fail".
 */
function checkMethodologyTransparency(data) {
  const criterion = "methodology_transparency";
  const criterion_label = "Methodology Transparency";

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { criterion, criterion_label, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));

  // Headline finding, chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' };
  } else {
    headline = { severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" };
  }
  const findings = [headline];

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes",
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { criterion, criterion_label, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
|
|
2133
2488
|
function checkCitationReadyWriting(data) {
|
|
2134
2489
|
const findings = [];
|
|
2135
2490
|
if (!data.homepage) {
|
|
@@ -2225,8 +2580,8 @@ function checkAnswerFirstPlacement(data) {
|
|
|
2225
2580
|
const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
|
|
2226
2581
|
for (const p of earlyParagraphs) {
|
|
2227
2582
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
2228
|
-
const
|
|
2229
|
-
if (
|
|
2583
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
2584
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
2230
2585
|
shortAnswerCount++;
|
|
2231
2586
|
break;
|
|
2232
2587
|
}
|
|
@@ -2556,56 +2911,8 @@ function checkImageContextAI(data) {
|
|
|
2556
2911
|
}
|
|
2557
2912
|
return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
|
|
2558
2913
|
}
|
|
2559
|
-
var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
|
|
2560
|
-
function isBoilerplateParagraph(text) {
|
|
2561
|
-
const words = text.split(/\s+/).length;
|
|
2562
|
-
if (words < 20 && BOILERPLATE_RE.test(text)) return true;
|
|
2563
|
-
if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
|
|
2564
|
-
return false;
|
|
2565
|
-
}
|
|
2566
|
-
function toShingles(text, n = 4) {
|
|
2567
|
-
const words = text.split(/\s+/).filter((w) => w.length > 1);
|
|
2568
|
-
const shingles = /* @__PURE__ */ new Set();
|
|
2569
|
-
for (let i = 0; i <= words.length - n; i++) {
|
|
2570
|
-
shingles.add(words.slice(i, i + n).join(" "));
|
|
2571
|
-
}
|
|
2572
|
-
return shingles;
|
|
2573
|
-
}
|
|
2574
|
-
function shingleSimilarity(a, b) {
|
|
2575
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
2576
|
-
let intersection = 0;
|
|
2577
|
-
for (const s of a) {
|
|
2578
|
-
if (b.has(s)) intersection++;
|
|
2579
|
-
}
|
|
2580
|
-
const union = a.size + b.size - intersection;
|
|
2581
|
-
return union === 0 ? 0 : intersection / union;
|
|
2582
|
-
}
|
|
2583
|
-
function extractPageParagraphs(html) {
|
|
2584
|
-
const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
|
|
2585
|
-
const pMatches = cleaned.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
2586
|
-
return pMatches.map((p) => {
|
|
2587
|
-
const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
|
|
2588
|
-
return { text, shingles: toShingles(text) };
|
|
2589
|
-
}).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
|
|
2590
|
-
}
|
|
2591
|
-
function splitIntoSectionsWithParagraphs(html) {
|
|
2592
|
-
const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
|
|
2593
|
-
const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
|
|
2594
|
-
const sections = [];
|
|
2595
|
-
for (const part of parts) {
|
|
2596
|
-
const hMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
|
|
2597
|
-
const heading = hMatch ? hMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
|
|
2598
|
-
const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
2599
|
-
const paragraphs = pMatches.map((p) => {
|
|
2600
|
-
const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
|
|
2601
|
-
return { text, shingles: toShingles(text) };
|
|
2602
|
-
}).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
|
|
2603
|
-
if (paragraphs.length > 0) sections.push({ heading, paragraphs });
|
|
2604
|
-
}
|
|
2605
|
-
return sections;
|
|
2606
|
-
}
|
|
2607
2914
|
function findIntraPageDuplicates(html) {
|
|
2608
|
-
const sections =
|
|
2915
|
+
const sections = extractDuplicateContentSections(html);
|
|
2609
2916
|
if (sections.length < 2) return [];
|
|
2610
2917
|
const pairs = [];
|
|
2611
2918
|
for (let i = 0; i < sections.length; i++) {
|
|
@@ -2614,7 +2921,7 @@ function findIntraPageDuplicates(html) {
|
|
|
2614
2921
|
for (const pA of sections[i].paragraphs) {
|
|
2615
2922
|
if (found) break;
|
|
2616
2923
|
for (const pB of sections[j].paragraphs) {
|
|
2617
|
-
const sim =
|
|
2924
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
2618
2925
|
if (sim > 0.4) {
|
|
2619
2926
|
pairs.push({
|
|
2620
2927
|
headingA: sections[i].heading,
|
|
@@ -2694,11 +3001,11 @@ function checkCrossPageDuplication(data) {
|
|
|
2694
3001
|
const findings = [];
|
|
2695
3002
|
const pages = [];
|
|
2696
3003
|
if (data.homepage) {
|
|
2697
|
-
pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs:
|
|
3004
|
+
pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractDuplicateContentParagraphs(data.homepage.text) });
|
|
2698
3005
|
}
|
|
2699
3006
|
if (data.blogSample) {
|
|
2700
3007
|
for (const page of data.blogSample) {
|
|
2701
|
-
pages.push({ url: page.finalUrl || "", paragraphs:
|
|
3008
|
+
pages.push({ url: page.finalUrl || "", paragraphs: extractDuplicateContentParagraphs(page.text) });
|
|
2702
3009
|
}
|
|
2703
3010
|
}
|
|
2704
3011
|
if (pages.length <= 1) {
|
|
@@ -2730,7 +3037,7 @@ function checkCrossPageDuplication(data) {
|
|
|
2730
3037
|
const fpA = [...pA.shingles].slice(0, 5).join("|");
|
|
2731
3038
|
if (siteBoilerprints.has(fpA)) continue;
|
|
2732
3039
|
for (const pB of pages[j].paragraphs) {
|
|
2733
|
-
const sim =
|
|
3040
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
2734
3041
|
if (sim > 0.4) {
|
|
2735
3042
|
dupCount++;
|
|
2736
3043
|
if (!sample) sample = pA.text.slice(0, 80);
|
|
@@ -2816,14 +3123,19 @@ function auditSiteFromData(data) {
|
|
|
2816
3123
|
checkVisibleDateSignal(data),
|
|
2817
3124
|
topicCoherence,
|
|
2818
3125
|
checkContentDepth(data, topicCoherence.score),
|
|
2819
|
-
//
|
|
3126
|
+
// Helpful-content criteria (#29-#32)
|
|
3127
|
+
checkHelpfulPurposeAlignment(data),
|
|
3128
|
+
checkFirstHandExperienceSignals(data),
|
|
3129
|
+
checkCreatorTransparency(data),
|
|
3130
|
+
checkMethodologyTransparency(data),
|
|
3131
|
+
// V2 criteria (#33-#38)
|
|
2820
3132
|
checkCitationReadyWriting(data),
|
|
2821
3133
|
checkAnswerFirstPlacement(data),
|
|
2822
3134
|
checkEvidencePackaging(data),
|
|
2823
3135
|
checkEntityDisambiguation(data),
|
|
2824
3136
|
checkExtractionFriction(data),
|
|
2825
3137
|
checkImageContextAI(data),
|
|
2826
|
-
// V3 criteria (#
|
|
3138
|
+
// V3 criteria (#39-#40)
|
|
2827
3139
|
checkDuplicateContent(data),
|
|
2828
3140
|
checkCrossPageDuplication(data)
|
|
2829
3141
|
];
|
|
@@ -2849,6 +3161,10 @@ var WEIGHTS = {
|
|
|
2849
3161
|
// Relevance to actual AI queries
|
|
2850
3162
|
faq_section: 0.03,
|
|
2851
3163
|
// Structured Q&A pairs
|
|
3164
|
+
helpful_purpose_alignment: 0.03,
|
|
3165
|
+
// Visitor-helpful vs search-first framing
|
|
3166
|
+
first_hand_experience_signals: 0.03,
|
|
3167
|
+
// Evidence of real use or observation
|
|
2852
3168
|
// ─── Content Organization (~30%) ──────────────────────────────────────────
|
|
2853
3169
|
// HOW easily AI engines can extract and trust your content.
|
|
2854
3170
|
entity_consistency: 0.05,
|
|
@@ -2863,9 +3179,13 @@ var WEIGHTS = {
|
|
|
2863
3179
|
// Expert attribution
|
|
2864
3180
|
table_list_extractability: 0.03,
|
|
2865
3181
|
// Extractable structured data
|
|
2866
|
-
|
|
3182
|
+
creator_transparency: 0.02,
|
|
3183
|
+
// Visible author/reviewer clarity
|
|
3184
|
+
methodology_transparency: 0.02,
|
|
3185
|
+
// Process disclosure
|
|
3186
|
+
definition_patterns: 0.015,
|
|
2867
3187
|
// Clear definitions
|
|
2868
|
-
visible_date_signal: 0.
|
|
3188
|
+
visible_date_signal: 0.015,
|
|
2869
3189
|
// Publication date trust
|
|
2870
3190
|
semantic_html: 0.02,
|
|
2871
3191
|
// Clean semantic structure
|
|
@@ -2874,15 +3194,15 @@ var WEIGHTS = {
|
|
|
2874
3194
|
// ─── Technical Plumbing (~15%) ────────────────────────────────────────────
|
|
2875
3195
|
// WHETHER AI crawlers can find you. Table stakes with diminishing returns.
|
|
2876
3196
|
content_cannibalization: 0.02,
|
|
2877
|
-
llms_txt: 0.
|
|
2878
|
-
robots_txt: 0.
|
|
3197
|
+
llms_txt: 0.01,
|
|
3198
|
+
robots_txt: 0.01,
|
|
2879
3199
|
content_velocity: 0.02,
|
|
2880
|
-
content_licensing: 0.
|
|
3200
|
+
content_licensing: 0.01,
|
|
2881
3201
|
sitemap_completeness: 0.01,
|
|
2882
|
-
canonical_url:
|
|
2883
|
-
rss_feed: 0
|
|
2884
|
-
schema_coverage: 0
|
|
2885
|
-
speakable_schema: 0
|
|
3202
|
+
canonical_url: 5e-3,
|
|
3203
|
+
rss_feed: 0,
|
|
3204
|
+
schema_coverage: 0,
|
|
3205
|
+
speakable_schema: 0,
|
|
2886
3206
|
// ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
|
|
2887
3207
|
// Citation quality, evidence packaging, and extraction friction.
|
|
2888
3208
|
citation_ready_writing: 0.04,
|
|
@@ -2895,7 +3215,7 @@ var WEIGHTS = {
|
|
|
2895
3215
|
// Clear entity boundaries
|
|
2896
3216
|
extraction_friction: 0.02,
|
|
2897
3217
|
// Sentence length, voice, jargon
|
|
2898
|
-
image_context_ai:
|
|
3218
|
+
image_context_ai: 5e-3,
|
|
2899
3219
|
// Figure/figcaption, alt text quality
|
|
2900
3220
|
// ─── V3 Criteria ────────────────────────────────────────────────────────
|
|
2901
3221
|
duplicate_content: 0.05,
|
|
@@ -2915,8 +3235,8 @@ function calculateOverallScore(criteria) {
|
|
|
2915
3235
|
let score = Math.round(weightedSum / totalWeight);
|
|
2916
3236
|
const coherence = criteria.find((c) => c.criterion === "topic_coherence");
|
|
2917
3237
|
if (coherence && coherence.score < 6) {
|
|
2918
|
-
const
|
|
2919
|
-
score = Math.min(score,
|
|
3238
|
+
const cap3 = 35 + coherence.score * 5;
|
|
3239
|
+
score = Math.min(score, cap3);
|
|
2920
3240
|
}
|
|
2921
3241
|
return score;
|
|
2922
3242
|
}
|
|
@@ -3015,6 +3335,8 @@ var PILLARS = {
|
|
|
3015
3335
|
"citation_ready_writing",
|
|
3016
3336
|
"answer_first_placement",
|
|
3017
3337
|
"evidence_packaging",
|
|
3338
|
+
"helpful_purpose_alignment",
|
|
3339
|
+
"first_hand_experience_signals",
|
|
3018
3340
|
"duplicate_content",
|
|
3019
3341
|
"cross_page_duplication"
|
|
3020
3342
|
],
|
|
@@ -3032,7 +3354,9 @@ var PILLARS = {
|
|
|
3032
3354
|
"internal_linking",
|
|
3033
3355
|
"content_freshness",
|
|
3034
3356
|
"author_schema_depth",
|
|
3035
|
-
"schema_markup"
|
|
3357
|
+
"schema_markup",
|
|
3358
|
+
"creator_transparency",
|
|
3359
|
+
"methodology_transparency"
|
|
3036
3360
|
],
|
|
3037
3361
|
"Technical Foundation": [
|
|
3038
3362
|
"semantic_html",
|
|
@@ -3062,6 +3386,8 @@ var CLIENT_NAMES = {
|
|
|
3062
3386
|
citation_ready_writing: "Citation-Ready Writing",
|
|
3063
3387
|
answer_first_placement: "Answer-First Placement",
|
|
3064
3388
|
evidence_packaging: "Evidence Packaging",
|
|
3389
|
+
helpful_purpose_alignment: "Helpful Purpose Alignment",
|
|
3390
|
+
first_hand_experience_signals: "First-Hand Experience Signals",
|
|
3065
3391
|
direct_answer_density: "Direct Answer Density",
|
|
3066
3392
|
qa_content_format: "Q&A Content Format",
|
|
3067
3393
|
query_answer_alignment: "Query-Answer Alignment",
|
|
@@ -3074,6 +3400,8 @@ var CLIENT_NAMES = {
|
|
|
3074
3400
|
content_freshness: "Content Freshness",
|
|
3075
3401
|
author_schema_depth: "Author & Expert Schema",
|
|
3076
3402
|
schema_markup: "Schema Markup",
|
|
3403
|
+
creator_transparency: "Creator Transparency",
|
|
3404
|
+
methodology_transparency: "Methodology Transparency",
|
|
3077
3405
|
semantic_html: "Semantic HTML",
|
|
3078
3406
|
clean_html: "Clean HTML",
|
|
3079
3407
|
visible_date_signal: "Visible Date Signal",
|
|
@@ -3100,6 +3428,8 @@ var PILLAR_WEIGHTS = {
|
|
|
3100
3428
|
citation_ready_writing: 0.04,
|
|
3101
3429
|
answer_first_placement: 0.03,
|
|
3102
3430
|
evidence_packaging: 0.03,
|
|
3431
|
+
helpful_purpose_alignment: 0.03,
|
|
3432
|
+
first_hand_experience_signals: 0.03,
|
|
3103
3433
|
duplicate_content: 0.05,
|
|
3104
3434
|
cross_page_duplication: 0.03,
|
|
3105
3435
|
direct_answer_density: 0.05,
|
|
@@ -3107,28 +3437,30 @@ var PILLAR_WEIGHTS = {
|
|
|
3107
3437
|
query_answer_alignment: 0.04,
|
|
3108
3438
|
faq_section: 0.03,
|
|
3109
3439
|
table_list_extractability: 0.03,
|
|
3110
|
-
definition_patterns: 0.
|
|
3440
|
+
definition_patterns: 0.015,
|
|
3111
3441
|
entity_disambiguation: 0.02,
|
|
3112
3442
|
entity_consistency: 0.05,
|
|
3113
3443
|
internal_linking: 0.04,
|
|
3114
3444
|
content_freshness: 0.04,
|
|
3115
3445
|
author_schema_depth: 0.03,
|
|
3116
3446
|
schema_markup: 0.03,
|
|
3447
|
+
creator_transparency: 0.02,
|
|
3448
|
+
methodology_transparency: 0.02,
|
|
3117
3449
|
semantic_html: 0.02,
|
|
3118
3450
|
clean_html: 0.02,
|
|
3119
|
-
visible_date_signal: 0.
|
|
3451
|
+
visible_date_signal: 0.015,
|
|
3120
3452
|
extraction_friction: 0.02,
|
|
3121
|
-
image_context_ai:
|
|
3122
|
-
schema_coverage: 0
|
|
3123
|
-
speakable_schema: 0
|
|
3453
|
+
image_context_ai: 5e-3,
|
|
3454
|
+
schema_coverage: 0,
|
|
3455
|
+
speakable_schema: 0,
|
|
3124
3456
|
content_cannibalization: 0.02,
|
|
3125
|
-
llms_txt: 0.
|
|
3126
|
-
robots_txt: 0.
|
|
3457
|
+
llms_txt: 0.01,
|
|
3458
|
+
robots_txt: 0.01,
|
|
3127
3459
|
content_velocity: 0.02,
|
|
3128
|
-
content_licensing: 0.
|
|
3129
|
-
canonical_url:
|
|
3460
|
+
content_licensing: 0.01,
|
|
3461
|
+
canonical_url: 5e-3,
|
|
3130
3462
|
sitemap_completeness: 0.01,
|
|
3131
|
-
rss_feed: 0
|
|
3463
|
+
rss_feed: 0
|
|
3132
3464
|
};
|
|
3133
3465
|
var CRITERION_EFFORT = {
|
|
3134
3466
|
topic_coherence: "High",
|
|
@@ -3138,6 +3470,8 @@ var CRITERION_EFFORT = {
|
|
|
3138
3470
|
citation_ready_writing: "Medium",
|
|
3139
3471
|
answer_first_placement: "Medium",
|
|
3140
3472
|
evidence_packaging: "Medium",
|
|
3473
|
+
helpful_purpose_alignment: "Medium",
|
|
3474
|
+
first_hand_experience_signals: "Medium",
|
|
3141
3475
|
duplicate_content: "Medium",
|
|
3142
3476
|
cross_page_duplication: "Medium",
|
|
3143
3477
|
direct_answer_density: "Medium",
|
|
@@ -3152,6 +3486,8 @@ var CRITERION_EFFORT = {
|
|
|
3152
3486
|
content_freshness: "Low",
|
|
3153
3487
|
author_schema_depth: "Low",
|
|
3154
3488
|
schema_markup: "Medium",
|
|
3489
|
+
creator_transparency: "Low",
|
|
3490
|
+
methodology_transparency: "Low",
|
|
3155
3491
|
semantic_html: "Low",
|
|
3156
3492
|
clean_html: "Medium",
|
|
3157
3493
|
visible_date_signal: "Low",
|
|
@@ -3176,6 +3512,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3176
3512
|
citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
|
|
3177
3513
|
answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
|
|
3178
3514
|
evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
|
|
3515
|
+
helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
|
|
3516
|
+
first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
|
|
3179
3517
|
direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
|
|
3180
3518
|
qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
|
|
3181
3519
|
query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
|
|
@@ -3188,6 +3526,8 @@ var FIX_DESCRIPTIONS = {
|
|
|
3188
3526
|
content_freshness: "Add dateModified schema and visible last-updated dates.",
|
|
3189
3527
|
author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
|
|
3190
3528
|
schema_markup: "Implement JSON-LD structured data on key pages.",
|
|
3529
|
+
creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
|
|
3530
|
+
methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
|
|
3191
3531
|
semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
|
|
3192
3532
|
clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
|
|
3193
3533
|
visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
|
|
@@ -3287,6 +3627,10 @@ var CRITERION_LABELS = {
|
|
|
3287
3627
|
"Visible Date Signal": "Visible Date Signal",
|
|
3288
3628
|
"Topic Coherence": "Topic Coherence",
|
|
3289
3629
|
"Content Depth": "Content Depth",
|
|
3630
|
+
"Helpful Purpose Alignment": "Helpful Purpose Alignment",
|
|
3631
|
+
"First-Hand Experience Signals": "First-Hand Experience Signals",
|
|
3632
|
+
"Creator Transparency": "Creator Transparency",
|
|
3633
|
+
"Methodology Transparency": "Methodology Transparency",
|
|
3290
3634
|
"Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
|
|
3291
3635
|
"Answer-First Placement": "Answer-First Placement",
|
|
3292
3636
|
"Evidence Packaging": "Evidence Packaging",
|
|
@@ -3392,6 +3736,8 @@ var CRITERION_WEIGHTS = {
|
|
|
3392
3736
|
qa_content_format: 0.04,
|
|
3393
3737
|
query_answer_alignment: 0.04,
|
|
3394
3738
|
faq_section: 0.03,
|
|
3739
|
+
helpful_purpose_alignment: 0.03,
|
|
3740
|
+
first_hand_experience_signals: 0.03,
|
|
3395
3741
|
// Content Organization (~30%)
|
|
3396
3742
|
entity_consistency: 0.05,
|
|
3397
3743
|
internal_linking: 0.04,
|
|
@@ -3399,28 +3745,30 @@ var CRITERION_WEIGHTS = {
|
|
|
3399
3745
|
schema_markup: 0.03,
|
|
3400
3746
|
author_schema_depth: 0.03,
|
|
3401
3747
|
table_list_extractability: 0.03,
|
|
3402
|
-
|
|
3403
|
-
|
|
3748
|
+
creator_transparency: 0.02,
|
|
3749
|
+
methodology_transparency: 0.02,
|
|
3750
|
+
definition_patterns: 0.015,
|
|
3751
|
+
visible_date_signal: 0.015,
|
|
3404
3752
|
semantic_html: 0.02,
|
|
3405
3753
|
clean_html: 0.02,
|
|
3406
3754
|
// Technical Plumbing (~15%)
|
|
3407
3755
|
content_cannibalization: 0.02,
|
|
3408
|
-
llms_txt: 0.
|
|
3409
|
-
robots_txt: 0.
|
|
3756
|
+
llms_txt: 0.01,
|
|
3757
|
+
robots_txt: 0.01,
|
|
3410
3758
|
content_velocity: 0.02,
|
|
3411
|
-
content_licensing: 0.
|
|
3759
|
+
content_licensing: 0.01,
|
|
3412
3760
|
sitemap_completeness: 0.01,
|
|
3413
|
-
canonical_url:
|
|
3414
|
-
rss_feed: 0
|
|
3415
|
-
schema_coverage: 0
|
|
3416
|
-
speakable_schema: 0
|
|
3761
|
+
canonical_url: 5e-3,
|
|
3762
|
+
rss_feed: 0,
|
|
3763
|
+
schema_coverage: 0,
|
|
3764
|
+
speakable_schema: 0,
|
|
3417
3765
|
// V2 Criteria (~15%)
|
|
3418
3766
|
citation_ready_writing: 0.04,
|
|
3419
3767
|
answer_first_placement: 0.03,
|
|
3420
3768
|
evidence_packaging: 0.03,
|
|
3421
3769
|
entity_disambiguation: 0.02,
|
|
3422
3770
|
extraction_friction: 0.02,
|
|
3423
|
-
image_context_ai:
|
|
3771
|
+
image_context_ai: 5e-3,
|
|
3424
3772
|
// V3 Criteria
|
|
3425
3773
|
duplicate_content: 0.05,
|
|
3426
3774
|
cross_page_duplication: 0.03
|
|
@@ -3461,6 +3809,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3461
3809
|
effort: "Medium",
|
|
3462
3810
|
description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
|
|
3463
3811
|
},
|
|
3812
|
+
helpful_purpose_alignment: {
|
|
3813
|
+
name: "Improve Helpful Purpose Alignment",
|
|
3814
|
+
effort: "Medium",
|
|
3815
|
+
description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
|
|
3816
|
+
},
|
|
3817
|
+
first_hand_experience_signals: {
|
|
3818
|
+
name: "Add First-Hand Experience Signals",
|
|
3819
|
+
effort: "Medium",
|
|
3820
|
+
description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
|
|
3821
|
+
},
|
|
3464
3822
|
original_data: {
|
|
3465
3823
|
name: "Add Original Data & Case Studies",
|
|
3466
3824
|
effort: "High",
|
|
@@ -3516,6 +3874,16 @@ var OPPORTUNITY_TEMPLATES = {
|
|
|
3516
3874
|
effort: "Low",
|
|
3517
3875
|
description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
|
|
3518
3876
|
},
|
|
3877
|
+
creator_transparency: {
|
|
3878
|
+
name: "Improve Creator Transparency",
|
|
3879
|
+
effort: "Low",
|
|
3880
|
+
description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
|
|
3881
|
+
},
|
|
3882
|
+
methodology_transparency: {
|
|
3883
|
+
name: "Add Methodology Transparency",
|
|
3884
|
+
effort: "Low",
|
|
3885
|
+
description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
|
|
3886
|
+
},
|
|
3519
3887
|
fact_density: {
|
|
3520
3888
|
name: "Increase Fact & Data Density",
|
|
3521
3889
|
effort: "Medium",
|
|
@@ -3971,19 +4339,23 @@ var PAGE_CRITERIA = {
|
|
|
3971
4339
|
content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
|
|
3972
4340
|
schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
|
|
3973
4341
|
table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
|
|
3974
|
-
definition_patterns: { weight: 0.
|
|
3975
|
-
visible_date_signal: { weight: 0.
|
|
4342
|
+
definition_patterns: { weight: 0.015, label: "Definition Patterns" },
|
|
4343
|
+
visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
|
|
3976
4344
|
semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
|
|
3977
4345
|
clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
|
|
3978
4346
|
// Technical Plumbing
|
|
3979
|
-
canonical_url: { weight:
|
|
4347
|
+
canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
|
|
3980
4348
|
// V2 Criteria
|
|
3981
4349
|
citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
|
|
3982
4350
|
answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
|
|
3983
4351
|
evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
|
|
4352
|
+
helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
|
|
4353
|
+
first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
|
|
3984
4354
|
entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
|
|
3985
4355
|
extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
|
|
3986
|
-
|
|
4356
|
+
creator_transparency: { weight: 0.02, label: "Creator Transparency" },
|
|
4357
|
+
methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
|
|
4358
|
+
image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
|
|
3987
4359
|
duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
|
|
3988
4360
|
};
|
|
3989
4361
|
function extractJsonLdBlocks(html) {
|
|
@@ -4006,7 +4378,7 @@ function extractTypesFromJsonLd(blocks) {
|
|
|
4006
4378
|
}
|
|
4007
4379
|
return types;
|
|
4008
4380
|
}
|
|
4009
|
-
function
|
|
4381
|
+
function getTextContent2(html) {
|
|
4010
4382
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4011
4383
|
}
|
|
4012
4384
|
function extractQuestionHeadings2(html) {
|
|
@@ -4034,7 +4406,7 @@ function countAnsweredQuestions(html) {
|
|
|
4034
4406
|
}
|
|
4035
4407
|
return { total: questions.length, answered };
|
|
4036
4408
|
}
|
|
4037
|
-
function
|
|
4409
|
+
function cap2(value, max) {
|
|
4038
4410
|
return Math.min(value, max);
|
|
4039
4411
|
}
|
|
4040
4412
|
function scoreSchemaMarkup(html) {
|
|
@@ -4060,10 +4432,10 @@ function scoreSchemaMarkup(html) {
|
|
|
4060
4432
|
for (const t of types) {
|
|
4061
4433
|
if (knownTypes.includes(t)) knownCount++;
|
|
4062
4434
|
}
|
|
4063
|
-
score +=
|
|
4435
|
+
score += cap2(knownCount * 2, 4);
|
|
4064
4436
|
if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
|
|
4065
4437
|
if (types.has("FAQPage")) score += 1;
|
|
4066
|
-
return
|
|
4438
|
+
return cap2(score, 10);
|
|
4067
4439
|
}
|
|
4068
4440
|
function scoreQAFormat(html) {
|
|
4069
4441
|
const questions = extractQuestionHeadings2(html);
|
|
@@ -4075,7 +4447,7 @@ function scoreQAFormat(html) {
|
|
|
4075
4447
|
if (answered >= 1) score += 3;
|
|
4076
4448
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4077
4449
|
if (h1Matches.length === 1) score += 2;
|
|
4078
|
-
return
|
|
4450
|
+
return cap2(score, 10);
|
|
4079
4451
|
}
|
|
4080
4452
|
function scoreCleanHtml(html) {
|
|
4081
4453
|
let score = 0;
|
|
@@ -4084,15 +4456,15 @@ function scoreCleanHtml(html) {
|
|
|
4084
4456
|
for (const tag of semantics) {
|
|
4085
4457
|
if (html.toLowerCase().includes(tag)) semCount++;
|
|
4086
4458
|
}
|
|
4087
|
-
score +=
|
|
4459
|
+
score += cap2(semCount, 3);
|
|
4088
4460
|
const h1Matches = html.match(/<h1[\s>]/gi) || [];
|
|
4089
4461
|
if (h1Matches.length === 1) score += 2;
|
|
4090
|
-
const text =
|
|
4462
|
+
const text = getTextContent2(html);
|
|
4091
4463
|
if (text.length > 500) score += 3;
|
|
4092
4464
|
const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
|
|
4093
4465
|
const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
|
|
4094
4466
|
if (hasTitle && hasDesc) score += 2;
|
|
4095
|
-
return
|
|
4467
|
+
return cap2(score, 10);
|
|
4096
4468
|
}
|
|
4097
4469
|
function scoreFaqSection(html) {
|
|
4098
4470
|
let score = 0;
|
|
@@ -4104,11 +4476,11 @@ function scoreFaqSection(html) {
|
|
|
4104
4476
|
const questions = extractQuestionHeadings2(html);
|
|
4105
4477
|
if (questions.length >= 10) score += 1;
|
|
4106
4478
|
if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
|
|
4107
|
-
return
|
|
4479
|
+
return cap2(score, 10);
|
|
4108
4480
|
}
|
|
4109
4481
|
function scoreOriginalData(html) {
|
|
4110
4482
|
let score = 0;
|
|
4111
|
-
const text =
|
|
4483
|
+
const text = getTextContent2(html);
|
|
4112
4484
|
if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
|
|
4113
4485
|
score += 3;
|
|
4114
4486
|
} else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
|
|
@@ -4125,7 +4497,7 @@ function scoreOriginalData(html) {
|
|
|
4125
4497
|
if (/href=["'][^"']*\/blog\b/i.test(html)) {
|
|
4126
4498
|
score += 2;
|
|
4127
4499
|
}
|
|
4128
|
-
return
|
|
4500
|
+
return cap2(score, 10);
|
|
4129
4501
|
}
|
|
4130
4502
|
function scoreQueryAnswerAlignment(html) {
|
|
4131
4503
|
const { total, answered } = countAnsweredQuestions(html);
|
|
@@ -4148,7 +4520,7 @@ function scoreContentFreshness(html) {
|
|
|
4148
4520
|
const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
|
|
4149
4521
|
const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
|
|
4150
4522
|
if (yearPattern.test(html)) score += 2;
|
|
4151
|
-
return
|
|
4523
|
+
return cap2(score, 10);
|
|
4152
4524
|
}
|
|
4153
4525
|
function scoreTableListExtractability(html) {
|
|
4154
4526
|
let score = 0;
|
|
@@ -4161,7 +4533,7 @@ function scoreTableListExtractability(html) {
|
|
|
4161
4533
|
const listItems = html.match(/<li[\s>]/gi) || [];
|
|
4162
4534
|
if (listItems.length >= 10) score += 1;
|
|
4163
4535
|
if (/<dl[\s>]/i.test(html)) score += 1;
|
|
4164
|
-
return
|
|
4536
|
+
return cap2(score, 10);
|
|
4165
4537
|
}
|
|
4166
4538
|
function scoreDirectAnswerDensity(html) {
|
|
4167
4539
|
let score = 0;
|
|
@@ -4177,9 +4549,9 @@ function scoreDirectAnswerDensity(html) {
|
|
|
4177
4549
|
}
|
|
4178
4550
|
if (snippetCount >= 3) score += 2;
|
|
4179
4551
|
else if (snippetCount >= 1) score += 1;
|
|
4180
|
-
const directOpeners =
|
|
4552
|
+
const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
|
|
4181
4553
|
if (directOpeners.length >= 2) score += 2;
|
|
4182
|
-
return
|
|
4554
|
+
return cap2(score, 10);
|
|
4183
4555
|
}
|
|
4184
4556
|
function scoreSemanticHtml(html) {
|
|
4185
4557
|
let score = 0;
|
|
@@ -4189,7 +4561,7 @@ function scoreSemanticHtml(html) {
|
|
|
4189
4561
|
for (const el of elements) {
|
|
4190
4562
|
if (lowerHtml.includes(el)) count++;
|
|
4191
4563
|
}
|
|
4192
|
-
score +=
|
|
4564
|
+
score += cap2(Math.floor(count * 0.7), 4);
|
|
4193
4565
|
const imgTags = html.match(/<img\s[^>]*>/gi) || [];
|
|
4194
4566
|
if (imgTags.length > 0) {
|
|
4195
4567
|
let withAlt = 0;
|
|
@@ -4200,11 +4572,11 @@ function scoreSemanticHtml(html) {
|
|
|
4200
4572
|
}
|
|
4201
4573
|
if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
|
|
4202
4574
|
if (/\baria-/i.test(html)) score += 2;
|
|
4203
|
-
return
|
|
4575
|
+
return cap2(score, 10);
|
|
4204
4576
|
}
|
|
4205
4577
|
function scoreFactDensity(html) {
|
|
4206
4578
|
let score = 0;
|
|
4207
|
-
const text =
|
|
4579
|
+
const text = getTextContent2(html);
|
|
4208
4580
|
const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
|
|
4209
4581
|
if (numericPatterns.length >= 6) score += 5;
|
|
4210
4582
|
else if (numericPatterns.length >= 3) score += 3;
|
|
@@ -4217,11 +4589,11 @@ function scoreFactDensity(html) {
|
|
|
4217
4589
|
if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
|
|
4218
4590
|
const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
|
|
4219
4591
|
if (units.length >= 2) score += 1;
|
|
4220
|
-
return
|
|
4592
|
+
return cap2(score, 10);
|
|
4221
4593
|
}
|
|
4222
4594
|
function scoreDefinitionPatterns(html) {
|
|
4223
4595
|
let score = 0;
|
|
4224
|
-
const text =
|
|
4596
|
+
const text = getTextContent2(html);
|
|
4225
4597
|
const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
|
|
4226
4598
|
if (defPatterns.length >= 3) score += 5;
|
|
4227
4599
|
else if (defPatterns.length >= 1) score += 3;
|
|
@@ -4229,7 +4601,7 @@ function scoreDefinitionPatterns(html) {
|
|
|
4229
4601
|
if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
|
|
4230
4602
|
if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
|
|
4231
4603
|
if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
|
|
4232
|
-
return
|
|
4604
|
+
return cap2(score, 10);
|
|
4233
4605
|
}
|
|
4234
4606
|
function scoreCanonicalUrl(html, url) {
|
|
4235
4607
|
let score = 0;
|
|
@@ -4250,7 +4622,7 @@ function scoreCanonicalUrl(html, url) {
|
|
|
4250
4622
|
if (canonicalHref.startsWith("https://")) score += 2;
|
|
4251
4623
|
const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
|
|
4252
4624
|
if (allCanonicals.length === 1) score += 1;
|
|
4253
|
-
return
|
|
4625
|
+
return cap2(score, 10);
|
|
4254
4626
|
}
|
|
4255
4627
|
function scoreVisibleDateSignal(html) {
|
|
4256
4628
|
let score = 0;
|
|
@@ -4269,11 +4641,11 @@ function scoreVisibleDateSignal(html) {
|
|
|
4269
4641
|
} catch {
|
|
4270
4642
|
}
|
|
4271
4643
|
}
|
|
4272
|
-
return
|
|
4644
|
+
return cap2(score, 10);
|
|
4273
4645
|
}
|
|
4274
4646
|
function scoreCitationReadyWriting(html) {
|
|
4275
4647
|
let score = 0;
|
|
4276
|
-
const text =
|
|
4648
|
+
const text = getTextContent2(html);
|
|
4277
4649
|
const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
|
|
4278
4650
|
if (defSentences.length >= 3) score += 3;
|
|
4279
4651
|
else if (defSentences.length >= 1) score += 1;
|
|
@@ -4302,7 +4674,7 @@ function scoreCitationReadyWriting(html) {
|
|
|
4302
4674
|
);
|
|
4303
4675
|
if (quotableLines.length >= 2) score += 2;
|
|
4304
4676
|
else if (quotableLines.length >= 1) score += 1;
|
|
4305
|
-
return
|
|
4677
|
+
return cap2(score, 10);
|
|
4306
4678
|
}
|
|
4307
4679
|
function scoreAnswerFirstPlacement(html) {
|
|
4308
4680
|
let score = 0;
|
|
@@ -4313,8 +4685,8 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4313
4685
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4314
4686
|
for (const p of earlyParagraphs) {
|
|
4315
4687
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4316
|
-
const
|
|
4317
|
-
if (
|
|
4688
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
4689
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4318
4690
|
score += 4;
|
|
4319
4691
|
break;
|
|
4320
4692
|
}
|
|
@@ -4335,11 +4707,11 @@ function scoreAnswerFirstPlacement(html) {
|
|
|
4335
4707
|
score += 3;
|
|
4336
4708
|
}
|
|
4337
4709
|
}
|
|
4338
|
-
return
|
|
4710
|
+
return cap2(score, 10);
|
|
4339
4711
|
}
|
|
4340
4712
|
function scoreEvidencePackaging(html) {
|
|
4341
4713
|
let score = 0;
|
|
4342
|
-
const text =
|
|
4714
|
+
const text = getTextContent2(html);
|
|
4343
4715
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4344
4716
|
let inlineCitations = 0;
|
|
4345
4717
|
for (const p of paragraphs) {
|
|
@@ -4357,11 +4729,11 @@ function scoreEvidencePackaging(html) {
|
|
|
4357
4729
|
const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
|
|
4358
4730
|
if (sourcedStats.length >= 2) score += 2;
|
|
4359
4731
|
else if (sourcedStats.length >= 1) score += 1;
|
|
4360
|
-
return
|
|
4732
|
+
return cap2(score, 10);
|
|
4361
4733
|
}
|
|
4362
4734
|
function scoreEntityDisambiguation(html) {
|
|
4363
4735
|
let score = 0;
|
|
4364
|
-
const text =
|
|
4736
|
+
const text = getTextContent2(html);
|
|
4365
4737
|
const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
|
|
4366
4738
|
if (!h1Match) return 3;
|
|
4367
4739
|
const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
|
|
@@ -4379,11 +4751,11 @@ function scoreEntityDisambiguation(html) {
|
|
|
4379
4751
|
if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
|
|
4380
4752
|
score += 3;
|
|
4381
4753
|
}
|
|
4382
|
-
return
|
|
4754
|
+
return cap2(score, 10);
|
|
4383
4755
|
}
|
|
4384
4756
|
function scoreExtractionFriction(html) {
|
|
4385
4757
|
let score = 0;
|
|
4386
|
-
const text =
|
|
4758
|
+
const text = getTextContent2(html);
|
|
4387
4759
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
4388
4760
|
const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
|
|
4389
4761
|
if (avgLen > 0 && avgLen < 20) score += 3;
|
|
@@ -4406,7 +4778,7 @@ function scoreExtractionFriction(html) {
|
|
|
4406
4778
|
if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
|
|
4407
4779
|
score = Math.max(0, score - 2);
|
|
4408
4780
|
}
|
|
4409
|
-
return
|
|
4781
|
+
return cap2(score, 10);
|
|
4410
4782
|
}
|
|
4411
4783
|
function scoreImageContextAI(html) {
|
|
4412
4784
|
let score = 0;
|
|
@@ -4431,20 +4803,13 @@ function scoreImageContextAI(html) {
|
|
|
4431
4803
|
else if (goodAltCount > 0) score += 1;
|
|
4432
4804
|
const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
|
|
4433
4805
|
if (contextualImages.length > 0) score += 3;
|
|
4434
|
-
return
|
|
4435
|
-
}
|
|
4436
|
-
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
|
|
4437
|
-
function isBoilerplate(text) {
|
|
4438
|
-
const words = text.split(/\s+/).length;
|
|
4439
|
-
if (words < 20 && BOILERPLATE_PATTERNS.test(text)) return true;
|
|
4440
|
-
if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
|
|
4441
|
-
return false;
|
|
4806
|
+
return cap2(score, 10);
|
|
4442
4807
|
}
|
|
4443
4808
|
function scoreDuplicateContent(html) {
|
|
4444
4809
|
return scoreDuplicateContentDetailed(html).score;
|
|
4445
4810
|
}
|
|
4446
4811
|
function scoreDuplicateContentDetailed(html) {
|
|
4447
|
-
const sections =
|
|
4812
|
+
const sections = extractDuplicateContentSections(html);
|
|
4448
4813
|
if (sections.length < 2) return { score: 10, duplicates: [] };
|
|
4449
4814
|
const totalParagraphs = sections.reduce((sum, s) => sum + s.paragraphs.length, 0);
|
|
4450
4815
|
const duplicates = [];
|
|
@@ -4453,7 +4818,7 @@ function scoreDuplicateContentDetailed(html) {
|
|
|
4453
4818
|
for (let j = i + 1; j < sections.length; j++) {
|
|
4454
4819
|
for (const pA of sections[i].paragraphs) {
|
|
4455
4820
|
for (const pB of sections[j].paragraphs) {
|
|
4456
|
-
const sim =
|
|
4821
|
+
const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
|
|
4457
4822
|
if (sim > 0.4) {
|
|
4458
4823
|
dupParagraphCount++;
|
|
4459
4824
|
duplicates.push({
|
|
@@ -4482,41 +4847,6 @@ function scoreDuplicateContentDetailed(html) {
|
|
|
4482
4847
|
}
|
|
4483
4848
|
return { score, duplicates };
|
|
4484
4849
|
}
|
|
4485
|
-
function extractSectionsWithParagraphs(html) {
|
|
4486
|
-
const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
|
|
4487
|
-
const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
|
|
4488
|
-
const sections = [];
|
|
4489
|
-
for (const part of parts) {
|
|
4490
|
-
const headingMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
|
|
4491
|
-
const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
|
|
4492
|
-
const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
4493
|
-
const paragraphs = pMatches.map((p) => {
|
|
4494
|
-
const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
|
|
4495
|
-
return { text, shingles: buildShingles(text, 4) };
|
|
4496
|
-
}).filter((p) => p.shingles.size >= 3 && !isBoilerplate(p.text));
|
|
4497
|
-
if (paragraphs.length > 0) {
|
|
4498
|
-
sections.push({ heading, paragraphs });
|
|
4499
|
-
}
|
|
4500
|
-
}
|
|
4501
|
-
return sections;
|
|
4502
|
-
}
|
|
4503
|
-
function buildShingles(text, n) {
|
|
4504
|
-
const words = text.split(/\s+/).filter((w) => w.length > 1);
|
|
4505
|
-
const shingles = /* @__PURE__ */ new Set();
|
|
4506
|
-
for (let i = 0; i <= words.length - n; i++) {
|
|
4507
|
-
shingles.add(words.slice(i, i + n).join(" "));
|
|
4508
|
-
}
|
|
4509
|
-
return shingles;
|
|
4510
|
-
}
|
|
4511
|
-
function shingleJaccard(a, b) {
|
|
4512
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
4513
|
-
let intersection = 0;
|
|
4514
|
-
for (const s of a) {
|
|
4515
|
-
if (b.has(s)) intersection++;
|
|
4516
|
-
}
|
|
4517
|
-
const union = a.size + b.size - intersection;
|
|
4518
|
-
return union === 0 ? 0 : intersection / union;
|
|
4519
|
-
}
|
|
4520
4850
|
var SCORING_FUNCTIONS = {
|
|
4521
4851
|
schema_markup: scoreSchemaMarkup,
|
|
4522
4852
|
qa_content_format: scoreQAFormat,
|
|
@@ -4535,8 +4865,12 @@ var SCORING_FUNCTIONS = {
|
|
|
4535
4865
|
citation_ready_writing: scoreCitationReadyWriting,
|
|
4536
4866
|
answer_first_placement: scoreAnswerFirstPlacement,
|
|
4537
4867
|
evidence_packaging: scoreEvidencePackaging,
|
|
4868
|
+
helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
|
|
4869
|
+
first_hand_experience_signals: scoreFirstHandExperienceSignals,
|
|
4538
4870
|
entity_disambiguation: scoreEntityDisambiguation,
|
|
4539
4871
|
extraction_friction: scoreExtractionFriction,
|
|
4872
|
+
creator_transparency: scoreCreatorTransparency,
|
|
4873
|
+
methodology_transparency: scoreMethodologyTransparency,
|
|
4540
4874
|
image_context_ai: scoreImageContextAI,
|
|
4541
4875
|
duplicate_content: scoreDuplicateContent
|
|
4542
4876
|
};
|
|
@@ -4567,7 +4901,7 @@ function extractTitle(html) {
|
|
|
4567
4901
|
const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
4568
4902
|
return match ? match[1].replace(/\s+/g, " ").trim() : "";
|
|
4569
4903
|
}
|
|
4570
|
-
function
|
|
4904
|
+
function getTextContent3(html) {
|
|
4571
4905
|
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
|
|
4572
4906
|
}
|
|
4573
4907
|
function countWords2(text) {
|
|
@@ -4627,9 +4961,9 @@ function checkMissingOgTags(html) {
|
|
|
4627
4961
|
}
|
|
4628
4962
|
return null;
|
|
4629
4963
|
}
|
|
4630
|
-
function checkThinContent(
|
|
4631
|
-
if (
|
|
4632
|
-
return { check: "thin-content", label: `Thin content (${
|
|
4964
|
+
function checkThinContent(wordCount2) {
|
|
4965
|
+
if (wordCount2 < 300) {
|
|
4966
|
+
return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
|
|
4633
4967
|
}
|
|
4634
4968
|
return null;
|
|
4635
4969
|
}
|
|
@@ -4726,15 +5060,15 @@ function checkNoAnswerBlock(html) {
|
|
|
4726
5060
|
const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
|
|
4727
5061
|
for (const p of earlyParagraphs) {
|
|
4728
5062
|
const pText = p.replace(/<[^>]*>/g, "").trim();
|
|
4729
|
-
const
|
|
4730
|
-
if (
|
|
5063
|
+
const wordCount2 = pText.split(/\s+/).length;
|
|
5064
|
+
if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
|
|
4731
5065
|
return null;
|
|
4732
5066
|
}
|
|
4733
5067
|
}
|
|
4734
5068
|
return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
|
|
4735
5069
|
}
|
|
4736
5070
|
function checkNoEvidence(html, url) {
|
|
4737
|
-
const text =
|
|
5071
|
+
const text = getTextContent3(html);
|
|
4738
5072
|
const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
|
|
4739
5073
|
let inlineCitations = 0;
|
|
4740
5074
|
for (const p of paragraphs) {
|
|
@@ -4748,7 +5082,7 @@ function checkNoEvidence(html, url) {
|
|
|
4748
5082
|
return null;
|
|
4749
5083
|
}
|
|
4750
5084
|
function checkHasCitationReadyContent(html) {
|
|
4751
|
-
const text =
|
|
5085
|
+
const text = getTextContent3(html);
|
|
4752
5086
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
|
|
4753
5087
|
let quotable = 0;
|
|
4754
5088
|
for (const s of sentences) {
|
|
@@ -4773,8 +5107,8 @@ function checkDuplicateContentBlocks(html) {
|
|
|
4773
5107
|
}
|
|
4774
5108
|
function analyzePage(html, url, category) {
|
|
4775
5109
|
const title = extractTitle(html);
|
|
4776
|
-
const textContent =
|
|
4777
|
-
const
|
|
5110
|
+
const textContent = getTextContent3(html);
|
|
5111
|
+
const wordCount2 = countWords2(textContent);
|
|
4778
5112
|
const issues = [];
|
|
4779
5113
|
const strengths = [];
|
|
4780
5114
|
const issueChecks = [
|
|
@@ -4785,7 +5119,7 @@ function analyzePage(html, url, category) {
|
|
|
4785
5119
|
checkNoSchema(html),
|
|
4786
5120
|
checkMissingCanonical(html),
|
|
4787
5121
|
checkMissingOgTags(html),
|
|
4788
|
-
checkThinContent(
|
|
5122
|
+
checkThinContent(wordCount2),
|
|
4789
5123
|
checkImagesMissingAlt(html),
|
|
4790
5124
|
checkNoInternalLinks(html, url),
|
|
4791
5125
|
checkNoAnswerBlock(html),
|
|
@@ -4804,7 +5138,7 @@ function analyzePage(html, url, category) {
|
|
|
4804
5138
|
if (result) strengths.push(result);
|
|
4805
5139
|
}
|
|
4806
5140
|
const { aeoScore, criterionScores } = scorePage(html, url);
|
|
4807
|
-
return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
|
|
5141
|
+
return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
|
|
4808
5142
|
}
|
|
4809
5143
|
function analyzeAllPages(siteData) {
|
|
4810
5144
|
const reviews = [];
|