aeorank 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -76,6 +76,244 @@ function detectParkedDomain(bodySnippet) {
76
76
  return { isParked: false };
77
77
  }
78
78
 
79
+ // src/duplicate-content.ts
80
// Call-to-action, social-share and legal phrases, matched case-insensitively
// on word boundaries.
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;

// Word-count thresholds consumed by the paragraph filters in this module.
var MIN_SUBSTANTIVE_WORDS = 15;
var MAX_METADATA_WORDS = 24;
var MAX_METADATA_LABEL_WORDS = 4;
84
/**
 * Reduce an HTML fragment to lowercase, single-spaced plain text.
 *
 * Tags and character references are replaced by a space (they are not
 * decoded), whitespace runs are collapsed, and the result is trimmed.
 *
 * @param {string} htmlFragment - Raw HTML of one paragraph.
 * @returns {string} Normalized text.
 */
function normalizeParagraphText(htmlFragment) {
  return htmlFragment
    .replace(/<[^>]*>/g, " ")
    // Named (&amp;), decimal (&#39;) and hexadecimal (&#x27;) references.
    // `\w` alone does not cover the `#` of numeric references.
    .replace(/&(?:#\d+|#x[0-9a-f]+|\w+);/gi, " ")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
87
/**
 * Split text on whitespace and trim non-alphanumeric characters from both
 * ends of every token, discarding tokens that end up empty.
 *
 * Edge trimming is Unicode-aware: letters, combining marks and digits of any
 * script are kept, so accented and non-Latin words survive intact instead of
 * being truncated or dropped.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty tokens in their original order.
 */
function tokenize(text) {
  const edgeNoise = /^[^\p{L}\p{M}\p{N}]+|[^\p{L}\p{M}\p{N}]+$/gu;
  return text
    .split(/\s+/)
    .map((word) => word.replace(edgeNoise, ""))
    .filter((word) => word.length > 0);
}
90
/**
 * Decide whether a paragraph is short boilerplate rather than content.
 *
 * @param {string} text - Paragraph text.
 * @param {number} words - Number of words in the paragraph.
 * @returns {boolean} True for paragraphs under 20 words that contain a
 *   boilerplate phrase, or under 30 words that mention cookie/consent terms.
 */
function isBoilerplateParagraph(text, words) {
  const isShortCallToAction = words < 20 && BOILERPLATE_PATTERNS.test(text);
  const isShortConsentNotice = words < 30 && /\b(cookie|gdpr|consent|opt.out)\b/i.test(text);
  return isShortCallToAction || isShortConsentNotice;
}
95
/**
 * Detect "Label: value" style paragraphs.
 *
 * A paragraph qualifies when it opens with a label of at most 60 non-colon
 * characters followed by a colon and whitespace, the label has between one
 * and MAX_METADATA_LABEL_WORDS tokens, and the whole paragraph has at most
 * MAX_METADATA_WORDS words.
 *
 * @param {string} text - Paragraph text.
 * @param {number} words - Number of words in the paragraph.
 * @returns {boolean}
 */
function isMetadataParagraph(text, words) {
  const label = text.match(/^([^:]{1,60}):\s+/);
  if (!label) return false;

  const labelWordCount = tokenize(label[1]).length;
  if (labelWordCount === 0 || labelWordCount > MAX_METADATA_LABEL_WORDS) return false;

  return words <= MAX_METADATA_WORDS;
}
101
/**
 * Build the set of n-word shingles (sliding windows joined by one space).
 *
 * @param {string[]} words - Tokens in document order.
 * @param {number} [n=4] - Window size.
 * @returns {Set<string>} Distinct shingles; empty when there are fewer than
 *   `n` tokens.
 */
function buildShinglesFromTokens(words, n = 4) {
  const windowCount = words.length - n + 1;
  const shingles = new Set();
  for (let start = 0; start < windowCount; start++) {
    const window = words.slice(start, start + n);
    shingles.add(window.join(" "));
  }
  return shingles;
}
108
/**
 * Turn one `<p>` fragment into a comparable paragraph record.
 *
 * @param {string} htmlFragment - Raw HTML of the paragraph.
 * @returns {{text: string, shingles: Set<string>} | null} The record, or null
 *   when the paragraph is too short, is boilerplate, is a metadata line, or
 *   yields fewer than three shingles.
 */
function createParagraph(htmlFragment) {
  const text = normalizeParagraphText(htmlFragment);
  const tokens = tokenize(text);
  const tokenCount = tokens.length;

  const rejected =
    tokenCount < MIN_SUBSTANTIVE_WORDS ||
    isBoilerplateParagraph(text, tokenCount) ||
    isMetadataParagraph(text, tokenCount);
  if (rejected) return null;

  const shingles = buildShinglesFromTokens(tokens);
  return shingles.size >= 3 ? { text, shingles } : null;
}
118
/**
 * Remove script, style, nav, header, footer, noscript and aside elements
 * (with their contents) from an HTML string.
 *
 * @param {string} html - Page HTML.
 * @returns {string} HTML with those elements deleted.
 */
function stripNonContentHtml(html) {
  const chromeElements = /<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi;
  const asideElements = /<aside\b[^>]*>[\s\S]*?<\/aside>/gi;
  const withoutChrome = html.replace(chromeElements, "");
  return withoutChrome.replace(asideElements, "");
}
121
/**
 * Collect every substantive paragraph of a page for duplicate detection.
 *
 * @param {string} html - Page HTML.
 * @returns {Array<{text: string, shingles: Set<string>}>} Paragraph records
 *   accepted by createParagraph, in document order.
 */
function extractDuplicateContentParagraphs(html) {
  const fragments = stripNonContentHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) ?? [];
  const paragraphs = [];
  for (const fragment of fragments) {
    const paragraph = createParagraph(fragment);
    if (paragraph !== null) paragraphs.push(paragraph);
  }
  return paragraphs;
}
126
/**
 * Split a page at each h2/h3 heading and collect the substantive paragraphs
 * of every resulting section.
 *
 * @param {string} html - Page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 *   Sections with at least one accepted paragraph. Content without an h2/h3
 *   heading is labelled "(intro)".
 */
function extractDuplicateContentSections(html) {
  // The lookahead keeps each heading at the start of its own chunk.
  const chunks = stripNonContentHtml(html).split(/(?=<h[23]\b[^>]*>)/i);

  return chunks.flatMap((chunk) => {
    const headingMatch = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";

    const paragraphs = [];
    for (const fragment of chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) ?? []) {
      const paragraph = createParagraph(fragment);
      if (paragraph !== null) paragraphs.push(paragraph);
    }

    return paragraphs.length > 0 ? [{ heading, paragraphs }] : [];
  });
}
138
/**
 * Jaccard similarity (|A ∩ B| / |A ∪ B|) of two shingle sets.
 *
 * @param {Set<string>} a - First shingle set.
 * @param {Set<string>} b - Second shingle set.
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function shingleJaccardSimilarity(a, b) {
  if (a.size === 0 && b.size === 0) return 0;

  const shared = [...a].filter((shingle) => b.has(shingle)).length;
  const union = a.size + b.size - shared;
  return union === 0 ? 0 : shared / union;
}
147
+
148
+ // src/helpful-content.ts
149
/**
 * Clamp a value from above.
 *
 * @param {number} value - Value to limit.
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of `value` and `max`.
 */
function cap(value, max) {
  return Math.min(value, max);
}
152
/**
 * Clamp a value from below.
 *
 * @param {number} value - Value to limit.
 * @param {number} min - Lower bound.
 * @returns {number} The larger of `value` and `min`.
 */
function floor(value, min) {
  return Math.max(value, min);
}
155
/**
 * Count the non-overlapping occurrences of a pattern in a string.
 *
 * `String.prototype.match` with a non-global regex returns the first match
 * plus its capture groups, so its length is not an occurrence count. The
 * pattern is therefore promoted to a global copy when it is not global.
 *
 * @param {string} text - Text to search.
 * @param {RegExp|string} pattern - Pattern to count; a string is compiled as
 *   a regular expression, as `String.prototype.match` would do.
 * @returns {number} Number of matches, 0 when there are none.
 */
function countMatches(text, pattern) {
  const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern);
  const globalRegex = regex.global ? regex : new RegExp(regex.source, `${regex.flags}g`);
  return text.match(globalRegex)?.length ?? 0;
}
158
/**
 * Replace every `<script>…</script>` and `<style>…</style>` element with a
 * single space.
 *
 * @param {string} html - Page HTML.
 * @returns {string} HTML without script and style elements.
 */
function stripScriptsAndStyles(html) {
  const blocks = [/<script[\s\S]*?<\/script>/gi, /<style[\s\S]*?<\/style>/gi];
  let output = html;
  for (const block of blocks) {
    output = output.replace(block, " ");
  }
  return output;
}
161
/**
 * Extract the visible text of an HTML document: scripts and styles are
 * removed, remaining tags become spaces, and whitespace is collapsed.
 *
 * @param {string} html - Page HTML.
 * @returns {string} Trimmed, single-spaced text.
 */
function getTextContent(html) {
  const markup = stripScriptsAndStyles(html);
  const withoutTags = markup.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
164
/**
 * Return the inner HTML of the first `<body>` element.
 *
 * @param {string} html - Page HTML.
 * @returns {string} Body contents, or the whole input when no complete
 *   `<body>…</body>` pair is found.
 */
function getBodyHtml(html) {
  const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (!bodyMatch) return html;
  return bodyMatch[1];
}
168
/**
 * Return the plain text of the first `<p>` element in the page body.
 *
 * The opening tag is matched with a word boundary so that elements whose
 * names merely start with "p" (`<pre>`, `<picture>`, `<path>`, …) are not
 * mistaken for a paragraph.
 *
 * @param {string} html - Page HTML.
 * @returns {string} Trimmed, single-spaced paragraph text, or "" when the
 *   body has no paragraph.
 */
function getFirstParagraphText(html) {
  const firstPara = getBodyHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/i);
  if (!firstPara) return "";
  return firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
}
172
/**
 * Keep the first `count` whitespace-separated pieces of a string.
 *
 * @param {string} text - Source text.
 * @param {number} count - Maximum number of pieces to keep.
 * @returns {string} The kept pieces joined by single spaces.
 */
function firstNWords(text, count) {
  const pieces = text.split(/\s+/);
  const kept = pieces.slice(0, count);
  return kept.join(" ");
}
175
/**
 * Return the plain text of the first `<h1>` element.
 *
 * @param {string} html - Page HTML.
 * @returns {string} Trimmed, single-spaced heading text, or "" when absent.
 */
function getH1Text(html) {
  const found = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!found) return "";
  const withoutTags = found[1].replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
179
/**
 * Return the plain text of the first `<title>` element.
 *
 * @param {string} html - Page HTML.
 * @returns {string} Trimmed, single-spaced title text, or "" when absent.
 */
function getTitleText(html) {
  const found = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!found) return "";
  const withoutTags = found[1].replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
183
/**
 * Count the whitespace-separated words in a string.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of non-empty pieces; 0 for empty or missing text.
 */
function wordCount(text) {
  if (!text) return 0;
  let count = 0;
  for (const piece of text.split(/\s+/)) {
    if (piece) count += 1;
  }
  return count;
}
186
/**
 * Heuristically decide whether a page is editorial content (article, guide,
 * documentation, …) rather than a marketing or utility page.
 *
 * Signals: a content-style URL segment (+2), an `<article>` element (+1),
 * two or more h2/h3 headings (+1), at least 500 words (+1), a `<time>`
 * element or datePublished/dateModified marker (+1), and byline wording (+1).
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {boolean} True when the signals add up to at least 2.
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  let signals = 0;

  // `case-stud(?:y|ies)` rather than a bare `case-stud`: the trailing \b can
  // never follow "stud" when the next character is the "y"/"i" of the word.
  const contentPath = /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorial|case-stud(?:y|ies)|whitepaper|faq)\b/i;
  if (url && contentPath.test(url)) signals += 2;

  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wordCount(text) >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;

  return signals >= 2;
}
200
/**
 * Decide whether readers would expect the page to disclose a methodology
 * (reviews, comparisons, benchmarks, studies, …).
 *
 * Title/H1 and URL keywords are matched as whole words so that unrelated
 * words containing a keyword ("latest" ⊃ "test", "canvas" ⊃ "vs",
 * "contest" ⊃ "test") no longer trigger the expectation.
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {boolean} True when the title/H1, the URL, or the body text
 *   indicates review/comparison/research style content.
 */
function expectsMethodology(html, url) {
  const headline = `${getTitleText(html)} ${getH1Text(html)}`;
  const headlineTerms = /\b(?:review(?:s|ed|ing)?|compar(?:e|es|ed|ing|isons?)|vs\.?|best|benchmark(?:s|ed|ing)?|stud(?:y|ies)|analys[ie]s|surveys?|reports?|research|test(?:s|ed|ing)?|methodolog(?:y|ies))\b/i;
  if (headlineTerms.test(headline)) return true;

  // Alphanumeric lookarounds instead of \b so that "_" also separates URL words.
  const urlTerms = /(?<![a-z0-9])(?:reviews?|compar(?:e|ed|isons?)|benchmarks?|stud(?:y|ies)|analys[ie]s|surveys?|research|reports?|best)(?![a-z0-9])/i;
  if (urlTerms.test(url || "")) return true;

  // Body text is only extracted when the cheaper checks did not decide.
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(getTextContent(html));
}
212
/**
 * Check that the opening of the body actually talks about what the title and
 * H1 promise.
 *
 * Keywords are the distinct title/H1 words of five or more characters that
 * are not generic filler. They are looked up in the first 250 words of the
 * body text with the `<title>` and `<h1>` elements removed; otherwise every
 * keyword would match the very heading it was taken from and the check
 * would pass trivially.
 *
 * @param {string} html - Page HTML.
 * @returns {boolean} True when at least two keywords (or the only keyword)
 *   occur in the early body text; false when there are no keywords.
 */
function titleAndBodyAlign(html) {
  const topic = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  const genericWord = /^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i;
  const keywords = topic.split(/[\s|:()\-/]+/).filter((w) => w.length >= 5 && !genericWord.test(w));
  const uniqueKeywords = [...new Set(keywords)];
  if (uniqueKeywords.length === 0) return false;

  // getBodyHtml falls back to the whole document when there is no <body>,
  // so the <title> is stripped explicitly as well.
  const bodyWithoutHeadline = getBodyHtml(html)
    .replace(/<title\b[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1\b[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const earlyText = firstNWords(getTextContent(bodyWithoutHeadline), 250).toLowerCase();

  const hits = uniqueKeywords.filter((w) => earlyText.includes(w)).length;
  return hits >= Math.min(2, uniqueKeywords.length);
}
222
// Stock opening phrases; anchored to the start of the tested string.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;

// Phrase families counted by the helpful-content scorers. These carry the
// global flag because they are used for counting occurrences.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;

// Presence checks for authorship signals (non-global, safe to reuse with .test()).
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;

// Methodology vocabulary: headline terms and supporting detail terms.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
236
/**
 * Score (0-10) how directly a page serves the visitor's task.
 *
 * Short non-content pages get a neutral 5. Otherwise the score starts at 3
 * for content-like pages (5 for others), gains points for a non-generic
 * opening paragraph, practical language, trade-off language, title/body
 * alignment and a takeaway phrase, and loses points for a generic opener,
 * repeated calls to action in the first 1800 characters of body HTML, and
 * fluff phrases.
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to 0-10; 0 when the page has no text.
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  let score = contentLike ? 3 : 5;

  // Opening paragraph: rewarded when specific, penalised when generic.
  const firstPara = getFirstParagraphText(html);
  if (firstPara) score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;

  // Practical guidance counts more when it appears in the first 300 words.
  if (countMatches(firstNWords(text, 300), PRACTICAL_LANGUAGE) >= 1) {
    score += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    score += 1;
  }

  const tradeoffCount = countMatches(text, TRADEOFF_LANGUAGE);
  score += tradeoffCount >= 2 ? 2 : tradeoffCount >= 1 ? 1 : 0;

  if (titleAndBodyAlign(html)) score += 1;
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) score += 1;

  const earlyCtas = countMatches(getBodyHtml(html).slice(0, 1800), EARLY_CTA_PATTERN);
  score -= earlyCtas >= 3 ? 2 : earlyCtas >= 2 ? 1 : 0;

  const fluffCount = countMatches(text, FLUFF_LANGUAGE);
  score -= fluffCount >= 3 ? 2 : fluffCount >= 1 ? 1 : 0;

  return floor(cap(score, 10), 0);
}
263
/**
 * Score (0-10) how much evidence of first-hand experience a page shows.
 *
 * Starts at 2 for content-like pages (5 for others) and adds tiered bonuses
 * for first-person action verbs, experience context, artifacts (including
 * `<figure>`/`<figcaption>` markup) and limitation language. One point is
 * deducted when manufacturer/vendor copy wording is present.
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to 0-10; 0 when the page has no text.
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  let score = isContentLikePage(html, url) ? 2 : 5;

  // Each row: [occurrences, threshold for the high bonus, high bonus, bonus for >= 1].
  const tiers = [
    [countMatches(text, FIRST_HAND_ACTIONS), 3, 4, 2],
    [countMatches(text, EXPERIENCE_CONTEXT), 2, 2, 1],
    [countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi), 3, 2, 1],
    [countMatches(text, LIMITATION_LANGUAGE), 2, 2, 1],
  ];
  for (const [count, highThreshold, highBonus, lowBonus] of tiers) {
    if (count >= highThreshold) {
      score += highBonus;
    } else if (count >= 1) {
      score += lowBonus;
    }
  }

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) score -= 1;

  return floor(cap(score, 10), 0);
}
283
/**
 * Score (0-10) how visibly a content page identifies its author or reviewer.
 *
 * Pages that are not content-like get a neutral 5. Content-like pages start
 * at 0 and earn points for a byline (text, author class, or rel="author"),
 * an author/team link, author-bio wording, reviewer/editor wording, and
 * Person structured data.
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to 0-10; 0 when the page has no text.
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  const hasByline =
    BYLINE_PATTERN.test(text) ||
    /class=["'][^"']*author[^"']*["']/i.test(html) ||
    /rel=["']author["']/i.test(html);

  // Each row: [points awarded, whether the signal is present].
  const signals = [
    [3, hasByline],
    [2, AUTHOR_LINK_PATTERN.test(html)],
    [2, AUTHOR_BIO_PATTERN.test(text)],
    [1, /\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text)],
    [2, /"@type"\s*:\s*"Person"/i.test(html)],
  ];
  const score = signals.reduce((sum, [points, present]) => (present ? sum + points : sum), 0);

  return floor(cap(score, 10), 0);
}
298
/**
 * Score (0-10) how well a page discloses how its content was produced,
 * tested, or reviewed.
 *
 * Pages where a methodology is expected start at 2 and must earn the rest;
 * all other pages start at a neutral 5. Points are added for methodology
 * terms, methodology details, quantified testing statements, figures/tables
 * that accompany methodology terms, and AI/human review disclosures.
 *
 * @param {string} html - Page HTML.
 * @param {string} [url] - Page URL, if known.
 * @returns {number} Score clamped to 0-10; 0 when the page has no text.
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // The base depends only on whether a methodology is expected; the previous
  // content-like lookup selected 5 in both of its branches and was dropped.
  let score = expectsMethodology(html, url) ? 2 : 5;

  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;

  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;

  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;

  return floor(cap(score, 10), 0);
}
316
+
79
317
  // src/site-crawler.ts
80
318
  async function fetchText(url) {
81
319
  try {
@@ -981,8 +1219,8 @@ function checkDirectAnswerDensity(data) {
981
1219
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
982
1220
  const snippetZoneParagraphs = paragraphs.filter((p) => {
983
1221
  const text2 = p.replace(/<[^>]*>/g, "").trim();
984
- const wordCount = text2.split(/\s+/).length;
985
- return wordCount >= 40 && wordCount <= 150;
1222
+ const wordCount2 = text2.split(/\s+/).length;
1223
+ return wordCount2 >= 40 && wordCount2 <= 150;
986
1224
  });
987
1225
  if (snippetZoneParagraphs.length >= 3) {
988
1226
  score += 2;
@@ -2130,6 +2368,123 @@ function checkContentDepth(data, topicCoherenceScore) {
2130
2368
  }
2131
2369
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2132
2370
  }
2371
/**
 * Run a per-page scorer over the homepage and every sampled blog page.
 *
 * @param {object} data - Crawl data. Reads `homepage`, `blogSample`,
 *   `protocol` and `domain`; each page contributes `text` and `finalUrl`.
 * @param {(html: string, url: string) => number} scorer - Page scorer.
 * @returns {Array<{url: string, score: number}>} One entry per scored page,
 *   homepage first.
 */
function scoreSampledPages(data, scorer) {
  // Site root used for any page that did not record its final URL.
  const fallbackUrl = data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`;

  const sampled = [];
  if (data.homepage) sampled.push(data.homepage);
  if (data.blogSample) sampled.push(...data.blogSample);

  return sampled.map((page) => {
    const url = page.finalUrl || fallbackUrl;
    // A page without fetched text is scored as empty HTML instead of
    // making the scorer throw and aborting the whole audit.
    return { url, score: scorer(page.text ?? "", url) };
  });
}
2385
/**
 * Aggregate per-page scores.
 *
 * @param {Array<{url: string, score: number}>} pageScores - Scored pages.
 * @returns {{total: number, average: number, strong: Array, weak: Array}}
 *   `average` is the rounded mean (0 for no pages); `strong` holds pages
 *   scoring 8 or more and `weak` pages scoring 4 or less.
 */
function summarizeHelpfulScores(pageScores) {
  const total = pageScores.length;
  const strong = [];
  const weak = [];
  let sum = 0;

  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }

  const average = total > 0 ? Math.round(sum / total) : 0;
  return { total, average, strong, weak };
}
2392
/**
 * Build the "Helpful Purpose Alignment" criterion result from the sampled
 * pages.
 *
 * @param {object} data - Crawl data; requires `homepage` to be present.
 * @returns {object} Criterion result with `score`, `status`, `findings` and
 *   `fix_priority`. Score 0 / status "not_found" when there is no homepage.
 */
function checkHelpfulPurposeAlignment(data) {
  const identity = { criterion: "helpful_purpose_alignment", criterion_label: "Helpful Purpose Alignment" };

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { ...identity, score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(
    scoreSampledPages(data, scoreHelpfulPurposeAlignment)
  );

  // Headline finding chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `${strong.length}/${total} pages clearly lead with useful guidance`, fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster" };
  } else {
    headline = { severity: "medium", detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`, fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler"
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { ...identity, score: average, status, findings, fix_priority: passed ? "P3" : "P1" };
}
2416
/**
 * Build the "First-Hand Experience Signals" criterion result from the
 * sampled pages.
 *
 * @param {object} data - Crawl data; requires `homepage` to be present.
 * @returns {object} Criterion result with `score`, `status`, `findings` and
 *   `fix_priority`. Score 0 / status "not_found" when there is no homepage.
 */
function checkFirstHandExperienceSignals(data) {
  const identity = { criterion: "first_hand_experience_signals", criterion_label: "First-Hand Experience Signals" };

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { ...identity, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(
    scoreSampledPages(data, scoreFirstHandExperienceSignals)
  );

  // Headline finding chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: `Moderate experiential depth across ${total} sampled pages`, fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant" };
  } else {
    headline = { severity: "medium", detail: "Little first-hand experience is visible in sampled content", fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice"
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { ...identity, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2440
/**
 * Build the "Creator Transparency" criterion result from the sampled pages.
 *
 * @param {object} data - Crawl data; requires `homepage` to be present.
 * @returns {object} Criterion result with `score`, `status`, `findings` and
 *   `fix_priority`. Score 0 / status "not_found" when there is no homepage.
 */
function checkCreatorTransparency(data) {
  const identity = { criterion: "creator_transparency", criterion_label: "Creator Transparency" };

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { ...identity, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(
    scoreSampledPages(data, scoreCreatorTransparency)
  );

  // Headline finding chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Visible authorship is present on some content but inconsistent", fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them" };
  } else {
    headline = { severity: "medium", detail: "Creator visibility is weak on content-like pages", fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages"
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { ...identity, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2464
/**
 * Build the "Methodology Transparency" criterion result from the sampled
 * pages.
 *
 * @param {object} data - Crawl data; requires `homepage` to be present.
 * @returns {object} Criterion result with `score`, `status`, `findings` and
 *   `fix_priority`. Score 0 / status "not_found" when there is no homepage.
 */
function checkMethodologyTransparency(data) {
  const identity = { criterion: "methodology_transparency", criterion_label: "Methodology Transparency" };

  if (!data.homepage) {
    const findings = [{ severity: "critical", detail: "Could not fetch homepage" }];
    return { ...identity, score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const { total, average, strong, weak } = summarizeHelpfulScores(
    scoreSampledPages(data, scoreMethodologyTransparency)
  );

  // Headline finding chosen by the average score band.
  let headline;
  if (average >= 8) {
    headline = { severity: "info", detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated` };
  } else if (average >= 5) {
    headline = { severity: "low", detail: "Some process transparency exists, but it is inconsistent", fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them' };
  } else {
    headline = { severity: "medium", detail: "Little content-production or review transparency is visible", fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions" };
  }

  const findings = [headline];
  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes"
    });
  }

  const passed = average >= 7;
  let status = "fail";
  if (passed) status = "pass";
  else if (average >= 4) status = "partial";

  return { ...identity, score: average, status, findings, fix_priority: passed ? "P3" : "P2" };
}
2133
2488
  function checkCitationReadyWriting(data) {
2134
2489
  const findings = [];
2135
2490
  if (!data.homepage) {
@@ -2225,8 +2580,8 @@ function checkAnswerFirstPlacement(data) {
2225
2580
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
2226
2581
  for (const p of earlyParagraphs) {
2227
2582
  const pText = p.replace(/<[^>]*>/g, "").trim();
2228
- const wordCount = pText.split(/\s+/).length;
2229
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
2583
+ const wordCount2 = pText.split(/\s+/).length;
2584
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
2230
2585
  shortAnswerCount++;
2231
2586
  break;
2232
2587
  }
@@ -2556,56 +2911,8 @@ function checkImageContextAI(data) {
2556
2911
  }
2557
2912
  return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
2558
2913
  }
2559
- var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
2560
- function isBoilerplateParagraph(text) {
2561
- const words = text.split(/\s+/).length;
2562
- if (words < 20 && BOILERPLATE_RE.test(text)) return true;
2563
- if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
2564
- return false;
2565
- }
2566
- function toShingles(text, n = 4) {
2567
- const words = text.split(/\s+/).filter((w) => w.length > 1);
2568
- const shingles = /* @__PURE__ */ new Set();
2569
- for (let i = 0; i <= words.length - n; i++) {
2570
- shingles.add(words.slice(i, i + n).join(" "));
2571
- }
2572
- return shingles;
2573
- }
2574
- function shingleSimilarity(a, b) {
2575
- if (a.size === 0 && b.size === 0) return 0;
2576
- let intersection = 0;
2577
- for (const s of a) {
2578
- if (b.has(s)) intersection++;
2579
- }
2580
- const union = a.size + b.size - intersection;
2581
- return union === 0 ? 0 : intersection / union;
2582
- }
2583
- function extractPageParagraphs(html) {
2584
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
2585
- const pMatches = cleaned.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
2586
- return pMatches.map((p) => {
2587
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
2588
- return { text, shingles: toShingles(text) };
2589
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
2590
- }
2591
- function splitIntoSectionsWithParagraphs(html) {
2592
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
2593
- const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
2594
- const sections = [];
2595
- for (const part of parts) {
2596
- const hMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
2597
- const heading = hMatch ? hMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
2598
- const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
2599
- const paragraphs = pMatches.map((p) => {
2600
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
2601
- return { text, shingles: toShingles(text) };
2602
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
2603
- if (paragraphs.length > 0) sections.push({ heading, paragraphs });
2604
- }
2605
- return sections;
2606
- }
2607
2914
  function findIntraPageDuplicates(html) {
2608
- const sections = splitIntoSectionsWithParagraphs(html);
2915
+ const sections = extractDuplicateContentSections(html);
2609
2916
  if (sections.length < 2) return [];
2610
2917
  const pairs = [];
2611
2918
  for (let i = 0; i < sections.length; i++) {
@@ -2614,7 +2921,7 @@ function findIntraPageDuplicates(html) {
2614
2921
  for (const pA of sections[i].paragraphs) {
2615
2922
  if (found) break;
2616
2923
  for (const pB of sections[j].paragraphs) {
2617
- const sim = shingleSimilarity(pA.shingles, pB.shingles);
2924
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
2618
2925
  if (sim > 0.4) {
2619
2926
  pairs.push({
2620
2927
  headingA: sections[i].heading,
@@ -2694,11 +3001,11 @@ function checkCrossPageDuplication(data) {
2694
3001
  const findings = [];
2695
3002
  const pages = [];
2696
3003
  if (data.homepage) {
2697
- pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractPageParagraphs(data.homepage.text) });
3004
+ pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractDuplicateContentParagraphs(data.homepage.text) });
2698
3005
  }
2699
3006
  if (data.blogSample) {
2700
3007
  for (const page of data.blogSample) {
2701
- pages.push({ url: page.finalUrl || "", paragraphs: extractPageParagraphs(page.text) });
3008
+ pages.push({ url: page.finalUrl || "", paragraphs: extractDuplicateContentParagraphs(page.text) });
2702
3009
  }
2703
3010
  }
2704
3011
  if (pages.length <= 1) {
@@ -2730,7 +3037,7 @@ function checkCrossPageDuplication(data) {
2730
3037
  const fpA = [...pA.shingles].slice(0, 5).join("|");
2731
3038
  if (siteBoilerprints.has(fpA)) continue;
2732
3039
  for (const pB of pages[j].paragraphs) {
2733
- const sim = shingleSimilarity(pA.shingles, pB.shingles);
3040
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
2734
3041
  if (sim > 0.4) {
2735
3042
  dupCount++;
2736
3043
  if (!sample) sample = pA.text.slice(0, 80);
@@ -2816,14 +3123,19 @@ function auditSiteFromData(data) {
2816
3123
  checkVisibleDateSignal(data),
2817
3124
  topicCoherence,
2818
3125
  checkContentDepth(data, topicCoherence.score),
2819
- // V2 criteria (#29-#34)
3126
+ // Helpful-content criteria (#29-#32)
3127
+ checkHelpfulPurposeAlignment(data),
3128
+ checkFirstHandExperienceSignals(data),
3129
+ checkCreatorTransparency(data),
3130
+ checkMethodologyTransparency(data),
3131
+ // V2 criteria (#33-#38)
2820
3132
  checkCitationReadyWriting(data),
2821
3133
  checkAnswerFirstPlacement(data),
2822
3134
  checkEvidencePackaging(data),
2823
3135
  checkEntityDisambiguation(data),
2824
3136
  checkExtractionFriction(data),
2825
3137
  checkImageContextAI(data),
2826
- // V3 criteria (#35-#36)
3138
+ // V3 criteria (#39-#40)
2827
3139
  checkDuplicateContent(data),
2828
3140
  checkCrossPageDuplication(data)
2829
3141
  ];
@@ -2849,6 +3161,10 @@ var WEIGHTS = {
2849
3161
  // Relevance to actual AI queries
2850
3162
  faq_section: 0.03,
2851
3163
  // Structured Q&A pairs
3164
+ helpful_purpose_alignment: 0.03,
3165
+ // Visitor-helpful vs search-first framing
3166
+ first_hand_experience_signals: 0.03,
3167
+ // Evidence of real use or observation
2852
3168
  // ─── Content Organization (~30%) ──────────────────────────────────────────
2853
3169
  // HOW easily AI engines can extract and trust your content.
2854
3170
  entity_consistency: 0.05,
@@ -2863,9 +3179,13 @@ var WEIGHTS = {
2863
3179
  // Expert attribution
2864
3180
  table_list_extractability: 0.03,
2865
3181
  // Extractable structured data
2866
- definition_patterns: 0.02,
3182
+ creator_transparency: 0.02,
3183
+ // Visible author/reviewer clarity
3184
+ methodology_transparency: 0.02,
3185
+ // Process disclosure
3186
+ definition_patterns: 0.015,
2867
3187
  // Clear definitions
2868
- visible_date_signal: 0.02,
3188
+ visible_date_signal: 0.015,
2869
3189
  // Publication date trust
2870
3190
  semantic_html: 0.02,
2871
3191
  // Clean semantic structure
@@ -2874,15 +3194,15 @@ var WEIGHTS = {
2874
3194
  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
2875
3195
  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
2876
3196
  content_cannibalization: 0.02,
2877
- llms_txt: 0.02,
2878
- robots_txt: 0.02,
3197
+ llms_txt: 0.01,
3198
+ robots_txt: 0.01,
2879
3199
  content_velocity: 0.02,
2880
- content_licensing: 0.02,
3200
+ content_licensing: 0.01,
2881
3201
  sitemap_completeness: 0.01,
2882
- canonical_url: 0.01,
2883
- rss_feed: 0.01,
2884
- schema_coverage: 0.01,
2885
- speakable_schema: 0.01,
3202
+ canonical_url: 5e-3,
3203
+ rss_feed: 0,
3204
+ schema_coverage: 0,
3205
+ speakable_schema: 0,
2886
3206
  // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
2887
3207
  // Citation quality, evidence packaging, and extraction friction.
2888
3208
  citation_ready_writing: 0.04,
@@ -2895,7 +3215,7 @@ var WEIGHTS = {
2895
3215
  // Clear entity boundaries
2896
3216
  extraction_friction: 0.02,
2897
3217
  // Sentence length, voice, jargon
2898
- image_context_ai: 0.01,
3218
+ image_context_ai: 5e-3,
2899
3219
  // Figure/figcaption, alt text quality
2900
3220
  // ─── V3 Criteria ────────────────────────────────────────────────────────
2901
3221
  duplicate_content: 0.05,
@@ -2915,8 +3235,8 @@ function calculateOverallScore(criteria) {
2915
3235
  let score = Math.round(weightedSum / totalWeight);
2916
3236
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
2917
3237
  if (coherence && coherence.score < 6) {
2918
- const cap2 = 35 + coherence.score * 5;
2919
- score = Math.min(score, cap2);
3238
+ const cap3 = 35 + coherence.score * 5;
3239
+ score = Math.min(score, cap3);
2920
3240
  }
2921
3241
  return score;
2922
3242
  }
@@ -3015,6 +3335,8 @@ var PILLARS = {
3015
3335
  "citation_ready_writing",
3016
3336
  "answer_first_placement",
3017
3337
  "evidence_packaging",
3338
+ "helpful_purpose_alignment",
3339
+ "first_hand_experience_signals",
3018
3340
  "duplicate_content",
3019
3341
  "cross_page_duplication"
3020
3342
  ],
@@ -3032,7 +3354,9 @@ var PILLARS = {
3032
3354
  "internal_linking",
3033
3355
  "content_freshness",
3034
3356
  "author_schema_depth",
3035
- "schema_markup"
3357
+ "schema_markup",
3358
+ "creator_transparency",
3359
+ "methodology_transparency"
3036
3360
  ],
3037
3361
  "Technical Foundation": [
3038
3362
  "semantic_html",
@@ -3062,6 +3386,8 @@ var CLIENT_NAMES = {
3062
3386
  citation_ready_writing: "Citation-Ready Writing",
3063
3387
  answer_first_placement: "Answer-First Placement",
3064
3388
  evidence_packaging: "Evidence Packaging",
3389
+ helpful_purpose_alignment: "Helpful Purpose Alignment",
3390
+ first_hand_experience_signals: "First-Hand Experience Signals",
3065
3391
  direct_answer_density: "Direct Answer Density",
3066
3392
  qa_content_format: "Q&A Content Format",
3067
3393
  query_answer_alignment: "Query-Answer Alignment",
@@ -3074,6 +3400,8 @@ var CLIENT_NAMES = {
3074
3400
  content_freshness: "Content Freshness",
3075
3401
  author_schema_depth: "Author & Expert Schema",
3076
3402
  schema_markup: "Schema Markup",
3403
+ creator_transparency: "Creator Transparency",
3404
+ methodology_transparency: "Methodology Transparency",
3077
3405
  semantic_html: "Semantic HTML",
3078
3406
  clean_html: "Clean HTML",
3079
3407
  visible_date_signal: "Visible Date Signal",
@@ -3100,6 +3428,8 @@ var PILLAR_WEIGHTS = {
3100
3428
  citation_ready_writing: 0.04,
3101
3429
  answer_first_placement: 0.03,
3102
3430
  evidence_packaging: 0.03,
3431
+ helpful_purpose_alignment: 0.03,
3432
+ first_hand_experience_signals: 0.03,
3103
3433
  duplicate_content: 0.05,
3104
3434
  cross_page_duplication: 0.03,
3105
3435
  direct_answer_density: 0.05,
@@ -3107,28 +3437,30 @@ var PILLAR_WEIGHTS = {
3107
3437
  query_answer_alignment: 0.04,
3108
3438
  faq_section: 0.03,
3109
3439
  table_list_extractability: 0.03,
3110
- definition_patterns: 0.02,
3440
+ definition_patterns: 0.015,
3111
3441
  entity_disambiguation: 0.02,
3112
3442
  entity_consistency: 0.05,
3113
3443
  internal_linking: 0.04,
3114
3444
  content_freshness: 0.04,
3115
3445
  author_schema_depth: 0.03,
3116
3446
  schema_markup: 0.03,
3447
+ creator_transparency: 0.02,
3448
+ methodology_transparency: 0.02,
3117
3449
  semantic_html: 0.02,
3118
3450
  clean_html: 0.02,
3119
- visible_date_signal: 0.02,
3451
+ visible_date_signal: 0.015,
3120
3452
  extraction_friction: 0.02,
3121
- image_context_ai: 0.01,
3122
- schema_coverage: 0.01,
3123
- speakable_schema: 0.01,
3453
+ image_context_ai: 5e-3,
3454
+ schema_coverage: 0,
3455
+ speakable_schema: 0,
3124
3456
  content_cannibalization: 0.02,
3125
- llms_txt: 0.02,
3126
- robots_txt: 0.02,
3457
+ llms_txt: 0.01,
3458
+ robots_txt: 0.01,
3127
3459
  content_velocity: 0.02,
3128
- content_licensing: 0.02,
3129
- canonical_url: 0.01,
3460
+ content_licensing: 0.01,
3461
+ canonical_url: 5e-3,
3130
3462
  sitemap_completeness: 0.01,
3131
- rss_feed: 0.01
3463
+ rss_feed: 0
3132
3464
  };
3133
3465
  var CRITERION_EFFORT = {
3134
3466
  topic_coherence: "High",
@@ -3138,6 +3470,8 @@ var CRITERION_EFFORT = {
3138
3470
  citation_ready_writing: "Medium",
3139
3471
  answer_first_placement: "Medium",
3140
3472
  evidence_packaging: "Medium",
3473
+ helpful_purpose_alignment: "Medium",
3474
+ first_hand_experience_signals: "Medium",
3141
3475
  duplicate_content: "Medium",
3142
3476
  cross_page_duplication: "Medium",
3143
3477
  direct_answer_density: "Medium",
@@ -3152,6 +3486,8 @@ var CRITERION_EFFORT = {
3152
3486
  content_freshness: "Low",
3153
3487
  author_schema_depth: "Low",
3154
3488
  schema_markup: "Medium",
3489
+ creator_transparency: "Low",
3490
+ methodology_transparency: "Low",
3155
3491
  semantic_html: "Low",
3156
3492
  clean_html: "Medium",
3157
3493
  visible_date_signal: "Low",
@@ -3176,6 +3512,8 @@ var FIX_DESCRIPTIONS = {
3176
3512
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
3177
3513
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
3178
3514
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
3515
+ helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
3516
+ first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
3179
3517
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
3180
3518
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
3181
3519
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
@@ -3188,6 +3526,8 @@ var FIX_DESCRIPTIONS = {
3188
3526
  content_freshness: "Add dateModified schema and visible last-updated dates.",
3189
3527
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
3190
3528
  schema_markup: "Implement JSON-LD structured data on key pages.",
3529
+ creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
3530
+ methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
3191
3531
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
3192
3532
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
3193
3533
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
@@ -3287,6 +3627,10 @@ var CRITERION_LABELS = {
3287
3627
  "Visible Date Signal": "Visible Date Signal",
3288
3628
  "Topic Coherence": "Topic Coherence",
3289
3629
  "Content Depth": "Content Depth",
3630
+ "Helpful Purpose Alignment": "Helpful Purpose Alignment",
3631
+ "First-Hand Experience Signals": "First-Hand Experience Signals",
3632
+ "Creator Transparency": "Creator Transparency",
3633
+ "Methodology Transparency": "Methodology Transparency",
3290
3634
  "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
3291
3635
  "Answer-First Placement": "Answer-First Placement",
3292
3636
  "Evidence Packaging": "Evidence Packaging",
@@ -3392,6 +3736,8 @@ var CRITERION_WEIGHTS = {
3392
3736
  qa_content_format: 0.04,
3393
3737
  query_answer_alignment: 0.04,
3394
3738
  faq_section: 0.03,
3739
+ helpful_purpose_alignment: 0.03,
3740
+ first_hand_experience_signals: 0.03,
3395
3741
  // Content Organization (~30%)
3396
3742
  entity_consistency: 0.05,
3397
3743
  internal_linking: 0.04,
@@ -3399,28 +3745,30 @@ var CRITERION_WEIGHTS = {
3399
3745
  schema_markup: 0.03,
3400
3746
  author_schema_depth: 0.03,
3401
3747
  table_list_extractability: 0.03,
3402
- definition_patterns: 0.02,
3403
- visible_date_signal: 0.02,
3748
+ creator_transparency: 0.02,
3749
+ methodology_transparency: 0.02,
3750
+ definition_patterns: 0.015,
3751
+ visible_date_signal: 0.015,
3404
3752
  semantic_html: 0.02,
3405
3753
  clean_html: 0.02,
3406
3754
  // Technical Plumbing (~15%)
3407
3755
  content_cannibalization: 0.02,
3408
- llms_txt: 0.02,
3409
- robots_txt: 0.02,
3756
+ llms_txt: 0.01,
3757
+ robots_txt: 0.01,
3410
3758
  content_velocity: 0.02,
3411
- content_licensing: 0.02,
3759
+ content_licensing: 0.01,
3412
3760
  sitemap_completeness: 0.01,
3413
- canonical_url: 0.01,
3414
- rss_feed: 0.01,
3415
- schema_coverage: 0.01,
3416
- speakable_schema: 0.01,
3761
+ canonical_url: 5e-3,
3762
+ rss_feed: 0,
3763
+ schema_coverage: 0,
3764
+ speakable_schema: 0,
3417
3765
  // V2 Criteria (~15%)
3418
3766
  citation_ready_writing: 0.04,
3419
3767
  answer_first_placement: 0.03,
3420
3768
  evidence_packaging: 0.03,
3421
3769
  entity_disambiguation: 0.02,
3422
3770
  extraction_friction: 0.02,
3423
- image_context_ai: 0.01,
3771
+ image_context_ai: 5e-3,
3424
3772
  // V3 Criteria
3425
3773
  duplicate_content: 0.05,
3426
3774
  cross_page_duplication: 0.03
@@ -3461,6 +3809,16 @@ var OPPORTUNITY_TEMPLATES = {
3461
3809
  effort: "Medium",
3462
3810
  description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
3463
3811
  },
3812
+ helpful_purpose_alignment: {
3813
+ name: "Improve Helpful Purpose Alignment",
3814
+ effort: "Medium",
3815
+ description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
3816
+ },
3817
+ first_hand_experience_signals: {
3818
+ name: "Add First-Hand Experience Signals",
3819
+ effort: "Medium",
3820
+ description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
3821
+ },
3464
3822
  original_data: {
3465
3823
  name: "Add Original Data & Case Studies",
3466
3824
  effort: "High",
@@ -3516,6 +3874,16 @@ var OPPORTUNITY_TEMPLATES = {
3516
3874
  effort: "Low",
3517
3875
  description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
3518
3876
  },
3877
+ creator_transparency: {
3878
+ name: "Improve Creator Transparency",
3879
+ effort: "Low",
3880
+ description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
3881
+ },
3882
+ methodology_transparency: {
3883
+ name: "Add Methodology Transparency",
3884
+ effort: "Low",
3885
+ description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
3886
+ },
3519
3887
  fact_density: {
3520
3888
  name: "Increase Fact & Data Density",
3521
3889
  effort: "Medium",
@@ -3971,19 +4339,23 @@ var PAGE_CRITERIA = {
3971
4339
  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
3972
4340
  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
3973
4341
  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
3974
- definition_patterns: { weight: 0.02, label: "Definition Patterns" },
3975
- visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
4342
+ definition_patterns: { weight: 0.015, label: "Definition Patterns" },
4343
+ visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
3976
4344
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
3977
4345
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
3978
4346
  // Technical Plumbing
3979
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
4347
+ canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
3980
4348
  // V2 Criteria
3981
4349
  citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
3982
4350
  answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
3983
4351
  evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
4352
+ helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
4353
+ first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
3984
4354
  entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
3985
4355
  extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
3986
- image_context_ai: { weight: 0.01, label: "Image Context for AI" },
4356
+ creator_transparency: { weight: 0.02, label: "Creator Transparency" },
4357
+ methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
4358
+ image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
3987
4359
  duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
3988
4360
  };
3989
4361
  function extractJsonLdBlocks(html) {
@@ -4006,7 +4378,7 @@ function extractTypesFromJsonLd(blocks) {
4006
4378
  }
4007
4379
  return types;
4008
4380
  }
4009
- function getTextContent(html) {
4381
+ function getTextContent2(html) {
4010
4382
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4011
4383
  }
4012
4384
  function extractQuestionHeadings2(html) {
@@ -4034,7 +4406,7 @@ function countAnsweredQuestions(html) {
4034
4406
  }
4035
4407
  return { total: questions.length, answered };
4036
4408
  }
4037
- function cap(value, max) {
4409
+ function cap2(value, max) {
4038
4410
  return Math.min(value, max);
4039
4411
  }
4040
4412
  function scoreSchemaMarkup(html) {
@@ -4060,10 +4432,10 @@ function scoreSchemaMarkup(html) {
4060
4432
  for (const t of types) {
4061
4433
  if (knownTypes.includes(t)) knownCount++;
4062
4434
  }
4063
- score += cap(knownCount * 2, 4);
4435
+ score += cap2(knownCount * 2, 4);
4064
4436
  if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
4065
4437
  if (types.has("FAQPage")) score += 1;
4066
- return cap(score, 10);
4438
+ return cap2(score, 10);
4067
4439
  }
4068
4440
  function scoreQAFormat(html) {
4069
4441
  const questions = extractQuestionHeadings2(html);
@@ -4075,7 +4447,7 @@ function scoreQAFormat(html) {
4075
4447
  if (answered >= 1) score += 3;
4076
4448
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4077
4449
  if (h1Matches.length === 1) score += 2;
4078
- return cap(score, 10);
4450
+ return cap2(score, 10);
4079
4451
  }
4080
4452
  function scoreCleanHtml(html) {
4081
4453
  let score = 0;
@@ -4084,15 +4456,15 @@ function scoreCleanHtml(html) {
4084
4456
  for (const tag of semantics) {
4085
4457
  if (html.toLowerCase().includes(tag)) semCount++;
4086
4458
  }
4087
- score += cap(semCount, 3);
4459
+ score += cap2(semCount, 3);
4088
4460
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4089
4461
  if (h1Matches.length === 1) score += 2;
4090
- const text = getTextContent(html);
4462
+ const text = getTextContent2(html);
4091
4463
  if (text.length > 500) score += 3;
4092
4464
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
4093
4465
  const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
4094
4466
  if (hasTitle && hasDesc) score += 2;
4095
- return cap(score, 10);
4467
+ return cap2(score, 10);
4096
4468
  }
4097
4469
  function scoreFaqSection(html) {
4098
4470
  let score = 0;
@@ -4104,11 +4476,11 @@ function scoreFaqSection(html) {
4104
4476
  const questions = extractQuestionHeadings2(html);
4105
4477
  if (questions.length >= 10) score += 1;
4106
4478
  if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
4107
- return cap(score, 10);
4479
+ return cap2(score, 10);
4108
4480
  }
4109
4481
  function scoreOriginalData(html) {
4110
4482
  let score = 0;
4111
- const text = getTextContent(html);
4483
+ const text = getTextContent2(html);
4112
4484
  if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
4113
4485
  score += 3;
4114
4486
  } else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
@@ -4125,7 +4497,7 @@ function scoreOriginalData(html) {
4125
4497
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
4126
4498
  score += 2;
4127
4499
  }
4128
- return cap(score, 10);
4500
+ return cap2(score, 10);
4129
4501
  }
4130
4502
  function scoreQueryAnswerAlignment(html) {
4131
4503
  const { total, answered } = countAnsweredQuestions(html);
@@ -4148,7 +4520,7 @@ function scoreContentFreshness(html) {
4148
4520
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4149
4521
  const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
4150
4522
  if (yearPattern.test(html)) score += 2;
4151
- return cap(score, 10);
4523
+ return cap2(score, 10);
4152
4524
  }
4153
4525
  function scoreTableListExtractability(html) {
4154
4526
  let score = 0;
@@ -4161,7 +4533,7 @@ function scoreTableListExtractability(html) {
4161
4533
  const listItems = html.match(/<li[\s>]/gi) || [];
4162
4534
  if (listItems.length >= 10) score += 1;
4163
4535
  if (/<dl[\s>]/i.test(html)) score += 1;
4164
- return cap(score, 10);
4536
+ return cap2(score, 10);
4165
4537
  }
4166
4538
  function scoreDirectAnswerDensity(html) {
4167
4539
  let score = 0;
@@ -4177,9 +4549,9 @@ function scoreDirectAnswerDensity(html) {
4177
4549
  }
4178
4550
  if (snippetCount >= 3) score += 2;
4179
4551
  else if (snippetCount >= 1) score += 1;
4180
- const directOpeners = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4552
+ const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4181
4553
  if (directOpeners.length >= 2) score += 2;
4182
- return cap(score, 10);
4554
+ return cap2(score, 10);
4183
4555
  }
4184
4556
  function scoreSemanticHtml(html) {
4185
4557
  let score = 0;
@@ -4189,7 +4561,7 @@ function scoreSemanticHtml(html) {
4189
4561
  for (const el of elements) {
4190
4562
  if (lowerHtml.includes(el)) count++;
4191
4563
  }
4192
- score += cap(Math.floor(count * 0.7), 4);
4564
+ score += cap2(Math.floor(count * 0.7), 4);
4193
4565
  const imgTags = html.match(/<img\s[^>]*>/gi) || [];
4194
4566
  if (imgTags.length > 0) {
4195
4567
  let withAlt = 0;
@@ -4200,11 +4572,11 @@ function scoreSemanticHtml(html) {
4200
4572
  }
4201
4573
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
4202
4574
  if (/\baria-/i.test(html)) score += 2;
4203
- return cap(score, 10);
4575
+ return cap2(score, 10);
4204
4576
  }
4205
4577
  function scoreFactDensity(html) {
4206
4578
  let score = 0;
4207
- const text = getTextContent(html);
4579
+ const text = getTextContent2(html);
4208
4580
  const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
4209
4581
  if (numericPatterns.length >= 6) score += 5;
4210
4582
  else if (numericPatterns.length >= 3) score += 3;
@@ -4217,11 +4589,11 @@ function scoreFactDensity(html) {
4217
4589
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
4218
4590
  const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
4219
4591
  if (units.length >= 2) score += 1;
4220
- return cap(score, 10);
4592
+ return cap2(score, 10);
4221
4593
  }
4222
4594
  function scoreDefinitionPatterns(html) {
4223
4595
  let score = 0;
4224
- const text = getTextContent(html);
4596
+ const text = getTextContent2(html);
4225
4597
  const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
4226
4598
  if (defPatterns.length >= 3) score += 5;
4227
4599
  else if (defPatterns.length >= 1) score += 3;
@@ -4229,7 +4601,7 @@ function scoreDefinitionPatterns(html) {
4229
4601
  if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
4230
4602
  if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
4231
4603
  if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
4232
- return cap(score, 10);
4604
+ return cap2(score, 10);
4233
4605
  }
4234
4606
  function scoreCanonicalUrl(html, url) {
4235
4607
  let score = 0;
@@ -4250,7 +4622,7 @@ function scoreCanonicalUrl(html, url) {
4250
4622
  if (canonicalHref.startsWith("https://")) score += 2;
4251
4623
  const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
4252
4624
  if (allCanonicals.length === 1) score += 1;
4253
- return cap(score, 10);
4625
+ return cap2(score, 10);
4254
4626
  }
4255
4627
  function scoreVisibleDateSignal(html) {
4256
4628
  let score = 0;
@@ -4269,11 +4641,11 @@ function scoreVisibleDateSignal(html) {
4269
4641
  } catch {
4270
4642
  }
4271
4643
  }
4272
- return cap(score, 10);
4644
+ return cap2(score, 10);
4273
4645
  }
4274
4646
  function scoreCitationReadyWriting(html) {
4275
4647
  let score = 0;
4276
- const text = getTextContent(html);
4648
+ const text = getTextContent2(html);
4277
4649
  const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
4278
4650
  if (defSentences.length >= 3) score += 3;
4279
4651
  else if (defSentences.length >= 1) score += 1;
@@ -4302,7 +4674,7 @@ function scoreCitationReadyWriting(html) {
4302
4674
  );
4303
4675
  if (quotableLines.length >= 2) score += 2;
4304
4676
  else if (quotableLines.length >= 1) score += 1;
4305
- return cap(score, 10);
4677
+ return cap2(score, 10);
4306
4678
  }
4307
4679
  function scoreAnswerFirstPlacement(html) {
4308
4680
  let score = 0;
@@ -4313,8 +4685,8 @@ function scoreAnswerFirstPlacement(html) {
4313
4685
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4314
4686
  for (const p of earlyParagraphs) {
4315
4687
  const pText = p.replace(/<[^>]*>/g, "").trim();
4316
- const wordCount = pText.split(/\s+/).length;
4317
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
4688
+ const wordCount2 = pText.split(/\s+/).length;
4689
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4318
4690
  score += 4;
4319
4691
  break;
4320
4692
  }
@@ -4335,11 +4707,11 @@ function scoreAnswerFirstPlacement(html) {
4335
4707
  score += 3;
4336
4708
  }
4337
4709
  }
4338
- return cap(score, 10);
4710
+ return cap2(score, 10);
4339
4711
  }
4340
4712
  function scoreEvidencePackaging(html) {
4341
4713
  let score = 0;
4342
- const text = getTextContent(html);
4714
+ const text = getTextContent2(html);
4343
4715
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4344
4716
  let inlineCitations = 0;
4345
4717
  for (const p of paragraphs) {
@@ -4357,11 +4729,11 @@ function scoreEvidencePackaging(html) {
4357
4729
  const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
4358
4730
  if (sourcedStats.length >= 2) score += 2;
4359
4731
  else if (sourcedStats.length >= 1) score += 1;
4360
- return cap(score, 10);
4732
+ return cap2(score, 10);
4361
4733
  }
4362
4734
  function scoreEntityDisambiguation(html) {
4363
4735
  let score = 0;
4364
- const text = getTextContent(html);
4736
+ const text = getTextContent2(html);
4365
4737
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
4366
4738
  if (!h1Match) return 3;
4367
4739
  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
@@ -4379,11 +4751,11 @@ function scoreEntityDisambiguation(html) {
4379
4751
  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
4380
4752
  score += 3;
4381
4753
  }
4382
- return cap(score, 10);
4754
+ return cap2(score, 10);
4383
4755
  }
4384
4756
  function scoreExtractionFriction(html) {
4385
4757
  let score = 0;
4386
- const text = getTextContent(html);
4758
+ const text = getTextContent2(html);
4387
4759
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
4388
4760
  const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
4389
4761
  if (avgLen > 0 && avgLen < 20) score += 3;
@@ -4406,7 +4778,7 @@ function scoreExtractionFriction(html) {
4406
4778
  if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
4407
4779
  score = Math.max(0, score - 2);
4408
4780
  }
4409
- return cap(score, 10);
4781
+ return cap2(score, 10);
4410
4782
  }
4411
4783
  function scoreImageContextAI(html) {
4412
4784
  let score = 0;
@@ -4431,20 +4803,13 @@ function scoreImageContextAI(html) {
4431
4803
  else if (goodAltCount > 0) score += 1;
4432
4804
  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
4433
4805
  if (contextualImages.length > 0) score += 3;
4434
- return cap(score, 10);
4435
- }
4436
- var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
4437
- function isBoilerplate(text) {
4438
- const words = text.split(/\s+/).length;
4439
- if (words < 20 && BOILERPLATE_PATTERNS.test(text)) return true;
4440
- if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
4441
- return false;
4806
+ return cap2(score, 10);
4442
4807
  }
4443
4808
  function scoreDuplicateContent(html) {
4444
4809
  return scoreDuplicateContentDetailed(html).score;
4445
4810
  }
4446
4811
  function scoreDuplicateContentDetailed(html) {
4447
- const sections = extractSectionsWithParagraphs(html);
4812
+ const sections = extractDuplicateContentSections(html);
4448
4813
  if (sections.length < 2) return { score: 10, duplicates: [] };
4449
4814
  const totalParagraphs = sections.reduce((sum, s) => sum + s.paragraphs.length, 0);
4450
4815
  const duplicates = [];
@@ -4453,7 +4818,7 @@ function scoreDuplicateContentDetailed(html) {
4453
4818
  for (let j = i + 1; j < sections.length; j++) {
4454
4819
  for (const pA of sections[i].paragraphs) {
4455
4820
  for (const pB of sections[j].paragraphs) {
4456
- const sim = shingleJaccard(pA.shingles, pB.shingles);
4821
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
4457
4822
  if (sim > 0.4) {
4458
4823
  dupParagraphCount++;
4459
4824
  duplicates.push({
@@ -4482,41 +4847,6 @@ function scoreDuplicateContentDetailed(html) {
4482
4847
  }
4483
4848
  return { score, duplicates };
4484
4849
  }
4485
- function extractSectionsWithParagraphs(html) {
4486
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
4487
- const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
4488
- const sections = [];
4489
- for (const part of parts) {
4490
- const headingMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
4491
- const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
4492
- const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
4493
- const paragraphs = pMatches.map((p) => {
4494
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
4495
- return { text, shingles: buildShingles(text, 4) };
4496
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplate(p.text));
4497
- if (paragraphs.length > 0) {
4498
- sections.push({ heading, paragraphs });
4499
- }
4500
- }
4501
- return sections;
4502
- }
4503
- function buildShingles(text, n) {
4504
- const words = text.split(/\s+/).filter((w) => w.length > 1);
4505
- const shingles = /* @__PURE__ */ new Set();
4506
- for (let i = 0; i <= words.length - n; i++) {
4507
- shingles.add(words.slice(i, i + n).join(" "));
4508
- }
4509
- return shingles;
4510
- }
4511
- function shingleJaccard(a, b) {
4512
- if (a.size === 0 && b.size === 0) return 0;
4513
- let intersection = 0;
4514
- for (const s of a) {
4515
- if (b.has(s)) intersection++;
4516
- }
4517
- const union = a.size + b.size - intersection;
4518
- return union === 0 ? 0 : intersection / union;
4519
- }
4520
4850
  var SCORING_FUNCTIONS = {
4521
4851
  schema_markup: scoreSchemaMarkup,
4522
4852
  qa_content_format: scoreQAFormat,
@@ -4535,8 +4865,12 @@ var SCORING_FUNCTIONS = {
4535
4865
  citation_ready_writing: scoreCitationReadyWriting,
4536
4866
  answer_first_placement: scoreAnswerFirstPlacement,
4537
4867
  evidence_packaging: scoreEvidencePackaging,
4868
+ helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
4869
+ first_hand_experience_signals: scoreFirstHandExperienceSignals,
4538
4870
  entity_disambiguation: scoreEntityDisambiguation,
4539
4871
  extraction_friction: scoreExtractionFriction,
4872
+ creator_transparency: scoreCreatorTransparency,
4873
+ methodology_transparency: scoreMethodologyTransparency,
4540
4874
  image_context_ai: scoreImageContextAI,
4541
4875
  duplicate_content: scoreDuplicateContent
4542
4876
  };
@@ -4567,7 +4901,7 @@ function extractTitle(html) {
4567
4901
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4568
4902
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4569
4903
  }
4570
- function getTextContent2(html) {
4904
+ function getTextContent3(html) {
4571
4905
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4572
4906
  }
4573
4907
  function countWords2(text) {
@@ -4627,9 +4961,9 @@ function checkMissingOgTags(html) {
4627
4961
  }
4628
4962
  return null;
4629
4963
  }
4630
- function checkThinContent(wordCount) {
4631
- if (wordCount < 300) {
4632
- return { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" };
4964
+ function checkThinContent(wordCount2) {
4965
+ if (wordCount2 < 300) {
4966
+ return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
4633
4967
  }
4634
4968
  return null;
4635
4969
  }
@@ -4726,15 +5060,15 @@ function checkNoAnswerBlock(html) {
4726
5060
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4727
5061
  for (const p of earlyParagraphs) {
4728
5062
  const pText = p.replace(/<[^>]*>/g, "").trim();
4729
- const wordCount = pText.split(/\s+/).length;
4730
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5063
+ const wordCount2 = pText.split(/\s+/).length;
5064
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4731
5065
  return null;
4732
5066
  }
4733
5067
  }
4734
5068
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
4735
5069
  }
4736
5070
  function checkNoEvidence(html, url) {
4737
- const text = getTextContent2(html);
5071
+ const text = getTextContent3(html);
4738
5072
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4739
5073
  let inlineCitations = 0;
4740
5074
  for (const p of paragraphs) {
@@ -4748,7 +5082,7 @@ function checkNoEvidence(html, url) {
4748
5082
  return null;
4749
5083
  }
4750
5084
  function checkHasCitationReadyContent(html) {
4751
- const text = getTextContent2(html);
5085
+ const text = getTextContent3(html);
4752
5086
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
4753
5087
  let quotable = 0;
4754
5088
  for (const s of sentences) {
@@ -4773,8 +5107,8 @@ function checkDuplicateContentBlocks(html) {
4773
5107
  }
4774
5108
  function analyzePage(html, url, category) {
4775
5109
  const title = extractTitle(html);
4776
- const textContent = getTextContent2(html);
4777
- const wordCount = countWords2(textContent);
5110
+ const textContent = getTextContent3(html);
5111
+ const wordCount2 = countWords2(textContent);
4778
5112
  const issues = [];
4779
5113
  const strengths = [];
4780
5114
  const issueChecks = [
@@ -4785,7 +5119,7 @@ function analyzePage(html, url, category) {
4785
5119
  checkNoSchema(html),
4786
5120
  checkMissingCanonical(html),
4787
5121
  checkMissingOgTags(html),
4788
- checkThinContent(wordCount),
5122
+ checkThinContent(wordCount2),
4789
5123
  checkImagesMissingAlt(html),
4790
5124
  checkNoInternalLinks(html, url),
4791
5125
  checkNoAnswerBlock(html),
@@ -4804,7 +5138,7 @@ function analyzePage(html, url, category) {
4804
5138
  if (result) strengths.push(result);
4805
5139
  }
4806
5140
  const { aeoScore, criterionScores } = scorePage(html, url);
4807
- return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
5141
+ return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
4808
5142
  }
4809
5143
  function analyzeAllPages(siteData) {
4810
5144
  const reviews = [];