aeorank 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -462,6 +462,244 @@ function detectParkedDomain(bodySnippet) {
462
462
  return { isParked: false };
463
463
  }
464
464
 
465
+ // src/duplicate-content.ts
466
// Call-to-action and legal phrases; used by isBoilerplateParagraph on short paragraphs.
var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
// createParagraph rejects paragraphs with fewer tokens than this.
var MIN_SUBSTANTIVE_WORDS = 15;
// isMetadataParagraph only treats a "Label: value" paragraph as metadata up to this many tokens.
var MAX_METADATA_WORDS = 24;
// Maximum number of tokens allowed in the label part (the text before the first colon).
var MAX_METADATA_LABEL_WORDS = 4;
470
/**
 * Reduce an HTML fragment to lowercase plain text for duplicate detection.
 *
 * Tags and character references are each replaced by a single space, runs of
 * whitespace are collapsed, and the result is trimmed and lowercased.
 * Both named (`&amp;`) and numeric (`&#39;`, `&#x27;`) references are removed,
 * so pages that encode the same punctuation differently normalize to the same text.
 *
 * @param {string} htmlFragment - Raw HTML of one paragraph.
 * @returns {string} Normalized plain text.
 */
function normalizeParagraphText(htmlFragment) {
  return htmlFragment
    .replace(/<[^>]*>/g, " ")
    .replace(/&(?:#\d+|#x[0-9a-f]+|\w+);/gi, " ")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
473
/**
 * Split text on whitespace and strip leading/trailing non-alphanumeric
 * characters from each piece; pieces that end up empty are dropped.
 *
 * @param {string} text
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(text) {
  const tokens = [];
  for (const chunk of text.split(/\s+/)) {
    const trimmed = chunk.replace(/^[^a-z0-9]+|[^a-z0-9]+$/gi, "");
    if (trimmed.length > 0) {
      tokens.push(trimmed);
    }
  }
  return tokens;
}
476
/**
 * Decide whether a paragraph is site boilerplate rather than content.
 *
 * @param {string} text - Paragraph text.
 * @param {number} words - Token count of the paragraph.
 * @returns {boolean} True for short call-to-action/legal paragraphs (< 20 words)
 *   and short cookie/consent notices (< 30 words).
 */
function isBoilerplateParagraph(text, words) {
  const isShortCallToAction = words < 20 && BOILERPLATE_PATTERNS.test(text);
  const isShortConsentNotice = words < 30 && /\b(cookie|gdpr|consent|opt.out)\b/i.test(text);
  return isShortCallToAction || isShortConsentNotice;
}
481
/**
 * Detect short "Label: value" paragraphs (for example a byline or date row).
 *
 * @param {string} text - Paragraph text.
 * @param {number} words - Token count of the paragraph.
 * @returns {boolean} True when the text starts with a 1-60 character label
 *   followed by a colon and whitespace, the label has between 1 and
 *   MAX_METADATA_LABEL_WORDS tokens, and the paragraph has at most
 *   MAX_METADATA_WORDS tokens.
 */
function isMetadataParagraph(text, words) {
  const label = text.match(/^([^:]{1,60}):\s+/);
  if (label === null) {
    return false;
  }
  const labelWordCount = tokenize(label[1]).length;
  if (labelWordCount === 0 || labelWordCount > MAX_METADATA_LABEL_WORDS) {
    return false;
  }
  return words <= MAX_METADATA_WORDS;
}
487
/**
 * Build the set of n-token shingles (sliding windows joined by a space).
 *
 * @param {string[]} words - Tokens in order.
 * @param {number} [n=4] - Window size.
 * @returns {Set<string>} Distinct shingles; empty when there are fewer than n tokens.
 */
function buildShinglesFromTokens(words, n = 4) {
  const lastStart = words.length - n;
  const shingles = /* @__PURE__ */ new Set();
  let start = 0;
  while (start <= lastStart) {
    const window = words.slice(start, start + n);
    shingles.add(window.join(" "));
    start += 1;
  }
  return shingles;
}
494
/**
 * Turn one paragraph's HTML into a comparable record, or reject it.
 *
 * A paragraph is rejected (null) when it has fewer than MIN_SUBSTANTIVE_WORDS
 * tokens, is boilerplate, is a metadata row, or yields fewer than 3 shingles.
 *
 * @param {string} htmlFragment - Raw HTML of the paragraph.
 * @returns {{text: string, shingles: Set<string>} | null}
 */
function createParagraph(htmlFragment) {
  const text = normalizeParagraphText(htmlFragment);
  const tokens = tokenize(text);
  const tokenCount = tokens.length;
  const rejected =
    tokenCount < MIN_SUBSTANTIVE_WORDS ||
    isBoilerplateParagraph(text, tokenCount) ||
    isMetadataParagraph(text, tokenCount);
  if (rejected) {
    return null;
  }
  const shingles = buildShinglesFromTokens(tokens);
  return shingles.size >= 3 ? { text, shingles } : null;
}
504
/**
 * Remove elements that do not carry page content: script, style, nav, header,
 * footer and noscript in a first pass, then aside in a second pass.
 *
 * @param {string} html
 * @returns {string} The HTML with those elements (and their contents) removed.
 */
function stripNonContentHtml(html) {
  const withoutChrome = html.replace(
    /<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi,
    ""
  );
  const withoutAsides = withoutChrome.replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
  return withoutAsides;
}
507
/**
 * Collect every substantive paragraph of a page for cross-page comparison.
 *
 * @param {string} html - Full page HTML.
 * @returns {Array<{text: string, shingles: Set<string>}>} Paragraph records
 *   that createParagraph accepted, in document order.
 */
function extractDuplicateContentParagraphs(html) {
  const fragments = stripNonContentHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) ?? [];
  const paragraphs = [];
  for (const fragment of fragments) {
    const paragraph = createParagraph(fragment);
    if (paragraph !== null) {
      paragraphs.push(paragraph);
    }
  }
  return paragraphs;
}
512
/**
 * Split a page at each h2/h3 and collect the substantive paragraphs per section.
 *
 * Content before the first heading gets the heading "(intro)". Sections with
 * no accepted paragraphs are omitted.
 *
 * @param {string} html - Full page HTML.
 * @returns {Array<{heading: string, paragraphs: Array<{text: string, shingles: Set<string>}>}>}
 */
function extractDuplicateContentSections(html) {
  const sections = [];
  const chunks = stripNonContentHtml(html).split(/(?=<h[23]\b[^>]*>)/i);
  for (const chunk of chunks) {
    const paragraphs = [];
    for (const fragment of chunk.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) ?? []) {
      const paragraph = createParagraph(fragment);
      if (paragraph !== null) {
        paragraphs.push(paragraph);
      }
    }
    if (paragraphs.length === 0) {
      continue;
    }
    const headingTag = chunk.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
    const heading = headingTag === null ? "(intro)" : headingTag[1].replace(/<[^>]*>/g, "").trim();
    sections.push({ heading, paragraphs });
  }
  return sections;
}
524
/**
 * Jaccard similarity of two shingle sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function shingleJaccardSimilarity(a, b) {
  const shared = [...a].filter((shingle) => b.has(shingle)).length;
  const combined = a.size + b.size - shared;
  return combined > 0 ? shared / combined : 0;
}
533
+
534
+ // src/helpful-content.ts
535
/**
 * Clamp a value from above.
 *
 * @param {number} value
 * @param {number} max - Upper bound.
 * @returns {number} The smaller of value and max.
 */
function cap(value, max) {
  const bounded = Math.min(value, max);
  return bounded;
}
538
/**
 * Clamp a value from below (this is a lower bound, not Math.floor rounding).
 *
 * @param {number} value
 * @param {number} min - Lower bound.
 * @returns {number} The larger of value and min.
 */
function floor(value, min) {
  const bounded = Math.max(value, min);
  return bounded;
}
541
/**
 * Count the occurrences of a pattern in a string.
 *
 * `String.prototype.match` only returns one entry per occurrence when the
 * regex has the `g` flag; for a non-global regex it returns
 * `[fullMatch, ...captureGroups]`, whose length is unrelated to the number of
 * occurrences. A non-global RegExp is therefore counted through a global copy,
 * so the result is the occurrence count in both cases.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern to count.
 * @returns {number} Number of matches (0 when there are none).
 */
function countMatches(text, pattern) {
  const counter =
    pattern instanceof RegExp && !pattern.global
      ? new RegExp(pattern.source, `${pattern.flags}g`)
      : pattern;
  return text.match(counter)?.length ?? 0;
}
544
/**
 * Replace every <script>…</script> block, then every <style>…</style> block,
 * with a single space.
 *
 * @param {string} html
 * @returns {string}
 */
function stripScriptsAndStyles(html) {
  let stripped = html;
  for (const tag of ["script", "style"]) {
    const block = new RegExp(`<${tag}[\\s\\S]*?<\\/${tag}>`, "gi");
    stripped = stripped.replace(block, " ");
  }
  return stripped;
}
547
/**
 * Extract the visible text of an HTML document: scripts and styles are
 * removed, remaining tags become spaces, and whitespace is collapsed.
 *
 * @param {string} html
 * @returns {string} Trimmed plain text (entities are not decoded).
 */
function getTextContent(html) {
  const withoutCode = stripScriptsAndStyles(html);
  const withoutTags = withoutCode.replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
550
/**
 * Return the inner HTML of the <body> element, or the whole input when no
 * complete <body>…</body> pair is found.
 *
 * @param {string} html
 * @returns {string}
 */
function getBodyHtml(html) {
  const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  return bodyMatch ? bodyMatch[1] : html;
}
/**
 * Return the plain text of the first <p> element in the body, or "" when
 * there is none.
 *
 * The tag name is anchored with `\b` so that other elements starting with the
 * letter p (`<pre>`, `<picture>`, `<path>`, …) are not mistaken for a
 * paragraph; the duplicate-content helpers anchor their `<p>` pattern the same way.
 *
 * @param {string} html
 * @returns {string}
 */
function getFirstParagraphText(html) {
  const firstPara = getBodyHtml(html).match(/<p\b[^>]*>([\s\S]*?)<\/p>/i);
  return firstPara ? firstPara[1].replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim() : "";
}
558
/**
 * Keep the first `count` whitespace-separated pieces of a string.
 *
 * @param {string} text
 * @param {number} count - Number of pieces to keep.
 * @returns {string} The kept pieces joined by single spaces.
 */
function firstNWords(text, count) {
  const pieces = text.split(/\s+/);
  const kept = pieces.slice(0, count);
  return kept.join(" ");
}
561
/**
 * Return the plain text of the first <h1> element, or "" when there is none.
 *
 * @param {string} html
 * @returns {string} Heading text with nested tags removed and whitespace collapsed.
 */
function getH1Text(html) {
  const found = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!found) {
    return "";
  }
  const withoutTags = found[1].replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
565
/**
 * Return the plain text of the first <title> element, or "" when there is none.
 *
 * @param {string} html
 * @returns {string} Title text with nested tags removed and whitespace collapsed.
 */
function getTitleText(html) {
  const found = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!found) {
    return "";
  }
  const withoutTags = found[1].replace(/<[^>]*>/g, " ");
  return withoutTags.replace(/\s+/g, " ").trim();
}
569
/**
 * Count the whitespace-separated words in a string.
 *
 * @param {string} text
 * @returns {number} Number of non-empty pieces; 0 for empty or falsy input.
 */
function wordCount(text) {
  if (!text) {
    return 0;
  }
  let total = 0;
  for (const piece of text.split(/\s+/)) {
    if (piece) {
      total += 1;
    }
  }
  return total;
}
572
/**
 * Heuristically decide whether a page is editorial content (article, guide,
 * docs, …) rather than a landing or utility page.
 *
 * Signals: a content-style URL path segment (+2), an <article> element (+1),
 * at least two h2/h3 headings (+1), 500+ words (+1), a <time> element or
 * datePublished/dateModified marker (+1), and byline wording in the text (+1).
 * The page is content-like when the signals total 2 or more.
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL; the URL signal is skipped when falsy.
 * @returns {boolean}
 */
function isContentLikePage(html, url) {
  const text = getTextContent(html);
  const wc = wordCount(text);
  let signals = 0;
  // "case-stud" must be spelled out as study/studies: a bare stem followed by
  // \b can never match, because the next character ("y" or "i") is a word character.
  if (url && /\/(?:blog|article|articles|guide|guides|docs|learn|help|news|insights|resources|how-to|tutorial|case-stud(?:y|ies)|whitepaper|faq)\b/i.test(url)) {
    signals += 2;
  }
  if (/<article[\s>]/i.test(html)) signals += 1;
  if ((html.match(/<h[2-3][^>]*>/gi) || []).length >= 2) signals += 1;
  if (wc >= 500) signals += 1;
  if (/<time[\s>]/i.test(html) || /datePublished|dateModified/i.test(html)) signals += 1;
  if (/written\s+by|authored?\s+by|reviewed\s+by|medically\s+reviewed/i.test(text)) signals += 1;
  return signals >= 2;
}
586
/**
 * Decide whether readers would expect this page to explain its methodology
 * (reviews, comparisons, benchmarks, studies, …).
 *
 * Checked in order: the <title> plus <h1> text, the URL, then the page text.
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL.
 * @returns {boolean}
 */
function expectsMethodology(html, url) {
  const title = `${getTitleText(html)} ${getH1Text(html)}`.toLowerCase();
  // Keywords are anchored at a word start, and the short ones (vs, best, test)
  // also at a word end, so that titles such as "Latest news", "Canvas basics"
  // or "Bestseller list" are not mistaken for test/vs/best pages.
  if (/\b(?:review|compare|comparison|benchmark|study|analysis|survey|report|research|methodology|tested|(?:vs|best|tests?|testing)\b)/i.test(title)) {
    return true;
  }
  const urlText = (url || "").toLowerCase();
  if (/(?:review|compare|comparison|benchmark|study|analysis|survey|research|report|best)/i.test(urlText)) {
    return true;
  }
  // The full-text extraction is only needed when title and URL gave no answer.
  const text = getTextContent(html);
  return /\b(methodology|how we tested|how we reviewed|our testing|sample size|dataset|benchmark|editorial policy|review process)\b/i.test(text);
}
598
/**
 * Check that the topic promised by the <title>/<h1> shows up early in the body copy.
 *
 * Keywords are the distinct title/h1 words of 5+ characters that are not
 * generic filler. The page aligns when at least min(2, keywordCount) of them
 * occur in the first 250 words of the body text.
 *
 * The <title> and <h1> elements are removed from the text being searched.
 * Otherwise the title's own words are always among the first words of the
 * extracted text and the check passes for almost every page.
 *
 * @param {string} html - Full page HTML.
 * @returns {boolean} False when no usable keyword exists.
 */
function titleAndBodyAlign(html) {
  const h1 = getH1Text(html);
  const title = getTitleText(html);
  // getBodyHtml falls back to the whole document when there is no <body>, so
  // <title> is stripped explicitly as well.
  const bodyOnly = getBodyHtml(html)
    .replace(/<title\b[^>]*>[\s\S]*?<\/title>/gi, " ")
    .replace(/<h1\b[^>]*>[\s\S]*?<\/h1>/gi, " ");
  const text = firstNWords(getTextContent(bodyOnly), 250).toLowerCase();
  const topic = `${title} ${h1}`.toLowerCase();
  const keywords = topic.split(/[\s|:()\-/]+/).filter((w) => w.length >= 5 && !/^(about|guide|complete|ultimate|best|learn|understand|what|when|where|which|their|there|these|those)$/i.test(w));
  const uniqueKeywords = [...new Set(keywords)];
  if (uniqueKeywords.length === 0) return false;
  return uniqueKeywords.filter((w) => text.includes(w)).length >= Math.min(2, uniqueKeywords.length);
}
608
// --- Purpose-alignment vocabulary -------------------------------------------
// Stock opening phrases; tested against the start of the first paragraph.
var GENERIC_OPENERS = /^(?:in today'?s|it is important to understand|in this (?:article|guide|post)|whether you are|have you ever|welcome to|if you'?re looking|in the modern|in the digital age)/i;
// Decision-oriented, actionable phrasing.
var PRACTICAL_LANGUAGE = /\b(?:here'?s what to do|choose\s+\w+\s+if|avoid\s+\w+\s+when|the main risk is|the fastest option is|next step(?:s)?|best option|lowest risk|good fit if|not a good fit if|what to do next)\b/gi;
// Words that signal trade-offs or caveats.
var TRADEOFF_LANGUAGE = /\b(?:however|but|trade-?off|caveat|limitation|downside|upside|risk|benefit|pros?\b|cons?\b|watch out|be careful|unless)\b/gi;
// Filler phrases.
var FLUFF_LANGUAGE = /\b(?:comprehensive guide|ever-evolving|fast-paced world|unlock(?:ing)? the power|in conclusion|delve into|navigate the landscape|journey|seamless|robust solution)\b/gi;
// Call-to-action phrases; counted in the early part of the body HTML.
var EARLY_CTA_PATTERN = /\b(?:sign up|get started|book (?:a )?demo|contact us|schedule (?:a )?call|buy now|start free|request a quote|talk to sales|subscribe)\b/gi;

// --- First-hand experience vocabulary ---------------------------------------
// First-person statements of having done something.
var FIRST_HAND_ACTIONS = /\b(?:we|i)\s+(?:tested|used|reviewed|implemented|measured|compared|observed|deployed|migrated|surveyed|analyzed|audited|interviewed)\b/gi;
// Context phrases: setting, duration, or sample.
var EXPERIENCE_CONTEXT = /\b(?:in our|during our|for our|in practice|in production|in the field|in our clinic|during implementation|over \d+\s+(?:days?|weeks?|months?)|across \d+\s+(?:accounts?|customers?|patients?|sites?|campaigns?)|with \d+\s+(?:participants?|users?|patients?|samples?))\b/gi;
// Mentions of original artifacts.
var EXPERIENCE_ARTIFACTS = /\b(?:screenshot|photo|benchmark|timeline|before\/after|our results|original chart|field note|walkthrough)\b/gi;
// Admissions of limits, failures, or lessons learned.
var LIMITATION_LANGUAGE = /\b(?:limitation|limitations|we found that|we learned|one caveat|did not work|failed|issue we hit|edge case|unexpectedly|drawback)\b/gi;

// --- Creator transparency patterns ------------------------------------------
// Anchor tags whose href contains an author/team/people/staff/reviewer path segment.
var AUTHOR_LINK_PATTERN = /<a[^>]*href=["'][^"']*\/(?:author|authors|team|people|staff|reviewers?)\b[^"']*["'][^>]*>/i;
// "written by" / "reviewed by" style bylines.
var BYLINE_PATTERN = /\b(?:written|authored|reviewed|edited|medically reviewed)\s+by\b/i;
// Author-bio and credential wording.
var AUTHOR_BIO_PATTERN = /\b(?:about the author|author bio|reviewed by|medically reviewed by|board-certified|licensed|credentials?|specializes in|specialist in)\b/i;

// --- Methodology transparency patterns --------------------------------------
// Phrases naming a methodology or review process.
var METHODOLOGY_TERMS = /\b(?:methodology|how we tested|how we reviewed|our methodology|testing process|review process|editorial policy|sample size|data collection|timeframe|criteria used|tools used|benchmark method|updated using|last reviewed|ai-assisted|ai assisted)\b/gi;
// Concrete process details.
var METHODOLOGY_DETAIL = /\b(?:sample size|participants?|dataset|timeframe|criteria|weights?|tool(?:s)? used|measured over|tested over|reviewed on|last updated|last reviewed|scoring method)\b/gi;
622
/**
 * Score (0-10) how directly a page serves the visitor's task.
 *
 * Empty pages score 0; short (< 250 words) non-content pages get a neutral 5.
 * Otherwise the score starts at 3 (content-like) or 5, gains points for a
 * non-generic opener, practical language, trade-off language, title/body
 * alignment and a takeaway phrase, and loses points for a generic opener,
 * early calls to action and filler phrases.
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL.
 * @returns {number} Integer score clamped to [0, 10].
 */
function scoreHelpfulPurposeAlignment(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  const contentLike = isContentLikePage(html, url);
  if (!contentLike && wordCount(text) < 250) return 5;

  let score = contentLike ? 3 : 5;

  // Opener: a specific first paragraph earns 2, a stock opener costs 2.
  const firstPara = getFirstParagraphText(html);
  if (firstPara) {
    score += GENERIC_OPENERS.test(firstPara) ? -2 : 2;
  }

  // Practical guidance counts more when it appears within the first 300 words.
  if (countMatches(firstNWords(text, 300), PRACTICAL_LANGUAGE) >= 1) {
    score += 2;
  } else if (countMatches(text, PRACTICAL_LANGUAGE) >= 2) {
    score += 1;
  }

  const tradeoffs = countMatches(text, TRADEOFF_LANGUAGE);
  score += tradeoffs >= 2 ? 2 : tradeoffs >= 1 ? 1 : 0;

  if (titleAndBodyAlign(html)) score += 1;
  if (/\b(?:bottom line|key takeaway|here'?s the short answer|next steps?)\b/i.test(text)) score += 1;

  // Calls to action are counted in the first 1800 characters of the body markup.
  const earlyCtas = countMatches(getBodyHtml(html).slice(0, 1800), EARLY_CTA_PATTERN);
  score -= earlyCtas >= 3 ? 2 : earlyCtas >= 2 ? 1 : 0;

  const fluff = countMatches(text, FLUFF_LANGUAGE);
  score -= fluff >= 3 ? 2 : fluff >= 1 ? 1 : 0;

  return floor(cap(score, 10), 0);
}
649
/**
 * Score (0-10) the visible evidence of first-hand use, testing, or observation.
 *
 * Empty pages score 0. The score starts at 2 for content-like pages and 5
 * otherwise, and adds tiered points for first-person actions, experience
 * context, artifacts (including <figure>/<figcaption> markup) and limitation
 * language; vendor-copy wording subtracts 1.
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL.
 * @returns {number} Integer score clamped to [0, 10].
 */
function scoreFirstHandExperienceSignals(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;

  // Points for a hit count: `high` at or above `strongAt`, `low` for any hit, else 0.
  const tier = (count, strongAt, high, low) => (count >= strongAt ? high : count >= 1 ? low : 0);

  let score = isContentLikePage(html, url) ? 2 : 5;
  score += tier(countMatches(text, FIRST_HAND_ACTIONS), 3, 4, 2);
  score += tier(countMatches(text, EXPERIENCE_CONTEXT), 2, 2, 1);
  score += tier(
    countMatches(text, EXPERIENCE_ARTIFACTS) + countMatches(html, /<figure|<figcaption/gi),
    3,
    2,
    1
  );
  score += tier(countMatches(text, LIMITATION_LANGUAGE), 2, 2, 1);

  if (/\b(?:manufacturer|vendor)\s+(?:description|specification|copy)\b/i.test(text)) {
    score -= 1;
  }
  return floor(cap(score, 10), 0);
}
669
/**
 * Score (0-10) how clearly a content page shows who created or reviewed it.
 *
 * Empty pages score 0 and non-content pages a neutral 5. Content-like pages
 * earn points for a byline (3), an author/team link (2), author-bio wording (2),
 * reviewer/editor attribution (1) and Person schema markup (2).
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL.
 * @returns {number} Integer score clamped to [0, 10].
 */
function scoreCreatorTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  if (!isContentLikePage(html, url)) return 5;

  // Each entry pairs the points awarded with whether the signal is present.
  const signals = [
    [3, BYLINE_PATTERN.test(text) || /class=["'][^"']*author[^"']*["']/i.test(html) || /rel=["']author["']/i.test(html)],
    [2, AUTHOR_LINK_PATTERN.test(html)],
    [2, AUTHOR_BIO_PATTERN.test(text)],
    [1, /\b(?:reviewed by|edited by|medically reviewed by)\b/i.test(text)],
    [2, /"@type"\s*:\s*"Person"/i.test(html)],
  ];
  const score = signals.reduce((sum, [points, present]) => (present ? sum + points : sum), 0);
  return floor(cap(score, 10), 0);
}
684
/**
 * Score (0-10) how well a page discloses how its content was produced,
 * tested, or reviewed.
 *
 * Empty pages score 0. Pages where a methodology is expected start at 2 and
 * must earn their score; all other pages start at a neutral 5. Points are
 * added for methodology terms, concrete process details, quantified testing
 * statements, a figure/table accompanying a methodology mention, and an
 * AI-assistance or human-review disclosure.
 *
 * @param {string} html - Full page HTML.
 * @param {string} [url] - Page URL.
 * @returns {number} Integer score clamped to [0, 10].
 */
function scoreMethodologyTransparency(html, url) {
  const text = getTextContent(html);
  if (!text) return 0;
  // The baseline depends only on whether a methodology is expected; the
  // content-like check chose between two identical values and was dropped.
  const expected = expectsMethodology(html, url);
  let score = expected ? 2 : 5;
  const methodologyCount = countMatches(text, METHODOLOGY_TERMS);
  if (methodologyCount >= 2) score += 3;
  else if (methodologyCount >= 1) score += 2;
  const detailCount = countMatches(text, METHODOLOGY_DETAIL);
  if (detailCount >= 3) score += 3;
  else if (detailCount >= 2) score += 2;
  else if (detailCount >= 1) score += 1;
  if (/\b(?:tested|reviewed|analyzed)\s+\d+|\bacross\s+\d+|\bover\s+\d+\s+(?:days?|weeks?|months?)|\busing\s+\d+\s+\w+/i.test(text)) score += 1;
  if (/<figure|<table/i.test(html) && methodologyCount >= 1) score += 1;
  if (/\b(?:ai-assisted|ai assisted|reviewed by an editor|human reviewed)\b/i.test(text)) score += 1;
  return floor(cap(score, 10), 0);
}
702
+
465
703
  // src/site-crawler.ts
466
704
  async function fetchText(url) {
467
705
  try {
@@ -1367,8 +1605,8 @@ function checkDirectAnswerDensity(data) {
1367
1605
  const paragraphs = html.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
1368
1606
  const snippetZoneParagraphs = paragraphs.filter((p) => {
1369
1607
  const text2 = p.replace(/<[^>]*>/g, "").trim();
1370
- const wordCount = text2.split(/\s+/).length;
1371
- return wordCount >= 40 && wordCount <= 150;
1608
+ const wordCount2 = text2.split(/\s+/).length;
1609
+ return wordCount2 >= 40 && wordCount2 <= 150;
1372
1610
  });
1373
1611
  if (snippetZoneParagraphs.length >= 3) {
1374
1612
  score += 2;
@@ -2516,6 +2754,123 @@ function checkContentDepth(data, topicCoherenceScore) {
2516
2754
  }
2517
2755
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2518
2756
  }
2757
/**
 * Run a page scorer over the homepage and every sampled blog page.
 *
 * A page without a finalUrl is scored under the site root URL, built from
 * data.protocol (default "https") and data.domain.
 *
 * @param {object} data - Crawl data; reads homepage, blogSample, protocol and domain.
 * @param {(html: string, url: string) => number} scorer - Per-page scoring function.
 * @returns {Array<{url: string, score: number}>} Homepage first, then blog pages in order.
 */
function scoreSampledPages(data, scorer) {
  const rootUrl = data.protocol ? `${data.protocol}://${data.domain}/` : `https://${data.domain}/`;
  const sampled = [];
  if (data.homepage) {
    sampled.push(data.homepage);
  }
  if (data.blogSample) {
    for (const page of data.blogSample) {
      sampled.push(page);
    }
  }
  return sampled.map((page) => {
    const url = page.finalUrl || rootUrl;
    return { url, score: scorer(page.text, url) };
  });
}
2771
/**
 * Summarize per-page scores.
 *
 * @param {Array<{url: string, score: number}>} pageScores
 * @returns {{total: number, average: number, strong: Array, weak: Array}}
 *   total page count, rounded mean score (0 when empty), pages scoring 8 or
 *   more, and pages scoring 4 or less.
 */
function summarizeHelpfulScores(pageScores) {
  const strong = [];
  const weak = [];
  let sum = 0;
  for (const page of pageScores) {
    sum += page.score;
    if (page.score >= 8) strong.push(page);
    if (page.score <= 4) weak.push(page);
  }
  const total = pageScores.length;
  const average = total > 0 ? Math.round(sum / total) : 0;
  return { total, average, strong, weak };
}
2778
/**
 * Audit criterion: do the sampled pages put the visitor's task ahead of filler?
 *
 * @param {object} data - Crawl data; requires data.homepage.
 * @returns {object} Criterion result (criterion, criterion_label, score,
 *   status, findings, fix_priority). The score is the rounded average page
 *   score; status is "not_found" when the homepage is missing.
 */
function checkHelpfulPurposeAlignment(data) {
  const criterion = "helpful_purpose_alignment";
  const criterion_label = "Helpful Purpose Alignment";

  if (!data.homepage) {
    return {
      criterion,
      criterion_label,
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P1",
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreHelpfulPurposeAlignment));
  const { total, average, strong, weak } = summary;
  const findings = [];

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages strongly prioritize visitor task completion over filler`,
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: `${strong.length}/${total} pages clearly lead with useful guidance`,
      fix: "Tighten intros, reduce generic filler, and make pages solve the promised user task faster",
    });
  } else {
    findings.push({
      severity: "medium",
      detail: `Average helpful-purpose score is ${average}/10 across ${total} sampled pages`,
      fix: "Reduce search-first framing, generic intros, and CTA interruptions before the first useful answer",
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) read as weakly task-focused`,
      fix: "Rewrite weak pages to lead with concrete answers, tradeoffs, and next steps instead of broad introductory filler",
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";

  return {
    criterion,
    criterion_label,
    score: average,
    status,
    findings,
    fix_priority: average >= 7 ? "P3" : "P1",
  };
}
2802
/**
 * Audit criterion: do the sampled pages show evidence of direct use, testing,
 * or observation?
 *
 * @param {object} data - Crawl data; requires data.homepage.
 * @returns {object} Criterion result (criterion, criterion_label, score,
 *   status, findings, fix_priority). The score is the rounded average page
 *   score; status is "not_found" when the homepage is missing.
 */
function checkFirstHandExperienceSignals(data) {
  const criterion = "first_hand_experience_signals";
  const criterion_label = "First-Hand Experience Signals";

  if (!data.homepage) {
    return {
      criterion,
      criterion_label,
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2",
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreFirstHandExperienceSignals));
  const { total, average, strong, weak } = summary;
  const findings = [];

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages show strong signs of direct use, testing, or observation`,
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: `Moderate experiential depth across ${total} sampled pages`,
      fix: "Add real implementation details, limitations, screenshots, or direct observations where relevant",
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Little first-hand experience is visible in sampled content",
      fix: "Add evidence of real use, testing, implementation, or lived experience instead of generic summaries",
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) appear generic or second-hand`,
      fix: "Strengthen those pages with case details, lessons learned, or original evidence from practice",
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";

  return {
    criterion,
    criterion_label,
    score: average,
    status,
    findings,
    fix_priority: average >= 7 ? "P3" : "P2",
  };
}
2826
/**
 * Audit criterion: do article-like pages visibly attribute their authors and
 * reviewers?
 *
 * @param {object} data - Crawl data; requires data.homepage.
 * @returns {object} Criterion result (criterion, criterion_label, score,
 *   status, findings, fix_priority). The score is the rounded average page
 *   score; status is "not_found" when the homepage is missing.
 */
function checkCreatorTransparency(data) {
  const criterion = "creator_transparency";
  const criterion_label = "Creator Transparency";

  if (!data.homepage) {
    return {
      criterion,
      criterion_label,
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2",
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreCreatorTransparency));
  const { total, average, strong, weak } = summary;
  const findings = [];

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} sampled pages provide clear visible creator attribution`,
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: "Visible authorship is present on some content but inconsistent",
      fix: "Add bylines, author links, and reviewer details on article-like pages where readers expect them",
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Creator visibility is weak on content-like pages",
      fix: "Show clear bylines, author pages, and reviewer context rather than relying on schema alone",
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) look article-like but expose little visible author context`,
      fix: "Add visible bylines, author bios, or reviewer attribution to those pages",
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";

  return {
    criterion,
    criterion_label,
    score: average,
    status,
    findings,
    fix_priority: average >= 7 ? "P3" : "P2",
  };
}
2850
/**
 * Audit criterion: do the sampled pages explain how their content was
 * researched, tested, reviewed, or updated?
 *
 * @param {object} data - Crawl data; requires data.homepage.
 * @returns {object} Criterion result (criterion, criterion_label, score,
 *   status, findings, fix_priority). The score is the rounded average page
 *   score; status is "not_found" when the homepage is missing.
 */
function checkMethodologyTransparency(data) {
  const criterion = "methodology_transparency";
  const criterion_label = "Methodology Transparency";

  if (!data.homepage) {
    return {
      criterion,
      criterion_label,
      score: 0,
      status: "not_found",
      findings: [{ severity: "critical", detail: "Could not fetch homepage" }],
      fix_priority: "P2",
    };
  }

  const summary = summarizeHelpfulScores(scoreSampledPages(data, scoreMethodologyTransparency));
  const { total, average, strong, weak } = summary;
  const findings = [];

  if (average >= 8) {
    findings.push({
      severity: "info",
      detail: `${strong.length}/${total} pages clearly explain how content was tested, researched, reviewed, or updated`,
    });
  } else if (average >= 5) {
    findings.push({
      severity: "low",
      detail: "Some process transparency exists, but it is inconsistent",
      fix: 'Add "how we tested", methodology, review process, or update disclosures on pages where users would expect them',
    });
  } else {
    findings.push({
      severity: "medium",
      detail: "Little content-production or review transparency is visible",
      fix: "Explain how pages were researched, tested, or reviewed instead of presenting unsupported comparisons or conclusions",
    });
  }

  if (weak.length > 0) {
    findings.push({
      severity: "low",
      detail: `${weak.length} page(s) lack visible methodology or review context`,
      fix: "Add process detail such as sample size, criteria, tools used, review process, or update notes",
    });
  }

  let status = "fail";
  if (average >= 7) status = "pass";
  else if (average >= 4) status = "partial";

  return {
    criterion,
    criterion_label,
    score: average,
    status,
    findings,
    fix_priority: average >= 7 ? "P3" : "P2",
  };
}
2519
2874
  function checkCitationReadyWriting(data) {
2520
2875
  const findings = [];
2521
2876
  if (!data.homepage) {
@@ -2611,8 +2966,8 @@ function checkAnswerFirstPlacement(data) {
2611
2966
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
2612
2967
  for (const p of earlyParagraphs) {
2613
2968
  const pText = p.replace(/<[^>]*>/g, "").trim();
2614
- const wordCount = pText.split(/\s+/).length;
2615
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
2969
+ const wordCount2 = pText.split(/\s+/).length;
2970
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
2616
2971
  shortAnswerCount++;
2617
2972
  break;
2618
2973
  }
@@ -2942,56 +3297,8 @@ function checkImageContextAI(data) {
2942
3297
  }
2943
3298
  return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
2944
3299
  }
2945
- var BOILERPLATE_RE = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
2946
- function isBoilerplateParagraph(text) {
2947
- const words = text.split(/\s+/).length;
2948
- if (words < 20 && BOILERPLATE_RE.test(text)) return true;
2949
- if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
2950
- return false;
2951
- }
2952
- function toShingles(text, n = 4) {
2953
- const words = text.split(/\s+/).filter((w) => w.length > 1);
2954
- const shingles = /* @__PURE__ */ new Set();
2955
- for (let i = 0; i <= words.length - n; i++) {
2956
- shingles.add(words.slice(i, i + n).join(" "));
2957
- }
2958
- return shingles;
2959
- }
2960
- function shingleSimilarity(a, b) {
2961
- if (a.size === 0 && b.size === 0) return 0;
2962
- let intersection = 0;
2963
- for (const s of a) {
2964
- if (b.has(s)) intersection++;
2965
- }
2966
- const union = a.size + b.size - intersection;
2967
- return union === 0 ? 0 : intersection / union;
2968
- }
2969
- function extractPageParagraphs(html) {
2970
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
2971
- const pMatches = cleaned.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
2972
- return pMatches.map((p) => {
2973
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
2974
- return { text, shingles: toShingles(text) };
2975
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
2976
- }
2977
- function splitIntoSectionsWithParagraphs(html) {
2978
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
2979
- const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
2980
- const sections = [];
2981
- for (const part of parts) {
2982
- const hMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
2983
- const heading = hMatch ? hMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
2984
- const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
2985
- const paragraphs = pMatches.map((p) => {
2986
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
2987
- return { text, shingles: toShingles(text) };
2988
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplateParagraph(p.text));
2989
- if (paragraphs.length > 0) sections.push({ heading, paragraphs });
2990
- }
2991
- return sections;
2992
- }
2993
3300
  function findIntraPageDuplicates(html) {
2994
- const sections = splitIntoSectionsWithParagraphs(html);
3301
+ const sections = extractDuplicateContentSections(html);
2995
3302
  if (sections.length < 2) return [];
2996
3303
  const pairs = [];
2997
3304
  for (let i = 0; i < sections.length; i++) {
@@ -3000,7 +3307,7 @@ function findIntraPageDuplicates(html) {
3000
3307
  for (const pA of sections[i].paragraphs) {
3001
3308
  if (found) break;
3002
3309
  for (const pB of sections[j].paragraphs) {
3003
- const sim = shingleSimilarity(pA.shingles, pB.shingles);
3310
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
3004
3311
  if (sim > 0.4) {
3005
3312
  pairs.push({
3006
3313
  headingA: sections[i].heading,
@@ -3080,11 +3387,11 @@ function checkCrossPageDuplication(data) {
3080
3387
  const findings = [];
3081
3388
  const pages = [];
3082
3389
  if (data.homepage) {
3083
- pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractPageParagraphs(data.homepage.text) });
3390
+ pages.push({ url: data.homepage.finalUrl || `https://${data.domain}/`, paragraphs: extractDuplicateContentParagraphs(data.homepage.text) });
3084
3391
  }
3085
3392
  if (data.blogSample) {
3086
3393
  for (const page of data.blogSample) {
3087
- pages.push({ url: page.finalUrl || "", paragraphs: extractPageParagraphs(page.text) });
3394
+ pages.push({ url: page.finalUrl || "", paragraphs: extractDuplicateContentParagraphs(page.text) });
3088
3395
  }
3089
3396
  }
3090
3397
  if (pages.length <= 1) {
@@ -3116,7 +3423,7 @@ function checkCrossPageDuplication(data) {
3116
3423
  const fpA = [...pA.shingles].slice(0, 5).join("|");
3117
3424
  if (siteBoilerprints.has(fpA)) continue;
3118
3425
  for (const pB of pages[j].paragraphs) {
3119
- const sim = shingleSimilarity(pA.shingles, pB.shingles);
3426
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
3120
3427
  if (sim > 0.4) {
3121
3428
  dupCount++;
3122
3429
  if (!sample) sample = pA.text.slice(0, 80);
@@ -3202,14 +3509,19 @@ function auditSiteFromData(data) {
3202
3509
  checkVisibleDateSignal(data),
3203
3510
  topicCoherence,
3204
3511
  checkContentDepth(data, topicCoherence.score),
3205
- // V2 criteria (#29-#34)
3512
+ // Helpful-content criteria (#29-#32)
3513
+ checkHelpfulPurposeAlignment(data),
3514
+ checkFirstHandExperienceSignals(data),
3515
+ checkCreatorTransparency(data),
3516
+ checkMethodologyTransparency(data),
3517
+ // V2 criteria (#33-#38)
3206
3518
  checkCitationReadyWriting(data),
3207
3519
  checkAnswerFirstPlacement(data),
3208
3520
  checkEvidencePackaging(data),
3209
3521
  checkEntityDisambiguation(data),
3210
3522
  checkExtractionFriction(data),
3211
3523
  checkImageContextAI(data),
3212
- // V3 criteria (#35-#36)
3524
+ // V3 criteria (#39-#40)
3213
3525
  checkDuplicateContent(data),
3214
3526
  checkCrossPageDuplication(data)
3215
3527
  ];
@@ -3241,6 +3553,10 @@ var WEIGHTS = {
3241
3553
  // Relevance to actual AI queries
3242
3554
  faq_section: 0.03,
3243
3555
  // Structured Q&A pairs
3556
+ helpful_purpose_alignment: 0.03,
3557
+ // Visitor-helpful vs search-first framing
3558
+ first_hand_experience_signals: 0.03,
3559
+ // Evidence of real use or observation
3244
3560
  // ─── Content Organization (~30%) ──────────────────────────────────────────
3245
3561
  // HOW easily AI engines can extract and trust your content.
3246
3562
  entity_consistency: 0.05,
@@ -3255,9 +3571,13 @@ var WEIGHTS = {
3255
3571
  // Expert attribution
3256
3572
  table_list_extractability: 0.03,
3257
3573
  // Extractable structured data
3258
- definition_patterns: 0.02,
3574
+ creator_transparency: 0.02,
3575
+ // Visible author/reviewer clarity
3576
+ methodology_transparency: 0.02,
3577
+ // Process disclosure
3578
+ definition_patterns: 0.015,
3259
3579
  // Clear definitions
3260
- visible_date_signal: 0.02,
3580
+ visible_date_signal: 0.015,
3261
3581
  // Publication date trust
3262
3582
  semantic_html: 0.02,
3263
3583
  // Clean semantic structure
@@ -3266,15 +3586,15 @@ var WEIGHTS = {
3266
3586
  // ─── Technical Plumbing (~15%) ────────────────────────────────────────────
3267
3587
  // WHETHER AI crawlers can find you. Table stakes with diminishing returns.
3268
3588
  content_cannibalization: 0.02,
3269
- llms_txt: 0.02,
3270
- robots_txt: 0.02,
3589
+ llms_txt: 0.01,
3590
+ robots_txt: 0.01,
3271
3591
  content_velocity: 0.02,
3272
- content_licensing: 0.02,
3592
+ content_licensing: 0.01,
3273
3593
  sitemap_completeness: 0.01,
3274
- canonical_url: 0.01,
3275
- rss_feed: 0.01,
3276
- schema_coverage: 0.01,
3277
- speakable_schema: 0.01,
3594
+ canonical_url: 5e-3,
3595
+ rss_feed: 0,
3596
+ schema_coverage: 0,
3597
+ speakable_schema: 0,
3278
3598
  // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
3279
3599
  // Citation quality, evidence packaging, and extraction friction.
3280
3600
  citation_ready_writing: 0.04,
@@ -3287,7 +3607,7 @@ var WEIGHTS = {
3287
3607
  // Clear entity boundaries
3288
3608
  extraction_friction: 0.02,
3289
3609
  // Sentence length, voice, jargon
3290
- image_context_ai: 0.01,
3610
+ image_context_ai: 5e-3,
3291
3611
  // Figure/figcaption, alt text quality
3292
3612
  // ─── V3 Criteria ────────────────────────────────────────────────────────
3293
3613
  duplicate_content: 0.05,
@@ -3307,8 +3627,8 @@ function calculateOverallScore(criteria) {
3307
3627
  let score = Math.round(weightedSum / totalWeight);
3308
3628
  const coherence = criteria.find((c) => c.criterion === "topic_coherence");
3309
3629
  if (coherence && coherence.score < 6) {
3310
- const cap2 = 35 + coherence.score * 5;
3311
- score = Math.min(score, cap2);
3630
+ const cap3 = 35 + coherence.score * 5;
3631
+ score = Math.min(score, cap3);
3312
3632
  }
3313
3633
  return score;
3314
3634
  }
@@ -3424,6 +3744,8 @@ var PILLARS = {
3424
3744
  "citation_ready_writing",
3425
3745
  "answer_first_placement",
3426
3746
  "evidence_packaging",
3747
+ "helpful_purpose_alignment",
3748
+ "first_hand_experience_signals",
3427
3749
  "duplicate_content",
3428
3750
  "cross_page_duplication"
3429
3751
  ],
@@ -3441,7 +3763,9 @@ var PILLARS = {
3441
3763
  "internal_linking",
3442
3764
  "content_freshness",
3443
3765
  "author_schema_depth",
3444
- "schema_markup"
3766
+ "schema_markup",
3767
+ "creator_transparency",
3768
+ "methodology_transparency"
3445
3769
  ],
3446
3770
  "Technical Foundation": [
3447
3771
  "semantic_html",
@@ -3471,6 +3795,8 @@ var CLIENT_NAMES = {
3471
3795
  citation_ready_writing: "Citation-Ready Writing",
3472
3796
  answer_first_placement: "Answer-First Placement",
3473
3797
  evidence_packaging: "Evidence Packaging",
3798
+ helpful_purpose_alignment: "Helpful Purpose Alignment",
3799
+ first_hand_experience_signals: "First-Hand Experience Signals",
3474
3800
  direct_answer_density: "Direct Answer Density",
3475
3801
  qa_content_format: "Q&A Content Format",
3476
3802
  query_answer_alignment: "Query-Answer Alignment",
@@ -3483,6 +3809,8 @@ var CLIENT_NAMES = {
3483
3809
  content_freshness: "Content Freshness",
3484
3810
  author_schema_depth: "Author & Expert Schema",
3485
3811
  schema_markup: "Schema Markup",
3812
+ creator_transparency: "Creator Transparency",
3813
+ methodology_transparency: "Methodology Transparency",
3486
3814
  semantic_html: "Semantic HTML",
3487
3815
  clean_html: "Clean HTML",
3488
3816
  visible_date_signal: "Visible Date Signal",
@@ -3509,6 +3837,8 @@ var PILLAR_WEIGHTS = {
3509
3837
  citation_ready_writing: 0.04,
3510
3838
  answer_first_placement: 0.03,
3511
3839
  evidence_packaging: 0.03,
3840
+ helpful_purpose_alignment: 0.03,
3841
+ first_hand_experience_signals: 0.03,
3512
3842
  duplicate_content: 0.05,
3513
3843
  cross_page_duplication: 0.03,
3514
3844
  direct_answer_density: 0.05,
@@ -3516,28 +3846,30 @@ var PILLAR_WEIGHTS = {
3516
3846
  query_answer_alignment: 0.04,
3517
3847
  faq_section: 0.03,
3518
3848
  table_list_extractability: 0.03,
3519
- definition_patterns: 0.02,
3849
+ definition_patterns: 0.015,
3520
3850
  entity_disambiguation: 0.02,
3521
3851
  entity_consistency: 0.05,
3522
3852
  internal_linking: 0.04,
3523
3853
  content_freshness: 0.04,
3524
3854
  author_schema_depth: 0.03,
3525
3855
  schema_markup: 0.03,
3856
+ creator_transparency: 0.02,
3857
+ methodology_transparency: 0.02,
3526
3858
  semantic_html: 0.02,
3527
3859
  clean_html: 0.02,
3528
- visible_date_signal: 0.02,
3860
+ visible_date_signal: 0.015,
3529
3861
  extraction_friction: 0.02,
3530
- image_context_ai: 0.01,
3531
- schema_coverage: 0.01,
3532
- speakable_schema: 0.01,
3862
+ image_context_ai: 5e-3,
3863
+ schema_coverage: 0,
3864
+ speakable_schema: 0,
3533
3865
  content_cannibalization: 0.02,
3534
- llms_txt: 0.02,
3535
- robots_txt: 0.02,
3866
+ llms_txt: 0.01,
3867
+ robots_txt: 0.01,
3536
3868
  content_velocity: 0.02,
3537
- content_licensing: 0.02,
3538
- canonical_url: 0.01,
3869
+ content_licensing: 0.01,
3870
+ canonical_url: 5e-3,
3539
3871
  sitemap_completeness: 0.01,
3540
- rss_feed: 0.01
3872
+ rss_feed: 0
3541
3873
  };
3542
3874
  var CRITERION_EFFORT = {
3543
3875
  topic_coherence: "High",
@@ -3547,6 +3879,8 @@ var CRITERION_EFFORT = {
3547
3879
  citation_ready_writing: "Medium",
3548
3880
  answer_first_placement: "Medium",
3549
3881
  evidence_packaging: "Medium",
3882
+ helpful_purpose_alignment: "Medium",
3883
+ first_hand_experience_signals: "Medium",
3550
3884
  duplicate_content: "Medium",
3551
3885
  cross_page_duplication: "Medium",
3552
3886
  direct_answer_density: "Medium",
@@ -3561,6 +3895,8 @@ var CRITERION_EFFORT = {
3561
3895
  content_freshness: "Low",
3562
3896
  author_schema_depth: "Low",
3563
3897
  schema_markup: "Medium",
3898
+ creator_transparency: "Low",
3899
+ methodology_transparency: "Low",
3564
3900
  semantic_html: "Low",
3565
3901
  clean_html: "Medium",
3566
3902
  visible_date_signal: "Low",
@@ -3585,6 +3921,8 @@ var FIX_DESCRIPTIONS = {
3585
3921
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
3586
3922
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
3587
3923
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",
3924
+ helpful_purpose_alignment: "Lead with useful, task-solving guidance instead of search-first filler.",
3925
+ first_hand_experience_signals: "Add concrete signs of first-hand use, testing, implementation, or observation.",
3588
3926
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
3589
3927
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
3590
3928
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
@@ -3597,6 +3935,8 @@ var FIX_DESCRIPTIONS = {
3597
3935
  content_freshness: "Add dateModified schema and visible last-updated dates.",
3598
3936
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
3599
3937
  schema_markup: "Implement JSON-LD structured data on key pages.",
3938
+ creator_transparency: "Show clear visible bylines, author pages, and reviewer details where readers expect them.",
3939
+ methodology_transparency: "Explain how content was tested, researched, reviewed, or updated.",
3600
3940
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
3601
3941
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
3602
3942
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
@@ -3696,6 +4036,10 @@ var CRITERION_LABELS = {
3696
4036
  "Visible Date Signal": "Visible Date Signal",
3697
4037
  "Topic Coherence": "Topic Coherence",
3698
4038
  "Content Depth": "Content Depth",
4039
+ "Helpful Purpose Alignment": "Helpful Purpose Alignment",
4040
+ "First-Hand Experience Signals": "First-Hand Experience Signals",
4041
+ "Creator Transparency": "Creator Transparency",
4042
+ "Methodology Transparency": "Methodology Transparency",
3699
4043
  "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
3700
4044
  "Answer-First Placement": "Answer-First Placement",
3701
4045
  "Evidence Packaging": "Evidence Packaging",
@@ -3801,6 +4145,8 @@ var CRITERION_WEIGHTS = {
3801
4145
  qa_content_format: 0.04,
3802
4146
  query_answer_alignment: 0.04,
3803
4147
  faq_section: 0.03,
4148
+ helpful_purpose_alignment: 0.03,
4149
+ first_hand_experience_signals: 0.03,
3804
4150
  // Content Organization (~30%)
3805
4151
  entity_consistency: 0.05,
3806
4152
  internal_linking: 0.04,
@@ -3808,28 +4154,30 @@ var CRITERION_WEIGHTS = {
3808
4154
  schema_markup: 0.03,
3809
4155
  author_schema_depth: 0.03,
3810
4156
  table_list_extractability: 0.03,
3811
- definition_patterns: 0.02,
3812
- visible_date_signal: 0.02,
4157
+ creator_transparency: 0.02,
4158
+ methodology_transparency: 0.02,
4159
+ definition_patterns: 0.015,
4160
+ visible_date_signal: 0.015,
3813
4161
  semantic_html: 0.02,
3814
4162
  clean_html: 0.02,
3815
4163
  // Technical Plumbing (~15%)
3816
4164
  content_cannibalization: 0.02,
3817
- llms_txt: 0.02,
3818
- robots_txt: 0.02,
4165
+ llms_txt: 0.01,
4166
+ robots_txt: 0.01,
3819
4167
  content_velocity: 0.02,
3820
- content_licensing: 0.02,
4168
+ content_licensing: 0.01,
3821
4169
  sitemap_completeness: 0.01,
3822
- canonical_url: 0.01,
3823
- rss_feed: 0.01,
3824
- schema_coverage: 0.01,
3825
- speakable_schema: 0.01,
4170
+ canonical_url: 5e-3,
4171
+ rss_feed: 0,
4172
+ schema_coverage: 0,
4173
+ speakable_schema: 0,
3826
4174
  // V2 Criteria (~15%)
3827
4175
  citation_ready_writing: 0.04,
3828
4176
  answer_first_placement: 0.03,
3829
4177
  evidence_packaging: 0.03,
3830
4178
  entity_disambiguation: 0.02,
3831
4179
  extraction_friction: 0.02,
3832
- image_context_ai: 0.01,
4180
+ image_context_ai: 5e-3,
3833
4181
  // V3 Criteria
3834
4182
  duplicate_content: 0.05,
3835
4183
  cross_page_duplication: 0.03
@@ -3870,6 +4218,16 @@ var OPPORTUNITY_TEMPLATES = {
3870
4218
  effort: "Medium",
3871
4219
  description: "Create a dedicated FAQ page with FAQPage schema markup. Cover common questions about your products, services, and industry to become a direct answer source for AI engines."
3872
4220
  },
4221
+ helpful_purpose_alignment: {
4222
+ name: "Improve Helpful Purpose Alignment",
4223
+ effort: "Medium",
4224
+ description: "Rewrite pages to solve the visitor task quickly and concretely. Reduce generic intros, search-first filler, and CTA interruptions before the first useful answer."
4225
+ },
4226
+ first_hand_experience_signals: {
4227
+ name: "Add First-Hand Experience Signals",
4228
+ effort: "Medium",
4229
+ description: "Show direct use, testing, implementation, or lived experience with concrete observations, examples, screenshots, and lessons learned."
4230
+ },
3873
4231
  original_data: {
3874
4232
  name: "Add Original Data & Case Studies",
3875
4233
  effort: "High",
@@ -3925,6 +4283,16 @@ var OPPORTUNITY_TEMPLATES = {
3925
4283
  effort: "Low",
3926
4284
  description: "Add Person schema for content authors with credentials, expertise, and sameAs links. Expert attribution strengthens E-E-A-T signals that AI engines use to evaluate source credibility."
3927
4285
  },
4286
+ creator_transparency: {
4287
+ name: "Improve Creator Transparency",
4288
+ effort: "Low",
4289
+ description: "Add visible bylines, author pages, and reviewer/editor details so readers can clearly tell who created the content and why they are credible."
4290
+ },
4291
+ methodology_transparency: {
4292
+ name: "Add Methodology Transparency",
4293
+ effort: "Low",
4294
+ description: "Explain how pages were tested, researched, reviewed, or updated. Add methodology, criteria, sample-size, or review-process details where users expect them."
4295
+ },
3928
4296
  fact_density: {
3929
4297
  name: "Increase Fact & Data Density",
3930
4298
  effort: "Medium",
@@ -4380,19 +4748,23 @@ var PAGE_CRITERIA = {
4380
4748
  content_freshness: { weight: 0.04, label: "Content Freshness Signals" },
4381
4749
  schema_markup: { weight: 0.03, label: "Schema.org Structured Data" },
4382
4750
  table_list_extractability: { weight: 0.03, label: "Table & List Extractability" },
4383
- definition_patterns: { weight: 0.02, label: "Definition Patterns" },
4384
- visible_date_signal: { weight: 0.02, label: "Visible Date Signal" },
4751
+ definition_patterns: { weight: 0.015, label: "Definition Patterns" },
4752
+ visible_date_signal: { weight: 0.015, label: "Visible Date Signal" },
4385
4753
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
4386
4754
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
4387
4755
  // Technical Plumbing
4388
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
4756
+ canonical_url: { weight: 5e-3, label: "Canonical URL Strategy" },
4389
4757
  // V2 Criteria
4390
4758
  citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
4391
4759
  answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
4392
4760
  evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
4761
+ helpful_purpose_alignment: { weight: 0.03, label: "Helpful Purpose Alignment" },
4762
+ first_hand_experience_signals: { weight: 0.03, label: "First-Hand Experience Signals" },
4393
4763
  entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
4394
4764
  extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
4395
- image_context_ai: { weight: 0.01, label: "Image Context for AI" },
4765
+ creator_transparency: { weight: 0.02, label: "Creator Transparency" },
4766
+ methodology_transparency: { weight: 0.02, label: "Methodology Transparency" },
4767
+ image_context_ai: { weight: 5e-3, label: "Image Context for AI" },
4396
4768
  duplicate_content: { weight: 0.05, label: "Duplicate Content Blocks" }
4397
4769
  };
4398
4770
  function extractJsonLdBlocks(html) {
@@ -4415,7 +4787,7 @@ function extractTypesFromJsonLd(blocks) {
4415
4787
  }
4416
4788
  return types;
4417
4789
  }
4418
- function getTextContent(html) {
4790
+ function getTextContent2(html) {
4419
4791
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4420
4792
  }
4421
4793
  function extractQuestionHeadings2(html) {
@@ -4443,7 +4815,7 @@ function countAnsweredQuestions(html) {
4443
4815
  }
4444
4816
  return { total: questions.length, answered };
4445
4817
  }
4446
- function cap(value, max) {
4818
+ function cap2(value, max) {
4447
4819
  return Math.min(value, max);
4448
4820
  }
4449
4821
  function scoreSchemaMarkup(html) {
@@ -4469,10 +4841,10 @@ function scoreSchemaMarkup(html) {
4469
4841
  for (const t of types) {
4470
4842
  if (knownTypes.includes(t)) knownCount++;
4471
4843
  }
4472
- score += cap(knownCount * 2, 4);
4844
+ score += cap2(knownCount * 2, 4);
4473
4845
  if (types.has("Organization") || types.has("LocalBusiness")) score += 2;
4474
4846
  if (types.has("FAQPage")) score += 1;
4475
- return cap(score, 10);
4847
+ return cap2(score, 10);
4476
4848
  }
4477
4849
  function scoreQAFormat(html) {
4478
4850
  const questions = extractQuestionHeadings2(html);
@@ -4484,7 +4856,7 @@ function scoreQAFormat(html) {
4484
4856
  if (answered >= 1) score += 3;
4485
4857
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4486
4858
  if (h1Matches.length === 1) score += 2;
4487
- return cap(score, 10);
4859
+ return cap2(score, 10);
4488
4860
  }
4489
4861
  function scoreCleanHtml(html) {
4490
4862
  let score = 0;
@@ -4493,15 +4865,15 @@ function scoreCleanHtml(html) {
4493
4865
  for (const tag of semantics) {
4494
4866
  if (html.toLowerCase().includes(tag)) semCount++;
4495
4867
  }
4496
- score += cap(semCount, 3);
4868
+ score += cap2(semCount, 3);
4497
4869
  const h1Matches = html.match(/<h1[\s>]/gi) || [];
4498
4870
  if (h1Matches.length === 1) score += 2;
4499
- const text = getTextContent(html);
4871
+ const text = getTextContent2(html);
4500
4872
  if (text.length > 500) score += 3;
4501
4873
  const hasTitle = /<title[^>]*>[^<]+<\/title>/i.test(html);
4502
4874
  const hasDesc = /<meta\s[^>]*name=["']description["'][^>]*content=["'][^"']+["']/i.test(html) || /<meta\s[^>]*content=["'][^"']+["'][^>]*name=["']description["']/i.test(html);
4503
4875
  if (hasTitle && hasDesc) score += 2;
4504
- return cap(score, 10);
4876
+ return cap2(score, 10);
4505
4877
  }
4506
4878
  function scoreFaqSection(html) {
4507
4879
  let score = 0;
@@ -4513,11 +4885,11 @@ function scoreFaqSection(html) {
4513
4885
  const questions = extractQuestionHeadings2(html);
4514
4886
  if (questions.length >= 10) score += 1;
4515
4887
  if (/<details[\s>]/i.test(html) || /accordion|collapsible|toggle/i.test(lowerHtml)) score += 1;
4516
- return cap(score, 10);
4888
+ return cap2(score, 10);
4517
4889
  }
4518
4890
  function scoreOriginalData(html) {
4519
4891
  let score = 0;
4520
- const text = getTextContent(html);
4892
+ const text = getTextContent2(html);
4521
4893
  if (/\b(our (study|analysis|research|survey|data|findings))\b/i.test(text)) {
4522
4894
  score += 3;
4523
4895
  } else if (/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(text)) {
@@ -4534,7 +4906,7 @@ function scoreOriginalData(html) {
4534
4906
  if (/href=["'][^"']*\/blog\b/i.test(html)) {
4535
4907
  score += 2;
4536
4908
  }
4537
- return cap(score, 10);
4909
+ return cap2(score, 10);
4538
4910
  }
4539
4911
  function scoreQueryAnswerAlignment(html) {
4540
4912
  const { total, answered } = countAnsweredQuestions(html);
@@ -4557,7 +4929,7 @@ function scoreContentFreshness(html) {
4557
4929
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
4558
4930
  const yearPattern = new RegExp(`\\b(${currentYear}|${currentYear - 1})\\b`);
4559
4931
  if (yearPattern.test(html)) score += 2;
4560
- return cap(score, 10);
4932
+ return cap2(score, 10);
4561
4933
  }
4562
4934
  function scoreTableListExtractability(html) {
4563
4935
  let score = 0;
@@ -4570,7 +4942,7 @@ function scoreTableListExtractability(html) {
4570
4942
  const listItems = html.match(/<li[\s>]/gi) || [];
4571
4943
  if (listItems.length >= 10) score += 1;
4572
4944
  if (/<dl[\s>]/i.test(html)) score += 1;
4573
- return cap(score, 10);
4945
+ return cap2(score, 10);
4574
4946
  }
4575
4947
  function scoreDirectAnswerDensity(html) {
4576
4948
  let score = 0;
@@ -4586,9 +4958,9 @@ function scoreDirectAnswerDensity(html) {
4586
4958
  }
4587
4959
  if (snippetCount >= 3) score += 2;
4588
4960
  else if (snippetCount >= 1) score += 1;
4589
- const directOpeners = getTextContent(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4961
+ const directOpeners = getTextContent2(html).match(/\b(yes|no|in short|the answer is|simply put|in summary)\b/gi) || [];
4590
4962
  if (directOpeners.length >= 2) score += 2;
4591
- return cap(score, 10);
4963
+ return cap2(score, 10);
4592
4964
  }
4593
4965
  function scoreSemanticHtml(html) {
4594
4966
  let score = 0;
@@ -4598,7 +4970,7 @@ function scoreSemanticHtml(html) {
4598
4970
  for (const el of elements) {
4599
4971
  if (lowerHtml.includes(el)) count++;
4600
4972
  }
4601
- score += cap(Math.floor(count * 0.7), 4);
4973
+ score += cap2(Math.floor(count * 0.7), 4);
4602
4974
  const imgTags = html.match(/<img\s[^>]*>/gi) || [];
4603
4975
  if (imgTags.length > 0) {
4604
4976
  let withAlt = 0;
@@ -4609,11 +4981,11 @@ function scoreSemanticHtml(html) {
4609
4981
  }
4610
4982
  if (/<html[^>]*\slang=["'][^"']+["']/i.test(html)) score += 2;
4611
4983
  if (/\baria-/i.test(html)) score += 2;
4612
- return cap(score, 10);
4984
+ return cap2(score, 10);
4613
4985
  }
4614
4986
  function scoreFactDensity(html) {
4615
4987
  let score = 0;
4616
- const text = getTextContent(html);
4988
+ const text = getTextContent2(html);
4617
4989
  const numericPatterns = text.match(/\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b|\b\d+\s*(million|billion|thousand|users|customers|employees)\b/gi) || [];
4618
4990
  if (numericPatterns.length >= 6) score += 5;
4619
4991
  else if (numericPatterns.length >= 3) score += 3;
@@ -4626,11 +4998,11 @@ function scoreFactDensity(html) {
4626
4998
  if (/\baccording to\b|\bsource:\s|\bcited\b|\breported by\b/i.test(text)) score += 2;
4627
4999
  const units = text.match(/\b\d+\s*(kg|lb|miles|km|hours|minutes|days|months|years|GB|MB|TB)\b/gi) || [];
4628
5000
  if (units.length >= 2) score += 1;
4629
- return cap(score, 10);
5001
+ return cap2(score, 10);
4630
5002
  }
4631
5003
  function scoreDefinitionPatterns(html) {
4632
5004
  let score = 0;
4633
- const text = getTextContent(html);
5005
+ const text = getTextContent2(html);
4634
5006
  const defPatterns = text.match(/\b(is a|is an|refers to|defined as|means that|also known as|abbreviated as)\b/gi) || [];
4635
5007
  if (defPatterns.length >= 3) score += 5;
4636
5008
  else if (defPatterns.length >= 1) score += 3;
@@ -4638,7 +5010,7 @@ function scoreDefinitionPatterns(html) {
4638
5010
  if (/\b(is a|is an|refers to|defined as)\b/i.test(early)) score += 2;
4639
5011
  if (/<dfn[\s>]/i.test(html) || /<abbr[\s>]/i.test(html)) score += 1;
4640
5012
  if (/<dl[\s>]/i.test(html) || /glossary/i.test(html)) score += 2;
4641
- return cap(score, 10);
5013
+ return cap2(score, 10);
4642
5014
  }
4643
5015
  function scoreCanonicalUrl(html, url) {
4644
5016
  let score = 0;
@@ -4659,7 +5031,7 @@ function scoreCanonicalUrl(html, url) {
4659
5031
  if (canonicalHref.startsWith("https://")) score += 2;
4660
5032
  const allCanonicals = html.match(/<link[^>]*rel=["']canonical["'][^>]*>/gi) || [];
4661
5033
  if (allCanonicals.length === 1) score += 1;
4662
- return cap(score, 10);
5034
+ return cap2(score, 10);
4663
5035
  }
4664
5036
  function scoreVisibleDateSignal(html) {
4665
5037
  let score = 0;
@@ -4678,11 +5050,11 @@ function scoreVisibleDateSignal(html) {
4678
5050
  } catch {
4679
5051
  }
4680
5052
  }
4681
- return cap(score, 10);
5053
+ return cap2(score, 10);
4682
5054
  }
4683
5055
  function scoreCitationReadyWriting(html) {
4684
5056
  let score = 0;
4685
- const text = getTextContent(html);
5057
+ const text = getTextContent2(html);
4686
5058
  const defSentences = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
4687
5059
  if (defSentences.length >= 3) score += 3;
4688
5060
  else if (defSentences.length >= 1) score += 1;
@@ -4711,7 +5083,7 @@ function scoreCitationReadyWriting(html) {
4711
5083
  );
4712
5084
  if (quotableLines.length >= 2) score += 2;
4713
5085
  else if (quotableLines.length >= 1) score += 1;
4714
- return cap(score, 10);
5086
+ return cap2(score, 10);
4715
5087
  }
4716
5088
  function scoreAnswerFirstPlacement(html) {
4717
5089
  let score = 0;
@@ -4722,8 +5094,8 @@ function scoreAnswerFirstPlacement(html) {
4722
5094
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
4723
5095
  for (const p of earlyParagraphs) {
4724
5096
  const pText = p.replace(/<[^>]*>/g, "").trim();
4725
- const wordCount = pText.split(/\s+/).length;
4726
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5097
+ const wordCount2 = pText.split(/\s+/).length;
5098
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
4727
5099
  score += 4;
4728
5100
  break;
4729
5101
  }
@@ -4744,11 +5116,11 @@ function scoreAnswerFirstPlacement(html) {
4744
5116
  score += 3;
4745
5117
  }
4746
5118
  }
4747
- return cap(score, 10);
5119
+ return cap2(score, 10);
4748
5120
  }
4749
5121
  function scoreEvidencePackaging(html) {
4750
5122
  let score = 0;
4751
- const text = getTextContent(html);
5123
+ const text = getTextContent2(html);
4752
5124
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
4753
5125
  let inlineCitations = 0;
4754
5126
  for (const p of paragraphs) {
@@ -4766,11 +5138,11 @@ function scoreEvidencePackaging(html) {
4766
5138
  const sourcedStats = text.match(/\d+(\.\d+)?(%|\s*(million|billion|thousand|percent))\b[^.]*\b[A-Z][a-z]+\b/gi) || [];
4767
5139
  if (sourcedStats.length >= 2) score += 2;
4768
5140
  else if (sourcedStats.length >= 1) score += 1;
4769
- return cap(score, 10);
5141
+ return cap2(score, 10);
4770
5142
  }
4771
5143
  function scoreEntityDisambiguation(html) {
4772
5144
  let score = 0;
4773
- const text = getTextContent(html);
5145
+ const text = getTextContent2(html);
4774
5146
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
4775
5147
  if (!h1Match) return 3;
4776
5148
  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
@@ -4788,11 +5160,11 @@ function scoreEntityDisambiguation(html) {
4788
5160
  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
4789
5161
  score += 3;
4790
5162
  }
4791
- return cap(score, 10);
5163
+ return cap2(score, 10);
4792
5164
  }
4793
5165
  function scoreExtractionFriction(html) {
4794
5166
  let score = 0;
4795
- const text = getTextContent(html);
5167
+ const text = getTextContent2(html);
4796
5168
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
4797
5169
  const avgLen = sentences.length > 0 ? sentences.map((s) => s.trim().split(/\s+/).length).reduce((a, b) => a + b, 0) / sentences.length : 0;
4798
5170
  if (avgLen > 0 && avgLen < 20) score += 3;
@@ -4815,7 +5187,7 @@ function scoreExtractionFriction(html) {
4815
5187
  if (/display\s*:\s*none|visibility\s*:\s*hidden/i.test(html) && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
4816
5188
  score = Math.max(0, score - 2);
4817
5189
  }
4818
- return cap(score, 10);
5190
+ return cap2(score, 10);
4819
5191
  }
4820
5192
  function scoreImageContextAI(html) {
4821
5193
  let score = 0;
@@ -4840,20 +5212,13 @@ function scoreImageContextAI(html) {
4840
5212
  else if (goodAltCount > 0) score += 1;
4841
5213
  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
4842
5214
  if (contextualImages.length > 0) score += 3;
4843
- return cap(score, 10);
4844
- }
4845
- var BOILERPLATE_PATTERNS = /\b(sign up|subscribe|get started|contact us|request a demo|free trial|book a call|schedule a|learn more|click here|follow us|share this|copyright|all rights reserved|privacy policy|terms of service)\b/i;
4846
- function isBoilerplate(text) {
4847
- const words = text.split(/\s+/).length;
4848
- if (words < 20 && BOILERPLATE_PATTERNS.test(text)) return true;
4849
- if (/\b(cookie|gdpr|consent|opt.out)\b/i.test(text) && words < 30) return true;
4850
- return false;
5215
+ return cap2(score, 10);
4851
5216
  }
4852
5217
  function scoreDuplicateContent(html) {
4853
5218
  return scoreDuplicateContentDetailed(html).score;
4854
5219
  }
4855
5220
  function scoreDuplicateContentDetailed(html) {
4856
- const sections = extractSectionsWithParagraphs(html);
5221
+ const sections = extractDuplicateContentSections(html);
4857
5222
  if (sections.length < 2) return { score: 10, duplicates: [] };
4858
5223
  const totalParagraphs = sections.reduce((sum, s) => sum + s.paragraphs.length, 0);
4859
5224
  const duplicates = [];
@@ -4862,7 +5227,7 @@ function scoreDuplicateContentDetailed(html) {
4862
5227
  for (let j = i + 1; j < sections.length; j++) {
4863
5228
  for (const pA of sections[i].paragraphs) {
4864
5229
  for (const pB of sections[j].paragraphs) {
4865
- const sim = shingleJaccard(pA.shingles, pB.shingles);
5230
+ const sim = shingleJaccardSimilarity(pA.shingles, pB.shingles);
4866
5231
  if (sim > 0.4) {
4867
5232
  dupParagraphCount++;
4868
5233
  duplicates.push({
@@ -4891,41 +5256,6 @@ function scoreDuplicateContentDetailed(html) {
4891
5256
  }
4892
5257
  return { score, duplicates };
4893
5258
  }
4894
- function extractSectionsWithParagraphs(html) {
4895
- const cleaned = html.replace(/<(script|style|nav|header|footer|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, "").replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
4896
- const parts = cleaned.split(/(?=<h[23]\b[^>]*>)/i);
4897
- const sections = [];
4898
- for (const part of parts) {
4899
- const headingMatch = part.match(/<h[23]\b[^>]*>([\s\S]*?)<\/h[23]>/i);
4900
- const heading = headingMatch ? headingMatch[1].replace(/<[^>]*>/g, "").trim() : "(intro)";
4901
- const pMatches = part.match(/<p\b[^>]*>([\s\S]*?)<\/p>/gi) || [];
4902
- const paragraphs = pMatches.map((p) => {
4903
- const text = p.replace(/<[^>]*>/g, " ").replace(/&\w+;/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
4904
- return { text, shingles: buildShingles(text, 4) };
4905
- }).filter((p) => p.shingles.size >= 3 && !isBoilerplate(p.text));
4906
- if (paragraphs.length > 0) {
4907
- sections.push({ heading, paragraphs });
4908
- }
4909
- }
4910
- return sections;
4911
- }
4912
- function buildShingles(text, n) {
4913
- const words = text.split(/\s+/).filter((w) => w.length > 1);
4914
- const shingles = /* @__PURE__ */ new Set();
4915
- for (let i = 0; i <= words.length - n; i++) {
4916
- shingles.add(words.slice(i, i + n).join(" "));
4917
- }
4918
- return shingles;
4919
- }
4920
- function shingleJaccard(a, b) {
4921
- if (a.size === 0 && b.size === 0) return 0;
4922
- let intersection = 0;
4923
- for (const s of a) {
4924
- if (b.has(s)) intersection++;
4925
- }
4926
- const union = a.size + b.size - intersection;
4927
- return union === 0 ? 0 : intersection / union;
4928
- }
4929
5259
  var SCORING_FUNCTIONS = {
4930
5260
  schema_markup: scoreSchemaMarkup,
4931
5261
  qa_content_format: scoreQAFormat,
@@ -4944,8 +5274,12 @@ var SCORING_FUNCTIONS = {
4944
5274
  citation_ready_writing: scoreCitationReadyWriting,
4945
5275
  answer_first_placement: scoreAnswerFirstPlacement,
4946
5276
  evidence_packaging: scoreEvidencePackaging,
5277
+ helpful_purpose_alignment: scoreHelpfulPurposeAlignment,
5278
+ first_hand_experience_signals: scoreFirstHandExperienceSignals,
4947
5279
  entity_disambiguation: scoreEntityDisambiguation,
4948
5280
  extraction_friction: scoreExtractionFriction,
5281
+ creator_transparency: scoreCreatorTransparency,
5282
+ methodology_transparency: scoreMethodologyTransparency,
4949
5283
  image_context_ai: scoreImageContextAI,
4950
5284
  duplicate_content: scoreDuplicateContent
4951
5285
  };
@@ -4990,7 +5324,7 @@ function extractTitle(html) {
4990
5324
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
4991
5325
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
4992
5326
  }
4993
- function getTextContent2(html) {
5327
+ function getTextContent3(html) {
4994
5328
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
4995
5329
  }
4996
5330
  function countWords2(text) {
@@ -5050,9 +5384,9 @@ function checkMissingOgTags(html) {
5050
5384
  }
5051
5385
  return null;
5052
5386
  }
5053
- function checkThinContent(wordCount) {
5054
- if (wordCount < 300) {
5055
- return { check: "thin-content", label: `Thin content (${wordCount} words)`, severity: "warning" };
5387
+ function checkThinContent(wordCount2) {
5388
+ if (wordCount2 < 300) {
5389
+ return { check: "thin-content", label: `Thin content (${wordCount2} words)`, severity: "warning" };
5056
5390
  }
5057
5391
  return null;
5058
5392
  }
@@ -5149,15 +5483,15 @@ function checkNoAnswerBlock(html) {
5149
5483
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
5150
5484
  for (const p of earlyParagraphs) {
5151
5485
  const pText = p.replace(/<[^>]*>/g, "").trim();
5152
- const wordCount = pText.split(/\s+/).length;
5153
- if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
5486
+ const wordCount2 = pText.split(/\s+/).length;
5487
+ if (wordCount2 >= 40 && wordCount2 <= 80 && first300Words.includes(pText.slice(0, 50))) {
5154
5488
  return null;
5155
5489
  }
5156
5490
  }
5157
5491
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
5158
5492
  }
5159
5493
  function checkNoEvidence(html, url) {
5160
- const text = getTextContent2(html);
5494
+ const text = getTextContent3(html);
5161
5495
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
5162
5496
  let inlineCitations = 0;
5163
5497
  for (const p of paragraphs) {
@@ -5171,7 +5505,7 @@ function checkNoEvidence(html, url) {
5171
5505
  return null;
5172
5506
  }
5173
5507
  function checkHasCitationReadyContent(html) {
5174
- const text = getTextContent2(html);
5508
+ const text = getTextContent3(html);
5175
5509
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
5176
5510
  let quotable = 0;
5177
5511
  for (const s of sentences) {
@@ -5196,8 +5530,8 @@ function checkDuplicateContentBlocks(html) {
5196
5530
  }
5197
5531
  function analyzePage(html, url, category) {
5198
5532
  const title = extractTitle(html);
5199
- const textContent = getTextContent2(html);
5200
- const wordCount = countWords2(textContent);
5533
+ const textContent = getTextContent3(html);
5534
+ const wordCount2 = countWords2(textContent);
5201
5535
  const issues = [];
5202
5536
  const strengths = [];
5203
5537
  const issueChecks = [
@@ -5208,7 +5542,7 @@ function analyzePage(html, url, category) {
5208
5542
  checkNoSchema(html),
5209
5543
  checkMissingCanonical(html),
5210
5544
  checkMissingOgTags(html),
5211
- checkThinContent(wordCount),
5545
+ checkThinContent(wordCount2),
5212
5546
  checkImagesMissingAlt(html),
5213
5547
  checkNoInternalLinks(html, url),
5214
5548
  checkNoAnswerBlock(html),
@@ -5227,7 +5561,7 @@ function analyzePage(html, url, category) {
5227
5561
  if (result) strengths.push(result);
5228
5562
  }
5229
5563
  const { aeoScore, criterionScores } = scorePage(html, url);
5230
- return { url, title, category, wordCount, issues, strengths, aeoScore, criterionScores };
5564
+ return { url, title, category, wordCount: wordCount2, issues, strengths, aeoScore, criterionScores };
5231
5565
  }
5232
5566
  function analyzeAllPages(siteData) {
5233
5567
  const reviews = [];
@@ -5359,7 +5693,7 @@ function extractTitle2(html) {
5359
5693
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
5360
5694
  return match ? match[1].replace(/\s+/g, " ").trim() : "";
5361
5695
  }
5362
- function getTextContent3(html) {
5696
+ function getTextContent4(html) {
5363
5697
  return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
5364
5698
  }
5365
5699
  function countWords3(text) {
@@ -5509,12 +5843,12 @@ function buildLinkGraph(pages, domain, homepageUrl) {
5509
5843
  const norm = normalizeUrl2(url);
5510
5844
  if (nodes.has(norm)) continue;
5511
5845
  const title = extractTitle2(page.text);
5512
- const text = getTextContent3(page.text);
5513
- const wordCount = countWords3(text);
5846
+ const text = getTextContent4(page.text);
5847
+ const wordCount2 = countWords3(text);
5514
5848
  nodes.set(norm, {
5515
5849
  url: norm,
5516
5850
  title,
5517
- wordCount,
5851
+ wordCount: wordCount2,
5518
5852
  category: page.category || "content",
5519
5853
  inDegree: 0,
5520
5854
  outDegree: 0,
@@ -5582,6 +5916,8 @@ var CRITERION_WEIGHTS2 = {
5582
5916
  qa_content_format: 0.04,
5583
5917
  query_answer_alignment: 0.04,
5584
5918
  faq_section: 0.03,
5919
+ helpful_purpose_alignment: 0.03,
5920
+ first_hand_experience_signals: 0.03,
5585
5921
  // Content Organization (~30%)
5586
5922
  entity_consistency: 0.05,
5587
5923
  internal_linking: 0.04,
@@ -5589,30 +5925,32 @@ var CRITERION_WEIGHTS2 = {
5589
5925
  schema_markup: 0.03,
5590
5926
  author_schema_depth: 0.03,
5591
5927
  table_list_extractability: 0.03,
5592
- definition_patterns: 0.02,
5593
- visible_date_signal: 0.02,
5928
+ creator_transparency: 0.02,
5929
+ methodology_transparency: 0.02,
5930
+ definition_patterns: 0.015,
5931
+ visible_date_signal: 0.015,
5594
5932
  semantic_html: 0.02,
5595
5933
  clean_html: 0.02,
5596
5934
  // Technical Plumbing (~15%)
5597
5935
  content_cannibalization: 0.02,
5598
5936
  duplicate_content: 0.05,
5599
5937
  cross_page_duplication: 0.03,
5600
- llms_txt: 0.02,
5601
- robots_txt: 0.02,
5938
+ llms_txt: 0.01,
5939
+ robots_txt: 0.01,
5602
5940
  content_velocity: 0.02,
5603
- content_licensing: 0.02,
5941
+ content_licensing: 0.01,
5604
5942
  sitemap_completeness: 0.01,
5605
- canonical_url: 0.01,
5606
- rss_feed: 0.01,
5607
- schema_coverage: 0.01,
5608
- speakable_schema: 0.01,
5943
+ canonical_url: 5e-3,
5944
+ rss_feed: 0,
5945
+ schema_coverage: 0,
5946
+ speakable_schema: 0,
5609
5947
  // V2 Criteria (~15%)
5610
5948
  citation_ready_writing: 0.04,
5611
5949
  answer_first_placement: 0.03,
5612
5950
  evidence_packaging: 0.03,
5613
5951
  entity_disambiguation: 0.02,
5614
5952
  extraction_friction: 0.02,
5615
- image_context_ai: 0.01
5953
+ image_context_ai: 5e-3
5616
5954
  };
5617
5955
  var PHASE_CONFIG = [
5618
5956
  {
@@ -5642,6 +5980,8 @@ var PHASE_CONFIG = [
5642
5980
  "answer_first_placement",
5643
5981
  "evidence_packaging",
5644
5982
  "entity_disambiguation",
5983
+ "helpful_purpose_alignment",
5984
+ "first_hand_experience_signals",
5645
5985
  "duplicate_content",
5646
5986
  "cross_page_duplication"
5647
5987
  ]
@@ -5655,6 +5995,8 @@ var PHASE_CONFIG = [
5655
5995
  "schema_coverage",
5656
5996
  "speakable_schema",
5657
5997
  "author_schema_depth",
5998
+ "creator_transparency",
5999
+ "methodology_transparency",
5658
6000
  "content_licensing",
5659
6001
  "entity_consistency",
5660
6002
  "semantic_html",
@@ -5677,7 +6019,7 @@ function impactFromScore(score) {
5677
6019
  }
5678
6020
  function effortForCriterion(criterion, score) {
5679
6021
  const trivialCriteria = ["llms_txt", "robots_txt", "canonical_url", "content_licensing", "visible_date_signal"];
5680
- const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "semantic_html", "definition_patterns", "content_freshness"];
6022
+ const lowCriteria = ["rss_feed", "sitemap_completeness", "speakable_schema", "author_schema_depth", "creator_transparency", "methodology_transparency", "semantic_html", "definition_patterns", "content_freshness"];
5681
6023
  const highCriteria = ["original_data", "content_velocity", "content_cannibalization"];
5682
6024
  if (trivialCriteria.includes(criterion)) return score <= 3 ? "low" : "trivial";
5683
6025
  if (lowCriteria.includes(criterion)) return score <= 3 ? "medium" : "low";
@@ -5944,6 +6286,58 @@ Sitemap: https://example.com/sitemap.xml`,
5944
6286
  pageCount: affected?.length
5945
6287
  }];
5946
6288
  },
6289
+ helpful_purpose_alignment: (c, pages) => {
6290
+ if (c.score >= 10) return [];
6291
+ const impact = impactFromScore(c.score);
6292
+ const effort = effortForCriterion("helpful_purpose_alignment", c.score);
6293
+ const affected = getAffectedPages("helpful_purpose_alignment", pages);
6294
+ return [{
6295
+ id: "fix-helpful-purpose-alignment",
6296
+ criterion: c.criterion_label,
6297
+ criterionId: c.criterion,
6298
+ title: "Make pages solve the user task faster",
6299
+ description: "Reduce search-first filler and rewrite pages so the promised task is resolved quickly with concrete guidance, tradeoffs, and next steps.",
6300
+ impact,
6301
+ effort,
6302
+ impactScore: 0,
6303
+ category: "content",
6304
+ steps: [
6305
+ "Rewrite first paragraphs to answer the user need within the first 150-300 words",
6306
+ 'Remove generic intros like "In this guide" and broad filler that could fit any topic',
6307
+ "Add concrete decision help: tradeoffs, risks, constraints, and next steps",
6308
+ "Move aggressive CTAs below the first useful answer block"
6309
+ ],
6310
+ successCriteria: "Pages lead with task-solving guidance instead of generic search-first framing",
6311
+ affectedPages: affected,
6312
+ pageCount: affected?.length
6313
+ }];
6314
+ },
6315
+ first_hand_experience_signals: (c, pages) => {
6316
+ if (c.score >= 10) return [];
6317
+ const impact = impactFromScore(c.score);
6318
+ const effort = effortForCriterion("first_hand_experience_signals", c.score);
6319
+ const affected = getAffectedPages("first_hand_experience_signals", pages);
6320
+ return [{
6321
+ id: "fix-first-hand-experience",
6322
+ criterion: c.criterion_label,
6323
+ criterionId: c.criterion,
6324
+ title: "Add first-hand experience signals",
6325
+ description: "Show real use, testing, implementation, or lived experience instead of relying on generic summary content.",
6326
+ impact,
6327
+ effort,
6328
+ impactScore: 0,
6329
+ category: "content",
6330
+ steps: [
6331
+ "Add specific observations from real use, testing, or implementation",
6332
+ "Document limitations, edge cases, or lessons learned in practice",
6333
+ "Include screenshots, photos, before/after metrics, or original artifacts where relevant",
6334
+ "Rewrite generic sections to reflect direct experience with the subject matter"
6335
+ ],
6336
+ successCriteria: "Key pages contain credible signs of direct use or observation, not just generic advice",
6337
+ affectedPages: affected,
6338
+ pageCount: affected?.length
6339
+ }];
6340
+ },
5947
6341
  original_data: (c, pages) => {
5948
6342
  if (c.score >= 10) return [];
5949
6343
  const impact = impactFromScore(c.score);
@@ -6310,6 +6704,58 @@ Summarization: yes`,
6310
6704
  successCriteria: "Articles have Person schema for authors with credentials"
6311
6705
  }];
6312
6706
  },
6707
+ creator_transparency: (c, pages) => {
6708
+ if (c.score >= 10) return [];
6709
+ const impact = impactFromScore(c.score);
6710
+ const effort = effortForCriterion("creator_transparency", c.score);
6711
+ const affected = getAffectedPages("creator_transparency", pages);
6712
+ return [{
6713
+ id: "fix-creator-transparency",
6714
+ criterion: c.criterion_label,
6715
+ criterionId: c.criterion,
6716
+ title: "Make content creators clearly visible",
6717
+ description: "Add visible bylines, author pages, and reviewer/editor attribution so readers can clearly tell who created the content.",
6718
+ impact,
6719
+ effort,
6720
+ impactScore: 0,
6721
+ category: "trust",
6722
+ steps: [
6723
+ "Add visible bylines to article-like pages where readers expect them",
6724
+ "Link author names to author pages with role, expertise area, and relevant background",
6725
+ "Add reviewer or editor attribution on sensitive or expert content",
6726
+ "Keep visible creator identity consistent with schema markup"
6727
+ ],
6728
+ successCriteria: "Article-like pages have clear visible bylines and linked creator context",
6729
+ affectedPages: affected,
6730
+ pageCount: affected?.length
6731
+ }];
6732
+ },
6733
+ methodology_transparency: (c, pages) => {
6734
+ if (c.score >= 10) return [];
6735
+ const impact = impactFromScore(c.score);
6736
+ const effort = effortForCriterion("methodology_transparency", c.score);
6737
+ const affected = getAffectedPages("methodology_transparency", pages);
6738
+ return [{
6739
+ id: "fix-methodology-transparency",
6740
+ criterion: c.criterion_label,
6741
+ criterionId: c.criterion,
6742
+ title: "Explain how content was tested or reviewed",
6743
+ description: "Add methodology, criteria, testing, review, or update-process details where users would expect them.",
6744
+ impact,
6745
+ effort,
6746
+ impactScore: 0,
6747
+ category: "trust",
6748
+ steps: [
6749
+ 'Add a "How we tested", "Methodology", or review-process section where relevant',
6750
+ "Document criteria, tools used, sample size, timeframe, or update policy",
6751
+ "Disclose AI assistance when a reasonable reader would expect that context",
6752
+ "Support methodology notes with screenshots, tables, or process artifacts when possible"
6753
+ ],
6754
+ successCriteria: "Review, comparison, and research-style pages explain how conclusions were produced",
6755
+ affectedPages: affected,
6756
+ pageCount: affected?.length
6757
+ }];
6758
+ },
6313
6759
  fact_density: (c, pages) => {
6314
6760
  if (c.score >= 10) return [];
6315
6761
  const impact = impactFromScore(c.score);