aeorank 2.3.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -3,7 +3,7 @@ import {
3
3
  extractAllUrlsFromSitemap,
4
4
  extractInternalLinks,
5
5
  inferCategory
6
- } from "./chunk-PKJIKMLV.js";
6
+ } from "./chunk-OCLAIHX6.js";
7
7
 
8
8
  // src/parked-domain.ts
9
9
  var PARKING_PATHS = ["/lander", "/parking", "/park", "/sedoparking"];
@@ -892,6 +892,15 @@ function checkTableListExtractability(data) {
892
892
  } else {
893
893
  findings.push({ severity: "low", detail: "No definition lists (<dl>) found", fix: "Use <dl>/<dt>/<dd> for term-definition pairs to improve AI extractability" });
894
894
  }
895
+ for (const table of tablesWithHeaders) {
896
+ const headerCells = (table.match(/<th[\s>]/gi) || []).length;
897
+ const dataCells = (table.match(/<td[\s>]/gi) || []).length;
898
+ if (headerCells >= 3 && dataCells >= headerCells * 2) {
899
+ score += 1;
900
+ findings.push({ severity: "info", detail: "Meaningful comparison table found (3+ columns \xD7 3+ rows)" });
901
+ break;
902
+ }
903
+ }
895
904
  return { criterion: "table_list_extractability", criterion_label: "Table & List Extractability", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
896
905
  }
897
906
  function checkDefinitionPatterns(data) {
@@ -1815,7 +1824,83 @@ function extractRawDataSummary(data) {
1815
1824
  // Full-crawl stats
1816
1825
  crawl_discovered: data.crawlStats?.discovered ?? 0,
1817
1826
  crawl_fetched: data.crawlStats?.fetched ?? 0,
1818
- crawl_skipped: data.crawlStats?.skipped ?? 0
1827
+ crawl_skipped: data.crawlStats?.skipped ?? 0,
1828
+ // V2 criteria fields
1829
+ citation_ready_sentences: (() => {
1830
+ const combinedText = text + " " + (data.blogSample?.map((p) => p.text.replace(/<[^>]*>/g, " ")).join(" ") || "");
1831
+ return (combinedText.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || []).length;
1832
+ })(),
1833
+ answer_first_ratio: (() => {
1834
+ const pages = [html, ...data.blogSample?.map((p) => p.text) || []];
1835
+ let answerFirst = 0;
1836
+ for (const pageHtml of pages) {
1837
+ const bodyMatch = pageHtml.match(/<body[^>]*>([\s\S]*)/i);
1838
+ const bodyHtml = bodyMatch ? bodyMatch[1] : pageHtml;
1839
+ const earlyParas = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
1840
+ for (const p of earlyParas) {
1841
+ const pText = p.replace(/<[^>]*>/g, "").trim();
1842
+ const wc = pText.split(/\s+/).length;
1843
+ if (wc >= 40 && wc <= 80) {
1844
+ answerFirst++;
1845
+ break;
1846
+ }
1847
+ }
1848
+ }
1849
+ return pages.length > 0 ? Math.round(answerFirst / pages.length * 100) : 0;
1850
+ })(),
1851
+ evidence_citations_avg: (() => {
1852
+ const allHtml = html + "\n" + (data.blogSample?.map((p) => p.text).join("\n") || "");
1853
+ const paragraphs = allHtml.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
1854
+ let citations = 0;
1855
+ const domainLower = data.domain.replace(/^www\./, "").toLowerCase();
1856
+ for (const p of paragraphs) {
1857
+ const links = p.match(/<a[^>]*href=["'](https?:\/\/[^"']+)["'][^>]*>/gi) || [];
1858
+ for (const link of links) {
1859
+ const href = link.match(/href=["'](https?:\/\/[^"']+)["']/i);
1860
+ if (href) {
1861
+ try {
1862
+ const ld = new URL(href[1]).hostname.replace(/^www\./, "").toLowerCase();
1863
+ if (ld !== domainLower) citations++;
1864
+ } catch {
1865
+ }
1866
+ }
1867
+ }
1868
+ }
1869
+ const pageCount = Math.max(1, 1 + (data.blogSample?.length ?? 0));
1870
+ return Math.round(citations / pageCount * 10) / 10;
1871
+ })(),
1872
+ entity_disambiguation_ratio: (() => {
1873
+ const pages = [html, ...data.blogSample?.map((p) => p.text) || []];
1874
+ let defined = 0;
1875
+ for (const pageHtml of pages) {
1876
+ const h1Match = pageHtml.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
1877
+ if (!h1Match) continue;
1878
+ const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
1879
+ const h1Words = h1Text.split(/\s+/).filter((w) => w.length > 3);
1880
+ const primaryNoun = h1Words.sort((a, b) => b.length - a.length)[0] || "";
1881
+ if (!primaryNoun) continue;
1882
+ const pageText = pageHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").slice(0, 500);
1883
+ if (new RegExp(`\\b${primaryNoun.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b[^.]*\\b(is|refers|defined|means)\\b`, "i").test(pageText)) {
1884
+ defined++;
1885
+ }
1886
+ }
1887
+ return pages.length > 0 ? Math.round(defined / pages.length * 100) : 0;
1888
+ })(),
1889
+ extraction_friction_avg: (() => {
1890
+ const combinedText = text + " " + (data.blogSample?.map((p) => p.text.replace(/<[^>]*>/g, " ")).join(" ") || "");
1891
+ const sentences = combinedText.split(/[.!?]+/).filter((s) => s.trim().length > 5);
1892
+ if (sentences.length === 0) return 0;
1893
+ const totalWords = sentences.reduce((sum, s) => sum + s.trim().split(/\s+/).length, 0);
1894
+ return Math.round(totalWords / sentences.length * 10) / 10;
1895
+ })(),
1896
+ image_figure_ratio: (() => {
1897
+ const combinedHtml = html + "\n" + (data.blogSample?.map((p) => p.text).join("\n") || "");
1898
+ const allImages = combinedHtml.match(/<img\s[^>]*>/gi) || [];
1899
+ if (allImages.length === 0) return 0;
1900
+ const figureBlocks = combinedHtml.match(/<figure[\s\S]*?<\/figure>/gi) || [];
1901
+ const figuresWithCaption = figureBlocks.filter((f) => /<figcaption/i.test(f));
1902
+ return Math.round(figuresWithCaption.length / allImages.length * 100);
1903
+ })()
1819
1904
  };
1820
1905
  }
1821
1906
  function getPageTopicText(html) {
@@ -2047,6 +2132,432 @@ function checkContentDepth(data, topicCoherenceScore) {
2047
2132
  }
2048
2133
  return { criterion: "content_depth", criterion_label: "Content Depth", score: finalScore, status: finalScore >= 7 ? "pass" : finalScore >= 4 ? "partial" : "fail", findings, fix_priority: finalScore >= 7 ? "P3" : "P1" };
2049
2134
  }
2135
/**
 * Scores how directly quotable the site's prose is (criterion
 * "citation_ready_writing", 0-10).
 *
 * Four signals are added together (3 + 3 + 2 + 2 points at most):
 *   1. definition sentences ("X is a ...", "X refers to ...", "X defined as ..."),
 *   2. short single-claim sentences (under 30 words, no comma + conjunction),
 *   3. answers under question headings that do not open with a dangling pronoun,
 *   4. sentences that contain a statistic together with a capitalised word.
 *
 * @param {object} data - Crawl data. `homepage` and `blogSample` are read here;
 *   the object is also handed to `getCombinedHtml` and `getBlogHtml`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkCitationReadyWriting(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "citation_ready_writing", criterion_label: "Citation-Ready Writing Quality", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const combinedHtml = getCombinedHtml(data);
  const text = combinedHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  const blogHtml = getBlogHtml(data);
  // Fall back to the combined text when there is no blog HTML to inspect.
  const blogText = blogHtml ? blogHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ") : text;
  const pageCount = Math.max(1, (data.blogSample?.length ?? 0) + 1);
  let score = 0;

  // 1. Definition sentences.
  const defSentences = blogText.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
  const avgDef = defSentences.length / pageCount;
  if (avgDef >= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `${defSentences.length} definition sentences found (avg ${avgDef.toFixed(1)}/page)` });
  } else if (avgDef >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `${defSentences.length} definition sentences found (avg ${avgDef.toFixed(1)}/page)`, fix: "Add more self-contained definition sentences that AI can quote directly" });
  } else {
    findings.push({ severity: "medium", detail: "No definition sentences found", fix: 'Write clear "X is a..." and "X refers to..." sentences for key concepts' });
  }

  // 2. Single-claim sentences.
  // A terminator only ends a sentence when whitespace or end-of-text follows;
  // this keeps decimals ("12.5%"), prices ("$1.50") and domains ("example.com")
  // inside one sentence instead of cutting them in two.
  const sentences = text.split(/[.!?]+(?=\s|$)/).filter((s) => s.trim().length > 10);
  const oneClaimSentences = sentences.filter((s) => {
    const words = s.trim().split(/\s+/);
    return words.length < 30 && !/,\s*(and|but|or|which|that|because|although|while)\s/i.test(s);
  });
  const avgOneClaim = oneClaimSentences.length / pageCount;
  if (avgOneClaim >= 5) {
    score += 3;
    findings.push({ severity: "info", detail: `Strong single-claim sentence density (avg ${avgOneClaim.toFixed(1)}/page)` });
  } else if (avgOneClaim >= 2) {
    score += 1;
    findings.push({ severity: "low", detail: `Moderate single-claim sentence density`, fix: "Write more concise, single-claim sentences that AI engines can quote" });
  } else {
    // Report the zero-point case too, so every scored dimension explains itself.
    findings.push({ severity: "medium", detail: "Low single-claim sentence density", fix: "Write more concise, single-claim sentences that AI engines can quote" });
  }

  // 3. Self-contained answers under question headings (skipped when there are none).
  const qHeadings = extractQuestionHeadings(combinedHtml);
  if (qHeadings.length > 0) {
    let selfContained = 0;
    for (const qh of qHeadings) {
      const escaped = qh.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      // Heading text, then the close of an h2/h3, then the start of the next <p>.
      const pat = new RegExp(escaped + "[\\s\\S]{0,200}?<\\/h[23]>\\s*<p[^>]*>([^<]{0,200})", "i");
      const m = pat.exec(combinedHtml);
      if (m && m[1] && !/^\s*(this|that|these|those|it)\s/i.test(m[1].trim())) {
        selfContained++;
      }
    }
    const selfContainedRatio = selfContained / qHeadings.length;
    if (selfContainedRatio >= 0.8) {
      score += 2;
      findings.push({ severity: "info", detail: `${Math.round(selfContainedRatio * 100)}% of answer openings are self-contained (no dangling pronouns)` });
    } else {
      findings.push({ severity: "low", detail: `Only ${Math.round(selfContainedRatio * 100)}% of answer openings are self-contained`, fix: 'Start answer paragraphs with the entity name, not "This" or "That"' });
    }
  }

  // 4. Sentences with a percentage, dollar amount or thousands-separated number
  //    plus at least one capitalised word.
  const quotableLines = sentences.filter(
    (s) => /\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/.test(s) && /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/.test(s)
  );
  const avgQuotable = quotableLines.length / pageCount;
  if (avgQuotable >= 2) {
    score += 2;
    findings.push({ severity: "info", detail: `${quotableLines.length} quotation-ready lines with stats and sources` });
  } else if (avgQuotable >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `${quotableLines.length} quotation-ready lines found`, fix: "Combine statistics with named sources for more citable content" });
  } else {
    findings.push({ severity: "medium", detail: "No quotation-ready lines found", fix: 'Write sentences that combine a statistic with a named source, e.g. "According to Harvard, 85% of..."' });
  }

  return { criterion: "citation_ready_writing", criterion_label: "Citation-Ready Writing Quality", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
2205
/**
 * Scores whether pages lead with their answer (criterion
 * "answer_first_placement", 0-10).
 *
 * For the homepage and every page in `data.blogSample` it checks:
 *   - one of the first five paragraphs is 40-80 words long and starts within the
 *     first 300 words of the body text (up to 4 points),
 *   - the first paragraph does not start with a "throat-clearing" opener
 *     (up to 3 points),
 *   - the first sentence of the body text contains a significant H1 word
 *     (up to 3 points).
 *
 * @param {object} data - Crawl data; reads `homepage.text` and `blogSample[].text`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkAnswerFirstPlacement(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "answer_first_placement", criterion_label: "Answer-First Placement", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  // One normaliser for every fragment, so that text taken from a paragraph can
  // be found inside text taken from the whole body. Without this, a <p> that
  // spans several lines or nests inline tags never matches the body text.
  const toPlainText = (fragment) => fragment.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();

  const pages = [data.homepage, ...(data.blogSample || [])].map((page) => stripScripts(page.text));
  const throatClearing = /^(In today|Welcome to|Have you ever|If you're looking|In this (article|post|guide))/i;

  let score = 0;
  let shortAnswerCount = 0;
  let answerFirstCount = 0;
  let entityFirstCount = 0;

  for (const pageHtml of pages) {
    const bodyMatch = pageHtml.match(/<body[^>]*>([\s\S]*)/i);
    const bodyHtml = bodyMatch ? bodyMatch[1] : pageHtml;
    const bodyText = toPlainText(bodyHtml);
    const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");

    // Short answer block among the first five paragraphs.
    const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
    for (const p of earlyParagraphs) {
      const pText = toPlainText(p);
      const wordCount = pText.split(/\s+/).length;
      if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
        shortAnswerCount++;
        break;
      }
    }

    // Direct opener: the first paragraph must not start with filler.
    const firstPara = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
    if (firstPara && !throatClearing.test(toPlainText(firstPara[1]))) {
      answerFirstCount++;
    }

    // Entity first: a capitalised H1 word over 3 characters, or any H1 word over
    // 5 characters, appears in the first sentence.
    // NOTE(review): bodyText still contains the H1's own text when the <h1> is
    // inside <body>, so this check may be satisfied by the heading itself -
    // confirm whether that is intended.
    const h1Match = pageHtml.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
    if (h1Match) {
      const h1Words = toPlainText(h1Match[1])
        .split(/\s+/)
        .filter((w) => (w.length > 3 && /^[A-Z]/.test(w)) || w.length > 5);
      const firstSentence = (bodyText.split(/[.!?]/)[0] || "").toLowerCase();
      if (h1Words.some((w) => firstSentence.includes(w.toLowerCase()))) {
        entityFirstCount++;
      }
    }
  }

  const totalPages = pages.length;
  const shortAnswerRatio = shortAnswerCount / totalPages;
  const answerFirstRatio = answerFirstCount / totalPages;
  const entityFirstRatio = entityFirstCount / totalPages;

  if (shortAnswerRatio >= 0.7) {
    score += 4;
    findings.push({ severity: "info", detail: `${Math.round(shortAnswerRatio * 100)}% of pages have a short answer block (40-80 words) early` });
  } else if (shortAnswerRatio >= 0.3) {
    score += 2;
    findings.push({ severity: "low", detail: `${Math.round(shortAnswerRatio * 100)}% of pages have an early answer block`, fix: "Add a concise 40-80 word answer paragraph in the first 300 words of each page" });
  } else {
    findings.push({ severity: "medium", detail: "Few pages have an early answer block", fix: "Place a 40-80 word summary paragraph near the top of each page for AI snippet extraction" });
  }

  if (answerFirstRatio >= 0.7) {
    score += 3;
    findings.push({ severity: "info", detail: `${Math.round(answerFirstRatio * 100)}% of pages open with a direct answer (no throat-clearing)` });
  } else if (answerFirstRatio >= 0.4) {
    score += 1;
    findings.push({ severity: "low", detail: `${Math.round(answerFirstRatio * 100)}% of pages open with a direct answer`, fix: `Avoid openers like "In today's world..." - start with the answer` });
  } else {
    findings.push({ severity: "medium", detail: "Most pages use throat-clearing openers instead of direct answers", fix: `Replace "In this article..." with a direct answer to the page's main question` });
  }

  if (entityFirstRatio >= 0.8) {
    score += 3;
    findings.push({ severity: "info", detail: `${Math.round(entityFirstRatio * 100)}% of pages mention the primary entity in the first sentence` });
  } else if (entityFirstRatio >= 0.5) {
    score += 1;
    findings.push({ severity: "low", detail: `${Math.round(entityFirstRatio * 100)}% mention the primary entity in the first sentence`, fix: "Include the H1 topic in the first body sentence for AI extraction" });
  } else {
    findings.push({ severity: "low", detail: "Few pages mention the primary entity in the opening sentence", fix: "Start content with a sentence that names the page topic explicitly" });
  }

  return { criterion: "answer_first_placement", criterion_label: "Answer-First Placement", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
2286
/**
 * Scores how well claims are backed by visible evidence (criterion
 * "evidence_packaging", 0-10).
 *
 * Signals (3 + 2 + 3 + 2 points at most):
 *   1. links inside <p> elements whose host differs from `data.domain`,
 *   2. h2-h4 headings naming a sources/references section,
 *   3. attribution phrases ("according to X", "X reports"),
 *   4. statistics followed in the same sentence by a capitalised word.
 *
 * @param {object} data - Crawl data; reads `homepage`, `blogSample` and `domain`
 *   and is passed to `getCombinedHtml`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkEvidencePackaging(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "evidence_packaging", criterion_label: "Evidence Packaging", score: 0, status: "not_found", findings, fix_priority: "P1" };
  }

  const combinedHtml = getCombinedHtml(data);
  const text = combinedHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  const pageCount = Math.max(1, (data.blogSample?.length ?? 0) + 1);
  let score = 0;

  // 1. Inline citations: absolute links inside paragraphs that leave the site.
  const paragraphs = combinedHtml.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
  const domain = data.domain.replace(/^www\./, "").toLowerCase();
  let inlineCitations = 0;
  for (const p of paragraphs) {
    const links = p.match(/<a[^>]*href=["'](https?:\/\/[^"']+)["'][^>]*>/gi) || [];
    for (const link of links) {
      const href = link.match(/href=["'](https?:\/\/[^"']+)["']/i);
      if (!href) continue;
      try {
        const linkDomain = new URL(href[1]).hostname.replace(/^www\./, "").toLowerCase();
        if (linkDomain !== domain) inlineCitations++;
      } catch {
        // Best effort: an href that is not a parseable URL is simply not counted.
      }
    }
  }
  const avgCitations = inlineCitations / pageCount;
  if (avgCitations >= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `${inlineCitations} inline citations to external sources (avg ${avgCitations.toFixed(1)}/page)` });
  } else if (avgCitations >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `${inlineCitations} inline citations found`, fix: "Add more inline links to authoritative external sources within paragraphs" });
  } else {
    findings.push({ severity: "medium", detail: "No inline citations to external sources", fix: "Link to authoritative sources within your content paragraphs to strengthen evidence" });
  }

  // 2. Sources / references headings. The leading \b stops "Resources" from
  //    matching on its embedded "source".
  const sourcesHeading = combinedHtml.match(/<h[2-4][^>]*>[^<]*\b(?:source|reference|citation|bibliography)[^<]*<\/h[2-4]>/gi) || [];
  const sourceSections = sourcesHeading.length;
  if (sourceSections > 0) {
    const ratio = sourceSections / pageCount;
    if (ratio >= 0.5) {
      score += 2;
      findings.push({ severity: "info", detail: `Sources/References section found on ${sourceSections} page(s)` });
    } else {
      score += 1;
      findings.push({ severity: "low", detail: `Sources section found on ${sourceSections} page(s)`, fix: "Add a Sources or References section to more content pages" });
    }
  } else {
    findings.push({ severity: "low", detail: "No Sources/References section found", fix: 'Add a "Sources" heading at the end of key articles' });
  }

  // 3. Attribution phrases. The pattern is deliberately case-sensitive: under
  //    the `i` flag `[A-Z][a-z]+` matches any word, so "it says" or "we found"
  //    would count as attribution to a named source.
  const attributionPhrases = text.match(/\b[Aa]ccording\s+to\s+[A-Z]|\b[A-Z][a-z]+\s+(?:found|says|reports|notes|states|estimates|suggests)\b/g) || [];
  const avgAttribution = attributionPhrases.length / pageCount;
  if (avgAttribution >= 3) {
    score += 3;
    findings.push({ severity: "info", detail: `${attributionPhrases.length} attribution phrases found (avg ${avgAttribution.toFixed(1)}/page)` });
  } else if (avgAttribution >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `${attributionPhrases.length} attribution phrases found`, fix: 'Add more "According to [Source]..." attribution for credibility' });
  } else {
    findings.push({ severity: "medium", detail: "No attribution phrases found", fix: 'Use "According to [Name]..." and "[Expert] reports..." to attribute claims' });
  }

  // 4. Sourced statistics. The word boundary applies only to the unit words:
  //    a boundary directly after "%" would demand a letter or digit right after
  //    the sign and so reject the usual "85% of ...". Case-sensitive for the
  //    same reason as the attribution pattern.
  const sourcedStats = text.match(/\d+(?:\.\d+)?(?:%|\s*(?:[Mm]illion|[Bb]illion|[Tt]housand|[Pp]ercent)\b)[^.]*\b[A-Z][a-z]+\b/g) || [];
  const avgSourcedStats = sourcedStats.length / pageCount;
  if (avgSourcedStats >= 2) {
    score += 2;
    findings.push({ severity: "info", detail: `${sourcedStats.length} sourced statistics found` });
  } else if (avgSourcedStats >= 1) {
    score += 1;
    findings.push({ severity: "low", detail: `${sourcedStats.length} sourced statistics found`, fix: "Pair more statistics with named sources" });
  }

  return { criterion: "evidence_packaging", criterion_label: "Evidence Packaging", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P1" };
}
2358
/**
 * Scores how clearly each page pins down its main entity (criterion
 * "entity_disambiguation", 0-10).
 *
 * The longest H1 word of more than three characters is treated as the page's
 * primary term. For the homepage and every page in `data.blogSample` it checks:
 *   - the term is followed by "is/refers/defined/means" before the next full
 *     stop, within the first 500 characters of text (up to 4 points),
 *   - the term occurs at least three times on the page (up to 3 points),
 *   - the page uses a contrast phrase such as "unlike X" or "compared to Y"
 *     (up to 3 points).
 * Pages without an H1, or without a usable H1 word, count as misses.
 *
 * @param {object} data - Crawl data; reads `homepage.text` and `blogSample[].text`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkEntityDisambiguation(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "entity_disambiguation", criterion_label: "Entity Disambiguation", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const pages = [data.homepage, ...(data.blogSample || [])].map((page) => stripScripts(page.text));
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const contrastPatterns = [/\bunlike\s+\w/i, /\bcompared\s+to\s+\w/i, /\bnot\s+to\s+be\s+confused\s+with\b/i, /\bas\s+opposed\s+to\b/i];

  let score = 0;
  let definedEarlyCount = 0;
  let consistentCount = 0;
  let disambiguationCount = 0;

  for (const pageHtml of pages) {
    const text = pageHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
    const h1Match = pageHtml.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
    if (!h1Match) continue;

    const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
    // Strip punctuation glued to a word ("Kubernetes?", "Pricing:", "(Beta)").
    // The term is later wrapped in \b...\b, which cannot match next to a
    // trailing non-word character, so an untrimmed term would never be found.
    const h1Words = h1Text
      .split(/\s+/)
      .map((w) => w.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
      .filter((w) => w.length > 3);
    // Longest word wins; on a tie the earlier word is kept.
    const primaryNoun = h1Words.reduce((best, w) => (w.length > best.length ? w : best), "");
    if (!primaryNoun) continue;

    const nounPattern = escapeRegExp(primaryNoun);
    const first500 = text.slice(0, 500);
    if (new RegExp(`\\b${nounPattern}\\b[^.]*\\b(is|refers|defined|means)\\b`, "i").test(first500)) {
      definedEarlyCount++;
    }

    const mentions = text.match(new RegExp(`\\b${nounPattern}\\b`, "gi")) || [];
    if (mentions.length >= 3) {
      consistentCount++;
    }

    if (contrastPatterns.some((pattern) => pattern.test(text))) {
      disambiguationCount++;
    }
  }

  const totalPages = pages.length;

  const definedRatio = definedEarlyCount / totalPages;
  if (definedRatio >= 0.7) {
    score += 4;
    findings.push({ severity: "info", detail: `${Math.round(definedRatio * 100)}% of pages define the primary entity early` });
  } else if (definedRatio >= 0.3) {
    score += 2;
    findings.push({ severity: "low", detail: `${Math.round(definedRatio * 100)}% of pages define the primary entity early`, fix: "Define the main topic within the first 500 characters of each page" });
  } else {
    findings.push({ severity: "medium", detail: "Few pages define the primary entity early", fix: "Start each page with a clear definition of the main topic" });
  }

  const consistentRatio = consistentCount / totalPages;
  if (consistentRatio >= 0.7) {
    score += 3;
    findings.push({ severity: "info", detail: `${Math.round(consistentRatio * 100)}% of pages use consistent entity terminology` });
  } else if (consistentRatio >= 0.3) {
    score += 1;
    findings.push({ severity: "low", detail: `${Math.round(consistentRatio * 100)}% of pages use consistent terminology`, fix: "Use the same primary term consistently instead of varying synonyms" });
  } else {
    findings.push({ severity: "low", detail: "Low entity terminology consistency across pages", fix: "Repeat the primary entity term consistently throughout each page" });
  }

  const disambigRatio = disambiguationCount / totalPages;
  if (disambigRatio >= 0.3) {
    score += 3;
    findings.push({ severity: "info", detail: `${disambiguationCount} page(s) use disambiguation signals ("unlike X", "compared to")` });
  } else if (disambiguationCount > 0) {
    score += 1;
    findings.push({ severity: "low", detail: `${disambiguationCount} page(s) use disambiguation signals`, fix: 'Add "unlike X" or "compared to Y" to clarify entity boundaries for AI' });
  } else {
    findings.push({ severity: "low", detail: "No disambiguation signals found", fix: 'Add "unlike X" or "not to be confused with Y" to help AI engines distinguish your topics' });
  }

  return { criterion: "entity_disambiguation", criterion_label: "Entity Disambiguation", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
2429
/**
 * Scores how easy the content is to lift into a snippet (criterion
 * "extraction_friction", 0-10; the three positive signals sum to 8).
 *
 * Signals:
 *   - average sentence length over the combined text (up to 3 points),
 *   - share of pages whose first three paragraphs after the H1 total 11-75 words
 *     and contain no parenthetical (up to 3 points),
 *   - number of words of ten or more letters in the first 500 words, divided by
 *     the page count (up to 2 points),
 *   - a 2-point penalty when a <div>, <section> or <p> is hidden via an inline
 *     `display:none` style or the `hidden` attribute.
 *
 * @param {object} data - Crawl data; reads `homepage.text` and
 *   `blogSample[].text` and is passed to `getCombinedHtml`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkExtractionFriction(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "extraction_friction", criterion_label: "Extraction Friction Score", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const combinedHtml = getCombinedHtml(data);
  const text = combinedHtml.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ");
  let score = 0;

  // Sentence length.
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
  const sentenceLengths = sentences.map((s) => s.trim().split(/\s+/).length);
  const avgSentenceLen = sentenceLengths.length > 0 ? sentenceLengths.reduce((a, b) => a + b, 0) / sentenceLengths.length : 0;
  if (avgSentenceLen > 0 && avgSentenceLen < 20) {
    score += 3;
    findings.push({ severity: "info", detail: `Average sentence length: ${avgSentenceLen.toFixed(1)} words - excellent for AI extraction` });
  } else if (avgSentenceLen >= 20 && avgSentenceLen <= 25) {
    score += 2;
    findings.push({ severity: "info", detail: `Average sentence length: ${avgSentenceLen.toFixed(1)} words - good` });
  } else if (avgSentenceLen > 25 && avgSentenceLen <= 35) {
    score += 1;
    findings.push({ severity: "low", detail: `Average sentence length: ${avgSentenceLen.toFixed(1)} words - slightly long`, fix: "Shorten sentences to under 25 words for easier AI extraction" });
  } else if (avgSentenceLen > 35) {
    findings.push({ severity: "medium", detail: `Average sentence length: ${avgSentenceLen.toFixed(1)} words - too long for AI snippets`, fix: "Break long sentences into shorter, single-claim statements" });
  }

  // Voice-friendly lead paragraphs (pages without an H1 count as misses).
  const pages = [data.homepage, ...(data.blogSample || [])].map((page) => stripScripts(page.text));
  let voiceFriendlyCount = 0;
  for (const pageHtml of pages) {
    const h1Match = pageHtml.match(/<h1[^>]*>[\s\S]*?<\/h1>/i);
    if (!h1Match) continue;
    const afterH1 = pageHtml.slice(pageHtml.indexOf(h1Match[0]) + h1Match[0].length);
    const leadParas = afterH1.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 3) || [];
    const leadText = leadParas.map((p) => p.replace(/<[^>]*>/g, "")).join(" ").trim();
    const words = leadText.split(/\s+/).length;
    const hasParentheticals = /\([^)]+\)/.test(leadText);
    if (words <= 75 && words > 10 && !hasParentheticals) {
      voiceFriendlyCount++;
    }
  }
  const voiceFriendlyRatio = voiceFriendlyCount / pages.length;
  if (voiceFriendlyRatio >= 0.7) {
    score += 3;
    findings.push({ severity: "info", detail: `${Math.round(voiceFriendlyRatio * 100)}% of pages have voice-friendly lead paragraphs` });
  } else if (voiceFriendlyRatio >= 0.3) {
    score += 1;
    findings.push({ severity: "low", detail: `${Math.round(voiceFriendlyRatio * 100)}% have voice-friendly leads`, fix: "Keep the first 2-3 sentences under 75 words total, using active voice with no parentheticals" });
  } else {
    findings.push({ severity: "low", detail: "Few pages have voice-friendly lead paragraphs", fix: "Write concise opening paragraphs without parentheticals for voice assistant compatibility" });
  }

  // Jargon: long words in the opening 500 words of the combined text.
  const first500Words = text.split(/\s+/).slice(0, 500).join(" ");
  const longWords = first500Words.match(/\b[a-z]{10,}\b/gi) || [];
  const avgJargon = longWords.length / Math.max(1, pages.length);
  if (avgJargon <= 2) {
    score += 2;
    findings.push({ severity: "info", detail: "Low jargon density in opening content" });
  } else if (avgJargon <= 5) {
    score += 1;
    findings.push({ severity: "low", detail: "Moderate jargon in opening content", fix: "Define technical terms inline when first used" });
  } else {
    findings.push({ severity: "low", detail: "High jargon density in opening content", fix: "Replace or define complex terms to reduce extraction friction" });
  }

  // Hidden content. `hidden` must be a standalone attribute: preceded by
  // whitespace and followed by whitespace, "=", "/" or ">". Matching the bare
  // substring would also penalise aria-hidden="true" and class names such as
  // "overflow-hidden". The \b after the tag name keeps <pre> and <path> from
  // being read as <p>.
  const hasHiddenContent = /<(?:div|section|p)\b[^>]*?(?:\sstyle=["'][^"']*display\s*:\s*none|\shidden(?=[\s=>\/]))[^>]*>/i.test(combinedHtml);
  if (hasHiddenContent) {
    score = Math.max(0, score - 2);
    findings.push({ severity: "medium", detail: "Hidden content detected (display:none or hidden attribute)", fix: "Make all content visible - hidden content is not extractable by AI engines" });
  }

  return { criterion: "extraction_friction", criterion_label: "Extraction Friction Score", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
2499
/**
 * Scores how much machine-readable context surrounds images (criterion
 * "image_context_ai", 0-10).
 *
 * Signals (4 + 3 + 3 points at most), each relative to the number of <img> tags:
 *   - <figure> blocks that contain a <figcaption>,
 *   - alt text longer than five words,
 *   - images located inside an <article> or <section>.
 * When no <img> tag is found the criterion returns a neutral 5 ("partial").
 *
 * @param {object} data - Crawl data; reads `homepage` and is passed to
 *   `getCombinedHtml`.
 * @returns {{criterion: string, criterion_label: string, score: number,
 *   status: string, findings: object[], fix_priority: string}} Criterion result.
 */
function checkImageContextAI(data) {
  const findings = [];
  if (!data.homepage) {
    findings.push({ severity: "critical", detail: "Could not fetch homepage" });
    return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: 0, status: "not_found", findings, fix_priority: "P2" };
  }

  const combinedHtml = getCombinedHtml(data);
  let score = 0;

  const allImages = combinedHtml.match(/<img\s[^>]*>/gi) || [];
  if (allImages.length === 0) {
    findings.push({ severity: "low", detail: "No images found on pages" });
    return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: 5, status: "partial", findings, fix_priority: "P3" };
  }

  // Captioned figures.
  const figureBlocks = combinedHtml.match(/<figure[\s\S]*?<\/figure>/gi) || [];
  const figuresWithCaption = figureBlocks.filter((f) => /<figcaption/i.test(f));
  const figureRatio = figuresWithCaption.length / allImages.length;
  if (figureRatio >= 0.5) {
    score += 4;
    findings.push({ severity: "info", detail: `${figuresWithCaption.length}/${allImages.length} images use <figure> + <figcaption>` });
  } else if (figureRatio >= 0.25) {
    score += 2;
    findings.push({ severity: "low", detail: `${figuresWithCaption.length}/${allImages.length} images use <figure> + <figcaption>`, fix: "Wrap more images in <figure> with <figcaption> for AI context" });
  } else {
    findings.push({ severity: "medium", detail: "Few or no images use <figure> + <figcaption>", fix: "Wrap images in <figure> elements with descriptive <figcaption> tags" });
  }

  // Alt text quality. The value ends at the same quote character that opened
  // it, so an apostrophe inside alt="A dog's bowl ..." no longer cuts it short.
  let goodAltCount = 0;
  for (const img of allImages) {
    const altMatch = img.match(/\salt=(["'])([\s\S]*?)\1/i);
    if (!altMatch) continue;
    const altText = altMatch[2].trim();
    const words = altText.split(/\s+/).length;
    const isGeneric = /^(image|photo|picture|img|icon|logo|banner|screenshot)$/i.test(altText);
    if (words > 5 && !isGeneric) {
      goodAltCount++;
    }
  }
  const altQualityRatio = goodAltCount / allImages.length;
  if (altQualityRatio >= 0.5) {
    score += 3;
    findings.push({ severity: "info", detail: `${goodAltCount}/${allImages.length} images have descriptive alt text (>5 words)` });
  } else if (altQualityRatio >= 0.25) {
    score += 1;
    findings.push({ severity: "low", detail: `${goodAltCount}/${allImages.length} images have quality alt text`, fix: "Write descriptive alt text (>5 words) that explains the image content" });
  } else {
    findings.push({ severity: "medium", detail: "Most images lack descriptive alt text", fix: 'Add descriptive alt text (>5 words) to all images, avoiding generic terms like "image" or "photo"' });
  }

  // Images inside semantic containers. Walk the container and image tags in
  // document order with a nesting counter, so every image is counted once.
  // Counting matching containers instead would score an article holding ten
  // images as a single contextual image.
  let imagesInContext = 0;
  let depth = 0;
  for (const tag of combinedHtml.matchAll(/<(\/?)(?:article|section)\b[^>]*>|<img\s[^>]*>/gi)) {
    if (tag[1] === undefined) {
      // The <img> alternative matched (the capture group did not participate).
      if (depth > 0) imagesInContext++;
    } else if (tag[1] === "/") {
      depth = Math.max(0, depth - 1);
    } else {
      depth++;
    }
  }
  if (imagesInContext > 0) {
    const contextRatio = imagesInContext / allImages.length;
    if (contextRatio >= 0.5) {
      score += 3;
      findings.push({ severity: "info", detail: "Images are well-placed within semantic content sections" });
    } else {
      score += 1;
      findings.push({ severity: "low", detail: "Some images placed within content sections", fix: "Place images within <article> or <section> elements for better AI context" });
    }
  } else {
    findings.push({ severity: "low", detail: "Images not placed within semantic content sections", fix: "Move images inside <article> or <section> elements" });
  }

  return { criterion: "image_context_ai", criterion_label: "Image Context for AI", score: Math.min(10, score), status: score >= 7 ? "pass" : score >= 4 ? "partial" : "fail", findings, fix_priority: score >= 7 ? "P3" : "P2" };
}
2050
2561
  function auditSiteFromData(data) {
2051
2562
  const topicCoherence = checkTopicCoherence(data);
2052
2563
  const cannibalization = checkContentCannibalization(data, topicCoherence.score);
@@ -2078,7 +2589,14 @@ function auditSiteFromData(data) {
2078
2589
  cannibalization,
2079
2590
  checkVisibleDateSignal(data),
2080
2591
  topicCoherence,
2081
- checkContentDepth(data, topicCoherence.score)
2592
+ checkContentDepth(data, topicCoherence.score),
2593
+ // V2 criteria (#29-#34)
2594
+ checkCitationReadyWriting(data),
2595
+ checkAnswerFirstPlacement(data),
2596
+ checkEvidencePackaging(data),
2597
+ checkEntityDisambiguation(data),
2598
+ checkExtractionFriction(data),
2599
+ checkImageContextAI(data)
2082
2600
  ];
2083
2601
  }
2084
2602
  async function auditSite(targetUrl) {
@@ -2141,7 +2659,21 @@ var WEIGHTS = {
2141
2659
  canonical_url: 0.01,
2142
2660
  rss_feed: 0.01,
2143
2661
  schema_coverage: 0.01,
2144
- speakable_schema: 0.01
2662
+ speakable_schema: 0.01,
2663
+ // ─── V2 Criteria (~15%) ───────────────────────────────────────────────────
2664
+ // Citation quality, evidence packaging, and extraction friction.
2665
+ citation_ready_writing: 0.04,
2666
+ // Self-contained quotable sentences
2667
+ answer_first_placement: 0.03,
2668
+ // Answer block in first 300 words
2669
+ evidence_packaging: 0.03,
2670
+ // Inline citations and attribution
2671
+ entity_disambiguation: 0.02,
2672
+ // Clear entity boundaries
2673
+ extraction_friction: 0.02,
2674
+ // Sentence length, voice, jargon
2675
+ image_context_ai: 0.01
2676
+ // Figure/figcaption, alt text quality
2145
2677
  };
2146
2678
  function calculateOverallScore(criteria) {
2147
2679
  let totalWeight = 0;
@@ -2161,6 +2693,248 @@ function calculateOverallScore(criteria) {
2161
2693
  return score;
2162
2694
  }
2163
2695
 
2696
+ // src/pillars.ts
2697
// Reporting pillars: each pillar name maps to the ordered list of criterion
// ids that are aggregated into that pillar's score.
var PILLARS = {
  // Quality of the content as a citable answer.
  "Answer Readiness": [
    "topic_coherence", "original_data", "content_depth", "fact_density",
    "citation_ready_writing", "answer_first_placement", "evidence_packaging"
  ],
  // How the content is laid out for extraction.
  "Content Structure": [
    "direct_answer_density", "qa_content_format", "query_answer_alignment", "faq_section",
    "table_list_extractability", "definition_patterns", "entity_disambiguation"
  ],
  // Entity, authorship and freshness signals.
  "Trust & Authority": [
    "entity_consistency", "internal_linking", "content_freshness",
    "author_schema_depth", "schema_markup"
  ],
  // Markup-level hygiene.
  "Technical Foundation": [
    "semantic_html", "clean_html", "visible_date_signal", "extraction_friction",
    "image_context_ai", "schema_coverage", "speakable_schema"
  ],
  // Files and feeds that crawlers look for.
  "AI Discovery": [
    "content_cannibalization", "llms_txt", "robots_txt", "content_velocity",
    "content_licensing", "canonical_url", "sitemap_completeness", "rss_feed"
  ]
};
2743
// Client-facing display name for each criterion id, listed pillar by pillar.
var CLIENT_NAMES = {
  // Answer Readiness
  topic_coherence: "Topical Authority",
  original_data: "Original Research & Data",
  content_depth: "Content Depth",
  fact_density: "Fact & Data Density",
  citation_ready_writing: "Citation-Ready Writing",
  answer_first_placement: "Answer-First Placement",
  evidence_packaging: "Evidence Packaging",

  // Content Structure
  direct_answer_density: "Direct Answer Density",
  qa_content_format: "Q&A Content Format",
  query_answer_alignment: "Query-Answer Alignment",
  faq_section: "FAQ Section",
  table_list_extractability: "Tables & Lists",
  definition_patterns: "Definition Patterns",
  entity_disambiguation: "Entity Disambiguation",

  // Trust & Authority
  entity_consistency: "Entity & Brand Authority",
  internal_linking: "Internal Linking",
  content_freshness: "Content Freshness",
  author_schema_depth: "Author & Expert Schema",
  schema_markup: "Schema Markup",

  // Technical Foundation
  semantic_html: "Semantic HTML",
  clean_html: "Clean HTML",
  visible_date_signal: "Visible Date Signal",
  extraction_friction: "Extraction Friction",
  image_context_ai: "Image Context for AI",
  schema_coverage: "Schema Coverage",
  speakable_schema: "Speakable Schema",

  // AI Discovery
  content_cannibalization: "Content Cannibalization",
  llms_txt: "llms.txt File",
  robots_txt: "robots.txt for AI",
  content_velocity: "Publishing Velocity",
  content_licensing: "Content Licensing",
  canonical_url: "Canonical URLs",
  sitemap_completeness: "Sitemap Completeness",
  rss_feed: "RSS/Atom Feed"
};
2779
// Relative weight of each criterion. Consumers in this module divide by the
// sum of the weights they actually use, so the values need not add up to 1.
var PILLAR_WEIGHTS = {
  // Answer Readiness
  topic_coherence: 0.14,
  original_data: 0.1,
  content_depth: 0.07,
  fact_density: 0.06,
  citation_ready_writing: 0.04,
  answer_first_placement: 0.03,
  evidence_packaging: 0.03,

  // Content Structure
  direct_answer_density: 0.05,
  qa_content_format: 0.05,
  query_answer_alignment: 0.05,
  faq_section: 0.04,
  table_list_extractability: 0.03,
  definition_patterns: 0.02,
  entity_disambiguation: 0.02,

  // Trust & Authority
  entity_consistency: 0.05,
  internal_linking: 0.04,
  content_freshness: 0.04,
  author_schema_depth: 0.03,
  schema_markup: 0.03,

  // Technical Foundation
  semantic_html: 0.02,
  clean_html: 0.02,
  visible_date_signal: 0.02,
  extraction_friction: 0.02,
  image_context_ai: 0.01,
  schema_coverage: 0.01,
  speakable_schema: 0.01,

  // AI Discovery
  content_cannibalization: 0.02,
  llms_txt: 0.02,
  robots_txt: 0.02,
  content_velocity: 0.02,
  content_licensing: 0.02,
  canonical_url: 0.01,
  sitemap_completeness: 0.01,
  rss_feed: 0.01
};
2815
// Estimated remediation effort per criterion: "Low", "Medium" or "High".
var CRITERION_EFFORT = {
  // Answer Readiness
  topic_coherence: "High",
  original_data: "High",
  content_depth: "Medium",
  fact_density: "Medium",
  citation_ready_writing: "Medium",
  answer_first_placement: "Medium",
  evidence_packaging: "Medium",

  // Content Structure
  direct_answer_density: "Medium",
  qa_content_format: "Medium",
  query_answer_alignment: "Medium",
  faq_section: "Medium",
  table_list_extractability: "Medium",
  definition_patterns: "Low",
  entity_disambiguation: "Medium",

  // Trust & Authority
  entity_consistency: "Low",
  internal_linking: "Medium",
  content_freshness: "Low",
  author_schema_depth: "Low",
  schema_markup: "Medium",

  // Technical Foundation
  semantic_html: "Low",
  clean_html: "Medium",
  visible_date_signal: "Low",
  extraction_friction: "Medium",
  image_context_ai: "Low",
  schema_coverage: "Medium",
  speakable_schema: "Low",

  // AI Discovery
  content_cannibalization: "Medium",
  llms_txt: "Low",
  robots_txt: "Low",
  content_velocity: "High",
  content_licensing: "Low",
  canonical_url: "Low",
  sitemap_completeness: "Low",
  rss_feed: "Low"
};
2851
// One-sentence remediation advice per criterion, used as the description of a
// recommended fix.
var FIX_DESCRIPTIONS = {
  // Answer Readiness
  topic_coherence: "Focus blog content on 2-3 core expertise areas instead of scattered topics.",
  original_data: "Publish original research, case studies, or proprietary data that AI engines can cite.",
  content_depth: "Expand articles to 1000+ words with structured H2/H3 sections and expert analysis.",
  fact_density: "Add specific numbers, percentages, and statistics throughout content.",
  citation_ready_writing: "Write self-contained definition sentences and one-claim statements AI can quote directly.",
  answer_first_placement: "Place a 40-80 word answer block in the first 300 words of each page.",
  evidence_packaging: "Add inline citations, attribution phrases, and a sources section to key pages.",

  // Content Structure
  direct_answer_density: "Write concise 2-3 sentence answer paragraphs after each question heading.",
  qa_content_format: "Add question-based H2/H3 headings matching common AI queries.",
  query_answer_alignment: "Ensure every question heading is followed by a direct answer paragraph.",
  faq_section: "Create a dedicated FAQ page with FAQPage schema markup.",
  table_list_extractability: "Add comparison tables with headers and structured lists.",
  definition_patterns: 'Include clear "X is defined as..." patterns for key terms.',
  entity_disambiguation: "Define the primary entity early and use consistent terminology.",

  // Trust & Authority
  entity_consistency: "Add Organization schema with consistent name, address, and sameAs links.",
  internal_linking: "Strengthen internal linking with descriptive anchor text between related pages.",
  content_freshness: "Add dateModified schema and visible last-updated dates.",
  author_schema_depth: "Add Person schema for authors with credentials and sameAs links.",
  schema_markup: "Implement JSON-LD structured data on key pages.",

  // Technical Foundation
  semantic_html: "Use semantic HTML5 elements (main, article, nav, header, footer).",
  clean_html: "Fix HTML structure, add meta tags, and ensure HTTPS.",
  visible_date_signal: "Display dates using <time> elements and add datePublished to JSON-LD.",
  extraction_friction: "Shorten sentences, use active voice, and avoid jargon in lead paragraphs.",
  image_context_ai: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
  schema_coverage: "Extend structured data to inner pages (articles, services, products).",
  speakable_schema: "Add SpeakableSpecification schema for voice assistant compatibility.",

  // AI Discovery
  content_cannibalization: "Consolidate overlapping pages or differentiate titles and H1 headings.",
  llms_txt: "Create a /llms.txt file describing your site for AI engines.",
  robots_txt: "Update robots.txt to explicitly allow AI crawlers.",
  content_velocity: "Establish a regular publishing cadence with dated sitemap entries.",
  content_licensing: "Create /ai.txt to declare AI usage permissions.",
  canonical_url: 'Add rel="canonical" tags to all pages.',
  sitemap_completeness: "Generate a comprehensive sitemap.xml with lastmod dates.",
  rss_feed: "Add an RSS feed linked from your homepage."
};
2887
// Reverse index built once at module load: criterion id -> name of the pillar
// whose list contains it in PILLARS.
var CRITERION_PILLAR_MAP = {};
Object.entries(PILLARS).forEach(([pillarName, criterionIds]) => {
  criterionIds.forEach((criterionId) => {
    CRITERION_PILLAR_MAP[criterionId] = pillarName;
  });
});
2893
/**
 * Returns the name of the pillar a criterion belongs to.
 *
 * @param {string} criterionId - Criterion id such as "schema_markup".
 * @returns {string} The pillar name recorded in CRITERION_PILLAR_MAP, or
 *   "Unknown" when the id is not registered.
 */
function getCriterionPillar(criterionId) {
  // CRITERION_PILLAR_MAP is a plain object, so a bare lookup of ids such as
  // "constructor" or "toString" would return inherited Object.prototype
  // members instead of "Unknown". Only own entries count as registered.
  if (!Object.prototype.hasOwnProperty.call(CRITERION_PILLAR_MAP, criterionId)) {
    return "Unknown";
  }
  return CRITERION_PILLAR_MAP[criterionId] || "Unknown";
}
2896
/**
 * Computes one aggregate score per pillar from a list of criterion results.
 *
 * A pillar's score is the weighted mean of `score / 10 * 100` over the
 * criteria of that pillar that are present in `criteria`, rounded to the
 * nearest integer. Weights come from PILLAR_WEIGHTS (0.02 when an id has no
 * entry). A pillar with no matching criteria scores 0.
 *
 * @param {Array<{criterion: string, score: number}>} criteria
 * @returns {{answerReadiness: number, contentStructure: number,
 *   trustAuthority: number, technicalFoundation: number, aiDiscovery: number}}
 */
function computePillarScores(criteria) {
  // Index results by criterion id; a later duplicate overrides an earlier one.
  const resultsById = new Map();
  for (const result of criteria) {
    resultsById.set(result.criterion, result);
  }

  const scoreFor = (criterionIds) => {
    const totals = criterionIds.reduce(
      (acc, id) => {
        const result = resultsById.get(id);
        if (!result) return acc;
        const w = PILLAR_WEIGHTS[id] ?? 0.02;
        return {
          weighted: acc.weighted + result.score / 10 * w * 100,
          weight: acc.weight + w
        };
      },
      { weighted: 0, weight: 0 }
    );
    if (totals.weight === 0) return 0;
    return Math.round(totals.weighted / totals.weight);
  };

  const answerReadiness = scoreFor(PILLARS["Answer Readiness"]);
  const contentStructure = scoreFor(PILLARS["Content Structure"]);
  const trustAuthority = scoreFor(PILLARS["Trust & Authority"]);
  const technicalFoundation = scoreFor(PILLARS["Technical Foundation"]);
  const aiDiscovery = scoreFor(PILLARS["AI Discovery"]);
  return { answerReadiness, contentStructure, trustAuthority, technicalFoundation, aiDiscovery };
}
2919
/**
 * Picks the highest-impact fixes among criteria scoring below 8.
 *
 * Impact is `round((10 - score) * weight * 100)` with the weight taken from
 * PILLAR_WEIGHTS (0.02 when missing); entries whose impact is zero or
 * negative are dropped. Results are ordered by descending impact.
 *
 * @param {Array<{criterion: string, criterion_label: string, score: number}>} criteria
 * @param {number} [count=3] - Maximum number of fixes to return.
 * @returns {Array<{criterion: string, clientName: string, description: string,
 *   impactPoints: number, effort: string}>}
 */
function computeTopFixes(criteria, count = 3) {
  const ranked = criteria
    .filter((item) => !(item.score >= 8))
    .map((item) => {
      const w = PILLAR_WEIGHTS[item.criterion] ?? 0.02;
      return { item, impactPoints: Math.round((10 - item.score) * w * 100) };
    })
    .filter(({ impactPoints }) => !(impactPoints <= 0))
    .map(({ item, impactPoints }) => ({
      criterion: item.criterion,
      clientName: CLIENT_NAMES[item.criterion] || item.criterion_label,
      description: FIX_DESCRIPTIONS[item.criterion] || `Improve ${item.criterion_label}`,
      impactPoints,
      effort: CRITERION_EFFORT[item.criterion] || "Medium"
    }));
  ranked.sort((left, right) => right.impactPoints - left.impactPoints);
  return ranked.slice(0, count);
}
2937
+
2164
2938
  // src/scorecard-builder.ts
2165
2939
  var CRITERION_LABELS = {
2166
2940
  "llms.txt File": "llms.txt File",
@@ -2190,7 +2964,13 @@ var CRITERION_LABELS = {
2190
2964
  "Content Cannibalization": "Content Cannibalization",
2191
2965
  "Visible Date Signal": "Visible Date Signal",
2192
2966
  "Topic Coherence": "Topic Coherence",
2193
- "Content Depth": "Content Depth"
2967
+ "Content Depth": "Content Depth",
2968
+ "Citation-Ready Writing Quality": "Citation-Ready Writing Quality",
2969
+ "Answer-First Placement": "Answer-First Placement",
2970
+ "Evidence Packaging": "Evidence Packaging",
2971
+ "Entity Disambiguation": "Entity Disambiguation",
2972
+ "Extraction Friction Score": "Extraction Friction Score",
2973
+ "Image Context for AI": "Image Context for AI"
2194
2974
  };
2195
2975
  function scoreToStatus(score) {
2196
2976
  if (score === 0) return "MISSING";
@@ -2239,7 +3019,10 @@ function buildScorecard(results) {
2239
3019
  criterion: label,
2240
3020
  score: r.score,
2241
3021
  status: scoreToStatus(r.score),
2242
- keyFindings
3022
+ keyFindings,
3023
+ pillar: getCriterionPillar(r.criterion),
3024
+ clientName: CLIENT_NAMES[r.criterion] || label,
3025
+ weight: WEIGHTS[r.criterion]
2243
3026
  };
2244
3027
  });
2245
3028
  }
@@ -2306,7 +3089,14 @@ var CRITERION_WEIGHTS = {
2306
3089
  canonical_url: 0.01,
2307
3090
  rss_feed: 0.01,
2308
3091
  schema_coverage: 0.01,
2309
- speakable_schema: 0.01
3092
+ speakable_schema: 0.01,
3093
+ // V2 Criteria (~15%)
3094
+ citation_ready_writing: 0.04,
3095
+ answer_first_placement: 0.03,
3096
+ evidence_packaging: 0.03,
3097
+ entity_disambiguation: 0.02,
3098
+ extraction_friction: 0.02,
3099
+ image_context_ai: 0.01
2310
3100
  };
2311
3101
  var OPPORTUNITY_TEMPLATES = {
2312
3102
  llms_txt: {
@@ -2448,6 +3238,36 @@ var OPPORTUNITY_TEMPLATES = {
2448
3238
  name: "Increase Content Depth",
2449
3239
  effort: "Medium",
2450
3240
  description: "Expand articles to 1000+ words with structured H2/H3 sections, comparison tables, and expert analysis. Thin content (under 300 words) is rarely cited by AI engines. Deep, well-structured articles demonstrate expertise."
3241
+ },
3242
+ citation_ready_writing: {
3243
+ name: "Improve Citation-Ready Writing",
3244
+ effort: "Medium",
3245
+ description: 'Write self-contained definition sentences and single-claim statements that AI engines can quote directly. Avoid pronouns like "this" or "that" at the start of answer paragraphs.'
3246
+ },
3247
+ answer_first_placement: {
3248
+ name: "Add Answer-First Placement",
3249
+ effort: "Medium",
3250
+ description: 'Place a concise 40-80 word answer block in the first 300 words of each page. Avoid throat-clearing openers like "In this article..." and lead with the answer.'
3251
+ },
3252
+ evidence_packaging: {
3253
+ name: "Package Evidence for AI",
3254
+ effort: "Medium",
3255
+ description: 'Add inline citations to external sources, "According to [Source]..." attribution phrases, and a Sources section at the end of key articles.'
3256
+ },
3257
+ entity_disambiguation: {
3258
+ name: "Add Entity Disambiguation",
3259
+ effort: "Medium",
3260
+ description: 'Define the primary entity in the first 500 characters, use consistent terminology (same term 70%+), and add "unlike X" signals to help AI engines distinguish your topics.'
3261
+ },
3262
+ extraction_friction: {
3263
+ name: "Reduce Extraction Friction",
3264
+ effort: "Medium",
3265
+ description: "Shorten sentences to under 25 words, use active voice in lead paragraphs, define jargon inline, and avoid hiding content behind accordions or display:none."
3266
+ },
3267
+ image_context_ai: {
3268
+ name: "Add Image Context for AI",
3269
+ effort: "Low",
3270
+ description: 'Wrap images in <figure> elements with descriptive <figcaption> tags. Write alt text with 5+ words that describes the image content, not just "image" or "photo".'
2451
3271
  }
2452
3272
  };
2453
3273
  function calculateImpact(score, weight, effort) {
@@ -2569,8 +3389,8 @@ function generatePitchNumbers(score, rawData, scorecard) {
2569
3389
  const passing = scorecard.filter((s) => s.score >= 7).length;
2570
3390
  metrics.push({
2571
3391
  metric: "Criteria Passing",
2572
- value: `${passing}/28`,
2573
- significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${26 - passing} criteria need attention for full AI visibility`
3392
+ value: `${passing}/${scorecard.length}`,
3393
+ significance: passing >= 18 ? "Excellent coverage across AEO dimensions" : passing >= 12 ? "Good foundation with room to improve remaining criteria" : `${scorecard.length - passing} criteria need attention for full AI visibility`
2574
3394
  });
2575
3395
  return metrics;
2576
3396
  }
@@ -2819,7 +3639,14 @@ var PAGE_CRITERIA = {
2819
3639
  semantic_html: { weight: 0.02, label: "Semantic HTML5 & Accessibility" },
2820
3640
  clean_html: { weight: 0.02, label: "Clean, Crawlable HTML" },
2821
3641
  // Technical Plumbing
2822
- canonical_url: { weight: 0.01, label: "Canonical URL Strategy" }
3642
+ canonical_url: { weight: 0.01, label: "Canonical URL Strategy" },
3643
+ // V2 Criteria
3644
+ citation_ready_writing: { weight: 0.04, label: "Citation-Ready Writing Quality" },
3645
+ answer_first_placement: { weight: 0.03, label: "Answer-First Placement" },
3646
+ evidence_packaging: { weight: 0.03, label: "Evidence Packaging" },
3647
+ entity_disambiguation: { weight: 0.02, label: "Entity Disambiguation" },
3648
+ extraction_friction: { weight: 0.02, label: "Extraction Friction Score" },
3649
+ image_context_ai: { weight: 0.01, label: "Image Context for AI" }
2823
3650
  };
2824
3651
  function extractJsonLdBlocks(html) {
2825
3652
  const blocks = [];
@@ -3106,6 +3933,168 @@ function scoreVisibleDateSignal(html) {
3106
3933
  }
3107
3934
  return cap(score, 10);
3108
3935
  }
3936
/**
 * Page-level heuristic for how directly the prose can be quoted.
 *
 * Points are awarded for four signals: definition-style phrases, short
 * sentences without a comma-joined clause, question headings whose following
 * paragraph does not open with a bare pronoun, and sentences that contain
 * both a figure and a capitalised word.
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The accumulated points passed through `cap(…, 10)`.
 */
function scoreCitationReadyWriting(html) {
  // `high` points from `highAt` matches, `low` points from `lowAt`, else none.
  const tiered = (count, highAt, high, lowAt, low) => count >= highAt ? high : count >= lowAt ? low : 0;

  const text = getTextContent(html);
  let points = 0;

  // 1. "X is a …", "X refers to …", "X defined as …".
  const definitionHits = text.match(/\b\w+\s+(is\s+(?:a|an)\s|refers\s+to|defined\s+as)\b/gi) || [];
  points += tiered(definitionHits.length, 3, 3, 1, 1);

  // 2. Sentences under 30 words with no comma followed by a conjunction.
  const candidateSentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 10);
  let singleClaimCount = 0;
  for (const sentence of candidateSentences) {
    const joinsClauses = /,\s*(and|but|or|which|that|because|although|while)\s/i.test(sentence);
    if (sentence.trim().split(/\s+/).length < 30 && !joinsClauses) singleClaimCount += 1;
  }
  points += tiered(singleClaimCount, 5, 3, 2, 1);

  // 3. At least 80% of question headings are followed by a paragraph that
  //    does not start with this/that/these/those/it.
  const questionHeadings = extractQuestionHeadings2(html);
  if (questionHeadings.length > 0) {
    let selfContained = 0;
    for (const heading of questionHeadings) {
      const literal = heading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      const found = new RegExp(literal + "[\\s\\S]{0,200}?<\\/h[23]>\\s*<p[^>]*>([^<]{0,200})", "i").exec(html);
      if (!found || !found[1]) continue;
      if (/^\s*(this|that|these|those|it)\s/i.test(found[1].trim())) continue;
      selfContained += 1;
    }
    if (selfContained / questionHeadings.length >= 0.8) points += 2;
  }

  // 4. Sentences pairing a percentage, dollar amount or grouped number with a
  //    capitalised word.
  const figurePattern = /\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/;
  const capitalisedPattern = /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/;
  const quotableCount = candidateSentences.filter(
    (s) => figurePattern.test(s) && capitalisedPattern.test(s)
  ).length;
  points += tiered(quotableCount, 2, 2, 1, 1);

  return cap(points, 10);
}
3969
/**
 * Page-level heuristic for whether the page leads with its answer.
 *
 * Signals:
 *  - +4 when one of the first five <p> elements is 40-80 words long and
 *    starts inside the first 300 words of the body text;
 *  - +3 when the first <p> does not open with a throat-clearing phrase;
 *  - +3 when the first sentence of the body text contains an H1 word longer
 *    than three characters.
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The accumulated points passed through `cap(…, 10)`.
 */
function scoreAnswerFirstPlacement(html) {
  // Body text and paragraph text must be normalised the same way. Previously
  // paragraphs kept their raw newlines/indentation while the body text had
  // whitespace collapsed, so any multi-line <p> failed the `includes` test.
  const toPlainText = (fragment) => fragment.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();

  let score = 0;
  const bodyMatch = html.match(/<body[^>]*>([\s\S]*)/i);
  const bodyHtml = bodyMatch ? bodyMatch[1] : html;
  const bodyText = toPlainText(bodyHtml);
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
  const first300Words = bodyText.split(/\s+/).slice(0, 300).join(" ");
  for (const p of earlyParagraphs) {
    const pText = toPlainText(p);
    const wordCount = pText.split(/\s+/).length;
    if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
      score += 4;
      break;
    }
  }

  const firstPara = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
  if (firstPara) {
    // Collapse whitespace so an opener split across lines is still recognised.
    const firstParaText = firstPara[1].replace(/<[^>]*>/g, "").replace(/\s+/g, " ").trim();
    if (!/^(In today|Welcome to|Have you ever|If you're looking|In this (article|post|guide))/i.test(firstParaText)) {
      score += 3;
    }
  }

  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (h1Match) {
    const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
    const h1Words = h1Text.split(/\s+/).filter((w) => w.length > 3);
    const firstSentence = (bodyText.split(/[.!?]/)[0] || "").toLowerCase();
    if (h1Words.some((w) => firstSentence.includes(w.toLowerCase()))) {
      score += 3;
    }
  }
  return cap(score, 10);
}
4002
/**
 * Page-level heuristic for how well claims are backed by visible evidence.
 *
 * Signals:
 *  - absolute http(s) links inside <p> elements (3+ → +3, 1+ → +1);
 *  - an h2-h4 heading mentioning source/reference/citation/bibliography (+2);
 *  - attribution phrases such as "According to Gartner" or "Forrester found"
 *    (3+ → +3, 1+ → +1);
 *  - statistics followed, in the same sentence, by a capitalised word
 *    (2+ → +2, 1 → +1).
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The accumulated points passed through `cap(…, 10)`.
 */
function scoreEvidencePackaging(html) {
  let score = 0;
  const text = getTextContent(html);

  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
  let inlineCitations = 0;
  for (const p of paragraphs) {
    inlineCitations += (p.match(/<a[^>]*href=["'](https?:\/\/[^"']+)["'][^>]*>/gi) || []).length;
  }
  if (inlineCitations >= 3) score += 3;
  else if (inlineCitations >= 1) score += 1;

  if (/<h[2-4][^>]*>[^<]*(source|reference|citation|bibliography)[^<]*<\/h[2-4]>/i.test(html)) {
    score += 2;
  }

  // No `i` flag here: with it `[A-Z]` also matched lowercase letters, so
  // "it says" or "according to the" counted as a named attribution. Only the
  // leading "A" of "according" is allowed in either case.
  const attributionPhrases = text.match(/\b[Aa]ccording\s+to\s+[A-Z]|\b[A-Z][a-z]+\s+(?:found|says|reports|notes|states)\b/g) || [];
  if (attributionPhrases.length >= 3) score += 3;
  else if (attributionPhrases.length >= 1) score += 1;

  // The word boundary applies to the unit words only. The previous pattern
  // put `\b` after "%", which cannot match when "%" is followed by a space or
  // punctuation, so percentage statistics were never counted.
  const statPattern = /\d+(?:\.\d+)?(?:%|\s*(?:million|billion|thousand|percent)\b)([^.]*)/gi;
  let sourcedStats = 0;
  for (const stat of text.matchAll(statPattern)) {
    // The remainder of the sentence must name something capitalised
    // (checked case-sensitively, unlike the unit words above).
    if (/\b[A-Z][a-z]+\b/.test(stat[1])) sourcedStats += 1;
  }
  if (sourcedStats >= 2) score += 2;
  else if (sourcedStats >= 1) score += 1;

  return cap(score, 10);
}
4024
/**
 * Page-level heuristic for how clearly the page's primary entity is
 * introduced and distinguished.
 *
 * The primary entity is approximated by the longest H1 word of more than
 * three characters. Signals:
 *  - +4 when that word is followed by is/refers/defined/means within the
 *    same sentence in the first 500 characters of text;
 *  - +3 for three or more mentions of the word, +1 for at least one;
 *  - +3 when the text contains "unlike …", "compared to …" or
 *    "not to be confused with".
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} 3 when there is no H1 or no usable H1 word, otherwise the
 *   accumulated points passed through `cap(…, 10)`.
 */
function scoreEntityDisambiguation(html) {
  let score = 0;
  const text = getTextContent(html);
  const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!h1Match) return 3;

  const h1Text = h1Match[1].replace(/<[^>]*>/g, "").trim();
  // Strip punctuation glued to each word before choosing the primary noun.
  // A token such as "Kubernetes?" or "Guide:" used to be embedded verbatim
  // between `\b` anchors, and `\?\b` / `:\b` cannot match before whitespace,
  // so such headings never scored for definition or mentions.
  const h1Words = h1Text
    .split(/\s+/)
    .map((w) => w.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
    .filter((w) => w.length > 3);
  // Longest word wins; the earliest one is kept on ties.
  const primaryNoun = h1Words.reduce((longest, w) => (w.length > longest.length ? w : longest), "");
  if (!primaryNoun) return 3;

  const nounPattern = primaryNoun.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const first500 = text.slice(0, 500);
  if (new RegExp(`\\b${nounPattern}\\b[^.]*\\b(is|refers|defined|means)\\b`, "i").test(first500)) {
    score += 4;
  }

  const mentions = text.match(new RegExp(`\\b${nounPattern}\\b`, "gi")) || [];
  if (mentions.length >= 3) score += 3;
  else if (mentions.length >= 1) score += 1;

  if (/\bunlike\s+\w/i.test(text) || /\bcompared\s+to\s+\w/i.test(text) || /\bnot\s+to\s+be\s+confused\s+with\b/i.test(text)) {
    score += 3;
  }
  return cap(score, 10);
}
4046
/**
 * Page-level heuristic for how easy the text is to lift out of the page.
 *
 * Signals: average sentence length, a compact lead (first three paragraphs
 * after the H1, 11-75 words, no parenthetical), few words of ten or more
 * letters in the first 500 words, and a 2-point penalty when markup that
 * looks hidden is present.
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} The accumulated points passed through `cap(…, 10)`.
 */
function scoreExtractionFriction(html) {
  const text = getTextContent(html);
  let points = 0;

  // Average words per sentence (fragments of five characters or fewer ignored).
  const wordsPerSentence = text
    .split(/[.!?]+/)
    .filter((s) => s.trim().length > 5)
    .map((s) => s.trim().split(/\s+/).length);
  let meanWords = 0;
  if (wordsPerSentence.length > 0) {
    let total = 0;
    for (const n of wordsPerSentence) total += n;
    meanWords = total / wordsPerSentence.length;
  }
  if (meanWords > 0) {
    if (meanWords < 20) points += 3;
    else if (meanWords <= 25) points += 2;
    else if (meanWords <= 35) points += 1;
  }

  // Lead paragraphs directly after the first H1.
  const h1 = html.match(/<h1[^>]*>[\s\S]*?<\/h1>/i);
  if (h1) {
    const afterHeading = html.slice(html.indexOf(h1[0]) + h1[0].length);
    const leadParagraphs = afterHeading.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 3) || [];
    const leadText = leadParagraphs
      .map((p) => p.replace(/<[^>]*>/g, ""))
      .join(" ")
      .trim();
    const leadWordCount = leadText.split(/\s+/).length;
    const hasParenthetical = /\([^)]+\)/.test(leadText);
    if (leadWordCount > 10 && leadWordCount <= 75 && !hasParenthetical) {
      points += 3;
    }
  }

  // Long words (10+ letters) in the opening 500 words.
  const opening = text.split(/\s+/).slice(0, 500).join(" ");
  const longWordCount = (opening.match(/\b[a-z]{10,}\b/gi) || []).length;
  if (longWordCount <= 2) points += 2;
  else if (longWordCount <= 5) points += 1;

  // Penalty: a hiding style somewhere in the page plus a div/section/p tag
  // carrying an inline display:none or the word "hidden".
  const usesHidingStyle = /display\s*:\s*none|visibility\s*:\s*hidden/i.test(html);
  if (usesHidingStyle && /<(div|section|p)[^>]*(?:style=["'][^"']*display\s*:\s*none|hidden)[^>]*>/i.test(html)) {
    points = Math.max(0, points - 2);
  }

  return cap(points, 10);
}
4073
/**
 * Page-level heuristic for how much textual context images carry.
 *
 * Signals:
 *  - ratio of <figure> blocks containing a <figcaption> to <img> tags
 *    (>= 0.5 → +4, >= 0.25 → +2);
 *  - share of images whose alt text has more than five words
 *    (>= 0.5 → +3, at least one → +1);
 *  - at least one <img> nested in an <article> or <section> (+3).
 *
 * @param {string} html - Raw page HTML.
 * @returns {number} 5 when the page has no images, otherwise the accumulated
 *   points passed through `cap(…, 10)`.
 */
function scoreImageContextAI(html) {
  let score = 0;
  const allImages = html.match(/<img\s[^>]*>/gi) || [];
  if (allImages.length === 0) return 5;

  const figureBlocks = html.match(/<figure[\s\S]*?<\/figure>/gi) || [];
  const figuresWithCaption = figureBlocks.filter((f) => /<figcaption/i.test(f));
  const figureRatio = figuresWithCaption.length / allImages.length;
  if (figureRatio >= 0.5) score += 4;
  else if (figureRatio >= 0.25) score += 2;

  let goodAltCount = 0;
  for (const img of allImages) {
    // Match the value up to the SAME quote character that opened it. The
    // previous `["']([^"']+)["']` stopped at the first quote of either kind,
    // so alt="A dog's bowl on the kitchen floor" was read as "A dog".
    const altMatch = img.match(/\salt\s*=\s*(?:"([^"]*)"|'([^']*)')/i);
    if (!altMatch) continue;
    const altText = (altMatch[1] ?? altMatch[2] ?? "").trim();
    // More than five words. A single generic word ("image", "photo", …) can
    // never satisfy this, so no separate generic-term test is needed.
    if (altText.split(/\s+/).length > 5) goodAltCount++;
  }
  if (goodAltCount / allImages.length >= 0.5) score += 3;
  else if (goodAltCount > 0) score += 1;

  const contextualImages = html.match(/<(article|section)[^>]*>[\s\S]*?<img[^>]*>[\s\S]*?<\/\1>/gi) || [];
  if (contextualImages.length > 0) score += 3;
  return cap(score, 10);
}
3109
4098
  var SCORING_FUNCTIONS = {
3110
4099
  schema_markup: scoreSchemaMarkup,
3111
4100
  qa_content_format: scoreQAFormat,
@@ -3120,7 +4109,13 @@ var SCORING_FUNCTIONS = {
3120
4109
  fact_density: scoreFactDensity,
3121
4110
  definition_patterns: scoreDefinitionPatterns,
3122
4111
  canonical_url: scoreCanonicalUrl,
3123
- visible_date_signal: scoreVisibleDateSignal
4112
+ visible_date_signal: scoreVisibleDateSignal,
4113
+ citation_ready_writing: scoreCitationReadyWriting,
4114
+ answer_first_placement: scoreAnswerFirstPlacement,
4115
+ evidence_packaging: scoreEvidencePackaging,
4116
+ entity_disambiguation: scoreEntityDisambiguation,
4117
+ extraction_friction: scoreExtractionFriction,
4118
+ image_context_ai: scoreImageContextAI
3124
4119
  };
3125
4120
  function scorePage(html, url) {
3126
4121
  let totalWeight = 0;
@@ -3133,8 +4128,10 @@ function scorePage(html, url) {
3133
4128
  weightedSum += score / 10 * weight * 100;
3134
4129
  totalWeight += weight;
3135
4130
  }
3136
- const aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
3137
- return { aeoScore, criterionScores };
4131
+ let aeoScore = totalWeight === 0 ? 0 : Math.round(weightedSum / totalWeight);
4132
+ const scoreCapped = aeoScore > 75;
4133
+ if (scoreCapped) aeoScore = 75;
4134
+ return { aeoScore, criterionScores, scoreCapped };
3138
4135
  }
3139
4136
  function scoreAllPages(siteData) {
3140
4137
  const results = [];
@@ -3191,7 +4188,7 @@ function checkNoH1(html) {
3191
4188
  function checkMultipleH1(html) {
3192
4189
  const h1Matches = html.match(/<h1[\s>]/gi);
3193
4190
  if (h1Matches && h1Matches.length > 1) {
3194
- return { check: "multiple-h1", label: `Multiple <h1> tags (${h1Matches.length})`, severity: "warning" };
4191
+ return { check: "multiple-h1", label: `Multiple <h1> tags (${h1Matches.length})`, severity: "error" };
3195
4192
  }
3196
4193
  return null;
3197
4194
  }
@@ -3307,6 +4304,50 @@ function checkHasQuestionHeadings(html) {
3307
4304
  }
3308
4305
  return null;
3309
4306
  }
4307
/**
 * Flags pages that lack a short answer paragraph near the top.
 *
 * A page passes when one of its first five <p> elements is 40-80 words long
 * and begins within the first 300 words of the body text.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "no-answer-block" warning, or null when an answer block is found.
 */
function checkNoAnswerBlock(html) {
  // Paragraph text and body text must be normalised identically. Previously
  // the paragraph kept its raw newlines/indentation while the body text had
  // whitespace collapsed, so a <p> wrapped over several source lines could
  // never be found inside the 300-word window.
  const toPlainText = (fragment) => fragment.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();

  const bodyMatch = html.match(/<body[^>]*>([\s\S]*)/i);
  const bodyHtml = bodyMatch ? bodyMatch[1] : html;
  const earlyParagraphs = bodyHtml.match(/<p[^>]*>([\s\S]*?)<\/p>/gi)?.slice(0, 5) || [];
  const first300Words = toPlainText(bodyHtml).split(/\s+/).slice(0, 300).join(" ");

  for (const p of earlyParagraphs) {
    const pText = toPlainText(p);
    const wordCount = pText.split(/\s+/).length;
    if (wordCount >= 40 && wordCount <= 80 && first300Words.includes(pText.slice(0, 50))) {
      return null;
    }
  }
  return { check: "no-answer-block", label: "No short answer block (40-80 words) in first 300 words", severity: "warning" };
}
4322
/**
 * Flags pages that show neither inline citations nor attribution phrases.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} url - Page URL; accepted for signature compatibility and
 *   not used by this check.
 * @returns {{check: string, label: string, severity: string} | null}
 *   A "no-evidence" warning, or null when some evidence is found.
 */
function checkNoEvidence(html, url) {
  const text = getTextContent2(html);

  // Absolute http(s) links that sit inside paragraphs.
  const paragraphs = html.match(/<p[^>]*>[\s\S]*?<\/p>/gi) || [];
  let inlineCitations = 0;
  for (const p of paragraphs) {
    inlineCitations += (p.match(/<a[^>]*href=["'](https?:\/\/[^"']+)["'][^>]*>/gi) || []).length;
  }

  // No `i` flag: with it `[A-Z]` matched lowercase letters too, so ordinary
  // phrases like "it says" or "according to the" suppressed the warning.
  // Only the leading "A" of "according" may be in either case.
  const attributionPhrases = text.match(/\b[Aa]ccording\s+to\s+[A-Z]|\b[A-Z][a-z]+\s+(?:found|says|reports|notes|states)\b/g) || [];

  if (inlineCitations === 0 && attributionPhrases.length === 0) {
    return { check: "no-evidence", label: "No inline citations or attribution phrases", severity: "warning" };
  }
  return null;
}
4336
/**
 * Reports a strength when the page has at least three short sentences that
 * contain both a figure (percentage, dollar amount or grouped number) and a
 * capitalised word.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{check: string, label: string, severity: string} | null}
 *   An "info" entry naming the count, or null when fewer than three qualify.
 */
function checkHasCitationReadyContent(html) {
  const figurePattern = /\d+(\.\d+)?%|\$[\d,.]+|\b\d{1,3}(,\d{3})+\b/;
  const capitalisedPattern = /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/;

  const quotable = getTextContent2(html)
    .split(/[.!?]+/)
    .filter((s) => s.trim().length > 10)
    .filter((s) => figurePattern.test(s) && capitalisedPattern.test(s) && s.trim().split(/\s+/).length < 30)
    .length;

  if (quotable < 3) return null;
  return {
    check: "has-citation-ready-content",
    label: `${quotable} citation-ready sentences with data and sources`,
    severity: "info"
  };
}
3310
4351
  function analyzePage(html, url, category) {
3311
4352
  const title = extractTitle(html);
3312
4353
  const textContent = getTextContent2(html);
@@ -3323,14 +4364,17 @@ function analyzePage(html, url, category) {
3323
4364
  checkMissingOgTags(html),
3324
4365
  checkThinContent(wordCount),
3325
4366
  checkImagesMissingAlt(html),
3326
- checkNoInternalLinks(html, url)
4367
+ checkNoInternalLinks(html, url),
4368
+ checkNoAnswerBlock(html),
4369
+ checkNoEvidence(html, url)
3327
4370
  ];
3328
4371
  for (const result of issueChecks) {
3329
4372
  if (result) issues.push(result);
3330
4373
  }
3331
4374
  const strengthChecks = [
3332
4375
  checkHasStructuredData(html),
3333
- checkHasQuestionHeadings(html)
4376
+ checkHasQuestionHeadings(html),
4377
+ checkHasCitationReadyContent(html)
3334
4378
  ];
3335
4379
  for (const result of strengthChecks) {
3336
4380
  if (result) strengths.push(result);
@@ -3619,7 +4663,14 @@ var CRITERION_WEIGHTS2 = {
3619
4663
  canonical_url: 0.01,
3620
4664
  rss_feed: 0.01,
3621
4665
  schema_coverage: 0.01,
3622
- speakable_schema: 0.01
4666
+ speakable_schema: 0.01,
4667
+ // V2 Criteria (~15%)
4668
+ citation_ready_writing: 0.04,
4669
+ answer_first_placement: 0.03,
4670
+ evidence_packaging: 0.03,
4671
+ entity_disambiguation: 0.02,
4672
+ extraction_friction: 0.02,
4673
+ image_context_ai: 0.01
3623
4674
  };
3624
4675
  var PHASE_CONFIG = [
3625
4676
  {
@@ -3644,7 +4695,11 @@ var PHASE_CONFIG = [
3644
4695
  "query_answer_alignment",
3645
4696
  "visible_date_signal",
3646
4697
  "topic_coherence",
3647
- "content_depth"
4698
+ "content_depth",
4699
+ "citation_ready_writing",
4700
+ "answer_first_placement",
4701
+ "evidence_packaging",
4702
+ "entity_disambiguation"
3648
4703
  ]
3649
4704
  },
3650
4705
  {
@@ -3658,7 +4713,9 @@ var PHASE_CONFIG = [
3658
4713
  "author_schema_depth",
3659
4714
  "content_licensing",
3660
4715
  "entity_consistency",
3661
- "semantic_html"
4716
+ "semantic_html",
4717
+ "image_context_ai",
4718
+ "extraction_friction"
3662
4719
  ]
3663
4720
  },
3664
4721
  {
@@ -4597,6 +5654,160 @@ Summarization: yes`,
4597
5654
  affectedPages: affected,
4598
5655
  pageCount: affected?.length
4599
5656
  }];
5657
+ },
5658
+ citation_ready_writing: (c, pages) => {
5659
+ if (c.score >= 10) return [];
5660
+ const impact = impactFromScore(c.score);
5661
+ const effort = effortForCriterion("citation_ready_writing", c.score);
5662
+ const affected = getAffectedPages("citation_ready_writing", pages);
5663
+ return [{
5664
+ id: `fix-${c.criterion}`,
5665
+ criterion: c.criterion_label,
5666
+ criterionId: c.criterion,
5667
+ title: "Improve citation-ready writing quality",
5668
+ description: "Write self-contained definition sentences and single-claim statements that AI engines can quote directly.",
5669
+ impact,
5670
+ effort,
5671
+ impactScore: 0,
5672
+ category: "content",
5673
+ steps: [
5674
+ 'Write "X is a..." definition sentences for key concepts',
5675
+ "Keep sentences under 30 words with a single claim each",
5676
+ 'Start answer paragraphs with the entity name, not "This" or "That"',
5677
+ "Pair statistics with named sources for quotable lines"
5678
+ ],
5679
+ successCriteria: "Average 3+ definition sentences and 5+ single-claim sentences per page",
5680
+ affectedPages: affected,
5681
+ pageCount: affected?.length
5682
+ }];
5683
+ },
5684
+ answer_first_placement: (c, pages) => {
5685
+ if (c.score >= 10) return [];
5686
+ const impact = impactFromScore(c.score);
5687
+ const effort = effortForCriterion("answer_first_placement", c.score);
5688
+ const affected = getAffectedPages("answer_first_placement", pages);
5689
+ return [{
5690
+ id: `fix-${c.criterion}`,
5691
+ criterion: c.criterion_label,
5692
+ criterionId: c.criterion,
5693
+ title: "Add answer-first content placement",
5694
+ description: "Place a concise answer block in the first 300 words of each page.",
5695
+ impact,
5696
+ effort,
5697
+ impactScore: 0,
5698
+ category: "content",
5699
+ steps: [
5700
+ "Add a 40-80 word answer paragraph within the first 300 words",
5701
+ 'Replace "In this article..." openers with direct answers',
5702
+ "Include the H1 topic entity in the first body sentence"
5703
+ ],
5704
+ successCriteria: "70%+ of pages have an answer block in the first 300 words",
5705
+ affectedPages: affected,
5706
+ pageCount: affected?.length
5707
+ }];
5708
+ },
5709
+ evidence_packaging: (c, pages) => {
5710
+ if (c.score >= 10) return [];
5711
+ const impact = impactFromScore(c.score);
5712
+ const effort = effortForCriterion("evidence_packaging", c.score);
5713
+ const affected = getAffectedPages("evidence_packaging", pages);
5714
+ return [{
5715
+ id: `fix-${c.criterion}`,
5716
+ criterion: c.criterion_label,
5717
+ criterionId: c.criterion,
5718
+ title: "Package evidence for AI engines",
5719
+ description: "Add inline citations, attribution phrases, and sources sections.",
5720
+ impact,
5721
+ effort,
5722
+ impactScore: 0,
5723
+ category: "content",
5724
+ steps: [
5725
+ "Add inline links to authoritative external sources within paragraphs",
5726
+ 'Use "According to [Source]..." attribution phrases',
5727
+ 'Add a "Sources" or "References" heading at the end of articles',
5728
+ "Pair statistics with named source organizations"
5729
+ ],
5730
+ successCriteria: "Average 3+ inline citations and 3+ attribution phrases per page",
5731
+ affectedPages: affected,
5732
+ pageCount: affected?.length
5733
+ }];
5734
+ },
5735
+ entity_disambiguation: (c, pages) => {
5736
+ if (c.score >= 10) return [];
5737
+ const impact = impactFromScore(c.score);
5738
+ const effort = effortForCriterion("entity_disambiguation", c.score);
5739
+ const affected = getAffectedPages("entity_disambiguation", pages);
5740
+ return [{
5741
+ id: `fix-${c.criterion}`,
5742
+ criterion: c.criterion_label,
5743
+ criterionId: c.criterion,
5744
+ title: "Clarify entity boundaries",
5745
+ description: "Define the primary entity early and use consistent terminology.",
5746
+ impact,
5747
+ effort,
5748
+ impactScore: 0,
5749
+ category: "content",
5750
+ steps: [
5751
+ "Define the primary entity within the first 500 characters",
5752
+ "Use the same primary term consistently (avoid synonym soup)",
5753
+ 'Add "unlike X" or "compared to Y" disambiguation signals'
5754
+ ],
5755
+ successCriteria: "70%+ of pages define the primary entity early with consistent terminology",
5756
+ affectedPages: affected,
5757
+ pageCount: affected?.length
5758
+ }];
5759
+ },
5760
+ extraction_friction: (c, pages) => {
5761
+ if (c.score >= 10) return [];
5762
+ const impact = impactFromScore(c.score);
5763
+ const effort = effortForCriterion("extraction_friction", c.score);
5764
+ const affected = getAffectedPages("extraction_friction", pages);
5765
+ return [{
5766
+ id: `fix-${c.criterion}`,
5767
+ criterion: c.criterion_label,
5768
+ criterionId: c.criterion,
5769
+ title: "Reduce extraction friction",
5770
+ description: "Make content easier for AI engines to extract by reducing sentence length and jargon.",
5771
+ impact,
5772
+ effort,
5773
+ impactScore: 0,
5774
+ category: "structure",
5775
+ steps: [
5776
+ "Shorten sentences to under 25 words on average",
5777
+ "Use active voice in lead paragraphs (first 2-3 sentences)",
5778
+ "Define jargon inline when first used",
5779
+ "Remove display:none hidden content and accordion-only content"
5780
+ ],
5781
+ successCriteria: "Average sentence length under 25 words with no hidden content",
5782
+ affectedPages: affected,
5783
+ pageCount: affected?.length
5784
+ }];
5785
+ },
5786
+ image_context_ai: (c, pages) => {
5787
+ if (c.score >= 10) return [];
5788
+ const impact = impactFromScore(c.score);
5789
+ const effort = effortForCriterion("image_context_ai", c.score);
5790
+ const affected = getAffectedPages("image_context_ai", pages);
5791
+ return [{
5792
+ id: `fix-${c.criterion}`,
5793
+ criterion: c.criterion_label,
5794
+ criterionId: c.criterion,
5795
+ title: "Add image context for AI engines",
5796
+ description: "Wrap images in <figure>/<figcaption> with descriptive alt text.",
5797
+ impact,
5798
+ effort,
5799
+ impactScore: 0,
5800
+ category: "trust",
5801
+ steps: [
5802
+ "Wrap images in <figure> elements with descriptive <figcaption>",
5803
+ 'Write alt text with 5+ descriptive words (avoid "image" or "photo")',
5804
+ "Place images within <article> or <section> elements"
5805
+ ],
5806
+ codeExample: '<figure>\n <img src="chart.png" alt="Bar chart showing 45% reduction in fall risk for walker users">\n <figcaption>Walker users experienced a 45% reduction in fall risk (2025 study)</figcaption>\n</figure>',
5807
+ successCriteria: "50%+ of images use <figure>/<figcaption> with descriptive alt text",
5808
+ affectedPages: affected,
5809
+ pageCount: affected?.length
5810
+ }];
4600
5811
  }
4601
5812
  };
4602
5813
  function generateFixPlan(domain, overallScore, criteria, pagesReviewed, linkGraph) {
@@ -4838,7 +6049,7 @@ async function audit(domain, options) {
4838
6049
  }
4839
6050
  }
4840
6051
  if (options?.fullCrawl) {
4841
- const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-UIOMKOZA.js");
6052
+ const { crawlFullSite: crawlFullSite2 } = await import("./full-site-crawler-BCJS67WQ.js");
4842
6053
  const crawlResult = await crawlFullSite2(siteData, {
4843
6054
  maxPages: options.maxPages ?? 200,
4844
6055
  concurrency: options.concurrency ?? 5
@@ -4857,6 +6068,10 @@ async function audit(domain, options) {
4857
6068
  const overallScore = calculateOverallScore(results);
4858
6069
  const rawData = extractRawDataSummary(siteData);
4859
6070
  if (renderedWithHeadless) rawData.rendered_with_headless = true;
6071
+ const pillarScores = computePillarScores(results);
6072
+ const topFixesRaw = computeTopFixes(results, 3);
6073
+ const coherenceResult = results.find((r) => r.criterion === "topic_coherence");
6074
+ const coherenceGated = !!(coherenceResult && coherenceResult.score < 6);
4860
6075
  const scorecard = buildScorecard(results);
4861
6076
  const detailedFindings = buildDetailedFindings(results);
4862
6077
  const verdict = generateVerdict(overallScore, scorecard, rawData, domain);
@@ -4878,6 +6093,10 @@ async function audit(domain, options) {
4878
6093
  pitchNumbers,
4879
6094
  bottomLine,
4880
6095
  pagesReviewed,
6096
+ pillarScores,
6097
+ coherenceGated,
6098
+ criteriaScored: results.length,
6099
+ topFixes: topFixesRaw.map((f) => ({ fix: f.clientName, impact: `+${f.impactPoints} pts`, effort: f.effort })),
4881
6100
  elapsed,
4882
6101
  ...renderedWithHeadless && { renderedWithHeadless: true }
4883
6102
  };
@@ -4924,7 +6143,10 @@ async function compare(domainA, domainB, options) {
4924
6143
  };
4925
6144
  }
4926
6145
  export {
6146
+ CLIENT_NAMES,
4927
6147
  CRITERION_LABELS,
6148
+ PILLARS,
6149
+ WEIGHTS,
4928
6150
  analyzeAllPages,
4929
6151
  analyzePage,
4930
6152
  auditSite,
@@ -4934,6 +6156,8 @@ export {
4934
6156
  buildScorecard,
4935
6157
  calculateOverallScore,
4936
6158
  compare,
6159
+ computePillarScores,
6160
+ computeTopFixes,
4937
6161
  countRecentSitemapDates,
4938
6162
  crawlFullSite,
4939
6163
  detectParkedDomain,
@@ -4949,9 +6173,16 @@ export {
4949
6173
  generateOpportunities,
4950
6174
  generatePitchNumbers,
4951
6175
  generateVerdict,
6176
+ getCriterionPillar,
4952
6177
  inferCategory,
4953
6178
  prefetchSiteData,
4954
6179
  scoreAllPages,
6180
+ scoreAnswerFirstPlacement,
6181
+ scoreCitationReadyWriting,
6182
+ scoreEntityDisambiguation,
6183
+ scoreEvidencePackaging,
6184
+ scoreExtractionFriction,
6185
+ scoreImageContextAI,
4955
6186
  scorePage,
4956
6187
  scoreToStatus,
4957
6188
  serializeLinkGraph