aeo-ready 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +9 -62
- package/package.json +2 -2
- package/src/dashboard/generate.js +22 -48
- package/src/dashboard/sections/benchmark-details.js +79 -0
- package/src/dashboard/sections/history-table.js +10 -7
- package/src/dashboard/sections/overall-score.js +55 -118
- package/src/dashboard/sections/trend-chart.js +31 -46
- package/src/history/index.js +8 -15
- package/src/scan.js +59 -293
- package/src/checks/agent-readiness/actionable.js +0 -165
- package/src/checks/agent-readiness/capability.js +0 -209
- package/src/checks/agent-readiness/content-structure.js +0 -242
- package/src/checks/agent-readiness/discovery.js +0 -231
- package/src/checks/ai-visibility/authority.js +0 -195
- package/src/checks/ai-visibility/citation-readiness.js +0 -228
- package/src/checks/ai-visibility/freshness.js +0 -182
- package/src/checks/ai-visibility/structured-data.js +0 -180
- package/src/dashboard/sections/agent-readiness.js +0 -71
- package/src/dashboard/sections/ai-visibility.js +0 -67
- package/src/dashboard/sections/recommendations.js +0 -196
- package/src/fix/generators/agents-json.js +0 -73
- package/src/fix/generators/agents-md.js +0 -85
- package/src/fix/generators/llms-txt.js +0 -166
- package/src/fix/generators/robots-txt.js +0 -64
- package/src/fix/index.js +0 -177
- package/src/track/index.js +0 -167
- package/src/utils/detect-type.js +0 -99
- package/src/utils/tokens.js +0 -18
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import { estimateTokens } from "../../utils/tokens.js";
|
|
2
|
-
|
|
3
|
-
// User-agent names that the robots.txt check searches for.
const AI_CRAWLERS = [
  "GPTBot", "OAI-SearchBot",
  "ClaudeBot", "Claude-User", "Claude-SearchBot",
  "Google-Extended",
  "PerplexityBot",
  "Meta-ExternalAgent",
  "CCBot",
];
|
|
14
|
-
|
|
15
|
-
/**
 * Runs the discovery checks (llms.txt, robots.txt, sitemap.xml, meta tags).
 *
 * Only checks whose `passed` flag is true contribute their points to the
 * score; results with `passed: false` add nothing even when they carry points.
 *
 * @param {object} context - Scan context handed to every individual check.
 * @returns {Promise<{score: number, maxScore: number, checks: object[]}>}
 */
export async function runDiscoveryChecks(context) {
  const runners = [checkLlmsTxt, checkRobotsTxt, checkSitemap, checkAiMetaTags];
  const checks = runners.map((run) => run(context));

  let score = 0;
  for (const { passed, points } of checks) {
    if (passed) score += points;
  }

  return { score, maxScore: 12, checks };
}
|
|
26
|
-
|
|
27
|
-
/**
 * Scores the llms.txt file: it must exist, stay within the token budget,
 * contain more than placeholder text, and front-load its key information.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkLlmsTxt(context) {
  const label = "llms.txt";
  const maxPoints = 4;

  const body = getContent(context, "llms.txt", "/llms.txt");
  if (!body) {
    return fail(
      label,
      maxPoints,
      "No llms.txt found. Create one describing what your site does for AI agents.",
    );
  }

  const tokens = estimateTokens(body);
  if (tokens > 5000) {
    return partial(
      label,
      2,
      maxPoints,
      `llms.txt exists but is ${tokens} tokens (budget: <5000).`,
    );
  }

  // Very short files and files containing "lorem" are treated as placeholders.
  const isPlaceholder =
    body.length <= 100 || body.toLowerCase().includes("lorem");
  if (isPlaceholder) {
    return partial(
      label,
      1,
      maxPoints,
      "llms.txt exists but has minimal/placeholder content.",
    );
  }

  if (checkFrontLoading(body)) {
    return pass(label, maxPoints);
  }
  return partial(
    label,
    3,
    maxPoints,
    "llms.txt exists but doesn't front-load what/why/how in first 500 tokens.",
  );
}
|
|
68
|
-
|
|
69
|
-
/**
 * Scores robots.txt by how many known AI crawlers it explicitly allows or
 * blocks.
 *
 * Crawler lookup is delegated entirely to hasAllow/hasDisallow. The previous
 * case-sensitive `content.includes(bot)` pre-filter hid entries written in a
 * different letter case (e.g. `user-agent: gptbot`). The "x/y allowed" message
 * now derives y from AI_CRAWLERS instead of a hard-coded 9.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkRobotsTxt(context) {
  const label = "robots.txt AI crawlers";
  const content = getContent(context, "robots.txt", "/robots.txt");
  if (!content) {
    return fail(
      label,
      3,
      "No robots.txt found. Add one with explicit AI crawler rules.",
    );
  }

  const allowed = AI_CRAWLERS.filter((bot) => hasAllow(content, bot));
  const blocked = AI_CRAWLERS.filter((bot) => hasDisallow(content, bot));

  if (allowed.length >= 5) {
    return pass(label, 3);
  }
  if (blocked.length > 3) {
    return fail(
      label,
      3,
      `Blocks ${blocked.length} AI crawlers. Consider allowing for discoverability.`,
    );
  }
  if (allowed.length > 0) {
    return partial(
      label,
      1,
      3,
      `Only ${allowed.length}/${AI_CRAWLERS.length} AI crawlers explicitly allowed.`,
    );
  }
  return partial(
    label,
    1,
    3,
    "robots.txt exists but doesn't mention AI crawlers. Add explicit Allow rules.",
  );
}
|
|
111
|
-
|
|
112
|
-
/**
 * Checks that sitemap.xml exists and contains at least one `<url>` or
 * `<loc>` tag.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkSitemap(context) {
  const xml = getContent(context, "sitemap.xml", "/sitemap.xml");
  if (!xml) {
    return fail("sitemap.xml", 2, "No sitemap.xml found.");
  }

  const entryTags = ["<url>", "<loc>"];
  if (entryTags.some((tag) => xml.includes(tag))) {
    return pass("sitemap.xml", 2);
  }
  return partial(
    "sitemap.xml",
    1,
    2,
    "sitemap.xml exists but has no URL entries.",
  );
}
|
|
127
|
-
|
|
128
|
-
/**
 * Looks for three kinds of metadata in the page HTML: Open Graph title or
 * description, a meta description, and JSON-LD. One point per kind found.
 *
 * @param {object} context - Scan context; reads `context.html`.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkAiMetaTags(context) {
  const label = "AI-friendly meta tags";
  const markup = context.html || "";
  if (!markup) {
    return fail(label, 3, "No HTML available to check meta tags.");
  }

  const found = [
    markup.includes("og:title") || markup.includes("og:description"),
    markup.includes('meta name="description"') ||
      markup.includes("meta name='description'"),
    markup.includes("application/ld+json"),
  ];
  const points = found.filter(Boolean).length;

  if (points === 3) {
    return pass(label, 3);
  }
  if (points > 0) {
    return partial(
      label,
      points,
      3,
      "Some meta tags present but incomplete (need og:, description, JSON-LD).",
    );
  }
  return fail(
    label,
    3,
    "No AI-friendly meta tags (og:, description, JSON-LD) found.",
  );
}
|
|
161
|
-
|
|
162
|
-
// Maps a well-known filename to the key under which getContent looks it up
// in `context.pages` when scanning in URL mode.
const PAGE_KEY_MAP = {
  "llms.txt": "llmsTxt",
  "robots.txt": "robotsTxt",
  "sitemap.xml": "sitemap",
  "agents.json": "agentJson",
  // Both OpenAPI file formats share one page entry.
  "openapi.json": "openapi",
  "openapi.yaml": "openapi",
};
|
|
170
|
-
|
|
171
|
-
/**
 * Returns the text of a well-known file, or null when it is unavailable.
 *
 * URL mode: looks the file up in `context.pages` through PAGE_KEY_MAP and
 * only accepts a page whose status is 200.
 * Other modes: searches `context.files`, preferring the project-root file,
 * then `public/<filename>`, then any nested path. Previously the first match
 * in list order won, so a nested copy could shadow the root file.
 *
 * A missing `pages`, `files` or `fileContents` yields "not found" instead of
 * throwing.
 *
 * @param {object} context - Scan context.
 * @param {string} filename - Bare filename such as "robots.txt".
 * @returns {string|null|undefined} File text; null when no file matches.
 */
function getContent(context, filename) {
  if (context.mode === "url") {
    const key = PAGE_KEY_MAP[filename];
    const page = key ? context.pages?.[key] : null;
    return page && page.status === 200 ? page.text : null;
  }

  const files = context.files ?? [];
  const match =
    files.find((f) => f === filename) ??
    files.find((f) => f === `public/${filename}`) ??
    files.find((f) => f.endsWith(`/${filename}`));

  return match ? context.fileContents?.[match] : null;
}
|
|
185
|
-
|
|
186
|
-
/**
 * Drops the final extension from a filename and camel-cases the rest:
 * each `-`, `_` or `.` is removed and the character after it upper-cased.
 *
 * @param {string} filename
 * @returns {string}
 */
function toCamelCase(filename) {
  const extension = /\.[^.]+$/;
  const separatorThenChar = /[-_.](.)/g;
  const stem = filename.replace(extension, "");
  return stem.replace(separatorThenChar, (_whole, next) => next.toUpperCase());
}
|
|
190
|
-
|
|
191
|
-
/**
 * Reports whether the bot's robots.txt section contains an `Allow:` rule for
 * a path and no blanket `Disallow: /`.
 *
 * Directive names are matched case-insensitively and must start their line,
 * so `allow: /` is recognised and `Disallow:` is never mistaken for `Allow:`.
 *
 * @param {string} robotsTxt - Full robots.txt text.
 * @param {string} bot - Crawler user-agent name.
 * @returns {boolean}
 */
function hasAllow(robotsTxt, bot) {
  const section = extractBotSection(robotsTxt, bot);
  if (!section) return false;

  const allowsSomePath = /^[^\S\n]*Allow:\s*\//im.test(section);
  const blocksEverything = /^[^\S\n]*Disallow:\s*\/[^\S\n]*(?:#.*)?$/im.test(
    section,
  );
  return allowsSomePath && !blocksEverything;
}
|
|
199
|
-
|
|
200
|
-
/**
 * Reports whether the bot's robots.txt section contains a blanket
 * `Disallow: /` rule.
 *
 * The directive is matched case-insensitively at the start of a line, and a
 * trailing `# comment` is tolerated. Path-specific rules such as
 * `Disallow: /private` do not count.
 *
 * @param {string} robotsTxt - Full robots.txt text.
 * @param {string} bot - Crawler user-agent name.
 * @returns {boolean}
 */
function hasDisallow(robotsTxt, bot) {
  const section = extractBotSection(robotsTxt, bot);
  if (!section) return false;
  return /^[^\S\n]*Disallow:\s*\/[^\S\n]*(?:#.*)?$/im.test(section);
}
|
|
204
|
-
|
|
205
|
-
/**
 * Extracts the part of robots.txt that applies to one crawler: from its
 * `User-agent:` line up to the next rule group, or the end of the file.
 *
 * Two fixes over a plain "up to the next User-agent" match:
 *  - the bot name is regex-escaped, so characters such as `.` match literally;
 *  - consecutive `User-agent:` lines form one group sharing the rules that
 *    follow, so directly following `User-agent:` lines are skipped over and
 *    the shared rules are included in the section.
 *
 * @param {string} robotsTxt - Full robots.txt text.
 * @param {string} bot - Crawler user-agent name (matched case-insensitively).
 * @returns {string|null} The section text, or null when the bot is not listed.
 */
function extractBotSection(robotsTxt, bot) {
  const escapedBot = bot.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const sectionPattern = new RegExp(
    `User-agent:\\s*${escapedBot}[^\\S\\n]*` +
      // Further user-agent lines belonging to the same group.
      `(?:\\r?\\n[^\\S\\r\\n]*User-agent:[^\\n]*)*` +
      // The group's rules, up to the next group or the end of the text.
      `[\\s\\S]*?(?=User-agent:|$)`,
    "i",
  );
  const match = robotsTxt.match(sectionPattern);
  return match ? match[0] : null;
}
|
|
213
|
-
|
|
214
|
-
/**
 * Heuristic: does the opening of the text say both what the thing is and how
 * to use it?
 *
 * Only the first 2000 characters are inspected. Keywords are matched as whole
 * words; substring matching let "is" hit "this" and "how" hit "shows", so the
 * check passed for almost any text.
 *
 * @param {string} text
 * @returns {boolean}
 */
function checkFrontLoading(text) {
  const opening = text.slice(0, 2000);
  const hasWhat = /\b(?:what|does|is)\b/i.test(opening);
  const hasHow =
    /\b(?:how|get(?:ting)? started|install\w*|us(?:e|es|ing|age))\b/i.test(
      opening,
    );
  return hasWhat && hasHow;
}
|
|
220
|
-
|
|
221
|
-
/** Builds the result for a fully passed check; maxPoints equals the points awarded. */
function pass(name, points) {
  const result = { name, passed: true, points, maxPoints: points };
  return result;
}
|
|
224
|
-
|
|
225
|
-
/** Builds the result for a check that earned some points but did not pass. */
function partial(name, points, maxPoints, fix) {
  const result = { name, passed: false, points, maxPoints, fix };
  return result;
}
|
|
228
|
-
|
|
229
|
-
/** Builds the result for a failed check: zero points plus a suggested fix. */
function fail(name, maxPoints, fix) {
  const result = { name, passed: false, points: 0, maxPoints, fix };
  return result;
}
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
/**
 * Runs the authority checks (E-E-A-T, entity optimization, external
 * validation) and totals the points of the checks that fully passed.
 *
 * NOTE(review): the three checks award at most 4 + 3 + 3 = 10 points, yet
 * maxScore is reported as 13 — confirm which value is intended.
 *
 * @param {object} context - Scan context handed to every individual check.
 * @returns {Promise<{score: number, maxScore: number, checks: object[]}>}
 */
export async function runAuthorityChecks(context) {
  const runners = [checkEeat, checkEntityOptimization, checkExternalValidation];
  const checks = runners.map((run) => run(context));

  let score = 0;
  for (const { passed, points } of checks) {
    if (passed) score += points;
  }

  return { score, maxScore: 13, checks };
}
|
|
11
|
-
|
|
12
|
-
/**
 * Scores E-E-A-T signals found in the scanned content: author bio,
 * credentials, first-hand experience wording, and a Person schema.
 * One point per signal; all four are required to pass.
 *
 * The "by <Name>" byline pattern is matched case-sensitively. It used to be
 * combined with the `i` flag, so `[A-Z]` matched any letter and phrases like
 * "by the team" counted as an author byline.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkEeat(context) {
  const allContent = getAllContent(context);

  const hasByline =
    /author|written by/i.test(allContent) || /\bby [A-Z]/.test(allContent);
  const hasAuthorBio =
    hasByline &&
    (/bio|about the author|expertise|experience/i.test(allContent) ||
      allContent.includes("Person"));

  const hasCredentials =
    /credential|certified|years of experience|expert|specialist|phd|mba/i.test(
      allContent,
    );

  const hasExperience =
    /built|shipped|worked on|founded|created|contributed to/i.test(allContent);

  const hasPersonSchema =
    allContent.includes('"Person"') || allContent.includes("'Person'");

  // Each row: whether the signal is present, and the labels used when
  // reporting it as found or as missing.
  const table = [
    { present: hasAuthorBio, found: "author bio", absent: "author bios" },
    { present: hasCredentials, found: "credentials", absent: "credentials" },
    {
      present: hasExperience,
      found: "experience signals",
      absent: "first-hand experience",
    },
    { present: hasPersonSchema, found: "Person schema", absent: "Person schema" },
  ];
  const signals = table.filter((row) => row.present).map((row) => row.found);
  const missing = table.filter((row) => !row.present).map((row) => row.absent);
  const points = signals.length;

  if (points === 4) return pass("E-E-A-T signals", 4);
  if (points > 0) {
    return partial(
      "E-E-A-T signals",
      points,
      4,
      `Has: ${signals.join(", ")}. Missing: ${missing.join(", ")}.`,
    );
  }
  return fail(
    "E-E-A-T signals",
    4,
    "No E-E-A-T signals (author bios, credentials, experience markers, Person schema). AI prioritizes authoritative sources.",
  );
}
|
|
71
|
-
|
|
72
|
-
/**
 * Scores entity-resolution signals: `sameAs` links (1 point), consistent
 * naming (1 point) and links to well-known profile hosts (2 points).
 * Three or more points pass; partial results are capped at 2 points.
 *
 * Profile hosts are matched on a word boundary. The unanchored `x\.com`
 * pattern also matched any domain ending in "x", e.g. netflix.com.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkEntityOptimization(context) {
  const allContent = getAllContent(context);

  let points = 0;
  const signals = [];

  if (allContent.includes("sameAs")) {
    points += 1;
    signals.push("sameAs links");
  }

  if (checkNameConsistency(allContent, context)) {
    points += 1;
    signals.push("consistent naming");
  }

  const hasIdentifiers = /\b(?:github|linkedin|twitter|x)\.com\b/i.test(
    allContent,
  );
  if (hasIdentifiers) {
    points += 2;
    signals.push("cross-platform identifiers");
  }

  if (points >= 3) return pass("Entity optimization", 3);
  if (points > 0) {
    return partial(
      "Entity optimization",
      Math.min(points, 2),
      3,
      `Entity signals: ${signals.join(", ")}. Add sameAs links and consistent naming across platforms.`,
    );
  }
  return fail(
    "Entity optimization",
    3,
    "No entity optimization. Add sameAs links, consistent naming, and cross-platform profile links for AI entity resolution.",
  );
}
|
|
113
|
-
|
|
114
|
-
/**
 * Scores evidence signals in the content: external links, citation wording
 * and quantified claims. One point each; all three are required to pass.
 *
 * The localhost / 127.0.0.x / example.com exclusion is applied to the host
 * that directly follows the URL scheme. It used to scan the rest of the
 * line, so a genuine link was rejected whenever "localhost" appeared
 * anywhere after it on the same line.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkExternalValidation(context) {
  const allContent = getAllContent(context);

  const hasExternalLinks =
    /https?:\/\/(?!localhost\b|127\.0\.0\.\d|(?:www\.)?example\.com\b)[\w-]/i.test(
      allContent,
    );
  const hasCitations =
    /\[\d+\]|source:|reference:|according to|research shows/i.test(allContent);
  const hasDataClaims =
    /\d+%|\d+x|\$[\d,]+|increased by|reduced by|improved/i.test(allContent);

  const points =
    Number(hasExternalLinks) + Number(hasCitations) + Number(hasDataClaims);

  if (points === 3) return pass("External validation & references", 3);
  if (points > 0) {
    const missing = [];
    if (!hasExternalLinks) missing.push("external source links");
    if (!hasCitations) missing.push("cited references");
    if (!hasDataClaims) missing.push("quantified claims");
    return partial(
      "External validation & references",
      points,
      3,
      `Add: ${missing.join(", ")}. AI trusts content that cites sources.`,
    );
  }
  return fail(
    "External validation & references",
    3,
    "No external validation — no cited sources, references, or quantified claims. AI weighs evidence-backed content higher.",
  );
}
|
|
150
|
-
|
|
151
|
-
/**
 * Reports whether the project's name appears at least three times in the
 * content.
 *
 * URL mode: the name is the first hostname label after removing a leading
 * "www." (case-insensitive comparison).
 * Otherwise: the name is the `name` field of package.json (case-sensitive
 * comparison).
 *
 * Always returns a boolean. An empty or missing name yields false, and only
 * a "www." prefix at the start of the hostname is stripped.
 *
 * @param {string} content - Combined text to search.
 * @param {object} context - Scan context.
 * @returns {boolean}
 */
function checkNameConsistency(content, context) {
  const countMentions = (haystack, needle) =>
    haystack.split(needle).length - 1;

  if (context.mode === "url" && context.url) {
    let label;
    try {
      label = new URL(context.url).hostname
        .replace(/^www\./, "")
        .split(".")[0]
        .toLowerCase();
    } catch {
      // Unparseable URL: no name to look for.
      return false;
    }
    return label !== "" && countMentions(content.toLowerCase(), label) >= 3;
  }

  const pkg = context.fileContents?.["package.json"];
  if (!pkg) return false;

  try {
    const name = JSON.parse(pkg).name;
    return (
      typeof name === "string" &&
      name !== "" &&
      countMentions(content, name) >= 3
    );
  } catch {
    // Malformed package.json: treat as "no name".
    return false;
  }
}
|
|
175
|
-
|
|
176
|
-
/**
 * Concatenates every available text into one newline-separated string:
 * the `text` of each page in URL mode, otherwise every value of
 * `context.fileContents`.
 *
 * @param {object} context - Scan context.
 * @returns {string}
 */
function getAllContent(context) {
  const isUrlMode = context.mode === "url";
  const chunks = isUrlMode
    ? Object.values(context.pages || {}).map((page) => page?.text || "")
    : Object.values(context.fileContents || {});
  return chunks.join("\n");
}
|
|
184
|
-
|
|
185
|
-
/** Result for a fully passed check; the points awarded are also the maximum. */
function pass(name, points) {
  const maxPoints = points;
  return { name, passed: true, points, maxPoints };
}
|
|
188
|
-
|
|
189
|
-
/** Result for a check that earned some points but did not pass; `fix` is the advice text. */
function partial(name, points, maxPoints, fix) {
  const passed = false;
  return { name, passed, points, maxPoints, fix };
}
|
|
192
|
-
|
|
193
|
-
/** Result for a failed check: no points, with `fix` as the advice text. */
function fail(name, maxPoints, fix) {
  const passed = false;
  const points = 0;
  return { name, passed, points, maxPoints, fix };
}
|
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
/**
 * Runs the citation-readiness checks (direct answers, question headings,
 * citation-friendly structure, definitive statements).
 *
 * Only checks whose `passed` flag is true contribute their points to the
 * score; results with `passed: false` add nothing even when they carry points.
 *
 * @param {object} context - Scan context handed to every individual check.
 * @returns {Promise<{score: number, maxScore: number, checks: object[]}>}
 */
export async function runCitationReadinessChecks(context) {
  const runners = [
    checkDirectAnswerFormat,
    checkQuestionHeadings,
    checkCitationStructure,
    checkDefinitiveStatements,
  ];
  const checks = runners.map((run) => run(context));

  let score = 0;
  for (const { passed, points } of checks) {
    if (passed) score += points;
  }

  return { score, maxScore: 15, checks };
}
|
|
12
|
-
|
|
13
|
-
/**
 * Measures how many of the first ten pages open with a concise summary:
 * the first paragraph longer than 80 characters must contain 30–80 words.
 *
 * NOTE(review): the user-facing messages mention "40-60 words" while the
 * code accepts 30–80 — confirm which range is intended.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkDirectAnswerFormat(context) {
  const label = "Direct answer formatting";
  const pages = getPageContents(context);
  if (pages.length === 0) {
    return fail(label, 4, "No content pages to analyze.");
  }

  const sample = pages.slice(0, 10);
  const directAnswerCount = sample.filter((page) => {
    const lead = extractParagraphs(page.content).find((p) => p.length > 80);
    if (!lead) return false;
    const words = lead.split(/\s+/).length;
    return words >= 30 && words <= 80;
  }).length;

  const ratio = directAnswerCount / sample.length;
  if (ratio >= 0.6) {
    return pass(label, 4);
  }
  if (ratio >= 0.3) {
    return partial(
      label,
      2,
      4,
      `Only ${Math.round(ratio * 100)}% of pages lead with a 40-60 word summary. AI systems extract these as citations.`,
    );
  }
  return partial(
    label,
    1,
    4,
    "Most pages don't lead with a concise summary (40-60 words). This is what AI systems cite.",
  );
}
|
|
45
|
-
|
|
46
|
-
/**
 * Scores the share of headings phrased as questions: a heading counts when
 * it ends with "?" or starts with a question word.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkQuestionHeadings(context) {
  const label = "Question-based headings";

  const text = getAllTextContent(context);
  if (!text) {
    return fail(label, 4, "No content to analyze.");
  }

  const headings = extractHeadings(text);
  if (headings.length === 0) {
    return fail(label, 4, "No headings found in content.");
  }

  const startsWithQuestionWord =
    /^(what|how|why|when|where|who|which|can|does|is|are|do)\b/i;
  const questionHeadings = headings.filter(
    (heading) => heading.endsWith("?") || startsWithQuestionWord.test(heading),
  );

  const share = questionHeadings.length / headings.length;
  if (share >= 0.3) {
    return pass(label, 4);
  }
  if (share >= 0.1) {
    return partial(
      label,
      2,
      4,
      `${questionHeadings.length}/${headings.length} headings are question-based. Aim for 30%+ — these match how users query AI.`,
    );
  }
  if (questionHeadings.length > 0) {
    return partial(
      label,
      1,
      4,
      "Very few question-based headings. Rephrase key H2/H3s as questions users would ask AI.",
    );
  }
  return fail(
    label,
    4,
    'No question-based headings. AI systems match user queries to headings — use "What is X?" / "How to Y?" format.',
  );
}
|
|
85
|
-
|
|
86
|
-
/**
 * Scores how easy the content is to quote: short paragraphs (up to 2
 * points), presence of lists (1 point) and presence of tables (1 point).
 *
 * Fixes over the naive heuristics:
 *  - an average paragraph length of 0 (no paragraphs found) no longer earns
 *    the full 2 points for "short paragraphs";
 *  - markdown lists are detected by a list marker at the start of a line,
 *    not by any "- " / "* " substring;
 *  - markdown tables are detected by a delimiter row (`| --- | --- |`), not
 *    by any "|" character, which also occurs in code and page titles.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkCitationStructure(context) {
  const label = "Citation-friendly structure";
  const pages = getPageContents(context);
  if (pages.length === 0) {
    return fail(label, 4, "No content pages to analyze.");
  }

  let score = 0;
  const issues = [];

  const avgParaLength = getAverageParagraphLength(pages);
  if (avgParaLength <= 0) {
    issues.push("no paragraphs found to evaluate");
  } else if (avgParaLength <= 100) {
    score += 2;
  } else if (avgParaLength <= 150) {
    score += 1;
    issues.push("paragraphs slightly long for citation extraction");
  } else {
    issues.push("paragraphs too long — AI prefers short, citable blocks");
  }

  // Bullet ("-", "*", "+") or numbered ("1.", "1)") marker at line start.
  const markdownListItem = /^[ \t]*(?:[-*+]|\d+[.)])[ \t]+\S/m;
  const hasLists = pages.some(
    (p) => markdownListItem.test(p.content) || /<[ou]l/i.test(p.content),
  );
  if (hasLists) score += 1;
  else issues.push("no lists — AI extracts bulleted info easily");

  // Delimiter row of a markdown table with at least two columns.
  const markdownTableRow =
    /^[ \t]*\|?[ \t]*:?-+:?[ \t]*(?:\|[ \t]*:?-+:?[ \t]*)+\|?[ \t]*$/m;
  const hasTables = pages.some(
    (p) => markdownTableRow.test(p.content) || /<table/i.test(p.content),
  );
  if (hasTables) score += 1;
  else issues.push("no tables — structured comparisons get cited");

  if (score === 4) return pass(label, 4);
  return partial(label, score, 4, issues.join("; ") + ".");
}
|
|
132
|
-
|
|
133
|
-
/**
 * Scores the share of sentences that state something definitively: the
 * sentence contains an assertive verb phrase and no hedging word.
 * Sentences of 20 characters or fewer are ignored.
 *
 * @param {object} context - Scan context.
 * @returns {object} Check result built by pass/partial/fail.
 */
function checkDefinitiveStatements(context) {
  const label = "Definitive statements";
  const text = getAllTextContent(context);
  if (!text) {
    return fail(label, 3, "No content to analyze.");
  }

  const assertive = /\b(is|are|was|means|defined as|refers to|consists of)\b/i;
  const hedging = /\b(might|maybe|perhaps|possibly|could be|generally)\b/i;

  const sentences = text.split(/[.!]\s+/).filter((s) => s.length > 20);
  let definitiveCount = 0;
  for (const sentence of sentences) {
    if (assertive.test(sentence) && !hedging.test(sentence)) {
      definitiveCount += 1;
    }
  }

  const ratio = sentences.length > 0 ? definitiveCount / sentences.length : 0;
  if (ratio >= 0.2) {
    return pass(label, 3);
  }
  if (ratio >= 0.1) {
    return partial(
      label,
      2,
      3,
      "Some definitive statements but too much hedging. AI cites confident claims over uncertain ones.",
    );
  }
  return partial(
    label,
    1,
    3,
    "Content uses weak/uncertain language. Make clear, definitive claims that AI can cite with confidence.",
  );
}
|
|
162
|
-
|
|
163
|
-
/**
 * Lists the pages to analyse as `{ name, content }` objects.
 *
 * URL mode: a single "homepage" entry built from `context.html`, or nothing
 * when there is no HTML. Otherwise: every non-empty `.md` / `.html` file.
 *
 * @param {object} context - Scan context.
 * @returns {{name: string, content: string}[]}
 */
function getPageContents(context) {
  if (context.mode === "url") {
    if (!context.html) return [];
    return [{ name: "homepage", content: context.html }];
  }

  const pages = [];
  for (const file of context.files) {
    const isPage = file.endsWith(".md") || file.endsWith(".html");
    if (!isPage) continue;
    const content = context.fileContents[file] || "";
    if (content.length > 0) {
      pages.push({ name: file, content });
    }
  }
  return pages;
}
|
|
172
|
-
|
|
173
|
-
/**
 * Returns all analysable text as one string: `context.html` in URL mode,
 * otherwise the newline-joined contents of every `.md`, `.html` and `.txt`
 * file.
 *
 * @param {object} context - Scan context.
 * @returns {string}
 */
function getAllTextContent(context) {
  if (context.mode === "url") {
    return context.html || "";
  }

  const textExtensions = [".md", ".html", ".txt"];
  const parts = [];
  for (const file of context.files) {
    if (textExtensions.some((ext) => file.endsWith(ext))) {
      parts.push(context.fileContents[file] || "");
    }
  }
  return parts.join("\n");
}
|
|
182
|
-
|
|
183
|
-
/**
 * Splits content into paragraph strings.
 *
 * HTML (content with a `<p>` element): the tag-stripped, trimmed text of
 * every `<p>…</p>` pair. Anything else is treated as markdown/plain text and
 * split on blank lines, skipping chunks that start with "#".
 *
 * The `<p>` detection requires whitespace or `>` after the tag name. The
 * earlier `<p` prefix test also matched `<pre>` or `<path>`, which sent
 * markdown containing such tags down the HTML branch and returned nothing.
 * Empty paragraphs are dropped in both branches.
 *
 * @param {string} content
 * @returns {string[]}
 */
function extractParagraphs(content) {
  if (/<p[\s>]/i.test(content)) {
    return [...content.matchAll(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi)]
      .map((m) => stripTags(m[1]).trim())
      .filter((text) => text.length > 0);
  }
  return content
    .split(/\n\n+/)
    .map((p) => p.trim())
    .filter((p) => p.length > 0 && !p.startsWith("#"));
}
|
|
194
|
-
|
|
195
|
-
/**
 * Collects heading texts from markdown (`# …` lines) and from HTML
 * (`<h1>`–`<h6>` elements), then returns whichever list is longer (HTML
 * wins ties).
 *
 * HTML headings are read up to their closing tag with inner markup removed,
 * so `<h2><a href="…">Title</a></h2>` yields "Title". Capturing only the
 * text before the first nested tag missed such headings entirely. All
 * headings are trimmed and empty ones dropped. The markdown marker must be
 * followed by spaces or tabs on the same line.
 *
 * @param {string} content
 * @returns {string[]}
 */
function extractHeadings(content) {
  const md = [...content.matchAll(/^#{1,6}[ \t]+(.+)$/gm)]
    .map((m) => m[1].trim())
    .filter(Boolean);

  const html = [
    ...content.matchAll(/<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1\s*>/gi),
  ]
    .map((m) => m[2].replace(/<[^>]+>/g, "").trim())
    .filter(Boolean);

  return md.length > html.length ? md : html;
}
|
|
200
|
-
|
|
201
|
-
/**
 * Computes the mean word count of all paragraphs longer than 20 characters
 * across the given pages; returns 0 when there are none.
 *
 * @param {{content: string}[]} pages
 * @returns {number}
 */
function getAverageParagraphLength(pages) {
  const wordCounts = pages.flatMap((page) =>
    extractParagraphs(page.content)
      .filter((paragraph) => paragraph.length > 20)
      .map((paragraph) => paragraph.split(/\s+/).length),
  );
  if (wordCounts.length === 0) return 0;

  const totalWords = wordCounts.reduce((sum, words) => sum + words, 0);
  return totalWords / wordCounts.length;
}
|
|
213
|
-
|
|
214
|
-
/** Removes every `<…>` tag from the given markup, leaving the text between tags. */
function stripTags(html) {
  const anyTag = /<[^>]+>/g;
  return html.replace(anyTag, "");
}
|
|
217
|
-
|
|
218
|
-
/** Creates a passed-check record; a pass always earns the maximum, so maxPoints mirrors points. */
function pass(name, points) {
  return {
    name,
    passed: true,
    points,
    maxPoints: points,
  };
}
|
|
221
|
-
|
|
222
|
-
/** Creates a record for a check that scored points without passing; `fix` holds the advice text. */
function partial(name, points, maxPoints, fix) {
  return {
    name,
    passed: false,
    points,
    maxPoints,
    fix,
  };
}
|
|
225
|
-
|
|
226
|
-
/** Creates a failed-check record with zero points; `fix` holds the advice text. */
function fail(name, maxPoints, fix) {
  return {
    name,
    passed: false,
    points: 0,
    maxPoints,
    fix,
  };
}
|