@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI Content Structure Checks
|
|
3
|
+
*
|
|
4
|
+
* AI systems prefer well-structured content that's easy to parse and quote.
|
|
5
|
+
* These checks verify content is formatted optimally for AI consumption:
|
|
6
|
+
* - Tables for comparisons/data
|
|
7
|
+
* - Numbered lists for steps/procedures
|
|
8
|
+
* - Bullet points for features/benefits
|
|
9
|
+
* - Clear Q&A format for direct answers
|
|
10
|
+
* - Concise, quotable statements
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as cheerio from 'cheerio';
|
|
14
|
+
import type { AuditIssue } from '../types.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Structural signals collected from a page's main content, as reported by
 * `analyzeAIContentStructure`.
 */
export interface AIContentStructureData {
  /** Table usage. */
  tables: {
    /** Number of `<table>` elements found in the analyzed content. */
    count: number;
    /** True when some table has at least 2 `<th>` cells and at least 3 `<tr>` rows. */
    hasComparisonTable: boolean;
    /** True when some table has at least 4 `<td>` cells and any cell contains a digit. */
    hasDataTable: boolean;
  };

  /** List usage. */
  lists: {
    /** Number of `<ol>` elements. */
    orderedLists: number;
    /** Number of `<ul>` elements (navigation is stripped before counting). */
    unorderedLists: number;
    /** True when some `<ol>` has at least 3 direct `<li>` children. */
    hasStepByStep: boolean;
    /** True when some `<ul>` has at least 3 direct `<li>` children. */
    hasBulletedFeatures: boolean;
  };

  /** Question-and-answer signals. */
  qaFormat: {
    /** True when FAQ markup is detected (FAQ/Question microdata, faq class/id, `<details>`, `<dt>`). */
    hasExplicitQA: boolean;
    /** Number of headings that look like questions. */
    questionCount: number;
    /** True when a question heading is immediately followed by a short `<p>` answer. */
    hasDirectAnswers: boolean;
  };

  /** Signals that the prose is easy to quote. */
  quotability: {
    /** True when a paragraph matches an "X is/are/refers to/means/describes ..." pattern. */
    hasDefinitions: boolean;
    /** True when a paragraph under 200 characters has a colon, article or number lead-in. */
    hasConciseStatements: boolean;
    /** Share (0-1) of counted paragraphs shorter than 150 characters; 0 when none were counted. */
    shortParagraphRatio: number;
  };

  /** Overall structure score, clamped to the range 0-100. */
  structureScore: number;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Inspect a page's HTML for structural elements (tables, lists, Q&A headings,
 * short quotable paragraphs) and derive a 0-100 structure score plus a list of
 * audit issues.
 *
 * Page chrome (`nav`, `footer`, `aside`, `header`) and non-content tags
 * (`script`, `style`, `noscript`) are removed before any counting, so every
 * figure below refers to the remaining content only.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; copied into `affectedUrls` of every issue.
 * @returns The generated issues and the collected structure data.
 */
export function analyzeAIContentStructure(
  html: string,
  url: string
): { issues: AuditIssue[]; data: AIContentStructureData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Remove nav, footer, aside, scripts, styles for content analysis
  $('nav, footer, aside, script, style, noscript, header').remove();

  // ---- Tables -------------------------------------------------------------
  const tables = $('table');
  const tableCount = tables.length;
  let hasComparisonTable = false;
  let hasDataTable = false;

  tables.each((_, table) => {
    const $table = $(table);
    const headers = $table.find('th').length;
    const rows = $table.find('tr').length;
    const $cells = $table.find('td');

    // Comparison table: multiple columns with headers
    if (headers >= 2 && rows >= 3) {
      hasComparisonTable = true;
    }

    // Data table: at least four cells, any of which contains a digit
    const cellTexts = $cells.map((_, td) => $(td).text()).get();
    const hasNumbers = cellTexts.some((text) => /\d/.test(text));
    if (hasNumbers && $cells.length >= 4) {
      hasDataTable = true;
    }
  });

  // ---- Lists --------------------------------------------------------------
  // <nav> was removed above, so no navigation lists remain to be excluded.
  const $orderedLists = $('ol');
  const $unorderedLists = $('ul');
  const orderedLists = $orderedLists.length;
  const unorderedLists = $unorderedLists.length;

  // Step-by-step content: an ordered list with three or more direct items
  let hasStepByStep = false;
  $orderedLists.each((_, ol) => {
    if ($(ol).find('> li').length >= 3) {
      hasStepByStep = true;
    }
  });

  // Bulleted features/benefits: an unordered list with three or more direct items
  let hasBulletedFeatures = false;
  $unorderedLists.each((_, ul) => {
    if ($(ul).find('> li').length >= 3) {
      hasBulletedFeatures = true;
    }
  });

  // ---- Q&A format ---------------------------------------------------------
  const bodyText = $('body').text().toLowerCase();
  const headings = $('h1, h2, h3, h4, h5, h6');
  let questionCount = 0;
  let hasDirectAnswers = false;

  headings.each((_, heading) => {
    // Trim first: markup indentation inside the heading would otherwise defeat
    // the start-anchored question-word test.
    const text = $(heading).text().trim();
    const isQuestion =
      text.includes('?') ||
      /^(what|how|why|when|where|who|which|can|does|is|are|should|will)\s/i.test(text);
    if (!isQuestion) return;

    questionCount++;

    // A direct answer is a <p> that is the very next sibling and is short.
    const nextP = $(heading).next('p');
    if (nextP.length) {
      const answerText = nextP.text();
      if (answerText.length > 20 && answerText.length < 300) {
        hasDirectAnswers = true;
      }
    }
  });

  // Explicit Q&A format (FAQ style)
  const hasExplicitQA =
    $('[itemtype*="FAQPage"]').length > 0 ||
    $('[itemtype*="Question"]').length > 0 ||
    $('.faq, #faq, [class*="faq"], [id*="faq"]').length > 0 ||
    $('details summary').length >= 2 ||
    $('dt').length >= 2; // Definition lists often used for Q&A

  // ---- Quotability --------------------------------------------------------
  let shortParagraphCount = 0;
  let totalParagraphs = 0;
  let hasDefinitions = false;
  let hasConciseStatements = false;

  $('p').each((_, p) => {
    const text = $(p).text().trim();
    if (text.length < 20) return; // Skip very short paragraphs

    totalParagraphs++;

    // Short paragraphs (under 150 chars) are more quotable
    if (text.length < 150) {
      shortParagraphCount++;
    }

    // Definition patterns ("X is a...", "X refers to...")
    if (/^[A-Z][^.]+\s+(is|are|refers to|means|describes)\s+/i.test(text)) {
      hasDefinitions = true;
    }

    // Concise statements with clear structure
    if (
      text.length < 200 &&
      (text.includes(':') || /^(The|A|An)\s+\w+/.test(text) || /^\d+\.?\s+/.test(text))
    ) {
      hasConciseStatements = true;
    }
  });

  const shortParagraphRatio = totalParagraphs > 0 ? shortParagraphCount / totalParagraphs : 0;

  // ---- Structure score (0-100) --------------------------------------------
  let structureScore = 50; // Base score

  // Tables bonus
  if (hasComparisonTable) structureScore += 10;
  if (hasDataTable) structureScore += 5;

  // Lists bonus
  if (hasStepByStep) structureScore += 10;
  if (hasBulletedFeatures) structureScore += 5;

  // Q&A bonus
  if (hasExplicitQA) structureScore += 10;
  if (questionCount >= 3) structureScore += 5;
  if (hasDirectAnswers) structureScore += 5;

  // Quotability bonus
  if (hasDefinitions) structureScore += 5;
  if (hasConciseStatements) structureScore += 5;
  if (shortParagraphRatio > 0.3) structureScore += 5;

  // Penalties
  if (tableCount === 0 && orderedLists === 0 && unorderedLists === 0) {
    structureScore -= 15; // No structured content at all
  }
  if (questionCount === 0) {
    structureScore -= 5; // No question-based headings
  }

  structureScore = Math.max(0, Math.min(100, structureScore));

  // ---- Issues -------------------------------------------------------------

  // Comparison wording without a comparison table. The keyword tests are
  // grouped so an existing comparison table suppresses the issue for every
  // keyword, and "vs" must be a whole word so that words merely containing
  // those letters (e.g. "devs", "canvas") do not count.
  const mentionsComparison =
    bodyText.includes('compar') || /\bvs\b/.test(bodyText) || bodyText.includes('versus');
  if (!hasComparisonTable && mentionsComparison) {
    issues.push({
      code: 'AI_NO_COMPARISON_TABLE',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Comparison content without table format',
      description: 'Your content mentions comparisons but doesn\'t use a table format. AI systems can easily parse and quote table data.',
      impact: 'AI may not accurately extract comparison data, reducing chances of being cited in AI-generated comparisons.',
      howToFix: 'Convert comparison content to HTML tables with clear headers. Example: Feature | Option A | Option B',
      affectedUrls: [url],
    });
  }

  // No numbered lists for procedural content
  if (!hasStepByStep && (bodyText.includes('step') || bodyText.includes('how to') || bodyText.includes('guide'))) {
    issues.push({
      code: 'AI_NO_NUMBERED_STEPS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Procedural content without numbered steps',
      description: 'Your content appears to be a guide or how-to but doesn\'t use numbered lists. AI prefers numbered lists for step-by-step content.',
      impact: 'AI may not accurately quote your steps in order, or may skip your content for better-structured alternatives.',
      howToFix: 'Convert step-by-step instructions to <ol> (ordered list) format with clear, actionable items.',
      affectedUrls: [url],
    });
  }

  // No Q&A format
  if (!hasExplicitQA && questionCount === 0) {
    issues.push({
      code: 'AI_NO_QA_FORMAT',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'No question-and-answer format detected',
      description: 'Content lacks explicit Q&A structure. AI systems often look for clear question-answer pairs to provide direct responses.',
      impact: 'Lower chance of being featured in AI direct answers or FAQ-style responses.',
      howToFix: 'Add FAQ section with common questions as headings (H2/H3) followed by concise answers. Implement FAQ schema markup.',
      affectedUrls: [url],
      details: {
        suggestions: [
          'Use question words in headings (What, How, Why, etc.)',
          'Follow questions with short, direct answer paragraphs',
          'Add FAQ schema markup for structured data',
          'Consider using <details>/<summary> for expandable Q&A',
        ],
      },
    });
  }

  // Questions without direct answers
  if (questionCount >= 2 && !hasDirectAnswers) {
    issues.push({
      code: 'AI_QUESTIONS_NO_DIRECT_ANSWERS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Questions in headings lack direct answers',
      description: `Found ${questionCount} question-style headings but answers are too long or not immediately following. AI prefers concise answers right after questions.`,
      impact: 'AI may struggle to extract clear answers, reducing citation likelihood.',
      howToFix: 'Start each answer section with a 1-2 sentence direct answer, then expand with details. First sentence should standalone as a complete answer.',
      affectedUrls: [url],
    });
  }

  // No definitions
  if (!hasDefinitions && (bodyText.includes('what is') || bodyText.includes('definition'))) {
    issues.push({
      code: 'AI_NO_DEFINITIONS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Missing clear definitions',
      description: 'Content discusses concepts but lacks Wikipedia-style definitions. AI loves clear "X is a..." statements.',
      impact: 'Less likely to be cited for definitional queries in AI search.',
      howToFix: 'Add clear definitions early in content: "[Term] is a [category] that [distinguishing features]." Make first paragraph a complete definition.',
      affectedUrls: [url],
    });
  }

  // Low overall structure score
  if (structureScore < 50) {
    issues.push({
      code: 'AI_LOW_STRUCTURE_SCORE',
      severity: 'warning',
      category: 'ai-readiness',
      title: 'Content structure not optimized for AI parsing',
      description: `AI structure score: ${structureScore}/100. Content lacks structured elements that AI can easily parse and quote.`,
      impact: 'Lower likelihood of being cited in AI-generated answers due to poor content structure.',
      howToFix: 'Improve content structure: add tables for data, numbered lists for steps, bullet points for features, Q&A sections for common questions.',
      affectedUrls: [url],
      details: {
        structureScore,
        hasTable: tableCount > 0,
        hasOrderedList: orderedLists > 0,
        hasUnorderedList: unorderedLists > 0,
        hasQAFormat: hasExplicitQA || questionCount > 0,
      },
    });
  }

  return {
    issues,
    data: {
      tables: {
        count: tableCount,
        hasComparisonTable,
        hasDataTable,
      },
      lists: {
        orderedLists,
        unorderedLists,
        hasStepByStep,
        hasBulletedFeatures,
      },
      qaFormat: {
        hasExplicitQA,
        questionCount,
        hasDirectAnswers,
      },
      quotability: {
        hasDefinitions,
        hasConciseStatements,
        shortParagraphRatio,
      },
      structureScore,
    },
  };
}
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
// AI/LLM Readiness Checks
|
|
2
|
+
// Checks for llms.txt, AI bot blocking, and JS rendering ratio
|
|
3
|
+
|
|
4
|
+
import { httpGet } from '../../utils/http.js';
|
|
5
|
+
import * as cheerio from 'cheerio';
|
|
6
|
+
import type { AuditIssue } from '../types.js';
|
|
7
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
8
|
+
|
|
9
|
+
// User-agent tokens of the AI crawlers this audit looks for.
// Every key maps to the identical token string.
const AI_BOTS = {
  'GPTBot': 'GPTBot',
  'ChatGPT-User': 'ChatGPT-User',
  'OAI-SearchBot': 'OAI-SearchBot',
  'ClaudeBot': 'ClaudeBot',
  'Claude-Web': 'Claude-Web',
  'anthropic-ai': 'anthropic-ai',
  'PerplexityBot': 'PerplexityBot',
  'Google-Extended': 'Google-Extended',
  'Bytespider': 'Bytespider',
  'CCBot': 'CCBot',
};
|
|
22
|
+
|
|
23
|
+
/** Outcome of fetching and validating a site's `/llms.txt`. */
export interface LlmsTxtResult {
  /** Whether the file could be fetched (no HTTP error status, no request failure). */
  exists: boolean;
  /** Response body of the file; only set when it was fetched. */
  content?: string;
  /** Whether the file passed the basic format checks. */
  valid: boolean;
  /** Human-readable reasons why the file is missing or invalid; empty when valid. */
  errors: string[];
}
|
|
29
|
+
|
|
30
|
+
/** Outcome of scanning `/robots.txt` for rules that block AI crawlers. */
export interface AIBotBlockingResult {
  /** Whether robots.txt could be fetched (no HTTP error status, no request failure). */
  robotsExists: boolean;
  /** Names of the known AI bots that are blocked from the whole site. */
  blockedBots: string[];
  /** Names of the known AI bots that are not blocked. */
  allowedBots: string[];
  /** True when every known AI bot is in `blockedBots`. */
  allBlocked: boolean;
}
|
|
36
|
+
|
|
37
|
+
/** Combined data returned by `runAIReadinessChecks`. */
export interface AIReadinessData {
  /** Result of the llms.txt check. */
  llmsTxt: LlmsTxtResult;
  /** Result of the robots.txt AI-bot blocking check. */
  botBlocking: AIBotBlockingResult;
  /** Estimated share of content rendered by JavaScript, as a percentage (0-100). */
  jsRenderingRatio: number;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Check for an llms.txt file at the site root and run basic format checks.
 * See: https://llmstxt.org/
 *
 * Reports `LLMS_TXT_MISSING` when the request returns a status of 400 or
 * above or fails outright, and `LLMS_TXT_INVALID` when the file is fetched
 * but is empty, does not begin with a markdown heading, or carries no
 * meaningful content.
 *
 * @param baseUrl - Any absolute URL on the site; `/llms.txt` is resolved against it.
 * @returns The generated issues and the fetch/validation result.
 * @throws TypeError if `baseUrl` is not a valid absolute URL (thrown by `new URL`).
 */
export async function checkLlmsTxt(baseUrl: string): Promise<{ issues: AuditIssue[]; data: LlmsTxtResult }> {
  const issues: AuditIssue[] = [];
  const url = new URL('/llms.txt', baseUrl).href;

  try {
    const response = await httpGet<string>(url, {
      timeout: 10000,
      // Never reject on HTTP status; error statuses are handled below.
      validateStatus: () => true,
    });

    // Any client or server error status (404 included) counts as "missing".
    if (response.status >= 400) {
      issues.push({
        ...ISSUE_DEFINITIONS.LLMS_TXT_MISSING,
        affectedUrls: [url],
      });
      return {
        issues,
        data: { exists: false, valid: false, errors: ['File not found'] },
      };
    }

    const content = response.data as string;

    // Basic validation of llms.txt format.
    // Expected format starts with # followed by site name, then markdown content.
    // Lines are trimmed before inspection so a byte-order mark or leading
    // whitespace on the first line does not fail the heading check.
    const lines = content
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);
    const errors: string[] = [];

    // Check if it starts with a title (# Site Name)
    const firstLine = lines[0];
    if (firstLine === undefined) {
      errors.push('File is empty');
    } else if (!firstLine.startsWith('#')) {
      errors.push('File should start with a markdown heading (# Site Name)');
    }

    // Check for common sections: a second-level heading, or simply enough text.
    const hasDescription = content.includes('## ') || content.length > 50;
    if (!hasDescription) {
      errors.push('File should contain meaningful content describing your site for AI');
    }

    const valid = errors.length === 0;
    if (!valid) {
      issues.push({
        ...ISSUE_DEFINITIONS.LLMS_TXT_INVALID,
        affectedUrls: [url],
        details: { errors },
      });
    }

    return {
      issues,
      data: { exists: true, content, valid, errors },
    };
  } catch (error) {
    // Request failures (and unexpected response bodies) are reported as a
    // missing file, with the underlying message kept for diagnosis.
    issues.push({
      ...ISSUE_DEFINITIONS.LLMS_TXT_MISSING,
      affectedUrls: [url],
      details: { error: error instanceof Error ? error.message : 'Unknown error' },
    });
    return {
      issues,
      data: { exists: false, valid: false, errors: ['Failed to fetch'] },
    };
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Check robots.txt for rules that block the known AI bots from the whole site.
 *
 * Parsing rules applied here:
 * - Consecutive `User-agent` lines form one group; the `Allow`/`Disallow`
 *   lines that follow apply to every agent named in that group.
 * - Only `Disallow: /` and `Disallow: /*` count as blocking.
 * - A `*` group applies to every known bot.
 * - Any `Allow` line in a group that names a bot marks that bot as allowed,
 *   which overrides a site-wide disallow for it.
 * - Inline `#` comments are ignored.
 *
 * A status of 400 or above, or a failed request, is treated as "no
 * robots.txt", meaning every bot is allowed and no issue is raised.
 *
 * @param baseUrl - Any absolute URL on the site; `/robots.txt` is resolved against it.
 * @returns The generated issues and the per-bot blocking result.
 * @throws TypeError if `baseUrl` is not a valid absolute URL (thrown by `new URL`).
 */
export async function checkAIBotBlocking(baseUrl: string): Promise<{ issues: AuditIssue[]; data: AIBotBlockingResult }> {
  const issues: AuditIssue[] = [];
  const url = new URL('/robots.txt', baseUrl).href;

  const blockedBots: string[] = [];
  const allowedBots: string[] = [];

  try {
    const response = await httpGet<string>(url, {
      timeout: 10000,
      // Never reject on HTTP status; error statuses are handled below.
      validateStatus: () => true,
    });

    if (response.status >= 400) {
      // No robots.txt means all bots are allowed
      return {
        issues,
        data: {
          robotsExists: false,
          blockedBots: [],
          allowedBots: Object.keys(AI_BOTS),
          allBlocked: false,
        },
      };
    }

    const content = response.data as string;

    // Parse robots.txt for AI bot rules
    const botRules: Record<string, { allowed: boolean; disallowAll: boolean }> = {};

    // Agents named by the group currently being read. A new group starts at the
    // first User-agent line that follows at least one Allow/Disallow rule.
    let groupAgents: string[] = [];
    let groupHasRules = false;

    for (const rawLine of content.split('\n')) {
      const line = rawLine.replace(/#.*$/, '').trim().toLowerCase();
      const separator = line.indexOf(':');
      if (separator === -1) continue;

      const directive = line.slice(0, separator).trim();
      const value = line.slice(separator + 1).trim();

      if (directive === 'user-agent') {
        if (groupHasRules) {
          groupAgents = [];
          groupHasRules = false;
        }
        groupAgents.push(value);
      } else if (directive === 'disallow') {
        groupHasRules = true;

        // Only a site-wide disallow is treated as blocking.
        if (value !== '/' && value !== '/*') continue;

        const viaWildcard = groupAgents.includes('*');
        for (const [botKey, botName] of Object.entries(AI_BOTS)) {
          const namedExplicitly =
            groupAgents.includes(botKey.toLowerCase()) || groupAgents.includes(botName.toLowerCase());
          if (!namedExplicitly && !viaWildcard) continue;

          let rule = botRules[botKey];
          if (!rule) {
            rule = { allowed: false, disallowAll: false };
            botRules[botKey] = rule;
          }
          // A wildcard disallow does not override an allow already recorded
          // for this bot; a disallow naming the bot is always recorded.
          if (namedExplicitly || !rule.allowed) {
            rule.disallowAll = true;
          }
        }
      } else if (directive === 'allow') {
        groupHasRules = true;

        // Explicit allow for a bot named in this group
        for (const botKey of Object.keys(AI_BOTS)) {
          if (!groupAgents.includes(botKey.toLowerCase())) continue;

          const rule = botRules[botKey];
          if (rule) {
            rule.allowed = true;
          } else {
            botRules[botKey] = { allowed: true, disallowAll: false };
          }
        }
      }
    }

    // Determine blocked/allowed bots
    for (const botKey of Object.keys(AI_BOTS)) {
      const rule = botRules[botKey];
      if (rule?.disallowAll && !rule.allowed) {
        blockedBots.push(botKey);
      } else {
        allowedBots.push(botKey);
      }
    }

    const allBlocked = blockedBots.length === Object.keys(AI_BOTS).length;

    // Generate issues for blocked bots
    if (allBlocked) {
      issues.push({
        ...ISSUE_DEFINITIONS.AI_BOT_BLOCKED_ALL,
        affectedUrls: [url],
        details: { blockedBots },
      });
    } else {
      // Individual bot blocking notices
      if (blockedBots.includes('GPTBot') || blockedBots.includes('ChatGPT-User')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_GPTBOT_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('ClaudeBot') || blockedBots.includes('Claude-Web') || blockedBots.includes('anthropic-ai')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_CLAUDEBOT_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('PerplexityBot')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_PERPLEXITY_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('Google-Extended')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_GOOGLE_EXTENDED_BLOCKED,
          affectedUrls: [url],
        });
      }
    }

    return {
      issues,
      data: {
        robotsExists: true,
        blockedBots,
        allowedBots,
        allBlocked,
      },
    };
  } catch {
    // Best effort: an unreachable or unreadable robots.txt is reported as
    // absent, so every bot counts as allowed and no issue is raised.
    return {
      issues,
      data: {
        robotsExists: false,
        blockedBots: [],
        allowedBots: Object.keys(AI_BOTS),
        allBlocked: false,
      },
    };
  }
}
|
|
253
|
+
|
|
254
|
+
/**
 * Estimate how much of a page's content depends on JavaScript rendering.
 *
 * The ratio is a heuristic bucket, not a measurement: it is chosen from the
 * number of words in the static HTML and from the presence of common SPA
 * mount points, then lowered by 20 when the page ships a `<noscript>`
 * fallback and the ratio is above 50. A `HIGH_JS_RENDERING_RATIO` issue is
 * raised when the final ratio is above 50.
 *
 * @param html - Static (un-rendered) HTML of the page.
 * @param url - URL of the page; copied into `affectedUrls` of the issue.
 * @returns The generated issues, the estimated ratio (0-100) and the static word count.
 */
export function checkJSRenderingRatio(
  html: string,
  url: string
): { issues: AuditIssue[]; data: { ratio: number; staticWordCount: number } } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Record the noscript fallback BEFORE stripping tags below; once <noscript>
  // has been removed from the document it can no longer be detected.
  const hasNoScript = $('noscript').length > 0;

  // Get static text content (what AI crawlers would see without JS)
  // Remove script and style tags
  $('script, style, noscript').remove();

  const staticText = $('body').text().replace(/\s+/g, ' ').trim();
  const staticWordCount = staticText.split(/\s+/).filter((word) => word.length > 0).length;

  // Check for signs of heavy JS rendering
  const hasReactRoot = $('#root, #app, #__next, [data-reactroot]').length > 0;
  const hasVueApp = $('#app[data-v-app], [data-v-]').length > 0;
  const hasAngularApp = $('[ng-app], [data-ng-app]').length > 0;
  const hasFramework = hasReactRoot || hasVueApp || hasAngularApp;
  const hasEmptyBody = staticWordCount < 50;

  // Estimate JS rendering ratio based on signals
  let ratio: number;
  if (hasEmptyBody && hasFramework) {
    ratio = 90; // Likely SPA with most content rendered by JS
  } else if (hasFramework) {
    ratio = 50; // Has JS framework but some static content
  } else if (staticWordCount < 100) {
    ratio = 60; // Very little static content
  } else {
    ratio = 10; // Mostly static content
  }

  // Credit a noscript fallback
  if (hasNoScript && ratio > 50) {
    ratio -= 20; // Has fallback content
  }

  if (ratio > 50) {
    issues.push({
      ...ISSUE_DEFINITIONS.HIGH_JS_RENDERING_RATIO,
      affectedUrls: [url],
      details: { ratio: `${ratio}%`, staticWordCount },
    });
  }

  return {
    issues,
    data: { ratio, staticWordCount },
  };
}
|
|
309
|
+
|
|
310
|
+
/**
 * Run all AI readiness checks: llms.txt, AI bot blocking in robots.txt, and
 * the JavaScript rendering ratio.
 *
 * The two network checks are independent of each other, so they are started
 * together and awaited as a pair. Issues are still returned in the order
 * llms.txt, bot blocking, JS rendering.
 *
 * @param baseUrl - Absolute URL of the site; also used as the affected URL of
 *   the JS rendering issue.
 * @param html - Static HTML of the page to analyze for JS rendering.
 * @returns All issues from the three checks and their combined data.
 */
export async function runAIReadinessChecks(
  baseUrl: string,
  html: string
): Promise<{ issues: AuditIssue[]; data: AIReadinessData }> {
  // Fetch llms.txt and robots.txt concurrently
  const [llmsResult, botResult] = await Promise.all([
    checkLlmsTxt(baseUrl),
    checkAIBotBlocking(baseUrl),
  ]);

  // Check JS rendering ratio (synchronous, works on the HTML already in hand)
  const jsResult = checkJSRenderingRatio(html, baseUrl);

  return {
    issues: [...llmsResult.issues, ...botResult.issues, ...jsResult.issues],
    data: {
      llmsTxt: llmsResult.data,
      botBlocking: botResult.data,
      jsRenderingRatio: jsResult.data.ratio,
    },
  };
}
|