@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,325 @@
1
+ /**
2
+ * AI Content Structure Checks
3
+ *
4
+ * AI systems prefer well-structured content that's easy to parse and quote.
5
+ * These checks verify content is formatted optimally for AI consumption:
6
+ * - Tables for comparisons/data
7
+ * - Numbered lists for steps/procedures
8
+ * - Bullet points for features/benefits
9
+ * - Clear Q&A format for direct answers
10
+ * - Concise, quotable statements
11
+ */
12
+
13
+ import * as cheerio from 'cheerio';
14
+ import type { AuditIssue } from '../types.js';
15
+
16
/**
 * Structural signals collected from a page's content, together with the
 * resulting 0-100 structure score.
 */
export interface AIContentStructureData {
  /** Findings about `<table>` elements in the analyzed content. */
  tables: {
    /** Number of `<table>` elements found. */
    count: number;
    /** True when at least one table looks like a comparison (several header cells and rows). */
    hasComparisonTable: boolean;
    /** True when at least one table contains numeric cell data. */
    hasDataTable: boolean;
  };
  /** Findings about `<ol>` / `<ul>` lists in the analyzed content. */
  lists: {
    /** Number of `<ol>` elements found. */
    orderedLists: number;
    /** Number of `<ul>` elements found. */
    unorderedLists: number;
    /** True when some ordered list has enough direct items to read as a procedure. */
    hasStepByStep: boolean;
    /** True when some unordered list has enough direct items to read as a feature list. */
    hasBulletedFeatures: boolean;
  };
  /** Findings about question-and-answer formatting. */
  qaFormat: {
    /** True when FAQ-style markup (schema itemtype, faq class/id, details/summary, dt) is present. */
    hasExplicitQA: boolean;
    /** Number of headings that read as questions. */
    questionCount: number;
    /** True when a question heading is immediately followed by a short paragraph. */
    hasDirectAnswers: boolean;
  };
  /** Findings about how easily paragraphs can be quoted. */
  quotability: {
    /** True when some paragraph matches a definition pattern ("X is ...", "X refers to ..."). */
    hasDefinitions: boolean;
    /** True when some short paragraph has a clear, statement-like shape. */
    hasConciseStatements: boolean;
    /** Share of counted paragraphs that are short, between 0 and 1 (0 when none were counted). */
    shortParagraphRatio: number;
  };
  /** Overall structure score, clamped to the range 0-100. */
  structureScore: number;
}
40
+
41
/**
 * Analyze how well a page's HTML is structured for AI parsing and quoting.
 *
 * `nav`, `footer`, `aside`, `header`, `script`, `style` and `noscript`
 * elements are removed first, so only the remaining content is measured. The
 * function then inspects tables, lists, question-style headings / FAQ markup
 * and paragraph "quotability", derives a 0-100 structure score and emits
 * audit issues for missing structure.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; attached to every issue as `affectedUrls`.
 * @returns The issues found and the collected structure data.
 */
export function analyzeAIContentStructure(
  html: string,
  url: string
): { issues: AuditIssue[]; data: AIContentStructureData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Strip page chrome and non-content elements before analysis.
  $('nav, footer, aside, script, style, noscript, header').remove();

  // --- Tables ---
  const tables = $('table');
  const tableCount = tables.length;
  let hasComparisonTable = false;
  let hasDataTable = false;

  tables.each((_, table) => {
    const $table = $(table);
    const headers = $table.find('th').length;
    const rows = $table.find('tr').length;
    const $cells = $table.find('td');
    const cells = $cells.length;

    // Comparison table: multiple header cells and several rows.
    if (headers >= 2 && rows >= 3) {
      hasComparisonTable = true;
    }

    // Data table: at least four cells, one of which contains a digit.
    const cellTexts = $cells.map((_, td) => $(td).text()).get();
    const hasNumbers = cellTexts.some((text) => /\d+/.test(text));
    if (hasNumbers && cells >= 4) {
      hasDataTable = true;
    }
  });

  // --- Lists ---
  // <nav> elements were removed above, so no list inside navigation remains
  // and no additional nav filtering is needed here.
  const $orderedLists = $('ol');
  const $unorderedLists = $('ul');
  const orderedLists = $orderedLists.length;
  const unorderedLists = $unorderedLists.length;

  // An ordered list with 3+ direct items counts as step-by-step content.
  let hasStepByStep = false;
  $orderedLists.each((_, ol) => {
    if ($(ol).find('> li').length >= 3) {
      hasStepByStep = true;
    }
  });

  // An unordered list with 3+ direct items counts as a feature/benefit list.
  let hasBulletedFeatures = false;
  $unorderedLists.each((_, ul) => {
    if ($(ul).find('> li').length >= 3) {
      hasBulletedFeatures = true;
    }
  });

  // --- Q&A format ---
  const bodyText = $('body').text().toLowerCase();
  const headings = $('h1, h2, h3, h4, h5, h6');
  let questionCount = 0;
  let hasDirectAnswers = false;

  headings.each((_, heading) => {
    const text = $(heading).text();
    // A heading is a question if it contains "?" or starts with a question word.
    const isQuestion =
      text.includes('?') ||
      /^(what|how|why|when|where|who|which|can|does|is|are|should|will)\s/i.test(text);
    if (!isQuestion) return;

    questionCount++;

    // A direct answer is a short paragraph that is the heading's next sibling.
    const nextP = $(heading).next('p');
    if (nextP.length) {
      const answerText = nextP.text();
      if (answerText.length > 20 && answerText.length < 300) {
        hasDirectAnswers = true;
      }
    }
  });

  // Explicit Q&A markup (FAQ style).
  const hasExplicitQA =
    $('[itemtype*="FAQPage"]').length > 0 ||
    $('[itemtype*="Question"]').length > 0 ||
    $('.faq, #faq, [class*="faq"], [id*="faq"]').length > 0 ||
    $('details summary').length >= 2 ||
    $('dt').length >= 2; // Definition lists are often used for Q&A

  // --- Quotability ---
  let shortParagraphCount = 0;
  let totalParagraphs = 0;
  let hasDefinitions = false;
  let hasConciseStatements = false;

  $('p').each((_, p) => {
    const text = $(p).text().trim();
    if (text.length < 20) return; // Very short paragraphs are not counted at all

    totalParagraphs++;

    // Short paragraphs (under 150 chars) are more quotable.
    if (text.length < 150) {
      shortParagraphCount++;
    }

    // Definition patterns ("X is a...", "X refers to...").
    // Note: the `i` flag also makes the leading [A-Z] match lowercase letters.
    if (/^[A-Z][^.]+\s+(is|are|refers to|means|describes)\s+/i.test(text)) {
      hasDefinitions = true;
    }

    // Concise statements with a clear structure: contain a colon, start with
    // an article followed by a word, or start with a number.
    const looksStructured =
      text.includes(':') ||
      /^(The|A|An)\s+\w+/.test(text) ||
      /^\d+\.?\s+/.test(text);
    if (text.length < 200 && looksStructured) {
      hasConciseStatements = true;
    }
  });

  const shortParagraphRatio = totalParagraphs > 0
    ? shortParagraphCount / totalParagraphs
    : 0;

  // --- Structure score (0-100), starting from a base of 50 ---
  let structureScore = 50;

  // Tables bonus
  if (hasComparisonTable) structureScore += 10;
  if (hasDataTable) structureScore += 5;

  // Lists bonus
  if (hasStepByStep) structureScore += 10;
  if (hasBulletedFeatures) structureScore += 5;

  // Q&A bonus
  if (hasExplicitQA) structureScore += 10;
  if (questionCount >= 3) structureScore += 5;
  if (hasDirectAnswers) structureScore += 5;

  // Quotability bonus
  if (hasDefinitions) structureScore += 5;
  if (hasConciseStatements) structureScore += 5;
  if (shortParagraphRatio > 0.3) structureScore += 5;

  // Penalties
  if (tableCount === 0 && orderedLists === 0 && unorderedLists === 0) {
    structureScore -= 15; // No structured content at all
  }
  if (questionCount === 0) {
    structureScore -= 5; // No question-based headings
  }

  structureScore = Math.max(0, Math.min(100, structureScore));

  // --- Issues ---

  // Comparison wording without a comparison table. The keyword checks are
  // grouped explicitly: without the parentheses `&&` binds tighter than `||`,
  // and the issue would fire on any "vs"/"versus" even when a table exists.
  const mentionsComparison =
    bodyText.includes('compar') || bodyText.includes('vs') || bodyText.includes('versus');
  if (!hasComparisonTable && mentionsComparison) {
    issues.push({
      code: 'AI_NO_COMPARISON_TABLE',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Comparison content without table format',
      description: 'Your content mentions comparisons but doesn\'t use a table format. AI systems can easily parse and quote table data.',
      impact: 'AI may not accurately extract comparison data, reducing chances of being cited in AI-generated comparisons.',
      howToFix: 'Convert comparison content to HTML tables with clear headers. Example: Feature | Option A | Option B',
      affectedUrls: [url],
    });
  }

  // Procedural wording without a numbered list.
  const mentionsProcedure =
    bodyText.includes('step') || bodyText.includes('how to') || bodyText.includes('guide');
  if (!hasStepByStep && mentionsProcedure) {
    issues.push({
      code: 'AI_NO_NUMBERED_STEPS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Procedural content without numbered steps',
      description: 'Your content appears to be a guide or how-to but doesn\'t use numbered lists. AI prefers numbered lists for step-by-step content.',
      impact: 'AI may not accurately quote your steps in order, or may skip your content for better-structured alternatives.',
      howToFix: 'Convert step-by-step instructions to <ol> (ordered list) format with clear, actionable items.',
      affectedUrls: [url],
    });
  }

  // Neither FAQ markup nor question-style headings.
  if (!hasExplicitQA && questionCount === 0) {
    issues.push({
      code: 'AI_NO_QA_FORMAT',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'No question-and-answer format detected',
      description: 'Content lacks explicit Q&A structure. AI systems often look for clear question-answer pairs to provide direct responses.',
      impact: 'Lower chance of being featured in AI direct answers or FAQ-style responses.',
      howToFix: 'Add FAQ section with common questions as headings (H2/H3) followed by concise answers. Implement FAQ schema markup.',
      affectedUrls: [url],
      details: {
        suggestions: [
          'Use question words in headings (What, How, Why, etc.)',
          'Follow questions with short, direct answer paragraphs',
          'Add FAQ schema markup for structured data',
          'Consider using <details>/<summary> for expandable Q&A',
        ],
      },
    });
  }

  // Question headings exist but none is followed by a short answer paragraph.
  if (questionCount >= 2 && !hasDirectAnswers) {
    issues.push({
      code: 'AI_QUESTIONS_NO_DIRECT_ANSWERS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Questions in headings lack direct answers',
      description: `Found ${questionCount} question-style headings but answers are too long or not immediately following. AI prefers concise answers right after questions.`,
      impact: 'AI may struggle to extract clear answers, reducing citation likelihood.',
      howToFix: 'Start each answer section with a 1-2 sentence direct answer, then expand with details. First sentence should standalone as a complete answer.',
      affectedUrls: [url],
    });
  }

  // Definitional wording without a definition-shaped paragraph.
  if (!hasDefinitions && (bodyText.includes('what is') || bodyText.includes('definition'))) {
    issues.push({
      code: 'AI_NO_DEFINITIONS',
      severity: 'notice',
      category: 'ai-readiness',
      title: 'Missing clear definitions',
      description: 'Content discusses concepts but lacks Wikipedia-style definitions. AI loves clear "X is a..." statements.',
      impact: 'Less likely to be cited for definitional queries in AI search.',
      howToFix: 'Add clear definitions early in content: "[Term] is a [category] that [distinguishing features]." Make first paragraph a complete definition.',
      affectedUrls: [url],
    });
  }

  // Overall score below the base value.
  if (structureScore < 50) {
    issues.push({
      code: 'AI_LOW_STRUCTURE_SCORE',
      severity: 'warning',
      category: 'ai-readiness',
      title: 'Content structure not optimized for AI parsing',
      description: `AI structure score: ${structureScore}/100. Content lacks structured elements that AI can easily parse and quote.`,
      impact: 'Lower likelihood of being cited in AI-generated answers due to poor content structure.',
      howToFix: 'Improve content structure: add tables for data, numbered lists for steps, bullet points for features, Q&A sections for common questions.',
      affectedUrls: [url],
      details: {
        structureScore,
        hasTable: tableCount > 0,
        hasOrderedList: orderedLists > 0,
        hasUnorderedList: unorderedLists > 0,
        hasQAFormat: hasExplicitQA || questionCount > 0,
      },
    });
  }

  return {
    issues,
    data: {
      tables: {
        count: tableCount,
        hasComparisonTable,
        hasDataTable,
      },
      lists: {
        orderedLists,
        unorderedLists,
        hasStepByStep,
        hasBulletedFeatures,
      },
      qaFormat: {
        hasExplicitQA,
        questionCount,
        hasDirectAnswers,
      },
      quotability: {
        hasDefinitions,
        hasConciseStatements,
        shortParagraphRatio,
      },
      structureScore,
    },
  };
}
@@ -0,0 +1,339 @@
1
+ // AI/LLM Readiness Checks
2
+ // Checks for llms.txt, AI bot blocking, and JS rendering ratio
3
+
4
+ import { httpGet } from '../../utils/http.js';
5
+ import * as cheerio from 'cheerio';
6
+ import type { AuditIssue } from '../types.js';
7
+ import { ISSUE_DEFINITIONS } from '../types.js';
8
+
9
/**
 * Known AI bot user agents.
 *
 * Each key is the name used when reporting a bot as blocked or allowed; the
 * value is the user-agent token compared (case-insensitively) against
 * robots.txt `User-agent` lines.
 */
const AI_BOTS = {
  'GPTBot': 'GPTBot',
  'ChatGPT-User': 'ChatGPT-User',
  'OAI-SearchBot': 'OAI-SearchBot',
  'ClaudeBot': 'ClaudeBot',
  'Claude-Web': 'Claude-Web',
  'anthropic-ai': 'anthropic-ai',
  'PerplexityBot': 'PerplexityBot',
  'Google-Extended': 'Google-Extended',
  'Bytespider': 'Bytespider',
  'CCBot': 'CCBot',
};
22
+
23
/** Outcome of fetching and validating a site's `/llms.txt` file. */
export interface LlmsTxtResult {
  /** False when the file could not be fetched or the server answered with an error status. */
  exists: boolean;
  /** Body of the file; only set when the file exists. */
  content?: string;
  /** True when the file exists and no validation error was recorded. */
  valid: boolean;
  /** Human-readable reasons the file is missing or invalid; empty when valid. */
  errors: string[];
}
29
+
30
/** Outcome of checking robots.txt for rules that block known AI bots. */
export interface AIBotBlockingResult {
  /** False when robots.txt could not be fetched or the server answered with an error status. */
  robotsExists: boolean;
  /** Names of known AI bots that robots.txt blocks from the whole site. */
  blockedBots: string[];
  /** Names of known AI bots that are not blocked from the whole site. */
  allowedBots: string[];
  /** True when every known AI bot is listed in `blockedBots`. */
  allBlocked: boolean;
}
36
+
37
/** Combined data returned by the AI readiness checks. */
export interface AIReadinessData {
  /** Result of the `/llms.txt` check. */
  llmsTxt: LlmsTxtResult;
  /** Result of the robots.txt AI bot blocking check. */
  botBlocking: AIBotBlockingResult;
  /** Estimated share of content that depends on JavaScript rendering, 0-100%. */
  jsRenderingRatio: number;
}
42
+
43
/**
 * Check for an llms.txt file at the site root and run a basic format check.
 * See: https://llmstxt.org/
 *
 * Any response status of 400 or above, and any error thrown while fetching
 * or processing the response, is reported as a missing file. A fetched file
 * is considered invalid when it has no non-blank lines, when its first
 * non-blank line does not start with `#`, or when it neither contains `## `
 * nor is longer than 50 characters.
 *
 * @param baseUrl - Site URL against which `/llms.txt` is resolved.
 * @returns Issues raised for a missing or invalid file, plus the check result.
 */
export async function checkLlmsTxt(baseUrl: string): Promise<{ issues: AuditIssue[]; data: LlmsTxtResult }> {
  const issues: AuditIssue[] = [];
  const url = new URL('/llms.txt', baseUrl).href;

  try {
    const response = await httpGet<string>(url, {
      timeout: 10000,
      // Accept every status so error responses are handled below, not thrown.
      validateStatus: () => true,
    });

    if (response.status >= 400) {
      issues.push({
        ...ISSUE_DEFINITIONS.LLMS_TXT_MISSING,
        affectedUrls: [url],
      });
      return {
        issues,
        data: { exists: false, valid: false, errors: ['File not found'] },
      };
    }

    const content = response.data as string;
    const nonBlankLines = content.split('\n').filter((line) => line.trim());
    const errors: string[] = [];

    // The file is expected to open with a markdown title (# Site Name).
    if (nonBlankLines.length === 0) {
      errors.push('File is empty');
    } else if (!nonBlankLines[0].startsWith('#')) {
      errors.push('File should start with a markdown heading (# Site Name)');
    }

    // Either a section heading or a body longer than 50 characters counts as
    // meaningful content.
    const hasDescription = content.includes('## ') || content.length > 50;
    if (!hasDescription) {
      errors.push('File should contain meaningful content describing your site for AI');
    }

    // The file is valid exactly when no validation error was recorded.
    const valid = errors.length === 0;
    if (!valid) {
      issues.push({
        ...ISSUE_DEFINITIONS.LLMS_TXT_INVALID,
        affectedUrls: [url],
        details: { errors },
      });
    }

    return {
      issues,
      data: { exists: true, content, valid, errors },
    };
  } catch (error) {
    // Failures while fetching or processing are reported as a missing file,
    // with the error message attached for diagnosis.
    issues.push({
      ...ISSUE_DEFINITIONS.LLMS_TXT_MISSING,
      affectedUrls: [url],
      details: { error: error instanceof Error ? error.message : 'Unknown error' },
    });
    return {
      issues,
      data: { exists: false, valid: false, errors: ['Failed to fetch'] },
    };
  }
}
116
+
117
/** One robots.txt group: the user agents it names and the rules relevant to this check. */
interface RobotsGroup {
  /** Lower-cased `User-agent` values that open the group. */
  agents: string[];
  /** True when the group contains `Disallow: /` or `Disallow: /*`. */
  disallowsAll: boolean;
  /** True when the group contains any `Allow` line. */
  hasAllow: boolean;
}

/**
 * Split robots.txt text into groups.
 *
 * Consecutive `User-agent` lines share the rules that follow them, `#`
 * comments are stripped, and only `User-agent`, `Allow` and `Disallow` lines
 * are interpreted. Rules that appear before any `User-agent` line are ignored.
 */
function parseRobotsGroups(content: string): RobotsGroup[] {
  const groups: RobotsGroup[] = [];
  let current: RobotsGroup | undefined;
  let previousWasAgent = false;

  for (const rawLine of content.split('\n')) {
    const hashAt = rawLine.indexOf('#');
    const line = (hashAt === -1 ? rawLine : rawLine.slice(0, hashAt)).trim();

    // Split on the first colon only, so values containing ':' stay intact.
    const colonAt = line.indexOf(':');
    if (colonAt === -1) continue;

    const field = line.slice(0, colonAt).trim().toLowerCase();
    const value = line.slice(colonAt + 1).trim().toLowerCase();

    if (field === 'user-agent') {
      // A User-agent line that follows a rule line starts a new group;
      // one that follows another User-agent line joins the same group.
      if (current === undefined || !previousWasAgent) {
        current = { agents: [], disallowsAll: false, hasAllow: false };
        groups.push(current);
      }
      current.agents.push(value);
      previousWasAgent = true;
    } else if (field === 'disallow' || field === 'allow') {
      previousWasAgent = false;
      if (current === undefined) continue;

      if (field === 'allow') {
        current.hasAllow = true;
      } else if (value === '/' || value === '/*') {
        current.disallowsAll = true;
      }
    }
  }

  return groups;
}

/**
 * Check robots.txt for AI bot blocking.
 *
 * A bot is reported as blocked when the groups that apply to it disallow the
 * whole site (`Disallow: /` or `Disallow: /*`):
 * - If one or more groups name the bot explicitly, only those groups apply
 *   (they override the `*` groups); any `Allow` line in them means the bot is
 *   not treated as fully blocked.
 * - Otherwise the `*` groups apply; `Allow` lines there are not considered.
 *
 * A missing robots.txt (status 400 or above) or any failure while fetching or
 * parsing it is treated as "all bots allowed" and raises no issue.
 *
 * @param baseUrl - Site URL against which `/robots.txt` is resolved.
 * @returns Issues for blocked bots, plus the blocked/allowed bot lists.
 */
export async function checkAIBotBlocking(baseUrl: string): Promise<{ issues: AuditIssue[]; data: AIBotBlockingResult }> {
  const issues: AuditIssue[] = [];
  const url = new URL('/robots.txt', baseUrl).href;

  const blockedBots: string[] = [];
  const allowedBots: string[] = [];

  try {
    const response = await httpGet<string>(url, {
      timeout: 10000,
      // Accept every status so error responses are handled below, not thrown.
      validateStatus: () => true,
    });

    if (response.status >= 400) {
      // No robots.txt means all bots are allowed
      return {
        issues,
        data: {
          robotsExists: false,
          blockedBots: [],
          allowedBots: Object.keys(AI_BOTS),
          allBlocked: false,
        },
      };
    }

    const content = response.data as string;
    const groups = parseRobotsGroups(content);
    const wildcardGroups = groups.filter((group) => group.agents.includes('*'));

    // Determine blocked/allowed bots
    for (const [botKey, botName] of Object.entries(AI_BOTS)) {
      const tokens = [botKey.toLowerCase(), botName.toLowerCase()];
      const specificGroups = groups.filter((group) =>
        group.agents.some((agent) => tokens.includes(agent))
      );

      const blocked = specificGroups.length > 0
        ? specificGroups.some((group) => group.disallowsAll) &&
          !specificGroups.some((group) => group.hasAllow)
        : wildcardGroups.some((group) => group.disallowsAll);

      if (blocked) {
        blockedBots.push(botKey);
      } else {
        allowedBots.push(botKey);
      }
    }

    const allBlocked = blockedBots.length === Object.keys(AI_BOTS).length;

    // Generate issues for blocked bots
    if (allBlocked) {
      issues.push({
        ...ISSUE_DEFINITIONS.AI_BOT_BLOCKED_ALL,
        affectedUrls: [url],
        details: { blockedBots },
      });
    } else {
      // Individual bot blocking notices
      if (blockedBots.includes('GPTBot') || blockedBots.includes('ChatGPT-User')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_GPTBOT_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('ClaudeBot') || blockedBots.includes('Claude-Web') || blockedBots.includes('anthropic-ai')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_CLAUDEBOT_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('PerplexityBot')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_PERPLEXITY_BLOCKED,
          affectedUrls: [url],
        });
      }
      if (blockedBots.includes('Google-Extended')) {
        issues.push({
          ...ISSUE_DEFINITIONS.AI_BOT_GOOGLE_EXTENDED_BLOCKED,
          affectedUrls: [url],
        });
      }
    }

    return {
      issues,
      data: {
        robotsExists: true,
        blockedBots,
        allowedBots,
        allBlocked,
      },
    };
  } catch {
    // Best effort: a failed fetch or unparsable body is treated like a
    // missing robots.txt, i.e. nothing is reported as blocked.
    return {
      issues,
      data: {
        robotsExists: false,
        blockedBots: [],
        allowedBots: Object.keys(AI_BOTS),
        allBlocked: false,
      },
    };
  }
}
253
+
254
/**
 * Estimate how much of a page's content depends on JavaScript rendering.
 *
 * This is a heuristic based on the static HTML only: it counts the words
 * visible without scripts and looks for framework mount points. The ratio
 * starts from one of four fixed estimates:
 * - 90: fewer than 50 static words and a framework root is present
 * - 50: a framework root is present alongside static content
 * - 60: no framework root, but fewer than 100 static words
 * - 10: otherwise
 *
 * An estimate above 50 is lowered by 20 when the page contains a
 * `<noscript>` element. A `HIGH_JS_RENDERING_RATIO` issue is raised when the
 * final ratio is above 50.
 *
 * @param html - Raw (unrendered) HTML of the page.
 * @param url - URL of the page; attached to the issue as `affectedUrls`.
 * @returns The issues found, the estimated ratio (0-100) and the static word count.
 */
export function checkJSRenderingRatio(
  html: string,
  url: string
): { issues: AuditIssue[]; data: { ratio: number; staticWordCount: number } } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Record the noscript fallback BEFORE <noscript> is stripped below;
  // checking afterwards would always find nothing.
  const hasNoScript = $('noscript').length > 0;

  // Get static text content (what AI crawlers would see without JS):
  // script, style and noscript elements do not count as visible text.
  $('script, style, noscript').remove();

  const staticText = $('body').text().replace(/\s+/g, ' ').trim();
  const staticWordCount = staticText.split(/\s+/).filter((word) => word.length > 0).length;

  // Check for signs of heavy JS rendering
  const hasReactRoot = $('#root, #app, #__next, [data-reactroot]').length > 0;
  const hasVueApp = $('#app[data-v-app], [data-v-]').length > 0;
  const hasAngularApp = $('[ng-app], [data-ng-app]').length > 0;
  const hasFrameworkRoot = hasReactRoot || hasVueApp || hasAngularApp;
  const hasEmptyBody = staticWordCount < 50;

  // Estimate JS rendering ratio based on signals
  let ratio: number;
  if (hasEmptyBody && hasFrameworkRoot) {
    ratio = 90; // Likely SPA with most content rendered by JS
  } else if (hasFrameworkRoot) {
    ratio = 50; // Has JS framework but some static content
  } else if (staticWordCount < 100) {
    ratio = 60; // Very little static content
  } else {
    ratio = 10; // Mostly static content
  }

  // A noscript fallback lowers the estimate for JS-heavy pages.
  if (hasNoScript && ratio > 50) {
    ratio -= 20;
  }

  if (ratio > 50) {
    issues.push({
      ...ISSUE_DEFINITIONS.HIGH_JS_RENDERING_RATIO,
      affectedUrls: [url],
      details: { ratio: `${ratio}%`, staticWordCount },
    });
  }

  return {
    issues,
    data: { ratio, staticWordCount },
  };
}
309
+
310
/**
 * Run all AI readiness checks.
 *
 * The llms.txt and robots.txt checks are independent network requests, so
 * they run concurrently. Issues are still returned in a fixed order:
 * llms.txt, then AI bot blocking, then JS rendering ratio.
 *
 * @param baseUrl - Site URL used to resolve `/llms.txt` and `/robots.txt`;
 *   also reported as the affected URL of the JS rendering issue.
 * @param html - Raw HTML of the page, used for the JS rendering estimate.
 * @returns All issues found and the combined readiness data.
 */
export async function runAIReadinessChecks(
  baseUrl: string,
  html: string
): Promise<{ issues: AuditIssue[]; data: AIReadinessData }> {
  const [llmsResult, botResult] = await Promise.all([
    checkLlmsTxt(baseUrl),
    checkAIBotBlocking(baseUrl),
  ]);

  // Check JS rendering ratio (synchronous, no network access)
  const jsResult = checkJSRenderingRatio(html, baseUrl);

  return {
    issues: [...llmsResult.issues, ...botResult.issues, ...jsResult.issues],
    data: {
      llmsTxt: llmsResult.data,
      botBlocking: botResult.data,
      jsRenderingRatio: jsResult.data.ratio,
    },
  };
}