@rankcli/agent-runtime 0.0.8 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +90 -196
  2. package/dist/analyzer-GMURJADU.mjs +7 -0
  3. package/dist/chunk-2JADKV3Z.mjs +244 -0
  4. package/dist/chunk-3ZSCLNTW.mjs +557 -0
  5. package/dist/chunk-4E4MQOSP.mjs +374 -0
  6. package/dist/chunk-6BWS3CLP.mjs +16 -0
  7. package/dist/chunk-AK2IC22C.mjs +206 -0
  8. package/dist/chunk-K6VSXDD6.mjs +293 -0
  9. package/dist/chunk-M27NQCWW.mjs +303 -0
  10. package/dist/{chunk-YNZYHEYM.mjs → chunk-PJLNXOLN.mjs} +0 -14
  11. package/dist/chunk-VSQD74I7.mjs +474 -0
  12. package/dist/core-web-vitals-analyzer-TE6LQJMS.mjs +7 -0
  13. package/dist/geo-analyzer-D47LTMMA.mjs +25 -0
  14. package/dist/image-optimization-analyzer-XP4OQGRP.mjs +9 -0
  15. package/dist/index.d.mts +1523 -17
  16. package/dist/index.d.ts +1523 -17
  17. package/dist/index.js +9582 -2664
  18. package/dist/index.mjs +4812 -380
  19. package/dist/internal-linking-analyzer-MRMBV7NM.mjs +9 -0
  20. package/dist/mobile-seo-analyzer-67HNQ7IO.mjs +7 -0
  21. package/dist/security-headers-analyzer-3ZUQARS5.mjs +9 -0
  22. package/dist/structured-data-analyzer-2I4NQAUP.mjs +9 -0
  23. package/package.json +2 -2
  24. package/src/analyzers/core-web-vitals-analyzer.test.ts +236 -0
  25. package/src/analyzers/core-web-vitals-analyzer.ts +557 -0
  26. package/src/analyzers/geo-analyzer.test.ts +310 -0
  27. package/src/analyzers/geo-analyzer.ts +814 -0
  28. package/src/analyzers/image-optimization-analyzer.test.ts +145 -0
  29. package/src/analyzers/image-optimization-analyzer.ts +348 -0
  30. package/src/analyzers/index.ts +233 -0
  31. package/src/analyzers/internal-linking-analyzer.test.ts +141 -0
  32. package/src/analyzers/internal-linking-analyzer.ts +419 -0
  33. package/src/analyzers/mobile-seo-analyzer.test.ts +140 -0
  34. package/src/analyzers/mobile-seo-analyzer.ts +455 -0
  35. package/src/analyzers/security-headers-analyzer.test.ts +115 -0
  36. package/src/analyzers/security-headers-analyzer.ts +318 -0
  37. package/src/analyzers/structured-data-analyzer.test.ts +210 -0
  38. package/src/analyzers/structured-data-analyzer.ts +590 -0
  39. package/src/audit/engine.ts +3 -3
  40. package/src/audit/types.ts +3 -2
  41. package/src/fixer/framework-fixes.test.ts +489 -0
  42. package/src/fixer/framework-fixes.ts +3418 -0
  43. package/src/fixer/index.ts +1 -0
  44. package/src/fixer/schemas.ts +971 -0
  45. package/src/frameworks/detector.ts +642 -114
  46. package/src/frameworks/suggestion-engine.ts +38 -1
  47. package/src/index.ts +6 -0
  48. package/src/types.ts +15 -1
  49. package/dist/analyzer-2CSWIQGD.mjs +0 -6
@@ -0,0 +1,814 @@
1
+ /**
2
+ * GEO (Generative Engine Optimization) Analyzer
3
+ *
4
+ * Industry-first analyzer for AI search visibility.
5
+ * Checks if your site is optimized for ChatGPT, Perplexity, Claude, and other AI search engines.
6
+ *
7
+ * Features:
8
+ * - AI crawler accessibility detection
9
+ * - LLM-friendly content structure analysis
10
+ * - Citation-optimized formatting
11
+ * - Entity and fact extraction readiness
12
+ * - AI search ranking factors
13
+ */
14
+
15
+ import * as cheerio from 'cheerio';
16
+ import type { AuditIssue, IssueSeverity, IssueCategory } from '../audit/types.js';
17
+
18
/**
 * Aggregate result of a GEO (Generative Engine Optimization) analysis of one page.
 */
export interface GEOAnalysisResult {
  /** Overall readiness score, clamped to 0-100. */
  score: number; // 0-100 GEO readiness score
  /** robots.txt and rendering-mode findings for AI crawlers. */
  aiCrawlerAccess: AIAccessResult;
  /** Schema, heading, section and list/table findings. */
  contentStructure: ContentStructureResult;
  /** Trust signals such as citations, author and dates. */
  citationReadiness: CitationResult;
  /** Entities, defined terms and Q&A findings. */
  entityExtraction: EntityResult;
  /** Derived 0-100 sub-scores. */
  llmSignals: LLMSignalResult;
  /** Audit issues generated from the findings above. */
  issues: AuditIssue[];
  /** Short human-readable improvement suggestions. */
  recommendations: string[];
}
28
+
29
/**
 * Whether AI crawlers can reach and read the page.
 */
export interface AIAccessResult {
  /** True when no known AI crawler was found to be blocked. */
  robotsTxtAllowsAI: boolean;
  /** Identifiers (keys of the AI crawler table) considered blocked. */
  blockedCrawlers: string[];
  /** Identifiers (keys of the AI crawler table) considered allowed. */
  allowedCrawlers: string[];
  /** True when a prerender meta hint was found in the HTML. */
  hasPrerendering: boolean;
  /** True when the HTML looks like an empty client-rendered shell. */
  jsRenderingRequired: boolean;
  /** True when the HTML carries framework SSR markers or substantial content. */
  serverSideRendered: boolean;
}
37
+
38
/**
 * Structural cues found in the page markup.
 */
export interface ContentStructureResult {
  /** At least one `application/ld+json` script is present. */
  hasStructuredData: boolean;
  /** A JSON-LD block mentions `"FAQPage"`. */
  hasFAQSchema: boolean;
  /** A JSON-LD block mentions `"HowTo"`. */
  hasHowToSchema: boolean;
  /** A JSON-LD block mentions `"Article"`, `"BlogPosting"` or `"NewsArticle"`. */
  hasArticleSchema: boolean;
  /** A JSON-LD block mentions `"Product"`. */
  hasProductSchema: boolean;
  /** Breadcrumb markup or a `"BreadcrumbList"` mention was found. */
  hasBreadcrumbs: boolean;
  /** Coarse rating derived from the h1/h2 counts. */
  headingHierarchy: 'good' | 'needs-work' | 'poor';
  /** Number of section-like elements. */
  contentSections: number;
  /** Number of `ul`, `ol`, `table` and `dl` elements. */
  listsAndTables: number;
}
49
+
50
/**
 * Trust signals that make a page easier to cite.
 */
export interface CitationResult {
  /** Citation markup or citation-like text patterns were found. */
  hasCitations: boolean;
  /** The page links out to other sites. */
  hasExternalLinks: boolean;
  /** Percentages, magnitudes or "increased/decreased by N" phrases were found. */
  hasStatistics: boolean;
  /** Quote markup or a long double-quoted passage was found. */
  hasQuotes: boolean;
  /** Author markup or an author meta tag was found. */
  hasAuthorInfo: boolean;
  /** A publish-date element or meta tag was found. */
  hasPublishDate: boolean;
  /** A last-modified element or meta tag was found. */
  hasLastModified: boolean;
  /** One human-readable label per signal above that is present. */
  trustSignals: string[];
}
60
+
61
/**
 * Entity- and definition-level findings extracted from the page.
 */
export interface EntityResult {
  /** Capitalised `strong`/`b` texts, de-duplicated and capped by the analyzer. */
  namedEntities: string[];
  /** Text of `dfn` and `abbr[title]` elements. */
  definedTerms: string[];
  /** Definition phrasing or definition markup was found. */
  hasDefinitions: boolean;
  /** Comparison phrasing or comparison markup was found. */
  hasComparisons: boolean;
  /** Count of FAQ-like elements plus question-style headings. */
  questionAnswerPairs: number;
}
68
+
69
/**
 * Derived sub-scores describing how easily an LLM can consume the page.
 * Every field is an integer-valued score capped at 100.
 */
export interface LLMSignalResult {
  /** Headings, lists, sections, breadcrumbs and definitions. */
  contentClarity: number; // 0-100
  /** Statistics, citations, links, quotes and Q&A pairs. */
  factDensity: number; // 0-100
  /** Schema presence and heading hierarchy. */
  structureQuality: number; // 0-100
  /** Trust-signal count, author and date information. */
  citationQuality: number; // 0-100
  /** Weighted blend of the four scores above. */
  overallLLMFriendliness: number; // 0-100
}
76
+
77
// Known AI crawlers and their user agents.
// The object keys are the identifiers reported in `allowed` / `blocked`;
// `userAgent` is the token matched against robots.txt `User-agent` lines.
// NOTE(review): several keys differ from their user-agent token (e.g. ClaudeBot →
// 'Claude-Web', GoogleOther → 'Google-Extended') — confirm the tokens against each
// vendor's current crawler documentation.
const AI_CRAWLERS = {
  // OpenAI
  GPTBot: { userAgent: 'GPTBot', company: 'OpenAI', purpose: 'ChatGPT training & browsing' },
  ChatGPTUser: { userAgent: 'ChatGPT-User', company: 'OpenAI', purpose: 'ChatGPT browsing feature' },
  OAI_SearchBot: { userAgent: 'OAI-SearchBot', company: 'OpenAI', purpose: 'OpenAI search features' },

  // Anthropic
  ClaudeBot: { userAgent: 'Claude-Web', company: 'Anthropic', purpose: 'Claude web access' },
  anthropic_ai: { userAgent: 'anthropic-ai', company: 'Anthropic', purpose: 'Claude training' },

  // Perplexity
  PerplexityBot: { userAgent: 'PerplexityBot', company: 'Perplexity', purpose: 'Perplexity AI search' },

  // Google AI
  GoogleOther: { userAgent: 'Google-Extended', company: 'Google', purpose: 'Bard/Gemini training' },

  // Microsoft/Bing
  Bingbot: { userAgent: 'bingbot', company: 'Microsoft', purpose: 'Bing Chat/Copilot' },

  // Meta
  FacebookBot: { userAgent: 'FacebookBot', company: 'Meta', purpose: 'Meta AI features' },
  MetaAI: { userAgent: 'meta-externalagent', company: 'Meta', purpose: 'Meta AI training' },

  // Apple
  Applebot: { userAgent: 'Applebot-Extended', company: 'Apple', purpose: 'Apple Intelligence' },

  // Amazon
  Amazonbot: { userAgent: 'Amazonbot', company: 'Amazon', purpose: 'Alexa/Amazon AI' },

  // Others
  YouBot: { userAgent: 'YouBot', company: 'You.com', purpose: 'You.com AI search' },
  CCBot: { userAgent: 'CCBot', company: 'Common Crawl', purpose: 'Training data' },
  cohere_ai: { userAgent: 'cohere-ai', company: 'Cohere', purpose: 'Cohere training' },
};

/** One robots.txt group: the agents it names and its site-wide (root) rules. */
interface RobotsGroup {
  /** Lower-cased `User-agent` values that share this group's rules. */
  agents: string[];
  /** Length of the longest root-level `Allow` pattern (`/` or `/*`), 0 if none. */
  rootAllow: number;
  /** Length of the longest root-level `Disallow` pattern (`/` or `/*`), 0 if none. */
  rootDisallow: number;
}

/**
 * Split robots.txt into groups, keeping only the rules that affect the whole site.
 * Consecutive `User-agent` lines share one group; `#` comments are ignored.
 */
function parseRobotsGroups(robotsTxt: string): RobotsGroup[] {
  // Rules that appear before any User-agent line are treated as applying to `*`.
  let current: RobotsGroup = { agents: ['*'], rootAllow: 0, rootDisallow: 0 };
  const groups: RobotsGroup[] = [current];
  let previousWasAgent = false;

  for (const rawLine of robotsTxt.split(/\r?\n/)) {
    const line = rawLine.replace(/#.*$/, '').trim();
    const separator = line.indexOf(':');
    if (separator === -1) continue;

    const field = line.slice(0, separator).trim().toLowerCase();
    const value = line.slice(separator + 1).trim().toLowerCase();

    if (field === 'user-agent') {
      // A User-agent line that follows a rule line starts a new group.
      if (!previousWasAgent) {
        current = { agents: [], rootAllow: 0, rootDisallow: 0 };
        groups.push(current);
      }
      current.agents.push(value);
      previousWasAgent = true;
      continue;
    }

    // Other records (Sitemap, Crawl-delay, ...) must not affect group parsing.
    if (field !== 'allow' && field !== 'disallow') continue;
    previousWasAgent = false;

    if (value === '/' || value === '/*') {
      if (field === 'allow') {
        current.rootAllow = Math.max(current.rootAllow, value.length);
      } else {
        current.rootDisallow = Math.max(current.rootDisallow, value.length);
      }
    }
  }

  return groups;
}

/**
 * Analyze robots.txt for AI crawler permissions.
 *
 * Each known crawler is evaluated against the groups that name it explicitly, or
 * against the `*` groups when none does — a specific group overrides `*`. A crawler
 * is blocked when its groups contain a root-level `Disallow` (`/` or `/*`) that is
 * not matched by an equally or more specific root-level `Allow`. Every crawler ends
 * up in exactly one of the two lists.
 *
 * @param robotsTxt Raw robots.txt content.
 * @returns `allowed` / `blocked` crawler identifiers (keys of the crawler table)
 *          and warning strings for notable blocked crawlers.
 */
export function analyzeRobotsTxtForAI(robotsTxt: string): {
  allowed: string[];
  blocked: string[];
  recommendations: string[];
} {
  const allowed: string[] = [];
  const blocked: string[] = [];
  const recommendations: string[] = [];

  const groups = parseRobotsGroups(robotsTxt);
  const wildcardGroups = groups.filter(group => group.agents.includes('*'));

  for (const [name, info] of Object.entries(AI_CRAWLERS)) {
    const token = info.userAgent.toLowerCase();
    const specificGroups = groups.filter(group =>
      group.agents.some(agent => agent !== '*' && agent.includes(token))
    );
    const applicable = specificGroups.length > 0 ? specificGroups : wildcardGroups;

    // Longest match wins; on a tie the Allow rule wins.
    const rootAllow = Math.max(0, ...applicable.map(group => group.rootAllow));
    const rootDisallow = Math.max(0, ...applicable.map(group => group.rootDisallow));

    if (rootDisallow > rootAllow) {
      blocked.push(name);
    } else {
      allowed.push(name);
    }
  }

  // Generate recommendations
  if (blocked.includes('GPTBot')) {
    recommendations.push('⚠️ GPTBot is blocked - your content won\'t appear in ChatGPT responses');
  }
  if (blocked.includes('PerplexityBot')) {
    recommendations.push('⚠️ PerplexityBot is blocked - missing Perplexity AI search visibility');
  }
  if (blocked.includes('ClaudeBot')) {
    recommendations.push('⚠️ Claude-Web is blocked - no visibility in Claude AI responses');
  }

  if (allowed.length === 0) {
    recommendations.push('🚨 All AI crawlers appear to be blocked - zero AI search visibility');
  }

  return { allowed, blocked, recommendations };
}
179
+
180
/**
 * Classify how a page's content reaches the client, from its raw HTML alone.
 *
 * @param html Raw HTML of the page.
 * @returns Flags for server-side rendering, required client-side rendering and
 *          prerender hints, whether the HTML itself carries substantial content,
 *          and the list of human-readable signals that drove the flags.
 */
export function detectRenderingMode(html: string): {
  serverSideRendered: boolean;
  jsRenderingRequired: boolean;
  hasPrerendering: boolean;
  contentInHTML: boolean;
  signals: string[];
} {
  const doc = cheerio.load(html);

  // Markers that meta-frameworks leave in server-generated markup.
  const hasNextData = html.includes('__NEXT_DATA__');
  const metaFrameworkDetected =
    hasNextData ||
    ['__NUXT__', '___gatsby', 'astro-island'].some(marker => html.includes(marker));

  // An (almost) empty body next to a mount point suggests a client-rendered shell.
  const visibleText = doc('body').text().replace(/\s+/g, ' ').trim();
  const bodyIsThin = visibleText.length < 200;
  const hasMountPoint = doc('#root, #app, #__next, #__nuxt').length > 0;
  const hasReactRootAttr = doc('[data-reactroot]').length > 0;

  // Prerender hints exposed through meta tags.
  const prerenderHint =
    doc('meta[name="prerender-status-code"]').length > 0 ||
    doc('meta[name="fragment"]').attr('content') === '!';

  // More than two paragraphs or headings counts as real content in the HTML.
  const contentInHTML =
    doc('p').length > 2 || doc('h1, h2, h3, h4, h5, h6').length > 2;

  const spaShell = bodyIsThin && hasMountPoint;
  const clientOnlyReact = hasReactRootAttr && !hasNextData && bodyIsThin;

  const signals: string[] = [];
  if (metaFrameworkDetected) signals.push('Meta-framework SSR detected');
  if (spaShell) signals.push('SPA with minimal server content detected');
  if (contentInHTML) signals.push('Substantial HTML content present');
  if (clientOnlyReact) signals.push('Client-side React app detected');

  return {
    serverSideRendered: metaFrameworkDetected || contentInHTML,
    jsRenderingRequired: spaShell || clientOnlyReact,
    hasPrerendering: prerenderHint,
    contentInHTML,
    signals,
  };
}
246
+
247
/**
 * Inspect page markup for the structural cues LLMs rely on: JSON-LD schema
 * types, breadcrumbs, heading hierarchy, sections, and lists/tables.
 *
 * Schema detection is a substring search over the raw JSON-LD text; the JSON
 * is not parsed.
 *
 * @param html Raw HTML of the page.
 * @returns The structural findings for the page.
 */
export function analyzeContentStructure(html: string): ContentStructureResult {
  const $ = cheerio.load(html);

  // Raw text of every JSON-LD script block on the page.
  const ldBlocks = $('script[type="application/ld+json"]')
    .toArray()
    .map(node => $(node).html() || '');
  const anyBlockMentions = (...needles: string[]): boolean =>
    ldBlocks.some(block => needles.some(needle => block.includes(needle)));

  // Breadcrumbs: microdata / class / aria markup, or a JSON-LD mention anywhere.
  const breadcrumbMarkup = $(
    '[itemtype*="BreadcrumbList"], .breadcrumb, .breadcrumbs, nav[aria-label*="breadcrumb"]'
  );
  const hasBreadcrumbs = breadcrumbMarkup.length > 0 || html.includes('"BreadcrumbList"');

  // Exactly one h1 is required for anything better than 'poor';
  // two or more h2s upgrade 'needs-work' to 'good'.
  const h1Total = $('h1').length;
  const h2Total = $('h2').length;
  let headingHierarchy: 'good' | 'needs-work' | 'poor';
  if (h1Total !== 1 || h2Total < 1) {
    headingHierarchy = 'poor';
  } else if (h2Total >= 2) {
    headingHierarchy = 'good';
  } else {
    headingHierarchy = 'needs-work';
  }

  return {
    hasStructuredData: ldBlocks.length > 0,
    hasFAQSchema: anyBlockMentions('"FAQPage"'),
    hasHowToSchema: anyBlockMentions('"HowTo"'),
    hasArticleSchema: anyBlockMentions('"Article"', '"BlogPosting"', '"NewsArticle"'),
    hasProductSchema: anyBlockMentions('"Product"'),
    hasBreadcrumbs,
    headingHierarchy,
    contentSections: $('section, article, .section, [class*="section"]').length,
    // Lists and tables (LLMs love these)
    listsAndTables: $('ul, ol, table, dl').length,
  };
}
314
+
315
/**
 * Analyze citation readiness (trust signals for AI).
 *
 * External links are absolute http(s) links whose host differs from the page's
 * own host. The page host comes from `pageUrl` when given, otherwise from the
 * canonical link or `og:url` meta tag. When the page host cannot be determined,
 * every parseable absolute link counts as external. Hosts are compared without a
 * leading `www.`; other subdomains are treated as different sites.
 *
 * @param html    Raw HTML of the page.
 * @param pageUrl Optional URL of the page, used to tell internal from external links.
 * @returns The trust-signal findings for the page.
 */
export function analyzeCitationReadiness(html: string, pageUrl?: string): CitationResult {
  const $ = cheerio.load(html);
  const bodyText = $('body').text();

  // Normalised hostname of a URL, or undefined when it is missing or malformed.
  const hostOf = (value: string | undefined): string | undefined => {
    if (!value) return undefined;
    try {
      return new URL(value).hostname.toLowerCase().replace(/^www\./, '');
    } catch {
      return undefined;
    }
  };

  // Check for citations
  const hasCitations = $('cite, blockquote, [class*="citation"], [class*="reference"]').length > 0 ||
    /\[\d+\]|\(\d{4}\)/.test(bodyText);

  // Check for external links (source references)
  const pageHost =
    hostOf(pageUrl) ??
    hostOf($('link[rel="canonical"]').attr('href')) ??
    hostOf($('meta[property="og:url"]').attr('content'));
  const hasExternalLinks = $('a[href^="http"]').toArray().some(el => {
    const linkHost = hostOf($(el).attr('href'));
    if (linkHost === undefined) return false; // malformed href: skip, do not throw
    return pageHost === undefined || linkHost !== pageHost;
  });

  // Check for statistics
  const hasStatistics = /\d+%|\d+\s*(million|billion|thousand)|increased by \d+|decreased by \d+/i.test(bodyText);

  // Check for quotes
  const hasQuotes = $('blockquote, q').length > 0 || /"[^"]{20,}"/.test(bodyText);

  // Check for author info
  const hasAuthorInfo = $('[rel="author"], .author, [class*="author"], [itemtype*="Person"]').length > 0 ||
    $('meta[name="author"]').length > 0;

  // Check for dates
  const hasPublishDate = $('time[datetime], meta[property="article:published_time"], .publish-date, .date').length > 0;
  const hasLastModified = $('meta[property="article:modified_time"], .updated, .modified').length > 0;

  // Collect trust signals
  const trustSignals: string[] = [];
  if (hasCitations) trustSignals.push('Citations present');
  if (hasExternalLinks) trustSignals.push('External source links');
  if (hasStatistics) trustSignals.push('Statistical data');
  if (hasQuotes) trustSignals.push('Expert quotes');
  if (hasAuthorInfo) trustSignals.push('Author attribution');
  if (hasPublishDate) trustSignals.push('Publish date');
  if (hasLastModified) trustSignals.push('Last modified date');

  return {
    hasCitations,
    hasExternalLinks,
    hasStatistics,
    hasQuotes,
    hasAuthorInfo,
    hasPublishDate,
    hasLastModified,
    trustSignals,
  };
}
368
+
369
/**
 * Pull entity-level information out of a page: defined terms, definition and
 * comparison phrasing, question/answer style content, and emphasised names.
 *
 * @param html Raw HTML of the page.
 * @returns The entity findings; `namedEntities` holds at most 20 entries.
 */
export function analyzeEntityExtraction(html: string): EntityResult {
  const $ = cheerio.load(html);
  const pageText = $('body').text();

  // Terms marked up with <dfn> or <abbr title="...">, skipping empty ones.
  const definedTerms = $('dfn, abbr[title]')
    .toArray()
    .map(node => $(node).text().trim())
    .filter(term => term.length > 0);

  // Definition phrasing in the text, or definition markup.
  const definitionPhrase = /is defined as|refers to|means that|is the process of/i;
  const hasDefinitions =
    definitionPhrase.test(pageText) ||
    $('dfn, .definition, [class*="definition"]').length > 0;

  // Comparison phrasing in the text, or comparison markup.
  const comparisonPhrase = /compared to|versus|vs\.|unlike|similar to|difference between/i;
  const hasComparisons =
    comparisonPhrase.test(pageText) ||
    $('table[class*="comparison"], .comparison, [class*="versus"]').length > 0;

  // Q&A pairs: FAQ-like elements plus headings phrased as questions.
  const questionOpener = /^(what|how|why|when|where|who|which|can|does|is|are|should|will)\s/i;
  const faqElements = $('details, .faq-item, [class*="question"], [class*="accordion"]').length;
  const questionHeadings = $('h2, h3, h4')
    .toArray()
    .map(node => $(node).text())
    .filter(heading => questionOpener.test(heading) || heading.endsWith('?')).length;

  // Named entities: short, capitalised <strong>/<b> texts, first occurrence only.
  const entitySet = new Set<string>();
  for (const node of $('strong, b').toArray()) {
    const label = $(node).text().trim();
    const plausibleName = label.length > 2 && label.length < 50 && /^[A-Z]/.test(label);
    if (plausibleName) entitySet.add(label);
  }

  return {
    namedEntities: Array.from(entitySet).slice(0, 20), // Limit to top 20
    definedTerms,
    hasDefinitions,
    hasComparisons,
    questionAnswerPairs: faqElements + questionHeadings,
  };
}
425
+
426
/**
 * Calculate LLM-friendliness signals from previously computed findings.
 *
 * Each sub-score is a sum of fixed point values capped at 100. The overall score
 * is a weighted blend: clarity 25%, fact density 25%, structure 30%, citation 20%.
 *
 * @param structure Structural findings for the page.
 * @param citation  Trust-signal findings for the page.
 * @param entity    Entity findings for the page.
 * @param html      Unused; retained so existing callers keep compiling. The page is
 *                  no longer re-parsed here because no score reads the document.
 * @returns The four sub-scores and the rounded overall score.
 */
export function calculateLLMSignals(
  structure: ContentStructureResult,
  citation: CitationResult,
  entity: EntityResult,
  html: string
): LLMSignalResult {
  // Content clarity (structure + readability)
  let contentClarity = 0;
  if (structure.headingHierarchy === 'good') contentClarity += 30;
  else if (structure.headingHierarchy === 'needs-work') contentClarity += 15;
  contentClarity += Math.min(structure.listsAndTables * 5, 25); // Lists help clarity
  contentClarity += Math.min(structure.contentSections * 5, 20);
  if (structure.hasBreadcrumbs) contentClarity += 10;
  contentClarity += entity.hasDefinitions ? 15 : 0;
  contentClarity = Math.min(contentClarity, 100);

  // Fact density (citations, stats, specifics)
  let factDensity = 0;
  if (citation.hasStatistics) factDensity += 25;
  if (citation.hasCitations) factDensity += 20;
  if (citation.hasExternalLinks) factDensity += 15;
  if (citation.hasQuotes) factDensity += 15;
  factDensity += Math.min(entity.questionAnswerPairs * 5, 25);
  factDensity = Math.min(factDensity, 100);

  // Structure quality (schema, hierarchy)
  let structureQuality = 0;
  if (structure.hasStructuredData) structureQuality += 25;
  if (structure.hasFAQSchema) structureQuality += 20;
  if (structure.hasArticleSchema) structureQuality += 15;
  if (structure.hasHowToSchema) structureQuality += 15;
  if (structure.hasProductSchema) structureQuality += 10;
  structureQuality += structure.headingHierarchy === 'good' ? 15 : 0;
  structureQuality = Math.min(structureQuality, 100);

  // Citation quality (trust signals); author info counts both as a trust
  // signal and as its own bonus.
  let citationQuality = 0;
  citationQuality += citation.trustSignals.length * 12;
  if (citation.hasAuthorInfo) citationQuality += 15;
  if (citation.hasPublishDate && citation.hasLastModified) citationQuality += 15;
  citationQuality = Math.min(citationQuality, 100);

  // Overall LLM friendliness
  const overallLLMFriendliness = Math.round(
    (contentClarity * 0.25) +
    (factDensity * 0.25) +
    (structureQuality * 0.30) +
    (citationQuality * 0.20)
  );

  return {
    contentClarity,
    factDensity,
    structureQuality,
    citationQuality,
    overallLLMFriendliness,
  };
}
489
+
490
/**
 * Turn GEO findings into audit issues.
 *
 * Checks run in a fixed order (crawler access, rendering, schema, headings,
 * citations, author, dates, overall score); each failing check adds one issue
 * whose `affectedUrls` is `[url]`.
 *
 * @param aiAccess   Crawler-access and rendering findings.
 * @param structure  Structural findings.
 * @param citation   Trust-signal findings.
 * @param entity     Entity findings.
 * @param llmSignals Derived sub-scores.
 * @param url        URL of the analysed page.
 * @returns The issues raised, in check order; empty when nothing is wrong.
 */
export function generateGEOIssues(
  aiAccess: AIAccessResult,
  structure: ContentStructureResult,
  citation: CitationResult,
  entity: EntityResult,
  llmSignals: LLMSignalResult,
  url: string
): AuditIssue[] {
  const found: AuditIssue[] = [];
  // Every issue targets the same single page, so the URL is attached in one place.
  const report = (issue: Omit<AuditIssue, 'affectedUrls'>): void => {
    found.push({ ...issue, affectedUrls: [url] });
  };

  const blockedCount = aiAccess.blockedCrawlers.length;
  const needsJsWithoutPrerender = aiAccess.jsRenderingRequired && !aiAccess.hasPrerendering;
  const qaWithoutFaqSchema = !structure.hasFAQSchema && entity.questionAnswerPairs > 0;
  const lacksSources = !citation.hasCitations && !citation.hasExternalLinks;
  const lacksAnyDate = !citation.hasPublishDate || !citation.hasLastModified;

  // AI crawler access
  if (blockedCount > 0) {
    report({
      code: 'GEO_AI_CRAWLERS_BLOCKED',
      severity: 'critical',
      category: 'technical',
      title: `${blockedCount} AI crawlers blocked in robots.txt`,
      description: `The following AI crawlers are blocked: ${aiAccess.blockedCrawlers.join(', ')}. This prevents your content from appearing in AI search results.`,
      impact: 'Your content will NOT appear in ChatGPT, Perplexity, or Claude responses',
      howToFix: `Update robots.txt to allow AI crawlers:\n\nUser-agent: GPTBot\nAllow: /\n\nUser-agent: PerplexityBot\nAllow: /\n\nUser-agent: Claude-Web\nAllow: /`,
    });
  }

  // JS rendering required without prerendering
  if (needsJsWithoutPrerender) {
    report({
      code: 'GEO_JS_RENDERING_REQUIRED',
      severity: 'critical',
      category: 'technical',
      title: 'JavaScript rendering required - AI crawlers see blank page',
      description: 'Your site requires JavaScript to render content. Most AI crawlers (GPTBot, PerplexityBot) do not execute JavaScript and will see a blank page.',
      impact: 'Zero visibility in AI search - crawlers cannot access your content',
      howToFix: 'Implement Server-Side Rendering (SSR) or Static Site Generation (SSG). For React: use Next.js. For Vue: use Nuxt. Alternatively, implement a prerendering service.',
    });
  }

  // No structured data
  if (!structure.hasStructuredData) {
    report({
      code: 'GEO_NO_STRUCTURED_DATA',
      severity: 'warning',
      category: 'technical',
      title: 'No JSON-LD structured data found',
      description: 'Structured data helps AI systems understand your content\'s context, entities, and relationships.',
      impact: 'AI systems may misunderstand or skip your content',
      howToFix: 'Add JSON-LD structured data. At minimum, include Organization, WebPage, and Article/Product schemas.',
    });
  }

  // Q&A content present but no FAQ schema
  if (qaWithoutFaqSchema) {
    report({
      code: 'GEO_MISSING_FAQ_SCHEMA',
      severity: 'warning',
      category: 'technical',
      title: 'Q&A content without FAQPage schema',
      description: `Found ${entity.questionAnswerPairs} question-answer pairs but no FAQPage schema. FAQ schema is excellent for AI search visibility.`,
      impact: 'Missing opportunity for featured snippets and AI citations',
      howToFix: 'Add FAQPage schema markup for your Q&A content. This dramatically increases chances of being cited by AI.',
    });
  }

  // Poor heading hierarchy
  if (structure.headingHierarchy === 'poor') {
    report({
      code: 'GEO_POOR_HEADING_STRUCTURE',
      severity: 'warning',
      category: 'content',
      title: 'Poor heading hierarchy hurts AI understanding',
      description: 'AI systems use heading structure to understand content organization. Your page lacks a clear H1 → H2 → H3 hierarchy.',
      impact: 'AI may struggle to extract key topics and relationships',
      howToFix: 'Structure content with one H1 (main topic), multiple H2s (subtopics), and H3s (details). Use question-format headings where appropriate.',
    });
  }

  // No citations or sources
  if (lacksSources) {
    report({
      code: 'GEO_NO_CITATIONS',
      severity: 'warning',
      category: 'content',
      title: 'No citations or source references',
      description: 'AI systems prioritize content with verifiable sources and citations. Your page lacks external references.',
      impact: 'Lower trust score in AI ranking - may not be cited as authoritative',
      howToFix: 'Add citations, link to authoritative sources, include statistics with references, and add expert quotes.',
    });
  }

  // No author info
  if (!citation.hasAuthorInfo) {
    report({
      code: 'GEO_NO_AUTHOR_INFO',
      severity: 'info',
      category: 'content',
      title: 'Missing author attribution',
      description: 'Author information helps establish E-E-A-T (Experience, Expertise, Authoritativeness, Trustworthiness) for AI systems.',
      impact: 'Lower credibility score in AI ranking algorithms',
      howToFix: 'Add author byline, link to author bio/profile, and consider adding author schema markup.',
    });
  }

  // Publish date or last-modified date missing (either one triggers the issue)
  if (lacksAnyDate) {
    report({
      code: 'GEO_NO_DATES',
      severity: 'info',
      category: 'content',
      title: 'Missing publish/update dates',
      description: 'AI systems consider content freshness. Pages without dates may be considered outdated.',
      impact: 'May be deprioritized for time-sensitive queries',
      howToFix: 'Add visible publish date and "last updated" date. Include datePublished and dateModified in schema.',
    });
  }

  // Low LLM friendliness
  if (llmSignals.overallLLMFriendliness < 40) {
    report({
      code: 'GEO_LOW_LLM_SCORE',
      severity: 'warning',
      category: 'content',
      title: `Low AI-friendliness score: ${llmSignals.overallLLMFriendliness}/100`,
      description: 'Your content structure and signals are not optimized for AI consumption. AI systems may struggle to extract and cite your content.',
      impact: 'Significantly reduced chances of appearing in AI search results',
      howToFix: 'Improve content structure (clear headings, lists, tables), add citations and statistics, include FAQ sections, and add comprehensive schema markup.',
    });
  }

  return found;
}
631
+
632
/**
 * Run the full GEO analysis for one page.
 *
 * Combines crawler access (from robots.txt, when supplied), rendering mode,
 * content structure, citation readiness and entity extraction into sub-scores,
 * issues, recommendations and a final 0-100 score.
 *
 * @param html      Raw HTML of the page.
 * @param url       URL of the page; attached to generated issues.
 * @param robotsTxt Optional robots.txt content. When omitted or empty, every
 *                  known AI crawler is assumed to be allowed.
 * @returns The complete analysis result.
 */
export async function analyzeGEO(
  html: string,
  url: string,
  robotsTxt?: string
): Promise<GEOAnalysisResult> {
  // Crawler access: robots.txt findings (if any) merged with rendering findings.
  const robotsFindings = robotsTxt ? analyzeRobotsTxtForAI(robotsTxt) : undefined;
  const rendering = detectRenderingMode(html);
  const aiCrawlerAccess: AIAccessResult = {
    robotsTxtAllowsAI: robotsFindings ? robotsFindings.blocked.length === 0 : true,
    blockedCrawlers: robotsFindings ? robotsFindings.blocked : [],
    allowedCrawlers: robotsFindings ? robotsFindings.allowed : Object.keys(AI_CRAWLERS),
    hasPrerendering: rendering.hasPrerendering,
    jsRenderingRequired: rendering.jsRenderingRequired,
    serverSideRendered: rendering.serverSideRendered,
  };

  // Page-level findings and the scores derived from them.
  const contentStructure = analyzeContentStructure(html);
  const citationReadiness = analyzeCitationReadiness(html);
  const entityExtraction = analyzeEntityExtraction(html);
  const llmSignals = calculateLLMSignals(contentStructure, citationReadiness, entityExtraction, html);

  const issues = generateGEOIssues(
    aiCrawlerAccess,
    contentStructure,
    citationReadiness,
    entityExtraction,
    llmSignals,
    url
  );

  const hasBlockedCrawlers = aiCrawlerAccess.blockedCrawlers.length > 0;
  const qaPairs = entityExtraction.questionAnswerPairs;

  // Recommendations, most urgent first; each entry is [applies, message].
  // NOTE(review): the SSR/SSG recommendation fires even when prerendering is
  // present, whereas the matching issue and score penalty do not — confirm intent.
  const candidates: Array<[boolean, string]> = [
    [hasBlockedCrawlers, '🚨 URGENT: Unblock AI crawlers in robots.txt'],
    [aiCrawlerAccess.jsRenderingRequired, '🚨 URGENT: Implement SSR/SSG for AI crawler access'],
    [!contentStructure.hasFAQSchema && qaPairs > 0, 'Add FAQPage schema for your Q&A content'],
    [!contentStructure.hasStructuredData, 'Add JSON-LD structured data (Article, Organization)'],
    [citationReadiness.trustSignals.length < 3, 'Add more trust signals: citations, statistics, author info'],
    [contentStructure.listsAndTables < 2, 'Add lists and tables - AI loves structured content'],
    [!entityExtraction.hasDefinitions, 'Add clear definitions for key terms'],
    [qaPairs < 3, 'Add FAQ section with common questions'],
  ];
  const recommendations = candidates
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  // Overall score: start from the LLM score, subtract penalties, add bonuses.
  const penalty =
    aiCrawlerAccess.blockedCrawlers.length * 10 +
    (aiCrawlerAccess.jsRenderingRequired && !aiCrawlerAccess.hasPrerendering ? 30 : 0);
  const bonus =
    (contentStructure.hasFAQSchema ? 10 : 0) +
    (contentStructure.hasArticleSchema ? 5 : 0) +
    (aiCrawlerAccess.robotsTxtAllowsAI ? 10 : 0);
  const clamped = Math.max(0, Math.min(100, llmSignals.overallLLMFriendliness - penalty + bonus));

  return {
    score: Math.round(clamped),
    aiCrawlerAccess,
    contentStructure,
    citationReadiness,
    entityExtraction,
    llmSignals,
    issues,
    recommendations,
  };
}
747
+
748
/**
 * Generate AI-optimized robots.txt.
 *
 * Surrounding whitespace and trailing slashes are stripped from `siteUrl`, so
 * `https://example.com/` and `https://example.com` produce the same Sitemap line
 * instead of a `//sitemap.xml` path.
 *
 * NOTE(review): under RFC 9309 a crawler obeys only the most specific group that
 * matches it, so the per-crawler `Allow: /` groups below do not inherit the
 * `Disallow` rules of the `*` group (e.g. /admin/ stays open to those crawlers).
 * Confirm whether that is intended.
 *
 * @param siteUrl Origin of the site, with or without a trailing slash.
 * @returns The robots.txt content, ending with a newline.
 */
export function generateAIFriendlyRobotsTxt(siteUrl: string): string {
  const baseUrl = siteUrl.trim().replace(/\/+$/, '');

  return `# AI-Optimized robots.txt
# Generated by RankCLI - https://rankcli.dev

# Allow all standard crawlers
User-agent: *
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /_next/static/
Disallow: /private/

# === AI Search Crawlers ===
# OpenAI (ChatGPT)
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Anthropic (Claude)
User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

# Google AI (Gemini/Bard)
User-agent: Google-Extended
Allow: /

# Microsoft (Copilot)
User-agent: bingbot
Allow: /

# Apple Intelligence
User-agent: Applebot-Extended
Allow: /

# Meta AI
User-agent: meta-externalagent
Allow: /

# You.com
User-agent: YouBot
Allow: /

# Cohere
User-agent: cohere-ai
Allow: /

# Sitemap
Sitemap: ${baseUrl}/sitemap.xml
`;
}
813
+
814
/**
 * Public handle on the module's AI crawler table. This is the same object the
 * analyzers read, not a copy, so mutating it affects later analyses.
 */
export const AI_CRAWLERS_INFO = AI_CRAWLERS;