@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,463 @@
1
+ // Search Funnel Intent Analysis
2
+ // Reference: "6 Advanced SEO Tips for 2026"
3
+ // "Stop relying on top of funnel search terms"
4
+ // "TOFU, MOFU, BOFU - focus on middle and bottom of funnel"
5
+ // "AI overviews answer informational queries - focus on money keywords"
6
+
7
+ import * as cheerio from 'cheerio';
8
+ import type { AuditIssue } from '../types.js';
9
+
10
/** Funnel stage a page is classified into; `mixed` is used when no single stage is identified. */
export type FunnelStage = 'tofu' | 'mofu' | 'bofu' | 'mixed';

/** Result payload describing the funnel-intent classification of a single page. */
export interface FunnelIntentData {
  /** Stage the page was classified into. */
  detectedStage: FunnelStage;
  /** Confidence in `detectedStage`, on a 0-100 scale. */
  confidence: number;
  /** Descriptions of the patterns that matched, grouped by the stage they support. */
  signals: Record<'tofu' | 'mofu' | 'bofu', string[]>;
  /** Per-stage scores (informational = TOFU, consideration = MOFU, conversion = BOFU). */
  metrics: Record<'informationalScore' | 'considerationScore' | 'conversionScore', number>;
  /** Coarse page-type label such as `blog-post` or `pricing-page`. */
  pageType: string;
  /** Estimated likelihood that an AI overview answers the query instead of this page. */
  aiOverviewRisk: 'high' | 'medium' | 'low';
  /** Human-readable follow-up suggestions. */
  recommendations: string[];
}
29
+
30
/**
 * TOFU (top of funnel) - informational-intent patterns.
 *
 * Every pattern is case-insensitive and non-global, so `test()` carries no
 * state between calls. Order is significant: reported signals follow it.
 */
const TOFU_PATTERNS = {
  // Path fragments typical of editorial / educational sections.
  url: [
    /\/blog\//i, /\/article/i, /\/guide/i, /\/tutorial/i,
    /\/how-to/i, /\/what-is/i, /\/learn/i, /\/tips/i,
    /\/explained/i, /\/introduction/i, /\/beginners?/i,
  ],

  // Question-style and "guide"-style headline phrasing.
  title: [
    /^how to/i, /^what is/i, /^what are/i, /^why /i, /^when /i,
    /^guide to/i, /complete guide/i, /beginner'?s? guide/i, /ultimate guide/i,
    /\d+ tips/i, /\d+ ways/i, /\d+ things/i,
    /explained/i, /introduction to/i, /learn /i, /understanding/i,
  ],

  // Explanatory phrases found in article body copy.
  content: [
    /in this article/i, /in this guide/i,
    /we'll explain/i, /we'll cover/i, /you'll learn/i,
    /let's explore/i, /let's dive/i, /did you know/i,
    /according to/i, /research shows/i, /studies show/i,
  ],
};
77
+
78
/**
 * MOFU (middle of funnel) - consideration / comparison-intent patterns.
 *
 * Every pattern is case-insensitive and non-global, so `test()` carries no
 * state between calls. Order is significant: reported signals follow it.
 */
const MOFU_PATTERNS = {
  url: [
    /\/compare/i,
    /\/comparison/i,
    // "vs" as a standalone slug token: matches `/vs/`, `/a-vs-b`, `/a_vs_b`,
    // but not words that merely start with it (e.g. `/vscode`).
    /[\/_-]vs(?![a-z0-9])/i,
    /\/versus/i,
    /\/alternative/i,
    /\/review/i,
    /\/pros-cons/i,
    /\/features/i,
    /\/solutions/i,
    /\/case-stud/i,
  ],
  title: [
    // "vs" / "vs." as a whole word anywhere in the title ("A vs B"),
    // not only when the text happens to end with it.
    /\bvs\b/i,
    /versus/i,
    /comparison/i,
    /compared/i,
    /alternative/i,
    /review/i,
    /pros and cons/i,
    /which is better/i,
    /best .* for/i,
    /top \d+/i,
    /\d+ best/i,
    /how .* compares/i,
    /should you/i,
  ],
  content: [
    /let'?s compare/i,
    /in comparison/i,
    /alternative to/i,
    /pros:/i,
    /cons:/i,
    /advantages:/i,
    /disadvantages:/i,
    /key features/i,
    /feature comparison/i,
    /case study/i,
    /our client/i,
    /we helped/i,
  ],
};
122
+
123
/**
 * BOFU (bottom of funnel) - conversion / purchase-intent patterns.
 *
 * Besides text patterns this stage also carries CSS selectors (`elements`)
 * for checkout / pricing UI. Order is significant: reported signals follow it.
 */
const BOFU_PATTERNS = {
  // Path fragments of transactional or lead-capture pages.
  url: [
    /\/pricing/i, /\/buy/i, /\/purchase/i, /\/order/i,
    /\/sign-?up/i, /\/register/i, /\/demo/i, /\/trial/i,
    /\/quote/i, /\/contact/i, /\/get-started/i, /\/checkout/i,
    /\/hire/i, /\/services?$/i,
  ],

  // Calls to action and price mentions in the headline.
  title: [
    /pricing/i, /buy /i, /purchase/i, /order /i,
    /sign up/i, /get started/i, /free trial/i,
    /request .* demo/i, /book .* call/i, /schedule/i, /get .* quote/i,
    /contact us/i, /hire /i,
    /\$\d+/, /per month/i, /\/mo\b/i,
  ],

  // Calls to action, offers and billing vocabulary in body copy.
  content: [
    /add to cart/i, /buy now/i, /purchase now/i, /sign up now/i, /get started/i,
    /start.*trial/i, /request.*demo/i, /book.*call/i, /schedule.*meeting/i, /contact.*today/i,
    /limited.*offer/i, /special.*price/i, /money.*back.*guarantee/i, /free.*shipping/i,
    /plans.*pricing/i, /per user/i, /per month/i, /billed annually/i,
    /\$\d+/,
  ],

  // Selectors for purchase forms and pricing / checkout components.
  elements: [
    'form[action*="checkout"]',
    'form[action*="purchase"]',
    'form[action*="subscribe"]',
    'button[data-action="buy"]',
    '[class*="pricing-table"]',
    '[class*="price-card"]',
    '[class*="checkout"]',
    '[class*="add-to-cart"]',
  ],
};
191
+
192
/**
 * Score how strongly a page matches one funnel stage.
 *
 * Each matching pattern adds a fixed weight: URL 15, title 12, content 5,
 * and (when `patterns.elements` is given) 10 per selector that matches at
 * least one element.
 *
 * @param url - Page URL tested against `patterns.url`.
 * @param title - Title text tested against `patterns.title`.
 * @param content - Body text tested against `patterns.content`.
 * @param $ - Loaded document used to evaluate `patterns.elements` selectors.
 * @param patterns - Pattern set for the stage being scored.
 * @returns The summed score and the first five matched signals, in the order
 *          URL, title, content, element.
 */
function calculateStageScore(
  url: string,
  title: string,
  content: string,
  $: cheerio.CheerioAPI,
  patterns: {
    url: RegExp[];
    title: RegExp[];
    content: RegExp[];
    elements?: string[];
  }
): { score: number; signals: string[] } {
  const matched: string[] = [];
  let total = 0;

  // [signal label, text under test, regexes, points per hit] - highest weight first.
  const textChecks: Array<[string, string, RegExp[], number]> = [
    ['URL', url, patterns.url, 15],
    ['Title', title, patterns.title, 12],
    ['Content', content, patterns.content, 5],
  ];

  for (const [label, text, regexes, points] of textChecks) {
    for (const regex of regexes) {
      if (regex.test(text)) {
        total += points;
        matched.push(`${label}: ${regex.source}`);
      }
    }
  }

  // Structural evidence; only the BOFU pattern set supplies selectors.
  for (const selector of patterns.elements ?? []) {
    if ($(selector).length > 0) {
      total += 10;
      matched.push(`Element: ${selector}`);
    }
  }

  // The score counts every hit; the signal list is capped for readability.
  return { score: total, signals: matched.slice(0, 5) };
}
246
+
247
/**
 * Classify the page into a coarse page type.
 *
 * URL rules are tried first, then DOM selector rules; within each group the
 * first matching rule wins. Falls back to `general`.
 *
 * @param $ - Loaded document used for the selector rules.
 * @param url - Page URL used for the URL rules.
 * @returns A page-type label such as `blog-post`, `pricing-page` or `general`.
 */
function determinePageType($: cheerio.CheerioAPI, url: string): string {
  const urlRules: Array<[RegExp, string]> = [
    [/\/blog\//i, 'blog-post'],
    [/\/product/i, 'product-page'],
    [/\/services?/i, 'service-page'],
    [/\/pricing/i, 'pricing-page'],
    [/\/about/i, 'about-page'],
    [/\/contact/i, 'contact-page'],
    [/\/case-stud/i, 'case-study'],
  ];
  for (const [pattern, label] of urlRules) {
    if (pattern.test(url)) return label;
  }

  // Markup hints, consulted only when the URL is inconclusive.
  const selectorRules: Array<[string, string]> = [
    ['[class*="pricing"]', 'pricing-page'],
    ['[class*="product"]', 'product-page'],
    ['article', 'article'],
    ['[class*="blog"]', 'blog-post'],
    ['[class*="cart"]', 'e-commerce'],
    ['[class*="checkout"]', 'e-commerce'],
  ];
  for (const [selector, label] of selectorRules) {
    if ($(selector).length > 0) return label;
  }

  return 'general';
}
271
+
272
/**
 * Estimate how likely an AI-generated overview is to answer the query in
 * place of this page.
 *
 * @param stage - Funnel stage detected for the page.
 * @param pageType - Page-type label for the page.
 * @returns `high` for TOFU pages and for blog posts / articles of any stage,
 *          `medium` for remaining MOFU pages, `low` otherwise.
 */
function calculateAIOverviewRisk(stage: FunnelStage, pageType: string): FunnelIntentData['aiOverviewRisk'] {
  const isEditorialPage = pageType === 'blog-post' || pageType === 'article';

  // Informational content is the most exposed, whatever the page type.
  if (stage === 'tofu' || isEditorialPage) return 'high';

  // Comparison content sits in between; everything else is least exposed.
  return stage === 'mofu' ? 'medium' : 'low';
}
294
+
295
/**
 * Analyze which search-funnel stage (TOFU / MOFU / BOFU) a page targets and
 * report related content issues.
 *
 * The page is scored against each stage's pattern set; the scores are then
 * normalised to percentage shares. The stage with the highest raw score wins
 * (ties resolve MOFU over BOFU and TOFU over both), with two exceptions that
 * yield `mixed`:
 *  - no pattern matched at all (confidence is fixed at 50, no issue raised);
 *  - the leading stage holds less than 40% of the total score, in which case
 *    confidence is that share and `MIXED_FUNNEL_INTENT` is raised.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; used for URL patterns and as the affected URL.
 * @returns Issues found plus the funnel classification data.
 */
export function analyzeFunnelIntent(
  html: string,
  url: string
): { issues: AuditIssue[]; data: FunnelIntentData } {
  // Share (0-100) the leading stage must reach to count as the page's intent.
  const MIXED_INTENT_THRESHOLD = 40;

  const $ = cheerio.load(html);
  const issues: AuditIssue[] = [];

  // Title patterns are matched against "<title> <first h1>".
  const title = $('title').text().trim() + ' ' + $('h1').first().text().trim();

  // Work on a clone so the document used for selector checks stays intact.
  // Inline scripts and styles are dropped first: their source text is not page
  // copy and would otherwise feed the content patterns (e.g. `$1` in JS
  // matching the price pattern).
  const $body = $('body').clone();
  $body.find('script, style').remove();
  const bodyText = $body.text().toLowerCase();

  // Calculate scores for each funnel stage
  const tofuResult = calculateStageScore(url, title, bodyText, $, TOFU_PATTERNS);
  const mofuResult = calculateStageScore(url, title, bodyText, $, MOFU_PATTERNS);
  const bofuResult = calculateStageScore(url, title, bodyText, $, BOFU_PATTERNS);

  // Normalize to percentage shares; `|| 1` avoids dividing by zero when nothing matched.
  const totalScore = tofuResult.score + mofuResult.score + bofuResult.score || 1;
  const informationalScore = Math.round((tofuResult.score / totalScore) * 100);
  const considerationScore = Math.round((mofuResult.score / totalScore) * 100);
  const conversionScore = Math.round((bofuResult.score / totalScore) * 100);

  // Determine primary funnel stage
  let detectedStage: FunnelStage;
  let confidence: number;

  if (bofuResult.score > tofuResult.score && bofuResult.score > mofuResult.score) {
    detectedStage = 'bofu';
    confidence = conversionScore;
  } else if (mofuResult.score > tofuResult.score) {
    detectedStage = 'mofu';
    confidence = considerationScore;
  } else if (tofuResult.score > 0) {
    detectedStage = 'tofu';
    confidence = informationalScore;
  } else {
    // No pattern matched at all.
    detectedStage = 'mixed';
    confidence = 50;
  }

  // A leader that holds only a small share of the signals is not a clear
  // intent. Without this step `mixed` only ever carried confidence 50, so the
  // MIXED_FUNNEL_INTENT check below could never fire.
  if (detectedStage !== 'mixed' && confidence < MIXED_INTENT_THRESHOLD) {
    detectedStage = 'mixed';
  }

  // Determine page type
  const pageType = determinePageType($, url);

  // Calculate AI overview risk
  const aiOverviewRisk = calculateAIOverviewRisk(detectedStage, pageType);

  // Generate recommendations
  const recommendations: string[] = [];

  if (detectedStage === 'tofu') {
    recommendations.push('Consider adding conversion elements (CTAs, forms) to capture leads');
    recommendations.push('Link to MOFU/BOFU pages (comparisons, pricing) to guide users down the funnel');
    if (aiOverviewRisk === 'high') {
      recommendations.push('Add unique data, case studies, or expert opinions that AI cannot replicate');
    }
  }

  if (detectedStage === 'mofu') {
    recommendations.push('Add clear CTAs to BOFU pages (pricing, demos, trials)');
    recommendations.push('Include social proof (testimonials, case studies) to build trust');
  }

  if (detectedStage === 'mixed') {
    recommendations.push('Clarify the page\'s primary intent - is this informational, comparison, or conversion focused?');
    recommendations.push('Consider splitting into multiple focused pages for better SEO');
  }

  // Generate issues

  // High AI overview risk on TOFU content
  if (aiOverviewRisk === 'high' && detectedStage === 'tofu') {
    issues.push({
      code: 'TOFU_AI_OVERVIEW_RISK',
      severity: 'notice',
      category: 'content',
      title: 'Content at risk of AI overview displacement',
      description: 'This informational content may be answered directly by AI overviews, reducing click-through.',
      impact: 'Google AI overviews are increasingly answering informational queries directly in SERPs.',
      howToFix: 'Add unique value: original data, expert opinions, case studies, or interactive tools that AI cannot replicate.',
      affectedUrls: [url],
      details: {
        funnelStage: 'tofu',
        aiOverviewRisk,
        recommendation: 'Focus on MOFU/BOFU content or add unique differentiating content',
      },
    });
  }

  // TOFU without BOFU links
  if (detectedStage === 'tofu') {
    const hasConversionLinks = $('a[href*="pricing"], a[href*="demo"], a[href*="trial"], a[href*="contact"], a[href*="buy"]').length > 0;
    if (!hasConversionLinks) {
      issues.push({
        code: 'TOFU_NO_FUNNEL_LINKS',
        severity: 'notice',
        category: 'content',
        title: 'Informational page lacks conversion path',
        description: 'This TOFU content has no links to pricing, demo, trial, or contact pages.',
        impact: 'Users may leave without entering the sales funnel. Missing revenue opportunity.',
        howToFix: 'Add contextual CTAs linking to MOFU/BOFU pages (comparisons, pricing, demos).',
        affectedUrls: [url],
      });
    }
  }

  // BOFU page without social proof
  if (detectedStage === 'bofu') {
    const hasSocialProof = $('[class*="testimonial"], [class*="review"], [class*="case-study"], [class*="trust"]').length > 0 ||
      bodyText.includes('testimonial') || bodyText.includes('customer said');

    if (!hasSocialProof) {
      issues.push({
        code: 'BOFU_NO_SOCIAL_PROOF',
        severity: 'warning',
        category: 'content',
        title: 'Conversion page lacks social proof',
        description: 'This pricing/sales page has no visible testimonials, reviews, or case studies.',
        impact: '97% of consumers look at reviews before buying. Social proof can increase conversions by 34%.',
        howToFix: 'Add customer testimonials, case study results, trust badges, or review ratings.',
        affectedUrls: [url],
        details: {
          funnelStage: 'bofu',
          pageType,
        },
      });
    }
  }

  // Mixed intent page: signals are spread across stages with no clear leader.
  if (detectedStage === 'mixed' && confidence < MIXED_INTENT_THRESHOLD) {
    issues.push({
      code: 'MIXED_FUNNEL_INTENT',
      severity: 'notice',
      category: 'content',
      title: 'Unclear page intent',
      description: 'This page has mixed signals - unclear if informational, comparison, or conversion focused.',
      impact: 'Mixed intent pages may rank poorly because search engines cannot determine primary purpose.',
      howToFix: 'Focus on one primary intent per page. Create separate pages for different funnel stages.',
      affectedUrls: [url],
      details: {
        scores: { tofu: informationalScore, mofu: considerationScore, bofu: conversionScore },
      },
    });
  }

  return {
    issues,
    data: {
      detectedStage,
      confidence,
      signals: {
        tofu: tofuResult.signals,
        mofu: mofuResult.signals,
        bofu: bofuResult.signals,
      },
      metrics: {
        informationalScore,
        considerationScore,
        conversionScore,
      },
      pageType,
      aiOverviewRisk,
      recommendations,
    },
  };
}
@@ -0,0 +1,174 @@
1
+ import * as cheerio from 'cheerio';
2
+ import { httpHead, httpGet } from '../../utils/http.js';
3
+ import type { AuditIssue } from '../types.js';
4
+ import { ISSUE_DEFINITIONS } from '../types.js';
5
+
6
/** Summary of the hreflang annotations found on a page. */
export interface HreflangData {
  /** True when at least one hreflang link tag was found. */
  hasHreflang: boolean;
  /** Every hreflang link tag found, in document order. */
  entries: HreflangEntry[];
  /** True when one of the entries points at the analyzed page itself. */
  hasSelfReference: boolean;
  /** True when an `x-default` entry is present. */
  hasXDefault: boolean;
}

/** A single `<link rel="alternate" hreflang="...">` annotation. */
export interface HreflangEntry {
  /** Raw value of the `hreflang` attribute. */
  lang: string;
  /** Raw value of the `href` attribute. */
  href: string;
  /** Whether `lang` passed hreflang value validation. */
  isValid: boolean;
  /** Whether `href` resolves to the analyzed page. */
  isSelfReference: boolean;
  /** HTTP status of the alternate URL; only set when URL validation ran (0 = request failed). */
  statusCode?: number;
}
20
+
21
// ISO 639-1 language codes accepted as the primary hreflang subtag.
// This is still a subset of the standard: the most common languages plus
// further widely used ones. `x-default` is handled separately in
// isValidHreflang and is therefore not listed here.
const VALID_LANG_CODES = new Set([
  'en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'zh', 'ko', 'ar', 'hi', 'nl', 'sv',
  'pl', 'tr', 'da', 'fi', 'no', 'cs', 'el', 'he', 'hu', 'id', 'ms', 'th', 'vi', 'uk',
  'ro', 'bg', 'hr', 'sk', 'sl', 'et', 'lv', 'lt', 'sr', 'ca', 'eu', 'gl', 'af', 'sw',
  'nb', 'nn', 'is', 'ga', 'cy', 'mt', 'sq', 'mk', 'bs', 'be', 'lb', 'hy', 'ka', 'az',
  'kk', 'uz', 'mn', 'fa', 'ur', 'bn', 'ta', 'te', 'mr', 'gu', 'kn', 'ml', 'pa', 'ne',
  'si', 'km', 'lo', 'my', 'am', 'tl',
]);

// Region subtag: two letters (ISO 3166-1 alpha-2 shape).
const HREFLANG_REGION_SUBTAG = /^[a-z]{2}$/;
// Script subtag: four letters (ISO 15924 shape), e.g. `hans`, `hant`.
const HREFLANG_SCRIPT_SUBTAG = /^[a-z]{4}$/;

/**
 * Check whether an `hreflang` attribute value is well formed.
 *
 * Accepted forms (case-insensitive): `x-default`, `language`,
 * `language-REGION`, `language-Script` and `language-Script-REGION`.
 * The language must be a known ISO 639-1 code; region and script subtags are
 * checked for shape only, not against the ISO registries.
 *
 * @param lang - Raw `hreflang` attribute value.
 * @returns True when the value has one of the accepted forms.
 */
function isValidHreflang(lang: string): boolean {
  const tag = lang.toLowerCase();
  if (tag === 'x-default') return true;

  const [language = '', first, second, ...extra] = tag.split('-');
  if (!VALID_LANG_CODES.has(language)) return false;

  // Language only.
  if (first === undefined) return true;
  // More than two subtags after the language is not a supported form.
  if (extra.length > 0) return false;

  if (second === undefined) {
    return HREFLANG_REGION_SUBTAG.test(first) || HREFLANG_SCRIPT_SUBTAG.test(first);
  }
  // With two subtags the script must come before the region.
  return HREFLANG_SCRIPT_SUBTAG.test(first) && HREFLANG_REGION_SUBTAG.test(second);
}
37
+
38
/**
 * Analyze the hreflang `<link>` annotations of a page.
 *
 * Always checks for invalid hreflang values, a missing self-reference and a
 * missing `x-default` (the latter only when more than one entry exists).
 * Pages without any hreflang tags produce no issues.
 *
 * When `options.validateUrls` is set, every non-self alternate URL is probed
 * with a HEAD request (redirects are not followed, so any status >= 300 is
 * reported), and each alternate that answered 200 is fetched to verify that
 * it links back to this page. Requests run one after another.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; used for self-reference and return-link checks.
 * @param options - `validateUrls` enables the network checks described above.
 * @returns Issues found plus the extracted hreflang data; probed entries get
 *          their `statusCode` filled in (0 when the request failed).
 */
export async function analyzeHreflang(
  html: string,
  url: string,
  options: { validateUrls?: boolean } = {}
): Promise<{ issues: AuditIssue[]; data: HreflangData }> {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const entries: HreflangEntry[] = [];

  // Extract hreflang from link tags
  $('link[rel="alternate"][hreflang]').each((_, el) => {
    const lang = $(el).attr('hreflang') || '';
    const href = $(el).attr('href') || '';
    const isValid = isValidHreflang(lang);
    const isSelfReference = normalizeUrl(href) === normalizeUrl(url);

    entries.push({ lang, href, isValid, isSelfReference });
  });

  // NOTE: hreflang declared via HTTP `Link` headers is not covered here;
  // the headers would have to be passed in.

  const hasHreflang = entries.length > 0;
  const hasSelfReference = entries.some(e => e.isSelfReference);
  const hasXDefault = entries.some(e => e.lang.toLowerCase() === 'x-default');

  const data: HreflangData = {
    hasHreflang,
    entries,
    hasSelfReference,
    hasXDefault,
  };

  if (!hasHreflang) {
    // No hreflang is not necessarily an issue for single-language sites
    return { issues, data };
  }

  // Check for invalid hreflang values
  const invalidEntries = entries.filter(e => !e.isValid);
  if (invalidEntries.length > 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.HREFLANG_INVALID,
      affectedUrls: [url],
      details: { invalidLanguages: invalidEntries.map(e => e.lang) },
    });
  }

  // Check for missing self-reference
  if (!hasSelfReference) {
    issues.push({
      ...ISSUE_DEFINITIONS.HREFLANG_SELF_MISSING,
      affectedUrls: [url],
    });
  }

  // Check for missing x-default
  if (!hasXDefault && entries.length > 1) {
    issues.push({
      ...ISSUE_DEFINITIONS.HREFLANG_X_DEFAULT_MISSING,
      affectedUrls: [url],
    });
  }

  // Validate hreflang URLs if enabled
  if (options.validateUrls) {
    for (const entry of entries) {
      if (!entry.isSelfReference && entry.href) {
        try {
          const response = await httpHead(entry.href, {
            timeout: 5000,
            maxRedirects: 0,
            // Never throw on status: every status code is inspected below.
            validateStatus: () => true,
          });
          entry.statusCode = response.status;

          if (response.status >= 300) {
            issues.push({
              ...ISSUE_DEFINITIONS.HREFLANG_TO_NON_200,
              affectedUrls: [url],
              details: {
                targetUrl: entry.href,
                lang: entry.lang,
                statusCode: response.status
              },
            });
          }
        } catch {
          // Network-level failure (DNS, timeout, invalid URL, ...).
          entry.statusCode = 0;
          issues.push({
            ...ISSUE_DEFINITIONS.HREFLANG_TO_NON_200,
            affectedUrls: [url],
            details: {
              targetUrl: entry.href,
              lang: entry.lang,
              statusCode: 'unreachable'
            },
          });
        }
      }
    }

    // Check for reciprocal links
    for (const entry of entries) {
      if (!entry.isSelfReference && entry.href && entry.statusCode === 200) {
        try {
          const response = await httpGet<string>(entry.href, { timeout: 5000 });

          // The alternate must reference this exact page. Matching on the
          // hostname alone is satisfied by the alternate's own self-reference
          // whenever all language versions share one domain.
          if (!alternateLinksBackTo(response.data, entry.href, url)) {
            issues.push({
              ...ISSUE_DEFINITIONS.HREFLANG_NO_RETURN,
              affectedUrls: [url],
              details: {
                targetUrl: entry.href,
                lang: entry.lang
              },
            });
          }
        } catch {
          // Skip if can't fetch
        }
      }
    }
  }

  return { issues, data };
}

/**
 * Report whether an alternate page declares an hreflang link whose URL,
 * resolved against the alternate's own URL and normalized, equals the
 * normalized source page URL.
 */
function alternateLinksBackTo(alternateHtml: string, alternateUrl: string, sourceUrl: string): boolean {
  const $alternate = cheerio.load(alternateHtml);
  const expected = normalizeUrl(sourceUrl);

  return $alternate('link[rel="alternate"][hreflang]')
    .toArray()
    .some((el) => {
      const href = $alternate(el).attr('href');
      if (!href) return false;
      try {
        return normalizeUrl(new URL(href, alternateUrl).href) === expected;
      } catch {
        // Unparseable href cannot be a return link.
        return false;
      }
    });
}
166
+
167
/**
 * Reduce a URL to `protocol//host/path` for comparison purposes.
 *
 * Query string and fragment are dropped and a single trailing slash is
 * removed. Input that cannot be parsed as an absolute URL is returned
 * unchanged.
 *
 * @param urlStr - URL to normalize.
 * @returns The normalized URL, or `urlStr` itself when parsing fails.
 */
function normalizeUrl(urlStr: string): string {
  let parsed: URL;
  try {
    parsed = new URL(urlStr);
  } catch {
    return urlStr;
  }

  const { protocol, host, pathname } = parsed;
  const withoutQuery = [protocol, '//', host, pathname].join('');
  return withoutQuery.replace(/\/$/, '');
}