@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178):
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,471 @@
1
+ // Keyword Placement Analysis - Critical On-Page SEO Signals
2
+ // Reference: "4 Steps to Rank #1 in Google (2026 SEO Plan)" by Nathan Gotch
3
+ // "Google's leaked documents have a feature called title match score"
4
+ // "Include the primary keyword phrase in the first paragraph"
5
+
6
+ import * as cheerio from 'cheerio';
7
+ import type { AuditIssue } from '../types.js';
8
+
9
/**
 * Aggregated result of the keyword-placement checks for a single page.
 *
 * Each `*Analysis` member is the return value of the matching `analyze*`
 * function in this module; `overallPlacementScore` combines them.
 */
export interface KeywordPlacementData {
  /** Candidate target phrases inferred for the page. */
  detectedKeywords: string[];

  /** Keyword presence and structural quality of the page URL. */
  urlAnalysis: {
    /** True when one of the keywords was found in the URL path. */
    containsKeyword: boolean;
    /** The keyword that matched, or null when none did. */
    keywordInUrl: string | null;
    /** Lower-cased URL pathname that was inspected. */
    urlSlug: string;
    isCleanUrl: boolean;
    /** True when the URL carries a query string. */
    hasDynamicParams: boolean;
  };

  /** Keyword match against the <title> element. */
  titleAnalysis: {
    hasExactMatch: boolean;
    hasPartialMatch: boolean;
    titleMatchScore: number; // 0-100 estimated
    position: 'start' | 'middle' | 'end' | 'none';
  };

  /** Keyword match against the first <h1> element. */
  h1Analysis: {
    hasExactMatch: boolean;
    hasPartialMatch: boolean;
    /** Text of the first <h1>, or null when it is missing or empty. */
    h1Text: string | null;
  };

  /** Keyword presence in the opening paragraph of the content. */
  firstParagraphAnalysis: {
    hasKeyword: boolean;
    keywordPosition: number | null; // word position
    /** Opening paragraph, truncated for display. */
    firstParagraphPreview: string;
    isWithinFirst100Words: boolean;
  };

  /** Keyword presence in the meta description. */
  metaDescriptionAnalysis: {
    hasKeyword: boolean;
    position: 'start' | 'middle' | 'end' | 'none';
  };

  overallPlacementScore: number; // 0-100
}
41
+
42
/**
 * Infer the phrases a page is most likely targeting.
 *
 * Candidates are collected, in this order, from:
 *   1. the <title>, with any trailing brand segment removed,
 *   2. the first <h1>,
 *   3. the last segment of the URL path.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page. If it cannot be parsed, the slug candidate is
 *   simply omitted.
 * @returns Up to three unique, lower-cased candidate phrases, in the order
 *   listed above.
 */
export function inferTargetKeywords(html: string, url: string): string[] {
  const $ = cheerio.load(html);
  const candidates: string[] = [];

  // Title is the most reliable signal.
  const title = $('title').text().trim();
  if (title) {
    // Brand names usually follow a separator. A pipe or a long dash always
    // separates; a plain hyphen only counts when it is surrounded by
    // whitespace, so hyphenated words such as "E-commerce" stay intact.
    const headline = title.split(/\s*[|–—]\s*|\s+-\s+/)[0]?.trim() || title;
    if (headline.length > 3 && headline.length < 100) {
      candidates.push(headline.toLowerCase());
    }
  }

  const h1 = $('h1').first().text().trim();
  if (h1 && h1.length > 3 && h1.length < 100) {
    candidates.push(h1.toLowerCase());
  }

  try {
    const lastSegment = new URL(url).pathname
      .split('/')
      .filter((part) => part.length > 0)
      .pop();
    if (lastSegment) {
      const slugPhrase = lastSegment
        .replace(/[-_]/g, ' ')
        .replace(/\.(html?|php|aspx?)$/i, '');
      if (slugPhrase.length > 3) {
        candidates.push(slugPhrase.toLowerCase());
      }
    }
  } catch {
    // Unparsable URL: no slug candidate.
  }

  // Identical phrases from different sources collapse into one entry.
  return [...new Set(candidates)].slice(0, 3);
}
92
+
93
/**
 * Check whether the URL path contains one of the target keywords and whether
 * the URL looks "clean".
 *
 * A keyword counts as present when at least 70% of its significant words
 * (longer than two characters) occur as substrings of the lower-cased path.
 * Keywords without any significant word are skipped; otherwise they would
 * match every URL vacuously.
 *
 * @param url - URL to inspect.
 * @param keywords - Candidate keywords, checked in order; the first match wins.
 * @returns The URL analysis. For an unparsable URL every flag is false and
 *   `urlSlug` is the empty string.
 */
export function analyzeUrlKeyword(url: string, keywords: string[]): KeywordPlacementData['urlAnalysis'] {
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
  } catch {
    return {
      containsKeyword: false,
      keywordInUrl: null,
      urlSlug: '',
      isCleanUrl: false,
      hasDynamicParams: false,
    };
  }

  const urlSlug = parsedUrl.pathname.toLowerCase();
  // Turn path separators into spaces so the path reads like a phrase.
  const slugText = urlSlug.replace(/[-_/]/g, ' ').trim();

  const keywordInUrl =
    keywords.find((keyword) => {
      const significantWords = keyword
        .toLowerCase()
        .split(/\s+/)
        .filter((word) => word.length > 2);
      if (significantWords.length === 0) {
        return false;
      }
      const matchCount = significantWords.filter((word) => slugText.includes(word)).length;
      return matchCount >= Math.ceil(significantWords.length * 0.7);
    }) ?? null;

  const hasDynamicParams = parsedUrl.search.length > 0;

  // The pathname never contains the "?" of a query string, so the query is
  // checked through `search` rather than by scanning the path.
  const isCleanUrl =
    !hasDynamicParams &&
    !urlSlug.includes('=') &&
    !/\d{5,}/.test(urlSlug) && // No long ID numbers
    urlSlug.length < 100;

  return {
    containsKeyword: keywordInUrl !== null,
    keywordInUrl,
    urlSlug,
    isCleanUrl,
    hasDynamicParams,
  };
}
143
+
144
/**
 * Score how well the <title> matches the target keywords ("title match score").
 *
 * Keywords are tried in order. The first keyword found verbatim in the
 * lower-cased title is an exact match and ends the search. A keyword counts
 * as a partial match when at least 60% of its significant words (longer than
 * two characters) occur in the title. Blank keywords and keywords without
 * significant words are ignored so they cannot match vacuously.
 *
 * Score: exact match at the start 100, in the middle 80, at the end 60;
 * partial match only 40; otherwise 0.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keywords.
 * @returns Match flags, estimated score and where in the title the keyword sits.
 */
export function analyzeTitleKeyword(html: string, keywords: string[]): KeywordPlacementData['titleAnalysis'] {
  const $ = cheerio.load(html);
  const title = $('title').text().trim().toLowerCase();

  if (!title) {
    return {
      hasExactMatch: false,
      hasPartialMatch: false,
      titleMatchScore: 0,
      position: 'none',
    };
  }

  let hasExactMatch = false;
  let hasPartialMatch = false;
  let position: 'start' | 'middle' | 'end' | 'none' = 'none';

  for (const keyword of keywords) {
    const phrase = keyword.toLowerCase();
    if (phrase.trim() === '') {
      continue; // an empty phrase is "contained" in every title
    }

    const index = title.indexOf(phrase);
    if (index !== -1) {
      hasExactMatch = true;
      if (index < 5) {
        position = 'start';
      } else if (index > title.length - phrase.length - 10) {
        position = 'end';
      } else {
        position = 'middle';
      }
      break;
    }

    // Partial match: most significant words are present somewhere in the title.
    const significantWords = phrase.split(/\s+/).filter((word) => word.length > 2);
    if (significantWords.length === 0) {
      continue;
    }
    const matchCount = significantWords.filter((word) => title.includes(word)).length;
    if (matchCount >= Math.ceil(significantWords.length * 0.6)) {
      hasPartialMatch = true;
      // Scattered words have no single location; 'middle' is used as a neutral value.
      position = 'middle';
    }
  }

  let titleMatchScore = 0;
  if (hasExactMatch) {
    titleMatchScore = position === 'start' ? 100 : position === 'middle' ? 80 : 60;
  } else if (hasPartialMatch) {
    titleMatchScore = 40;
  }

  return {
    hasExactMatch,
    hasPartialMatch,
    titleMatchScore,
    position,
  };
}
207
+
208
/**
 * Check the first <h1> of the page for the target keywords.
 *
 * Keywords are tried in order. A keyword found verbatim (case-insensitively)
 * is an exact match and ends the search. A keyword counts as a partial match
 * when at least 60% of its significant words (longer than two characters)
 * occur in the heading. Blank keywords and keywords without significant words
 * are ignored so they cannot match vacuously.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keywords.
 * @returns Match flags plus the heading text (null when there is no non-empty <h1>).
 */
export function analyzeH1Keyword(html: string, keywords: string[]): KeywordPlacementData['h1Analysis'] {
  const $ = cheerio.load(html);
  const h1 = $('h1').first().text().trim();
  const heading = h1.toLowerCase();

  let hasExactMatch = false;
  let hasPartialMatch = false;

  for (const keyword of keywords) {
    const phrase = keyword.toLowerCase();
    if (phrase.trim() === '') {
      continue; // an empty phrase is "contained" in every heading
    }

    if (heading.includes(phrase)) {
      hasExactMatch = true;
      break;
    }

    const significantWords = phrase.split(/\s+/).filter((word) => word.length > 2);
    if (significantWords.length === 0) {
      continue;
    }
    const matchCount = significantWords.filter((word) => heading.includes(word)).length;
    if (matchCount >= Math.ceil(significantWords.length * 0.6)) {
      hasPartialMatch = true;
    }
  }

  return {
    hasExactMatch,
    hasPartialMatch,
    h1Text: h1 || null,
  };
}
240
+
241
/**
 * Check whether a target keyword appears in the opening paragraph.
 *
 * The opening paragraph is the first <p> longer than 50 characters inside a
 * typical content container (main, article, .content, #content, or a div
 * directly under body). If none qualifies, the first <p> of the document is
 * used.
 *
 * Only significant keyword words (longer than two characters) are searched
 * for. Short words such as "a" or "to" would otherwise substring-match almost
 * any word and report a keyword at position 0.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keywords, tried in order; the first one with a hit wins.
 * @returns Whether a keyword word was found, its zero-based word index, a
 *   preview of the paragraph (at most 200 characters plus "...") and whether
 *   the hit lies within the first 100 words.
 */
export function analyzeFirstParagraph(html: string, keywords: string[]): KeywordPlacementData['firstParagraphAnalysis'] {
  const $ = cheerio.load(html);

  // Find the first real paragraph (not in header/nav)
  const contentParagraphs = $('main p, article p, .content p, #content p, body > div p').toArray();
  let firstParagraph = '';

  for (const paragraph of contentParagraphs) {
    const text = $(paragraph).text().trim();
    if (text.length > 50) { // Skip very short paragraphs
      firstParagraph = text;
      break;
    }
  }

  // Fallback to first p tag
  if (!firstParagraph) {
    firstParagraph = $('p').first().text().trim();
  }

  const paragraphWords = firstParagraph.toLowerCase().split(/\s+/);

  let hasKeyword = false;
  let keywordPosition: number | null = null;

  for (const keyword of keywords) {
    const significantWords = keyword
      .toLowerCase()
      .split(/\s+/)
      .filter((word) => word.length > 2);
    if (significantWords.length === 0) {
      continue;
    }

    // Index of the first paragraph word containing any significant keyword word.
    const hitIndex = paragraphWords.findIndex((word) =>
      significantWords.some((kwWord) => word.includes(kwWord))
    );
    if (hitIndex !== -1) {
      hasKeyword = true;
      keywordPosition = hitIndex;
      break;
    }
  }

  return {
    hasKeyword,
    keywordPosition,
    firstParagraphPreview: firstParagraph.substring(0, 200) + (firstParagraph.length > 200 ? '...' : ''),
    isWithinFirst100Words: hasKeyword && keywordPosition !== null && keywordPosition < 100,
  };
}
293
+
294
/**
 * Check the meta description for the target keywords.
 *
 * A keyword counts as present when at least half of its significant words
 * (longer than two characters) occur in the lower-cased description.
 * Keywords without significant words are skipped; previously they matched
 * vacuously and produced `hasKeyword: true` with `position: 'none'`.
 *
 * The reported position is approximate. It is taken from the first keyword
 * word (in keyword order) found in the description: within the first 30
 * characters is 'start', within the last 50 is 'end', otherwise 'middle'.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keywords, tried in order; the first match wins.
 * @returns Presence flag and approximate position. Both are negative when
 *   the description is missing or empty.
 */
export function analyzeMetaDescription(html: string, keywords: string[]): KeywordPlacementData['metaDescriptionAnalysis'] {
  const $ = cheerio.load(html);
  const metaDesc = $('meta[name="description"]').attr('content')?.trim().toLowerCase() || '';

  if (!metaDesc) {
    return { hasKeyword: false, position: 'none' };
  }

  for (const keyword of keywords) {
    const significantWords = keyword
      .toLowerCase()
      .split(/\s+/)
      .filter((word) => word.length > 2);
    if (significantWords.length === 0) {
      continue;
    }

    const presentWords = significantWords.filter((word) => metaDesc.includes(word));
    if (presentWords.length < Math.ceil(significantWords.length * 0.5)) {
      continue;
    }

    // The threshold above is at least 1, so presentWords is non-empty here.
    const index = metaDesc.indexOf(presentWords[0]);
    let position: 'start' | 'middle' | 'end' | 'none';
    if (index < 30) {
      position = 'start';
    } else if (index > metaDesc.length - 50) {
      position = 'end';
    } else {
      position = 'middle';
    }
    return { hasKeyword: true, position };
  }

  return { hasKeyword: false, position: 'none' };
}
334
+
335
/**
 * Main entry point: infer the page's target keywords, run every placement
 * check and report the findings as audit issues.
 *
 * The overall score (0-100) is a weighted sum:
 *   URL contains keyword 20; title exact 25 / partial 10; H1 exact 20 /
 *   partial 10; first paragraph within 100 words 20 / later 10; meta
 *   description 15.
 *
 * Keyword-dependent issues are only reported when at least one keyword could
 * be inferred. Without a keyword there is nothing to compare against, and the
 * page would otherwise collect "missing keyword" findings it cannot act on.
 * The query-parameter notice does not depend on keywords and is always checked.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; also used as the affected URL of every issue.
 * @returns The generated issues and the raw analysis data.
 */
export function analyzeKeywordPlacement(
  html: string,
  url: string
): { issues: AuditIssue[]; data: KeywordPlacementData } {
  const issues: AuditIssue[] = [];

  const detectedKeywords = inferTargetKeywords(html, url);
  const hasKeywords = detectedKeywords.length > 0;

  const urlAnalysis = analyzeUrlKeyword(url, detectedKeywords);
  const titleAnalysis = analyzeTitleKeyword(html, detectedKeywords);
  const h1Analysis = analyzeH1Keyword(html, detectedKeywords);
  const firstParagraphAnalysis = analyzeFirstParagraph(html, detectedKeywords);
  const metaDescriptionAnalysis = analyzeMetaDescription(html, detectedKeywords);

  // Calculate overall placement score
  let overallPlacementScore = 0;
  if (urlAnalysis.containsKeyword) overallPlacementScore += 20;
  if (titleAnalysis.hasExactMatch) overallPlacementScore += 25;
  else if (titleAnalysis.hasPartialMatch) overallPlacementScore += 10;
  if (h1Analysis.hasExactMatch) overallPlacementScore += 20;
  else if (h1Analysis.hasPartialMatch) overallPlacementScore += 10;
  if (firstParagraphAnalysis.isWithinFirst100Words) overallPlacementScore += 20;
  else if (firstParagraphAnalysis.hasKeyword) overallPlacementScore += 10;
  if (metaDescriptionAnalysis.hasKeyword) overallPlacementScore += 15;

  // URL doesn't contain keyword (a slug of "/" or "" has nothing to improve)
  if (hasKeywords && !urlAnalysis.containsKeyword && urlAnalysis.urlSlug.length > 1) {
    issues.push({
      code: 'URL_MISSING_KEYWORD',
      severity: 'warning',
      category: 'on-page',
      title: 'URL does not contain target keyword',
      description: `The URL slug "${urlAnalysis.urlSlug}" doesn't include the apparent target keyword.`,
      impact: 'URLs with keywords help Google understand page relevance. This is a ranking signal.',
      howToFix: 'Include the primary keyword phrase in the URL (e.g., /keyword-phrase/ instead of /page123/).',
      affectedUrls: [url],
      details: {
        currentSlug: urlAnalysis.urlSlug,
        inferredKeywords: detectedKeywords,
      },
    });
  }

  // URL has dynamic parameters
  if (urlAnalysis.hasDynamicParams) {
    issues.push({
      code: 'URL_HAS_PARAMS',
      severity: 'notice',
      category: 'on-page',
      title: 'URL contains query parameters',
      description: 'The URL has query parameters which create a less clean URL structure.',
      impact: 'Clean, keyword-rich URLs are preferred by search engines and users.',
      howToFix: 'Use URL rewriting to create clean, parameter-free URLs where possible.',
      affectedUrls: [url],
    });
  }

  // Title doesn't have exact keyword match
  if (hasKeywords && !titleAnalysis.hasExactMatch) {
    issues.push({
      code: 'TITLE_KEYWORD_MISMATCH',
      severity: 'warning',
      category: 'on-page',
      title: 'Title tag missing exact keyword match',
      description: 'The title tag doesn\'t contain the exact target keyword phrase.',
      impact: 'Google\'s leaked documents reveal a "title match score" that measures keyword alignment.',
      howToFix: 'Include the exact target keyword phrase in the title, preferably near the beginning.',
      affectedUrls: [url],
      details: {
        titleMatchScore: titleAnalysis.titleMatchScore,
        position: titleAnalysis.position,
      },
    });
  }

  if (hasKeywords) {
    if (!firstParagraphAnalysis.hasKeyword) {
      // Keyword not in first paragraph
      issues.push({
        code: 'FIRST_PARA_NO_KEYWORD',
        severity: 'warning',
        category: 'on-page',
        title: 'Primary keyword not in first paragraph',
        description: 'The target keyword doesn\'t appear in the opening paragraph.',
        impact: 'Including keywords early in content signals relevance to search engines.',
        howToFix: 'Add the primary keyword naturally within the first 100 words of your content.',
        affectedUrls: [url],
        details: {
          preview: firstParagraphAnalysis.firstParagraphPreview,
        },
      });
    } else if (!firstParagraphAnalysis.isWithinFirst100Words) {
      // Keyword present, but only after the first 100 words
      issues.push({
        code: 'KEYWORD_TOO_FAR',
        severity: 'notice',
        category: 'on-page',
        title: 'Keyword appears late in first paragraph',
        description: `Keyword found at word position ${firstParagraphAnalysis.keywordPosition}, ideally should be in first 100 words.`,
        impact: 'Earlier keyword placement may slightly improve relevance signals.',
        howToFix: 'Move the keyword mention closer to the beginning of your content.',
        affectedUrls: [url],
      });
    }
  }

  // H1 exists but matches no keyword, not even partially
  if (hasKeywords && !h1Analysis.hasExactMatch && !h1Analysis.hasPartialMatch && h1Analysis.h1Text) {
    issues.push({
      code: 'H1_MISSING_KEYWORD',
      severity: 'notice',
      category: 'on-page',
      title: 'H1 doesn\'t contain target keyword',
      description: `The H1 "${h1Analysis.h1Text}" doesn't include the target keyword.`,
      impact: 'H1 is a strong on-page signal for topic relevance.',
      howToFix: 'Include the primary keyword in your H1 headline.',
      affectedUrls: [url],
    });
  }

  return {
    issues,
    data: {
      detectedKeywords,
      urlAnalysis,
      titleAnalysis,
      h1Analysis,
      firstParagraphAnalysis,
      metaDescriptionAnalysis,
      overallPlacementScore,
    },
  };
}
@@ -0,0 +1,203 @@
1
+ import * as cheerio from 'cheerio';
2
+ import { httpHead } from '../../utils/http.js';
3
+ import type { AuditIssue } from '../types.js';
4
+ import { ISSUE_DEFINITIONS } from '../types.js';
5
+
6
/**
 * Link inventory of a single page as produced by `analyzeLinks`.
 */
export interface LinkData {
  /** Links whose hostname equals the page's hostname (hrefs are absolute). */
  internal: { href: string; text: string; nofollow: boolean }[];
  /** Links pointing to any other hostname (hrefs are absolute). */
  external: { href: string; text: string; nofollow: boolean }[];
  /** Number of internal plus external links. */
  totalLinks: number;
  /** Internal hrefs that failed the optional broken-link probe. */
  brokenInternal: string[];
  /** External hrefs that failed the optional broken-link probe. */
  brokenExternal: string[];
  /** Link counts and their relation to each other. */
  ratio: {
    internal: number;
    external: number;
    internalToExternalRatio: number | null; // null if no external links
  };
}
18
+
19
/**
 * Probe a list of URLs with HEAD requests and return those that look broken.
 *
 * Requests run one after another on purpose: a burst of parallel requests
 * against one host is more likely to be throttled, and a throttling response
 * (status >= 400) would be misreported as a broken link.
 *
 * @param hrefs - Absolute URLs to probe.
 * @param countErrorsAsBroken - Whether a failed request (for example a
 *   timeout) marks the URL as broken.
 * @returns The URLs that answered with a status >= 400 (or failed, if requested),
 *   in input order.
 */
async function probeBrokenLinks(hrefs: string[], countErrorsAsBroken: boolean): Promise<string[]> {
  const broken: string[] = [];
  for (const href of hrefs) {
    try {
      const response = await httpHead(href, {
        timeout: 5000,
        maxRedirects: 5,
        validateStatus: () => true,
      });
      if (response.status >= 400) {
        broken.push(href);
      }
    } catch {
      if (countErrorsAsBroken) {
        broken.push(href);
      }
    }
  }
  return broken;
}

/**
 * Collect the links of a page, classify them as internal or external and
 * report link-related audit issues.
 *
 * Only http(s) links are counted. Pure fragment links ("#...") and every
 * other scheme (javascript:, mailto:, tel:, data:, ...) are ignored,
 * regardless of letter case or surrounding whitespace in the href.
 *
 * @param html - Raw HTML of the page.
 * @param baseUrl - Absolute URL of the page; relative hrefs are resolved
 *   against it and its hostname defines what "internal" means. An invalid
 *   value makes the `URL` constructor throw.
 * @param checkBroken - When true, up to 10 distinct internal and 5 distinct
 *   external links are probed with HEAD requests. A failed request counts as
 *   broken for internal links only.
 * @returns The generated issues and the collected link data.
 */
export async function analyzeLinks(
  html: string,
  baseUrl: string,
  checkBroken: boolean = false
): Promise<{ issues: AuditIssue[]; data: LinkData }> {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const baseHostname = new URL(baseUrl).hostname;

  const internal: LinkData['internal'] = [];
  const external: LinkData['external'] = [];
  let brokenInternal: string[] = [];
  let brokenExternal: string[] = [];

  // Extract all links
  for (const el of $('a[href]').toArray()) {
    const href = ($(el).attr('href') || '').trim();
    const text = $(el).text().trim();
    // rel is a whitespace-separated, case-insensitive token list.
    const relTokens = ($(el).attr('rel') || '').toLowerCase().split(/\s+/);
    const nofollow = relTokens.includes('nofollow');

    // In-page anchors are not links to another document.
    if (href.startsWith('#')) {
      continue;
    }

    let resolved: URL;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      continue; // Invalid URL, skip
    }

    // Skips javascript:, mailto:, tel: and any other non-web scheme.
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
      continue;
    }

    const fullUrl = resolved.href;
    if (resolved.hostname === baseHostname) {
      internal.push({ href: fullUrl, text, nofollow });
      if (nofollow) {
        issues.push({
          ...ISSUE_DEFINITIONS.NOFOLLOW_INTERNAL,
          affectedUrls: [baseUrl],
          details: { link: fullUrl, anchorText: text },
        });
      }
    } else {
      external.push({ href: fullUrl, text, nofollow });
    }
  }

  const internalCount = internal.length;
  const externalCount = external.length;
  const totalLinks = internalCount + externalCount;
  const internalToExternalRatio = externalCount > 0 ? internalCount / externalCount : null;

  // Check for too many links
  if (totalLinks > 100) {
    issues.push({
      ...ISSUE_DEFINITIONS.TOO_MANY_LINKS,
      affectedUrls: [baseUrl],
      details: { totalLinks, internal: internalCount, external: externalCount },
    });
  }

  // Check for no internal links (bad for crawlability and user navigation)
  if (internalCount === 0 && totalLinks > 0) {
    issues.push({
      code: 'LINKS_NO_INTERNAL',
      severity: 'warning',
      category: 'links',
      title: 'No internal links found',
      description: 'This page has no internal links to other pages on your site.',
      impact: 'Internal links are crucial for SEO. They help search engines discover pages and distribute link equity throughout your site.',
      howToFix: 'Add relevant internal links to related content, navigation, or related articles section.',
      affectedUrls: [baseUrl],
      details: { totalLinks, external: externalCount },
    });
  }

  // Check for no external links (pages should reference external resources)
  if (externalCount === 0 && totalLinks > 5) {
    issues.push({
      code: 'LINKS_NO_EXTERNAL',
      severity: 'notice',
      category: 'links',
      title: 'No external links found',
      description: 'This page has no links to external websites.',
      impact: 'Linking to high-quality external sources can improve credibility and help search engines understand your content context.',
      howToFix: 'Add links to authoritative external sources that support your content. This builds trust and provides value to readers.',
      affectedUrls: [baseUrl],
      details: { totalLinks, internal: internalCount },
    });
  }

  // Check for poor internal-to-external ratio (too many external vs internal)
  if (externalCount > internalCount && externalCount > 5 && internalCount < 3) {
    // A ratio of 0 (no internal links at all) is a real value, not "unknown",
    // and cannot be inverted; report it as "0:<external count>".
    const ratioLabel =
      internalToExternalRatio !== null && internalToExternalRatio > 0
        ? `1:${(1 / internalToExternalRatio).toFixed(1)}`
        : `0:${externalCount}`;

    issues.push({
      code: 'LINKS_RATIO_POOR',
      severity: 'warning',
      category: 'links',
      title: 'Poor internal-to-external link ratio',
      description: `Page has ${externalCount} external links but only ${internalCount} internal links.`,
      impact: 'A healthy page should have more internal links than external links. Too many external links can dilute page authority and may look spammy.',
      howToFix: 'Add more relevant internal links to balance the ratio. Aim for at least 2-3 internal links for every external link.',
      affectedUrls: [baseUrl],
      details: {
        internal: internalCount,
        external: externalCount,
        ratio: ratioLabel,
        recommendation: 'Aim for a ratio of at least 2:1 (internal:external)',
      },
    });
  }

  // Check for broken links (limited to avoid too many requests)
  if (checkBroken) {
    // De-duplicate first so repeated links (navigation, footer) neither use up
    // the request budget nor produce duplicate issues.
    const internalToCheck = [...new Set(internal.map((link) => link.href))].slice(0, 10);
    const externalToCheck = [...new Set(external.map((link) => link.href))].slice(0, 5);

    brokenInternal = await probeBrokenLinks(internalToCheck, true);
    // Don't count timeouts as broken for external links
    brokenExternal = await probeBrokenLinks(externalToCheck, false);

    for (const brokenLink of brokenInternal) {
      issues.push({
        ...ISSUE_DEFINITIONS.BROKEN_INTERNAL_LINK,
        affectedUrls: [baseUrl],
        details: { brokenLink },
      });
    }

    for (const brokenLink of brokenExternal) {
      issues.push({
        ...ISSUE_DEFINITIONS.BROKEN_EXTERNAL_LINK,
        affectedUrls: [baseUrl],
        details: { brokenLink },
      });
    }
  }

  return {
    issues,
    data: {
      internal,
      external,
      totalLinks,
      brokenInternal,
      brokenExternal,
      ratio: {
        internal: internalCount,
        external: externalCount,
        internalToExternalRatio,
      },
    },
  };
}