@rankcli/agent-runtime 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +242 -0
  2. package/dist/analyzer-2CSWIQGD.mjs +6 -0
  3. package/dist/chunk-YNZYHEYM.mjs +774 -0
  4. package/dist/index.d.mts +4012 -0
  5. package/dist/index.d.ts +4012 -0
  6. package/dist/index.js +29672 -0
  7. package/dist/index.mjs +28602 -0
  8. package/package.json +53 -0
  9. package/scripts/build-deno.ts +134 -0
  10. package/src/audit/ai/analyzer.ts +347 -0
  11. package/src/audit/ai/index.ts +29 -0
  12. package/src/audit/ai/prompts/content-analysis.ts +271 -0
  13. package/src/audit/ai/types.ts +179 -0
  14. package/src/audit/checks/additional-checks.ts +439 -0
  15. package/src/audit/checks/ai-citation-worthiness.ts +399 -0
  16. package/src/audit/checks/ai-content-structure.ts +325 -0
  17. package/src/audit/checks/ai-readiness.ts +339 -0
  18. package/src/audit/checks/anchor-text.ts +179 -0
  19. package/src/audit/checks/answer-conciseness.ts +322 -0
  20. package/src/audit/checks/asset-minification.ts +270 -0
  21. package/src/audit/checks/bing-optimization.ts +206 -0
  22. package/src/audit/checks/brand-mention-optimization.ts +349 -0
  23. package/src/audit/checks/caching-headers.ts +305 -0
  24. package/src/audit/checks/canonical-advanced.ts +150 -0
  25. package/src/audit/checks/canonical-domain.ts +196 -0
  26. package/src/audit/checks/citation-quality.ts +358 -0
  27. package/src/audit/checks/client-rendering.ts +542 -0
  28. package/src/audit/checks/color-contrast.ts +342 -0
  29. package/src/audit/checks/content-freshness.ts +170 -0
  30. package/src/audit/checks/content-science.ts +589 -0
  31. package/src/audit/checks/conversion-elements.ts +526 -0
  32. package/src/audit/checks/crawlability.ts +220 -0
  33. package/src/audit/checks/directory-listing.ts +172 -0
  34. package/src/audit/checks/dom-analysis.ts +191 -0
  35. package/src/audit/checks/dom-size.ts +246 -0
  36. package/src/audit/checks/duplicate-content.ts +194 -0
  37. package/src/audit/checks/eeat-signals.ts +990 -0
  38. package/src/audit/checks/entity-seo.ts +396 -0
  39. package/src/audit/checks/featured-snippet.ts +473 -0
  40. package/src/audit/checks/freshness-signals.ts +443 -0
  41. package/src/audit/checks/funnel-intent.ts +463 -0
  42. package/src/audit/checks/hreflang.ts +174 -0
  43. package/src/audit/checks/html-compliance.ts +302 -0
  44. package/src/audit/checks/image-dimensions.ts +167 -0
  45. package/src/audit/checks/images.ts +160 -0
  46. package/src/audit/checks/indexnow.ts +275 -0
  47. package/src/audit/checks/interactive-tools.ts +475 -0
  48. package/src/audit/checks/internal-link-graph.ts +436 -0
  49. package/src/audit/checks/keyword-analysis.ts +239 -0
  50. package/src/audit/checks/keyword-cannibalization.ts +385 -0
  51. package/src/audit/checks/keyword-placement.ts +471 -0
  52. package/src/audit/checks/links.ts +203 -0
  53. package/src/audit/checks/llms-txt.ts +224 -0
  54. package/src/audit/checks/local-seo.ts +296 -0
  55. package/src/audit/checks/mobile.ts +167 -0
  56. package/src/audit/checks/modern-images.ts +226 -0
  57. package/src/audit/checks/navboost-signals.ts +395 -0
  58. package/src/audit/checks/on-page.ts +209 -0
  59. package/src/audit/checks/page-resources.ts +285 -0
  60. package/src/audit/checks/pagination.ts +180 -0
  61. package/src/audit/checks/performance.ts +153 -0
  62. package/src/audit/checks/platform-presence.ts +580 -0
  63. package/src/audit/checks/redirect-analysis.ts +153 -0
  64. package/src/audit/checks/redirect-chain.ts +389 -0
  65. package/src/audit/checks/resource-hints.ts +420 -0
  66. package/src/audit/checks/responsive-css.ts +247 -0
  67. package/src/audit/checks/responsive-images.ts +396 -0
  68. package/src/audit/checks/review-ecosystem.ts +415 -0
  69. package/src/audit/checks/robots-validation.ts +373 -0
  70. package/src/audit/checks/security-headers.ts +172 -0
  71. package/src/audit/checks/security.ts +144 -0
  72. package/src/audit/checks/serp-preview.ts +251 -0
  73. package/src/audit/checks/site-maturity.ts +444 -0
  74. package/src/audit/checks/social-meta.test.ts +275 -0
  75. package/src/audit/checks/social-meta.ts +134 -0
  76. package/src/audit/checks/soft-404.ts +151 -0
  77. package/src/audit/checks/structured-data.ts +238 -0
  78. package/src/audit/checks/tech-detection.ts +496 -0
  79. package/src/audit/checks/topical-clusters.ts +435 -0
  80. package/src/audit/checks/tracker-bloat.ts +462 -0
  81. package/src/audit/checks/tracking-verification.test.ts +371 -0
  82. package/src/audit/checks/tracking-verification.ts +636 -0
  83. package/src/audit/checks/url-safety.ts +682 -0
  84. package/src/audit/deno-entry.ts +66 -0
  85. package/src/audit/discovery/index.ts +15 -0
  86. package/src/audit/discovery/link-crawler.ts +232 -0
  87. package/src/audit/discovery/repo-routes.ts +347 -0
  88. package/src/audit/engine.ts +620 -0
  89. package/src/audit/fixes/index.ts +209 -0
  90. package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
  91. package/src/audit/fixes/social-meta-fixes.ts +463 -0
  92. package/src/audit/index.ts +74 -0
  93. package/src/audit/runner.test.ts +299 -0
  94. package/src/audit/runner.ts +130 -0
  95. package/src/audit/types.ts +1953 -0
  96. package/src/content/featured-snippet.ts +367 -0
  97. package/src/content/generator.test.ts +534 -0
  98. package/src/content/generator.ts +501 -0
  99. package/src/content/headline.ts +317 -0
  100. package/src/content/index.ts +62 -0
  101. package/src/content/intent.ts +258 -0
  102. package/src/content/keyword-density.ts +349 -0
  103. package/src/content/readability.ts +262 -0
  104. package/src/executor.ts +336 -0
  105. package/src/fixer.ts +416 -0
  106. package/src/frameworks/detector.test.ts +248 -0
  107. package/src/frameworks/detector.ts +371 -0
  108. package/src/frameworks/index.ts +68 -0
  109. package/src/frameworks/recipes/angular.yaml +171 -0
  110. package/src/frameworks/recipes/astro.yaml +206 -0
  111. package/src/frameworks/recipes/django.yaml +180 -0
  112. package/src/frameworks/recipes/laravel.yaml +137 -0
  113. package/src/frameworks/recipes/nextjs.yaml +268 -0
  114. package/src/frameworks/recipes/nuxt.yaml +175 -0
  115. package/src/frameworks/recipes/rails.yaml +188 -0
  116. package/src/frameworks/recipes/react.yaml +202 -0
  117. package/src/frameworks/recipes/sveltekit.yaml +154 -0
  118. package/src/frameworks/recipes/vue.yaml +137 -0
  119. package/src/frameworks/recipes/wordpress.yaml +209 -0
  120. package/src/frameworks/suggestion-engine.ts +320 -0
  121. package/src/geo/geo-content.test.ts +305 -0
  122. package/src/geo/geo-content.ts +266 -0
  123. package/src/geo/geo-history.test.ts +473 -0
  124. package/src/geo/geo-history.ts +433 -0
  125. package/src/geo/geo-tracker.test.ts +359 -0
  126. package/src/geo/geo-tracker.ts +411 -0
  127. package/src/geo/index.ts +10 -0
  128. package/src/git/commit-helper.test.ts +261 -0
  129. package/src/git/commit-helper.ts +329 -0
  130. package/src/git/index.ts +12 -0
  131. package/src/git/pr-helper.test.ts +284 -0
  132. package/src/git/pr-helper.ts +307 -0
  133. package/src/index.ts +66 -0
  134. package/src/keywords/ai-keyword-engine.ts +1062 -0
  135. package/src/keywords/ai-summarizer.ts +387 -0
  136. package/src/keywords/ci-mode.ts +555 -0
  137. package/src/keywords/engine.ts +359 -0
  138. package/src/keywords/index.ts +151 -0
  139. package/src/keywords/llm-judge.ts +357 -0
  140. package/src/keywords/nlp-analysis.ts +706 -0
  141. package/src/keywords/prioritizer.ts +295 -0
  142. package/src/keywords/site-crawler.ts +342 -0
  143. package/src/keywords/sources/autocomplete.ts +139 -0
  144. package/src/keywords/sources/competitive-search.ts +450 -0
  145. package/src/keywords/sources/competitor-analysis.ts +374 -0
  146. package/src/keywords/sources/dataforseo.ts +206 -0
  147. package/src/keywords/sources/free-sources.ts +294 -0
  148. package/src/keywords/sources/gsc.ts +123 -0
  149. package/src/keywords/topic-grouping.ts +327 -0
  150. package/src/keywords/types.ts +144 -0
  151. package/src/keywords/wizard.ts +457 -0
  152. package/src/loader.ts +40 -0
  153. package/src/reports/index.ts +7 -0
  154. package/src/reports/report-generator.test.ts +293 -0
  155. package/src/reports/report-generator.ts +713 -0
  156. package/src/scheduler/alerts.test.ts +458 -0
  157. package/src/scheduler/alerts.ts +328 -0
  158. package/src/scheduler/index.ts +8 -0
  159. package/src/scheduler/scheduled-audit.test.ts +377 -0
  160. package/src/scheduler/scheduled-audit.ts +149 -0
  161. package/src/test/integration-test.ts +325 -0
  162. package/src/tools/analyzer.ts +373 -0
  163. package/src/tools/crawl.ts +293 -0
  164. package/src/tools/files.ts +301 -0
  165. package/src/tools/h1-fixer.ts +249 -0
  166. package/src/tools/index.ts +67 -0
  167. package/src/tracking/github-action.ts +326 -0
  168. package/src/tracking/google-analytics.ts +265 -0
  169. package/src/tracking/index.ts +45 -0
  170. package/src/tracking/report-generator.ts +386 -0
  171. package/src/tracking/search-console.ts +335 -0
  172. package/src/types.ts +134 -0
  173. package/src/utils/http.ts +302 -0
  174. package/src/wasm-adapter.ts +297 -0
  175. package/src/wasm-entry.ts +14 -0
  176. package/tsconfig.json +17 -0
  177. package/tsup.wasm.config.ts +26 -0
  178. package/vitest.config.ts +15 -0
@@ -0,0 +1,436 @@
1
+ // Internal Link Graph Analysis
2
+ // Advanced internal linking analysis: hub/authority detection, link depth, PageRank sculpting
3
+ // Based on advanced SEO research
4
+
5
+ import * as cheerio from 'cheerio';
6
+ import type { AuditIssue } from '../types.js';
7
+
8
+ export interface InternalLink { // One same-host hyperlink found on a page, plus where it sits in the layout.
9
+ source: string; // URL of the page the link was found on (the pageUrl given to extractInternalLinks).
10
+ target: string; // Absolute URL the href resolves to, with any #fragment removed.
11
+ anchor: string; // Trimmed text content of the <a> element; may be an empty string.
12
+ context: 'navigation' | 'content' | 'footer' | 'sidebar'; // Page region, taken from the nearest ancestor that looks like nav/footer/sidebar; 'content' when none matches.
13
+ isEditorial: boolean; // True only for 'content' links whose parent element holds more than 10 characters of text beyond the anchor.
14
+ }
15
+
16
+ export interface PageLinkStats { // Per-page link metrics. NOTE(review): nothing in the visible code builds this type; confirm field semantics against its producer.
17
+ url: string;
18
+ inboundLinks: number;
19
+ outboundLinks: number;
20
+ hubScore: number; // High for pages that link to many others (presumably the `hubs` value from calculateHubAuthority; confirm).
21
+ authorityScore: number; // High for pages that receive many links (presumably the `authorities` value from calculateHubAuthority; confirm).
22
+ depth: number; // Clicks from homepage (presumably the value produced by calculateLinkDepth; confirm).
23
+ }
24
+
25
+ export interface InternalLinkGraphData { // Link-graph summary; analyzeInternalLinkGraph returns a Partial of this for a single page.
26
+ totalInternalLinks: number; // Number of internal links extracted from the page.
27
+ orphanPages: string[]; // Not filled by the single-page analysis; presumably the output of detectOrphanPages (confirm).
28
+ deepPages: string[]; // More than 3 clicks deep
29
+ hubPages: string[]; // Not filled by the single-page analysis.
30
+ authorityPages: string[]; // Not filled by the single-page analysis.
31
+ linkDistribution: { url: string; links: number }[]; // Internal link count per page URL.
32
+ firstLinkAnchors: Map<string, string>; // Target URL -> anchor text of the first link pointing at it.
33
+ contextualLinkRatio: number; // 'content'-context links divided by all internal links (0 when the page has none).
34
+ navigationVsContentRatio: number; // 'navigation' links divided by 'content' links (0 when there are no content links).
35
+ }
36
+
37
/**
 * Collect every same-host link in an HTML document and classify its placement.
 *
 * Anchors with an empty href, a `javascript:` href, a pure `#fragment` href, an
 * unparsable href, or a hostname different from `pageUrl`'s are ignored.
 *
 * @param html - Raw HTML of the page.
 * @param pageUrl - Absolute URL of the page; used to resolve relative hrefs and
 *   recorded as the `source` of every link. An invalid URL makes `new URL` throw.
 * @returns One entry per qualifying `<a href>` element, in document order.
 */
export function extractInternalLinks(html: string, pageUrl: string): InternalLink[] {
  const $ = cheerio.load(html);
  const collected: InternalLink[] = [];
  const pageLocation = new URL(pageUrl);

  for (const node of $('a[href]').toArray()) {
    const $anchor = $(node);
    const rawHref = $anchor.attr('href') || '';
    const anchor = $anchor.text().trim();

    // Nothing to follow for empty, script, or same-page fragment hrefs.
    const isSkippable =
      !rawHref || rawHref.startsWith('javascript:') || rawHref.startsWith('#');
    if (isSkippable) {
      continue;
    }

    // Resolve against the page URL so relative hrefs become absolute.
    let resolved: URL;
    try {
      resolved = new URL(rawHref, pageUrl);
    } catch {
      continue; // Unparsable href
    }

    // Only links that stay on the same hostname count as internal.
    if (resolved.hostname !== pageLocation.hostname) {
      continue;
    }

    // Walk ancestors from nearest to farthest; the first one that looks like a
    // layout region decides the context. Within one ancestor the checks run in
    // the order navigation -> footer -> sidebar.
    let context: 'navigation' | 'content' | 'footer' | 'sidebar' = 'content';
    for (const ancestorNode of $anchor.parents().toArray()) {
      const $ancestor = $(ancestorNode);
      const tag = $ancestor.prop('tagName')?.toLowerCase() || '';
      const classes = $ancestor.attr('class')?.toLowerCase() || '';
      const elementId = $ancestor.attr('id')?.toLowerCase() || '';
      const mentions = (needle: string): boolean =>
        classes.includes(needle) || elementId.includes(needle);

      if (tag === 'nav' || mentions('nav') || mentions('menu')) {
        context = 'navigation';
        break;
      }
      if (tag === 'footer' || mentions('footer')) {
        context = 'footer';
        break;
      }
      if (tag === 'aside' || classes.includes('widget') || mentions('sidebar')) {
        context = 'sidebar';
        break;
      }
    }

    // A content link is editorial only when its direct parent carries noticeably
    // more text than the anchor itself (more than 10 extra characters).
    let isEditorial = false;
    if (context === 'content') {
      const surroundingText = $anchor.parent().text().trim();
      isEditorial = surroundingText.length > anchor.length + 10;
    }

    collected.push({
      source: pageUrl,
      target: resolved.href.split('#')[0], // Drop the fragment
      anchor,
      context,
      isEditorial,
    });
  }

  return collected;
}
127
+
128
/**
 * Compute the click depth of every page reachable from the homepage.
 *
 * Runs a breadth-first search over the link list, so each page gets the length
 * of the shortest link path from `homepageUrl`. URLs are compared as exact
 * strings.
 *
 * @param links - Internal links forming the graph (`source` -> `target`).
 * @param homepageUrl - Start page; always present in the result with depth 0.
 * @returns Map of URL -> depth. Pages that cannot be reached from the homepage
 *   are absent from the map.
 */
export function calculateLinkDepth(
  links: InternalLink[],
  homepageUrl: string
): Map<string, number> {
  // Index outgoing targets once so the search does not rescan the whole link
  // list for every visited page.
  const targetsBySource = new Map<string, string[]>();
  for (const { source, target } of links) {
    const known = targetsBySource.get(source);
    if (known) {
      known.push(target);
    } else {
      targetsBySource.set(source, [target]);
    }
  }

  // `depths` doubles as the visited set: a URL is recorded the first time it is
  // discovered, which in BFS order is also its shortest distance.
  const depths = new Map<string, number>([[homepageUrl, 0]]);
  const queue: string[] = [homepageUrl];

  // Advance a cursor instead of shifting, which would be linear per dequeue.
  for (let head = 0; head < queue.length; head++) {
    const current = queue[head];
    const nextDepth = (depths.get(current) ?? 0) + 1;

    for (const target of targetsBySource.get(current) ?? []) {
      if (!depths.has(target)) {
        depths.set(target, nextDepth);
        queue.push(target);
      }
    }
  }

  return depths;
}
160
+
161
/**
 * Largest value in `scores`, but never less than `floor`.
 * Iterates instead of spreading into Math.max so very large collections do not
 * exceed the engine's argument limit.
 */
function largestScore(scores: Iterable<number>, floor: number): number {
  let best = floor;
  for (const score of scores) {
    if (score > best) {
      best = score;
    }
  }
  return best;
}

/**
 * Calculate simplified hub/authority scores (similar to the HITS algorithm).
 *
 * Every URL that appears as a source or target starts with hub = authority = 1.
 * Each iteration sets a page's authority to the sum of the hub scores of the
 * pages linking to it, then its hub score to the sum of the freshly computed
 * authority scores of the pages it links to, and finally divides both by the
 * largest value (floored at 1, which also guards against division by zero).
 * Repeated links between the same two pages are counted once per occurrence.
 *
 * @param links - Internal links forming the graph.
 * @param iterations - Number of update rounds (default 10). With 0 rounds all
 *   scores stay at their initial value of 1.
 * @returns `hubs` and `authorities`, each mapping URL -> score.
 */
export function calculateHubAuthority(
  links: InternalLink[],
  iterations: number = 10
): { hubs: Map<string, number>; authorities: Map<string, number> } {
  // Every URL mentioned anywhere in the graph
  const urls = new Set<string>();
  for (const link of links) {
    urls.add(link.source);
    urls.add(link.target);
  }

  // Initial scores
  const hubs = new Map<string, number>();
  const authorities = new Map<string, number>();
  for (const url of urls) {
    hubs.set(url, 1);
    authorities.set(url, 1);
  }

  // Adjacency lists in both directions
  const outbound = new Map<string, string[]>();
  const inbound = new Map<string, string[]>();
  for (const link of links) {
    const out = outbound.get(link.source) || [];
    out.push(link.target);
    outbound.set(link.source, out);

    const inb = inbound.get(link.target) || [];
    inb.push(link.source);
    inbound.set(link.target, inb);
  }

  for (let round = 0; round < iterations; round++) {
    // Authority = sum of hub scores of the pages linking in
    const newAuthorities = new Map<string, number>();
    for (const url of urls) {
      let score = 0;
      for (const source of inbound.get(url) || []) {
        score += hubs.get(source) || 0;
      }
      newAuthorities.set(url, score);
    }

    // Hub = sum of the (not yet normalized) authority scores linked out to
    const newHubs = new Map<string, number>();
    for (const url of urls) {
      let score = 0;
      for (const target of outbound.get(url) || []) {
        score += newAuthorities.get(target) || 0;
      }
      newHubs.set(url, score);
    }

    // Normalize by the largest score, never dividing by less than 1
    const maxAuth = largestScore(newAuthorities.values(), 1);
    const maxHub = largestScore(newHubs.values(), 1);

    for (const url of urls) {
      authorities.set(url, (newAuthorities.get(url) || 0) / maxAuth);
      hubs.set(url, (newHubs.get(url) || 0) / maxHub);
    }
  }

  return { hubs, authorities };
}
234
+
235
/**
 * Find known pages that no link in the list points to.
 *
 * @param links - Internal links to inspect; only `target` is used.
 * @param knownUrls - Candidate page URLs, compared as exact strings.
 * @returns The entries of `knownUrls`, in their original order, that never
 *   appear as a link target.
 */
export function detectOrphanPages(
  links: InternalLink[],
  knownUrls: string[]
): string[] {
  const hasInbound = new Set<string>();
  for (const link of links) {
    hasInbound.add(link.target);
  }

  const orphans: string[] = [];
  for (const candidate of knownUrls) {
    if (!hasInbound.has(candidate)) {
      orphans.push(candidate);
    }
  }
  return orphans;
}
245
+
246
/**
 * Record, for each target URL, the anchor text and context of the first link
 * that points to it ("first link priority").
 *
 * @param links - Internal links in the order they were encountered.
 * @returns Map of target URL -> `{ anchor, context }` taken from the earliest
 *   link to that target; later links to the same target are ignored.
 */
export function analyzeFirstLinkPriority(
  links: InternalLink[]
): Map<string, { anchor: string; context: string }> {
  const firstSeen = new Map<string, { anchor: string; context: string }>();

  links.forEach(({ target, anchor, context }) => {
    if (firstSeen.has(target)) {
      return;
    }
    firstSeen.set(target, { anchor, context });
  });

  return firstSeen;
}
265
+
266
/**
 * Find pages with few outbound internal links ("link hoarding").
 *
 * Only pages that appear as the `source` of at least one link are considered,
 * so a page with no outbound links at all is never reported here.
 *
 * @param links - Internal links; each occurrence counts toward its source.
 * @param threshold - Pages with strictly fewer links than this are returned
 *   (default 3).
 * @returns Source URLs below the threshold, in order of first appearance.
 */
export function detectLinkHoarding(
  links: InternalLink[],
  threshold: number = 3
): string[] {
  const perSource = new Map<string, number>();
  links.forEach(({ source }) => {
    const seenSoFar = perSource.get(source) || 0;
    perSource.set(source, seenSoFar + 1);
  });

  const sparsePages: string[] = [];
  perSource.forEach((count, pageUrl) => {
    if (count < threshold) {
      sparsePages.push(pageUrl);
    }
  });
  return sparsePages;
}
284
+
285
/**
 * Find PageRank sinks: pages that are linked to but never appear as the source
 * of any link in the list.
 *
 * @param links - Internal links forming the graph.
 * @returns Target URLs with no outgoing links, in order of first appearance as
 *   a target.
 */
export function detectPageRankSinks(links: InternalLink[]): string[] {
  const linksOut = new Set<string>();
  const linkedTo = new Set<string>();
  for (const { source, target } of links) {
    linksOut.add(source);
    linkedTo.add(target);
  }

  const sinks: string[] = [];
  for (const page of linkedTo) {
    if (!linksOut.has(page)) {
      sinks.push(page);
    }
  }
  return sinks;
}
295
+
296
/** Anchor phrases that say nothing about the link target. */
const GENERIC_ANCHOR_PHRASES = ['click here', 'read more', 'learn more', 'here', 'this'];

/**
 * Report whether an anchor text contains a generic phrase as whole words.
 *
 * Matching is case-insensitive and ignores punctuation, but a phrase must be
 * bounded by word breaks: "Click here!" matches, while "Where to buy" or
 * "Atmosphere" do not merely because they contain the letters "here".
 */
function isGenericAnchor(anchor: string): boolean {
  const normalized = anchor
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, ' ')
    .trim();
  // Padding both sides turns a substring test into a whole-word test.
  const padded = ` ${normalized} `;
  return GENERIC_ANCHOR_PHRASES.some((phrase) => padded.includes(` ${phrase} `));
}

/**
 * Main function: analyze the internal link structure of a single page.
 *
 * Extracts the page's internal links and reports:
 * - NO_CONTEXTUAL_INTERNAL_LINKS (warning): links exist but none is in content.
 * - LOW_EDITORIAL_LINKS (notice): content links exist but fewer than 2 are
 *   editorial.
 * - FIRST_LINK_GENERIC_ANCHOR (notice): the first link to a target uses a
 *   generic phrase such as "click here" (whole-word match).
 * - TOO_MANY_INTERNAL_LINKS (warning): more than 100 internal links.
 * - NO_INTERNAL_LINKS (error): no internal links at all.
 *
 * @param html - Raw HTML of the page.
 * @param url - Absolute URL of the page; attached to every issue.
 * @returns The issues found and the single-page subset of the link-graph data.
 */
export function analyzeInternalLinkGraph(
  html: string,
  url: string
): { issues: AuditIssue[]; data: Partial<InternalLinkGraphData> } {
  const issues: AuditIssue[] = [];
  const links = extractInternalLinks(html, url);

  // Count link types
  const navigationLinks = links.filter((l) => l.context === 'navigation').length;
  const contentLinks = links.filter((l) => l.context === 'content').length;
  const footerLinks = links.filter((l) => l.context === 'footer').length;
  const editorialLinks = links.filter((l) => l.isEditorial).length;

  const totalLinks = links.length;
  const contextualLinkRatio = totalLinks > 0 ? contentLinks / totalLinks : 0;
  const navigationVsContentRatio = contentLinks > 0 ? navigationLinks / contentLinks : 0;

  // Links exist, but none of them sits in the main content
  if (contentLinks === 0 && totalLinks > 0) {
    issues.push({
      code: 'NO_CONTEXTUAL_INTERNAL_LINKS',
      severity: 'warning',
      category: 'links',
      title: 'No contextual internal links',
      description: 'All internal links are in navigation/footer. No editorial links in content.',
      impact: 'Editorial links in content pass more SEO value than navigational links.',
      howToFix: 'Add relevant internal links within your main content to related pages.',
      affectedUrls: [url],
      details: { navigationLinks, contentLinks, footerLinks },
    });
  }

  // Content links exist, but hardly any has surrounding text
  if (editorialLinks < 2 && contentLinks > 0) {
    issues.push({
      code: 'LOW_EDITORIAL_LINKS',
      severity: 'notice',
      category: 'links',
      title: 'Few editorial internal links',
      description: `Only ${editorialLinks} editorial (in-content) internal links found.`,
      impact: 'Editorial links with surrounding context provide stronger topical signals.',
      howToFix: 'Add 2-5 relevant internal links within your body content.',
      affectedUrls: [url],
    });
  }

  // Analyze first link anchors
  const firstLinks = analyzeFirstLinkPriority(links);

  // Flag first links whose anchor text is a generic phrase
  for (const [target, linkInfo] of firstLinks) {
    if (isGenericAnchor(linkInfo.anchor)) {
      issues.push({
        code: 'FIRST_LINK_GENERIC_ANCHOR',
        severity: 'notice',
        category: 'links',
        title: 'First link uses generic anchor text',
        description: `First link to ${target} uses generic anchor "${linkInfo.anchor}".`,
        impact: 'Google may prioritize first link anchor for topic signals.',
        howToFix: 'Use descriptive, keyword-relevant anchor text for internal links.',
        affectedUrls: [url],
        details: { target, anchor: linkInfo.anchor },
      });
    }
  }

  // Check for link distribution (too many links)
  if (totalLinks > 100) {
    issues.push({
      code: 'TOO_MANY_INTERNAL_LINKS',
      severity: 'warning',
      category: 'links',
      title: 'Excessive internal links',
      description: `Page has ${totalLinks} internal links, which may dilute PageRank distribution.`,
      impact: 'Too many links reduce the value passed to each linked page.',
      howToFix: 'Reduce internal links to the most important and relevant pages.',
      affectedUrls: [url],
      details: { totalLinks, navigationLinks, contentLinks, footerLinks },
    });
  }

  // Check for no internal links at all
  if (totalLinks === 0) {
    issues.push({
      code: 'NO_INTERNAL_LINKS',
      severity: 'error',
      category: 'links',
      title: 'No internal links found',
      description: 'Page has no internal links to other pages on the site.',
      impact: 'Creates a dead end for users and search engine crawlers.',
      howToFix: 'Add relevant internal links to related content.',
      affectedUrls: [url],
    });
  }

  return {
    issues,
    data: {
      totalInternalLinks: totalLinks,
      contextualLinkRatio,
      navigationVsContentRatio,
      firstLinkAnchors: new Map(
        [...firstLinks.entries()].map(([k, v]) => [k, v.anchor])
      ),
      linkDistribution: [{ url, links: totalLinks }],
    },
  };
}
407
+
408
/**
 * Multi-page analysis: identify pillar pages and the pages clustered around
 * them.
 *
 * A pillar page is any URL whose hub score from calculateHubAuthority is at
 * least 0.7. Its cluster is every non-pillar page directly connected to it by a
 * link in either direction.
 *
 * @param links - Internal links across the analyzed pages.
 * @returns `pillarPages` and, per pillar, the de-duplicated list of connected
 *   non-pillar URLs in order of first appearance in `links`.
 */
export function analyzeTopicClusters(
  links: InternalLink[]
): { pillarPages: string[]; clusters: Map<string, string[]> } {
  // Only hub scores are needed; authority scores are not used here.
  const { hubs } = calculateHubAuthority(links);

  // Pillar pages have high hub scores (they link to many cluster pages)
  const pillarThreshold = 0.7;
  const pillarPages = [...hubs.entries()]
    .filter(([, score]) => score >= pillarThreshold)
    .map(([url]) => url);

  // Constant-time membership test instead of scanning the array per link
  const pillarSet = new Set(pillarPages);
  const clusters = new Map<string, string[]>();

  for (const pillar of pillarPages) {
    const members = new Set<string>();

    for (const link of links) {
      let neighbour: string;
      if (link.source === pillar) {
        neighbour = link.target;
      } else if (link.target === pillar) {
        neighbour = link.source;
      } else {
        continue; // Link does not touch this pillar
      }

      // Other pillars (and the pillar itself, for self-links) are not members
      if (!pillarSet.has(neighbour)) {
        members.add(neighbour);
      }
    }

    clusters.set(pillar, [...members]);
  }

  return { pillarPages, clusters };
}
@@ -0,0 +1,239 @@
1
+ // Keyword Density Analysis
2
+ // Analyzes keyword usage and distribution across page elements
3
+
4
+ import * as cheerio from 'cheerio';
5
+ import type { AuditIssue } from '../types.js';
6
+ import { ISSUE_DEFINITIONS } from '../types.js';
7
+
8
// Common English stop words that carry no keyword value and are dropped during
// tokenization. Grouped by role; membership and order match the original list.
const STOP_WORDS = new Set(
  [
    // articles, conjunctions, prepositions
    'a an the and or but in on at to for of with by from as',
    // auxiliary and modal verbs
    'is was are were been be have has had do does did',
    'will would could should may might must can',
    // pronouns and determiners
    'this that these those it its they them their',
    'we us our you your i me my he she him her his',
    // negation, conditionals, question words
    'not no nor so if then else when where why how what who which',
    // quantifiers and qualifiers
    'all each every both few more most other some such only own same than too very just also',
    // adverbs of place/time and remaining prepositions
    'now here there about after before above below between into through',
    'during under again further once any being because while',
  ]
    .join(' ')
    .split(' ')
);
21
+
22
+ export interface KeywordInfo { // Frequency and placement of one keyword found in the page body.
23
+ word: string; // Lower-cased token from the body text.
24
+ count: number; // Occurrences among the tokenized body words.
25
+ density: number; // Percentage of the tokenized body words, rounded to two decimals.
26
+ inTitle: boolean; // Token also appears among the <title> tokens.
27
+ inH1: boolean; // Token also appears among the <h1> tokens.
28
+ inH2: boolean; // Token also appears among the <h2> tokens.
29
+ inMetaDesc: boolean; // Token also appears among the meta description tokens.
30
+ }
31
+
32
+ export interface KeywordPhraseInfo { // A repeated multi-word phrase (n-gram) from the body text.
33
+ phrase: string; // The phrase's words joined by single spaces.
34
+ count: number; // How many times the phrase occurs.
35
+ wordCount: number; // Number of words in the phrase (2, 3 or 4 in analyzeKeywords).
36
+ }
37
+
38
+ export interface KeywordAnalysisData { // Result payload of analyzeKeywords for one page.
39
+ totalWords: number; // Number of body tokens left after tokenization (stop words, short words and pure numbers removed).
40
+ uniqueWords: number; // Number of distinct body tokens.
41
+ topKeywords: KeywordInfo[]; // Up to 20 most frequent body tokens, most frequent first.
42
+ topPhrases: { // Up to 10 phrases per length, each occurring at least twice, most frequent first.
43
+ twoWord: KeywordPhraseInfo[];
44
+ threeWord: KeywordPhraseInfo[];
45
+ fourWord: KeywordPhraseInfo[];
46
+ };
47
+ keywordDistribution: { // Tokens found in each prominent page element.
48
+ title: string[];
49
+ h1: string[];
50
+ h2: string[];
51
+ metaDesc: string[];
52
+ };
53
+ potentialStuffing: string[]; // Words from topKeywords flagged for a density above 3%.
54
+ }
55
+
56
/**
 * Extract normalized text for every element matching a selector.
 *
 * The text of each matched element is read separately and joined with a space,
 * because reading `.text()` on the whole selection concatenates neighbouring
 * elements without any separator (two headings "Pricing" and "Features" would
 * become "pricingfeatures"). The result is lower-cased, punctuation is replaced
 * by spaces, and whitespace is collapsed.
 *
 * Letters, combining marks and digits of any script are kept, so accented and
 * non-Latin words are not broken apart.
 *
 * @param $ - Loaded cheerio document.
 * @param selector - CSS selector of the elements to read.
 * @returns Normalized text, or an empty string when nothing matches.
 */
function extractText($: cheerio.CheerioAPI, selector: string): string {
  return $(selector)
    .toArray()
    .map((element) => $(element).text())
    .join(' ')
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
67
+
68
/**
 * Split text into candidate keyword tokens.
 *
 * The text is lower-cased, punctuation is replaced by spaces, and the result is
 * split on whitespace. Tokens are dropped when they are shorter than three
 * characters, are listed in STOP_WORDS, or consist of digits only.
 *
 * Letters, combining marks and digits of any script count as word characters,
 * so accented words such as "café" stay intact instead of being cut at the
 * first non-ASCII letter.
 *
 * @param text - Raw or pre-normalized text.
 * @returns Tokens in their original order (duplicates preserved).
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word) && !/^\p{N}+$/u.test(word));
}
78
+
79
/**
 * Tally how often each word occurs.
 *
 * @param words - Tokens to count; compared as exact strings.
 * @returns Map of word -> occurrence count, keyed in order of first appearance.
 */
function countWords(words: string[]): Map<string, number> {
  return words.reduce((tally, token) => {
    const previous = tally.get(token) || 0;
    tally.set(token, previous + 1);
    return tally;
  }, new Map<string, number>());
}
89
+
90
/**
 * Count every run of `n` consecutive words (an n-gram) in a token list.
 *
 * A run is skipped when any of its space-separated parts is in STOP_WORDS.
 *
 * @param words - Tokens in reading order.
 * @param n - Number of words per phrase.
 * @returns Map of phrase (words joined by single spaces) -> occurrence count.
 */
function extractNGrams(words: string[], n: number): Map<string, number> {
  const tally = new Map<string, number>();
  const lastStart = words.length - n;

  let start = 0;
  while (start <= lastStart) {
    const phrase = words.slice(start, start + n).join(' ');
    start += 1;

    // Re-split the joined phrase so the stop-word test sees the same parts the
    // phrase is made of.
    const containsStopWord = phrase.split(' ').some((part) => STOP_WORDS.has(part));
    if (!containsStopWord) {
      tally.set(phrase, (tally.get(phrase) || 0) + 1);
    }
  }

  return tally;
}
107
+
108
/**
 * Analyze keyword density and distribution for a single page.
 *
 * Tokenizes the body text (after removing script, style and noscript elements),
 * ranks the tokens by frequency, extracts repeated 2-, 3- and 4-word phrases and
 * compares the most frequent body keywords with the title, H1, H2 and meta
 * description.
 *
 * Issues raised (definitions come from ISSUE_DEFINITIONS):
 * - KEYWORD_STUFFING: a top-20 keyword occurs more than once and makes up more
 *   than 3% of the body tokens.
 * - NO_KEYWORDS_IN_TITLE: none of the five most frequent keywords is in the title.
 * - NO_KEYWORDS_IN_H1: the page has H1 text but none of those five keywords is
 *   in it.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; attached to every issue.
 * @returns The issues found and the keyword statistics.
 */
export function analyzeKeywords(html: string, url: string): { issues: AuditIssue[]; data: KeywordAnalysisData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Read the prominent elements before the document is modified below
  const title = extractText($, 'title');
  const h1 = extractText($, 'h1');
  const h2 = extractText($, 'h2');
  const metaDesc = $('meta[name="description"]').attr('content')?.toLowerCase() || '';

  // Script and style content is not visible copy; drop it before reading the body
  $('script, style, noscript').remove();
  const bodyText = extractText($, 'body');

  // Tokenize
  const bodyWords = tokenize(bodyText);
  const titleWords = tokenize(title);
  const h1Words = tokenize(h1);
  const h2Words = tokenize(h2);
  const metaDescWords = tokenize(metaDesc);

  // Sets give constant-time membership checks for every body keyword
  const titleWordSet = new Set(titleWords);
  const h1WordSet = new Set(h1Words);
  const h2WordSet = new Set(h2Words);
  const metaDescWordSet = new Set(metaDescWords);

  // Count words
  const wordCounts = countWords(bodyWords);
  const totalWords = bodyWords.length;

  // Density per keyword; totalWords is non-zero whenever wordCounts has entries
  const rankedKeywords: KeywordInfo[] = [];
  for (const [word, count] of wordCounts) {
    const density = (count / totalWords) * 100;
    rankedKeywords.push({
      word,
      count,
      density: Math.round(density * 100) / 100,
      inTitle: titleWordSet.has(word),
      inH1: h1WordSet.has(word),
      inH2: h2WordSet.has(word),
      inMetaDesc: metaDescWordSet.has(word),
    });
  }

  // Sort by count and take top 20
  rankedKeywords.sort((a, b) => b.count - a.count);
  const topKeywords = rankedKeywords.slice(0, 20);

  // Extract n-grams
  const twoWordPhrases = extractNGrams(bodyWords, 2);
  const threeWordPhrases = extractNGrams(bodyWords, 3);
  const fourWordPhrases = extractNGrams(bodyWords, 4);

  // Keep phrases seen at least twice, most frequent first, at most 10
  const sortPhrases = (phrases: Map<string, number>, n: number): KeywordPhraseInfo[] => {
    return Array.from(phrases)
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .map(([phrase, count]) => ({ phrase, count, wordCount: n }));
  };

  // Check for keyword stuffing (density > 3%). A word that occurs only once is
  // never stuffing: on very short pages a single occurrence already exceeds 3%
  // and would otherwise flag every word on the page.
  const stuffedKeywords = topKeywords.filter((k) => k.density > 3 && k.count > 1);
  const potentialStuffing = stuffedKeywords.map((k) => k.word);

  // Generate issues
  if (potentialStuffing.length > 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.KEYWORD_STUFFING,
      affectedUrls: [url],
      details: {
        keywords: potentialStuffing,
        densities: stuffedKeywords.map((k) => ({ word: k.word, density: k.density })),
      },
    });
  }

  // Check if top keywords are in title
  const topBodyKeywords = topKeywords.slice(0, 5).map((k) => k.word);
  const keywordsInTitle = topBodyKeywords.filter((k) => titleWordSet.has(k));

  if (keywordsInTitle.length === 0 && topBodyKeywords.length > 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.NO_KEYWORDS_IN_TITLE,
      affectedUrls: [url],
      details: {
        topKeywords: topBodyKeywords,
        title,
        recommendation: `Consider including "${topBodyKeywords[0]}" in your title`,
      },
    });
  }

  // Check if top keywords are in H1 (only when the page has H1 text at all)
  const keywordsInH1 = topBodyKeywords.filter((k) => h1WordSet.has(k));

  if (keywordsInH1.length === 0 && topBodyKeywords.length > 0 && h1) {
    issues.push({
      ...ISSUE_DEFINITIONS.NO_KEYWORDS_IN_H1,
      affectedUrls: [url],
      details: {
        topKeywords: topBodyKeywords,
        h1,
        recommendation: `Consider including "${topBodyKeywords[0]}" in your H1`,
      },
    });
  }

  return {
    issues,
    data: {
      totalWords,
      uniqueWords: wordCounts.size,
      topKeywords,
      topPhrases: {
        twoWord: sortPhrases(twoWordPhrases, 2),
        threeWord: sortPhrases(threeWordPhrases, 3),
        fourWord: sortPhrases(fourWordPhrases, 4),
      },
      keywordDistribution: {
        title: titleWords,
        h1: h1Words,
        h2: h2Words,
        metaDesc: metaDescWords,
      },
      potentialStuffing,
    },
  };
}