@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
// Internal Link Graph Analysis
|
|
2
|
+
// Advanced internal linking analysis: hub/authority detection, link depth, PageRank sculpting
|
|
3
|
+
// Based on advanced SEO research
|
|
4
|
+
|
|
5
|
+
import * as cheerio from 'cheerio';
|
|
6
|
+
import type { AuditIssue } from '../types.js';
|
|
7
|
+
|
|
8
|
+
/**
 * One same-host hyperlink found on a page, as produced by extractInternalLinks.
 */
export interface InternalLink {
|
|
9
|
+
source: string; // URL of the page the link was found on (the pageUrl passed to extractInternalLinks)
|
|
10
|
+
target: string; // Resolved absolute URL of the link destination, with any #fragment removed
|
|
11
|
+
anchor: string; // Trimmed text content of the <a> element (empty for links without text)
|
|
12
|
+
context: 'navigation' | 'content' | 'footer' | 'sidebar'; // Page region, decided by the nearest matching ancestor; 'content' when none matches
|
|
13
|
+
isEditorial: boolean; // True only for 'content' links whose parent element has noticeably more text than the anchor itself
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
 * Per-page link statistics.
 * NOTE(review): no function in the visible part of this file constructs this
 * type; field meanings beyond the inline comments should be confirmed with callers.
 */
export interface PageLinkStats {
|
|
17
|
+
url: string;
|
|
18
|
+
inboundLinks: number;
|
|
19
|
+
outboundLinks: number;
|
|
20
|
+
hubScore: number; // Pages that link to many others
|
|
21
|
+
authorityScore: number; // Pages that receive many links
|
|
22
|
+
depth: number; // Clicks from homepage
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
 * Aggregate internal-linking data.
 * analyzeInternalLinkGraph returns a Partial of this type and fills in only
 * totalInternalLinks, contextualLinkRatio, navigationVsContentRatio,
 * firstLinkAnchors and linkDistribution; the remaining fields are not populated
 * by any function visible in this file.
 */
export interface InternalLinkGraphData {
|
|
26
|
+
totalInternalLinks: number; // Number of internal links extracted from the analyzed page
|
|
27
|
+
orphanPages: string[];
|
|
28
|
+
deepPages: string[]; // More than 3 clicks deep
|
|
29
|
+
hubPages: string[];
|
|
30
|
+
authorityPages: string[];
|
|
31
|
+
linkDistribution: { url: string; links: number }[]; // Internal link count per page URL
|
|
32
|
+
firstLinkAnchors: Map<string, string>; // Target URL -> anchor text of the first link pointing at it
|
|
33
|
+
contextualLinkRatio: number; // 'content' links divided by all internal links (0 when there are none)
|
|
34
|
+
navigationVsContentRatio: number; // 'navigation' links divided by 'content' links (0 when there are no content links)
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
 * Collect the same-host links of a page and label where each one sits.
 *
 * Every `<a href>` whose resolved hostname equals the hostname of `pageUrl` is
 * returned. Empty hrefs, `javascript:` hrefs, pure `#fragment` hrefs and hrefs
 * that cannot be parsed as URLs are skipped.
 *
 * @param html - Raw HTML of the page.
 * @param pageUrl - Absolute URL of the page; used as the base for relative
 *   hrefs and stored as `source` on every returned link.
 * @returns One entry per internal link, in the order cheerio yields the anchors.
 * @throws TypeError when `pageUrl` is not a valid absolute URL.
 */
export function extractInternalLinks(html: string, pageUrl: string): InternalLink[] {
  const $ = cheerio.load(html);
  const pageHost = new URL(pageUrl).hostname;
  const collected: InternalLink[] = [];

  $('a[href]').each((_, node) => {
    const $anchor = $(node);
    const rawHref = $anchor.attr('href') || '';
    const anchorText = $anchor.text().trim();

    // Nothing to follow: empty, script pseudo-links, in-page fragments.
    const notNavigable =
      rawHref === '' || rawHref.startsWith('javascript:') || rawHref.startsWith('#');
    if (notNavigable) {
      return;
    }

    // Resolve against the page URL; unparsable hrefs are ignored.
    let resolved: URL;
    try {
      resolved = new URL(rawHref, pageUrl);
    } catch {
      return;
    }

    // A different hostname means the link is external.
    if (resolved.hostname !== pageHost) {
      return;
    }

    // Walk ancestors from nearest to farthest; the first one that looks like a
    // navigation, footer or sidebar container decides the placement.
    let placement: 'navigation' | 'content' | 'footer' | 'sidebar' = 'content';
    const ancestors = $anchor.parents();
    for (let idx = 0; idx < ancestors.length; idx += 1) {
      const ancestor = ancestors.eq(idx);
      const tag = ancestor.prop('tagName')?.toLowerCase() || '';
      const classes = ancestor.attr('class')?.toLowerCase() || '';
      const elementId = ancestor.attr('id')?.toLowerCase() || '';

      const looksLikeNav =
        tag === 'nav' ||
        ['nav', 'menu'].some((hint) => classes.includes(hint)) ||
        ['nav', 'menu'].some((hint) => elementId.includes(hint));
      const looksLikeFooter =
        tag === 'footer' || classes.includes('footer') || elementId.includes('footer');
      const looksLikeSidebar =
        tag === 'aside' ||
        classes.includes('sidebar') ||
        classes.includes('widget') ||
        elementId.includes('sidebar');

      if (looksLikeNav) {
        placement = 'navigation';
      } else if (looksLikeFooter) {
        placement = 'footer';
      } else if (looksLikeSidebar) {
        placement = 'sidebar';
      } else {
        continue;
      }
      break;
    }

    // Editorial = in content AND the parent carries more than ten characters
    // of text beyond the anchor itself.
    const editorial =
      placement === 'content' &&
      $anchor.parent().text().trim().length > anchorText.length + 10;

    collected.push({
      source: pageUrl,
      target: resolved.href.split('#')[0], // drop the fragment
      anchor: anchorText,
      context: placement,
      isEditorial: editorial,
    });
  });

  return collected;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Compute the click depth of every page reachable from the homepage.
 *
 * Runs a breadth-first search over the directed link graph, so each page gets
 * the length of the shortest link path from `homepageUrl`. Pages that cannot
 * be reached from the homepage are absent from the result.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @param homepageUrl - Start node; always present in the result with depth 0.
 * @returns Map from URL to depth, in order of discovery.
 */
export function calculateLinkDepth(
  links: InternalLink[],
  homepageUrl: string
): Map<string, number> {
  // Index outbound targets once so the search does not rescan `links`
  // for every page it visits.
  const targetsBySource = new Map<string, string[]>();
  for (const { source, target } of links) {
    const targets = targetsBySource.get(source);
    if (targets) {
      targets.push(target);
    } else {
      targetsBySource.set(source, [target]);
    }
  }

  // `depths` doubles as the visited set: a URL is recorded exactly once,
  // at the moment it is first discovered.
  const depths = new Map<string, number>([[homepageUrl, 0]]);
  const queue: string[] = [homepageUrl];

  // Advance a cursor instead of calling shift(), which is linear per call.
  for (let head = 0; head < queue.length; head += 1) {
    const current = queue[head];
    const nextDepth = (depths.get(current) ?? 0) + 1;

    for (const target of targetsBySource.get(current) ?? []) {
      if (!depths.has(target)) {
        depths.set(target, nextDepth);
        queue.push(target);
      }
    }
  }

  return depths;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Calculate simplified hub/authority scores (similar to the HITS algorithm).
 *
 * Each round sets a page's authority to the sum of the hub scores of the pages
 * linking to it, then sets its hub score to the sum of the freshly computed
 * authority scores of the pages it links to. Both score sets are then divided
 * by their largest value (never by less than 1). Repeated links between the
 * same two pages are counted once per occurrence.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @param iterations - Number of update rounds; with 0 every page keeps the
 *   initial score of 1.
 * @returns Hub and authority score per URL seen in `links`.
 */
export function calculateHubAuthority(
  links: InternalLink[],
  iterations: number = 10
): { hubs: Map<string, number>; authorities: Map<string, number> } {
  const hubs = new Map<string, number>();
  const authorities = new Map<string, number>();
  const outbound = new Map<string, string[]>();
  const inbound = new Map<string, string[]>();

  for (const { source, target } of links) {
    // Register both endpoints with the initial score of 1.
    for (const url of [source, target]) {
      if (!hubs.has(url)) {
        hubs.set(url, 1);
        authorities.set(url, 1);
      }
    }

    const targets = outbound.get(source);
    if (targets) {
      targets.push(target);
    } else {
      outbound.set(source, [target]);
    }

    const sources = inbound.get(target);
    if (sources) {
      sources.push(source);
    } else {
      inbound.set(target, [source]);
    }
  }

  const urls = [...hubs.keys()];

  for (let round = 0; round < iterations; round += 1) {
    // The maxima are tracked while summing. Spreading all scores into
    // Math.max() passes one call argument per page and overflows the
    // engine's argument limit on large graphs.
    const rawAuthorities = new Map<string, number>();
    let maxAuthority = 1;
    for (const url of urls) {
      let score = 0;
      for (const source of inbound.get(url) ?? []) {
        score += hubs.get(source) ?? 0;
      }
      rawAuthorities.set(url, score);
      if (score > maxAuthority) {
        maxAuthority = score;
      }
    }

    const rawHubs = new Map<string, number>();
    let maxHub = 1;
    for (const url of urls) {
      let score = 0;
      for (const target of outbound.get(url) ?? []) {
        score += rawAuthorities.get(target) ?? 0;
      }
      rawHubs.set(url, score);
      if (score > maxHub) {
        maxHub = score;
      }
    }

    // Normalize so scores stay bounded across rounds.
    for (const url of urls) {
      authorities.set(url, (rawAuthorities.get(url) ?? 0) / maxAuthority);
      hubs.set(url, (rawHubs.get(url) ?? 0) / maxHub);
    }
  }

  return { hubs, authorities };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Detect orphan pages: known URLs that no other page links to.
 *
 * A link from a page to itself (for example a navigation entry for the current
 * page) does not count as an inbound link, because it does not make the page
 * reachable from anywhere else.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @param knownUrls - URLs to check; compared to link targets by exact string
 *   equality.
 * @returns The entries of `knownUrls` without an inbound link from another
 *   page, in their original order.
 */
export function detectOrphanPages(
  links: InternalLink[],
  knownUrls: string[]
): string[] {
  const linkedFromElsewhere = new Set<string>();
  for (const { source, target } of links) {
    if (source !== target) {
      linkedFromElsewhere.add(target);
    }
  }

  return knownUrls.filter((url) => !linkedFromElsewhere.has(url));
}
|
|
245
|
+
|
|
246
|
+
/**
 * Record, for every link target, the anchor text and context of the first
 * link in `links` that points at it (first-link priority). Later links to the
 * same target are ignored.
 *
 * @param links - Links in the order they should be prioritized.
 * @returns Map from target URL to the first link's anchor and context.
 */
export function analyzeFirstLinkPriority(
  links: InternalLink[]
): Map<string, { anchor: string; context: string }> {
  return links.reduce((seen, { target, anchor, context }) => {
    if (seen.has(target)) {
      return seen;
    }
    return seen.set(target, { anchor, context });
  }, new Map<string, { anchor: string; context: string }>());
}
|
|
265
|
+
|
|
266
|
+
/**
 * Find link-hoarding pages: pages that appear as a link source but have fewer
 * than `threshold` outbound internal links. Pages that never appear as a
 * source are not reported.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @param threshold - Minimum number of outbound links a page needs to pass.
 * @returns Source URLs below the threshold, in order of first appearance.
 */
export function detectLinkHoarding(
  links: InternalLink[],
  threshold: number = 3
): string[] {
  const tally = new Map<string, number>();
  links.forEach(({ source }) => {
    tally.set(source, (tally.get(source) || 0) + 1);
  });

  const sparsePages: string[] = [];
  for (const [page, total] of tally) {
    if (total < threshold) {
      sparsePages.push(page);
    }
  }
  return sparsePages;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Find PageRank sinks: pages that are linked to but never appear as the
 * source of any link in `links`.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @returns Sink URLs, in the order they first appear as a target.
 */
export function detectPageRankSinks(links: InternalLink[]): string[] {
  const linkingPages = new Set<string>();
  const linkedPages = new Set<string>();
  for (const { source, target } of links) {
    linkingPages.add(source);
    linkedPages.add(target);
  }

  const sinks: string[] = [];
  for (const candidate of linkedPages) {
    if (!linkingPages.has(candidate)) {
      sinks.push(candidate);
    }
  }
  return sinks;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Decide whether an anchor text is a generic call-to-action.
 *
 * The anchor is lower-cased, whitespace-collapsed and stripped of leading and
 * trailing punctuation. The single words "here" and "this" only count when
 * they are the entire anchor; the phrases "click here", "read more" and
 * "learn more" count wherever they occur as whole words. Plain substring
 * matching is avoided because "here" is contained in "where"/"there" and
 * "this" appears in many descriptive anchors.
 */
function isGenericAnchor(anchor: string): boolean {
  const normalized = anchor
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, '');

  if (normalized === 'here' || normalized === 'this') {
    return true;
  }
  return /\b(?:click here|read more|learn more)\b/.test(normalized);
}

/**
 * Main function: analyze the internal link structure of a single page.
 *
 * Extracts the page's internal links and reports:
 * - NO_CONTEXTUAL_INTERNAL_LINKS: links exist but none is in the content area
 * - LOW_EDITORIAL_LINKS: content links exist but fewer than two are editorial
 * - FIRST_LINK_GENERIC_ANCHOR: the first link to a target has a generic anchor
 * - TOO_MANY_INTERNAL_LINKS: more than 100 internal links
 * - NO_INTERNAL_LINKS: no internal links at all
 *
 * @param html - Raw HTML of the page.
 * @param url - Absolute URL of the page.
 * @returns The issues found and the single-page subset of the link graph data.
 */
export function analyzeInternalLinkGraph(
  html: string,
  url: string
): { issues: AuditIssue[]; data: Partial<InternalLinkGraphData> } {
  const issues: AuditIssue[] = [];
  const links = extractInternalLinks(html, url);

  // Count link types
  const navigationLinks = links.filter((l) => l.context === 'navigation').length;
  const contentLinks = links.filter((l) => l.context === 'content').length;
  const footerLinks = links.filter((l) => l.context === 'footer').length;
  const editorialLinks = links.filter((l) => l.isEditorial).length;

  const totalLinks = links.length;
  const contextualLinkRatio = totalLinks > 0 ? contentLinks / totalLinks : 0;
  const navigationVsContentRatio = contentLinks > 0 ? navigationLinks / contentLinks : 0;

  // Links exist, but none of them sits in the content area.
  if (contentLinks === 0 && totalLinks > 0) {
    issues.push({
      code: 'NO_CONTEXTUAL_INTERNAL_LINKS',
      severity: 'warning',
      category: 'links',
      title: 'No contextual internal links',
      description: 'All internal links are in navigation/footer. No editorial links in content.',
      impact: 'Editorial links in content pass more SEO value than navigational links.',
      howToFix: 'Add relevant internal links within your main content to related pages.',
      affectedUrls: [url],
      details: { navigationLinks, contentLinks, footerLinks },
    });
  }

  // Content links exist, but fewer than two have surrounding text.
  if (editorialLinks < 2 && contentLinks > 0) {
    issues.push({
      code: 'LOW_EDITORIAL_LINKS',
      severity: 'notice',
      category: 'links',
      title: 'Few editorial internal links',
      description: `Only ${editorialLinks} editorial (in-content) internal links found.`,
      impact: 'Editorial links with surrounding context provide stronger topical signals.',
      howToFix: 'Add 2-5 relevant internal links within your body content.',
      affectedUrls: [url],
    });
  }

  // Analyze first link anchors
  const firstLinks = analyzeFirstLinkPriority(links);

  // One notice per target whose first link carries a generic anchor text.
  for (const [target, linkInfo] of firstLinks) {
    if (isGenericAnchor(linkInfo.anchor)) {
      issues.push({
        code: 'FIRST_LINK_GENERIC_ANCHOR',
        severity: 'notice',
        category: 'links',
        title: 'First link uses generic anchor text',
        description: `First link to ${target} uses generic anchor "${linkInfo.anchor}".`,
        impact: 'Google may prioritize first link anchor for topic signals.',
        howToFix: 'Use descriptive, keyword-relevant anchor text for internal links.',
        affectedUrls: [url],
        details: { target, anchor: linkInfo.anchor },
      });
    }
  }

  // Check for link distribution (too many links)
  if (totalLinks > 100) {
    issues.push({
      code: 'TOO_MANY_INTERNAL_LINKS',
      severity: 'warning',
      category: 'links',
      title: 'Excessive internal links',
      description: `Page has ${totalLinks} internal links, which may dilute PageRank distribution.`,
      impact: 'Too many links reduce the value passed to each linked page.',
      howToFix: 'Reduce internal links to the most important and relevant pages.',
      affectedUrls: [url],
      details: { totalLinks, navigationLinks, contentLinks, footerLinks },
    });
  }

  // Check for no internal links at all
  if (totalLinks === 0) {
    issues.push({
      code: 'NO_INTERNAL_LINKS',
      severity: 'error',
      category: 'links',
      title: 'No internal links found',
      description: 'Page has no internal links to other pages on the site.',
      impact: 'Creates a dead end for users and search engine crawlers.',
      howToFix: 'Add relevant internal links to related content.',
      affectedUrls: [url],
    });
  }

  return {
    issues,
    data: {
      totalInternalLinks: totalLinks,
      contextualLinkRatio,
      navigationVsContentRatio,
      // Keep only the anchor text of each first link.
      firstLinkAnchors: new Map(
        [...firstLinks.entries()].map(([k, v]) => [k, v.anchor])
      ),
      linkDistribution: [{ url, links: totalLinks }],
    },
  };
}
|
|
407
|
+
|
|
408
|
+
/**
 * Multi-page analysis for topic clusters and pillar pages.
 *
 * A pillar page is any page whose hub score (from calculateHubAuthority) is at
 * least 0.7. The cluster of a pillar is every non-pillar page that is directly
 * connected to it by a link in either direction; a page connected to several
 * pillars appears in each of their clusters.
 *
 * @param links - Directed edges (`source` -> `target`) of the site graph.
 * @returns The pillar URLs and, per pillar, its de-duplicated cluster members
 *   in order of first appearance in `links`.
 */
export function analyzeTopicClusters(
  links: InternalLink[]
): { pillarPages: string[]; clusters: Map<string, string[]> } {
  const { hubs } = calculateHubAuthority(links);

  // Pillar pages have high hub scores (link to many cluster pages)
  const pillarThreshold = 0.7;
  const pillarPages = [...hubs.entries()]
    .filter(([, score]) => score >= pillarThreshold)
    .map(([url]) => url);

  // Set-based membership plus a single pass over the links, instead of
  // rescanning every link (and the pillar array) once per pillar.
  const pillarSet = new Set(pillarPages);
  const members = new Map<string, Set<string>>();
  for (const pillar of pillarPages) {
    members.set(pillar, new Set<string>());
  }

  for (const { source, target } of links) {
    const sourceIsPillar = pillarSet.has(source);
    const targetIsPillar = pillarSet.has(target);

    if (sourceIsPillar && !targetIsPillar) {
      members.get(source)?.add(target);
    } else if (targetIsPillar && !sourceIsPillar) {
      members.get(target)?.add(source);
    }
    // Links between two pillars (or a pillar and itself) add no members.
  }

  const clusters = new Map<string, string[]>();
  for (const [pillar, pages] of members) {
    clusters.set(pillar, [...pages]);
  }

  return { pillarPages, clusters };
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
// Keyword Density Analysis
|
|
2
|
+
// Analyzes keyword usage and distribution across page elements
|
|
3
|
+
|
|
4
|
+
import * as cheerio from 'cheerio';
|
|
5
|
+
import type { AuditIssue } from '../types.js';
|
|
6
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
7
|
+
|
|
8
|
+
// English function words (articles, pronouns, auxiliaries, prepositions, ...)
// that are excluded from keyword and phrase counts.
const STOP_WORDS = new Set<string>([
  'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
  'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were',
  'been', 'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
  'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
  'it', 'its', 'they', 'them', 'their', 'we', 'us', 'our', 'you', 'your',
  'i', 'me', 'my', 'he', 'she', 'him', 'her', 'his', 'not', 'no',
  'nor', 'so', 'if', 'then', 'else', 'when', 'where', 'why', 'how', 'what',
  'who', 'which', 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other',
  'some', 'such', 'only', 'own', 'same', 'than', 'too', 'very', 'just', 'also',
  'now', 'here', 'there', 'about', 'after', 'before', 'above', 'below', 'between', 'into',
  'through', 'during', 'under', 'again', 'further', 'once', 'any', 'being', 'because', 'while',
]);
|
|
21
|
+
|
|
22
|
+
/**
 * Usage statistics for a single body keyword, as built by analyzeKeywords.
 */
export interface KeywordInfo {
|
|
23
|
+
word: string; // Lower-cased token taken from the page body
|
|
24
|
+
count: number; // Occurrences among the filtered body tokens
|
|
25
|
+
density: number; // Percentage
|
|
26
|
+
inTitle: boolean; // Word also appears among the tokenized <title> words
|
|
27
|
+
inH1: boolean; // Word also appears among the tokenized <h1> words
|
|
28
|
+
inH2: boolean; // Word also appears among the tokenized <h2> words
|
|
29
|
+
inMetaDesc: boolean; // Word also appears among the tokenized meta description words
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
 * A repeated multi-word phrase (n-gram) found in the page body.
 */
export interface KeywordPhraseInfo {
|
|
33
|
+
phrase: string; // The phrase's words joined by single spaces
|
|
34
|
+
count: number; // Number of times the phrase occurs
|
|
35
|
+
wordCount: number; // Number of words in the phrase (the n of the n-gram)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
 * Result payload of analyzeKeywords for a single page.
 */
export interface KeywordAnalysisData {
|
|
39
|
+
totalWords: number; // Number of body tokens left after stop-word/short-word/number filtering
|
|
40
|
+
uniqueWords: number; // Number of distinct body tokens
|
|
41
|
+
topKeywords: KeywordInfo[]; // Up to 20 body keywords, highest count first
|
|
42
|
+
topPhrases: { // Per phrase length: up to 10 phrases that occur at least twice
|
|
43
|
+
twoWord: KeywordPhraseInfo[];
|
|
44
|
+
threeWord: KeywordPhraseInfo[];
|
|
45
|
+
fourWord: KeywordPhraseInfo[];
|
|
46
|
+
};
|
|
47
|
+
keywordDistribution: { // Tokenized words found in each page element
|
|
48
|
+
title: string[];
|
|
49
|
+
h1: string[];
|
|
50
|
+
h2: string[];
|
|
51
|
+
metaDesc: string[];
|
|
52
|
+
};
|
|
53
|
+
potentialStuffing: string[]; // Top keywords whose density exceeds 3%
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
 * Extract the normalized text of every element matching `selector`.
 *
 * The text of each matched element is taken separately and the pieces are
 * joined with a space, so adjacent elements (e.g. consecutive headings) do not
 * fuse into one word. The result is lower-cased, every character that is not
 * a letter, combining mark, number, underscore or whitespace is replaced by a
 * space, and whitespace runs are collapsed. Letters are matched with Unicode
 * properties, so accented and non-Latin text is preserved.
 *
 * @param $ - Loaded cheerio document.
 * @param selector - CSS selector of the elements to read.
 * @returns Normalized text, or an empty string when nothing matches.
 */
function extractText($: cheerio.CheerioAPI, selector: string): string {
  return $(selector)
    .toArray()
    .map((element) => $(element).text())
    .join(' ')
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
67
|
+
|
|
68
|
+
/**
 * Tokenize text into keyword candidates.
 *
 * The text is lower-cased, every character that is not a letter, combining
 * mark, number, underscore or whitespace becomes a separator, and the result
 * is split on whitespace. Tokens of one or two characters, stop words and
 * purely numeric tokens are dropped. Letters and digits are matched with
 * Unicode properties so accented and non-Latin words are kept intact.
 *
 * @param text - Text to split.
 * @returns Remaining tokens in their original order (duplicates included).
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word) && !/^\p{N}+$/u.test(word));
}
|
|
78
|
+
|
|
79
|
+
/**
 * Tally how often each word occurs.
 *
 * @param words - Tokens to count.
 * @returns Map from word to number of occurrences, keyed in order of first
 *   appearance.
 */
function countWords(words: string[]): Map<string, number> {
  return words.reduce(
    (tally, word) => tally.set(word, (tally.get(word) || 0) + 1),
    new Map<string, number>()
  );
}
|
|
89
|
+
|
|
90
|
+
/**
 * Count the n-word phrases (n-grams) formed by consecutive entries of `words`.
 * Phrases in which any space-separated part is a stop word are skipped.
 *
 * @param words - Tokens in reading order.
 * @param n - Number of words per phrase.
 * @returns Map from phrase (words joined by a single space) to occurrences.
 */
function extractNGrams(words: string[], n: number): Map<string, number> {
  const tally = new Map<string, number>();
  const lastStart = words.length - n;

  let start = 0;
  while (start <= lastStart) {
    const phrase = words.slice(start, start + n).join(' ');
    start += 1;

    // Keep the phrase only when none of its parts is a stop word.
    const hasNoStopWord = phrase.split(' ').every((part) => !STOP_WORDS.has(part));
    if (hasNoStopWord) {
      tally.set(phrase, (tally.get(phrase) || 0) + 1);
    }
  }

  return tally;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Analyze keyword density and distribution for a single page.
 *
 * Tokenizes the body text (scripts, styles and noscript content removed) and
 * the title, H1, H2 and meta description, then reports:
 * - KEYWORD_STUFFING: a top-20 keyword exceeds 3% of the body tokens
 * - NO_KEYWORDS_IN_TITLE: the page has a title, but none of the five most
 *   frequent body keywords occurs in it
 * - NO_KEYWORDS_IN_H1: the page has an H1, but none of the five most frequent
 *   body keywords occurs in it
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; attached to every reported issue.
 * @returns The issues found and the keyword statistics.
 */
export function analyzeKeywords(html: string, url: string): { issues: AuditIssue[]; data: KeywordAnalysisData } {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);

  // Extract text from different elements
  const title = extractText($, 'title');
  const h1 = extractText($, 'h1');
  const h2 = extractText($, 'h2');
  const metaDesc = $('meta[name="description"]').attr('content')?.toLowerCase() || '';

  // Remove script and style content before reading the body text
  $('script, style, noscript').remove();
  const bodyText = extractText($, 'body');

  // Tokenize
  const bodyWords = tokenize(bodyText);
  const titleWords = tokenize(title);
  const h1Words = tokenize(h1);
  const h2Words = tokenize(h2);
  const metaDescWords = tokenize(metaDesc);

  // Sets give constant-time membership checks in the per-keyword loop below.
  const titleWordSet = new Set(titleWords);
  const h1WordSet = new Set(h1Words);
  const h2WordSet = new Set(h2Words);
  const metaDescWordSet = new Set(metaDescWords);

  // Count words
  const wordCounts = countWords(bodyWords);
  const totalWords = bodyWords.length;

  // Calculate keyword density (percentage of body tokens, two decimals)
  const rankedKeywords: KeywordInfo[] = [];
  for (const [word, count] of wordCounts) {
    const density = (count / totalWords) * 100;
    rankedKeywords.push({
      word,
      count,
      density: Math.round(density * 100) / 100,
      inTitle: titleWordSet.has(word),
      inH1: h1WordSet.has(word),
      inH2: h2WordSet.has(word),
      inMetaDesc: metaDescWordSet.has(word),
    });
  }

  // Sort by count and take top 20
  rankedKeywords.sort((a, b) => b.count - a.count);
  const topKeywords = rankedKeywords.slice(0, 20);

  // Extract n-grams
  const twoWordPhrases = extractNGrams(bodyWords, 2);
  const threeWordPhrases = extractNGrams(bodyWords, 3);
  const fourWordPhrases = extractNGrams(bodyWords, 4);

  // Keep phrases seen at least twice, most frequent first, at most ten.
  const sortPhrases = (phrases: Map<string, number>, n: number): KeywordPhraseInfo[] => {
    return Array.from(phrases)
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .map(([phrase, count]) => ({ phrase, count, wordCount: n }));
  };

  // Check for keyword stuffing (density > 3%)
  const stuffedKeywords = topKeywords.filter((k) => k.density > 3);
  const potentialStuffing = stuffedKeywords.map((k) => k.word);

  // Generate issues
  if (potentialStuffing.length > 0) {
    issues.push({
      ...ISSUE_DEFINITIONS.KEYWORD_STUFFING,
      affectedUrls: [url],
      details: {
        keywords: potentialStuffing,
        densities: stuffedKeywords.map(({ word, density }) => ({ word, density })),
      },
    });
  }

  // Check if top keywords are in title
  const topBodyKeywords = topKeywords.slice(0, 5).map((k) => k.word);
  const keywordsInTitle = topBodyKeywords.filter((k) => titleWordSet.has(k));

  // Only meaningful when a title exists, mirroring the H1 check below;
  // a page without any title is a different problem.
  if (keywordsInTitle.length === 0 && topBodyKeywords.length > 0 && title) {
    issues.push({
      ...ISSUE_DEFINITIONS.NO_KEYWORDS_IN_TITLE,
      affectedUrls: [url],
      details: {
        topKeywords: topBodyKeywords,
        title,
        recommendation: `Consider including "${topBodyKeywords[0]}" in your title`,
      },
    });
  }

  // Check if top keywords are in H1
  const keywordsInH1 = topBodyKeywords.filter((k) => h1WordSet.has(k));

  if (keywordsInH1.length === 0 && topBodyKeywords.length > 0 && h1) {
    issues.push({
      ...ISSUE_DEFINITIONS.NO_KEYWORDS_IN_H1,
      affectedUrls: [url],
      details: {
        topKeywords: topBodyKeywords,
        h1,
        recommendation: `Consider including "${topBodyKeywords[0]}" in your H1`,
      },
    });
  }

  return {
    issues,
    data: {
      totalWords,
      uniqueWords: wordCounts.size,
      topKeywords,
      topPhrases: {
        twoWord: sortPhrases(twoWordPhrases, 2),
        threeWord: sortPhrases(threeWordPhrases, 3),
        fourWord: sortPhrases(fourWordPhrases, 4),
      },
      keywordDistribution: {
        title: titleWords,
        h1: h1Words,
        h2: h2Words,
        metaDesc: metaDescWords,
      },
      potentialStuffing,
    },
  };
}
|