@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
// Keyword Placement Analysis - Critical On-Page SEO Signals
|
|
2
|
+
// Reference: "4 Steps to Rank #1 in Google (2026 SEO Plan)" by Nathan Gotch
|
|
3
|
+
// "Google's leaked documents have a feature called title match score"
|
|
4
|
+
// "Include the primary keyword phrase in the first paragraph"
|
|
5
|
+
|
|
6
|
+
import * as cheerio from 'cheerio';
|
|
7
|
+
import type { AuditIssue } from '../types.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Aggregated result of the keyword placement audit for a single page.
 * One sub-object per on-page location that is inspected, plus a combined score.
 */
export interface KeywordPlacementData {
  /** Keyword phrases inferred for the page and used by every sub-analysis. */
  detectedKeywords: Array<string>;
  /** Findings for the URL path. */
  urlAnalysis: {
    /** True when one of the keywords was matched against the URL path. */
    containsKeyword: boolean;
    /** The keyword that matched, or null when none did. */
    keywordInUrl: string | null;
    /** Lower-cased URL path that was inspected. */
    urlSlug: string;
    /** Whether the URL passed the "clean URL" heuristics. */
    isCleanUrl: boolean;
    /** Whether the URL carries a query string. */
    hasDynamicParams: boolean;
  };
  /** Findings for the <title> element. */
  titleAnalysis: {
    /** A keyword phrase appears verbatim in the title. */
    hasExactMatch: boolean;
    /** Most significant words of a keyword appear in the title. */
    hasPartialMatch: boolean;
    titleMatchScore: number; // 0-100 estimated
    /** Where in the title the match was found. */
    position: 'start' | 'middle' | 'end' | 'none';
  };
  /** Findings for the first <h1> element. */
  h1Analysis: {
    /** A keyword phrase appears verbatim in the H1. */
    hasExactMatch: boolean;
    /** Most significant words of a keyword appear in the H1. */
    hasPartialMatch: boolean;
    /** Trimmed H1 text, or null when the page has no (non-empty) H1. */
    h1Text: string | null;
  };
  /** Findings for the first substantial paragraph. */
  firstParagraphAnalysis: {
    /** A keyword word was found in the paragraph. */
    hasKeyword: boolean;
    keywordPosition: number | null; // word position
    /** Leading excerpt of the inspected paragraph. */
    firstParagraphPreview: string;
    /** True when the keyword was found before word index 100. */
    isWithinFirst100Words: boolean;
  };
  /** Findings for the meta description. */
  metaDescriptionAnalysis: {
    /** Enough keyword words appear in the description. */
    hasKeyword: boolean;
    /** Approximate location of the first matching word. */
    position: 'start' | 'middle' | 'end' | 'none';
  };
  overallPlacementScore: number; // 0-100
}
|
|
41
|
+
|
|
42
|
+
/**
 * Infer the likely target keyword phrases for a page.
 *
 * Candidates are collected in priority order from:
 *   1. the <title>, truncated at the first `|`, `-`, `–` or `—` (to drop a
 *      trailing brand name),
 *   2. the first <h1>,
 *   3. the last segment of the URL path, with `-`/`_` turned into spaces and a
 *      trailing .html/.htm/.php/.asp/.aspx extension removed.
 *
 * Title and H1 candidates are only kept when they are between 4 and 99
 * characters long; the slug candidate needs more than 3 characters.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL; an unparsable URL simply contributes no candidate.
 * @returns Up to three distinct, lower-cased candidate phrases, in the order
 *          title, H1, URL slug.
 */
export function inferTargetKeywords(html: string, url: string): string[] {
  const $ = cheerio.load(html);
  const keywords: string[] = [];

  // Title is treated as the most reliable signal, so it goes first.
  const title = $('title').text().trim();
  if (title) {
    // Brand names usually follow a separator; keep only the leading part.
    const cleanTitle = title.split(/[|\-–—]/)[0]?.trim() || title;
    if (cleanTitle.length > 3 && cleanTitle.length < 100) {
      keywords.push(cleanTitle.toLowerCase());
    }
  }

  const h1 = $('h1').first().text().trim();
  if (h1 && h1.length > 3 && h1.length < 100) {
    keywords.push(h1.toLowerCase());
  }

  try {
    const urlPath = new URL(url).pathname;
    const slug = urlPath.split('/').filter(p => p.length > 0).pop();
    if (slug) {
      const cleanSlug = slug.replace(/[-_]/g, ' ').replace(/\.(html?|php|aspx?)$/i, '');
      if (cleanSlug.length > 3) {
        keywords.push(cleanSlug.toLowerCase());
      }
    }
  } catch {
    // Invalid URL: best-effort, the slug just does not contribute a candidate.
  }

  // De-duplicate while preserving the title > H1 > slug priority order.
  return [...new Set(keywords)].slice(0, 3);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Analyze a URL for keyword presence and basic "clean URL" hygiene.
 *
 * A keyword counts as present when at least 70% of its significant words
 * (longer than 2 characters) occur in the lower-cased path, where `-`, `_`
 * and `/` are treated as word separators. Keywords without any significant
 * word are skipped instead of matching vacuously.
 *
 * @param url - Absolute URL of the page.
 * @param keywords - Candidate keyword phrases, checked in order; the first
 *                   match wins.
 * @returns The URL analysis. For an unparsable URL every flag is false,
 *          `keywordInUrl` is null and `urlSlug` is the empty string.
 */
export function analyzeUrlKeyword(url: string, keywords: string[]): KeywordPlacementData['urlAnalysis'] {
  try {
    const parsedUrl = new URL(url);
    const urlSlug = parsedUrl.pathname.toLowerCase();
    const slugWords = urlSlug.replace(/[-_/]/g, ' ').trim();

    let containsKeyword = false;
    let keywordInUrl: string | null = null;

    for (const keyword of keywords) {
      const significantWords = keyword
        .toLowerCase()
        .split(/\s+/)
        .filter(w => w.length > 2);

      // Without this guard `0 >= ceil(0 * 0.7)` would report a match for a
      // keyword made only of very short words.
      if (significantWords.length === 0) continue;

      const matchCount = significantWords.filter(w => slugWords.includes(w)).length;
      if (matchCount >= Math.ceil(significantWords.length * 0.7)) {
        containsKeyword = true;
        keywordInUrl = keyword;
        break;
      }
    }

    const hasDynamicParams = parsedUrl.search.length > 0;

    // `pathname` never contains the query string, so the query has to be
    // checked through `search` for the URL to be judged clean.
    const isCleanUrl = !hasDynamicParams &&
      !urlSlug.includes('=') &&
      !/\d{5,}/.test(urlSlug) && // No long ID numbers
      urlSlug.length < 100;

    return {
      containsKeyword,
      keywordInUrl,
      urlSlug,
      isCleanUrl,
      hasDynamicParams,
    };
  } catch {
    // Unparsable URL: report a neutral, all-negative result.
    return {
      containsKeyword: false,
      keywordInUrl: null,
      urlSlug: '',
      isCleanUrl: false,
      hasDynamicParams: false,
    };
  }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Analyze the <title> for keyword matches (the "title match score" concept).
 *
 * Keywords are checked in order. An exact (substring) match ends the search
 * and records where the phrase sits: within the first 5 characters is
 * 'start', close to the end of the title is 'end', otherwise 'middle'.
 * Failing that, a keyword is a partial match when at least 60% of its
 * significant words (longer than 2 characters) appear in the title.
 *
 * Score: exact match 100 / 80 / 60 for start / middle / end, partial match
 * 40, no match 0.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keyword phrases.
 * @returns Match flags, estimated score and match position. A missing or
 *          empty title yields no match with score 0.
 */
export function analyzeTitleKeyword(html: string, keywords: string[]): KeywordPlacementData['titleAnalysis'] {
  const $ = cheerio.load(html);
  const title = $('title').text().trim().toLowerCase();

  if (!title) {
    return {
      hasExactMatch: false,
      hasPartialMatch: false,
      titleMatchScore: 0,
      position: 'none',
    };
  }

  let hasExactMatch = false;
  let hasPartialMatch = false;
  let position: 'start' | 'middle' | 'end' | 'none' = 'none';

  for (const keyword of keywords) {
    const kwLower = keyword.toLowerCase();

    // An empty keyword is a substring of every title; never count it.
    if (kwLower.trim() === '') continue;

    const index = title.indexOf(kwLower);
    if (index !== -1) {
      hasExactMatch = true;

      if (index < 5) {
        position = 'start';
      } else if (index > title.length - kwLower.length - 10) {
        position = 'end';
      } else {
        position = 'middle';
      }
      break;
    }

    // Partial match (most words present). Keywords without any significant
    // word are skipped, otherwise `0 >= ceil(0)` would match every title.
    const kwWords = kwLower.split(/\s+/).filter(w => w.length > 2);
    if (kwWords.length === 0) continue;

    const matchCount = kwWords.filter(w => title.includes(w)).length;
    if (matchCount >= Math.ceil(kwWords.length * 0.6)) {
      hasPartialMatch = true;
      position = 'middle';
    }
  }

  let titleMatchScore = 0;
  if (hasExactMatch) {
    titleMatchScore = position === 'start' ? 100 : position === 'middle' ? 80 : 60;
  } else if (hasPartialMatch) {
    titleMatchScore = 40;
  }

  return {
    hasExactMatch,
    hasPartialMatch,
    titleMatchScore,
    position,
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Analyze the first <h1> for keyword matches.
 *
 * Keywords are checked in order. An exact (substring, case-insensitive) match
 * ends the search. Otherwise a keyword is a partial match when at least 60%
 * of its significant words (longer than 2 characters) appear in the H1.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keyword phrases.
 * @returns Match flags plus the trimmed H1 text (null when there is none).
 */
export function analyzeH1Keyword(html: string, keywords: string[]): KeywordPlacementData['h1Analysis'] {
  const $ = cheerio.load(html);
  const h1 = $('h1').first().text().trim();
  const h1Lower = h1.toLowerCase();

  let hasExactMatch = false;
  let hasPartialMatch = false;

  for (const keyword of keywords) {
    const kwLower = keyword.toLowerCase();

    // An empty keyword is a substring of any text; never count it.
    if (kwLower.trim() === '') continue;

    if (h1Lower.includes(kwLower)) {
      hasExactMatch = true;
      break;
    }

    // Skip keywords without significant words so `0 >= ceil(0)` cannot
    // produce a vacuous partial match.
    const kwWords = kwLower.split(/\s+/).filter(w => w.length > 2);
    if (kwWords.length === 0) continue;

    const matchCount = kwWords.filter(w => h1Lower.includes(w)).length;
    if (matchCount >= Math.ceil(kwWords.length * 0.6)) {
      hasPartialMatch = true;
    }
  }

  return {
    hasExactMatch,
    hasPartialMatch,
    h1Text: h1 || null,
  };
}
|
|
240
|
+
|
|
241
|
+
/**
 * Analyze the first substantial paragraph for keyword placement.
 *
 * The paragraph is the first <p> longer than 50 characters found under
 * `main`, `article`, `.content`, `#content` or a top-level `body > div`;
 * if none qualifies, the first <p> of the document is used.
 *
 * A keyword is considered present when any of its words occurs (as a
 * substring) in one of the paragraph's whitespace-separated words. Only
 * significant keyword words (longer than 2 characters) are used, so
 * stop-words such as "to" or "a" cannot trigger a match; if a keyword has
 * no significant word, all of its non-empty words are used instead.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keyword phrases, checked in order.
 * @returns Whether a keyword was found, the zero-based word index of the
 *          first hit, a preview of at most 200 characters (with "..." when
 *          truncated) and whether the hit is before word index 100.
 */
export function analyzeFirstParagraph(html: string, keywords: string[]): KeywordPlacementData['firstParagraphAnalysis'] {
  const $ = cheerio.load(html);

  // Find the first real paragraph (not in header/nav)
  const paragraphs = $('main p, article p, .content p, #content p, body > div p').toArray();
  let firstParagraph = '';

  for (const p of paragraphs) {
    const text = $(p).text().trim();
    if (text.length > 50) { // Skip very short paragraphs
      firstParagraph = text;
      break;
    }
  }

  // Fallback to first p tag
  if (!firstParagraph) {
    firstParagraph = $('p').first().text().trim();
  }

  const words = firstParagraph.toLowerCase().split(/\s+/);

  let hasKeyword = false;
  let keywordPosition: number | null = null;

  for (const keyword of keywords) {
    const allWords = keyword.toLowerCase().split(/\s+/).filter(w => w.length > 0);
    const significantWords = allWords.filter(w => w.length > 2);
    // Prefer significant words; only fall back to short ones when the
    // keyword has nothing else to offer.
    const kwWords = significantWords.length > 0 ? significantWords : allWords;
    if (kwWords.length === 0) continue;

    const hitIndex = words.findIndex(word => kwWords.some(kw => word.includes(kw)));
    if (hitIndex !== -1) {
      hasKeyword = true;
      keywordPosition = hitIndex;
      break;
    }
  }

  return {
    hasKeyword,
    keywordPosition,
    firstParagraphPreview: firstParagraph.substring(0, 200) + (firstParagraph.length > 200 ? '...' : ''),
    isWithinFirst100Words: hasKeyword && keywordPosition !== null && keywordPosition < 100,
  };
}
|
|
293
|
+
|
|
294
|
+
/**
 * Analyze the meta description for keyword presence.
 *
 * Keywords are checked in order. A keyword counts as present when at least
 * half of its significant words (longer than 2 characters) occur in the
 * lower-cased description; keywords without any significant word are
 * skipped. The position is derived from the character index of the first
 * keyword word found: below 30 is 'start', within the last 50 characters is
 * 'end', otherwise 'middle'.
 *
 * @param html - Raw HTML of the page.
 * @param keywords - Candidate keyword phrases.
 * @returns Presence flag and approximate position; a missing or empty
 *          description yields `{ hasKeyword: false, position: 'none' }`.
 */
export function analyzeMetaDescription(html: string, keywords: string[]): KeywordPlacementData['metaDescriptionAnalysis'] {
  const $ = cheerio.load(html);
  const metaDesc = $('meta[name="description"]').attr('content')?.trim().toLowerCase() || '';

  if (!metaDesc) {
    return { hasKeyword: false, position: 'none' };
  }

  let hasKeyword = false;
  let position: 'start' | 'middle' | 'end' | 'none' = 'none';

  for (const keyword of keywords) {
    const kwWords = keyword.toLowerCase().split(/\s+/).filter(w => w.length > 2);

    // Guard against the vacuous `0 >= ceil(0 * 0.5)` match.
    if (kwWords.length === 0) continue;

    const matchedWords = kwWords.filter(w => metaDesc.includes(w));
    if (matchedWords.length < Math.ceil(kwWords.length * 0.5)) continue;

    hasKeyword = true;

    // Find approximate position from the first keyword word that is present.
    const firstMatch = matchedWords[0];
    if (firstMatch !== undefined) {
      const index = metaDesc.indexOf(firstMatch);
      if (index < 30) {
        position = 'start';
      } else if (index > metaDesc.length - 50) {
        position = 'end';
      } else {
        position = 'middle';
      }
    }
    break;
  }

  return { hasKeyword, position };
}
|
|
334
|
+
|
|
335
|
+
/**
 * Main entry point: analyze keyword placement across a page.
 *
 * Infers the target keywords, runs the URL, title, H1, first-paragraph and
 * meta-description analyses, and combines them into a 0-100 score:
 *   URL 20, title 25 (partial 10), H1 20 (partial 10),
 *   first paragraph 20 (10 when the keyword appears past word 100),
 *   meta description 15.
 *
 * "Missing keyword" issues are only reported when at least one keyword could
 * be inferred; without a keyword there is nothing the page could be missing.
 * The query-parameter notice does not depend on keywords and is always
 * evaluated.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; also used as the affected URL on issues.
 * @returns The issues found and the detailed placement data.
 */
export function analyzeKeywordPlacement(
  html: string,
  url: string
): { issues: AuditIssue[]; data: KeywordPlacementData } {
  const issues: AuditIssue[] = [];

  // Infer target keywords
  const detectedKeywords = inferTargetKeywords(html, url);
  const hasKeywords = detectedKeywords.length > 0;

  // Run all analyses
  const urlAnalysis = analyzeUrlKeyword(url, detectedKeywords);
  const titleAnalysis = analyzeTitleKeyword(html, detectedKeywords);
  const h1Analysis = analyzeH1Keyword(html, detectedKeywords);
  const firstParagraphAnalysis = analyzeFirstParagraph(html, detectedKeywords);
  const metaDescriptionAnalysis = analyzeMetaDescription(html, detectedKeywords);

  // Calculate overall placement score (weights sum to 100)
  let overallPlacementScore = 0;
  if (urlAnalysis.containsKeyword) overallPlacementScore += 20;
  if (titleAnalysis.hasExactMatch) overallPlacementScore += 25;
  else if (titleAnalysis.hasPartialMatch) overallPlacementScore += 10;
  if (h1Analysis.hasExactMatch) overallPlacementScore += 20;
  else if (h1Analysis.hasPartialMatch) overallPlacementScore += 10;
  if (firstParagraphAnalysis.isWithinFirst100Words) overallPlacementScore += 20;
  else if (firstParagraphAnalysis.hasKeyword) overallPlacementScore += 10;
  if (metaDescriptionAnalysis.hasKeyword) overallPlacementScore += 15;

  // URL doesn't contain keyword (skipped for the bare root path)
  if (hasKeywords && !urlAnalysis.containsKeyword && urlAnalysis.urlSlug.length > 1) {
    issues.push({
      code: 'URL_MISSING_KEYWORD',
      severity: 'warning',
      category: 'on-page',
      title: 'URL does not contain target keyword',
      description: `The URL slug "${urlAnalysis.urlSlug}" doesn't include the apparent target keyword.`,
      impact: 'URLs with keywords help Google understand page relevance. This is a ranking signal.',
      howToFix: 'Include the primary keyword phrase in the URL (e.g., /keyword-phrase/ instead of /page123/).',
      affectedUrls: [url],
      details: {
        currentSlug: urlAnalysis.urlSlug,
        inferredKeywords: detectedKeywords,
      },
    });
  }

  // URL has dynamic parameters
  if (urlAnalysis.hasDynamicParams) {
    issues.push({
      code: 'URL_HAS_PARAMS',
      severity: 'notice',
      category: 'on-page',
      title: 'URL contains query parameters',
      description: 'The URL has query parameters which create a less clean URL structure.',
      impact: 'Clean, keyword-rich URLs are preferred by search engines and users.',
      howToFix: 'Use URL rewriting to create clean, parameter-free URLs where possible.',
      affectedUrls: [url],
    });
  }

  // Title doesn't have exact keyword match
  if (hasKeywords && !titleAnalysis.hasExactMatch) {
    issues.push({
      code: 'TITLE_KEYWORD_MISMATCH',
      severity: 'warning',
      category: 'on-page',
      title: 'Title tag missing exact keyword match',
      description: 'The title tag doesn\'t contain the exact target keyword phrase.',
      impact: 'Google\'s leaked documents reveal a "title match score" that measures keyword alignment.',
      howToFix: 'Include the exact target keyword phrase in the title, preferably near the beginning.',
      affectedUrls: [url],
      details: {
        titleMatchScore: titleAnalysis.titleMatchScore,
        position: titleAnalysis.position,
      },
    });
  }

  // Keyword not in first paragraph, or present but too late
  if (hasKeywords && !firstParagraphAnalysis.hasKeyword) {
    issues.push({
      code: 'FIRST_PARA_NO_KEYWORD',
      severity: 'warning',
      category: 'on-page',
      title: 'Primary keyword not in first paragraph',
      description: 'The target keyword doesn\'t appear in the opening paragraph.',
      impact: 'Including keywords early in content signals relevance to search engines.',
      howToFix: 'Add the primary keyword naturally within the first 100 words of your content.',
      affectedUrls: [url],
      details: {
        preview: firstParagraphAnalysis.firstParagraphPreview,
      },
    });
  } else if (firstParagraphAnalysis.hasKeyword && !firstParagraphAnalysis.isWithinFirst100Words) {
    issues.push({
      code: 'KEYWORD_TOO_FAR',
      severity: 'notice',
      category: 'on-page',
      title: 'Keyword appears late in first paragraph',
      description: `Keyword found at word position ${firstParagraphAnalysis.keywordPosition}, ideally should be in first 100 words.`,
      impact: 'Earlier keyword placement may slightly improve relevance signals.',
      howToFix: 'Move the keyword mention closer to the beginning of your content.',
      affectedUrls: [url],
    });
  }

  // H1 missing keyword (only meaningful when the page actually has an H1)
  if (hasKeywords && !h1Analysis.hasExactMatch && !h1Analysis.hasPartialMatch && h1Analysis.h1Text) {
    issues.push({
      code: 'H1_MISSING_KEYWORD',
      severity: 'notice',
      category: 'on-page',
      title: 'H1 doesn\'t contain target keyword',
      description: `The H1 "${h1Analysis.h1Text}" doesn't include the target keyword.`,
      impact: 'H1 is a strong on-page signal for topic relevance.',
      howToFix: 'Include the primary keyword in your H1 headline.',
      affectedUrls: [url],
    });
  }

  return {
    issues,
    data: {
      detectedKeywords,
      urlAnalysis,
      titleAnalysis,
      h1Analysis,
      firstParagraphAnalysis,
      metaDescriptionAnalysis,
      overallPlacementScore,
    },
  };
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import { httpHead } from '../../utils/http.js';
|
|
3
|
+
import type { AuditIssue } from '../types.js';
|
|
4
|
+
import { ISSUE_DEFINITIONS } from '../types.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Link inventory for a single page, as produced by the link analysis.
 */
export interface LinkData {
  /** Links whose hostname equals the page's hostname (absolute URLs). */
  internal: Array<{ href: string; text: string; nofollow: boolean }>;
  /** Links pointing at any other hostname (absolute URLs). */
  external: Array<{ href: string; text: string; nofollow: boolean }>;
  /** Number of internal plus external links. */
  totalLinks: number;
  /** Internal link URLs that were found to be broken. */
  brokenInternal: Array<string>;
  /** External link URLs that were found to be broken. */
  brokenExternal: Array<string>;
  /** Link counts and their ratio. */
  ratio: {
    /** Number of internal links. */
    internal: number;
    /** Number of external links. */
    external: number;
    internalToExternalRatio: number | null; // null if no external links
  };
}
|
|
18
|
+
|
|
19
|
+
/**
 * Probe a list of links with HEAD requests, one at a time, and return the
 * URLs that answered with a status of 400 or above.
 *
 * @param links - Links to probe (already limited by the caller).
 * @param treatErrorsAsBroken - When true, a request that throws (for example
 *        a timeout) is also reported as broken.
 */
async function findBrokenLinks(
  links: { href: string }[],
  treatErrorsAsBroken: boolean
): Promise<string[]> {
  const broken: string[] = [];
  for (const { href } of links) {
    try {
      const response = await httpHead(href, {
        timeout: 5000,
        maxRedirects: 5,
        validateStatus: () => true,
      });
      if (response.status >= 400) {
        broken.push(href);
      }
    } catch {
      if (treatErrorsAsBroken) {
        broken.push(href);
      }
    }
  }
  return broken;
}

/**
 * Extract and audit the hyperlinks of a page.
 *
 * Every `<a href>` is resolved against `baseUrl`. Fragment-only links and
 * links that do not resolve to http(s) (javascript:, mailto:, tel:, data:,
 * ... in any letter case) are ignored. The remaining links are split into
 * internal (same hostname as `baseUrl`) and external.
 *
 * Reported issues: nofollow on internal links, more than 100 links, no
 * internal links, no external links (on pages with more than 5 links), a
 * poor internal-to-external ratio and, when `checkBroken` is set, broken
 * links.
 *
 * @param html - Raw HTML of the page.
 * @param baseUrl - Absolute URL of the page; an invalid value makes
 *        `new URL` throw.
 * @param checkBroken - When true, the first 10 internal and first 5 external
 *        links are probed sequentially with HEAD requests. Request failures
 *        count as broken for internal links only.
 * @returns The issues found and the collected link data.
 */
export async function analyzeLinks(
  html: string,
  baseUrl: string,
  checkBroken: boolean = false
): Promise<{ issues: AuditIssue[]; data: LinkData }> {
  const issues: AuditIssue[] = [];
  const $ = cheerio.load(html);
  const baseHostname = new URL(baseUrl).hostname;

  const internal: LinkData['internal'] = [];
  const external: LinkData['external'] = [];

  // Extract all links
  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') || '').trim();
    const text = $(el).text().trim();

    // `rel` is a list of tokens; compare whole tokens case-insensitively
    // instead of searching for a substring.
    const relTokens = ($(el).attr('rel') || '').toLowerCase().split(/[\s,]+/);
    const nofollow = relTokens.includes('nofollow');

    // Same-page anchors are not navigational links.
    if (href.startsWith('#')) {
      return;
    }

    let resolved: URL;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      // Invalid URL, skip
      return;
    }

    // Only web links are audited. Checking the parsed protocol also catches
    // schemes written in another letter case and schemes such as data: or sms:.
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
      return;
    }

    const fullUrl = resolved.href;
    if (resolved.hostname === baseHostname) {
      internal.push({ href: fullUrl, text, nofollow });
      if (nofollow) {
        issues.push({
          ...ISSUE_DEFINITIONS.NOFOLLOW_INTERNAL,
          affectedUrls: [baseUrl],
          details: { link: fullUrl, anchorText: text },
        });
      }
    } else {
      external.push({ href: fullUrl, text, nofollow });
    }
  });

  const internalCount = internal.length;
  const externalCount = external.length;
  const totalLinks = internalCount + externalCount;

  // Calculate link ratio (undefined without external links)
  const internalToExternalRatio = externalCount > 0 ? internalCount / externalCount : null;

  // Check for too many links
  if (totalLinks > 100) {
    issues.push({
      ...ISSUE_DEFINITIONS.TOO_MANY_LINKS,
      affectedUrls: [baseUrl],
      details: { totalLinks, internal: internalCount, external: externalCount },
    });
  }

  // Check for no internal links (bad for crawlability and user navigation)
  if (internalCount === 0 && totalLinks > 0) {
    issues.push({
      code: 'LINKS_NO_INTERNAL',
      severity: 'warning',
      category: 'links',
      title: 'No internal links found',
      description: 'This page has no internal links to other pages on your site.',
      impact: 'Internal links are crucial for SEO. They help search engines discover pages and distribute link equity throughout your site.',
      howToFix: 'Add relevant internal links to related content, navigation, or related articles section.',
      affectedUrls: [baseUrl],
      details: { totalLinks, external: externalCount },
    });
  }

  // Check for no external links (pages should reference external resources)
  if (externalCount === 0 && totalLinks > 5) {
    issues.push({
      code: 'LINKS_NO_EXTERNAL',
      severity: 'notice',
      category: 'links',
      title: 'No external links found',
      description: 'This page has no links to external websites.',
      impact: 'Linking to high-quality external sources can improve credibility and help search engines understand your content context.',
      howToFix: 'Add links to authoritative external sources that support your content. This builds trust and provides value to readers.',
      affectedUrls: [baseUrl],
      details: { totalLinks, internal: internalCount },
    });
  }

  // Check for poor internal-to-external ratio (too many external vs internal)
  if (externalCount > internalCount && externalCount > 5 && internalCount < 3) {
    issues.push({
      code: 'LINKS_RATIO_POOR',
      severity: 'warning',
      category: 'links',
      title: 'Poor internal-to-external link ratio',
      description: `Page has ${externalCount} external links but only ${internalCount} internal links.`,
      impact: 'A healthy page should have more internal links than external links. Too many external links can dilute page authority and may look spammy.',
      howToFix: 'Add more relevant internal links to balance the ratio. Aim for at least 2-3 internal links for every external link.',
      affectedUrls: [baseUrl],
      details: {
        internal: internalCount,
        external: externalCount,
        // A ratio of 0 (no internal links) cannot be inverted, hence 'N/A'.
        ratio: internalToExternalRatio ? `1:${(1/internalToExternalRatio).toFixed(1)}` : 'N/A',
        recommendation: 'Aim for a ratio of at least 2:1 (internal:external)',
      },
    });
  }

  // Check for broken links (limited to avoid too many requests)
  const brokenInternal: string[] = checkBroken
    ? await findBrokenLinks(internal.slice(0, 10), true)
    : [];
  // Don't count timeouts as broken for external links
  const brokenExternal: string[] = checkBroken
    ? await findBrokenLinks(external.slice(0, 5), false)
    : [];

  for (const brokenLink of brokenInternal) {
    issues.push({
      ...ISSUE_DEFINITIONS.BROKEN_INTERNAL_LINK,
      affectedUrls: [baseUrl],
      details: { brokenLink },
    });
  }

  for (const brokenLink of brokenExternal) {
    issues.push({
      ...ISSUE_DEFINITIONS.BROKEN_EXTERNAL_LINK,
      affectedUrls: [baseUrl],
      details: { brokenLink },
    });
  }

  return {
    issues,
    data: {
      internal,
      external,
      totalLinks,
      brokenInternal,
      brokenExternal,
      ratio: {
        internal: internalCount,
        external: externalCount,
        internalToExternalRatio,
      },
    }
  };
}
|