@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
// Keyword Prioritization Algorithm
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
SiteProfile,
|
|
5
|
+
KeywordData,
|
|
6
|
+
KeywordOpportunity,
|
|
7
|
+
KeywordAction,
|
|
8
|
+
KeywordResearchResult,
|
|
9
|
+
} from './types.js';
|
|
10
|
+
import { getMaxKdThreshold, PRIORITY_WEIGHTS } from './types.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Scores, categorizes and buckets keywords for a site.
 *
 * Each keyword is enriched with a priority score, a difficulty category and a
 * suggested action. The enriched list is ordered from highest to lowest
 * priority score and then split into quick-win / medium-term / long-term
 * buckets, which keep that ordering.
 *
 * @param keywords - Keyword data to evaluate.
 * @param siteProfile - Profile used to derive the KD threshold and the scoring.
 * @param existingMeta - Current title / description / H1, when known.
 * @returns Ranked opportunities, the three buckets, recommendations and the
 *          KD threshold that was applied.
 */
export function prioritizeKeywords(
  keywords: KeywordData[],
  siteProfile: SiteProfile,
  existingMeta?: { title?: string; description?: string; h1?: string }
): KeywordResearchResult {
  const maxKdThreshold = getMaxKdThreshold(siteProfile);

  // Enrich every keyword, then rank the enriched list by descending score.
  const ranked: KeywordOpportunity[] = keywords.map((entry) => {
    const priorityScore = calculatePriorityScore(entry, siteProfile, maxKdThreshold);
    const category = categorizeKeyword(entry.keywordDifficulty, maxKdThreshold);
    const suggestedAction = suggestAction(entry, existingMeta, category);
    return { ...entry, priorityScore, category, suggestedAction };
  });
  ranked.sort((left, right) => right.priorityScore - left.priorityScore);

  // Single pass over the ranked list; each bucket inherits the ranking order.
  const quickWins: KeywordOpportunity[] = [];
  const mediumTerm: KeywordOpportunity[] = [];
  const longTerm: KeywordOpportunity[] = [];
  for (const opportunity of ranked) {
    if (opportunity.category === 'quick-win') {
      quickWins.push(opportunity);
    } else if (opportunity.category === 'medium-term') {
      mediumTerm.push(opportunity);
    } else if (opportunity.category === 'long-term') {
      longTerm.push(opportunity);
    }
  }

  const recommendations = generateRecommendations(siteProfile, quickWins, mediumTerm);

  return {
    siteProfile,
    keywords: ranked,
    quickWins,
    mediumTerm,
    longTerm,
    recommendations,
    maxKdThreshold,
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Computes a keyword's priority as the weighted sum of three sub-scores:
 * business value, inverse difficulty and traffic potential, using the weights
 * in `PRIORITY_WEIGHTS`.
 *
 * @param keyword - Keyword to score.
 * @param profile - Site profile; only `businessGoal` is read here.
 * @param maxKd - Highest keyword difficulty considered reachable for the site.
 * @returns The weighted score rounded to the nearest integer; always a finite
 *          number as long as the weights and business score are finite.
 */
function calculatePriorityScore(
  keyword: KeywordData,
  profile: SiteProfile,
  maxKd: number
): number {
  const businessScore = calculateBusinessValue(keyword, profile.businessGoal);

  // Inverse difficulty: 100 at KD 0, falling linearly to 0 at the threshold;
  // anything above the threshold scores 0. A non-positive threshold would turn
  // the division into NaN/Infinity, and a NaN score makes the caller's sort
  // comparator inconsistent, so that case also scores 0.
  const difficulty = keyword.keywordDifficulty;
  let difficultyScore = 0;
  if (maxKd > 0 && difficulty <= maxKd) {
    difficultyScore = 100 - (difficulty / maxKd) * 100;
  }

  // Traffic: 1000+ searches saturate at 100. Missing or non-numeric volume is
  // treated as zero traffic instead of poisoning the sum with NaN.
  const volume = Number.isFinite(keyword.searchVolume) ? keyword.searchVolume : 0;
  const trafficScore = Math.min((volume / 1000) * 100, 100);

  const weighted =
    businessScore * PRIORITY_WEIGHTS.businessValue +
    difficultyScore * PRIORITY_WEIGHTS.difficulty +
    trafficScore * PRIORITY_WEIGHTS.trafficPotential;

  return Math.round(weighted);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Rates how valuable a keyword's search intent is for a business goal.
 *
 * The keyword's own `intent` is used when it is set; otherwise the intent is
 * inferred from the keyword text.
 *
 * @param keyword - Keyword whose intent is rated.
 * @param goal - Business goal selecting the weight table.
 * @returns The weight for the (goal, intent) pair, or 50 when the intent has
 *          no entry in the table.
 */
function calculateBusinessValue(keyword: KeywordData, goal: SiteProfile['businessGoal']): number {
  const resolvedIntent = keyword.intent ? keyword.intent : inferIntent(keyword.keyword);

  // Weight of each intent, per business goal.
  const weightsByGoal: Record<SiteProfile['businessGoal'], Record<string, number>> = {
    signups: { transactional: 100, commercial: 80, informational: 40, navigational: 20 },
    purchases: { transactional: 100, commercial: 90, informational: 30, navigational: 20 },
    leads: { commercial: 100, transactional: 80, informational: 60, navigational: 20 },
    awareness: { informational: 100, commercial: 60, navigational: 40, transactional: 30 },
  };

  const weight = weightsByGoal[goal][resolvedIntent];
  if (weight) {
    return weight;
  }
  return 50;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Guesses the search intent of a keyword from trigger words.
 *
 * Rules are checked in a fixed order — transactional, then commercial, then
 * navigational — and the first match wins; anything else is informational.
 * Trigger words must appear as whole words (case-insensitive).
 *
 * @param keyword - Keyword or search phrase.
 * @returns The inferred intent.
 */
function inferIntent(keyword: string): 'informational' | 'commercial' | 'transactional' | 'navigational' {
  const kw = keyword.toLowerCase();

  // Transactional indicators
  if (/\b(buy|purchase|order|subscribe|download|get|try|free trial)\b/.test(kw)) {
    return 'transactional';
  }

  // Commercial investigation. Because of the word boundaries, inflected forms
  // have to be listed explicitly: without them "slack alternatives" or
  // "notion reviews" would fall through to informational.
  if (/\b(best|top|reviews?|compar(?:e|ison)s?|vs|versus|alternatives?|pric(?:ing|es?)|costs?)\b/.test(kw)) {
    return 'commercial';
  }

  // Navigational
  if (/\b(login|sign in|website|official|app)\b/.test(kw)) {
    return 'navigational';
  }

  // Default to informational
  return 'informational';
}
|
|
136
|
+
|
|
137
|
+
/**
 * Places a keyword difficulty into a time-horizon bucket.
 *
 * @param kd - Keyword difficulty of the keyword.
 * @param maxKd - Highest difficulty considered reachable for the site.
 * @returns `'quick-win'` up to half of `maxKd` (never above 15),
 *          `'medium-term'` up to `maxKd`, otherwise `'long-term'`.
 */
function categorizeKeyword(kd: number, maxKd: number): 'quick-win' | 'medium-term' | 'long-term' {
  const quickWinCeiling = Math.min(maxKd * 0.5, 15);

  return kd <= quickWinCeiling ? 'quick-win' : kd <= maxKd ? 'medium-term' : 'long-term';
}
|
|
145
|
+
|
|
146
|
+
/**
 * Picks the next action to take for a keyword.
 *
 * Quick wins target on-page elements in priority order: title, then H1, then
 * meta description. Title and description are only suggested when a current
 * value exists and lacks the keyword; the H1 is suggested whenever it lacks
 * the keyword, including when there is no H1 at all. Matching is
 * case-insensitive. Every other category results in a content-creation action.
 *
 * @param keyword - Keyword the action is for.
 * @param existingMeta - Current title / description / H1, when known.
 * @param category - Bucket the keyword was placed in.
 * @returns The suggested action.
 */
function suggestAction(
  keyword: KeywordData,
  existingMeta: { title?: string; description?: string; h1?: string } | undefined,
  category: 'quick-win' | 'medium-term' | 'long-term'
): KeywordAction {
  const phrase = keyword.keyword;
  const needle = phrase.toLowerCase();
  const mentionsKeyword = (text: string | undefined): boolean =>
    text != null && text.toLowerCase().includes(needle);

  if (category !== 'quick-win') {
    return category === 'medium-term'
      ? {
          // Medium-term: may need content creation
          type: 'create-content',
          description: `Create dedicated content targeting "${phrase}"`,
        }
      : {
          // Long-term: need authority building first
          type: 'create-content',
          description: `Build authority first, then target "${phrase}" (high competition)`,
        };
  }

  // Quick wins: optimize existing elements
  const currentTitle = existingMeta?.title;
  if (currentTitle && !mentionsKeyword(currentTitle)) {
    return {
      type: 'add-to-title',
      description: `Add "${phrase}" to your title tag`,
      targetElement: 'title',
      currentValue: currentTitle,
      suggestedValue: generateTitleWithKeyword(currentTitle, phrase),
    };
  }

  const currentH1 = existingMeta?.h1;
  if (!mentionsKeyword(currentH1)) {
    return {
      type: 'add-to-h1',
      description: `Include "${phrase}" in your H1 heading`,
      targetElement: 'h1',
      currentValue: currentH1,
      suggestedValue: generateH1WithKeyword(currentH1, phrase),
    };
  }

  const currentDescription = existingMeta?.description;
  if (currentDescription && !mentionsKeyword(currentDescription)) {
    return {
      type: 'add-to-meta',
      description: `Add "${phrase}" to your meta description`,
      targetElement: 'meta[name="description"]',
      currentValue: currentDescription,
      suggestedValue: generateDescriptionWithKeyword(currentDescription, phrase),
    };
  }

  return {
    type: 'optimize-existing',
    description: `Optimize existing content for "${phrase}"`,
  };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Builds a suggested title that leads with the keyword.
 *
 * - Titles shorter than 30 characters get the keyword prepended with " - ".
 * - Longer titles containing " - " keep only their last segment after the
 *   keyword, joined with " | ".
 * - Any other title is appended after " | " and the result is cut to 60
 *   characters.
 *
 * @param currentTitle - Existing title text.
 * @param keyword - Keyword to add.
 * @returns The suggested title.
 */
function generateTitleWithKeyword(currentTitle: string, keyword: string): string {
  const isShortTitle = currentTitle.length < 30;
  if (isShortTitle) {
    return `${capitalizeFirst(keyword)} - ${currentTitle}`;
  }

  const segments = currentTitle.split(' - ');
  if (segments.length < 2) {
    const combined = `${capitalizeFirst(keyword)} | ${currentTitle}`;
    return combined.substring(0, 60);
  }

  const lastSegment = segments[segments.length - 1];
  return `${capitalizeFirst(keyword)} | ${lastSegment}`;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Builds a suggested H1 that leads with the keyword.
 *
 * @param currentH1 - Existing H1 text, if any.
 * @param keyword - Keyword to add.
 * @returns The capitalized keyword alone when there is no current H1,
 *          otherwise the capitalized keyword, " - ", and the current H1.
 */
function generateH1WithKeyword(currentH1: string | undefined, keyword: string): string {
  const lead = capitalizeFirst(keyword);
  return currentH1 ? `${lead} - ${currentH1}` : lead;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Builds a suggested meta description that mentions the keyword.
 *
 * Only the keyword's first space-separated word is checked: when the
 * description already contains it (case-insensitive) the description is
 * returned unchanged. Otherwise the capitalized keyword and ": " are
 * prepended and the result is cut to 160 characters.
 *
 * @param currentDesc - Existing meta description.
 * @param keyword - Keyword to add.
 * @returns The suggested description.
 */
function generateDescriptionWithKeyword(currentDesc: string, keyword: string): string {
  const leadingWord = keyword.toLowerCase().split(' ')[0];
  const alreadyCovered = currentDesc.toLowerCase().includes(leadingWord);
  if (alreadyCovered) {
    return currentDesc;
  }

  return `${capitalizeFirst(keyword)}: ${currentDesc}`.substring(0, 160);
}
|
|
244
|
+
|
|
245
|
+
/**
 * Upper-cases the first UTF-16 code unit of a string and leaves the rest as is.
 *
 * @param str - Text to capitalize; an empty string is returned unchanged.
 * @returns The text with its first character upper-cased.
 */
function capitalizeFirst(str: string): string {
  const head = str.substring(0, 1);
  const tail = str.substring(1);
  return `${head.toUpperCase()}${tail}`;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Produces plain-language recommendations from the site profile and the
 * sizes of the quick-win and medium-term buckets.
 *
 * @param profile - Site profile; reads `domainAge`, `backlinkCount` and
 *                  `contentCapacity`.
 * @param quickWins - Keywords categorized as quick wins.
 * @param mediumTerm - Keywords categorized as medium-term.
 * @returns Recommendation sentences in a fixed order: domain age, backlinks,
 *          content capacity, quick wins, medium-term.
 */
function generateRecommendations(
  profile: SiteProfile,
  quickWins: KeywordOpportunity[],
  mediumTerm: KeywordOpportunity[]
): string[] {
  const isNewDomain = profile.domainAge === 'new';
  const quickWinCount = quickWins.length;
  const mediumTermCount = mediumTerm.length;
  const tips: string[] = [];

  // New domains: start small and build topical authority.
  if (isNewDomain) {
    tips.push(
      'Focus on quick-win keywords (KD < 15) first to build initial traffic and authority.',
      'Consider creating long-form, comprehensive content to establish topical authority.'
    );
  }

  // Weak link profile.
  const hasWeakLinkProfile = profile.backlinkCount === 'few' || profile.backlinkCount === 'none';
  if (hasWeakLinkProfile) {
    tips.push(
      'Prioritize building quality backlinks to increase your ranking potential for competitive keywords.'
    );
  }

  // Limited content capacity with more quick wins than can be handled at once.
  if (quickWinCount > 2 && profile.contentCapacity === 'low') {
    const focusCount = Math.min(2, quickWinCount);
    tips.push(`Focus on your top ${focusCount} quick-win keywords first, then expand.`);
  }

  if (quickWinCount > 0) {
    tips.push(
      `You have ${quickWinCount} quick-win keyword opportunities that you can rank for relatively quickly.`
    );
  }

  // Medium-term opportunities are only mentioned for domains that are not new.
  if (!isNewDomain && mediumTermCount > 0) {
    tips.push(
      `${mediumTermCount} medium-difficulty keywords are within reach with focused content and some link building.`
    );
  }

  return tips;
}
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Site Crawler for Keyword Research
|
|
3
|
+
*
|
|
4
|
+
* Crawls an entire site to collect text content for AI analysis.
|
|
5
|
+
* Used to understand what the site does and generate relevant keywords.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as cheerio from 'cheerio';
|
|
9
|
+
import { httpGet } from '../utils/http.js';
|
|
10
|
+
|
|
11
|
+
/** Content and classification extracted from a single crawled page. */
export interface CrawledPage {
  /** URL the page was fetched from. */
  url: string;

  // --- Text extracted from the document ---
  /** Text of the document's `<title>`. */
  title: string;
  /** Content of the meta description tag, or an empty string when absent. */
  description: string;
  /** Text of the first `<h1>` found on the page. */
  h1: string;
  /** Text of every non-empty `<h2>`. */
  h2s: string[];
  /** Whitespace-collapsed text of the main content area, length-limited for AI processing. */
  mainContent: string;
  /** Word count of the page's main content. */
  wordCount: number;

  /**
   * Raw `href` values collected from the page's anchors. Despite the name
   * they are not yet resolved or filtered by host at this point.
   */
  internalLinks: string[];

  // --- URL-based page type flags (a page may match several) ---
  isProductPage: boolean;
  isPricingPage: boolean;
  isBlogPost: boolean;
  isFeaturePage: boolean;
}
|
|
25
|
+
|
|
26
|
+
/** Aggregated outcome of crawling one site. */
export interface SiteCrawlResult {
  /** Hostname of the start URL. */
  domain: string;
  /** Successfully crawled pages, in crawl order. */
  pages: CrawledPage[];
  /** Title, description and main content of every page joined into one text. */
  aggregatedContent: string;
  /** Sum of the pages' `wordCount`. */
  totalWordCount: number;
  /** De-duplicated H1 and H2 texts across all pages. */
  uniqueHeadings: string[];
  /**
   * Number of pages per detected type. The type flags are independent, so a
   * page can be counted under several types; `other` counts pages matching none.
   */
  detectedPageTypes: {
    product: number;
    pricing: number;
    blog: number;
    feature: number;
    other: number;
  };
  /** Bookkeeping for the crawl run. */
  crawlStats: {
    /** URLs for which a fetch was started. */
    attempted: number;
    /** Pages fetched and parsed successfully. */
    succeeded: number;
    /** Fetches that threw. */
    failed: number;
    /** Wall-clock duration in milliseconds. */
    duration: number;
  };
}
|
|
46
|
+
|
|
47
|
+
/**
 * URL fragments that mark a link as not worth crawling: directory markers
 * (entries starting with "/") and file extensions (entries starting with ".").
 */
const EXCLUDED_PATHS = [
  // CDN, CMS, API and framework-internal directories
  '/cdn-cgi/', '/wp-admin/', '/wp-includes/', '/wp-json/', '/api/',
  '/admin/', '/_next/', '/static/', '/assets/',
  // Data feeds and downloads
  '.xml', '.json', '.pdf', '.zip',
  // Images
  '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
  // Stylesheets and scripts
  '.css', '.js',
];
|
|
70
|
+
|
|
71
|
+
/**
 * URL fragments used to classify a page by type. A page may match several
 * types (for example "/features" contains both "/features" and "/feature").
 */
const PAGE_TYPE_PATTERNS = {
  product: [
    '/product',
    '/features',
    '/solutions',
    '/platform',
    '/tour',
  ],
  pricing: [
    '/pricing',
    '/plans',
    '/subscription',
  ],
  blog: [
    '/blog',
    '/news',
    '/articles',
    '/posts',
    '/journal',
  ],
  feature: [
    '/feature',
    '/capability',
    '/integration',
  ],
};
|
|
77
|
+
|
|
78
|
+
/**
 * Crawl a site breadth-first from `startUrl` and collect its text content.
 *
 * Only URLs on the same hostname as `startUrl` are fetched, and URLs whose
 * path matches `EXCLUDED_PATHS` are skipped. Pages that fail to load are
 * counted in `crawlStats.failed` and otherwise ignored. Progress is written
 * to the console.
 *
 * @param startUrl - Absolute URL to start from; `new URL` throws when it is invalid.
 * @param options - Crawl limits:
 *   - `maxPages` (default 30): stop once this many pages were crawled successfully.
 *   - `maxDepth` (default 3): links are only followed from pages fewer than
 *     this many hops away from the start page.
 *   - `timeout` (default 10000): passed to each page fetch — presumably
 *     milliseconds; confirm against `httpGet`.
 * @returns The crawled pages plus aggregated content, headings, page-type
 *          counts and crawl statistics.
 */
export async function crawlSite(
  startUrl: string,
  options: {
    maxPages?: number;
    maxDepth?: number;
    timeout?: number;
  } = {}
): Promise<SiteCrawlResult> {
  const { maxPages = 30, maxDepth = 3, timeout = 10000 } = options;

  const startTime = Date.now();
  const parsedUrl = new URL(startUrl);
  const domain = parsedUrl.hostname;
  // `origin` keeps a non-default port (e.g. http://localhost:3000). Rebuilding
  // the base from protocol + hostname would silently drop it.
  const baseUrl = parsedUrl.origin;

  const visited = new Set<string>();
  // URLs already placed in the queue, so the same link found on many pages is
  // queued once. With FIFO order the first occurrence has the smallest depth.
  const queued = new Set<string>();
  const toVisit: Array<{ url: string; depth: number }> = [{ url: startUrl, depth: 0 }];
  const pages: CrawledPage[] = [];
  let failed = 0;

  console.log(`🕷️ Crawling ${domain} (max ${maxPages} pages)...`);

  while (toVisit.length > 0 && pages.length < maxPages) {
    const next = toVisit.shift();
    if (!next) break;
    const { url, depth } = next;

    // Normalize URL
    const normalizedUrl = normalizeUrl(url, baseUrl);
    if (!normalizedUrl || visited.has(normalizedUrl)) continue;

    let target: URL;
    try {
      target = new URL(normalizedUrl);
    } catch {
      continue;
    }

    // Only crawl same domain
    if (target.hostname !== domain) continue;

    // Skip excluded paths. Match against the path only: testing the whole URL
    // would exclude every page of a host such as "docs.js.org" (".js") or
    // "www.icon.com" (".ico"), and ".js" would also hit ".json" mid-URL.
    const path = target.pathname.toLowerCase();
    const isExcluded = EXCLUDED_PATHS.some((pattern) =>
      pattern.startsWith('.') ? path.endsWith(pattern) : path.includes(pattern)
    );
    if (isExcluded) continue;

    visited.add(normalizedUrl);

    try {
      const page = await crawlPage(normalizedUrl, timeout);
      pages.push(page);

      // Add links to the queue (if not at max depth)
      if (depth < maxDepth) {
        for (const link of page.internalLinks) {
          // Resolve against the page the link was found on, so document-relative
          // hrefs ("next-post", "../pricing", "?page=2") land on the right path
          // instead of the site root.
          const normalizedLink = normalizeUrl(link, normalizedUrl);
          if (normalizedLink && !visited.has(normalizedLink) && !queued.has(normalizedLink)) {
            queued.add(normalizedLink);
            toVisit.push({ url: normalizedLink, depth: depth + 1 });
          }
        }
      }

      // Progress indicator
      if (pages.length % 5 === 0) {
        console.log(` Crawled ${pages.length} pages...`);
      }
    } catch {
      // Best effort: a page that cannot be fetched or parsed is only counted.
      failed++;
    }
  }

  const duration = Date.now() - startTime;

  // Aggregate content
  const aggregatedContent = pages
    .map((p) => `# ${p.title}\n${p.description}\n${p.mainContent}`)
    .join('\n\n---\n\n');

  const totalWordCount = pages.reduce((sum, p) => sum + p.wordCount, 0);

  // Collect unique headings
  const allHeadings = new Set<string>();
  for (const p of pages) {
    if (p.h1) allHeadings.add(p.h1);
    for (const h2 of p.h2s) allHeadings.add(h2);
  }

  // Count page types (flags are independent, so a page can count more than once)
  const detectedPageTypes = {
    product: pages.filter((p) => p.isProductPage).length,
    pricing: pages.filter((p) => p.isPricingPage).length,
    blog: pages.filter((p) => p.isBlogPost).length,
    feature: pages.filter((p) => p.isFeaturePage).length,
    other: pages.filter(
      (p) => !p.isProductPage && !p.isPricingPage && !p.isBlogPost && !p.isFeaturePage
    ).length,
  };

  console.log(`✅ Crawled ${pages.length} pages in ${(duration / 1000).toFixed(1)}s`);

  return {
    domain,
    pages,
    aggregatedContent,
    totalWordCount,
    uniqueHeadings: Array.from(allHeadings),
    detectedPageTypes,
    crawlStats: {
      attempted: visited.size,
      succeeded: pages.length,
      failed,
      duration,
    },
  };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Crawl a single page: fetch it, extract its text and links, and classify it
 * by URL.
 *
 * @param url - Absolute URL of the page.
 * @param timeout - Passed through to `httpGet`.
 * @returns The extracted page data. `mainContent` is capped at 5000
 *          characters (plus "..."); `wordCount` counts the words of the full
 *          main content, before that cap is applied.
 * @throws Whatever `httpGet` throws. Only status 200 passes `validateStatus`;
 *         presumably other statuses are rejected — confirm against `httpGet`.
 */
async function crawlPage(url: string, timeout: number): Promise<CrawledPage> {
  const response = await httpGet<string>(url, {
    timeout,
    validateStatus: (status) => status === 200,
  });

  const $ = cheerio.load(response.data);

  // Collect links BEFORE stripping page chrome. Navigation, header and footer
  // are where most site-wide links live; extracting after the removal below
  // would hide them from the crawler.
  const internalLinks: string[] = [];
  $('a[href]').each((_, el) => {
    const href = $(el).attr('href');
    if (href && !href.startsWith('mailto:') && !href.startsWith('tel:') && !href.startsWith('#')) {
      internalLinks.push(href);
    }
  });

  // Remove non-content elements
  $('script, style, noscript, iframe, nav, footer, header, aside, [role="navigation"]').remove();

  // Extract content. `.first()` keeps the <title> of inline SVG icons from
  // being concatenated onto the document title.
  const title = $('title').first().text().trim();
  const description = $('meta[name="description"]').attr('content')?.trim() || '';
  const h1 = $('h1').first().text().trim();
  const h2s = $('h2')
    .map((_, el) => $(el).text().trim())
    .get()
    .filter((h) => h.length > 0);

  // Get main content: the first selector that matches anything wins
  const mainSelectors = ['main', 'article', '[role="main"]', '.content', '#content', '.post-content'];
  let rawText = '';
  for (const selector of mainSelectors) {
    const el = $(selector);
    if (el.length > 0) {
      rawText = el.text().trim();
      break;
    }
  }

  // Fallback to body
  if (!rawText) {
    rawText = $('body').text().trim();
  }

  // Collapse every whitespace run (newlines included) into a single space
  const fullText = rawText.replace(/\s+/g, ' ').trim();

  // Count words on the full text; an empty page has zero words, not one
  const wordCount = fullText === '' ? 0 : fullText.split(' ').length;

  // Limit content length for AI processing
  const mainContent = fullText.length > 5000 ? fullText.substring(0, 5000) + '...' : fullText;

  // Detect page type
  const urlLower = url.toLowerCase();
  const isProductPage = PAGE_TYPE_PATTERNS.product.some((p) => urlLower.includes(p));
  const isPricingPage = PAGE_TYPE_PATTERNS.pricing.some((p) => urlLower.includes(p));
  const isBlogPost = PAGE_TYPE_PATTERNS.blog.some((p) => urlLower.includes(p));
  const isFeaturePage = PAGE_TYPE_PATTERNS.feature.some((p) => urlLower.includes(p));

  return {
    url,
    title,
    description,
    h1,
    h2s,
    mainContent,
    wordCount,
    internalLinks,
    isProductPage,
    isPricingPage,
    isBlogPost,
    isFeaturePage,
  };
}
|
|
276
|
+
|
|
277
|
+
/**
 * Normalize a URL so that equivalent links compare equal.
 *
 * The URL is made absolute (relative references are resolved against
 * `baseUrl`), its fragment is dropped, and one trailing slash is removed from
 * the path unless the path is the root. The query string is left untouched.
 *
 * @param url - Absolute URL or relative reference.
 * @param baseUrl - Absolute URL used to resolve relative references.
 * @returns The normalized absolute URL, or `null` when it cannot be parsed.
 */
function normalizeUrl(url: string, baseUrl: string): string | null {
  try {
    // Let the URL parser decide whether `url` is absolute. A prefix test such
    // as startsWith('http') misreads relative paths like "http-caching-guide".
    let urlObj: URL;
    try {
      urlObj = new URL(url);
    } catch {
      urlObj = new URL(url, baseUrl);
    }

    // Remove hash
    urlObj.hash = '';

    // Remove trailing slash (except for root). Working on the path rather than
    // the whole href keeps a query value ending in "/" intact and also
    // normalizes "/docs/?page=2".
    if (urlObj.pathname.length > 1 && urlObj.pathname.endsWith('/')) {
      urlObj.pathname = urlObj.pathname.slice(0, -1);
    }

    return urlObj.href;
  } catch {
    return null;
  }
}
|
|
300
|
+
|
|
301
|
+
/**
 * Extract key phrases from crawled content.
 *
 * Phrases are collected, lower-cased, in this order: title segments of every
 * page, then the unique headings, then two-word sequences from every page
 * description. Duplicates are dropped and at most the first 100 phrases are
 * returned.
 *
 * @param crawlResult - Result of a site crawl; reads `pages` (title,
 *                      description) and `uniqueHeadings`.
 * @returns Up to 100 distinct lower-case phrases.
 */
export function extractKeyPhrases(crawlResult: SiteCrawlResult): string[] {
  const phrases = new Set<string>();

  // Title separators: "|", ":", en/em dashes, and a hyphen only when it is
  // surrounded by whitespace, so hyphenated words ("e-commerce") stay whole.
  const titleSeparator = /\s+-\s+|[|–—:]/;

  // From titles
  for (const page of crawlResult.pages) {
    if (!page.title) continue;
    for (const segment of page.title.split(titleSeparator)) {
      const part = segment.trim();
      if (part.length > 3 && part.length < 50) {
        phrases.add(part.toLowerCase());
      }
    }
  }

  // From headings
  for (const heading of crawlResult.uniqueHeadings) {
    if (heading.length > 3 && heading.length < 50) {
      phrases.add(heading.toLowerCase());
    }
  }

  // From descriptions (simplified noun phrases: adjacent word pairs)
  for (const page of crawlResult.pages) {
    if (!page.description) continue;
    // Trim first so surrounding whitespace cannot produce an empty word and
    // with it a phrase that starts with a space.
    const words = page.description.toLowerCase().trim().split(/\s+/);
    for (let i = 0; i < words.length - 1; i++) {
      const bigram = `${words[i]} ${words[i + 1]}`;
      if (bigram.length > 5 && bigram.length < 40) {
        phrases.add(bigram);
      }
    }
  }

  return Array.from(phrases).slice(0, 100);
}
|