@rankcli/agent-runtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/analyzer-2CSWIQGD.mjs +6 -0
- package/dist/chunk-YNZYHEYM.mjs +774 -0
- package/dist/index.d.mts +4012 -0
- package/dist/index.d.ts +4012 -0
- package/dist/index.js +29672 -0
- package/dist/index.mjs +28602 -0
- package/package.json +53 -0
- package/scripts/build-deno.ts +134 -0
- package/src/audit/ai/analyzer.ts +347 -0
- package/src/audit/ai/index.ts +29 -0
- package/src/audit/ai/prompts/content-analysis.ts +271 -0
- package/src/audit/ai/types.ts +179 -0
- package/src/audit/checks/additional-checks.ts +439 -0
- package/src/audit/checks/ai-citation-worthiness.ts +399 -0
- package/src/audit/checks/ai-content-structure.ts +325 -0
- package/src/audit/checks/ai-readiness.ts +339 -0
- package/src/audit/checks/anchor-text.ts +179 -0
- package/src/audit/checks/answer-conciseness.ts +322 -0
- package/src/audit/checks/asset-minification.ts +270 -0
- package/src/audit/checks/bing-optimization.ts +206 -0
- package/src/audit/checks/brand-mention-optimization.ts +349 -0
- package/src/audit/checks/caching-headers.ts +305 -0
- package/src/audit/checks/canonical-advanced.ts +150 -0
- package/src/audit/checks/canonical-domain.ts +196 -0
- package/src/audit/checks/citation-quality.ts +358 -0
- package/src/audit/checks/client-rendering.ts +542 -0
- package/src/audit/checks/color-contrast.ts +342 -0
- package/src/audit/checks/content-freshness.ts +170 -0
- package/src/audit/checks/content-science.ts +589 -0
- package/src/audit/checks/conversion-elements.ts +526 -0
- package/src/audit/checks/crawlability.ts +220 -0
- package/src/audit/checks/directory-listing.ts +172 -0
- package/src/audit/checks/dom-analysis.ts +191 -0
- package/src/audit/checks/dom-size.ts +246 -0
- package/src/audit/checks/duplicate-content.ts +194 -0
- package/src/audit/checks/eeat-signals.ts +990 -0
- package/src/audit/checks/entity-seo.ts +396 -0
- package/src/audit/checks/featured-snippet.ts +473 -0
- package/src/audit/checks/freshness-signals.ts +443 -0
- package/src/audit/checks/funnel-intent.ts +463 -0
- package/src/audit/checks/hreflang.ts +174 -0
- package/src/audit/checks/html-compliance.ts +302 -0
- package/src/audit/checks/image-dimensions.ts +167 -0
- package/src/audit/checks/images.ts +160 -0
- package/src/audit/checks/indexnow.ts +275 -0
- package/src/audit/checks/interactive-tools.ts +475 -0
- package/src/audit/checks/internal-link-graph.ts +436 -0
- package/src/audit/checks/keyword-analysis.ts +239 -0
- package/src/audit/checks/keyword-cannibalization.ts +385 -0
- package/src/audit/checks/keyword-placement.ts +471 -0
- package/src/audit/checks/links.ts +203 -0
- package/src/audit/checks/llms-txt.ts +224 -0
- package/src/audit/checks/local-seo.ts +296 -0
- package/src/audit/checks/mobile.ts +167 -0
- package/src/audit/checks/modern-images.ts +226 -0
- package/src/audit/checks/navboost-signals.ts +395 -0
- package/src/audit/checks/on-page.ts +209 -0
- package/src/audit/checks/page-resources.ts +285 -0
- package/src/audit/checks/pagination.ts +180 -0
- package/src/audit/checks/performance.ts +153 -0
- package/src/audit/checks/platform-presence.ts +580 -0
- package/src/audit/checks/redirect-analysis.ts +153 -0
- package/src/audit/checks/redirect-chain.ts +389 -0
- package/src/audit/checks/resource-hints.ts +420 -0
- package/src/audit/checks/responsive-css.ts +247 -0
- package/src/audit/checks/responsive-images.ts +396 -0
- package/src/audit/checks/review-ecosystem.ts +415 -0
- package/src/audit/checks/robots-validation.ts +373 -0
- package/src/audit/checks/security-headers.ts +172 -0
- package/src/audit/checks/security.ts +144 -0
- package/src/audit/checks/serp-preview.ts +251 -0
- package/src/audit/checks/site-maturity.ts +444 -0
- package/src/audit/checks/social-meta.test.ts +275 -0
- package/src/audit/checks/social-meta.ts +134 -0
- package/src/audit/checks/soft-404.ts +151 -0
- package/src/audit/checks/structured-data.ts +238 -0
- package/src/audit/checks/tech-detection.ts +496 -0
- package/src/audit/checks/topical-clusters.ts +435 -0
- package/src/audit/checks/tracker-bloat.ts +462 -0
- package/src/audit/checks/tracking-verification.test.ts +371 -0
- package/src/audit/checks/tracking-verification.ts +636 -0
- package/src/audit/checks/url-safety.ts +682 -0
- package/src/audit/deno-entry.ts +66 -0
- package/src/audit/discovery/index.ts +15 -0
- package/src/audit/discovery/link-crawler.ts +232 -0
- package/src/audit/discovery/repo-routes.ts +347 -0
- package/src/audit/engine.ts +620 -0
- package/src/audit/fixes/index.ts +209 -0
- package/src/audit/fixes/social-meta-fixes.test.ts +329 -0
- package/src/audit/fixes/social-meta-fixes.ts +463 -0
- package/src/audit/index.ts +74 -0
- package/src/audit/runner.test.ts +299 -0
- package/src/audit/runner.ts +130 -0
- package/src/audit/types.ts +1953 -0
- package/src/content/featured-snippet.ts +367 -0
- package/src/content/generator.test.ts +534 -0
- package/src/content/generator.ts +501 -0
- package/src/content/headline.ts +317 -0
- package/src/content/index.ts +62 -0
- package/src/content/intent.ts +258 -0
- package/src/content/keyword-density.ts +349 -0
- package/src/content/readability.ts +262 -0
- package/src/executor.ts +336 -0
- package/src/fixer.ts +416 -0
- package/src/frameworks/detector.test.ts +248 -0
- package/src/frameworks/detector.ts +371 -0
- package/src/frameworks/index.ts +68 -0
- package/src/frameworks/recipes/angular.yaml +171 -0
- package/src/frameworks/recipes/astro.yaml +206 -0
- package/src/frameworks/recipes/django.yaml +180 -0
- package/src/frameworks/recipes/laravel.yaml +137 -0
- package/src/frameworks/recipes/nextjs.yaml +268 -0
- package/src/frameworks/recipes/nuxt.yaml +175 -0
- package/src/frameworks/recipes/rails.yaml +188 -0
- package/src/frameworks/recipes/react.yaml +202 -0
- package/src/frameworks/recipes/sveltekit.yaml +154 -0
- package/src/frameworks/recipes/vue.yaml +137 -0
- package/src/frameworks/recipes/wordpress.yaml +209 -0
- package/src/frameworks/suggestion-engine.ts +320 -0
- package/src/geo/geo-content.test.ts +305 -0
- package/src/geo/geo-content.ts +266 -0
- package/src/geo/geo-history.test.ts +473 -0
- package/src/geo/geo-history.ts +433 -0
- package/src/geo/geo-tracker.test.ts +359 -0
- package/src/geo/geo-tracker.ts +411 -0
- package/src/geo/index.ts +10 -0
- package/src/git/commit-helper.test.ts +261 -0
- package/src/git/commit-helper.ts +329 -0
- package/src/git/index.ts +12 -0
- package/src/git/pr-helper.test.ts +284 -0
- package/src/git/pr-helper.ts +307 -0
- package/src/index.ts +66 -0
- package/src/keywords/ai-keyword-engine.ts +1062 -0
- package/src/keywords/ai-summarizer.ts +387 -0
- package/src/keywords/ci-mode.ts +555 -0
- package/src/keywords/engine.ts +359 -0
- package/src/keywords/index.ts +151 -0
- package/src/keywords/llm-judge.ts +357 -0
- package/src/keywords/nlp-analysis.ts +706 -0
- package/src/keywords/prioritizer.ts +295 -0
- package/src/keywords/site-crawler.ts +342 -0
- package/src/keywords/sources/autocomplete.ts +139 -0
- package/src/keywords/sources/competitive-search.ts +450 -0
- package/src/keywords/sources/competitor-analysis.ts +374 -0
- package/src/keywords/sources/dataforseo.ts +206 -0
- package/src/keywords/sources/free-sources.ts +294 -0
- package/src/keywords/sources/gsc.ts +123 -0
- package/src/keywords/topic-grouping.ts +327 -0
- package/src/keywords/types.ts +144 -0
- package/src/keywords/wizard.ts +457 -0
- package/src/loader.ts +40 -0
- package/src/reports/index.ts +7 -0
- package/src/reports/report-generator.test.ts +293 -0
- package/src/reports/report-generator.ts +713 -0
- package/src/scheduler/alerts.test.ts +458 -0
- package/src/scheduler/alerts.ts +328 -0
- package/src/scheduler/index.ts +8 -0
- package/src/scheduler/scheduled-audit.test.ts +377 -0
- package/src/scheduler/scheduled-audit.ts +149 -0
- package/src/test/integration-test.ts +325 -0
- package/src/tools/analyzer.ts +373 -0
- package/src/tools/crawl.ts +293 -0
- package/src/tools/files.ts +301 -0
- package/src/tools/h1-fixer.ts +249 -0
- package/src/tools/index.ts +67 -0
- package/src/tracking/github-action.ts +326 -0
- package/src/tracking/google-analytics.ts +265 -0
- package/src/tracking/index.ts +45 -0
- package/src/tracking/report-generator.ts +386 -0
- package/src/tracking/search-console.ts +335 -0
- package/src/types.ts +134 -0
- package/src/utils/http.ts +302 -0
- package/src/wasm-adapter.ts +297 -0
- package/src/wasm-entry.ts +14 -0
- package/tsconfig.json +17 -0
- package/tsup.wasm.config.ts +26 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deno-compatible entry point for the audit module.
|
|
3
|
+
* This excludes Node-specific code and dependencies.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Types
|
|
7
|
+
export type { AuditIssue, AuditReport, HealthScore, PageAudit } from './types.js';
|
|
8
|
+
export { ISSUE_DEFINITIONS } from './types.js';
|
|
9
|
+
|
|
10
|
+
// Main engine
|
|
11
|
+
export { runFullAudit, formatReport } from './engine.js';
|
|
12
|
+
export type { AuditOptions } from './engine.js';
|
|
13
|
+
|
|
14
|
+
// Individual check modules (all use isomorphic fetch)
|
|
15
|
+
export { runCrawlabilityChecks, checkRobotsTxt, checkSitemap, checkRedirects } from './checks/crawlability.js';
|
|
16
|
+
export { analyzeOnPage } from './checks/on-page.js';
|
|
17
|
+
export { analyzeLinks } from './checks/links.js';
|
|
18
|
+
export { analyzeImages } from './checks/images.js';
|
|
19
|
+
export { analyzePerformance } from './checks/performance.js';
|
|
20
|
+
export { analyzeSecurity, checkCertificate } from './checks/security.js';
|
|
21
|
+
export { analyzeStructuredData, suggestSchemaTypes } from './checks/structured-data.js';
|
|
22
|
+
export { analyzeMobile, checkMobileResources } from './checks/mobile.js';
|
|
23
|
+
|
|
24
|
+
// Advanced checks
|
|
25
|
+
export { analyzeHreflang } from './checks/hreflang.js';
|
|
26
|
+
export { analyzeSocialMeta } from './checks/social-meta.js';
|
|
27
|
+
export { detectSoft404 } from './checks/soft-404.js';
|
|
28
|
+
export { analyzeAnchorText } from './checks/anchor-text.js';
|
|
29
|
+
export { analyzeCanonicalAdvanced } from './checks/canonical-advanced.js';
|
|
30
|
+
export { analyzePagination } from './checks/pagination.js';
|
|
31
|
+
export { analyzeRedirects } from './checks/redirect-analysis.js';
|
|
32
|
+
|
|
33
|
+
// New competitor-parity checks
|
|
34
|
+
export { runAIReadinessChecks, checkLlmsTxt, checkAIBotBlocking } from './checks/ai-readiness.js';
|
|
35
|
+
export { analyzeSERPPreview } from './checks/serp-preview.js';
|
|
36
|
+
export { analyzeLocalSEO } from './checks/local-seo.js';
|
|
37
|
+
export { analyzeSecurityHeaders } from './checks/security-headers.js';
|
|
38
|
+
export { analyzeContentFreshness } from './checks/content-freshness.js';
|
|
39
|
+
export { analyzeDOMStructure } from './checks/dom-analysis.js';
|
|
40
|
+
export { analyzeModernImages } from './checks/modern-images.js';
|
|
41
|
+
export { detectTechnologies } from './checks/tech-detection.js';
|
|
42
|
+
export { analyzeKeywords } from './checks/keyword-analysis.js';
|
|
43
|
+
|
|
44
|
+
// URL Safety (Google Safe Browsing-style local hash database)
|
|
45
|
+
export { urlSafetyDatabase, analyzeUrlSafety } from './checks/url-safety.js';
|
|
46
|
+
|
|
47
|
+
// Advanced SEO checks
|
|
48
|
+
export { analyzeTrackerBloat } from './checks/tracker-bloat.js';
|
|
49
|
+
export { analyzeClientRendering } from './checks/client-rendering.js';
|
|
50
|
+
export { analyzeRedirectChain } from './checks/redirect-chain.js';
|
|
51
|
+
export { analyzeResponsiveImages } from './checks/responsive-images.js';
|
|
52
|
+
export { analyzeConversionElements } from './checks/conversion-elements.js';
|
|
53
|
+
export { analyzeKeywordPlacement } from './checks/keyword-placement.js';
|
|
54
|
+
export { analyzeTopicalClusters } from './checks/topical-clusters.js';
|
|
55
|
+
export { analyzePlatformPresence } from './checks/platform-presence.js';
|
|
56
|
+
export { analyzeInteractiveTools } from './checks/interactive-tools.js';
|
|
57
|
+
export { analyzeFunnelIntent } from './checks/funnel-intent.js';
|
|
58
|
+
export { analyzeNavBoostSignals } from './checks/navboost-signals.js';
|
|
59
|
+
export { analyzeEntitySEO } from './checks/entity-seo.js';
|
|
60
|
+
export { analyzeFreshnessSignals } from './checks/freshness-signals.js';
|
|
61
|
+
|
|
62
|
+
// Page discovery (multi-page crawling)
|
|
63
|
+
export { discoverPagesFromLinks, parseSitemap, mergePages } from './discovery/link-crawler.js';
|
|
64
|
+
export { discoverRoutesFromRepo, detectFramework, routesToUrls } from './discovery/repo-routes.js';
|
|
65
|
+
export type { DiscoveredPage } from './discovery/link-crawler.js';
|
|
66
|
+
export type { RouteInfo } from './discovery/repo-routes.js';
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Discovery Module
|
|
3
|
+
*
|
|
4
|
+
* Discovers pages to audit from various sources:
|
|
5
|
+
* - Internal links from homepage
|
|
6
|
+
* - Sitemap
|
|
7
|
+
* - Route files (with repo access)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export { discoverPagesFromLinks, parseSitemap, mergePages } from './link-crawler.js';
|
|
11
|
+
export type { DiscoveredPage } from './link-crawler.js';
|
|
12
|
+
|
|
13
|
+
// Route discovery is exported separately
|
|
14
|
+
export { discoverRoutesFromRepo } from './repo-routes.js';
|
|
15
|
+
export type { RouteInfo } from './repo-routes.js';
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Link Crawler Module
|
|
3
|
+
*
|
|
4
|
+
* Discovers pages from a website by following internal links from the homepage.
|
|
5
|
+
* Prioritizes important pages like /pricing, /about, /features, etc.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * A candidate page for auditing, together with where it was found.
 */
export interface DiscoveredPage {
  /** URL of the page. */
  url: string;
  /** Which discovery mechanism produced this entry. */
  source: 'homepage' | 'sitemap' | 'internal-link';
  /** Importance score from 1 to 10; a higher number means more important. */
  priority: number;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Pathname patterns for the pages that should be audited first.
 * Each pattern matches a single top-level segment with an optional trailing slash.
 */
const HIGH_PRIORITY_PATTERNS: RegExp[] = [
  // Core marketing and account pages
  /^\/(pricing|about|features|contact|demo|signup|login|blog)\/?$/i,
  // Offering overviews
  /^\/(products?|services?|solutions?|plans?)\/?$/i,
  // Documentation and support
  /^\/(docs?|documentation|help|support|faq)\/?$/i,
  // Company pages
  /^\/(careers?|jobs?|team|company)\/?$/i,
];
|
|
21
|
+
|
|
22
|
+
/**
 * Pathname patterns for pages of medium importance.
 */
const MEDIUM_PRIORITY_PATTERNS: RegExp[] = [
  // Legal and policy pages
  /^\/(privacy|terms|legal|security)\/?$/i,
  // Top-level pages
  /^\/[a-z0-9-]+\/?$/i,
];
|
|
26
|
+
|
|
27
|
+
/**
 * Patterns for hrefs/paths that must never be treated as auditable pages.
 *
 * The link extractor in this file scans every `href` attribute, which includes
 * `<link>` tags, so stylesheets, scripts, feeds, manifests and fonts have to be
 * filtered here alongside images and downloads.
 */
const EXCLUDE_PATTERNS: RegExp[] = [
  // Files: static assets and downloads, optionally followed by a query string
  // or fragment (e.g. `/logo.png?v=2`).
  /\.(png|jpg|jpeg|gif|svg|webp|avif|ico|pdf|zip|tar|gz|css|js|mjs|map|json|xml|txt|woff2?|ttf|eot|otf|mp4|webm|mp3)(?:[?#].*)?$/i,
  // Non-HTTP schemes and in-page anchors
  /^(mailto:|tel:|javascript:|#)/i,
  // Technical paths
  /\/(api|_next|static|assets|cdn)\//i,
  // Tracking params
  // NOTE(review): this drops tracked links outright, although normalizeUrl could
  // strip the same parameters instead - confirm which behaviour is intended.
  /[?&](utm_|ref=|source=)/i,
];
|
|
34
|
+
|
|
35
|
+
/**
 * Collect the distinct same-host links found in `href` attributes of an HTML document.
 *
 * Only absolute http(s) hrefs and root-relative hrefs (starting with `/`) are
 * considered. Hrefs matching EXCLUDE_PATTERNS, links to another hostname and
 * links for which normalizeUrl returns null are dropped.
 *
 * @param html - HTML markup to scan
 * @param baseUrl - URL used to resolve root-relative hrefs and to decide which hostname counts as internal
 * @returns Normalized URLs without duplicates, in first-seen order
 */
function extractLinks(html: string, baseUrl: string): string[] {
  const site = new URL(baseUrl);
  const found = new Set<string>();

  // Match all href attributes
  for (const hit of html.matchAll(/href=["']([^"']+)["']/gi)) {
    const rawHref = hit[1];

    if (EXCLUDE_PATTERNS.some(rule => rule.test(rawHref))) continue;

    const isAbsolute = rawHref.startsWith('http://') || rawHref.startsWith('https://');
    // Relative paths that do not start with "/" are deliberately ignored.
    if (!isAbsolute && !rawHref.startsWith('/')) continue;

    try {
      const target = isAbsolute ? new URL(rawHref) : new URL(rawHref, site);

      // Keep same-domain links only.
      if (target.hostname !== site.hostname) continue;

      const cleaned = normalizeUrl(target);
      if (cleaned) found.add(cleaned);
    } catch {
      // Unparseable URL: skip it.
    }
  }

  return Array.from(found);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Normalize a URL into a canonical string for deduplication.
 *
 * The fragment and common tracking parameters are removed and a trailing slash
 * is stripped. The result is rebuilt from protocol, host (including any
 * explicit port), pathname and the remaining query string.
 *
 * @param url - URL to normalize; it is not modified
 * @returns The normalized URL, or `null` when the URL points at the site root
 */
function normalizeUrl(url: URL): string | null {
  // Work on a copy so the caller's URL object is left untouched.
  const copy = new URL(url.href);

  // Remove hash
  copy.hash = '';

  // Remove common tracking parameters
  const paramsToRemove = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_content', 'utm_term', 'ref', 'source'];
  for (const param of paramsToRemove) {
    copy.searchParams.delete(param);
  }

  // Remove trailing slash for consistency
  let pathname = copy.pathname;
  if (pathname.length > 1 && pathname.endsWith('/')) {
    pathname = pathname.slice(0, -1);
  }

  // Skip root (the homepage is already known to the caller)
  if (pathname === '' || pathname === '/') {
    return null;
  }

  // Use `host` rather than `hostname` so a non-default port is preserved;
  // otherwise `http://localhost:3000/about` would become `http://localhost/about`.
  return `${copy.protocol}//${copy.host}${pathname}${copy.search}`;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Calculate a priority score for a URL based on its pathname.
 *
 * Scores: 9 for high-priority pages, 6 for medium-priority pages, 4 for paths
 * two levels deep, 2 for paths three or more levels deep, 5 for anything else,
 * and 1 when the URL cannot be parsed.
 *
 * @param url - Absolute URL to score
 * @returns Priority between 1 and 10 (higher = more important)
 */
function calculatePriority(url: string): number {
  let pathname: string;
  try {
    pathname = new URL(url).pathname;
  } catch {
    // Not a parseable absolute URL: lowest priority.
    return 1;
  }

  // High priority pages
  if (HIGH_PRIORITY_PATTERNS.some(pattern => pattern.test(pathname))) {
    return 9;
  }

  // Medium priority pages
  if (MEDIUM_PRIORITY_PATTERNS.some(pattern => pattern.test(pathname))) {
    return 6;
  }

  // Lower priority for deep paths. Trailing slashes are ignored so that
  // `/blog/post/` is scored like `/blog/post` instead of one level deeper.
  const trimmed = pathname.replace(/\/+$/, '');
  const depth = (trimmed.match(/\//g) || []).length;
  if (depth >= 3) {
    return 2;
  }
  if (depth === 2) {
    return 4;
  }

  // Default priority for other pages
  return 5;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Pick the most important internal pages linked from a piece of HTML.
 *
 * Every extracted link is scored with calculatePriority, the list is ordered
 * from highest to lowest score, and only the first `maxPages` entries are kept.
 *
 * @param baseUrl - The base URL of the site
 * @param html - The HTML content of the homepage
 * @param maxPages - Maximum number of pages to return
 * @returns Discovered pages sorted by priority, highest first
 */
export function discoverPagesFromLinks(
  baseUrl: string,
  html: string,
  maxPages: number = 5
): DiscoveredPage[] {
  const ranked = extractLinks(html, baseUrl)
    .map((url): DiscoveredPage => ({
      url,
      source: 'internal-link',
      priority: calculatePriority(url),
    }))
    .sort((left, right) => right.priority - left.priority);

  return ranked.slice(0, maxPages);
}
|
|
174
|
+
|
|
175
|
+
/**
 * Decode the five predefined XML entities in sitemap text content.
 * `&amp;` is decoded last so that `&amp;lt;` becomes `&lt;` rather than `<`.
 */
function decodeSitemapEntities(text: string): string {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&');
}

/**
 * Parse sitemap XML to extract same-host page URLs.
 *
 * `<loc>` values are read with a regular expression (no XML parser). Plain text
 * values are entity-decoded, because sitemap URLs must be XML-escaped (for
 * example `&` is written as `&amp;`). CDATA-wrapped values are taken verbatim.
 * Surrounding whitespace is ignored.
 *
 * @param xml - Raw sitemap XML
 * @param baseUrl - Site URL; only entries on the same hostname are kept
 * @returns One entry per accepted `<loc>`, in document order, with `source: 'sitemap'`
 */
export function parseSitemap(xml: string, baseUrl: string): DiscoveredPage[] {
  const pages: DiscoveredPage[] = [];
  const base = new URL(baseUrl);

  // Group 1: CDATA payload. Group 2: plain (entity-escaped) text.
  const locRegex = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+?))\s*<\/loc>/gi;
  let match: RegExpExecArray | null;

  while ((match = locRegex.exec(xml)) !== null) {
    const raw = match[1] !== undefined ? match[1] : decodeSitemapEntities(match[2] || '');

    try {
      const url = new URL(raw.trim());

      // Only include same-domain URLs
      if (url.hostname !== base.hostname) {
        continue;
      }

      // Skip excluded patterns
      if (EXCLUDE_PATTERNS.some(pattern => pattern.test(url.pathname))) {
        continue;
      }

      pages.push({
        url: url.href,
        source: 'sitemap',
        priority: calculatePriority(url.href),
      });
    } catch {
      // Invalid URL, skip
    }
  }

  return pages;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Combine several page lists into one, keeping a single entry per URL.
 *
 * When the same URL occurs more than once, the entry with the strictly higher
 * priority wins; on a tie the earlier entry is kept.
 *
 * @param pageSets - Any number of page lists
 * @returns The deduplicated pages, ordered from highest to lowest priority
 */
export function mergePages(
  ...pageSets: DiscoveredPage[][]
): DiscoveredPage[] {
  const bestByUrl = new Map<string, DiscoveredPage>();

  for (const candidate of pageSets.flat()) {
    const current = bestByUrl.get(candidate.url);
    if (current && !(candidate.priority > current.priority)) continue;
    bestByUrl.set(candidate.url, candidate);
  }

  return [...bestByUrl.values()].sort((left, right) => right.priority - left.priority);
}
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Route Discovery Module
|
|
3
|
+
*
|
|
4
|
+
* Discovers routes from repository files based on framework-specific patterns.
|
|
5
|
+
* Supports: Next.js (App/Pages), Astro, React Router, Vue Router, SvelteKit
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * A route found in the repository's source files.
 */
export interface RouteInfo {
  /** Route path as derived from the file name or route definition. */
  path: string;
  /** 'dynamic' when the path contains a parameter segment, otherwise 'static'. */
  type: 'static' | 'dynamic';
  /** Repository file the route was derived from. */
  sourceFile: string;
}
|
|
13
|
+
|
|
14
|
+
/** Frameworks that route discovery can distinguish; 'unknown' means none was detected. */
type Framework =
  | 'nextjs'
  | 'astro'
  | 'react'
  | 'react-vite'
  | 'vue'
  | 'sveltekit'
  | 'unknown';
|
|
15
|
+
|
|
16
|
+
/**
 * Work out which framework a project uses from its package.json dependencies
 * and, for plain React, from its file list.
 *
 * Checks run in this order: Next.js, Astro, SvelteKit, Vue Router, React + Vite,
 * then React with a router-looking file. The first hit wins.
 *
 * @param packageJson - Parsed package.json (dependencies and devDependencies are read), or null
 * @param files - Repository file paths; consulted only for the plain React check
 * @returns The detected framework, or 'unknown'
 */
export function detectFramework(
  packageJson: { dependencies?: Record<string, string>; devDependencies?: Record<string, string> } | null,
  files: string[]
): Framework {
  if (!packageJson) return 'unknown';

  const deps = { ...packageJson.dependencies, ...packageJson.devDependencies };
  const has = (name: string): boolean => Boolean(deps[name]);

  // Frameworks identified by a single marker dependency, in precedence order.
  const byDependency: Array<[string[], Framework]> = [
    [['next'], 'nextjs'],
    [['astro'], 'astro'],
    [['@sveltejs/kit'], 'sveltekit'],
    [['vue-router', '@vue/router'], 'vue'],
  ];
  for (const [markers, framework] of byDependency) {
    if (markers.some(marker => has(marker))) return framework;
  }

  // React with Vite
  if (has('react') && has('vite')) return 'react-vite';

  // Plain React only counts when a router-looking file is present.
  if (!has('react') && !has('react-dom')) return 'unknown';

  const looksLikeRouterFile = (file: string): boolean =>
    /routes?\.(tsx?|jsx?)$/.test(file) ||
    file.includes('router') ||
    file.endsWith('App.tsx') ||
    file.endsWith('App.jsx');

  return files.some(looksLikeRouterFile) ? 'react' : 'unknown';
}
|
|
68
|
+
|
|
69
|
+
/**
 * Run the route discovery strategy that belongs to the given framework.
 *
 * File-based frameworks (Next.js, Astro, SvelteKit) are resolved from the file
 * list alone; React and Vue additionally read file contents through `fetchFile`.
 *
 * @param files - Repository file paths
 * @param framework - Framework to discover routes for
 * @param fetchFile - Loads a file's content by path, resolving to null when unavailable
 * @returns The discovered routes; empty for an unsupported framework so callers can fall back to link crawling
 */
export async function discoverRoutesFromRepo(
  files: string[],
  framework: Framework,
  fetchFile: (path: string) => Promise<string | null>
): Promise<RouteInfo[]> {
  if (framework === 'nextjs') return discoverNextRoutes(files);
  if (framework === 'astro') return discoverAstroRoutes(files);
  if (framework === 'sveltekit') return discoverSvelteRoutes(files);

  if (framework === 'react' || framework === 'react-vite') {
    return discoverReactRouterRoutes(files, fetchFile);
  }
  if (framework === 'vue') {
    return discoverVueRouterRoutes(files, fetchFile);
  }

  // Fall back to link crawling
  return [];
}
|
|
93
|
+
|
|
94
|
+
/**
 * Next.js: file-based routing (App Router and Pages Router).
 *
 * Recognizes `app/**\/page.{ts,tsx,js,jsx}` and `pages/**\/*.{ts,tsx,js,jsx}`,
 * each optionally nested under `src/`. Route groups such as `(marketing)` are
 * removed from App Router paths, including a group directly under `app/`, which
 * maps to `/`. Pages Router files starting with `_` or located under `api/` are
 * skipped.
 *
 * When both routers define the same path, the App Router entry is kept
 * regardless of the order of `files`; otherwise the first file seen wins.
 * Results keep the order in which each path was first encountered.
 *
 * @param files - Repository file paths relative to the project root
 * @returns One route per distinct path
 */
function discoverNextRoutes(files: string[]): RouteInfo[] {
  const byPath = new Map<string, RouteInfo>();
  const appRouterPaths = new Set<string>();

  // Store a route; an App Router route may replace a Pages Router route
  // (Map.set on an existing key keeps the original position).
  const record = (route: RouteInfo, fromAppRouter: boolean): void => {
    const taken = byPath.has(route.path);
    if (!taken || (fromAppRouter && !appRouterPaths.has(route.path))) {
      byPath.set(route.path, route);
    }
    if (fromAppRouter) {
      appRouterPaths.add(route.path);
    }
  };

  for (const file of files) {
    // App Router: [src/]app/page.tsx and [src/]app/**/page.tsx
    const appMatch = file.match(/^(?:src\/)?app\/(?:(.*)\/)?page\.(tsx?|jsx?)$/);
    if (appMatch) {
      let path = '/' + (appMatch[1] || '');
      // Handle route groups (parentheses)
      path = path.replace(/\/\([^)]+\)/g, '');
      // A path made only of route groups collapses to the empty string: that is the root page.
      if (path === '') path = '/';

      record(
        { path, type: path.includes('[') ? 'dynamic' : 'static', sourceFile: file },
        true
      );
      continue;
    }

    // Pages Router: [src/]pages/**/*.tsx
    const pagesMatch = file.match(/^(?:src\/)?pages\/(.+)\.(tsx?|jsx?)$/);
    if (pagesMatch) {
      const pagePath = pagesMatch[1];

      // Skip special files (_app, _document, ...) and API routes
      if (pagePath.startsWith('_') || pagePath.startsWith('api/')) {
        continue;
      }

      // Handle index files
      let path = ('/' + pagePath).replace(/\/index$/, '');
      if (path === '') path = '/';

      record(
        { path, type: path.includes('[') ? 'dynamic' : 'static', sourceFile: file },
        false
      );
    }
  }

  return Array.from(byPath.values());
}
|
|
163
|
+
|
|
164
|
+
/**
 * Astro: derive routes from files under `src/pages`.
 *
 * Every `.astro`, `.md` or `.mdx` file becomes a route; a trailing `/index` is
 * dropped and a path containing `[` is flagged as dynamic. No deduplication is
 * performed.
 *
 * @param files - Repository file paths
 * @returns One route per matching file, in input order
 */
function discoverAstroRoutes(files: string[]): RouteInfo[] {
  const pageFile = /^src\/pages\/(.+)\.(astro|md|mdx)$/;

  return files.flatMap((file): RouteInfo[] => {
    const hit = file.match(pageFile);
    if (!hit) return [];

    // Handle index files: "/index" and "/docs/index" become "/" and "/docs".
    const withoutIndex = ('/' + hit[1]).replace(/\/index$/, '');
    const path = withoutIndex === '' ? '/' : withoutIndex;

    return [{
      path,
      type: path.includes('[') ? 'dynamic' : 'static',
      sourceFile: file,
    }];
  });
}
|
|
191
|
+
|
|
192
|
+
/**
 * SvelteKit: derive routes from `+page.svelte` files under `src/routes`.
 *
 * The directory that contains `+page.svelte` becomes the route path. Route
 * groups such as `(app)` are removed, and the result never carries a trailing
 * slash (`/about`, not `/about/`), matching the other route discoverers in this
 * module. A path containing `[` is flagged as dynamic.
 *
 * @param files - Repository file paths
 * @returns One route per `+page.svelte` file, in input order
 */
function discoverSvelteRoutes(files: string[]): RouteInfo[] {
  const routes: RouteInfo[] = [];

  for (const file of files) {
    // Match src/routes/+page.svelte and src/routes/**/+page.svelte. The directory
    // part is captured without its trailing slash, and the file name must be
    // exactly "+page.svelte" (not e.g. "foo+page.svelte").
    const match = file.match(/^src\/routes\/(?:(.*)\/)?\+page\.svelte$/);
    if (!match) continue;

    let path = '/' + (match[1] || '');

    // Handle route groups (parentheses)
    path = path.replace(/\/\([^)]+\)/g, '');

    // Clean up double slashes and any trailing slash
    path = path.replace(/\/+/g, '/');
    if (path.length > 1 && path.endsWith('/')) {
      path = path.slice(0, -1);
    }
    // A path made only of route groups collapses to the empty string: root page.
    if (path === '') path = '/';

    routes.push({
      path,
      type: path.includes('[') ? 'dynamic' : 'static',
      sourceFile: file,
    });
  }

  return routes;
}
|
|
222
|
+
|
|
223
|
+
/**
 * React Router: scan likely router files for route path definitions.
 *
 * Candidate files are fetched one after another; files whose content is
 * unavailable are skipped. Paths containing `*` are ignored, a path containing
 * `:` is flagged as dynamic, and only the first occurrence of each path is kept.
 *
 * @param files - Repository file paths
 * @param fetchFile - Loads a file's content by path, resolving to null when unavailable
 * @returns Distinct routes in discovery order
 */
async function discoverReactRouterRoutes(
  files: string[],
  fetchFile: (path: string) => Promise<string | null>
): Promise<RouteInfo[]> {
  // Find likely router files
  const isRouterCandidate = (name: string): boolean =>
    /routes?\.(tsx?|jsx?)$/.test(name) ||
    name.includes('router') ||
    name.endsWith('App.tsx') ||
    name.endsWith('App.jsx') ||
    name.includes('Router');

  const knownPaths = new Set<string>();
  const discovered: RouteInfo[] = [];

  for (const sourceFile of files.filter(isRouterCandidate)) {
    const source = await fetchFile(sourceFile);
    if (!source) continue;

    // Route definition shapes, tried in this order:
    //   <Route path="/about" ...   /   <Route path='/about' ...
    //   { path: "/about" ...       /   { path: '/about' ...
    //   createBrowserRouter(... path: "/about"
    const definitionShapes = [
      /<Route[^>]*\spath=["']([^"']+)["']/g,
      /{\s*path:\s*["']([^"']+)["']/g,
      /createBrowserRouter\([^)]*path:\s*["']([^"']+)["']/g,
    ];

    for (const shape of definitionShapes) {
      for (const hit of source.matchAll(shape)) {
        const routePath = hit[1];

        // Skip catch-all and layout routes
        if (routePath.includes('*')) continue;
        // Keep only the first definition of each path
        if (knownPaths.has(routePath)) continue;
        knownPaths.add(routePath);

        discovered.push({
          path: routePath,
          type: routePath.includes(':') ? 'dynamic' : 'static',
          sourceFile,
        });
      }
    }
  }

  return discovered;
}
|
|
282
|
+
|
|
283
|
+
/**
 * Vue Router: scan router configuration files for `path: '...'` entries.
 *
 * Candidate files are fetched one after another; files whose content is
 * unavailable are skipped. Catch-all paths are ignored, a path containing `:`
 * is flagged as dynamic, and only the first occurrence of each path is kept.
 *
 * @param files - Repository file paths
 * @param fetchFile - Loads a file's content by path, resolving to null when unavailable
 * @returns Distinct routes in discovery order
 */
async function discoverVueRouterRoutes(
  files: string[],
  fetchFile: (path: string) => Promise<string | null>
): Promise<RouteInfo[]> {
  // Find router configuration files
  const isRouterConfig = (name: string): boolean =>
    /router\/index\.(ts|js)$/.test(name) ||
    /router\.(ts|js)$/.test(name) ||
    name.includes('routes');

  const knownPaths = new Set<string>();
  const discovered: RouteInfo[] = [];

  for (const sourceFile of files.filter(isRouterConfig)) {
    const source = await fetchFile(sourceFile);
    if (!source) continue;

    // Match route definitions: { path: '/about' ...
    for (const hit of source.matchAll(/path:\s*["']([^"']+)["']/g)) {
      const routePath = hit[1];

      // Skip catch-all routes
      if (routePath === '/:pathMatch(.*)' || routePath.includes('*')) continue;
      // Keep only the first definition of each path
      if (knownPaths.has(routePath)) continue;
      knownPaths.add(routePath);

      discovered.push({
        path: routePath,
        type: routePath.includes(':') ? 'dynamic' : 'static',
        sourceFile,
      });
    }
  }

  return discovered;
}
|
|
329
|
+
|
|
330
|
+
/**
 * Turn static routes into absolute URLs on the given site.
 *
 * Dynamic routes are discarded first; the limit is then applied to the
 * remaining static routes, and each path is resolved against `baseUrl`.
 *
 * @param routes - Routes to convert
 * @param baseUrl - Site URL that route paths are resolved against
 * @param maxUrls - Maximum number of URLs to return
 * @returns Absolute URLs for the first `maxUrls` static routes, in input order
 */
export function routesToUrls(
  routes: RouteInfo[],
  baseUrl: string,
  maxUrls: number = 5
): string[] {
  const site = new URL(baseUrl);

  const staticPaths: string[] = [];
  for (const route of routes) {
    if (route.type === 'static') {
      staticPaths.push(route.path);
    }
  }

  return staticPaths
    .slice(0, maxUrls)
    .map(routePath => new URL(routePath, site).href);
}
|