webmcp-cli 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/form-to-tool-mapper.d.ts +61 -0
- package/dist/analysis/form-to-tool-mapper.js +360 -0
- package/dist/analysis/form-to-tool-mapper.js.map +1 -0
- package/dist/analysis/index.d.ts +84 -0
- package/dist/analysis/index.js +81 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/missing-tool-analyzer.d.ts +35 -0
- package/dist/analysis/missing-tool-analyzer.js +617 -0
- package/dist/analysis/missing-tool-analyzer.js.map +1 -0
- package/dist/audit/run-multi-page-audit.d.ts +34 -0
- package/dist/audit/run-multi-page-audit.js +233 -0
- package/dist/audit/run-multi-page-audit.js.map +1 -0
- package/dist/cli/commands/potential.d.ts +8 -0
- package/dist/cli/commands/potential.js +323 -0
- package/dist/cli/commands/potential.js.map +1 -0
- package/dist/cli/commands/report.d.ts +12 -0
- package/dist/cli/commands/report.js +89 -0
- package/dist/cli/commands/report.js.map +1 -0
- package/dist/cli/index.js +35 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/config/defaults.d.ts +36 -0
- package/dist/config/defaults.js +33 -0
- package/dist/config/defaults.js.map +1 -0
- package/dist/config/index.d.ts +7 -0
- package/dist/config/index.js +7 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/loader.d.ts +22 -0
- package/dist/config/loader.js +91 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +280 -0
- package/dist/config/schema.js +42 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/core/types/audit.d.ts +1 -1
- package/dist/core/types/index.d.ts +1 -0
- package/dist/core/types/index.js +1 -0
- package/dist/core/types/index.js.map +1 -1
- package/dist/core/types/recon.d.ts +265 -0
- package/dist/core/types/recon.js +5 -0
- package/dist/core/types/recon.js.map +1 -0
- package/dist/core/types/rule.d.ts +1 -1
- package/dist/core/types/rule.js +7 -5
- package/dist/core/types/rule.js.map +1 -1
- package/dist/crawler/depth-crawler.d.ts +29 -0
- package/dist/crawler/depth-crawler.js +212 -0
- package/dist/crawler/depth-crawler.js.map +1 -0
- package/dist/crawler/index.d.ts +2 -0
- package/dist/crawler/index.js +3 -0
- package/dist/crawler/index.js.map +1 -0
- package/dist/crawler/link-extractor.d.ts +1 -0
- package/dist/crawler/link-extractor.js +49 -0
- package/dist/crawler/link-extractor.js.map +1 -0
- package/dist/generators/index.d.ts +10 -0
- package/dist/generators/index.js +8 -0
- package/dist/generators/index.js.map +1 -0
- package/dist/generators/report-html.d.ts +12 -0
- package/dist/generators/report-html.js +470 -0
- package/dist/generators/report-html.js.map +1 -0
- package/dist/generators/report-json.d.ts +95 -0
- package/dist/generators/report-json.js +144 -0
- package/dist/generators/report-json.js.map +1 -0
- package/dist/generators/report-manager.d.ts +31 -0
- package/dist/generators/report-manager.js +208 -0
- package/dist/generators/report-manager.js.map +1 -0
- package/dist/generators/tool-code-generator.d.ts +31 -0
- package/dist/generators/tool-code-generator.js +201 -0
- package/dist/generators/tool-code-generator.js.map +1 -0
- package/dist/potential/ai-recommender.d.ts +33 -0
- package/dist/potential/ai-recommender.js +414 -0
- package/dist/potential/ai-recommender.js.map +1 -0
- package/dist/potential/analyzer.d.ts +32 -0
- package/dist/potential/analyzer.js +383 -0
- package/dist/potential/analyzer.js.map +1 -0
- package/dist/potential/index.d.ts +3 -0
- package/dist/potential/index.js +4 -0
- package/dist/potential/index.js.map +1 -0
- package/dist/potential/prompts.d.ts +20 -0
- package/dist/potential/prompts.js +42 -0
- package/dist/potential/prompts.js.map +1 -0
- package/dist/potential/types.d.ts +40 -0
- package/dist/potential/types.js +2 -0
- package/dist/potential/types.js.map +1 -0
- package/dist/recon/index.d.ts +20 -0
- package/dist/recon/index.js +143 -0
- package/dist/recon/index.js.map +1 -0
- package/dist/recon/manifest.d.ts +16 -0
- package/dist/recon/manifest.js +108 -0
- package/dist/recon/manifest.js.map +1 -0
- package/dist/recon/meta-extractor.d.ts +11 -0
- package/dist/recon/meta-extractor.js +276 -0
- package/dist/recon/meta-extractor.js.map +1 -0
- package/dist/recon/robots.d.ts +16 -0
- package/dist/recon/robots.js +158 -0
- package/dist/recon/robots.js.map +1 -0
- package/dist/recon/route-discovery.d.ts +25 -0
- package/dist/recon/route-discovery.js +303 -0
- package/dist/recon/route-discovery.js.map +1 -0
- package/dist/recon/sitemap.d.ts +12 -0
- package/dist/recon/sitemap.js +177 -0
- package/dist/recon/sitemap.js.map +1 -0
- package/dist/rules/accessibility/AXE-001.d.ts +9 -0
- package/dist/rules/accessibility/AXE-001.js +109 -0
- package/dist/rules/accessibility/AXE-001.js.map +1 -0
- package/dist/rules/accessibility/AXE-002.d.ts +8 -0
- package/dist/rules/accessibility/AXE-002.js +85 -0
- package/dist/rules/accessibility/AXE-002.js.map +1 -0
- package/dist/rules/accessibility/AXE-003.d.ts +8 -0
- package/dist/rules/accessibility/AXE-003.js +94 -0
- package/dist/rules/accessibility/AXE-003.js.map +1 -0
- package/dist/rules/accessibility/AXE-004.d.ts +8 -0
- package/dist/rules/accessibility/AXE-004.js +101 -0
- package/dist/rules/accessibility/AXE-004.js.map +1 -0
- package/dist/rules/accessibility/AXE-005.d.ts +9 -0
- package/dist/rules/accessibility/AXE-005.js +89 -0
- package/dist/rules/accessibility/AXE-005.js.map +1 -0
- package/dist/rules/best-practices/BP-004.d.ts +9 -0
- package/dist/rules/best-practices/BP-004.js +96 -0
- package/dist/rules/best-practices/BP-004.js.map +1 -0
- package/dist/rules/best-practices/BP-005.d.ts +8 -0
- package/dist/rules/best-practices/BP-005.js +94 -0
- package/dist/rules/best-practices/BP-005.js.map +1 -0
- package/dist/rules/best-practices/BP-006.d.ts +8 -0
- package/dist/rules/best-practices/BP-006.js +80 -0
- package/dist/rules/best-practices/BP-006.js.map +1 -0
- package/dist/rules/best-practices/BP-007.d.ts +8 -0
- package/dist/rules/best-practices/BP-007.js +92 -0
- package/dist/rules/best-practices/BP-007.js.map +1 -0
- package/dist/rules/best-practices/BP-008.d.ts +12 -0
- package/dist/rules/best-practices/BP-008.js +86 -0
- package/dist/rules/best-practices/BP-008.js.map +1 -0
- package/dist/rules/best-practices/BP-009.d.ts +9 -0
- package/dist/rules/best-practices/BP-009.js +77 -0
- package/dist/rules/best-practices/BP-009.js.map +1 -0
- package/dist/rules/best-practices/BP-010.d.ts +8 -0
- package/dist/rules/best-practices/BP-010.js +85 -0
- package/dist/rules/best-practices/BP-010.js.map +1 -0
- package/dist/rules/coverage/COV-002.d.ts +8 -0
- package/dist/rules/coverage/COV-002.js +68 -0
- package/dist/rules/coverage/COV-002.js.map +1 -0
- package/dist/rules/coverage/COV-003.d.ts +8 -0
- package/dist/rules/coverage/COV-003.js +68 -0
- package/dist/rules/coverage/COV-003.js.map +1 -0
- package/dist/rules/coverage/COV-004.d.ts +8 -0
- package/dist/rules/coverage/COV-004.js +89 -0
- package/dist/rules/coverage/COV-004.js.map +1 -0
- package/dist/rules/coverage/COV-005.d.ts +8 -0
- package/dist/rules/coverage/COV-005.js +67 -0
- package/dist/rules/coverage/COV-005.js.map +1 -0
- package/dist/rules/coverage/COV-006.d.ts +9 -0
- package/dist/rules/coverage/COV-006.js +76 -0
- package/dist/rules/coverage/COV-006.js.map +1 -0
- package/dist/rules/coverage/COV-007.d.ts +8 -0
- package/dist/rules/coverage/COV-007.js +67 -0
- package/dist/rules/coverage/COV-007.js.map +1 -0
- package/dist/rules/coverage/COV-008.d.ts +9 -0
- package/dist/rules/coverage/COV-008.js +87 -0
- package/dist/rules/coverage/COV-008.js.map +1 -0
- package/dist/rules/coverage/COV-009.d.ts +8 -0
- package/dist/rules/coverage/COV-009.js +73 -0
- package/dist/rules/coverage/COV-009.js.map +1 -0
- package/dist/rules/coverage/COV-010.d.ts +9 -0
- package/dist/rules/coverage/COV-010.js +82 -0
- package/dist/rules/coverage/COV-010.js.map +1 -0
- package/dist/rules/description/DESC-001.d.ts +9 -0
- package/dist/rules/description/DESC-001.js +88 -0
- package/dist/rules/description/DESC-001.js.map +1 -0
- package/dist/rules/description/DESC-002.d.ts +10 -0
- package/dist/rules/description/DESC-002.js +99 -0
- package/dist/rules/description/DESC-002.js.map +1 -0
- package/dist/rules/description/DESC-006.d.ts +9 -0
- package/dist/rules/description/DESC-006.js +78 -0
- package/dist/rules/description/DESC-006.js.map +1 -0
- package/dist/rules/description/DESC-007.d.ts +9 -0
- package/dist/rules/description/DESC-007.js +70 -0
- package/dist/rules/description/DESC-007.js.map +1 -0
- package/dist/rules/description/DESC-008.d.ts +9 -0
- package/dist/rules/description/DESC-008.js +70 -0
- package/dist/rules/description/DESC-008.js.map +1 -0
- package/dist/rules/description/DESC-009.d.ts +8 -0
- package/dist/rules/description/DESC-009.js +55 -0
- package/dist/rules/description/DESC-009.js.map +1 -0
- package/dist/rules/description/DESC-010.d.ts +9 -0
- package/dist/rules/description/DESC-010.js +92 -0
- package/dist/rules/description/DESC-010.js.map +1 -0
- package/dist/rules/description/DESC-011.d.ts +9 -0
- package/dist/rules/description/DESC-011.js +81 -0
- package/dist/rules/description/DESC-011.js.map +1 -0
- package/dist/rules/description/DESC-012.d.ts +9 -0
- package/dist/rules/description/DESC-012.js +98 -0
- package/dist/rules/description/DESC-012.js.map +1 -0
- package/dist/rules/implementation/IMP-002.d.ts +9 -0
- package/dist/rules/implementation/IMP-002.js +59 -0
- package/dist/rules/implementation/IMP-002.js.map +1 -0
- package/dist/rules/implementation/IMP-006.d.ts +9 -0
- package/dist/rules/implementation/IMP-006.js +48 -0
- package/dist/rules/implementation/IMP-006.js.map +1 -0
- package/dist/rules/implementation/IMP-008.d.ts +9 -0
- package/dist/rules/implementation/IMP-008.js +46 -0
- package/dist/rules/implementation/IMP-008.js.map +1 -0
- package/dist/rules/implementation/IMP-009.d.ts +9 -0
- package/dist/rules/implementation/IMP-009.js +48 -0
- package/dist/rules/implementation/IMP-009.js.map +1 -0
- package/dist/rules/implementation/IMP-010.d.ts +9 -0
- package/dist/rules/implementation/IMP-010.js +66 -0
- package/dist/rules/implementation/IMP-010.js.map +1 -0
- package/dist/rules/implementation/IMP-011.d.ts +9 -0
- package/dist/rules/implementation/IMP-011.js +82 -0
- package/dist/rules/implementation/IMP-011.js.map +1 -0
- package/dist/rules/implementation/IMP-012.d.ts +9 -0
- package/dist/rules/implementation/IMP-012.js +88 -0
- package/dist/rules/implementation/IMP-012.js.map +1 -0
- package/dist/rules/implementation/IMP-014.d.ts +9 -0
- package/dist/rules/implementation/IMP-014.js +58 -0
- package/dist/rules/implementation/IMP-014.js.map +1 -0
- package/dist/rules/implementation/IMP-015.d.ts +9 -0
- package/dist/rules/implementation/IMP-015.js +64 -0
- package/dist/rules/implementation/IMP-015.js.map +1 -0
- package/dist/rules/implementation/IMP-016.d.ts +9 -0
- package/dist/rules/implementation/IMP-016.js +52 -0
- package/dist/rules/implementation/IMP-016.js.map +1 -0
- package/dist/rules/implementation/IMP-017.d.ts +8 -0
- package/dist/rules/implementation/IMP-017.js +51 -0
- package/dist/rules/implementation/IMP-017.js.map +1 -0
- package/dist/rules/implementation/IMP-018.d.ts +8 -0
- package/dist/rules/implementation/IMP-018.js +52 -0
- package/dist/rules/implementation/IMP-018.js.map +1 -0
- package/dist/rules/implementation/IMP-019.d.ts +8 -0
- package/dist/rules/implementation/IMP-019.js +53 -0
- package/dist/rules/implementation/IMP-019.js.map +1 -0
- package/dist/rules/implementation/IMP-020.d.ts +9 -0
- package/dist/rules/implementation/IMP-020.js +62 -0
- package/dist/rules/implementation/IMP-020.js.map +1 -0
- package/dist/rules/implementation/IMP-021.d.ts +8 -0
- package/dist/rules/implementation/IMP-021.js +64 -0
- package/dist/rules/implementation/IMP-021.js.map +1 -0
- package/dist/rules/implementation/IMP-022.d.ts +8 -0
- package/dist/rules/implementation/IMP-022.js +70 -0
- package/dist/rules/implementation/IMP-022.js.map +1 -0
- package/dist/rules/index.d.ts +73 -6
- package/dist/rules/index.js +141 -6
- package/dist/rules/index.js.map +1 -1
- package/dist/rules/schema/SCHEMA-004.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-004.js +57 -0
- package/dist/rules/schema/SCHEMA-004.js.map +1 -0
- package/dist/rules/schema/SCHEMA-005.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-005.js +61 -0
- package/dist/rules/schema/SCHEMA-005.js.map +1 -0
- package/dist/rules/schema/SCHEMA-006.d.ts +10 -0
- package/dist/rules/schema/SCHEMA-006.js +85 -0
- package/dist/rules/schema/SCHEMA-006.js.map +1 -0
- package/dist/rules/schema/SCHEMA-007.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-007.js +73 -0
- package/dist/rules/schema/SCHEMA-007.js.map +1 -0
- package/dist/rules/schema/SCHEMA-008.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-008.js +70 -0
- package/dist/rules/schema/SCHEMA-008.js.map +1 -0
- package/dist/rules/schema/SCHEMA-009.d.ts +10 -0
- package/dist/rules/schema/SCHEMA-009.js +80 -0
- package/dist/rules/schema/SCHEMA-009.js.map +1 -0
- package/dist/rules/schema/SCHEMA-010.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-010.js +96 -0
- package/dist/rules/schema/SCHEMA-010.js.map +1 -0
- package/dist/rules/schema/SCHEMA-012.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-012.js +65 -0
- package/dist/rules/schema/SCHEMA-012.js.map +1 -0
- package/dist/rules/security/SEC-002.d.ts +8 -0
- package/dist/rules/security/SEC-002.js +81 -0
- package/dist/rules/security/SEC-002.js.map +1 -0
- package/dist/rules/security/SEC-003.d.ts +8 -0
- package/dist/rules/security/SEC-003.js +85 -0
- package/dist/rules/security/SEC-003.js.map +1 -0
- package/dist/rules/security/SEC-004.d.ts +9 -0
- package/dist/rules/security/SEC-004.js +87 -0
- package/dist/rules/security/SEC-004.js.map +1 -0
- package/dist/rules/security/SEC-005.d.ts +8 -0
- package/dist/rules/security/SEC-005.js +87 -0
- package/dist/rules/security/SEC-005.js.map +1 -0
- package/dist/rules/security/SEC-006.d.ts +10 -0
- package/dist/rules/security/SEC-006.js +108 -0
- package/dist/rules/security/SEC-006.js.map +1 -0
- package/dist/rules/security/SEC-007.d.ts +9 -0
- package/dist/rules/security/SEC-007.js +108 -0
- package/dist/rules/security/SEC-007.js.map +1 -0
- package/dist/rules/security/SEC-008.d.ts +8 -0
- package/dist/rules/security/SEC-008.js +109 -0
- package/dist/rules/security/SEC-008.js.map +1 -0
- package/dist/rules/security/SEC-009.d.ts +9 -0
- package/dist/rules/security/SEC-009.js +93 -0
- package/dist/rules/security/SEC-009.js.map +1 -0
- package/dist/rules/security/SEC-010.d.ts +8 -0
- package/dist/rules/security/SEC-010.js +78 -0
- package/dist/rules/security/SEC-010.js.map +1 -0
- package/dist/rules/security/SEC-011.d.ts +8 -0
- package/dist/rules/security/SEC-011.js +93 -0
- package/dist/rules/security/SEC-011.js.map +1 -0
- package/dist/rules/security/SEC-012.d.ts +8 -0
- package/dist/rules/security/SEC-012.js +79 -0
- package/dist/rules/security/SEC-012.js.map +1 -0
- package/dist/rules/security/SEC-013.d.ts +9 -0
- package/dist/rules/security/SEC-013.js +107 -0
- package/dist/rules/security/SEC-013.js.map +1 -0
- package/dist/scoring/calculator.js +1 -0
- package/dist/scoring/calculator.js.map +1 -1
- package/dist/ui/ink/components/AIRecommendationCard.d.ts +11 -0
- package/dist/ui/ink/components/AIRecommendationCard.js +23 -0
- package/dist/ui/ink/components/AIRecommendationCard.js.map +1 -0
- package/dist/ui/ink/components/OpportunityList.d.ts +10 -0
- package/dist/ui/ink/components/OpportunityList.js +48 -0
- package/dist/ui/ink/components/OpportunityList.js.map +1 -0
- package/dist/ui/ink/components/PotentialPageCard.d.ts +13 -0
- package/dist/ui/ink/components/PotentialPageCard.js +43 -0
- package/dist/ui/ink/components/PotentialPageCard.js.map +1 -0
- package/dist/ui/ink/components/PotentialProgress.d.ts +16 -0
- package/dist/ui/ink/components/PotentialProgress.js +44 -0
- package/dist/ui/ink/components/PotentialProgress.js.map +1 -0
- package/dist/ui/ink/components/PotentialSummary.d.ts +10 -0
- package/dist/ui/ink/components/PotentialSummary.js +86 -0
- package/dist/ui/ink/components/PotentialSummary.js.map +1 -0
- package/dist/ui/ink/components/SuggestionCard.d.ts +34 -0
- package/dist/ui/ink/components/SuggestionCard.js +36 -0
- package/dist/ui/ink/components/SuggestionCard.js.map +1 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.d.ts +21 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.js +55 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.js.map +1 -0
- package/dist/ui/ink/components/views/PotentialView.d.ts +18 -0
- package/dist/ui/ink/components/views/PotentialView.js +74 -0
- package/dist/ui/ink/components/views/PotentialView.js.map +1 -0
- package/dist/ui/ink/components/views/ReconView.d.ts +22 -0
- package/dist/ui/ink/components/views/ReconView.js +30 -0
- package/dist/ui/ink/components/views/ReconView.js.map +1 -0
- package/package.json +2 -1
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Robots.txt Parser
|
|
3
|
+
*
|
|
4
|
+
* Fetches and parses robots.txt. Extracts allowed/disallowed paths,
|
|
5
|
+
* crawl-delay, and sitemap references. Handles missing robots.txt gracefully.
|
|
6
|
+
*/
|
|
7
|
+
/** Default upper bound, in milliseconds, on how long a request may stay pending. */
const FETCH_TIMEOUT_MS = 10_000;
/**
 * Issue a request that is aborted when no response arrives in time.
 *
 * @param url - URL to request.
 * @param timeoutMs - Milliseconds to wait before aborting; defaults to FETCH_TIMEOUT_MS.
 * @returns The Response that fetch resolves with (redirects are followed).
 *   The returned promise rejects when fetch rejects, which includes the
 *   abort triggered by the timeout.
 */
async function fetchWithTimeout(url, timeoutMs = FETCH_TIMEOUT_MS) {
    const aborter = new AbortController();
    const abortHandle = setTimeout(() => {
        aborter.abort();
    }, timeoutMs);
    try {
        const response = await fetch(url, {
            redirect: 'follow',
            headers: { 'User-Agent': 'WebMCP-CLI/1.0 (robots-parser)' },
            signal: aborter.signal,
        });
        return response;
    }
    finally {
        // The timer is cancelled as soon as fetch settles, so it does not
        // bound the time a caller later spends reading the response body.
        clearTimeout(abortHandle);
    }
}
|
|
26
|
+
/**
 * Parse robots.txt content into structured directives.
 *
 * Every `User-agent` line produces one entry in `directives`. Consecutive
 * `User-agent` lines form a group: the allow / disallow / crawl-delay rules
 * that follow are recorded on every entry of that group, not only the last.
 * `Sitemap` lines are global and never affect grouping. Lines without a colon,
 * unknown fields, and rules that appear before any `User-agent` are ignored.
 *
 * @param raw - Raw robots.txt text (LF, CRLF or CR line endings).
 * @returns `directives` (one `{ userAgent, allow, disallow, crawlDelay? }`
 *   object per User-agent line, in file order) and `sitemapUrls`.
 */
function parseRobotsTxt(raw) {
    const directives = [];
    const sitemapUrls = [];
    // Entries that receive the rules currently being read.
    let group = [];
    // True while the most recent grouping-relevant line was a User-agent line,
    // i.e. the next User-agent line extends the group instead of starting one.
    let readingAgents = false;
    for (const rawLine of raw.split(/\r\n|\r|\n/)) {
        // Strip comments and surrounding whitespace
        const commentIdx = rawLine.indexOf('#');
        const line = (commentIdx === -1 ? rawLine : rawLine.substring(0, commentIdx)).trim();
        if (!line)
            continue;
        const colonIdx = line.indexOf(':');
        if (colonIdx === -1)
            continue;
        const field = line.substring(0, colonIdx).trim().toLowerCase();
        const value = line.substring(colonIdx + 1).trim();
        if (field === 'user-agent') {
            const entry = { userAgent: value, allow: [], disallow: [] };
            if (readingAgents) {
                group.push(entry);
            }
            else {
                group = [entry];
            }
            readingAgents = true;
            directives.push(entry);
            continue;
        }
        if (field === 'sitemap') {
            // Sitemap directives are global, not per user-agent
            if (value)
                sitemapUrls.push(value);
            continue;
        }
        if (field !== 'allow' && field !== 'disallow' && field !== 'crawl-delay')
            continue;
        // Any rule line (even one with an empty value, such as a bare
        // "Disallow:") closes the run of User-agent lines.
        readingAgents = false;
        if (field === 'crawl-delay') {
            const delay = parseFloat(value);
            if (!Number.isNaN(delay) && delay >= 0) {
                for (const entry of group)
                    entry.crawlDelay = delay;
            }
            continue;
        }
        if (value) {
            for (const entry of group)
                entry[field].push(value);
        }
    }
    return { directives, sitemapUrls };
}
|
|
82
|
+
/**
 * Test a path against one robots.txt rule pattern.
 *
 * A pattern is a path prefix in which `*` matches any run of characters and a
 * trailing `$` anchors the match to the end of the path. Matching is done by
 * scanning for the literal segments between wildcards (no regular expression
 * is built from the untrusted pattern).
 */
function matchesRobotsPattern(path, pattern) {
    const anchored = pattern.endsWith('$');
    const [prefix, ...rest] = (anchored ? pattern.slice(0, -1) : pattern).split('*');
    if (!path.startsWith(prefix))
        return false;
    let cursor = prefix.length;
    if (rest.length === 0) {
        // No wildcard: plain prefix match, or exact match when anchored.
        return !anchored || cursor === path.length;
    }
    const tail = rest.pop();
    for (const segment of rest) {
        // Leftmost match leaves the most room for the remaining segments.
        const at = path.indexOf(segment, cursor);
        if (at === -1)
            return false;
        cursor = at + segment.length;
    }
    if (anchored) {
        return path.length - tail.length >= cursor && path.endsWith(tail);
    }
    return path.indexOf(tail, cursor) !== -1;
}
/**
 * Check whether a path is allowed by the robots.txt directives.
 *
 * The groups whose user-agent equals `userAgent` (case-insensitive) are used;
 * when there are none, the wildcard (`*`) groups are used instead. All
 * selected groups are combined. Among the rules that match the path the
 * longest pattern wins, and an allow rule wins a tie.
 *
 * @param path - URL path to test.
 * @param directives - Parsed groups (`{ userAgent, allow, disallow }`).
 * @param userAgent - Agent name to select groups for; defaults to `*`.
 * @returns `true` when the path may be fetched (including when no group applies).
 */
export function isPathAllowed(path, directives, userAgent = '*') {
    const wanted = userAgent.toLowerCase();
    let groups = directives.filter((d) => d.userAgent.toLowerCase() === wanted);
    if (groups.length === 0) {
        groups = directives.filter((d) => d.userAgent === '*');
    }
    if (groups.length === 0)
        return true; // No rules = allowed
    // Length of the longest pattern in `rules` that matches the path (0 = none).
    const longestMatch = (rules) => rules.reduce((best, rule) => (rule.length > best && matchesRobotsPattern(path, rule) ? rule.length : best), 0);
    const longestDisallow = longestMatch(groups.flatMap((g) => g.disallow));
    const longestAllow = longestMatch(groups.flatMap((g) => g.allow));
    return longestAllow >= longestDisallow;
}
|
|
111
|
+
/**
 * Fetch and parse robots.txt for a site.
 *
 * @param baseUrl - Site URL; `/robots.txt` is resolved against it.
 * @returns An object with `found`, `directives`, `sitemapUrls` and `raw`.
 *   When the file cannot be used (non-OK status, HTML response, or a thrown
 *   fetch/read error) `found` is false, the collections are empty, `raw` is
 *   '' and `error` describes the reason.
 */
export async function parseRobots(baseUrl) {
    const robotsUrl = new URL('/robots.txt', baseUrl).href;
    // Shape shared by every "no usable robots.txt" outcome.
    const notFound = (error) => ({
        found: false,
        directives: [],
        sitemapUrls: [],
        raw: '',
        error,
    });
    try {
        const response = await fetchWithTimeout(robotsUrl);
        if (!response.ok) {
            return notFound(`HTTP ${response.status}`);
        }
        // Robots.txt should be text/plain; an HTML body is usually a soft 404 page
        const contentType = response.headers.get('content-type') ?? '';
        if (contentType.includes('text/html')) {
            return notFound('Response was HTML, not robots.txt');
        }
        const raw = await response.text();
        const parsed = parseRobotsTxt(raw);
        return {
            found: true,
            directives: parsed.directives,
            sitemapUrls: parsed.sitemapUrls,
            raw,
        };
    }
    catch (error) {
        return notFound(error instanceof Error ? error.message : 'Unknown error');
    }
}
|
|
158
|
+
//# sourceMappingURL=robots.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/recon/robots.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,yBAAyB;AACzB,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,YAAoB,gBAAgB;IAEpC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,CAAC,CAAC;IAC9D,IAAI,CAAC;QACH,OAAO,MAAM,KAAK,CAAC,GAAG,EAAE;YACtB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,OAAO,EAAE,EAAE,YAAY,EAAE,gCAAgC,EAAE;YAC3D,QAAQ,EAAE,QAAQ;SACnB,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,GAAW;IAEX,MAAM,UAAU,GAAsB,EAAE,CAAC;IACzC,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,IAAI,OAAO,GAA2B,IAAI,CAAC;IAE3C,KAAK,MAAM,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACtC,gCAAgC;QAChC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,IAAI,GAAG,CAAC,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QACrF,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,QAAQ,KAAK,CAAC,CAAC;YAAE,SAAS;QAE9B,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC/D,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAElD,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;YAC3B,8BAA8B;YAC9B,OAAO,GAAG;gBACR,SAAS,EAAE,KAAK;gBAChB,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;aACb,CAAC;YACF,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,SAAS;QACX,CAAC;QAED,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,oDAAoD;YACpD,IAAI,KAAK;gBAAE,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnC,SAAS;QACX,CAAC;QAED,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO;gBACV,IAAI,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACrC,MAAM;YACR,KAAK,UAAU;gBACb,IAAI,KAAK;oBAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACxC,MAAM;YACR,KAAK,aAAa,CAAC,CAAC,CAAC;gBACnB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;gBAChC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,C
AAC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,UAAU,GAAG,KAAK,CAAC;gBAC7B,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,IAAY,EACZ,UAA6B,EAC7B,YAAoB,GAAG;IAEvB,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAC9B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE,KAAK,SAAS,CAAC,WAAW,EAAE,CAC7D,CAAC;IACF,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,GAAG,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,QAAQ,IAAI,QAAQ,CAAC;IAEvC,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC,CAAC,qBAAqB;IAElD,4CAA4C;IAC5C,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YAC3D,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC;QAChC,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACnC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;YACxD,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,+CAA+C;IAC/C,IAAI,YAAY,IAAI,eAAe;QAAE,OAAO,IAAI,CAAC;IACjD,OAAO,eAAe,KAAK,CAAC,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAAe;IAC/C,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;QAE7C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,EAAE;gBACf,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,EAAE;aACjC,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,kEAAkE;QAClE,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACtC,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,EAAE;gBACf,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,mCAAmC;aAC3C,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClC,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;QAExD,OAAO;YACL,KAAK,EAAE,IAAI;YACX,UAAU;YACV,WAAW;YACX,GAAG;SACJ,CAAC;IACJ,CAAC
;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QACzE,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,UAAU,EAAE,EAAE;YACd,WAAW,EAAE,EAAE;YACf,GAAG,EAAE,EAAE;YACP,KAAK,EAAE,OAAO;SACf,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Route Discovery
|
|
3
|
+
*
|
|
4
|
+
* Builds a route graph from all recon sources: sitemap, HTML links,
|
|
5
|
+
* form actions. Deduplicates and assigns preliminary priorities using
|
|
6
|
+
* the scoring algorithm from the technical plan.
|
|
7
|
+
*/
|
|
8
|
+
import type { Route, SitemapEntry, RobotsDirective } from '../core/types/recon.js';
|
|
9
|
+
/**
 * Argument object accepted by discoverRoutes: the site's base URL together
 * with the root HTML, sitemap entries and robots.txt directives.
 */
export interface RouteDiscoveryInput {
|
|
10
|
+
/** Base URL of the site. NOTE(review): presumably an absolute URL — confirm against callers. */
|
|
11
|
+
baseUrl: string;
|
|
12
|
+
/** HTML of the root page, used for link extraction */
|
|
13
|
+
rootHtml: string;
|
|
14
|
+
/** Sitemap entries (if any). NOTE(review): presumably an empty array when there is no sitemap — confirm. */
|
|
15
|
+
sitemapEntries: SitemapEntry[];
|
|
16
|
+
/** Robots.txt directives used for filtering */
|
|
17
|
+
robotsDirectives: RobotsDirective[];
|
|
18
|
+
/** Whether to respect robots.txt disallow rules */
|
|
19
|
+
respectRobotsTxt: boolean;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Discover all routes from sitemap, HTML, and form actions.
|
|
23
|
+
* Deduplicates, classifies, scores, and sorts by priority.
 *
 * @param input - Recon data to build the route list from (see RouteDiscoveryInput).
 * @returns The discovered routes. NOTE(review): presumably ordered highest
 *   priority first — confirm against the implementation.
|
|
24
|
+
*/
|
|
25
|
+
export declare function discoverRoutes(input: RouteDiscoveryInput): Route[];
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Route Discovery
|
|
3
|
+
*
|
|
4
|
+
* Builds a route graph from all recon sources: sitemap, HTML links,
|
|
5
|
+
* form actions. Deduplicates and assigns preliminary priorities using
|
|
6
|
+
* the scoring algorithm from the technical plan.
|
|
7
|
+
*/
|
|
8
|
+
import * as cheerio from 'cheerio';
|
|
9
|
+
import { isPathAllowed } from './robots.js';
|
|
10
|
+
/**
 * Page type heuristics based on URL patterns.
 *
 * Entries are tried in order and the first match wins. An entry is tested
 * against the URL's pathname unless it sets `matchQuery: true`, in which case
 * it is tested against the query string (including the leading `?`).
 */
const PAGE_TYPE_PATTERNS = [
    { pattern: /\/(search|find|browse|discover)\b/i, type: 'search' },
    // A `q` parameter lives in the query string, never in the pathname.
    { pattern: /[?&]q=/i, type: 'search', matchQuery: true },
    { pattern: /\/(results|listings|products|catalog)\b/i, type: 'listing-results' },
    { pattern: /\/(product|item|detail)\//i, type: 'detail-page' },
    { pattern: /\/(cart|basket)\b/i, type: 'checkout-cart' },
    { pattern: /\/(checkout|pay|payment|purchase)\b/i, type: 'checkout-payment' },
    { pattern: /\/(account|profile|my-?account|user)\b/i, type: 'account-management' },
    { pattern: /\/(settings|preferences|config)\b/i, type: 'settings' },
    { pattern: /\/(login|signin|sign-in|auth|register|signup|sign-up)\b/i, type: 'authentication' },
    { pattern: /\/(dashboard|admin|overview)\b/i, type: 'dashboard' },
    { pattern: /\/(contact|contact-us|reach-us)\b/i, type: 'contact' },
    { pattern: /\/(help|support|faq|docs|documentation|knowledge-?base)\b/i, type: 'help-support' },
    { pattern: /\/(blog|article|post|news|press)\b/i, type: 'content-article' },
    { pattern: /\/(about|team|careers|company)\b/i, type: 'content-article' },
];
/**
 * Page type scores for priority calculation (higher = more important)
 */
const PAGE_TYPE_SCORES = {
    search: 25,
    'checkout-payment': 25,
    'checkout-cart': 20,
    'listing-results': 20,
    homepage: 20,
    'detail-page': 15,
    'account-management': 15,
    'form-submission': 15,
    settings: 10,
    authentication: 10,
    dashboard: 10,
    contact: 10,
    'help-support': 5,
    'content-article': 3,
    unknown: 10,
};
/**
 * Classify a URL into a page type based on URL patterns.
 *
 * @param url - Absolute URL (the URL constructor throws on anything else).
 * @returns 'homepage' for the root path, otherwise the type of the first
 *   matching PAGE_TYPE_PATTERNS entry, or 'unknown' when none matches.
 */
function classifyPageType(url) {
    const { pathname, search } = new URL(url);
    // Root path = homepage
    if (pathname === '/' || pathname === '')
        return 'homepage';
    for (const { pattern, type, matchQuery } of PAGE_TYPE_PATTERNS) {
        // Path patterns never see the query string, so a value such as
        // "?next=/login" cannot trigger a path-based classification.
        if (pattern.test(matchQuery ? search : pathname))
            return type;
    }
    return 'unknown';
}
|
|
63
|
+
/**
 * Compute a route's priority score (higher = more important).
 *
 * Adds 30 when the route has a form, the PAGE_TYPE_SCORES value for its
 * estimated page type (0 when the type has no entry) and 10 when it has
 * interactive elements, then subtracts 3 per level of depth. The result is
 * clamped so it is never below 0.
 */
function calculatePriorityScore(route) {
    // Forms are the #1 signal for agentic value
    const formBonus = route.hasForm ? 30 : 0;
    const pageTypeScore = PAGE_TYPE_SCORES[route.estimatedPageType] ?? 0;
    // Deeper routes are considered less important
    const depthPenalty = route.depth * 3;
    const interactiveBonus = route.hasInteractiveElements ? 10 : 0;
    return Math.max(0, formBonus + pageTypeScore - depthPenalty + interactiveBonus);
}
|
|
80
|
+
/**
 * Map a numeric priority score onto its priority label.
 *
 * Thresholds (inclusive lower bounds): 40 critical, 25 high, 15 medium,
 * 5 low; anything else is 'skip'.
 */
function scoreToPriority(score) {
    // Ordered from the highest threshold down; the first one reached wins.
    const tiers = [
        { min: 40, label: 'critical' },
        { min: 25, label: 'high' },
        { min: 15, label: 'medium' },
        { min: 5, label: 'low' },
    ];
    for (const tier of tiers) {
        if (score >= tier.min) {
            return tier.label;
        }
    }
    return 'skip';
}
|
|
94
|
+
/**
 * Report whether `url` parses as an absolute URL whose origin equals
 * `baseOrigin`. Unparseable input yields false rather than throwing.
 */
function isSameOrigin(url, baseOrigin) {
    let candidateOrigin;
    try {
        candidateOrigin = new URL(url).origin;
    }
    catch {
        // Not a valid absolute URL, so it cannot share the origin.
        return false;
    }
    return candidateOrigin === baseOrigin;
}
|
|
105
|
+
/**
 * Produce a canonical form of a URL for deduplication.
 *
 * The fragment is dropped, query parameters are sorted by name, and a
 * trailing slash is removed from every path except the root "/".
 * Input that cannot be parsed as an absolute URL is returned unchanged.
 */
function normalizeUrl(url) {
    try {
        const target = new URL(url);
        // Fragments never identify a different page.
        target.hash = '';
        // Stable parameter order so "?a=1&b=2" and "?b=2&a=1" compare equal.
        target.searchParams.sort();
        const path = target.pathname;
        const hasTrailingSlash = path !== '/' && path.endsWith('/');
        target.pathname = hasTrailingSlash ? path.slice(0, -1) : path;
        return target.href;
    }
    catch {
        return url;
    }
}
|
|
127
|
+
/**
 * Collect same-origin URLs referenced by an HTML document.
 *
 * Three passes run in this order, each appending to the result:
 *   1. anchors inside <nav> or <header>   -> source 'navigation'
 *   2. every anchor with an href          -> source 'link'
 *      (hrefs starting with '#', 'javascript:', 'mailto:' or 'tel:' are skipped)
 *   3. form action attributes             -> source 'form-action', hasForm true
 *
 * Values are resolved against `baseUrl`; anything that fails to resolve or
 * resolves to another origin is dropped. Duplicates are not removed here.
 *
 * Returns an array of { url, source, hasForm } objects.
 */
function extractHtmlLinks(html, baseUrl) {
    const $ = cheerio.load(html);
    const baseOrigin = new URL(baseUrl).origin;
    const links = [];
    const nonNavigablePrefixes = ['#', 'javascript:', 'mailto:', 'tel:'];
    // Shared pass: read `attribute` from every element matching `selector`,
    // resolve it, and record it when it stays on the base origin.
    const collect = (selector, attribute, source, hasForm, skipNonNavigable) => {
        $(selector).each((_i, el) => {
            const raw = $(el).attr(attribute);
            if (!raw) {
                return;
            }
            if (skipNonNavigable && nonNavigablePrefixes.some((prefix) => raw.startsWith(prefix))) {
                return;
            }
            let resolved;
            try {
                resolved = new URL(raw, baseUrl).href;
            }
            catch {
                // Invalid URL — skip
                return;
            }
            if (isSameOrigin(resolved, baseOrigin)) {
                links.push({ url: resolved, source, hasForm });
            }
        });
    };
    // Navigation links (<a> in nav, header)
    collect('nav a[href], header a[href]', 'href', 'navigation', false, false);
    // All other links
    collect('a[href]', 'href', 'link', false, true);
    // Form actions
    collect('form[action]', 'action', 'form-action', true, false);
    return links;
}
|
|
185
|
+
/**
 * Lightweight check for non-form interactive markup in an HTML string.
 *
 * Returns true when the document contains at least one element matching any
 * of: <button>, [role="button"], [onclick], [data-action], <details>,
 * [role="tab"]. Forms themselves are not considered here.
 */
function htmlHasInteractiveElements(html) {
    const $ = cheerio.load(html);
    const interactiveSelectors = [
        'button',
        '[role="button"]',
        '[onclick]',
        '[data-action]',
        'details',
        '[role="tab"]',
    ];
    // Stops at the first selector that matches anything.
    return interactiveSelectors.some((selector) => $(selector).length > 0);
}
|
|
199
|
+
/**
 * Build the prioritized route list for a site.
 *
 * Candidate URLs come from three places, registered in this order: the base
 * URL itself (depth 0), the sitemap entries (depth 1), and links / form
 * actions found in the root HTML (depth 1). Each candidate is normalized,
 * filtered (same origin, robots.txt when enabled, no static-asset
 * extensions) and deduplicated by normalized URL. Every surviving route is
 * then classified, scored, labelled, and the list is returned sorted by
 * score, highest first.
 *
 * `input` carries: baseUrl, rootHtml, sitemapEntries, robotsDirectives,
 * respectRobotsTxt.
 */
export function discoverRoutes(input) {
    const { baseUrl, rootHtml, sitemapEntries, robotsDirectives, respectRobotsTxt } = input;
    const siteOrigin = new URL(baseUrl).origin;
    // normalized URL -> route record
    const routesByUrl = new Map();
    // Fold a repeat sighting into the record that is already stored: keep the
    // shallowest depth, turn hasForm on if any sighting had it, and fill in
    // sitemap metadata only where the stored record has none.
    const mergeSighting = (known, depth, extras) => {
        if (depth < known.depth) {
            known.depth = depth;
        }
        if (extras?.hasForm) {
            known.hasForm = true;
        }
        if (extras?.lastmod && !known.lastmod) {
            known.lastmod = extras.lastmod;
        }
        if (extras?.changefreq && !known.changefreq) {
            known.changefreq = extras.changefreq;
        }
        if (extras?.sitemapPriority != null && known.sitemapPriority == null) {
            known.sitemapPriority = extras.sitemapPriority;
        }
    };
    // Register a candidate URL. The first sighting decides `source`; later
    // sightings of the same normalized URL are merged into it.
    const addRoute = (url, source, depth, extras) => {
        const key = normalizeUrl(url);
        // Skip non-same-origin (this also rejects anything that does not parse)
        if (!isSameOrigin(key, siteOrigin)) {
            return;
        }
        const { pathname } = new URL(key);
        // Respect robots.txt
        if (respectRobotsTxt && robotsDirectives.length > 0 && !isPathAllowed(pathname, robotsDirectives)) {
            return;
        }
        // Skip known non-page resources
        if (/\.(jpg|jpeg|png|gif|svg|webp|ico|css|js|woff|woff2|ttf|eot|pdf|zip|tar|gz)$/i.test(pathname.toLowerCase())) {
            return;
        }
        const known = routesByUrl.get(key);
        if (known) {
            mergeSighting(known, depth, extras);
            return;
        }
        routesByUrl.set(key, {
            url: key,
            source,
            depth,
            hasForm: extras?.hasForm ?? false,
            hasInteractiveElements: false, // Updated later from HTML analysis
            estimatedPageType: classifyPageType(key),
            priority: 'medium', // Calculated after all properties set
            priorityScore: 0,
            lastmod: extras?.lastmod,
            changefreq: extras?.changefreq,
            sitemapPriority: extras?.sitemapPriority,
        });
    };
    // 1. The root URL itself
    addRoute(baseUrl, 'navigation', 0, { hasForm: false });
    // 2. Sitemap entries; a bad entry is skipped without aborting the rest
    for (const entry of sitemapEntries) {
        try {
            const absolute = new URL(entry.loc, baseUrl).href;
            addRoute(absolute, 'sitemap', 1, {
                lastmod: entry.lastmod,
                changefreq: entry.changefreq,
                sitemapPriority: entry.priority,
            });
        }
        catch {
            // Invalid sitemap URL — skip
        }
    }
    // 3. Links and form actions found in the root HTML
    extractHtmlLinks(rootHtml, baseUrl).forEach((link) => {
        addRoute(link.url, link.source, 1, { hasForm: link.hasForm });
    });
    // 4. Annotate the root route from what its own HTML contains
    const rootIsInteractive = htmlHasInteractiveElements(rootHtml);
    const rootEntry = routesByUrl.get(normalizeUrl(baseUrl));
    if (rootEntry) {
        rootEntry.hasInteractiveElements = rootIsInteractive;
        const $ = cheerio.load(rootHtml);
        if ($('form').length > 0) {
            rootEntry.hasForm = true;
        }
    }
    // 5. Score and label every route
    const ranked = Array.from(routesByUrl.values());
    for (const route of ranked) {
        route.priorityScore = calculatePriorityScore(route);
        route.priority = scoreToPriority(route.priorityScore);
    }
    // 6. Highest score first
    ranked.sort((a, b) => b.priorityScore - a.priorityScore);
    return ranked;
}
|
|
303
|
+
//# sourceMappingURL=route-discovery.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"route-discovery.js","sourceRoot":"","sources":["../../src/recon/route-discovery.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,MAAM,kBAAkB,GAA0C;IAChE,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,QAAQ,EAAE;IACjE,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE;IACtC,EAAE,OAAO,EAAE,0CAA0C,EAAE,IAAI,EAAE,iBAAiB,EAAE;IAChF,EAAE,OAAO,EAAE,4BAA4B,EAAE,IAAI,EAAE,aAAa,EAAE;IAC9D,EAAE,OAAO,EAAE,oBAAoB,EAAE,IAAI,EAAE,eAAe,EAAE;IACxD,EAAE,OAAO,EAAE,sCAAsC,EAAE,IAAI,EAAE,kBAAkB,EAAE;IAC7E,EAAE,OAAO,EAAE,yCAAyC,EAAE,IAAI,EAAE,oBAAoB,EAAE;IAClF,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,UAAU,EAAE;IACnE,EAAE,OAAO,EAAE,0DAA0D,EAAE,IAAI,EAAE,gBAAgB,EAAE;IAC/F,EAAE,OAAO,EAAE,iCAAiC,EAAE,IAAI,EAAE,WAAW,EAAE;IACjE,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,SAAS,EAAE;IAClE,EAAE,OAAO,EAAE,4DAA4D,EAAE,IAAI,EAAE,cAAc,EAAE;IAC/F,EAAE,OAAO,EAAE,qCAAqC,EAAE,IAAI,EAAE,iBAAiB,EAAE;IAC3E,EAAE,OAAO,EAAE,mCAAmC,EAAE,IAAI,EAAE,iBAAiB,EAAE;CAC1E,CAAC;AAEF;;GAEG;AACH,MAAM,gBAAgB,GAA6B;IACjD,MAAM,EAAE,EAAE;IACV,kBAAkB,EAAE,EAAE;IACtB,eAAe,EAAE,EAAE;IACnB,iBAAiB,EAAE,EAAE;IACrB,QAAQ,EAAE,EAAE;IACZ,aAAa,EAAE,EAAE;IACjB,oBAAoB,EAAE,EAAE;IACxB,iBAAiB,EAAE,EAAE;IACrB,QAAQ,EAAE,EAAE;IACZ,cAAc,EAAE,EAAE;IAClB,SAAS,EAAE,EAAE;IACb,OAAO,EAAE,EAAE;IACX,cAAc,EAAE,CAAC;IACjB,iBAAiB,EAAE,CAAC;IACpB,OAAO,EAAE,EAAE;CACZ,CAAC;AAEF;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAW;IACnC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAEvC,uBAAuB;IACvB,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,EAAE;QAAE,OAAO,UAAU,CAAC;IAE3D,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,kBAAkB,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,OAAO,IAAI,CAAC;IAC1C,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,KAAwF;IACtH,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,4CAA4C;IAC5C,IAAI,KAAK,CAAC,OAAO;QAAE,KAAK,IAAI,EAAE,CAAC;IAE/B,mBAAmB;IACnB,KAAK,IAAI,gBAAgB,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAExD,0CAA0C;IAC1C,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;IAEz
B,6BAA6B;IAC7B,IAAI,KAAK,CAAC,sBAAsB;QAAE,KAAK,IAAI,EAAE,CAAC;IAE9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,UAAU,CAAC;IACnC,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,MAAM,CAAC;IAC/B,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,QAAQ,CAAC;IACjC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,GAAW,EAAE,UAAkB;IACnD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,GAAW;IAC/B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,cAAc;QACd,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACjB,+CAA+C;QAC/C,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAC3B,0CAA0C;QAC1C,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC/B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC3B,OAAO,MAAM,CAAC,IAAI,CAAC;IACrB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,OAAe;IAEf,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC3C,MAAM,KAAK,GAA6D,EAAE,CAAC;IAE3E,wCAAwC;IACxC,CAAC,CAAC,6BAA6B,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC7C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kBAAkB;IAClB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC3B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,O
AAO;QAClB,+CAA+C;QAC/C,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,OAAO;QAClE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC7C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,eAAe;IACf,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpC,IAAI,CAAC,MAAM;YAAE,OAAO;QACpB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC/C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,SAAS,0BAA0B,CAAC,IAAY;IAC9C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,8CAA8C;IAC9C,OAAO,CACL,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;QACtB,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM,GAAG,CAAC;QAC/B,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC;QACzB,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC;QAC7B,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC;QACvB,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM,GAAG,CAAC,CAC7B,CAAC;AACJ,CAAC;AAmBD;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAA0B;IACvD,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,GAAG,KAAK,CAAC;IACxF,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAE3C,8BAA8B;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAiB,CAAC;IAE1C;;;OAGG;IACH,SAAS,QAAQ,CACf,GAAW,EACX,MAAmB,EACnB,KAAa,EACb,MAKC;QAED,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;QAErC,uBAAuB;QACvB,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,UAAU,CAAC;YAAE,OAAO;QAElD,qBAAqB;QACrB,IAAI,gBAAgB,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAA
C;YAC9C,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,gBAAgB,CAAC;gBAAE,OAAO;QACzD,CAAC;QAED,gCAAgC;QAChC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC5D,IAAI,8EAA8E,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClG,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC1C,IAAI,QAAQ,EAAE,CAAC;YACb,4DAA4D;YAC5D,IAAI,KAAK,GAAG,QAAQ,CAAC,KAAK;gBAAE,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;YACnD,IAAI,MAAM,EAAE,OAAO;gBAAE,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;YAC7C,IAAI,MAAM,EAAE,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO;gBAAE,QAAQ,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;YAC5E,IAAI,MAAM,EAAE,UAAU,IAAI,CAAC,QAAQ,CAAC,UAAU;gBAAE,QAAQ,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;YACxF,IAAI,MAAM,EAAE,eAAe,IAAI,IAAI,IAAI,QAAQ,CAAC,eAAe,IAAI,IAAI,EAAE,CAAC;gBACxE,QAAQ,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;YACpD,CAAC;YACD,OAAO;QACT,CAAC;QAED,MAAM,iBAAiB,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACvD,MAAM,OAAO,GAAG,MAAM,EAAE,OAAO,IAAI,KAAK,CAAC;QAEzC,MAAM,KAAK,GAAU;YACnB,GAAG,EAAE,UAAU;YACf,MAAM;YACN,KAAK;YACL,OAAO;YACP,sBAAsB,EAAE,KAAK,EAAE,mCAAmC;YAClE,iBAAiB;YACjB,QAAQ,EAAE,QAAQ,EAAE,sCAAsC;YAC1D,aAAa,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,OAAO;YACxB,UAAU,EAAE,MAAM,EAAE,UAAU;YAC9B,eAAe,EAAE,MAAM,EAAE,eAAe;SACzC,CAAC;QAEF,QAAQ,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC;IAED,kBAAkB;IAClB,QAAQ,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;IAEvD,yBAAyB;IACzB,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAClD,QAAQ,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;gBAC/B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,eAAe,EAAE,KAAK,CAAC,QAAQ;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,6BAA6B;QAC/B,CAAC;IACH,CAAC;IAED,kCAAkC;IAClC,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACtD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,8CAA8C;IAC9C,MAAM,kBAAkB,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC
;IAChE,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC/C,IAAI,SAAS,EAAE,CAAC;QACd,SAAS,CAAC,sBAAsB,GAAG,kBAAkB,CAAC;QACtD,kDAAkD;QAClD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,KAAK,CAAC,aAAa,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC;QACpD,KAAK,CAAC,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC;IAED,yCAAyC;IACzC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CACxC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,aAAa,CAC5C,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sitemap Parser
|
|
3
|
+
*
|
|
4
|
+
* Fetches and parses sitemap.xml and sitemap index files.
|
|
5
|
+
* Uses native fetch. Handles missing/malformed sitemaps gracefully.
|
|
6
|
+
*/
|
|
7
|
+
import type { SitemapResult } from '../core/types/recon.js';
|
|
8
|
+
/**
|
|
9
|
+
* Fetch and parse a sitemap from a URL.
|
|
10
|
+
* Handles sitemap index files by recursively fetching children.
*
* @param baseUrl - Base URL of the site (NOTE(review): presumably used to derive the default sitemap location when `sitemapUrl` is omitted — confirm in the implementation).
* @param sitemapUrl - Optional explicit sitemap URL.
* @returns Promise resolving to a `SitemapResult`.
|
|
11
|
+
*/
|
|
12
|
+
export declare function parseSitemap(baseUrl: string, sitemapUrl?: string): Promise<SitemapResult>;
|