webmcp-cli 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/form-to-tool-mapper.d.ts +61 -0
- package/dist/analysis/form-to-tool-mapper.js +360 -0
- package/dist/analysis/form-to-tool-mapper.js.map +1 -0
- package/dist/analysis/index.d.ts +84 -0
- package/dist/analysis/index.js +81 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/missing-tool-analyzer.d.ts +35 -0
- package/dist/analysis/missing-tool-analyzer.js +617 -0
- package/dist/analysis/missing-tool-analyzer.js.map +1 -0
- package/dist/audit/run-multi-page-audit.d.ts +34 -0
- package/dist/audit/run-multi-page-audit.js +233 -0
- package/dist/audit/run-multi-page-audit.js.map +1 -0
- package/dist/cli/commands/potential.d.ts +8 -0
- package/dist/cli/commands/potential.js +323 -0
- package/dist/cli/commands/potential.js.map +1 -0
- package/dist/cli/commands/report.d.ts +12 -0
- package/dist/cli/commands/report.js +89 -0
- package/dist/cli/commands/report.js.map +1 -0
- package/dist/cli/index.js +35 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/config/defaults.d.ts +36 -0
- package/dist/config/defaults.js +33 -0
- package/dist/config/defaults.js.map +1 -0
- package/dist/config/index.d.ts +7 -0
- package/dist/config/index.js +7 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/loader.d.ts +22 -0
- package/dist/config/loader.js +91 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +280 -0
- package/dist/config/schema.js +42 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/core/types/audit.d.ts +1 -1
- package/dist/core/types/index.d.ts +1 -0
- package/dist/core/types/index.js +1 -0
- package/dist/core/types/index.js.map +1 -1
- package/dist/core/types/recon.d.ts +265 -0
- package/dist/core/types/recon.js +5 -0
- package/dist/core/types/recon.js.map +1 -0
- package/dist/core/types/rule.d.ts +1 -1
- package/dist/core/types/rule.js +7 -5
- package/dist/core/types/rule.js.map +1 -1
- package/dist/crawler/depth-crawler.d.ts +29 -0
- package/dist/crawler/depth-crawler.js +212 -0
- package/dist/crawler/depth-crawler.js.map +1 -0
- package/dist/crawler/index.d.ts +2 -0
- package/dist/crawler/index.js +3 -0
- package/dist/crawler/index.js.map +1 -0
- package/dist/crawler/link-extractor.d.ts +1 -0
- package/dist/crawler/link-extractor.js +49 -0
- package/dist/crawler/link-extractor.js.map +1 -0
- package/dist/generators/index.d.ts +10 -0
- package/dist/generators/index.js +8 -0
- package/dist/generators/index.js.map +1 -0
- package/dist/generators/report-html.d.ts +12 -0
- package/dist/generators/report-html.js +470 -0
- package/dist/generators/report-html.js.map +1 -0
- package/dist/generators/report-json.d.ts +95 -0
- package/dist/generators/report-json.js +144 -0
- package/dist/generators/report-json.js.map +1 -0
- package/dist/generators/report-manager.d.ts +31 -0
- package/dist/generators/report-manager.js +208 -0
- package/dist/generators/report-manager.js.map +1 -0
- package/dist/generators/tool-code-generator.d.ts +31 -0
- package/dist/generators/tool-code-generator.js +201 -0
- package/dist/generators/tool-code-generator.js.map +1 -0
- package/dist/potential/ai-recommender.d.ts +33 -0
- package/dist/potential/ai-recommender.js +414 -0
- package/dist/potential/ai-recommender.js.map +1 -0
- package/dist/potential/analyzer.d.ts +32 -0
- package/dist/potential/analyzer.js +383 -0
- package/dist/potential/analyzer.js.map +1 -0
- package/dist/potential/index.d.ts +3 -0
- package/dist/potential/index.js +4 -0
- package/dist/potential/index.js.map +1 -0
- package/dist/potential/prompts.d.ts +20 -0
- package/dist/potential/prompts.js +42 -0
- package/dist/potential/prompts.js.map +1 -0
- package/dist/potential/types.d.ts +40 -0
- package/dist/potential/types.js +2 -0
- package/dist/potential/types.js.map +1 -0
- package/dist/recon/index.d.ts +20 -0
- package/dist/recon/index.js +143 -0
- package/dist/recon/index.js.map +1 -0
- package/dist/recon/manifest.d.ts +16 -0
- package/dist/recon/manifest.js +108 -0
- package/dist/recon/manifest.js.map +1 -0
- package/dist/recon/meta-extractor.d.ts +11 -0
- package/dist/recon/meta-extractor.js +276 -0
- package/dist/recon/meta-extractor.js.map +1 -0
- package/dist/recon/robots.d.ts +16 -0
- package/dist/recon/robots.js +158 -0
- package/dist/recon/robots.js.map +1 -0
- package/dist/recon/route-discovery.d.ts +25 -0
- package/dist/recon/route-discovery.js +303 -0
- package/dist/recon/route-discovery.js.map +1 -0
- package/dist/recon/sitemap.d.ts +12 -0
- package/dist/recon/sitemap.js +177 -0
- package/dist/recon/sitemap.js.map +1 -0
- package/dist/rules/accessibility/AXE-001.d.ts +9 -0
- package/dist/rules/accessibility/AXE-001.js +109 -0
- package/dist/rules/accessibility/AXE-001.js.map +1 -0
- package/dist/rules/accessibility/AXE-002.d.ts +8 -0
- package/dist/rules/accessibility/AXE-002.js +85 -0
- package/dist/rules/accessibility/AXE-002.js.map +1 -0
- package/dist/rules/accessibility/AXE-003.d.ts +8 -0
- package/dist/rules/accessibility/AXE-003.js +94 -0
- package/dist/rules/accessibility/AXE-003.js.map +1 -0
- package/dist/rules/accessibility/AXE-004.d.ts +8 -0
- package/dist/rules/accessibility/AXE-004.js +101 -0
- package/dist/rules/accessibility/AXE-004.js.map +1 -0
- package/dist/rules/accessibility/AXE-005.d.ts +9 -0
- package/dist/rules/accessibility/AXE-005.js +89 -0
- package/dist/rules/accessibility/AXE-005.js.map +1 -0
- package/dist/rules/best-practices/BP-004.d.ts +9 -0
- package/dist/rules/best-practices/BP-004.js +96 -0
- package/dist/rules/best-practices/BP-004.js.map +1 -0
- package/dist/rules/best-practices/BP-005.d.ts +8 -0
- package/dist/rules/best-practices/BP-005.js +94 -0
- package/dist/rules/best-practices/BP-005.js.map +1 -0
- package/dist/rules/best-practices/BP-006.d.ts +8 -0
- package/dist/rules/best-practices/BP-006.js +80 -0
- package/dist/rules/best-practices/BP-006.js.map +1 -0
- package/dist/rules/best-practices/BP-007.d.ts +8 -0
- package/dist/rules/best-practices/BP-007.js +92 -0
- package/dist/rules/best-practices/BP-007.js.map +1 -0
- package/dist/rules/best-practices/BP-008.d.ts +12 -0
- package/dist/rules/best-practices/BP-008.js +86 -0
- package/dist/rules/best-practices/BP-008.js.map +1 -0
- package/dist/rules/best-practices/BP-009.d.ts +9 -0
- package/dist/rules/best-practices/BP-009.js +77 -0
- package/dist/rules/best-practices/BP-009.js.map +1 -0
- package/dist/rules/best-practices/BP-010.d.ts +8 -0
- package/dist/rules/best-practices/BP-010.js +85 -0
- package/dist/rules/best-practices/BP-010.js.map +1 -0
- package/dist/rules/coverage/COV-002.d.ts +8 -0
- package/dist/rules/coverage/COV-002.js +68 -0
- package/dist/rules/coverage/COV-002.js.map +1 -0
- package/dist/rules/coverage/COV-003.d.ts +8 -0
- package/dist/rules/coverage/COV-003.js +68 -0
- package/dist/rules/coverage/COV-003.js.map +1 -0
- package/dist/rules/coverage/COV-004.d.ts +8 -0
- package/dist/rules/coverage/COV-004.js +89 -0
- package/dist/rules/coverage/COV-004.js.map +1 -0
- package/dist/rules/coverage/COV-005.d.ts +8 -0
- package/dist/rules/coverage/COV-005.js +67 -0
- package/dist/rules/coverage/COV-005.js.map +1 -0
- package/dist/rules/coverage/COV-006.d.ts +9 -0
- package/dist/rules/coverage/COV-006.js +76 -0
- package/dist/rules/coverage/COV-006.js.map +1 -0
- package/dist/rules/coverage/COV-007.d.ts +8 -0
- package/dist/rules/coverage/COV-007.js +67 -0
- package/dist/rules/coverage/COV-007.js.map +1 -0
- package/dist/rules/coverage/COV-008.d.ts +9 -0
- package/dist/rules/coverage/COV-008.js +87 -0
- package/dist/rules/coverage/COV-008.js.map +1 -0
- package/dist/rules/coverage/COV-009.d.ts +8 -0
- package/dist/rules/coverage/COV-009.js +73 -0
- package/dist/rules/coverage/COV-009.js.map +1 -0
- package/dist/rules/coverage/COV-010.d.ts +9 -0
- package/dist/rules/coverage/COV-010.js +82 -0
- package/dist/rules/coverage/COV-010.js.map +1 -0
- package/dist/rules/description/DESC-001.d.ts +9 -0
- package/dist/rules/description/DESC-001.js +88 -0
- package/dist/rules/description/DESC-001.js.map +1 -0
- package/dist/rules/description/DESC-002.d.ts +10 -0
- package/dist/rules/description/DESC-002.js +99 -0
- package/dist/rules/description/DESC-002.js.map +1 -0
- package/dist/rules/description/DESC-006.d.ts +9 -0
- package/dist/rules/description/DESC-006.js +78 -0
- package/dist/rules/description/DESC-006.js.map +1 -0
- package/dist/rules/description/DESC-007.d.ts +9 -0
- package/dist/rules/description/DESC-007.js +70 -0
- package/dist/rules/description/DESC-007.js.map +1 -0
- package/dist/rules/description/DESC-008.d.ts +9 -0
- package/dist/rules/description/DESC-008.js +70 -0
- package/dist/rules/description/DESC-008.js.map +1 -0
- package/dist/rules/description/DESC-009.d.ts +8 -0
- package/dist/rules/description/DESC-009.js +55 -0
- package/dist/rules/description/DESC-009.js.map +1 -0
- package/dist/rules/description/DESC-010.d.ts +9 -0
- package/dist/rules/description/DESC-010.js +92 -0
- package/dist/rules/description/DESC-010.js.map +1 -0
- package/dist/rules/description/DESC-011.d.ts +9 -0
- package/dist/rules/description/DESC-011.js +81 -0
- package/dist/rules/description/DESC-011.js.map +1 -0
- package/dist/rules/description/DESC-012.d.ts +9 -0
- package/dist/rules/description/DESC-012.js +98 -0
- package/dist/rules/description/DESC-012.js.map +1 -0
- package/dist/rules/implementation/IMP-002.d.ts +9 -0
- package/dist/rules/implementation/IMP-002.js +59 -0
- package/dist/rules/implementation/IMP-002.js.map +1 -0
- package/dist/rules/implementation/IMP-006.d.ts +9 -0
- package/dist/rules/implementation/IMP-006.js +48 -0
- package/dist/rules/implementation/IMP-006.js.map +1 -0
- package/dist/rules/implementation/IMP-008.d.ts +9 -0
- package/dist/rules/implementation/IMP-008.js +46 -0
- package/dist/rules/implementation/IMP-008.js.map +1 -0
- package/dist/rules/implementation/IMP-009.d.ts +9 -0
- package/dist/rules/implementation/IMP-009.js +48 -0
- package/dist/rules/implementation/IMP-009.js.map +1 -0
- package/dist/rules/implementation/IMP-010.d.ts +9 -0
- package/dist/rules/implementation/IMP-010.js +66 -0
- package/dist/rules/implementation/IMP-010.js.map +1 -0
- package/dist/rules/implementation/IMP-011.d.ts +9 -0
- package/dist/rules/implementation/IMP-011.js +82 -0
- package/dist/rules/implementation/IMP-011.js.map +1 -0
- package/dist/rules/implementation/IMP-012.d.ts +9 -0
- package/dist/rules/implementation/IMP-012.js +88 -0
- package/dist/rules/implementation/IMP-012.js.map +1 -0
- package/dist/rules/implementation/IMP-014.d.ts +9 -0
- package/dist/rules/implementation/IMP-014.js +58 -0
- package/dist/rules/implementation/IMP-014.js.map +1 -0
- package/dist/rules/implementation/IMP-015.d.ts +9 -0
- package/dist/rules/implementation/IMP-015.js +64 -0
- package/dist/rules/implementation/IMP-015.js.map +1 -0
- package/dist/rules/implementation/IMP-016.d.ts +9 -0
- package/dist/rules/implementation/IMP-016.js +52 -0
- package/dist/rules/implementation/IMP-016.js.map +1 -0
- package/dist/rules/implementation/IMP-017.d.ts +8 -0
- package/dist/rules/implementation/IMP-017.js +51 -0
- package/dist/rules/implementation/IMP-017.js.map +1 -0
- package/dist/rules/implementation/IMP-018.d.ts +8 -0
- package/dist/rules/implementation/IMP-018.js +52 -0
- package/dist/rules/implementation/IMP-018.js.map +1 -0
- package/dist/rules/implementation/IMP-019.d.ts +8 -0
- package/dist/rules/implementation/IMP-019.js +53 -0
- package/dist/rules/implementation/IMP-019.js.map +1 -0
- package/dist/rules/implementation/IMP-020.d.ts +9 -0
- package/dist/rules/implementation/IMP-020.js +62 -0
- package/dist/rules/implementation/IMP-020.js.map +1 -0
- package/dist/rules/implementation/IMP-021.d.ts +8 -0
- package/dist/rules/implementation/IMP-021.js +64 -0
- package/dist/rules/implementation/IMP-021.js.map +1 -0
- package/dist/rules/implementation/IMP-022.d.ts +8 -0
- package/dist/rules/implementation/IMP-022.js +70 -0
- package/dist/rules/implementation/IMP-022.js.map +1 -0
- package/dist/rules/index.d.ts +73 -6
- package/dist/rules/index.js +141 -6
- package/dist/rules/index.js.map +1 -1
- package/dist/rules/schema/SCHEMA-004.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-004.js +57 -0
- package/dist/rules/schema/SCHEMA-004.js.map +1 -0
- package/dist/rules/schema/SCHEMA-005.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-005.js +61 -0
- package/dist/rules/schema/SCHEMA-005.js.map +1 -0
- package/dist/rules/schema/SCHEMA-006.d.ts +10 -0
- package/dist/rules/schema/SCHEMA-006.js +85 -0
- package/dist/rules/schema/SCHEMA-006.js.map +1 -0
- package/dist/rules/schema/SCHEMA-007.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-007.js +73 -0
- package/dist/rules/schema/SCHEMA-007.js.map +1 -0
- package/dist/rules/schema/SCHEMA-008.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-008.js +70 -0
- package/dist/rules/schema/SCHEMA-008.js.map +1 -0
- package/dist/rules/schema/SCHEMA-009.d.ts +10 -0
- package/dist/rules/schema/SCHEMA-009.js +80 -0
- package/dist/rules/schema/SCHEMA-009.js.map +1 -0
- package/dist/rules/schema/SCHEMA-010.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-010.js +96 -0
- package/dist/rules/schema/SCHEMA-010.js.map +1 -0
- package/dist/rules/schema/SCHEMA-012.d.ts +9 -0
- package/dist/rules/schema/SCHEMA-012.js +65 -0
- package/dist/rules/schema/SCHEMA-012.js.map +1 -0
- package/dist/rules/security/SEC-002.d.ts +8 -0
- package/dist/rules/security/SEC-002.js +81 -0
- package/dist/rules/security/SEC-002.js.map +1 -0
- package/dist/rules/security/SEC-003.d.ts +8 -0
- package/dist/rules/security/SEC-003.js +85 -0
- package/dist/rules/security/SEC-003.js.map +1 -0
- package/dist/rules/security/SEC-004.d.ts +9 -0
- package/dist/rules/security/SEC-004.js +87 -0
- package/dist/rules/security/SEC-004.js.map +1 -0
- package/dist/rules/security/SEC-005.d.ts +8 -0
- package/dist/rules/security/SEC-005.js +87 -0
- package/dist/rules/security/SEC-005.js.map +1 -0
- package/dist/rules/security/SEC-006.d.ts +10 -0
- package/dist/rules/security/SEC-006.js +108 -0
- package/dist/rules/security/SEC-006.js.map +1 -0
- package/dist/rules/security/SEC-007.d.ts +9 -0
- package/dist/rules/security/SEC-007.js +108 -0
- package/dist/rules/security/SEC-007.js.map +1 -0
- package/dist/rules/security/SEC-008.d.ts +8 -0
- package/dist/rules/security/SEC-008.js +109 -0
- package/dist/rules/security/SEC-008.js.map +1 -0
- package/dist/rules/security/SEC-009.d.ts +9 -0
- package/dist/rules/security/SEC-009.js +93 -0
- package/dist/rules/security/SEC-009.js.map +1 -0
- package/dist/rules/security/SEC-010.d.ts +8 -0
- package/dist/rules/security/SEC-010.js +78 -0
- package/dist/rules/security/SEC-010.js.map +1 -0
- package/dist/rules/security/SEC-011.d.ts +8 -0
- package/dist/rules/security/SEC-011.js +93 -0
- package/dist/rules/security/SEC-011.js.map +1 -0
- package/dist/rules/security/SEC-012.d.ts +8 -0
- package/dist/rules/security/SEC-012.js +79 -0
- package/dist/rules/security/SEC-012.js.map +1 -0
- package/dist/rules/security/SEC-013.d.ts +9 -0
- package/dist/rules/security/SEC-013.js +107 -0
- package/dist/rules/security/SEC-013.js.map +1 -0
- package/dist/scoring/calculator.js +1 -0
- package/dist/scoring/calculator.js.map +1 -1
- package/dist/ui/ink/components/AIRecommendationCard.d.ts +11 -0
- package/dist/ui/ink/components/AIRecommendationCard.js +23 -0
- package/dist/ui/ink/components/AIRecommendationCard.js.map +1 -0
- package/dist/ui/ink/components/OpportunityList.d.ts +10 -0
- package/dist/ui/ink/components/OpportunityList.js +48 -0
- package/dist/ui/ink/components/OpportunityList.js.map +1 -0
- package/dist/ui/ink/components/PotentialPageCard.d.ts +13 -0
- package/dist/ui/ink/components/PotentialPageCard.js +43 -0
- package/dist/ui/ink/components/PotentialPageCard.js.map +1 -0
- package/dist/ui/ink/components/PotentialProgress.d.ts +16 -0
- package/dist/ui/ink/components/PotentialProgress.js +44 -0
- package/dist/ui/ink/components/PotentialProgress.js.map +1 -0
- package/dist/ui/ink/components/PotentialSummary.d.ts +10 -0
- package/dist/ui/ink/components/PotentialSummary.js +86 -0
- package/dist/ui/ink/components/PotentialSummary.js.map +1 -0
- package/dist/ui/ink/components/SuggestionCard.d.ts +34 -0
- package/dist/ui/ink/components/SuggestionCard.js +36 -0
- package/dist/ui/ink/components/SuggestionCard.js.map +1 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.d.ts +21 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.js +55 -0
- package/dist/ui/ink/components/views/MultiPageCrawlView.js.map +1 -0
- package/dist/ui/ink/components/views/PotentialView.d.ts +18 -0
- package/dist/ui/ink/components/views/PotentialView.js +74 -0
- package/dist/ui/ink/components/views/PotentialView.js.map +1 -0
- package/dist/ui/ink/components/views/ReconView.d.ts +22 -0
- package/dist/ui/ink/components/views/ReconView.js +30 -0
- package/dist/ui/ink/components/views/ReconView.js.map +1 -0
- package/package.json +2 -1
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration Schema
|
|
3
|
+
*
|
|
4
|
+
* Zod schema for validating configuration files.
|
|
5
|
+
*/
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
export const crawlSchema = z.object({
|
|
8
|
+
maxPages: z.number().int().positive().default(20),
|
|
9
|
+
maxDepth: z.number().int().positive().default(4),
|
|
10
|
+
timeout: z.number().int().positive().default(30000),
|
|
11
|
+
respectRobotsTxt: z.boolean().default(true),
|
|
12
|
+
}).strict();
|
|
13
|
+
export const ruleOverrideSchema = z.object({
|
|
14
|
+
severity: z.enum(['critical', 'warning', 'info']).optional(),
|
|
15
|
+
maxScore: z.number().int().nonnegative().optional(),
|
|
16
|
+
});
|
|
17
|
+
export const rulesSchema = z.object({
|
|
18
|
+
disabled: z.array(z.string()).default([]),
|
|
19
|
+
overrides: z.record(z.string(), ruleOverrideSchema).default({}),
|
|
20
|
+
}).strict();
|
|
21
|
+
export const scoringWeightsSchema = z.object({
|
|
22
|
+
implementation: z.number().min(0).max(1).default(0.25),
|
|
23
|
+
coverage: z.number().min(0).max(1).default(0.20),
|
|
24
|
+
security: z.number().min(0).max(1).default(0.20),
|
|
25
|
+
bestPractices: z.number().min(0).max(1).default(0.15),
|
|
26
|
+
promptHandling: z.number().min(0).max(1).default(0.20),
|
|
27
|
+
}).strict();
|
|
28
|
+
export const scoringSchema = z.object({
|
|
29
|
+
weights: scoringWeightsSchema.default({}),
|
|
30
|
+
}).strict();
|
|
31
|
+
export const outputSchema = z.object({
|
|
32
|
+
directory: z.string().default('./agentready-report'),
|
|
33
|
+
formats: z.array(z.enum(['html', 'json', 'markdown'])).default(['html', 'json']),
|
|
34
|
+
includeScreenshots: z.boolean().default(true),
|
|
35
|
+
}).strict();
|
|
36
|
+
export const configSchema = z.object({
|
|
37
|
+
crawl: crawlSchema.default({}),
|
|
38
|
+
rules: rulesSchema.default({}),
|
|
39
|
+
scoring: scoringSchema.default({}),
|
|
40
|
+
output: outputSchema.default({}),
|
|
41
|
+
}).strict();
|
|
42
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IACjD,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAChD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;IACnD,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;CAC5C,CAAC,CAAC,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC5D,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE;CACpD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,kBAAkB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CAChE,CAAC,CAAC,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3C,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACtD,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IAChD,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IAChD,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACrD,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;CACvD,CAAC,CAAC,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,OAAO,EAAE,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;CAC1C,CAAC,CAAC,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,qBAAqB,CAAC;IACpD,OAAO,EAAE,CAAC,CAAC,KAAK,C
AAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChF,kBAAkB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;CAC9C,CAAC,CAAC,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,KAAK,EAAE,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;IAC9B,KAAK,EAAE,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;IAC9B,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC;IAClC,MAAM,EAAE,YAAY,CAAC,OAAO,CAAC,EAAE,CAAC;CACjC,CAAC,CAAC,MAAM,EAAE,CAAC"}
|
|
@@ -63,7 +63,7 @@ export interface CategoryScore {
|
|
|
63
63
|
/**
|
|
64
64
|
* Category ID
|
|
65
65
|
*/
|
|
66
|
-
id: 'implementation' | 'description' | 'schema' | 'security' | 'best-practices' | 'coverage';
|
|
66
|
+
id: 'implementation' | 'description' | 'schema' | 'security' | 'best-practices' | 'coverage' | 'accessibility';
|
|
67
67
|
/**
|
|
68
68
|
* Score achieved in this category
|
|
69
69
|
*/
|
package/dist/core/types/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/core/types/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/core/types/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,YAAY,CAAC"}
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reconnaissance & Multi-Page Crawl Types
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Page type classification based on URL/content heuristics
|
|
6
|
+
*/
|
|
7
|
+
export type PageType = 'homepage' | 'search' | 'listing-results' | 'detail-page' | 'checkout-cart' | 'checkout-payment' | 'form-submission' | 'account-management' | 'settings' | 'contact' | 'content-article' | 'help-support' | 'authentication' | 'dashboard' | 'unknown';
|
|
8
|
+
/**
|
|
9
|
+
* Route priority for crawl ordering
|
|
10
|
+
*/
|
|
11
|
+
export type RoutePriority = 'critical' | 'high' | 'medium' | 'low' | 'skip';
|
|
12
|
+
/**
|
|
13
|
+
* Source that discovered a route
|
|
14
|
+
*/
|
|
15
|
+
export type RouteSource = 'sitemap' | 'navigation' | 'link' | 'form-action' | 'script' | 'inferred';
|
|
16
|
+
/**
|
|
17
|
+
* A discovered route within the target site
|
|
18
|
+
*/
|
|
19
|
+
export interface Route {
|
|
20
|
+
/** Full URL of the route */
|
|
21
|
+
url: string;
|
|
22
|
+
/** How the route was discovered */
|
|
23
|
+
source: RouteSource;
|
|
24
|
+
/** Clicks from homepage (0 = root) */
|
|
25
|
+
depth: number;
|
|
26
|
+
/** Whether a <form> was detected in the source HTML (if known) */
|
|
27
|
+
hasForm: boolean;
|
|
28
|
+
/** Whether interactive elements (buttons, tabs, etc.) were detected */
|
|
29
|
+
hasInteractiveElements: boolean;
|
|
30
|
+
/** Heuristic page type classification */
|
|
31
|
+
estimatedPageType: PageType;
|
|
32
|
+
/** Priority for crawl ordering */
|
|
33
|
+
priority: RoutePriority;
|
|
34
|
+
/** Numeric priority score (higher = more important) */
|
|
35
|
+
priorityScore: number;
|
|
36
|
+
/** Sitemap lastmod date if available */
|
|
37
|
+
lastmod?: string;
|
|
38
|
+
/** Sitemap changefreq if available */
|
|
39
|
+
changefreq?: string;
|
|
40
|
+
/** Sitemap priority if available */
|
|
41
|
+
sitemapPriority?: number;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Parsed sitemap entry
|
|
45
|
+
*/
|
|
46
|
+
export interface SitemapEntry {
|
|
47
|
+
loc: string;
|
|
48
|
+
lastmod?: string;
|
|
49
|
+
changefreq?: string;
|
|
50
|
+
priority?: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Result of sitemap parsing
|
|
54
|
+
*/
|
|
55
|
+
export interface SitemapResult {
|
|
56
|
+
/** Whether a sitemap was found */
|
|
57
|
+
found: boolean;
|
|
58
|
+
/** URLs from the sitemap */
|
|
59
|
+
entries: SitemapEntry[];
|
|
60
|
+
/** Sitemap source URL */
|
|
61
|
+
source: string;
|
|
62
|
+
/** Whether this was a sitemap index */
|
|
63
|
+
isIndex: boolean;
|
|
64
|
+
/** Error message if parsing failed */
|
|
65
|
+
error?: string;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Parsed robots.txt directive
|
|
69
|
+
*/
|
|
70
|
+
export interface RobotsDirective {
|
|
71
|
+
userAgent: string;
|
|
72
|
+
allow: string[];
|
|
73
|
+
disallow: string[];
|
|
74
|
+
crawlDelay?: number;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Result of robots.txt parsing
|
|
78
|
+
*/
|
|
79
|
+
export interface RobotsResult {
|
|
80
|
+
/** Whether robots.txt was found */
|
|
81
|
+
found: boolean;
|
|
82
|
+
/** Parsed directives grouped by user-agent */
|
|
83
|
+
directives: RobotsDirective[];
|
|
84
|
+
/** Sitemap URLs referenced in robots.txt */
|
|
85
|
+
sitemapUrls: string[];
|
|
86
|
+
/** Raw robots.txt content */
|
|
87
|
+
raw: string;
|
|
88
|
+
/** Error message if parsing failed */
|
|
89
|
+
error?: string;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Result of web app manifest parsing
|
|
93
|
+
*/
|
|
94
|
+
export interface ManifestResult {
|
|
95
|
+
/** Whether a manifest was found */
|
|
96
|
+
found: boolean;
|
|
97
|
+
/** App name */
|
|
98
|
+
name?: string;
|
|
99
|
+
/** Short name */
|
|
100
|
+
shortName?: string;
|
|
101
|
+
/** App description */
|
|
102
|
+
description?: string;
|
|
103
|
+
/** Start URL */
|
|
104
|
+
startUrl?: string;
|
|
105
|
+
/** Display mode */
|
|
106
|
+
display?: string;
|
|
107
|
+
/** Theme color */
|
|
108
|
+
themeColor?: string;
|
|
109
|
+
/** Source URL of the manifest */
|
|
110
|
+
source?: string;
|
|
111
|
+
/** Raw manifest data */
|
|
112
|
+
raw?: Record<string, unknown>;
|
|
113
|
+
/** Error message if parsing failed */
|
|
114
|
+
error?: string;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Extracted meta tag information
|
|
118
|
+
*/
|
|
119
|
+
export interface MetaTagResult {
|
|
120
|
+
/** Meta tag name or property */
|
|
121
|
+
name: string;
|
|
122
|
+
/** Meta tag content */
|
|
123
|
+
content: string;
|
|
124
|
+
/** Category: og, twitter, schema, generic */
|
|
125
|
+
category: 'og' | 'twitter' | 'schema' | 'generic';
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Schema.org JSON-LD data
|
|
129
|
+
*/
|
|
130
|
+
export interface SchemaOrgData {
|
|
131
|
+
'@type': string;
|
|
132
|
+
[key: string]: unknown;
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Site type classification from meta analysis
|
|
136
|
+
*/
|
|
137
|
+
export type SiteClassification = 'e-commerce' | 'travel' | 'healthcare' | 'finance' | 'education' | 'news-media' | 'social' | 'saas' | 'government' | 'entertainment' | 'food-restaurant' | 'real-estate' | 'general';
|
|
138
|
+
/**
|
|
139
|
+
* Result of meta tag extraction
|
|
140
|
+
*/
|
|
141
|
+
export interface MetaExtractResult {
|
|
142
|
+
/** All extracted meta tags */
|
|
143
|
+
metaTags: MetaTagResult[];
|
|
144
|
+
/** OpenGraph data */
|
|
145
|
+
openGraph: Record<string, string>;
|
|
146
|
+
/** Twitter card data */
|
|
147
|
+
twitterCard: Record<string, string>;
|
|
148
|
+
/** Schema.org JSON-LD entries */
|
|
149
|
+
schemaOrg: SchemaOrgData[];
|
|
150
|
+
/** Classified site type */
|
|
151
|
+
siteClassification: SiteClassification;
|
|
152
|
+
/** Page title from <title> tag */
|
|
153
|
+
pageTitle: string;
|
|
154
|
+
/** Page description from meta description */
|
|
155
|
+
pageDescription: string;
|
|
156
|
+
/** Canonical URL */
|
|
157
|
+
canonicalUrl?: string;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Full reconnaissance result
|
|
161
|
+
*/
|
|
162
|
+
export interface ReconResult {
|
|
163
|
+
/** Original target URL */
|
|
164
|
+
url: string;
|
|
165
|
+
/** Resolved URL after redirects */
|
|
166
|
+
resolvedUrl: string;
|
|
167
|
+
/** HTTP status code of root page */
|
|
168
|
+
statusCode: number;
|
|
169
|
+
/** Response time in ms for root page fetch */
|
|
170
|
+
responseTime: number;
|
|
171
|
+
/** Sitemap analysis */
|
|
172
|
+
sitemap: SitemapResult | null;
|
|
173
|
+
/** Robots.txt analysis */
|
|
174
|
+
robots: RobotsResult | null;
|
|
175
|
+
/** Web app manifest */
|
|
176
|
+
manifest: ManifestResult | null;
|
|
177
|
+
/** Meta tag extraction from root page */
|
|
178
|
+
meta: MetaExtractResult | null;
|
|
179
|
+
/** All discovered routes, sorted by priority */
|
|
180
|
+
discoveredRoutes: Route[];
|
|
181
|
+
/** Root page HTML (for downstream use) */
|
|
182
|
+
rootHtml: string;
|
|
183
|
+
/** Errors encountered during recon (non-fatal) */
|
|
184
|
+
errors: string[];
|
|
185
|
+
}
|
|
186
|
+
/**
|
|
187
|
+
* Configuration for multi-page crawling
|
|
188
|
+
*/
|
|
189
|
+
export interface CrawlConfig {
|
|
190
|
+
/** Maximum number of pages to crawl (default: 20) */
|
|
191
|
+
maxPages: number;
|
|
192
|
+
/** Maximum crawl depth from root (default: 4) */
|
|
193
|
+
maxDepth: number;
|
|
194
|
+
/** Per-page timeout in ms (default: 30000) */
|
|
195
|
+
timeout: number;
|
|
196
|
+
/** Extra wait time for SPA hydration in ms (default: 2000) */
|
|
197
|
+
extraWaitMs: number;
|
|
198
|
+
/** Whether to respect robots.txt disallow rules (default: true) */
|
|
199
|
+
respectRobotsTxt: boolean;
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Progress callback for multi-page crawl
|
|
203
|
+
*/
|
|
204
|
+
export interface MultiPageCrawlProgress {
|
|
205
|
+
/** Current phase: 'recon' | 'crawling' | 'complete' */
|
|
206
|
+
phase: 'recon' | 'crawling' | 'complete';
|
|
207
|
+
/** Overall progress percentage (0-100) */
|
|
208
|
+
progress: number;
|
|
209
|
+
/** Current page being audited */
|
|
210
|
+
currentUrl: string;
|
|
211
|
+
/** Number of pages completed */
|
|
212
|
+
pagesCompleted: number;
|
|
213
|
+
/** Total pages planned */
|
|
214
|
+
pagesTotal: number;
|
|
215
|
+
/** Tools found so far across all pages */
|
|
216
|
+
toolsFound: number;
|
|
217
|
+
/** Human-readable status message */
|
|
218
|
+
message: string;
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Per-page result within a multi-page audit
|
|
222
|
+
*/
|
|
223
|
+
export interface PageAuditSummary {
|
|
224
|
+
/** URL of the page */
|
|
225
|
+
url: string;
|
|
226
|
+
/** Page title */
|
|
227
|
+
title: string;
|
|
228
|
+
/** Whether page loaded successfully */
|
|
229
|
+
success: boolean;
|
|
230
|
+
/** Whether WebMCP was found on this page */
|
|
231
|
+
hasWebMCP: boolean;
|
|
232
|
+
/** Number of tools found on this page */
|
|
233
|
+
toolCount: number;
|
|
234
|
+
/** Number of opportunities found */
|
|
235
|
+
opportunityCount: number;
|
|
236
|
+
/** Page-level score (if available) */
|
|
237
|
+
score?: number;
|
|
238
|
+
/** Error message if page failed */
|
|
239
|
+
error?: string;
|
|
240
|
+
/** Route that led to this page */
|
|
241
|
+
route: Route;
|
|
242
|
+
}
|
|
243
|
+
/**
 * Full result of a multi-page audit: the reconnaissance data, one summary
 * per page, and aggregates computed across every page.
 */
export interface MultiPageAuditResult {
    /** Data gathered during reconnaissance. */
    recon: ReconResult;
    /** One summary entry per audited page. */
    pageSummaries: PageAuditSummary[];
    /** Every tool found, aggregated across all pages. */
    allTools: import('./tool.js').DetectedTool[];
    /** Every opportunity found, aggregated across all pages. */
    allOpportunities: import('./tool.js').UnregisteredForm[];
    /** Every finding reported, aggregated across all pages. */
    allFindings: import('./rule.js').Finding[];
    /** Score aggregated over the whole audit. */
    aggregateScore: import('./audit.js').ScoreSummary;
    /** Pages on which WebMCP was present. */
    webmcpPages: string[];
    /** Number of pages the audit attempted. */
    pagesAttempted: number;
    /** Number of pages that were audited successfully. */
    pagesSucceeded: number;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recon.js","sourceRoot":"","sources":["../../../src/core/types/recon.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -7,7 +7,7 @@ import type { PageInfo } from './audit.js';
|
|
|
7
7
|
/**
|
|
8
8
|
* Rule categories
|
|
9
9
|
*/
|
|
10
|
-
export type RuleCategory = 'implementation' | 'description' | 'schema' | 'security' | 'best-practices' | 'coverage';
|
|
10
|
+
export type RuleCategory = 'implementation' | 'description' | 'schema' | 'security' | 'best-practices' | 'coverage' | 'accessibility';
|
|
11
11
|
/**
|
|
12
12
|
* Rule severity levels
|
|
13
13
|
*/
|
package/dist/core/types/rule.js
CHANGED
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
* Score weights by category
|
|
6
6
|
*/
|
|
7
7
|
export const CATEGORY_WEIGHTS = {
|
|
8
|
-
implementation:
|
|
9
|
-
description:
|
|
10
|
-
schema:
|
|
11
|
-
security:
|
|
12
|
-
'best-practices':
|
|
8
|
+
implementation: 23,
|
|
9
|
+
description: 14,
|
|
10
|
+
schema: 18,
|
|
11
|
+
security: 23,
|
|
12
|
+
'best-practices': 9,
|
|
13
13
|
coverage: 5,
|
|
14
|
+
accessibility: 8,
|
|
14
15
|
};
|
|
15
16
|
/**
|
|
16
17
|
* Category display names
|
|
@@ -22,5 +23,6 @@ export const CATEGORY_NAMES = {
|
|
|
22
23
|
security: 'Security',
|
|
23
24
|
'best-practices': 'Best Practices',
|
|
24
25
|
coverage: 'Coverage',
|
|
26
|
+
accessibility: 'Accessibility',
|
|
25
27
|
};
|
|
26
28
|
//# sourceMappingURL=rule.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule.js","sourceRoot":"","sources":["../../../src/core/types/rule.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"rule.js","sourceRoot":"","sources":["../../../src/core/types/rule.ts"],"names":[],"mappings":"AAAA;;GAEG;AAkOH;;GAEG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAiC;IAC5D,cAAc,EAAE,EAAE;IAClB,WAAW,EAAE,EAAE;IACf,MAAM,EAAE,EAAE;IACV,QAAQ,EAAE,EAAE;IACZ,gBAAgB,EAAE,CAAC;IACnB,QAAQ,EAAE,CAAC;IACX,aAAa,EAAE,CAAC;CACjB,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAiC;IAC1D,cAAc,EAAE,gBAAgB;IAChC,WAAW,EAAE,qBAAqB;IAClC,MAAM,EAAE,mBAAmB;IAC3B,QAAQ,EAAE,UAAU;IACpB,gBAAgB,EAAE,gBAAgB;IAClC,QAAQ,EAAE,UAAU;IACpB,aAAa,EAAE,eAAe;CAC/B,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * One page fetched by the depth crawler.
 */
export interface CrawlPage {
    /** Normalized URL of the page after redirects were followed. */
    url: string;
    /** Link distance from the start URL (the start page is depth 0). */
    depth: number;
    /** Response body as text. */
    html: string;
    /** Trimmed text of the first `<title>` element; empty when there is none. */
    title: string;
    /** Links extracted from the page by `extractInternalLinks`. */
    internalLinks: string[];
    /** HTTP status code of the response. */
    statusCode: number;
    /** Milliseconds elapsed from issuing the request until the body was read. */
    loadTime: number;
}
|
|
10
|
+
/**
 * Summary returned once a depth crawl has finished.
 */
export interface CrawlResult {
    /** Pages that were fetched and recorded. */
    pages: CrawlPage[];
    /** Number of unique URLs discovered, whether or not they were fetched. */
    totalPagesFound: number;
    /** Number of entries in `pages`. */
    totalPagesCrawled: number;
    /** Wall-clock duration of the crawl in milliseconds. */
    crawlDuration: number;
    /** Failures collected during the crawl, keyed by the URL that was requested. */
    errors: {
        url: string;
        error: string;
    }[];
}
|
|
20
|
+
/**
 * Optional tuning knobs for `crawlDepth2`.
 */
export interface CrawlOptions {
    /** Upper bound on recorded pages (the crawler defaults to 30, minimum 1). */
    maxPages?: number;
    /** Per-request timeout in milliseconds (the crawler defaults to 15000, minimum 1000). */
    timeout?: number;
    /** Maximum number of simultaneous requests (the crawler defaults to 3, minimum 1). */
    concurrency?: number;
    /**
     * Invoked after each page is recorded. `done` is the number of pages
     * recorded so far and `total` is the effective `maxPages` limit.
     */
    onPageCrawled?: (page: CrawlPage, progress: {
        done: number;
        total: number;
    }) => void;
}
|
|
29
|
+
/**
 * Crawls `startUrl` and the same-origin pages reachable from it within two
 * link hops.
 *
 * @param startUrl Absolute URL to start from; an unparsable value yields a
 *   result with no pages and a single "Invalid start URL" error.
 * @param options Optional limits (page count, timeout, concurrency) and a
 *   per-page progress callback.
 * @returns The recorded pages, discovery counters, duration, and collected errors.
 */
export declare function crawlDepth2(startUrl: string, options?: CrawlOptions): Promise<CrawlResult>;
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import { extractInternalLinks } from './link-extractor.js';
|
|
3
|
+
/** Page limit used when the caller does not supply `maxPages`. */
const DEFAULT_MAX_PAGES = 30;
/** Per-request timeout in milliseconds used when the caller does not supply `timeout`. */
const DEFAULT_TIMEOUT = 15000;
/** Number of parallel requests used when the caller does not supply `concurrency`. */
const DEFAULT_CONCURRENCY = 3;
/** Deepest link distance from the start URL that is still crawled. */
const MAX_DEPTH = 2;
/** Smallest per-request timeout in milliseconds the crawler will accept. */
const MIN_TIMEOUT = 1000;
/** Value sent in the User-Agent header of every crawler request. */
const USER_AGENT = 'WebMCP-CLI DepthCrawler/1.0 (+https://webmcp.org)';
|
|
9
|
+
/**
 * Produces a canonical form of a URL for de-duplication: the fragment is
 * removed, query parameters are sorted, and one trailing slash is stripped
 * from any path other than the root.
 *
 * @param url Absolute URL string to normalize.
 * @returns The canonical URL string, or `null` when `url` cannot be parsed.
 */
function normalizeCrawlUrl(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return null;
    }
    target.hash = '';
    target.searchParams.sort();
    const path = target.pathname;
    // Keep the bare root "/" as it is; only longer paths lose their trailing slash.
    if (path.length > 1 && path.endsWith('/')) {
        target.pathname = path.slice(0, -1);
    }
    return target.href;
}
|
|
23
|
+
/**
 * Reads the page title out of an HTML document.
 *
 * @param html Markup to parse.
 * @returns Trimmed text of the first `<title>` element.
 */
function extractTitle(html) {
    const dom = cheerio.load(html);
    const firstTitle = dom('title').first();
    return firstTitle.text().trim();
}
|
|
27
|
+
/**
 * Tells whether a URL points at the given hostname. Scheme and port are
 * not compared.
 *
 * @param url Absolute URL string to inspect.
 * @param hostname Hostname to compare against.
 * @returns `true` on an exact hostname match; `false` otherwise, including
 *   when `url` cannot be parsed.
 */
function isSameHostname(url, hostname) {
    let candidate;
    try {
        candidate = new URL(url);
    }
    catch {
        return false;
    }
    return candidate.hostname === hostname;
}
|
|
35
|
+
/**
 * Downloads one page, following redirects, and aborts the request if it
 * has not completed (body included) within `timeout` milliseconds.
 *
 * @param url Address to request.
 * @param timeout Milliseconds to wait before the request is aborted.
 * @returns The body text, HTTP status, final URL after redirects, and the
 *   elapsed time in milliseconds.
 */
async function fetchPage(url, timeout) {
    const abortSwitch = new AbortController();
    const abortTimer = setTimeout(() => abortSwitch.abort(), timeout);
    const requestStartedAt = Date.now();
    const requestInit = {
        signal: abortSwitch.signal,
        redirect: 'follow',
        headers: {
            'User-Agent': USER_AGENT,
            Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        },
    };
    try {
        const response = await fetch(url, requestInit);
        // The timer stays armed while the body streams in, so the timeout
        // covers the download as well as the headers.
        const body = await response.text();
        const elapsed = Date.now() - requestStartedAt;
        return {
            html: body,
            statusCode: response.status,
            finalUrl: response.url,
            loadTime: elapsed,
        };
    }
    finally {
        clearTimeout(abortTimer);
    }
}
|
|
60
|
+
/**
 * Crawls `startUrl` and the same-origin pages reachable from it within
 * MAX_DEPTH link hops, fetching up to `concurrency` pages at a time.
 *
 * Fetch and parsing failures are recorded in the returned `errors` list
 * instead of being thrown.
 *
 * @param startUrl Absolute URL to start from.
 * @param options Optional `maxPages`, `timeout` (ms), `concurrency`, and an
 *   `onPageCrawled(page, { done, total })` callback. Numeric options that
 *   are missing or not a number fall back to the module defaults.
 * @returns Recorded pages, the number of unique URLs discovered, the number
 *   of pages crawled, the crawl duration in ms, and the collected errors.
 */
export async function crawlDepth2(startUrl, options) {
    const crawlStart = Date.now();
    const pages = [];
    const errors = [];
    // discovered: every unique URL seen; enqueued: URLs already placed on the
    // queue; visited: URLs whose fetch has started or that a redirect resolved to.
    const discovered = new Set();
    const enqueued = new Set();
    const visited = new Set();
    // Node timers coerce delays above this value (or NaN) to 1 ms, which
    // would abort every request immediately.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    // A NaN limit makes every scheduler comparison false, so the crawl would
    // never start and never finish; fall back to the default instead.
    const readInteger = (value, fallback) => {
        const floored = Math.floor(value ?? fallback);
        return Number.isNaN(floored) ? fallback : floored;
    };
    const maxPages = Math.max(1, readInteger(options?.maxPages, DEFAULT_MAX_PAGES));
    const timeout = Math.min(MAX_TIMER_DELAY_MS, Math.max(MIN_TIMEOUT, readInteger(options?.timeout, DEFAULT_TIMEOUT)));
    const concurrency = Math.max(1, readInteger(options?.concurrency, DEFAULT_CONCURRENCY));
    const onPageCrawled = options?.onPageCrawled;
    const normalizedStart = normalizeCrawlUrl(startUrl);
    if (!normalizedStart) {
        return {
            pages,
            totalPagesFound: 0,
            totalPagesCrawled: 0,
            crawlDuration: 0,
            errors: [{ url: startUrl, error: 'Invalid start URL' }],
        };
    }
    const startUrlObject = new URL(normalizedStart);
    const allowedHostname = startUrlObject.hostname;
    // May be replaced once, if the start URL redirects to another origin on
    // the same hostname (for example http -> https).
    let baseOrigin = startUrlObject.origin;
    const queue = [{ url: normalizedStart, depth: 0 }];
    enqueued.add(normalizedStart);
    discovered.add(normalizedStart);
    // Adds a link to the queue when it is in range, same-origin, and new.
    const enqueueLink = (url, depth) => {
        if (depth > MAX_DEPTH)
            return;
        const normalized = normalizeCrawlUrl(url);
        if (!normalized)
            return;
        if (new URL(normalized).origin !== baseOrigin)
            return;
        if (enqueued.has(normalized) || visited.has(normalized))
            return;
        enqueued.add(normalized);
        discovered.add(normalized);
        queue.push({ url: normalized, depth });
    };
    // Fetches one queued item, records it as a page, and enqueues its links.
    const processPage = async (item) => {
        if (visited.has(item.url)) {
            return;
        }
        visited.add(item.url);
        try {
            const result = await fetchPage(item.url, timeout);
            if (!isSameHostname(result.finalUrl, allowedHostname)) {
                errors.push({ url: item.url, error: 'Redirected outside allowed origin' });
                return;
            }
            const finalOrigin = new URL(result.finalUrl).origin;
            if (item.depth === 0 && finalOrigin !== baseOrigin) {
                baseOrigin = finalOrigin;
            }
            else if (finalOrigin !== baseOrigin) {
                errors.push({ url: item.url, error: 'Redirected to different origin' });
                return;
            }
            const normalizedFinalUrl = normalizeCrawlUrl(result.finalUrl) ?? item.url;
            if (normalizedFinalUrl !== item.url) {
                // The redirect target was already crawled, or is being crawled,
                // through another link; recording it again would duplicate the page.
                if (visited.has(normalizedFinalUrl)) {
                    return;
                }
                enqueued.add(normalizedFinalUrl);
                discovered.add(normalizedFinalUrl);
                visited.add(normalizedFinalUrl);
            }
            const internalLinks = extractInternalLinks(result.html, result.finalUrl);
            const title = extractTitle(result.html);
            if (pages.length >= maxPages) {
                return;
            }
            const page = {
                url: normalizedFinalUrl,
                depth: item.depth,
                html: result.html,
                title,
                internalLinks,
                statusCode: result.statusCode,
                loadTime: result.loadTime,
            };
            pages.push(page);
            if (onPageCrawled) {
                // A throwing callback must not abort the crawl; record it instead.
                try {
                    onPageCrawled(page, { done: pages.length, total: maxPages });
                }
                catch (callbackError) {
                    errors.push({
                        url: item.url,
                        error: `onPageCrawled callback failed: ${callbackError instanceof Error ? callbackError.message : String(callbackError)}`,
                    });
                }
            }
            if (item.depth < MAX_DEPTH) {
                for (const link of internalLinks) {
                    enqueueLink(link, item.depth + 1);
                }
            }
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            errors.push({ url: item.url, error: message });
        }
    };
    const inFlight = new Set();
    // Resolves once the queue is drained and no request is still in flight.
    await new Promise((resolve) => {
        let finished = false;
        const finish = () => {
            if (finished)
                return;
            finished = true;
            resolve();
        };
        const schedule = () => {
            // Page budget reached: drop whatever is still waiting.
            if (pages.length >= maxPages) {
                queue.length = 0;
            }
            // In-flight requests count against the budget so it is never exceeded.
            while (inFlight.size < concurrency &&
                queue.length > 0 &&
                pages.length + inFlight.size < maxPages) {
                const next = queue.shift();
                if (!next)
                    break;
                const task = processPage(next)
                    .catch((error) => {
                    const message = error instanceof Error ? error.message : String(error);
                    errors.push({ url: next.url, error: message });
                })
                    .finally(() => {
                    inFlight.delete(task);
                    if (queue.length === 0 && inFlight.size === 0) {
                        finish();
                    }
                    else {
                        schedule();
                    }
                });
                inFlight.add(task);
            }
            if (queue.length === 0 && inFlight.size === 0) {
                finish();
            }
        };
        schedule();
    });
    return {
        pages,
        totalPagesFound: discovered.size,
        totalPagesCrawled: pages.length,
        crawlDuration: Date.now() - crawlStart,
        errors,
    };
}
|
|
212
|
+
//# sourceMappingURL=depth-crawler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"depth-crawler.js","sourceRoot":"","sources":["../../src/crawler/depth-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAuC3D,MAAM,iBAAiB,GAAG,EAAE,CAAC;AAC7B,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,WAAW,GAAG,KAAK,CAAC;AAC1B,MAAM,UAAU,GAAG,mDAAmD,CAAC;AAEvE,SAAS,iBAAiB,CAAC,GAAW;IACpC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACjB,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAChE,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QACD,OAAO,MAAM,CAAC,IAAI,CAAC;IACrB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,cAAc,CAAC,GAAW,EAAE,QAAgB;IACnD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAAe;IACnD,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE;gBACP,YAAY,EAAE,UAAU;gBACxB,MAAM,EAAE,iEAAiE;aAC1E;SACF,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO;YACL,IAAI;YACJ,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,QAAQ,EAAE,QAAQ,CAAC,GAAG;YACtB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SAC/B,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB,EAAE,OAAsB;IACxE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC9B,MAAM,KAAK,GA
AgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAA0C,EAAE,CAAC;IACzD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;IACrC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,IAAI,iBAAiB,CAAC,CAAC,CAAC;IACjF,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,IAAI,eAAe,CAAC,CAAC,CAAC;IACvF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,WAAW,IAAI,mBAAmB,CAAC,CAAC,CAAC;IACzF,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;IAE7C,MAAM,eAAe,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IACpD,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAO;YACL,KAAK;YACL,eAAe,EAAE,CAAC;YAClB,iBAAiB,EAAE,CAAC;YACpB,aAAa,EAAE,CAAC;YAChB,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC;SACxD,CAAC;IACJ,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;IAChD,MAAM,eAAe,GAAG,cAAc,CAAC,QAAQ,CAAC;IAChD,IAAI,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC;IAEvC,MAAM,KAAK,GAAgB,CAAC,EAAE,GAAG,EAAE,eAAe,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IAChE,QAAQ,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC9B,UAAU,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAEhC,MAAM,WAAW,GAAG,CAAC,GAAW,EAAE,KAAa,EAAQ,EAAE;QACvD,IAAI,KAAK,GAAG,SAAS;YAAE,OAAO;QAC9B,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,CAAC,UAAU;YAAE,OAAO;QACxB,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,MAAM,KAAK,UAAU;YAAE,OAAO;QACtD,IAAI,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;YAAE,OAAO;QAEhE,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACzB,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IACzC,CAAC,CAAC;IAEF,MAAM,WAAW,GAAG,KAAK,EAAE,IAAe,EAAiB,EAAE;QAC3D,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO;QACT,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAElD,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,EAAE,eAAe,CAAC,EAAE,CAAC;gBACtD,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,mCAAmC,EAA
E,CAAC,CAAC;gBAC3E,OAAO;YACT,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;YACpD,IAAI,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,WAAW,KAAK,UAAU,EAAE,CAAC;gBACnD,UAAU,GAAG,WAAW,CAAC;YAC3B,CAAC;iBAAM,IAAI,WAAW,KAAK,UAAU,EAAE,CAAC;gBACtC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC,CAAC;gBACxE,OAAO;YACT,CAAC;YAED,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC;YAC1E,IAAI,kBAAkB,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;gBACpC,QAAQ,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;gBACjC,UAAU,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;gBACnC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;YAClC,CAAC;YACD,MAAM,aAAa,GAAG,oBAAoB,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;YACzE,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAExC,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;gBAC7B,OAAO;YACT,CAAC;YAED,MAAM,IAAI,GAAc;gBACtB,GAAG,EAAE,kBAAkB;gBACvB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,KAAK;gBACL,aAAa;gBACb,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,QAAQ,EAAE,MAAM,CAAC,QAAQ;aAC1B,CAAC;YAEF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAEjB,IAAI,aAAa,EAAE,CAAC;gBAClB,IAAI,CAAC;oBACH,aAAa,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;gBAC/D,CAAC;gBAAC,OAAO,aAAa,EAAE,CAAC;oBACvB,MAAM,CAAC,IAAI,CAAC;wBACV,GAAG,EAAE,IAAI,CAAC,GAAG;wBACb,KAAK,EAAE,kCAAkC,aAAa,YAAY,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE;qBAC1H,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;oBACjC,WAAW,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;gBACpC,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACvE,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QACjD,CAAC;IACH,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAiB,CAAC;IAE1C,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;QAClC,IAAI,QAAQ,GAAG,KAAK,CAAC;QA
ErB,MAAM,MAAM,GAAG,GAAS,EAAE;YACxB,IAAI,QAAQ;gBAAE,OAAO;YACrB,QAAQ,GAAG,IAAI,CAAC;YAChB,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC;QAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;YAC1B,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;gBAC7B,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YACnB,CAAC;YAED,OACE,QAAQ,CAAC,IAAI,GAAG,WAAW;gBAC3B,KAAK,CAAC,MAAM,GAAG,CAAC;gBAChB,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,GAAG,QAAQ,EACvC,CAAC;gBACD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC3B,IAAI,CAAC,IAAI;oBAAE,MAAM;gBAEjB,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;qBAC3B,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;oBACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBACjD,CAAC,CAAC;qBACD,OAAO,CAAC,GAAG,EAAE;oBACZ,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBACtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;wBAC9C,MAAM,EAAE,CAAC;oBACX,CAAC;yBAAM,CAAC;wBACN,QAAQ,EAAE,CAAC;oBACb,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEL,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC;YAED,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;gBAC9C,MAAM,EAAE,CAAC;YACX,CAAC;QACH,CAAC,CAAC;QAEF,QAAQ,EAAE,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,KAAK;QACL,eAAe,EAAE,UAAU,CAAC,IAAI;QAChC,iBAAiB,EAAE,KAAK,CAAC,MAAM;QAC/B,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU;QACtC,MAAM;KACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/crawler/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,cAAc,oBAAoB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Extracts links from an HTML document.
 *
 * NOTE(review): the implementation is not visible from this declaration.
 * Presumably only links internal to the site of `baseUrl` are returned, and
 * relative hrefs are resolved against `baseUrl` — confirm against the source.
 *
 * @param html Markup to scan.
 * @param baseUrl URL of the page the markup was loaded from.
 * @returns The extracted link URLs.
 */
export declare function extractInternalLinks(html: string, baseUrl: string): string[];
|