webmcp-cli 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330) hide show
  1. package/dist/analysis/form-to-tool-mapper.d.ts +61 -0
  2. package/dist/analysis/form-to-tool-mapper.js +360 -0
  3. package/dist/analysis/form-to-tool-mapper.js.map +1 -0
  4. package/dist/analysis/index.d.ts +84 -0
  5. package/dist/analysis/index.js +81 -0
  6. package/dist/analysis/index.js.map +1 -0
  7. package/dist/analysis/missing-tool-analyzer.d.ts +35 -0
  8. package/dist/analysis/missing-tool-analyzer.js +617 -0
  9. package/dist/analysis/missing-tool-analyzer.js.map +1 -0
  10. package/dist/audit/run-multi-page-audit.d.ts +34 -0
  11. package/dist/audit/run-multi-page-audit.js +233 -0
  12. package/dist/audit/run-multi-page-audit.js.map +1 -0
  13. package/dist/cli/commands/potential.d.ts +8 -0
  14. package/dist/cli/commands/potential.js +323 -0
  15. package/dist/cli/commands/potential.js.map +1 -0
  16. package/dist/cli/commands/report.d.ts +12 -0
  17. package/dist/cli/commands/report.js +89 -0
  18. package/dist/cli/commands/report.js.map +1 -0
  19. package/dist/cli/index.js +35 -0
  20. package/dist/cli/index.js.map +1 -1
  21. package/dist/config/defaults.d.ts +36 -0
  22. package/dist/config/defaults.js +33 -0
  23. package/dist/config/defaults.js.map +1 -0
  24. package/dist/config/index.d.ts +7 -0
  25. package/dist/config/index.js +7 -0
  26. package/dist/config/index.js.map +1 -0
  27. package/dist/config/loader.d.ts +22 -0
  28. package/dist/config/loader.js +91 -0
  29. package/dist/config/loader.js.map +1 -0
  30. package/dist/config/schema.d.ts +280 -0
  31. package/dist/config/schema.js +42 -0
  32. package/dist/config/schema.js.map +1 -0
  33. package/dist/core/types/audit.d.ts +1 -1
  34. package/dist/core/types/index.d.ts +1 -0
  35. package/dist/core/types/index.js +1 -0
  36. package/dist/core/types/index.js.map +1 -1
  37. package/dist/core/types/recon.d.ts +265 -0
  38. package/dist/core/types/recon.js +5 -0
  39. package/dist/core/types/recon.js.map +1 -0
  40. package/dist/core/types/rule.d.ts +1 -1
  41. package/dist/core/types/rule.js +7 -5
  42. package/dist/core/types/rule.js.map +1 -1
  43. package/dist/crawler/depth-crawler.d.ts +29 -0
  44. package/dist/crawler/depth-crawler.js +212 -0
  45. package/dist/crawler/depth-crawler.js.map +1 -0
  46. package/dist/crawler/index.d.ts +2 -0
  47. package/dist/crawler/index.js +3 -0
  48. package/dist/crawler/index.js.map +1 -0
  49. package/dist/crawler/link-extractor.d.ts +1 -0
  50. package/dist/crawler/link-extractor.js +49 -0
  51. package/dist/crawler/link-extractor.js.map +1 -0
  52. package/dist/generators/index.d.ts +10 -0
  53. package/dist/generators/index.js +8 -0
  54. package/dist/generators/index.js.map +1 -0
  55. package/dist/generators/report-html.d.ts +12 -0
  56. package/dist/generators/report-html.js +470 -0
  57. package/dist/generators/report-html.js.map +1 -0
  58. package/dist/generators/report-json.d.ts +95 -0
  59. package/dist/generators/report-json.js +144 -0
  60. package/dist/generators/report-json.js.map +1 -0
  61. package/dist/generators/report-manager.d.ts +31 -0
  62. package/dist/generators/report-manager.js +208 -0
  63. package/dist/generators/report-manager.js.map +1 -0
  64. package/dist/generators/tool-code-generator.d.ts +31 -0
  65. package/dist/generators/tool-code-generator.js +201 -0
  66. package/dist/generators/tool-code-generator.js.map +1 -0
  67. package/dist/potential/ai-recommender.d.ts +33 -0
  68. package/dist/potential/ai-recommender.js +414 -0
  69. package/dist/potential/ai-recommender.js.map +1 -0
  70. package/dist/potential/analyzer.d.ts +32 -0
  71. package/dist/potential/analyzer.js +383 -0
  72. package/dist/potential/analyzer.js.map +1 -0
  73. package/dist/potential/index.d.ts +3 -0
  74. package/dist/potential/index.js +4 -0
  75. package/dist/potential/index.js.map +1 -0
  76. package/dist/potential/prompts.d.ts +20 -0
  77. package/dist/potential/prompts.js +42 -0
  78. package/dist/potential/prompts.js.map +1 -0
  79. package/dist/potential/types.d.ts +40 -0
  80. package/dist/potential/types.js +2 -0
  81. package/dist/potential/types.js.map +1 -0
  82. package/dist/recon/index.d.ts +20 -0
  83. package/dist/recon/index.js +143 -0
  84. package/dist/recon/index.js.map +1 -0
  85. package/dist/recon/manifest.d.ts +16 -0
  86. package/dist/recon/manifest.js +108 -0
  87. package/dist/recon/manifest.js.map +1 -0
  88. package/dist/recon/meta-extractor.d.ts +11 -0
  89. package/dist/recon/meta-extractor.js +276 -0
  90. package/dist/recon/meta-extractor.js.map +1 -0
  91. package/dist/recon/robots.d.ts +16 -0
  92. package/dist/recon/robots.js +158 -0
  93. package/dist/recon/robots.js.map +1 -0
  94. package/dist/recon/route-discovery.d.ts +25 -0
  95. package/dist/recon/route-discovery.js +303 -0
  96. package/dist/recon/route-discovery.js.map +1 -0
  97. package/dist/recon/sitemap.d.ts +12 -0
  98. package/dist/recon/sitemap.js +177 -0
  99. package/dist/recon/sitemap.js.map +1 -0
  100. package/dist/rules/accessibility/AXE-001.d.ts +9 -0
  101. package/dist/rules/accessibility/AXE-001.js +109 -0
  102. package/dist/rules/accessibility/AXE-001.js.map +1 -0
  103. package/dist/rules/accessibility/AXE-002.d.ts +8 -0
  104. package/dist/rules/accessibility/AXE-002.js +85 -0
  105. package/dist/rules/accessibility/AXE-002.js.map +1 -0
  106. package/dist/rules/accessibility/AXE-003.d.ts +8 -0
  107. package/dist/rules/accessibility/AXE-003.js +94 -0
  108. package/dist/rules/accessibility/AXE-003.js.map +1 -0
  109. package/dist/rules/accessibility/AXE-004.d.ts +8 -0
  110. package/dist/rules/accessibility/AXE-004.js +101 -0
  111. package/dist/rules/accessibility/AXE-004.js.map +1 -0
  112. package/dist/rules/accessibility/AXE-005.d.ts +9 -0
  113. package/dist/rules/accessibility/AXE-005.js +89 -0
  114. package/dist/rules/accessibility/AXE-005.js.map +1 -0
  115. package/dist/rules/best-practices/BP-004.d.ts +9 -0
  116. package/dist/rules/best-practices/BP-004.js +96 -0
  117. package/dist/rules/best-practices/BP-004.js.map +1 -0
  118. package/dist/rules/best-practices/BP-005.d.ts +8 -0
  119. package/dist/rules/best-practices/BP-005.js +94 -0
  120. package/dist/rules/best-practices/BP-005.js.map +1 -0
  121. package/dist/rules/best-practices/BP-006.d.ts +8 -0
  122. package/dist/rules/best-practices/BP-006.js +80 -0
  123. package/dist/rules/best-practices/BP-006.js.map +1 -0
  124. package/dist/rules/best-practices/BP-007.d.ts +8 -0
  125. package/dist/rules/best-practices/BP-007.js +92 -0
  126. package/dist/rules/best-practices/BP-007.js.map +1 -0
  127. package/dist/rules/best-practices/BP-008.d.ts +12 -0
  128. package/dist/rules/best-practices/BP-008.js +86 -0
  129. package/dist/rules/best-practices/BP-008.js.map +1 -0
  130. package/dist/rules/best-practices/BP-009.d.ts +9 -0
  131. package/dist/rules/best-practices/BP-009.js +77 -0
  132. package/dist/rules/best-practices/BP-009.js.map +1 -0
  133. package/dist/rules/best-practices/BP-010.d.ts +8 -0
  134. package/dist/rules/best-practices/BP-010.js +85 -0
  135. package/dist/rules/best-practices/BP-010.js.map +1 -0
  136. package/dist/rules/coverage/COV-002.d.ts +8 -0
  137. package/dist/rules/coverage/COV-002.js +68 -0
  138. package/dist/rules/coverage/COV-002.js.map +1 -0
  139. package/dist/rules/coverage/COV-003.d.ts +8 -0
  140. package/dist/rules/coverage/COV-003.js +68 -0
  141. package/dist/rules/coverage/COV-003.js.map +1 -0
  142. package/dist/rules/coverage/COV-004.d.ts +8 -0
  143. package/dist/rules/coverage/COV-004.js +89 -0
  144. package/dist/rules/coverage/COV-004.js.map +1 -0
  145. package/dist/rules/coverage/COV-005.d.ts +8 -0
  146. package/dist/rules/coverage/COV-005.js +67 -0
  147. package/dist/rules/coverage/COV-005.js.map +1 -0
  148. package/dist/rules/coverage/COV-006.d.ts +9 -0
  149. package/dist/rules/coverage/COV-006.js +76 -0
  150. package/dist/rules/coverage/COV-006.js.map +1 -0
  151. package/dist/rules/coverage/COV-007.d.ts +8 -0
  152. package/dist/rules/coverage/COV-007.js +67 -0
  153. package/dist/rules/coverage/COV-007.js.map +1 -0
  154. package/dist/rules/coverage/COV-008.d.ts +9 -0
  155. package/dist/rules/coverage/COV-008.js +87 -0
  156. package/dist/rules/coverage/COV-008.js.map +1 -0
  157. package/dist/rules/coverage/COV-009.d.ts +8 -0
  158. package/dist/rules/coverage/COV-009.js +73 -0
  159. package/dist/rules/coverage/COV-009.js.map +1 -0
  160. package/dist/rules/coverage/COV-010.d.ts +9 -0
  161. package/dist/rules/coverage/COV-010.js +82 -0
  162. package/dist/rules/coverage/COV-010.js.map +1 -0
  163. package/dist/rules/description/DESC-001.d.ts +9 -0
  164. package/dist/rules/description/DESC-001.js +88 -0
  165. package/dist/rules/description/DESC-001.js.map +1 -0
  166. package/dist/rules/description/DESC-002.d.ts +10 -0
  167. package/dist/rules/description/DESC-002.js +99 -0
  168. package/dist/rules/description/DESC-002.js.map +1 -0
  169. package/dist/rules/description/DESC-006.d.ts +9 -0
  170. package/dist/rules/description/DESC-006.js +78 -0
  171. package/dist/rules/description/DESC-006.js.map +1 -0
  172. package/dist/rules/description/DESC-007.d.ts +9 -0
  173. package/dist/rules/description/DESC-007.js +70 -0
  174. package/dist/rules/description/DESC-007.js.map +1 -0
  175. package/dist/rules/description/DESC-008.d.ts +9 -0
  176. package/dist/rules/description/DESC-008.js +70 -0
  177. package/dist/rules/description/DESC-008.js.map +1 -0
  178. package/dist/rules/description/DESC-009.d.ts +8 -0
  179. package/dist/rules/description/DESC-009.js +55 -0
  180. package/dist/rules/description/DESC-009.js.map +1 -0
  181. package/dist/rules/description/DESC-010.d.ts +9 -0
  182. package/dist/rules/description/DESC-010.js +92 -0
  183. package/dist/rules/description/DESC-010.js.map +1 -0
  184. package/dist/rules/description/DESC-011.d.ts +9 -0
  185. package/dist/rules/description/DESC-011.js +81 -0
  186. package/dist/rules/description/DESC-011.js.map +1 -0
  187. package/dist/rules/description/DESC-012.d.ts +9 -0
  188. package/dist/rules/description/DESC-012.js +98 -0
  189. package/dist/rules/description/DESC-012.js.map +1 -0
  190. package/dist/rules/implementation/IMP-002.d.ts +9 -0
  191. package/dist/rules/implementation/IMP-002.js +59 -0
  192. package/dist/rules/implementation/IMP-002.js.map +1 -0
  193. package/dist/rules/implementation/IMP-006.d.ts +9 -0
  194. package/dist/rules/implementation/IMP-006.js +48 -0
  195. package/dist/rules/implementation/IMP-006.js.map +1 -0
  196. package/dist/rules/implementation/IMP-008.d.ts +9 -0
  197. package/dist/rules/implementation/IMP-008.js +46 -0
  198. package/dist/rules/implementation/IMP-008.js.map +1 -0
  199. package/dist/rules/implementation/IMP-009.d.ts +9 -0
  200. package/dist/rules/implementation/IMP-009.js +48 -0
  201. package/dist/rules/implementation/IMP-009.js.map +1 -0
  202. package/dist/rules/implementation/IMP-010.d.ts +9 -0
  203. package/dist/rules/implementation/IMP-010.js +66 -0
  204. package/dist/rules/implementation/IMP-010.js.map +1 -0
  205. package/dist/rules/implementation/IMP-011.d.ts +9 -0
  206. package/dist/rules/implementation/IMP-011.js +82 -0
  207. package/dist/rules/implementation/IMP-011.js.map +1 -0
  208. package/dist/rules/implementation/IMP-012.d.ts +9 -0
  209. package/dist/rules/implementation/IMP-012.js +88 -0
  210. package/dist/rules/implementation/IMP-012.js.map +1 -0
  211. package/dist/rules/implementation/IMP-014.d.ts +9 -0
  212. package/dist/rules/implementation/IMP-014.js +58 -0
  213. package/dist/rules/implementation/IMP-014.js.map +1 -0
  214. package/dist/rules/implementation/IMP-015.d.ts +9 -0
  215. package/dist/rules/implementation/IMP-015.js +64 -0
  216. package/dist/rules/implementation/IMP-015.js.map +1 -0
  217. package/dist/rules/implementation/IMP-016.d.ts +9 -0
  218. package/dist/rules/implementation/IMP-016.js +52 -0
  219. package/dist/rules/implementation/IMP-016.js.map +1 -0
  220. package/dist/rules/implementation/IMP-017.d.ts +8 -0
  221. package/dist/rules/implementation/IMP-017.js +51 -0
  222. package/dist/rules/implementation/IMP-017.js.map +1 -0
  223. package/dist/rules/implementation/IMP-018.d.ts +8 -0
  224. package/dist/rules/implementation/IMP-018.js +52 -0
  225. package/dist/rules/implementation/IMP-018.js.map +1 -0
  226. package/dist/rules/implementation/IMP-019.d.ts +8 -0
  227. package/dist/rules/implementation/IMP-019.js +53 -0
  228. package/dist/rules/implementation/IMP-019.js.map +1 -0
  229. package/dist/rules/implementation/IMP-020.d.ts +9 -0
  230. package/dist/rules/implementation/IMP-020.js +62 -0
  231. package/dist/rules/implementation/IMP-020.js.map +1 -0
  232. package/dist/rules/implementation/IMP-021.d.ts +8 -0
  233. package/dist/rules/implementation/IMP-021.js +64 -0
  234. package/dist/rules/implementation/IMP-021.js.map +1 -0
  235. package/dist/rules/implementation/IMP-022.d.ts +8 -0
  236. package/dist/rules/implementation/IMP-022.js +70 -0
  237. package/dist/rules/implementation/IMP-022.js.map +1 -0
  238. package/dist/rules/index.d.ts +73 -6
  239. package/dist/rules/index.js +141 -6
  240. package/dist/rules/index.js.map +1 -1
  241. package/dist/rules/schema/SCHEMA-004.d.ts +9 -0
  242. package/dist/rules/schema/SCHEMA-004.js +57 -0
  243. package/dist/rules/schema/SCHEMA-004.js.map +1 -0
  244. package/dist/rules/schema/SCHEMA-005.d.ts +9 -0
  245. package/dist/rules/schema/SCHEMA-005.js +61 -0
  246. package/dist/rules/schema/SCHEMA-005.js.map +1 -0
  247. package/dist/rules/schema/SCHEMA-006.d.ts +10 -0
  248. package/dist/rules/schema/SCHEMA-006.js +85 -0
  249. package/dist/rules/schema/SCHEMA-006.js.map +1 -0
  250. package/dist/rules/schema/SCHEMA-007.d.ts +9 -0
  251. package/dist/rules/schema/SCHEMA-007.js +73 -0
  252. package/dist/rules/schema/SCHEMA-007.js.map +1 -0
  253. package/dist/rules/schema/SCHEMA-008.d.ts +9 -0
  254. package/dist/rules/schema/SCHEMA-008.js +70 -0
  255. package/dist/rules/schema/SCHEMA-008.js.map +1 -0
  256. package/dist/rules/schema/SCHEMA-009.d.ts +10 -0
  257. package/dist/rules/schema/SCHEMA-009.js +80 -0
  258. package/dist/rules/schema/SCHEMA-009.js.map +1 -0
  259. package/dist/rules/schema/SCHEMA-010.d.ts +9 -0
  260. package/dist/rules/schema/SCHEMA-010.js +96 -0
  261. package/dist/rules/schema/SCHEMA-010.js.map +1 -0
  262. package/dist/rules/schema/SCHEMA-012.d.ts +9 -0
  263. package/dist/rules/schema/SCHEMA-012.js +65 -0
  264. package/dist/rules/schema/SCHEMA-012.js.map +1 -0
  265. package/dist/rules/security/SEC-002.d.ts +8 -0
  266. package/dist/rules/security/SEC-002.js +81 -0
  267. package/dist/rules/security/SEC-002.js.map +1 -0
  268. package/dist/rules/security/SEC-003.d.ts +8 -0
  269. package/dist/rules/security/SEC-003.js +85 -0
  270. package/dist/rules/security/SEC-003.js.map +1 -0
  271. package/dist/rules/security/SEC-004.d.ts +9 -0
  272. package/dist/rules/security/SEC-004.js +87 -0
  273. package/dist/rules/security/SEC-004.js.map +1 -0
  274. package/dist/rules/security/SEC-005.d.ts +8 -0
  275. package/dist/rules/security/SEC-005.js +87 -0
  276. package/dist/rules/security/SEC-005.js.map +1 -0
  277. package/dist/rules/security/SEC-006.d.ts +10 -0
  278. package/dist/rules/security/SEC-006.js +108 -0
  279. package/dist/rules/security/SEC-006.js.map +1 -0
  280. package/dist/rules/security/SEC-007.d.ts +9 -0
  281. package/dist/rules/security/SEC-007.js +108 -0
  282. package/dist/rules/security/SEC-007.js.map +1 -0
  283. package/dist/rules/security/SEC-008.d.ts +8 -0
  284. package/dist/rules/security/SEC-008.js +109 -0
  285. package/dist/rules/security/SEC-008.js.map +1 -0
  286. package/dist/rules/security/SEC-009.d.ts +9 -0
  287. package/dist/rules/security/SEC-009.js +93 -0
  288. package/dist/rules/security/SEC-009.js.map +1 -0
  289. package/dist/rules/security/SEC-010.d.ts +8 -0
  290. package/dist/rules/security/SEC-010.js +78 -0
  291. package/dist/rules/security/SEC-010.js.map +1 -0
  292. package/dist/rules/security/SEC-011.d.ts +8 -0
  293. package/dist/rules/security/SEC-011.js +93 -0
  294. package/dist/rules/security/SEC-011.js.map +1 -0
  295. package/dist/rules/security/SEC-012.d.ts +8 -0
  296. package/dist/rules/security/SEC-012.js +79 -0
  297. package/dist/rules/security/SEC-012.js.map +1 -0
  298. package/dist/rules/security/SEC-013.d.ts +9 -0
  299. package/dist/rules/security/SEC-013.js +107 -0
  300. package/dist/rules/security/SEC-013.js.map +1 -0
  301. package/dist/scoring/calculator.js +1 -0
  302. package/dist/scoring/calculator.js.map +1 -1
  303. package/dist/ui/ink/components/AIRecommendationCard.d.ts +11 -0
  304. package/dist/ui/ink/components/AIRecommendationCard.js +23 -0
  305. package/dist/ui/ink/components/AIRecommendationCard.js.map +1 -0
  306. package/dist/ui/ink/components/OpportunityList.d.ts +10 -0
  307. package/dist/ui/ink/components/OpportunityList.js +48 -0
  308. package/dist/ui/ink/components/OpportunityList.js.map +1 -0
  309. package/dist/ui/ink/components/PotentialPageCard.d.ts +13 -0
  310. package/dist/ui/ink/components/PotentialPageCard.js +43 -0
  311. package/dist/ui/ink/components/PotentialPageCard.js.map +1 -0
  312. package/dist/ui/ink/components/PotentialProgress.d.ts +16 -0
  313. package/dist/ui/ink/components/PotentialProgress.js +44 -0
  314. package/dist/ui/ink/components/PotentialProgress.js.map +1 -0
  315. package/dist/ui/ink/components/PotentialSummary.d.ts +10 -0
  316. package/dist/ui/ink/components/PotentialSummary.js +86 -0
  317. package/dist/ui/ink/components/PotentialSummary.js.map +1 -0
  318. package/dist/ui/ink/components/SuggestionCard.d.ts +34 -0
  319. package/dist/ui/ink/components/SuggestionCard.js +36 -0
  320. package/dist/ui/ink/components/SuggestionCard.js.map +1 -0
  321. package/dist/ui/ink/components/views/MultiPageCrawlView.d.ts +21 -0
  322. package/dist/ui/ink/components/views/MultiPageCrawlView.js +55 -0
  323. package/dist/ui/ink/components/views/MultiPageCrawlView.js.map +1 -0
  324. package/dist/ui/ink/components/views/PotentialView.d.ts +18 -0
  325. package/dist/ui/ink/components/views/PotentialView.js +74 -0
  326. package/dist/ui/ink/components/views/PotentialView.js.map +1 -0
  327. package/dist/ui/ink/components/views/ReconView.d.ts +22 -0
  328. package/dist/ui/ink/components/views/ReconView.js +30 -0
  329. package/dist/ui/ink/components/views/ReconView.js.map +1 -0
  330. package/package.json +2 -1
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Robots.txt Parser
3
+ *
4
+ * Fetches and parses robots.txt. Extracts allowed/disallowed paths,
5
+ * crawl-delay, and sitemap references. Handles missing robots.txt gracefully.
6
+ */
/** Default ceiling (ms) on how long a request may stay pending before it is aborted. */
const FETCH_TIMEOUT_MS = 10_000;
/**
 * Perform a `fetch` that is aborted through an AbortController when the
 * timeout elapses first.
 *
 * The abort timer is cleared as soon as the `fetch` promise settles
 * (fulfilled or rejected), so it never fires after this function is done.
 *
 * @param url - URL to request.
 * @param timeoutMs - Milliseconds to wait before aborting (defaults to FETCH_TIMEOUT_MS).
 * @returns The response that `fetch` resolved with.
 */
async function fetchWithTimeout(url, timeoutMs = FETCH_TIMEOUT_MS) {
    const aborter = new AbortController();
    const abortTimer = setTimeout(() => {
        aborter.abort();
    }, timeoutMs);
    const pending = fetch(url, {
        redirect: 'follow',
        headers: { 'User-Agent': 'WebMCP-CLI/1.0 (robots-parser)' },
        signal: aborter.signal,
    });
    // Always release the timer, whatever the outcome of the request.
    return pending.finally(() => clearTimeout(abortTimer));
}
/**
 * Parse robots.txt content into structured directives.
 *
 * One directive object is produced per `User-agent` line. Consecutive
 * `User-agent` lines form a single group: every `Allow`, `Disallow` and
 * `Crawl-delay` line that follows applies to all agents of that group, not
 * just to the last one named. `Sitemap` lines are global and are collected
 * separately. Comments (`#` to end of line), blank lines, lines without a
 * colon and unknown fields are ignored, as are rules with an empty value.
 *
 * @param raw - Full text of the robots.txt file.
 * @returns `{ directives, sitemapUrls }` where each directive has the shape
 *          `{ userAgent, allow, disallow, crawlDelay? }`.
 */
function parseRobotsTxt(raw) {
    const directives = [];
    const sitemapUrls = [];
    // Directives of the group currently being filled; null until the first
    // User-agent line has been seen.
    let group = null;
    // True while the last group-relevant line was a User-agent line, i.e. the
    // next User-agent line still belongs to the same group.
    let collectingAgents = false;
    // Accept LF, CRLF and bare CR line endings.
    for (const rawLine of raw.split(/\r\n|\r|\n/)) {
        // Strip comments and whitespace
        const commentIdx = rawLine.indexOf('#');
        const line = (commentIdx !== -1 ? rawLine.substring(0, commentIdx) : rawLine).trim();
        if (!line)
            continue;
        const colonIdx = line.indexOf(':');
        if (colonIdx === -1)
            continue;
        const field = line.substring(0, colonIdx).trim().toLowerCase();
        const value = line.substring(colonIdx + 1).trim();
        if (field === 'user-agent') {
            const directive = {
                userAgent: value,
                allow: [],
                disallow: [],
            };
            if (collectingAgents && group) {
                // Another agent joining the group opened by the previous line
                group.push(directive);
            }
            else {
                // A rule line closed the previous group: start a new one
                group = [directive];
            }
            collectingAgents = true;
            directives.push(directive);
            continue;
        }
        if (field === 'sitemap') {
            // Sitemap directives are global and do not close the agent list
            if (value)
                sitemapUrls.push(value);
            continue;
        }
        // Rules that appear before any User-agent line have no owner
        if (!group)
            continue;
        switch (field) {
            case 'allow':
                collectingAgents = false;
                if (value) {
                    for (const member of group)
                        member.allow.push(value);
                }
                break;
            case 'disallow':
                collectingAgents = false;
                if (value) {
                    for (const member of group)
                        member.disallow.push(value);
                }
                break;
            case 'crawl-delay': {
                collectingAgents = false;
                const delay = parseFloat(value);
                // Reject NaN, negative and infinite delays
                if (Number.isFinite(delay) && delay >= 0) {
                    for (const member of group)
                        member.crawlDelay = delay;
                }
                break;
            }
        }
    }
    return { directives, sitemapUrls };
}
/**
 * Test a single robots.txt path rule against a URL path.
 *
 * A rule is a prefix pattern in which `*` matches any run of characters
 * (including none) and a trailing `$` anchors the match to the end of the
 * path. Matching is done segment by segment with `indexOf`, so no regular
 * expression is built from the (untrusted) rule text.
 *
 * @param path - URL path to test.
 * @param rule - Rule value taken from an Allow/Disallow line.
 * @returns true when the rule applies to the path.
 */
function matchesRobotsRule(path, rule) {
    const anchored = rule.endsWith('$');
    const pattern = anchored ? rule.slice(0, -1) : rule;
    const segments = pattern.split('*');
    const head = segments[0];
    // Everything before the first wildcard must be a literal prefix
    if (!path.startsWith(head))
        return false;
    let cursor = head.length;
    if (segments.length === 1) {
        // No wildcard: plain prefix, or exact match when anchored
        return !anchored || path.length === cursor;
    }
    // Middle segments: take the leftmost occurrence after the cursor
    for (let i = 1; i < segments.length - 1; i++) {
        const at = path.indexOf(segments[i], cursor);
        if (at === -1)
            return false;
        cursor = at + segments[i].length;
    }
    const tail = segments[segments.length - 1];
    if (anchored) {
        // The final literal must sit at the very end without overlapping
        // the part of the path that was already consumed
        return path.length - tail.length >= cursor && path.endsWith(tail);
    }
    return path.indexOf(tail, cursor) !== -1;
}
/**
 * Check whether a path is allowed by the robots.txt directives.
 *
 * Directives whose user-agent equals `userAgent` (case-insensitively) are
 * used; when there are none, the wildcard (`*`) directives are used instead.
 * If several directives match the same agent their rules are combined.
 * Among all matching rules the longest one wins, and an Allow rule wins a
 * tie against a Disallow rule of the same length. Rules may contain `*`
 * wildcards and a trailing `$` end anchor.
 *
 * @param path - URL path to check.
 * @param directives - Parsed directives (`{ userAgent, allow, disallow }`).
 * @param userAgent - Agent name to look up; defaults to the wildcard `*`.
 * @returns true when crawling the path is permitted.
 */
export function isPathAllowed(path, directives, userAgent = '*') {
    // Find matching directives (specific UA first, then wildcard)
    const wanted = userAgent.toLowerCase();
    let groups = directives.filter((d) => d.userAgent.toLowerCase() === wanted);
    if (groups.length === 0) {
        groups = directives.filter((d) => d.userAgent === '*');
    }
    if (groups.length === 0)
        return true; // No rules = allowed
    // Longest matching rule of each kind across all applicable groups
    let longestDisallow = 0;
    let longestAllow = 0;
    for (const group of groups) {
        for (const rule of group.disallow) {
            if (rule.length > longestDisallow && matchesRobotsRule(path, rule)) {
                longestDisallow = rule.length;
            }
        }
        for (const rule of group.allow) {
            if (rule.length > longestAllow && matchesRobotsRule(path, rule)) {
                longestAllow = rule.length;
            }
        }
    }
    // Allow wins when it is at least as specific as the best Disallow;
    // with no matching Disallow both lengths are 0 and the path is allowed
    return longestAllow >= longestDisallow;
}
/**
 * Download `/robots.txt` for a site and parse it.
 *
 * Every failure while fetching or reading (non-2xx status, an HTML response,
 * or a thrown error) is reported through a `found: false` result carrying an
 * `error` message rather than by throwing. Resolving the robots.txt URL from
 * `baseUrl` happens before that handling starts.
 *
 * @param baseUrl - Any URL of the site; `/robots.txt` is resolved against it.
 * @returns `{ found, directives, sitemapUrls, raw, error? }`.
 */
export async function parseRobots(baseUrl) {
    // All failure outcomes share one shape; only the message differs
    const notFound = (error) => ({
        found: false,
        directives: [],
        sitemapUrls: [],
        raw: '',
        error,
    });
    const robotsUrl = new URL('/robots.txt', baseUrl).href;
    try {
        const response = await fetchWithTimeout(robotsUrl);
        if (!response.ok) {
            return notFound(`HTTP ${response.status}`);
        }
        // Robots.txt should be text/plain; an HTML body is usually a
        // "not found" page served with a success status
        const contentType = response.headers.get('content-type') ?? '';
        if (contentType.includes('text/html')) {
            return notFound('Response was HTML, not robots.txt');
        }
        const raw = await response.text();
        const parsed = parseRobotsTxt(raw);
        return {
            found: true,
            directives: parsed.directives,
            sitemapUrls: parsed.sitemapUrls,
            raw,
        };
    }
    catch (error) {
        return notFound(error instanceof Error ? error.message : 'Unknown error');
    }
}
158
+ //# sourceMappingURL=robots.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/recon/robots.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,yBAAyB;AACzB,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,YAAoB,gBAAgB;IAEpC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,CAAC,CAAC;IAC9D,IAAI,CAAC;QACH,OAAO,MAAM,KAAK,CAAC,GAAG,EAAE;YACtB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,OAAO,EAAE,EAAE,YAAY,EAAE,gCAAgC,EAAE;YAC3D,QAAQ,EAAE,QAAQ;SACnB,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,GAAW;IAEX,MAAM,UAAU,GAAsB,EAAE,CAAC;IACzC,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,IAAI,OAAO,GAA2B,IAAI,CAAC;IAE3C,KAAK,MAAM,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACtC,gCAAgC;QAChC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,IAAI,GAAG,CAAC,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QACrF,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,QAAQ,KAAK,CAAC,CAAC;YAAE,SAAS;QAE9B,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC/D,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAElD,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;YAC3B,8BAA8B;YAC9B,OAAO,GAAG;gBACR,SAAS,EAAE,KAAK;gBAChB,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;aACb,CAAC;YACF,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,SAAS;QACX,CAAC;QAED,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,oDAAoD;YACpD,IAAI,KAAK;gBAAE,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnC,SAAS;QACX,CAAC;QAED,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO;gBACV,IAAI,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACrC,MAAM;YACR,KAAK,UAAU;gBACb,IAAI,KAAK;oBAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACxC,MAAM;YACR,KAAK,aAAa,CAAC,CAAC,CAAC;gBACnB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;gBAChC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK
,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,UAAU,GAAG,KAAK,CAAC;gBAC7B,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,IAAY,EACZ,UAA6B,EAC7B,YAAoB,GAAG;IAEvB,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAC9B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE,KAAK,SAAS,CAAC,WAAW,EAAE,CAC7D,CAAC;IACF,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,GAAG,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,QAAQ,IAAI,QAAQ,CAAC;IAEvC,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC,CAAC,qBAAqB;IAElD,4CAA4C;IAC5C,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YAC3D,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC;QAChC,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACnC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;YACxD,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,+CAA+C;IAC/C,IAAI,YAAY,IAAI,eAAe;QAAE,OAAO,IAAI,CAAC;IACjD,OAAO,eAAe,KAAK,CAAC,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAAe;IAC/C,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;QAE7C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,EAAE;gBACf,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,EAAE;aACjC,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,kEAAkE;QAClE,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACtC,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,UAAU,EAAE,EAAE;gBACd,WAAW,EAAE,EAAE;gBACf,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,mCAAmC;aAC3C,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClC,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;QAExD,OAAO;YACL,KAAK,EAAE,IAAI;YACX,UAAU;YACV,WAAW;YACX,GAAG;SACJ,CAAC;IACJ,CA
AC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QACzE,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,UAAU,EAAE,EAAE;YACd,WAAW,EAAE,EAAE;YACf,GAAG,EAAE,EAAE;YACP,KAAK,EAAE,OAAO;SACf,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Route Discovery
3
+ *
4
+ * Builds a route graph from all recon sources: sitemap, HTML links,
5
+ * form actions. Deduplicates and assigns preliminary priorities using
6
+ * the scoring algorithm from the technical plan.
7
+ */
8
+ import type { Route, SitemapEntry, RobotsDirective } from '../core/types/recon.js';
/** Everything route discovery takes as input. */
export interface RouteDiscoveryInput {
    /** Base URL of the site */
    baseUrl: string;
    /** HTML of the root page, from which links are extracted */
    rootHtml: string;
    /** Entries read from the sitemap; empty when there are none */
    sitemapEntries: Array<SitemapEntry>;
    /** Parsed robots.txt directives used for filtering */
    robotsDirectives: Array<RobotsDirective>;
    /** Whether robots.txt disallow rules are to be respected */
    respectRobotsTxt: boolean;
}
/**
 * Discover all routes from sitemap, HTML, and form actions.
 * Deduplicates, classifies, scores, and sorts by priority.
 *
 * @param input - Site data gathered during recon (see RouteDiscoveryInput).
 * @returns The discovered routes.
 */
export declare function discoverRoutes(input: RouteDiscoveryInput): Array<Route>;
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Route Discovery
3
+ *
4
+ * Builds a route graph from all recon sources: sitemap, HTML links,
5
+ * form actions. Deduplicates and assigns preliminary priorities using
6
+ * the scoring algorithm from the technical plan.
7
+ */
8
+ import * as cheerio from 'cheerio';
9
+ import { isPathAllowed } from './robots.js';
/**
 * Page type heuristics based on URL patterns.
 *
 * Entries are tried in order and the first hit wins. An entry is tested
 * against the URL pathname, unless it sets `matchQuery`, in which case it is
 * tested against the query string (including its leading `?`).
 */
const PAGE_TYPE_PATTERNS = [
    { pattern: /\/(search|find|browse|discover)\b/i, type: 'search' },
    // The pathname never contains the query string, so this entry has to be
    // matched against `search` to have any effect
    { pattern: /[?&]q=/i, type: 'search', matchQuery: true },
    { pattern: /\/(results|listings|products|catalog)\b/i, type: 'listing-results' },
    { pattern: /\/(product|item|detail)\//i, type: 'detail-page' },
    { pattern: /\/(cart|basket)\b/i, type: 'checkout-cart' },
    { pattern: /\/(checkout|pay|payment|purchase)\b/i, type: 'checkout-payment' },
    { pattern: /\/(account|profile|my-?account|user)\b/i, type: 'account-management' },
    { pattern: /\/(settings|preferences|config)\b/i, type: 'settings' },
    { pattern: /\/(login|signin|sign-in|auth|register|signup|sign-up)\b/i, type: 'authentication' },
    { pattern: /\/(dashboard|admin|overview)\b/i, type: 'dashboard' },
    { pattern: /\/(contact|contact-us|reach-us)\b/i, type: 'contact' },
    { pattern: /\/(help|support|faq|docs|documentation|knowledge-?base)\b/i, type: 'help-support' },
    { pattern: /\/(blog|article|post|news|press)\b/i, type: 'content-article' },
    { pattern: /\/(about|team|careers|company)\b/i, type: 'content-article' },
];
/**
 * Page type scores for priority calculation
 */
const PAGE_TYPE_SCORES = {
    search: 25,
    'checkout-payment': 25,
    'checkout-cart': 20,
    'listing-results': 20,
    homepage: 20,
    'detail-page': 15,
    'account-management': 15,
    'form-submission': 15,
    settings: 10,
    authentication: 10,
    dashboard: 10,
    contact: 10,
    'help-support': 5,
    'content-article': 3,
    unknown: 10,
};
/**
 * Classify a URL into a page type based on URL patterns.
 *
 * The root path is always `'homepage'`. Otherwise the first matching entry
 * of PAGE_TYPE_PATTERNS decides, and `'unknown'` is returned when nothing
 * matches.
 *
 * @param url - Absolute URL; `new URL(url)` throws for anything else.
 * @returns The page type label.
 */
function classifyPageType(url) {
    const { pathname, search } = new URL(url);
    // Root path = homepage
    if (pathname === '/' || pathname === '')
        return 'homepage';
    for (const { pattern, type, matchQuery } of PAGE_TYPE_PATTERNS) {
        // Path heuristics must not fire on query values (e.g. ?next=/login),
        // and query heuristics can only ever match the query string
        if (pattern.test(matchQuery ? search : pathname))
            return type;
    }
    return 'unknown';
}
/**
 * Compute the priority score of a route; larger means more important.
 *
 * The score is the sum of a form bonus, the page-type score and an
 * interactive-elements bonus, minus a per-level depth penalty, and is
 * never negative.
 *
 * @param route - Route with `hasForm`, `estimatedPageType`, `depth` and
 *                `hasInteractiveElements`.
 * @returns The score, clamped at 0 from below.
 */
function calculatePriorityScore(route) {
    // Forms are the strongest signal for agentic value
    const formBonus = route.hasForm ? 30 : 0;
    // Page types without a table entry contribute nothing
    const typeScore = PAGE_TYPE_SCORES[route.estimatedPageType] ?? 0;
    // Each level of depth costs 3 points
    const depthPenalty = route.depth * 3;
    const interactiveBonus = route.hasInteractiveElements ? 10 : 0;
    return Math.max(0, formBonus + typeScore - depthPenalty + interactiveBonus);
}
/**
 * Map a numeric priority score onto its priority label.
 *
 * @param score - Score to classify.
 * @returns `'critical'` (>= 40), `'high'` (>= 25), `'medium'` (>= 15),
 *          `'low'` (>= 5), otherwise `'skip'`.
 */
function scoreToPriority(score) {
    // Tiers in descending order: the first threshold reached decides
    const tiers = [
        [40, 'critical'],
        [25, 'high'],
        [15, 'medium'],
        [5, 'low'],
    ];
    for (const [minimum, label] of tiers) {
        if (score >= minimum)
            return label;
    }
    return 'skip';
}
/**
 * Tell whether a URL has the given origin.
 *
 * @param url - URL to inspect; it must parse on its own (no base is supplied).
 * @param baseOrigin - Origin string to compare against.
 * @returns true when the origins are equal, false otherwise or when `url`
 *          cannot be parsed.
 */
function isSameOrigin(url, baseOrigin) {
    let origin;
    try {
        ({ origin } = new URL(url));
    }
    catch {
        // Unparseable URLs belong to no origin
        return false;
    }
    return origin === baseOrigin;
}
105
/**
 * Produce a canonical form of a URL so equivalent URLs deduplicate.
 *
 * The fragment is dropped, query parameters are sorted, and a single trailing
 * slash is removed from the path (the root path "/" is kept).
 *
 * @param url - URL string to canonicalize.
 * @returns The canonical href, or `url` unchanged when it cannot be parsed.
 */
function normalizeUrl(url) {
    try {
        const target = new URL(url);
        // Fragments never identify a different page.
        target.hash = '';
        // Stable parameter order makes ?a=1&b=2 and ?b=2&a=1 compare equal.
        target.searchParams.sort();
        const currentPath = target.pathname;
        const hasRemovableSlash = currentPath.length > 1 && currentPath.endsWith('/');
        target.pathname = hasRemovableSlash ? currentPath.slice(0, -1) : currentPath;
        return target.href;
    }
    catch {
        return url;
    }
}
127
/**
 * Collect same-origin link targets from an HTML document.
 *
 * Three passes run in this order, each appending to the result:
 *   1. anchors inside <nav> or <header>  -> source 'navigation'
 *   2. every anchor with an href         -> source 'link'
 *      (hrefs starting with '#', 'javascript:', 'mailto:' or 'tel:' are skipped)
 *   3. form actions                      -> source 'form-action', hasForm true
 * Targets are resolved against `baseUrl`; unparseable or cross-origin
 * targets are dropped. Duplicates are NOT removed here.
 *
 * @param html - HTML markup to scan.
 * @param baseUrl - Absolute URL used to resolve relative targets and to
 *                  determine the allowed origin.
 * @returns Array of `{ url, source, hasForm }` records.
 */
function extractHtmlLinks(html, baseUrl) {
    const $ = cheerio.load(html);
    const baseOrigin = new URL(baseUrl).origin;
    const links = [];
    const nonPagePrefixes = ['#', 'javascript:', 'mailto:', 'tel:'];
    // Shared walker: read `attribute` from each match of `selector`, resolve it,
    // and record it when it stays on the base origin.
    const collect = (selector, attribute, source, hasForm, skipNonPage) => {
        $(selector).each((_i, el) => {
            const target = $(el).attr(attribute);
            if (!target) {
                return;
            }
            if (skipNonPage && nonPagePrefixes.some((prefix) => target.startsWith(prefix))) {
                return;
            }
            try {
                const resolved = new URL(target, baseUrl).href;
                if (isSameOrigin(resolved, baseOrigin)) {
                    links.push({ url: resolved, source, hasForm });
                }
            }
            catch {
                // Invalid URL — skip
            }
        });
    };
    collect('nav a[href], header a[href]', 'href', 'navigation', false, false);
    collect('a[href]', 'href', 'link', false, true);
    collect('form[action]', 'action', 'form-action', true, false);
    return links;
}
185
/**
 * Lightweight heuristic: does the markup contain at least one element that
 * is typically interactive?
 *
 * Looks for <button>, [role="button"], [onclick], [data-action], <details>
 * and [role="tab"]. Forms are not inspected by this function.
 *
 * @param html - HTML markup to scan.
 * @returns true as soon as any of the selectors matches, otherwise false.
 */
function htmlHasInteractiveElements(html) {
    const $ = cheerio.load(html);
    const interactiveSelectors = [
        'button',
        '[role="button"]',
        '[onclick]',
        '[data-action]',
        'details',
        '[role="tab"]',
    ];
    return interactiveSelectors.some((selector) => $(selector).length > 0);
}
199
/**
 * Build the prioritized list of routes for a site.
 *
 * Candidates come from the root URL itself, the sitemap entries, and the
 * links / form actions found in the root HTML. Each candidate is normalized
 * and stored once per normalized URL: the first sighting fixes the route's
 * `source`, while later sightings may only lower its depth, flag a form, or
 * fill in sitemap metadata that is still missing. Cross-origin URLs, paths
 * rejected by robots.txt (when enabled) and static-asset extensions are
 * dropped. Every surviving route is then classified, scored and labelled.
 *
 * @param input - Object with `baseUrl`, `rootHtml`, `sitemapEntries`,
 *                `robotsDirectives` and `respectRobotsTxt`.
 * @returns Routes sorted by descending `priorityScore`.
 */
export function discoverRoutes(input) {
    const { baseUrl, rootHtml, sitemapEntries, robotsDirectives, respectRobotsTxt } = input;
    const baseOrigin = new URL(baseUrl).origin;
    // normalized URL -> route record
    const routesByUrl = new Map();
    const nonPageExtension = /\.(jpg|jpeg|png|gif|svg|webp|ico|css|js|woff|woff2|ttf|eot|pdf|zip|tar|gz)$/i;
    // Insert a new route, or merge into the record already stored for the URL.
    const addRoute = (url, source, depth, extras) => {
        const key = normalizeUrl(url);
        if (!isSameOrigin(key, baseOrigin)) {
            return;
        }
        const path = new URL(key).pathname;
        // robots.txt is only consulted when enabled and directives exist.
        if (respectRobotsTxt && robotsDirectives.length > 0 && !isPathAllowed(path, robotsDirectives)) {
            return;
        }
        // Static assets and downloads are not pages.
        if (nonPageExtension.test(path.toLowerCase())) {
            return;
        }
        const known = routesByUrl.get(key);
        if (!known) {
            const estimatedPageType = classifyPageType(key);
            routesByUrl.set(key, {
                url: key,
                source,
                depth,
                hasForm: extras?.hasForm ?? false,
                hasInteractiveElements: false, // filled in later for the root page
                estimatedPageType,
                priority: 'medium', // placeholder until scoring runs
                priorityScore: 0,
                lastmod: extras?.lastmod,
                changefreq: extras?.changefreq,
                sitemapPriority: extras?.sitemapPriority,
            });
            return;
        }
        // Already known: keep the shallowest depth and fill gaps only.
        if (depth < known.depth) {
            known.depth = depth;
        }
        if (extras?.hasForm) {
            known.hasForm = true;
        }
        if (extras?.lastmod && !known.lastmod) {
            known.lastmod = extras.lastmod;
        }
        if (extras?.changefreq && !known.changefreq) {
            known.changefreq = extras.changefreq;
        }
        if (extras?.sitemapPriority != null && known.sitemapPriority == null) {
            known.sitemapPriority = extras.sitemapPriority;
        }
    };
    // 1. The root URL is always the first candidate, at depth 0.
    addRoute(baseUrl, 'navigation', 0, { hasForm: false });
    // 2. Sitemap entries; an entry that fails to resolve or register is skipped.
    for (const entry of sitemapEntries) {
        try {
            const entryUrl = new URL(entry.loc, baseUrl).href;
            const sitemapExtras = {
                lastmod: entry.lastmod,
                changefreq: entry.changefreq,
                sitemapPriority: entry.priority,
            };
            addRoute(entryUrl, 'sitemap', 1, sitemapExtras);
        }
        catch {
            // Invalid sitemap URL — skip
        }
    }
    // 3. Links and form actions discovered in the root HTML.
    for (const link of extractHtmlLinks(rootHtml, baseUrl)) {
        addRoute(link.url, link.source, 1, { hasForm: link.hasForm });
    }
    // 4. The root page is the only one whose HTML is available for inspection.
    const rootIsInteractive = htmlHasInteractiveElements(rootHtml);
    const rootRoute = routesByUrl.get(normalizeUrl(baseUrl));
    if (rootRoute) {
        rootRoute.hasInteractiveElements = rootIsInteractive;
        const $ = cheerio.load(rootHtml);
        if ($('form').length > 0) {
            rootRoute.hasForm = true;
        }
    }
    // 5. Score and label every route, then 6. order by score, best first.
    const routes = Array.from(routesByUrl.values());
    for (const route of routes) {
        route.priorityScore = calculatePriorityScore(route);
        route.priority = scoreToPriority(route.priorityScore);
    }
    routes.sort((a, b) => b.priorityScore - a.priorityScore);
    return routes;
}
303
+ //# sourceMappingURL=route-discovery.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"route-discovery.js","sourceRoot":"","sources":["../../src/recon/route-discovery.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,MAAM,kBAAkB,GAA0C;IAChE,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,QAAQ,EAAE;IACjE,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE;IACtC,EAAE,OAAO,EAAE,0CAA0C,EAAE,IAAI,EAAE,iBAAiB,EAAE;IAChF,EAAE,OAAO,EAAE,4BAA4B,EAAE,IAAI,EAAE,aAAa,EAAE;IAC9D,EAAE,OAAO,EAAE,oBAAoB,EAAE,IAAI,EAAE,eAAe,EAAE;IACxD,EAAE,OAAO,EAAE,sCAAsC,EAAE,IAAI,EAAE,kBAAkB,EAAE;IAC7E,EAAE,OAAO,EAAE,yCAAyC,EAAE,IAAI,EAAE,oBAAoB,EAAE;IAClF,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,UAAU,EAAE;IACnE,EAAE,OAAO,EAAE,0DAA0D,EAAE,IAAI,EAAE,gBAAgB,EAAE;IAC/F,EAAE,OAAO,EAAE,iCAAiC,EAAE,IAAI,EAAE,WAAW,EAAE;IACjE,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,SAAS,EAAE;IAClE,EAAE,OAAO,EAAE,4DAA4D,EAAE,IAAI,EAAE,cAAc,EAAE;IAC/F,EAAE,OAAO,EAAE,qCAAqC,EAAE,IAAI,EAAE,iBAAiB,EAAE;IAC3E,EAAE,OAAO,EAAE,mCAAmC,EAAE,IAAI,EAAE,iBAAiB,EAAE;CAC1E,CAAC;AAEF;;GAEG;AACH,MAAM,gBAAgB,GAA6B;IACjD,MAAM,EAAE,EAAE;IACV,kBAAkB,EAAE,EAAE;IACtB,eAAe,EAAE,EAAE;IACnB,iBAAiB,EAAE,EAAE;IACrB,QAAQ,EAAE,EAAE;IACZ,aAAa,EAAE,EAAE;IACjB,oBAAoB,EAAE,EAAE;IACxB,iBAAiB,EAAE,EAAE;IACrB,QAAQ,EAAE,EAAE;IACZ,cAAc,EAAE,EAAE;IAClB,SAAS,EAAE,EAAE;IACb,OAAO,EAAE,EAAE;IACX,cAAc,EAAE,CAAC;IACjB,iBAAiB,EAAE,CAAC;IACpB,OAAO,EAAE,EAAE;CACZ,CAAC;AAEF;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAW;IACnC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAEvC,uBAAuB;IACvB,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,EAAE;QAAE,OAAO,UAAU,CAAC;IAE3D,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,kBAAkB,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,OAAO,IAAI,CAAC;IAC1C,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,KAAwF;IACtH,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,4CAA4C;IAC5C,IAAI,KAAK,CAAC,OAAO;QAAE,KAAK,IAAI,EAAE,CAAC;IAE/B,mBAAmB;IACnB,KAAK,IAAI,gBAAgB,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAExD,0CAA0C;IAC1C,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;IA
EzB,6BAA6B;IAC7B,IAAI,KAAK,CAAC,sBAAsB;QAAE,KAAK,IAAI,EAAE,CAAC;IAE9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,UAAU,CAAC;IACnC,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,MAAM,CAAC;IAC/B,IAAI,KAAK,IAAI,EAAE;QAAE,OAAO,QAAQ,CAAC;IACjC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,GAAW,EAAE,UAAkB;IACnD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,GAAW;IAC/B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,cAAc;QACd,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACjB,+CAA+C;QAC/C,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAC3B,0CAA0C;QAC1C,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC/B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC3B,OAAO,MAAM,CAAC,IAAI,CAAC;IACrB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,OAAe;IAEf,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC3C,MAAM,KAAK,GAA6D,EAAE,CAAC;IAE3E,wCAAwC;IACxC,CAAC,CAAC,6BAA6B,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC7C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kBAAkB;IAClB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC3B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE
,OAAO;QAClB,+CAA+C;QAC/C,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YACtD,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,OAAO;QAClE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC7C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,eAAe;IACf,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpC,IAAI,CAAC,MAAM;YAAE,OAAO;QACpB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC/C,IAAI,YAAY,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,SAAS,0BAA0B,CAAC,IAAY;IAC9C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,8CAA8C;IAC9C,OAAO,CACL,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;QACtB,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM,GAAG,CAAC;QAC/B,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC;QACzB,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC;QAC7B,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC;QACvB,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM,GAAG,CAAC,CAC7B,CAAC;AACJ,CAAC;AAmBD;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAA0B;IACvD,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,GAAG,KAAK,CAAC;IACxF,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAE3C,8BAA8B;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAiB,CAAC;IAE1C;;;OAGG;IACH,SAAS,QAAQ,CACf,GAAW,EACX,MAAmB,EACnB,KAAa,EACb,MAKC;QAED,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;QAErC,uBAAuB;QACvB,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,UAAU,CAAC;YAAE,OAAO;QAElD,qBAAqB;QACrB,IAAI,gBAAgB,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,C
AAC;YAC9C,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,gBAAgB,CAAC;gBAAE,OAAO;QACzD,CAAC;QAED,gCAAgC;QAChC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC5D,IAAI,8EAA8E,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClG,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC1C,IAAI,QAAQ,EAAE,CAAC;YACb,4DAA4D;YAC5D,IAAI,KAAK,GAAG,QAAQ,CAAC,KAAK;gBAAE,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;YACnD,IAAI,MAAM,EAAE,OAAO;gBAAE,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;YAC7C,IAAI,MAAM,EAAE,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO;gBAAE,QAAQ,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;YAC5E,IAAI,MAAM,EAAE,UAAU,IAAI,CAAC,QAAQ,CAAC,UAAU;gBAAE,QAAQ,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;YACxF,IAAI,MAAM,EAAE,eAAe,IAAI,IAAI,IAAI,QAAQ,CAAC,eAAe,IAAI,IAAI,EAAE,CAAC;gBACxE,QAAQ,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;YACpD,CAAC;YACD,OAAO;QACT,CAAC;QAED,MAAM,iBAAiB,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACvD,MAAM,OAAO,GAAG,MAAM,EAAE,OAAO,IAAI,KAAK,CAAC;QAEzC,MAAM,KAAK,GAAU;YACnB,GAAG,EAAE,UAAU;YACf,MAAM;YACN,KAAK;YACL,OAAO;YACP,sBAAsB,EAAE,KAAK,EAAE,mCAAmC;YAClE,iBAAiB;YACjB,QAAQ,EAAE,QAAQ,EAAE,sCAAsC;YAC1D,aAAa,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,OAAO;YACxB,UAAU,EAAE,MAAM,EAAE,UAAU;YAC9B,eAAe,EAAE,MAAM,EAAE,eAAe;SACzC,CAAC;QAEF,QAAQ,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC;IAED,kBAAkB;IAClB,QAAQ,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;IAEvD,yBAAyB;IACzB,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAClD,QAAQ,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;gBAC/B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,eAAe,EAAE,KAAK,CAAC,QAAQ;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,6BAA6B;QAC/B,CAAC;IACH,CAAC;IAED,kCAAkC;IAClC,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACtD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,8CAA8C;IAC9C,MAAM,kBAAkB,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CA
AC;IAChE,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC/C,IAAI,SAAS,EAAE,CAAC;QACd,SAAS,CAAC,sBAAsB,GAAG,kBAAkB,CAAC;QACtD,kDAAkD;QAClD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,KAAK,CAAC,aAAa,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC;QACpD,KAAK,CAAC,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC;IAED,yCAAyC;IACzC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CACxC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,aAAa,CAC5C,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Sitemap Parser
3
+ *
4
+ * Fetches and parses sitemap.xml and sitemap index files.
5
+ * Uses native fetch. Handles missing/malformed sitemaps gracefully.
6
+ */
7
+ import type { SitemapResult } from '../core/types/recon.js';
8
+ /**
9
+ * Fetch and parse a sitemap, resolving to a SitemapResult. `sitemapUrl` is optional in the signature; NOTE(review): presumably a default location is derived from `baseUrl` when it is omitted — confirm in the implementation.
10
+ * Handles sitemap index files by recursively fetching children.
11
+ */
12
+ export declare function parseSitemap(baseUrl: string, sitemapUrl?: string): Promise<SitemapResult>;