@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,582 @@
1
+ /**
2
+ * Deterministic security tester for check mode.
3
+ *
4
+ * Runs security payloads against MCP tools and analyzes responses to detect
5
+ * potential vulnerabilities. All testing is deterministic (no LLM required)
6
+ * and uses well-known security test patterns.
7
+ *
8
+ * This module is the core of the security baseline feature, enabling users
9
+ * to detect common vulnerability patterns in their MCP servers.
10
+ */
11
+ import { createHash } from 'crypto';
12
+ import { getPayloadsForCategory } from './payloads.js';
13
+ import { SECURITY_TESTING } from '../constants.js';
14
/**
 * Run security tests for a single tool.
 *
 * For every requested category except `error_disclosure`, each payload (capped
 * by `maxPayloadsPerCategory`) is sent to each testable parameter (capped by
 * SECURITY_TESTING.MAX_PARAMS_PER_TOOL). Calls are awaited one at a time.
 * Error disclosure is probed afterwards, and only when it is both enabled via
 * `testErrorDisclosure` and listed in `categories`.
 *
 * @param context - The tool context; `inputSchema` is read here and the whole
 *   context is handed to the per-payload helpers
 * @param options - Security test configuration options
 * @param options.categories - Categories to run (default: SECURITY_TESTING.DEFAULT_CATEGORIES)
 * @param options.maxPayloadsPerCategory - Payload cap per category
 *   (default: SECURITY_TESTING.MAX_PAYLOADS_PER_CATEGORY)
 * @param options.testErrorDisclosure - Set to false to skip the error-disclosure probe
 * @returns Security fingerprint with findings
 */
export async function runSecurityTests(context, options = {}) {
    const {
        categories = SECURITY_TESTING.DEFAULT_CATEGORIES,
        maxPayloadsPerCategory = SECURITY_TESTING.MAX_PAYLOADS_PER_CATEGORY,
        testErrorDisclosure = true,
    } = options;
    const findings = [];
    const categoriesTested = [];
    // Identify testable parameters based on schema, limited to prevent excessive testing
    const testableParams = identifyTestableParameters(context.inputSchema);
    const paramsToTest = testableParams.slice(0, SECURITY_TESTING.MAX_PARAMS_PER_TOOL);
    for (const category of categories) {
        if (category === 'error_disclosure') {
            // Error disclosure is tested separately below
            continue;
        }
        const payloads = getPayloadsForCategory(category).slice(0, maxPayloadsPerCategory);
        if (payloads.length === 0) {
            continue;
        }
        categoriesTested.push(category);
        for (const payload of payloads) {
            for (const param of paramsToTest) {
                const result = await testPayload(context, param, payload);
                if (result.finding) {
                    findings.push(result.finding);
                }
            }
        }
    }
    if (testErrorDisclosure && categories.includes('error_disclosure')) {
        const errorFindings = await testErrorDisclosure_internal(context);
        findings.push(...errorFindings);
        // The category counts as tested whether or not the probe found anything
        categoriesTested.push('error_disclosure');
    }
    return {
        tested: true,
        categoriesTested,
        findings,
        riskScore: calculateRiskScore(findings),
        testedAt: new Date().toISOString(),
        // Hash of the findings, used for comparison between runs
        findingsHash: computeFindingsHash(findings),
    };
}
71
/**
 * Identify parameters in a tool's schema that should be security tested.
 *
 * Only string-typed properties are returned. A property also counts as a
 * string when its `type` is a union array that contains 'string' (for
 * example ['string', 'null']). Each property starts at priority 1, gains 2
 * when its name or description matches one of
 * SECURITY_TESTING.SECURITY_RELEVANT_PARAM_PATTERNS, and gains 3 when its name
 * contains path/file/url/uri/query/command.
 *
 * @param inputSchema - Tool's input schema (may be missing)
 * @returns Array of parameter names to test, highest priority first
 */
function identifyTestableParameters(inputSchema) {
    const properties = inputSchema?.properties;
    if (!properties) {
        return [];
    }
    const testable = [];
    for (const [name, propSchema] of Object.entries(properties)) {
        if (!propSchema) {
            continue;
        }
        const { type } = propSchema;
        const acceptsString = Array.isArray(type) ? type.includes('string') : type === 'string';
        // Only test string parameters (most security-relevant)
        if (!acceptsString) {
            continue;
        }
        const nameLower = name.toLowerCase();
        // A non-string description is ignored rather than allowed to throw
        const desc = typeof propSchema.description === 'string' ? propSchema.description.toLowerCase() : '';
        let priority = 1;
        for (const pattern of SECURITY_TESTING.SECURITY_RELEVANT_PARAM_PATTERNS) {
            if (pattern.test(nameLower) || pattern.test(desc)) {
                // The boost is applied once, however many patterns match
                priority += 2;
                break;
            }
        }
        // Boost priority for path/file/url related names
        if (/path|file|url|uri|query|command/i.test(nameLower)) {
            priority += 3;
        }
        testable.push({ name, priority });
    }
    // Array.prototype.sort is stable, so equal priorities keep schema order
    return testable
        .sort((a, b) => b.priority - a.priority)
        .map((entry) => entry.name);
}
115
/**
 * Test a single payload against a parameter.
 *
 * The tool is called with only `paramName` set to the payload string, racing
 * against a timeout of SECURITY_TESTING.TEST_TIMEOUT_MS. A settled call is
 * handed to analyzeSecurityResponse. A thrown error or timeout never produces
 * a finding: it is reported as `rejected` when the message looks like an
 * input rejection, otherwise as `error`.
 *
 * @param context - Tool test context (must provide `callTool(args)`)
 * @param paramName - Parameter to inject payload into
 * @param payload - Security payload to test
 * @returns Test result
 */
async function testPayload(context, paramName, payload) {
    const args = {
        [paramName]: payload.payload,
    };
    let timer;
    try {
        const result = await Promise.race([
            context.callTool(args),
            new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('Security test timeout')), SECURITY_TESTING.TEST_TIMEOUT_MS);
            }),
        ]);
        // Analyze the response
        return analyzeSecurityResponse(context, paramName, payload, result);
    }
    catch (error) {
        // Error during call - check if it's a rejection (good) or unexpected
        const errorMsg = error instanceof Error ? error.message : String(error);
        const isRejection = isSecurityRejection(errorMsg);
        return {
            category: payload.category,
            payload: payload.payload,
            parameter: paramName,
            passed: true, // Unexpected errors are not conclusive - don't create a finding
            riskLevel: 'info',
            behavior: isRejection ? 'rejected' : 'error',
        };
    }
    finally {
        // Without this, every call leaves a timer pending for the full timeout
        clearTimeout(timer);
    }
}
161
/**
 * Analyze a tool response for security issues.
 *
 * Decision order:
 *  1. Error response: passes only if the error text looks like an explicit
 *     rejection; otherwise a finding is attached.
 *  2. `expectedBehavior === 'reject'` but the call succeeded: fails.
 *  3. `expectedBehavior === 'sanitize'`: fails when the payload is found in
 *     the response content, passes otherwise.
 *  4. Anything else: acceptance was expected, passes.
 *
 * @param context - Tool test context
 * @param paramName - Parameter that was tested
 * @param payload - Payload that was used
 * @param result - Tool call result (`content`, `isError`, `errorMessage` are read)
 * @returns Test result with optional finding
 */
function analyzeSecurityResponse(context, paramName, payload, result) {
    const { content, isError, errorMessage } = result;
    const base = {
        category: payload.category,
        payload: payload.payload,
        parameter: paramName,
    };
    // Check if tool returned an error
    if (isError) {
        const errorText = errorMessage ?? content;
        const isRejection = isSecurityRejection(errorText);
        return {
            ...base,
            passed: isRejection, // Rejection = passed
            riskLevel: isRejection ? 'info' : 'medium',
            behavior: isRejection ? 'rejected' : 'error',
            finding: isRejection ? undefined : createFinding(context, paramName, payload, 'Tool returned error but did not explicitly reject potentially dangerous input'),
        };
    }
    // Success response - check based on expected behavior
    if (payload.expectedBehavior === 'reject') {
        // Payload should have been rejected but was accepted
        return {
            ...base,
            passed: false,
            riskLevel: getRiskLevelForCategory(payload.category),
            behavior: 'accepted',
            finding: createFinding(context, paramName, payload, `Tool accepted potentially dangerous input without validation: ${payload.description}`),
        };
    }
    if (payload.expectedBehavior === 'sanitize') {
        // Missing or non-searchable content cannot reflect the payload. Guarding
        // here stops a TypeError that the caller would report as a passing 'error'.
        const reflected = typeof content?.includes === 'function' && content.includes(payload.payload);
        if (reflected) {
            return {
                ...base,
                passed: false,
                riskLevel: 'high',
                behavior: 'accepted',
                finding: createFinding(context, paramName, payload, 'Tool reflects input without sanitization'),
            };
        }
        // Payload was sanitized
        return {
            ...base,
            passed: true,
            riskLevel: 'info',
            behavior: 'sanitized',
        };
    }
    // Default: accept behavior is expected
    return {
        ...base,
        passed: true,
        riskLevel: 'info',
        behavior: 'accepted',
    };
}
232
/**
 * Test for error disclosure issues.
 *
 * Calls the tool with a few deliberately invalid argument objects and, for
 * each response flagged `isError`, scans the error text with
 * analyzeErrorDisclosure. Probing stops at the first disclosure found, so at
 * most one finding is returned.
 *
 * @param context - Tool test context (`callTool` and `toolName` are read)
 * @returns Array of error disclosure findings (empty or one element)
 */
async function testErrorDisclosure_internal(context) {
    const findings = [];
    // Try to trigger errors with invalid inputs
    const invalidInputs = [
        { __invalid_param_12345__: 'test' },
        { '': null },
        { ['\x00']: 'null byte param' },
    ];
    for (const args of invalidInputs) {
        let timer;
        try {
            const result = await Promise.race([
                context.callTool(args),
                new Promise((_, reject) => {
                    timer = setTimeout(() => reject(new Error('timeout')), SECURITY_TESTING.TEST_TIMEOUT_MS);
                }),
            ]);
            if (!result.isError) {
                continue;
            }
            const errorText = result.errorMessage ?? result.content;
            const disclosure = analyzeErrorDisclosure(errorText);
            if (!disclosure) {
                continue;
            }
            findings.push({
                category: 'error_disclosure',
                riskLevel: disclosure.riskLevel,
                title: 'Information disclosure in error messages',
                description: disclosure.description,
                evidence: disclosure.evidence,
                remediation: 'Sanitize error messages to remove internal details, stack traces, file paths, and database information',
                cweId: SECURITY_TESTING.CWE_IDS.error_disclosure,
                parameter: 'N/A',
                tool: context.toolName,
            });
            break; // One finding is enough for error disclosure
        }
        catch {
            // Expected - timeout or other error; this probe is best-effort
        }
        finally {
            // Without this, every probe leaves a timer pending for the full timeout
            clearTimeout(timer);
        }
    }
    return findings;
}
277
/**
 * Scan an error message for leaked internal details.
 *
 * Pattern groups from SECURITY_TESTING.ERROR_DISCLOSURE_PATTERNS are checked
 * in a fixed order (stack traces, file paths, database details, internal
 * IPs); the first group with a matching pattern decides the result.
 *
 * @param errorText - Error message to analyze
 * @returns `{ riskLevel, description, evidence }` for the first match, null otherwise
 */
function analyzeErrorDisclosure(errorText) {
    const patternGroups = SECURITY_TESTING.ERROR_DISCLOSURE_PATTERNS;
    // Order matters: earlier rules win when several kinds of detail are present
    const rules = [
        {
            group: 'stackTrace',
            riskLevel: 'medium',
            description: 'Error message includes stack trace with internal file paths',
            evidence: 'Stack trace detected in error response',
        },
        {
            group: 'filePath',
            riskLevel: 'low',
            description: 'Error message includes internal file paths',
            evidence: 'File path detected in error response',
        },
        {
            group: 'database',
            riskLevel: 'medium',
            description: 'Error message includes database-related information',
            evidence: 'Database information detected in error response',
        },
        {
            group: 'internalIp',
            riskLevel: 'low',
            description: 'Error message includes internal network addresses',
            evidence: 'Internal IP address detected in error response',
        },
    ];
    for (const { group, ...disclosure } of rules) {
        for (const candidate of patternGroups[group]) {
            if (candidate.test(errorText)) {
                return disclosure;
            }
        }
    }
    return null;
}
327
/**
 * Decide whether a piece of error text looks like a deliberate rejection of
 * the input (the desirable outcome for a security probe).
 *
 * The text is tested against each entry of SECURITY_TESTING.REJECTION_PATTERNS
 * in order, stopping at the first match.
 *
 * @param text - Error text to check
 * @returns True if any rejection pattern matches the text, false otherwise
 */
function isSecurityRejection(text) {
    const rejectionPatterns = [...SECURITY_TESTING.REJECTION_PATTERNS];
    return rejectionPatterns.some((rejection) => rejection.test(text));
}
341
/**
 * Map a security category to the risk level assigned to its findings.
 *
 * @param category - Security category
 * @returns 'critical' for SQL/command injection, 'high' for path traversal
 *          and SSRF, 'low' for error disclosure, and 'medium' for XSS and
 *          any category not listed here
 */
function getRiskLevelForCategory(category) {
    if (category === 'sql_injection' || category === 'command_injection') {
        return 'critical';
    }
    if (category === 'path_traversal' || category === 'ssrf') {
        return 'high';
    }
    if (category === 'error_disclosure') {
        return 'low';
    }
    // 'xss' and every unrecognized category share the same level.
    return 'medium';
}
363
/**
 * Build a security finding record for a payload that exposed a problem.
 *
 * Risk level, title, remediation text and CWE id are all derived from the
 * payload's category; the evidence string records the parameter name and the
 * payload text shortened to at most 50 characters.
 *
 * @param context - Tool test context (only `toolName` is read here)
 * @param paramName - Parameter where vulnerability was found
 * @param payload - Payload that revealed the vulnerability (`category` and `payload` are read)
 * @param description - Description of the issue
 * @returns Security finding
 */
function createFinding(context, paramName, payload, description) {
    const category = payload.category;
    const riskLevel = getRiskLevelForCategory(category);
    const title = `Potential ${formatCategoryName(category)} vulnerability`;
    const evidence = `Parameter: "${paramName}", Payload: "${truncate(payload.payload, 50)}"`;
    const remediation = getRemediation(category);
    const cweId = SECURITY_TESTING.CWE_IDS[category];
    return {
        category,
        riskLevel,
        title,
        description,
        evidence,
        remediation,
        cweId,
        parameter: paramName,
        tool: context.toolName,
    };
}
385
/**
 * Turn a snake_case security category into a display label.
 *
 * The category is split on underscores, the first character of each piece is
 * upper-cased, and the pieces are joined with single spaces
 * (e.g. 'sql_injection' becomes 'Sql Injection').
 *
 * @param category - Security category
 * @returns Human-readable category name
 */
function formatCategoryName(category) {
    const words = [];
    for (const segment of category.split('_')) {
        const initial = segment.slice(0, 1).toUpperCase();
        words.push(initial + segment.slice(1));
    }
    return words.join(' ');
}
397
/**
 * Look up the remediation guidance shown for a security category.
 *
 * @param category - Security category
 * @returns Category-specific advice, or a generic input-validation
 *          recommendation when the category is not recognized
 */
function getRemediation(category) {
    const adviceByCategory = new Map([
        ['sql_injection', 'Use parameterized queries or prepared statements. Never concatenate user input directly into SQL queries.'],
        ['xss', 'Escape or encode all user input before including it in output. Use Content-Security-Policy headers.'],
        ['path_traversal', 'Validate and sanitize file paths. Use allowlists for permitted directories and reject paths containing "../" or absolute paths.'],
        ['command_injection', 'Avoid shell execution with user input. Use safe APIs, input validation, and allowlists for permitted commands.'],
        ['ssrf', 'Validate and allowlist permitted URLs and hosts. Block requests to private IP ranges (10.x, 172.16-31.x, 192.168.x) and localhost.'],
        ['error_disclosure', 'Sanitize error messages to remove internal details. Use generic error messages for users and log detailed errors server-side only.'],
    ]);
    if (adviceByCategory.has(category)) {
        return adviceByCategory.get(category);
    }
    return 'Implement proper input validation and sanitization for all user-provided data.';
}
421
/**
 * Truncate a string to a maximum length.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut so
 * that the result, including the trailing '...', is exactly `maxLength`
 * characters. When `maxLength` is too small to hold the ellipsis plus any
 * text, only as much of the ellipsis as fits is returned, so the result never
 * exceeds `maxLength`.
 *
 * @param str - String to truncate
 * @param maxLength - Maximum length
 * @returns Truncated string with ellipsis if needed
 */
function truncate(str, maxLength) {
    if (str.length <= maxLength)
        return str;
    const ellipsis = '...';
    if (maxLength <= ellipsis.length) {
        // A negative end index would make slice() count from the end of the
        // string and produce output longer than the limit, so clamp instead.
        return ellipsis.slice(0, Math.max(0, maxLength));
    }
    return str.slice(0, maxLength - ellipsis.length) + ellipsis;
}
433
/**
 * Calculate overall risk score from findings.
 *
 * Each finding contributes the weight configured for its `riskLevel` in
 * SECURITY_TESTING.RISK_WEIGHTS, and the total is capped at 100. Findings
 * whose risk level has no finite numeric weight are ignored, so a single
 * unrecognized level cannot turn the whole score into NaN.
 *
 * @param findings - Array of security findings
 * @returns Risk score, at most 100 (0 when there are no findings)
 */
function calculateRiskScore(findings) {
    if (findings.length === 0)
        return 0;
    const weights = SECURITY_TESTING.RISK_WEIGHTS;
    let score = 0;
    for (const finding of findings) {
        const weight = weights[finding.riskLevel];
        // Skip levels without a usable weight (missing key, inherited
        // non-numeric property, NaN) instead of poisoning the sum.
        if (typeof weight === 'number' && Number.isFinite(weight)) {
            score += weight;
        }
    }
    return Math.min(100, score);
}
449
/**
 * Produce a short, order-independent digest of a set of findings so two
 * result sets can be compared cheaply.
 *
 * Findings are ordered by tool, then category, then parameter, and each one
 * is serialized as `tool:category:parameter:riskLevel`. The serialized
 * entries are joined with '|' and hashed with SHA-256; the first 16 hex
 * characters of the digest are returned. The input array is not modified.
 *
 * @param findings - Array of security findings
 * @returns 16-character hex string, or the literal 'empty' when there are no findings
 */
function computeFindingsHash(findings) {
    if (findings.length === 0) {
        return 'empty';
    }
    const compareFindings = (left, right) => {
        // Leading keys are only compared when they actually differ.
        for (const field of ['tool', 'category']) {
            if (left[field] !== right[field]) {
                return left[field].localeCompare(right[field]);
            }
        }
        return left.parameter.localeCompare(right.parameter);
    };
    // Sort a copy so the caller's array keeps its original order.
    const ordered = Array.from(findings);
    ordered.sort(compareFindings);
    const entries = [];
    for (const item of ordered) {
        entries.push(`${item.tool}:${item.category}:${item.parameter}:${item.riskLevel}`);
    }
    const hasher = createHash('sha256');
    hasher.update(entries.join('|'));
    return hasher.digest('hex').slice(0, 16);
}
471
/**
 * Diff two security fingerprints and report what changed between runs.
 *
 * Findings are matched by the combination of tool, category and parameter.
 * A finding present only in `current` is new; one present only in `previous`
 * is resolved. The result is flagged as degraded when there are new findings
 * or the risk score went up. A missing `current` fingerprint is never
 * reported as degraded, while a missing `previous` one is degraded whenever
 * the current run has any findings.
 *
 * @param previous - Previous security fingerprint (may be undefined)
 * @param current - Current security fingerprint (may be undefined)
 * @returns Security diff showing what changed
 */
export function compareSecurityFingerprints(previous, current) {
    // Identity of a finding for matching purposes.
    const identityOf = (finding) => `${finding.tool}:${finding.category}:${finding.parameter}`;
    if (!previous) {
        if (!current) {
            // Neither run has security data.
            return {
                newFindings: [],
                resolvedFindings: [],
                previousRiskScore: 0,
                currentRiskScore: 0,
                riskScoreChange: 0,
                degraded: false,
                summary: 'No security testing data available',
            };
        }
        // First scan: everything found counts as new.
        const hasFindings = current.findings.length > 0;
        let firstScanSummary = 'Initial security scan: no findings';
        if (hasFindings) {
            firstScanSummary = `Initial security scan found ${current.findings.length} finding(s)`;
        }
        return {
            newFindings: current.findings,
            resolvedFindings: [],
            previousRiskScore: 0,
            currentRiskScore: current.riskScore,
            riskScoreChange: current.riskScore,
            degraded: hasFindings,
            summary: firstScanSummary,
        };
    }
    if (!current) {
        // Security testing was skipped this run.
        return {
            newFindings: [],
            resolvedFindings: previous.findings,
            previousRiskScore: previous.riskScore,
            currentRiskScore: 0,
            riskScoreChange: -previous.riskScore,
            degraded: false,
            summary: 'Security testing not performed in current run',
        };
    }
    // Both fingerprints exist: match findings by identity.
    const seenBefore = new Set(previous.findings.map((f) => identityOf(f)));
    const seenNow = new Set(current.findings.map((f) => identityOf(f)));
    const newFindings = current.findings.filter((f) => !seenBefore.has(identityOf(f)));
    const resolvedFindings = previous.findings.filter((f) => !seenNow.has(identityOf(f)));
    const riskScoreChange = current.riskScore - previous.riskScore;
    const degraded = newFindings.length > 0 || riskScoreChange > 0;
    // Assemble the human-readable summary from whichever changes occurred.
    const fragments = [];
    if (newFindings.length > 0) {
        fragments.push(`${newFindings.length} new finding(s)`);
    }
    if (resolvedFindings.length > 0) {
        fragments.push(`${resolvedFindings.length} resolved`);
    }
    if (riskScoreChange !== 0) {
        const trend = riskScoreChange > 0 ? 'increased' : 'decreased';
        fragments.push(`risk score ${trend} by ${Math.abs(riskScoreChange)}`);
    }
    let summary = 'No security changes detected';
    if (fragments.length > 0) {
        summary = fragments.join(', ');
    }
    return {
        newFindings,
        resolvedFindings,
        previousRiskScore: previous.riskScore,
        currentRiskScore: current.riskScore,
        riskScoreChange,
        degraded,
        summary,
    };
}
547
/**
 * Classify a numeric risk score into a named risk level.
 *
 * The score is compared against SECURITY_TESTING.RISK_THRESHOLDS from the
 * most severe level down; the first level whose threshold the score meets or
 * exceeds is returned.
 *
 * @param score - Risk score (0-100)
 * @returns 'critical', 'high', 'medium' or 'low', or 'info' when the score is
 *          below every threshold
 */
export function getRiskLevelFromScore(score) {
    const thresholds = SECURITY_TESTING.RISK_THRESHOLDS;
    // Most severe first, so the highest satisfied level wins.
    const levelsBySeverity = ['critical', 'high', 'medium', 'low'];
    for (const level of levelsBySeverity) {
        if (score >= thresholds[level]) {
            return level;
        }
    }
    return 'info';
}
565
/**
 * Parse security categories from a comma-separated string.
 *
 * Each entry is trimmed and lower-cased, then kept only if it appears in
 * SECURITY_TESTING.DEFAULT_CATEGORIES. Repeated entries are returned once, in
 * the order they first appear. When the input is not a string, or contains no
 * recognized category, a fresh copy of the default categories is returned.
 *
 * @param categoriesString - Comma-separated category names
 * @returns Array of valid security categories (never empty unless the
 *          defaults themselves are empty)
 */
export function parseSecurityCategories(categoriesString) {
    const defaults = [...SECURITY_TESTING.DEFAULT_CATEGORIES];
    // A missing or non-string option means "use the defaults" rather than a
    // TypeError from calling split() on undefined.
    if (typeof categoriesString !== 'string') {
        return defaults;
    }
    const validCategories = new Set(defaults);
    // A Set drops repeats while preserving first-seen order.
    const selected = new Set();
    for (const entry of categoriesString.split(',')) {
        const normalized = entry.trim().toLowerCase();
        if (validCategories.has(normalized)) {
            selected.add(normalized);
        }
    }
    return selected.size > 0 ? [...selected] : defaults;
}
582
+ //# sourceMappingURL=security-tester.js.map