@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,500 @@
1
+ /**
2
+ * Test command - the simplified, config-driven MCP server testing command.
3
+ *
4
+ * All settings are read from bellwether.yaml (created by `bellwether init`).
5
+ * The only optional argument is the server command, which can also be in config.
6
+ */
7
+ import { Command } from 'commander';
8
+ import { writeFileSync, existsSync, mkdirSync } from 'fs';
9
+ import { join } from 'path';
10
+ import { MCPClient } from '../../transport/mcp-client.js';
11
+ import { discover } from '../../discovery/discovery.js';
12
+ import { createLLMClient } from '../../llm/index.js';
13
+ import { Interviewer } from '../../interview/interviewer.js';
14
+ import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
15
+ import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
16
+ import { validateConfigForTest } from '../../config/validator.js';
17
+ import { createBaseline, loadBaseline, compareBaselines, formatDiffText, } from '../../baseline/index.js';
18
+ import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
19
+ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
20
+ import { FallbackLLMClient } from '../../llm/fallback.js';
21
+ import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
22
+ import { INTERVIEW, WORKFLOW } from '../../constants.js';
23
+ import { InterviewProgressBar, formatStartupBanner } from '../utils/progress.js';
24
+ import { parsePersonas } from '../../persona/builtins.js';
25
+ import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
26
+ import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, } from '../../workflow/loader.js';
27
+ import * as output from '../output.js';
28
+ import { StreamingDisplay } from '../output.js';
29
+ import { suppressLogs, restoreLogLevel } from '../../logging/logger.js';
/**
 * Parse a persona list, emitting a CLI warning for every name that
 * `parsePersonas` reports as unknown.
 *
 * @param personaList - Persona names as read from the configuration.
 * @returns Whatever `parsePersonas` returns for the given list.
 */
function parsePersonasWithWarning(personaList) {
    // Invoked by parsePersonas with the unrecognised name and the list of valid names.
    const reportUnknownPersona = (unknownName, validNames) => {
        const available = validNames.join(', ');
        output.warn(`Unknown persona: ${unknownName}. Available: ${available}`);
    };
    return parsePersonas(personaList, reportUnknownPersona);
}
/**
 * Extract server context from command and arguments.
 *
 * The server kind is guessed by substring matching on the lower-cased
 * "command + args" string, and every argument that starts with '/' is
 * treated as a directory path.
 *
 * @param command - Executable used to launch the server.
 * @param args - Arguments passed to the server; a missing or non-array
 *   value is treated as an empty list instead of throwing.
 * @returns An object with `allowedDirectories`, `constraints` and `hints`
 *   arrays describing the server.
 */
function extractServerContextFromArgs(command, args) {
    // The caller may hand over an undefined argument list (e.g. when the
    // config omits server args), so normalise before calling join/filter.
    const argList = Array.isArray(args) ? args : [];
    const fullCommand = `${command} ${argList.join(' ')}`.toLowerCase();
    // An argument that starts with '/' can never start with '--', so one
    // prefix test is enough to separate paths from flags.
    const pathArgs = argList.filter((arg) => arg.startsWith('/'));
    const mentions = (needle) => fullCommand.includes(needle);

    let allowedDirectories = [];
    const constraints = [];
    const hints = [];

    if (mentions('filesystem') || mentions('file-system')) {
        allowedDirectories = pathArgs;
        if (allowedDirectories.length > 0) {
            hints.push(`Filesystem server with allowed directories: ${allowedDirectories.join(', ')}`);
        }
        constraints.push('Operations limited to specified directories');
    }
    else if (mentions('postgres') || mentions('mysql') || mentions('sqlite')) {
        // Database servers get no allowed directories, even if path-like args exist.
        hints.push('Database server - SQL operations expected');
        constraints.push('Database operations only');
    }
    else if (mentions('git')) {
        allowedDirectories = pathArgs;
        hints.push('Git server - repository operations expected');
    }
    else {
        // Unknown server kind: still expose any path-like arguments.
        allowedDirectories = pathArgs;
    }
    return { allowedDirectories, constraints, hints };
}
69
+ /**
70
+ * Detect if running in a CI environment.
71
+ */
72
+ function isCI() {
73
+ return !!(process.env.CI ||
74
+ process.env.CONTINUOUS_INTEGRATION ||
75
+ process.env.GITHUB_ACTIONS ||
76
+ process.env.GITLAB_CI ||
77
+ process.env.CIRCLECI ||
78
+ process.env.JENKINS_URL ||
79
+ process.env.TRAVIS ||
80
+ process.env.BUILDKITE);
81
+ }
82
+ export const testCommand = new Command('test')
83
+ .description('Test an MCP server using settings from bellwether.yaml')
84
+ .argument('[server-command]', 'Server command (overrides config)')
85
+ .argument('[args...]', 'Server arguments')
86
+ .option('-c, --config <path>', 'Path to config file (default: ./bellwether.yaml)')
87
+ .action(async (serverCommandArg, serverArgs, options) => {
88
+ // Load configuration (required)
89
+ let config;
90
+ try {
91
+ config = loadConfig(options.config);
92
+ }
93
+ catch (error) {
94
+ if (error instanceof ConfigNotFoundError) {
95
+ output.error(error.message);
96
+ process.exit(1);
97
+ }
98
+ throw error;
99
+ }
100
+ // Determine server command (CLI arg overrides config)
101
+ const serverCommand = serverCommandArg || config.server.command;
102
+ const args = serverArgs.length > 0 ? serverArgs : config.server.args;
103
+ // Validate config for running tests
104
+ try {
105
+ validateConfigForTest(config, serverCommand);
106
+ }
107
+ catch (error) {
108
+ output.error(error instanceof Error ? error.message : String(error));
109
+ process.exit(1);
110
+ }
111
+ // Extract settings from config
112
+ const isContractMode = config.mode === 'contract';
113
+ const timeout = config.server.timeout;
114
+ const outputDir = config.output.dir;
115
+ const wantsJson = config.output.format === 'json' || config.output.format === 'both';
116
+ const cacheEnabled = config.cache.enabled;
117
+ const verbose = config.logging.verbose;
118
+ const logLevel = config.logging.level;
119
+ // Parse personas from config
120
+ const selectedPersonas = parsePersonasWithWarning(config.test.personas);
121
+ const maxQuestions = config.test.maxQuestionsPerTool;
122
+ const parallelPersonas = config.test.parallelPersonas;
123
+ // Get LLM settings
124
+ const provider = config.llm.provider;
125
+ const model = config.llm.model || undefined; // Empty string means use default
126
+ // Display startup banner
127
+ const banner = formatStartupBanner({
128
+ serverCommand: `${serverCommand} ${args.join(' ')}`,
129
+ provider,
130
+ model: model || 'default',
131
+ isQuality: false,
132
+ personas: selectedPersonas.map((p) => p.name),
133
+ questionsPerTool: maxQuestions,
134
+ });
135
+ output.info(banner);
136
+ output.newline();
137
+ if (isContractMode) {
138
+ output.info('Mode: contract (free, deterministic - no LLM calls)');
139
+ }
140
+ else {
141
+ output.info(`Mode: document (LLM-powered using ${provider})`);
142
+ }
143
+ output.newline();
144
+ // Initialize cost tracker
145
+ const costTracker = new CostTracker(model || 'default');
146
+ // Initialize metrics collector
147
+ resetMetricsCollector();
148
+ const metricsCollector = getMetricsCollector();
149
+ metricsCollector.startInterview();
150
+ // Initialize cache
151
+ resetGlobalCache();
152
+ const cache = getGlobalCache({ enabled: cacheEnabled });
153
+ if (cacheEnabled && verbose) {
154
+ output.info('Response caching enabled');
155
+ }
156
+ // Initialize MCP client
157
+ const mcpClient = new MCPClient({
158
+ timeout,
159
+ debug: logLevel === 'debug',
160
+ transport: 'stdio',
161
+ });
162
+ // Initialize LLM client (only for document mode)
163
+ let llmClient;
164
+ if (!isContractMode) {
165
+ const onUsageCallback = (inputTokens, outputTokens) => {
166
+ costTracker.addUsage(inputTokens, outputTokens);
167
+ metricsCollector.recordTokenUsage(provider, model || 'default', inputTokens, outputTokens, 'llm_call');
168
+ };
169
+ try {
170
+ // Use fallback client for robustness
171
+ llmClient = new FallbackLLMClient({
172
+ providers: [{ provider, model, baseUrl: provider === 'ollama' ? config.llm.ollama.baseUrl : undefined }],
173
+ useOllamaFallback: true,
174
+ onUsage: onUsageCallback,
175
+ });
176
+ }
177
+ catch (error) {
178
+ output.error('Failed to initialize LLM client: ' + (error instanceof Error ? error.message : String(error)));
179
+ output.error(`\nProvider: ${provider}`);
180
+ output.error('Make sure the appropriate API key environment variable is set:');
181
+ output.error(' - OpenAI: OPENAI_API_KEY');
182
+ output.error(' - Anthropic: ANTHROPIC_API_KEY');
183
+ output.error(' - Ollama: No API key needed (ensure Ollama is running)');
184
+ process.exit(1);
185
+ }
186
+ }
187
+ // For contract mode, create a minimal LLM client that won't be used
188
+ if (!llmClient) {
189
+ llmClient = createLLMClient({
190
+ provider: 'ollama',
191
+ model: 'llama3.2', // Default model; not actually used in contract mode
192
+ baseUrl: 'http://localhost:11434',
193
+ });
194
+ }
195
+ try {
196
+ // Connect to MCP server
197
+ output.info('Connecting to MCP server...');
198
+ await mcpClient.connect(serverCommand, args, config.server.env);
199
+ // Discovery phase
200
+ output.info('Discovering capabilities...');
201
+ const discovery = await discover(mcpClient, serverCommand, args);
202
+ const resourceCount = discovery.resources?.length ?? 0;
203
+ const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
204
+ if (resourceCount > 0) {
205
+ discoveryParts.push(`${resourceCount} resources`);
206
+ }
207
+ output.info(`Found ${discoveryParts.join(', ')}\n`);
208
+ // Update metrics
209
+ metricsCollector.updateInterviewCounters({
210
+ toolsDiscovered: discovery.tools.length,
211
+ personasUsed: selectedPersonas.length,
212
+ });
213
+ if (discovery.tools.length === 0) {
214
+ output.info('No tools found. Nothing to test.');
215
+ metricsCollector.endInterview();
216
+ await mcpClient.disconnect();
217
+ return;
218
+ }
219
+ // Show cost/time estimate (unless in contract mode or CI)
220
+ if (!isContractMode && !isCI()) {
221
+ const costEstimate = estimateInterviewCost(model || 'default', discovery.tools.length, maxQuestions, selectedPersonas.length);
222
+ const timeEstimate = estimateInterviewTime(discovery.tools.length, maxQuestions, selectedPersonas.length, parallelPersonas, provider, discovery.prompts.length, resourceCount, false);
223
+ output.info(formatCostAndTimeEstimate(costEstimate, timeEstimate));
224
+ // Show optimization suggestions
225
+ const hasScenariosFile = !!(config.scenarios.path || existsSync(join(outputDir, DEFAULT_SCENARIOS_FILE)));
226
+ const suggestions = suggestOptimizations({
227
+ estimatedCost: costEstimate.costUSD,
228
+ toolCount: discovery.tools.length,
229
+ personaCount: selectedPersonas.length,
230
+ isParallelPersonas: parallelPersonas,
231
+ isPremiumModel: false,
232
+ isUsingCiPreset: isContractMode,
233
+ hasScenariosFile,
234
+ });
235
+ if (suggestions.length > 0) {
236
+ output.newline();
237
+ output.info(formatOptimizationSuggestions(suggestions));
238
+ }
239
+ output.newline();
240
+ }
241
+ // Load custom scenarios
242
+ let customScenarios;
243
+ if (config.scenarios.path) {
244
+ try {
245
+ customScenarios = loadScenariosFromFile(config.scenarios.path);
246
+ output.info(`Loaded ${customScenarios.toolScenarios.length} tool scenarios from ${config.scenarios.path}`);
247
+ }
248
+ catch (error) {
249
+ output.error(`Failed to load scenarios: ${error instanceof Error ? error.message : error}`);
250
+ process.exit(1);
251
+ }
252
+ }
253
+ else if (!isContractMode) {
254
+ const defaultScenarios = tryLoadDefaultScenarios(outputDir);
255
+ if (defaultScenarios) {
256
+ customScenarios = defaultScenarios;
257
+ output.info(`Auto-loaded ${customScenarios.toolScenarios.length} scenarios from ${DEFAULT_SCENARIOS_FILE}`);
258
+ }
259
+ }
260
+ // Build workflow configuration
261
+ let workflowConfig;
262
+ if (config.workflows.path || config.workflows.discover) {
263
+ workflowConfig = {
264
+ discoverWorkflows: config.workflows.discover,
265
+ maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
266
+ enableStateTracking: config.workflows.trackState,
267
+ };
268
+ if (config.workflows.path) {
269
+ try {
270
+ const workflows = loadWorkflowsFromFile(config.workflows.path);
271
+ workflowConfig.workflows = workflows;
272
+ workflowConfig.workflowsFile = config.workflows.path;
273
+ output.info(`Loaded ${workflows.length} workflow(s) from ${config.workflows.path}`);
274
+ }
275
+ catch (error) {
276
+ output.error(`Failed to load workflows: ${error instanceof Error ? error.message : error}`);
277
+ process.exit(1);
278
+ }
279
+ }
280
+ }
281
+ else if (!isContractMode) {
282
+ const defaultWorkflows = tryLoadDefaultWorkflows(outputDir);
283
+ if (defaultWorkflows && defaultWorkflows.length > 0) {
284
+ workflowConfig = {
285
+ discoverWorkflows: false,
286
+ maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
287
+ enableStateTracking: config.workflows.trackState,
288
+ workflows: defaultWorkflows,
289
+ workflowsFile: `${outputDir}/${DEFAULT_WORKFLOWS_FILE}`,
290
+ };
291
+ output.info(`Auto-loaded ${defaultWorkflows.length} workflow(s) from ${DEFAULT_WORKFLOWS_FILE}`);
292
+ }
293
+ }
294
+ // Set up streaming display
295
+ let streamingDisplay = null;
296
+ let streamingCallbacks;
297
+ if (!isContractMode && !isCI() && logLevel !== 'silent') {
298
+ suppressLogs();
299
+ streamingDisplay = new StreamingDisplay({ style: 'dim', maxWidth: 100 });
300
+ streamingCallbacks = { // Handlers that relay streaming events (start/chunk/complete/error) to streamingDisplay
301
+ onStart: (operation) => { // operation is read as "<type>" or "<type>:<context>"
302
+ const parts = operation.split(':');
303
+ const opType = parts[0];
304
+ const context = parts[1]; // undefined when operation has no ':'. NOTE(review): text after a second ':' is dropped — confirm contexts never contain ':'
305
+ let prefix = '';
306
+ switch (opType) { // pick the status text shown before the streamed output
307
+ case 'generate-questions':
308
+ prefix = context ? `\n Generating questions for ${context}... ` : '\n Generating questions... ';
309
+ break;
310
+ case 'analyze':
311
+ prefix = context ? `\n Analyzing ${context}... ` : '\n Analyzing... ';
312
+ break;
313
+ case 'synthesize-tool':
314
+ prefix = context ? `\n Synthesizing profile for ${context}... ` : '\n Synthesizing profile... ';
315
+ break;
316
+ case 'synthesize-overall':
317
+ prefix = '\n Synthesizing overall findings... '; // context, if any, is not used for this type
318
+ break;
319
+ default: // any operation type not listed above gets a generic label
320
+ prefix = '\n Processing... ';
321
+ }
322
+ streamingDisplay?.start(prefix); // ?. because streamingDisplay is initialised to null before this object is built
323
+ },
324
+ onChunk: (chunk) => { // forwards each streamed chunk unchanged
325
+ streamingDisplay?.write(chunk);
326
+ },
327
+ onComplete: () => { // ends the current display with a ' [done]' suffix
328
+ streamingDisplay?.finish(' [done]');
329
+ },
330
+ onError: (error) => { // aborts the current display, showing error.message
331
+ streamingDisplay?.abort(`[error: ${error.message}]`);
332
+ },
333
+ };
334
+ }
335
+ // Create interviewer
336
+ const fullServerCommand = `${serverCommand} ${args.join(' ')}`.trim();
337
+ const interviewer = new Interviewer(llmClient, {
338
+ maxQuestionsPerTool: maxQuestions,
339
+ timeout,
340
+ skipErrorTests: config.test.skipErrorTests,
341
+ model: model || 'default',
342
+ personas: selectedPersonas,
343
+ customScenarios,
344
+ customScenariosOnly: config.scenarios.only,
345
+ enableStreaming: !!streamingCallbacks,
346
+ streamingCallbacks,
347
+ parallelPersonas,
348
+ personaConcurrency: INTERVIEW.DEFAULT_PERSONA_CONCURRENCY,
349
+ cache,
350
+ workflowConfig,
351
+ contractOnly: isContractMode,
352
+ serverCommand: fullServerCommand,
353
+ });
354
+ // Extract server context
355
+ const serverContext = extractServerContextFromArgs(serverCommand, args);
356
+ if (serverContext.allowedDirectories && serverContext.allowedDirectories.length > 0) {
357
+ output.info(`Detected allowed directories: ${serverContext.allowedDirectories.join(', ')}`);
358
+ }
359
+ interviewer.setServerContext(serverContext);
360
+ // Set up progress display
361
+ const progressBar = new InterviewProgressBar({ enabled: !verbose && !streamingCallbacks });
362
+ const progressCallback = (progress) => { // Progress handler passed to interviewer.interview(): logs phases when verbose, otherwise drives progressBar
363
+ if (verbose) {
364
+ switch (progress.phase) { // only these four phases are logged; 'prompts', 'resources' and 'workflows' produce no verbose output
365
+ case 'starting':
366
+ output.info('Starting test...');
367
+ progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0); // NOTE(review): the bar is created with enabled: !verbose && !streamingCallbacks, so this is presumably a no-op here — confirm
368
+ break;
369
+ case 'interviewing':
370
+ output.info(`[${progress.currentPersona}] Testing: ${progress.currentTool} (${progress.toolsCompleted + 1}/${progress.totalTools})`); // presumably toolsCompleted counts finished tools, so +1 is the 1-based position of the current one — TODO confirm
371
+ break;
372
+ case 'synthesizing':
373
+ output.info('Synthesizing findings...');
374
+ break;
375
+ case 'complete':
376
+ output.info('Test complete!');
377
+ break;
378
+ }
379
+ }
380
+ else { // non-verbose: no log lines, only progress-bar updates
381
+ if (progress.phase === 'starting') {
382
+ progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0); // prompt/resource totals fall back to 0 when null or undefined
383
+ }
384
+ else if (['interviewing', 'prompts', 'resources', 'workflows'].includes(progress.phase)) {
385
+ progressBar.update(progress);
386
+ }
387
+ else if (progress.phase === 'complete' || progress.phase === 'synthesizing') { // the bar is stopped as soon as synthesis begins, not only on completion
388
+ progressBar.stop();
389
+ }
390
+ }
391
+ };
392
+ output.info('Starting test...\n');
393
+ const result = await interviewer.interview(mcpClient, discovery, progressCallback);
394
+ progressBar.stop();
395
+ if (!verbose) {
396
+ output.newline();
397
+ }
398
+ // Generate documentation
399
+ output.info('Generating documentation...');
400
+ mkdirSync(outputDir, { recursive: true });
401
+ const agentsMd = generateAgentsMd(result);
402
+ // Use CONTRACT.md for contract mode, AGENTS.md for document mode
403
+ const outputFileName = isContractMode ? 'CONTRACT.md' : 'AGENTS.md';
404
+ const agentsMdPath = join(outputDir, outputFileName);
405
+ writeFileSync(agentsMdPath, agentsMd);
406
+ output.info(`Written: ${agentsMdPath}`);
407
+ if (wantsJson) {
408
+ const jsonReport = generateJsonReport(result);
409
+ const jsonPath = join(outputDir, 'bellwether-report.json');
410
+ writeFileSync(jsonPath, jsonReport);
411
+ output.info(`Written: ${jsonPath}`);
412
+ }
413
+ // End metrics
414
+ metricsCollector.endInterview();
415
+ output.info('\nTest complete!');
416
+ output.info(`Duration: ${(result.metadata.durationMs / 1000).toFixed(1)}s`);
417
+ output.info(`Tools verified: ${result.toolProfiles.length}`);
418
+ // Display scenario results
419
+ if (result.scenarioResults && result.scenarioResults.length > 0) {
420
+ const passed = result.scenarioResults.filter((r) => r.passed).length;
421
+ const failed = result.scenarioResults.length - passed;
422
+ const statusIcon = failed === 0 ? '\u2713' : '\u2717';
423
+ output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
424
+ if (failed > 0) {
425
+ output.info('\nFailed scenarios:');
426
+ for (const scenarioResult of result.scenarioResults.filter((r) => !r.passed)) {
427
+ const scenario = scenarioResult.scenario;
428
+ const toolOrPrompt = 'tool' in scenario ? scenario.tool : scenario.prompt;
429
+ output.info(` - ${toolOrPrompt}: ${scenario.description}`);
430
+ if (scenarioResult.error) {
431
+ output.info(` Error: ${scenarioResult.error}`);
432
+ }
433
+ }
434
+ }
435
+ }
436
+ // Display workflow results
437
+ if (result.workflowResults && result.workflowResults.length > 0) {
438
+ const successful = result.workflowResults.filter((wr) => wr.success).length;
439
+ const failed = result.workflowResults.length - successful;
440
+ const statusIcon = failed === 0 ? '\u2713' : '\u2717';
441
+ output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
442
+ if (failed > 0) {
443
+ output.info('\nFailed workflows:');
444
+ for (const wr of result.workflowResults.filter((w) => !w.success)) {
445
+ output.info(` - ${wr.workflow.name}: ${wr.failureReason ?? 'Unknown error'}`);
446
+ }
447
+ }
448
+ }
449
+ // Handle baseline comparison from config
450
+ if (config.baseline.comparePath) {
451
+ const compareBaselinePath = config.baseline.comparePath;
452
+ if (!existsSync(compareBaselinePath)) {
453
+ output.error(`\nBaseline file not found: ${compareBaselinePath}`);
454
+ process.exit(1);
455
+ }
456
+ const previousBaseline = loadBaseline(compareBaselinePath);
457
+ const baselineMode = isContractMode ? 'contract' : 'document';
458
+ const currentBaseline = createBaseline(result, fullServerCommand, baselineMode);
459
+ const diff = compareBaselines(previousBaseline, currentBaseline, {});
460
+ output.info('\n--- Drift Report ---');
461
+ output.info(formatDiffText(diff));
462
+ if (config.baseline.failOnDrift) {
463
+ if (diff.severity === 'breaking') {
464
+ output.error('\nBreaking changes detected!');
465
+ process.exit(1);
466
+ }
467
+ else if (diff.severity === 'warning') {
468
+ output.warn('\nWarning-level changes detected.');
469
+ process.exit(1);
470
+ }
471
+ }
472
+ }
473
+ }
474
+ catch (error) {
475
+ const errorMessage = error instanceof Error ? error.message : String(error);
476
+ output.error('\n--- Test Failed ---');
477
+ output.error(`Error: ${errorMessage}`);
478
+ if (errorMessage.includes('ECONNREFUSED') || errorMessage.includes('Connection refused')) {
479
+ output.error('\nPossible causes:');
480
+ output.error(' - The MCP server is not running');
481
+ output.error(' - The server address/port is incorrect');
482
+ }
483
+ else if (errorMessage.includes('timeout') || errorMessage.includes('Timeout')) {
484
+ output.error('\nPossible causes:');
485
+ output.error(' - The MCP server is taking too long to respond');
486
+ output.error(' - Increase server.timeout in bellwether.yaml');
487
+ }
488
+ else if (errorMessage.includes('ENOENT') || errorMessage.includes('not found')) {
489
+ output.error('\nPossible causes:');
490
+ output.error(' - The server command was not found');
491
+ output.error(' - Check that the command is installed and in PATH');
492
+ }
493
+ process.exit(1);
494
+ }
495
+ finally {
496
+ restoreLogLevel();
497
+ await mcpClient.disconnect();
498
+ }
499
+ });
500
+ //# sourceMappingURL=test.js.map
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Upload command for uploading baselines to Bellwether Cloud.
3
+ *
4
+ * Can read baseline path from bellwether.yaml config.
5
+ */
6
+ import { Command } from 'commander';
7
+ export declare const uploadCommand: Command; // commander Command instance for the upload command described above
8
+ //# sourceMappingURL=upload.d.ts.map