@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,903 @@
1
+ import { Command } from 'commander';
2
+ import { writeFileSync, existsSync, mkdirSync } from 'fs';
3
+ import { join } from 'path';
4
+ import { MCPClient } from '../../transport/mcp-client.js';
5
+ import { discover } from '../../discovery/discovery.js';
6
+ import { createLLMClient, PREMIUM_MODELS } from '../../llm/index.js';
7
+ import { Interviewer } from '../../interview/interviewer.js';
8
+ import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
9
+ import { loadConfig } from '../../config/loader.js';
10
+ import { createBaseline, saveBaseline, loadBaseline, compareBaselines, formatDiffText, } from '../../baseline/index.js';
11
+ import { createCloudBaseline } from '../../baseline/converter.js';
12
+ import { CostTracker, estimateInterviewCost, formatCostEstimate, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
13
+ import { COST_THRESHOLDS } from '../../constants.js';
14
+ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
15
+ import { FallbackLLMClient } from '../../llm/fallback.js';
16
+ import { withTokenBudget } from '../../llm/token-budget.js';
17
+ import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
18
+ import { INTERVIEW } from '../../constants.js';
19
+ import { promptForConfig, displayConfigSummary, } from '../interactive.js';
20
+ import { InterviewProgressBar, formatStartupBanner, } from '../utils/progress.js';
21
+ import { DEFAULT_PERSONA, securityTesterPersona, qaEngineerPersona, noviceUserPersona, } from '../../persona/builtins.js';
22
+ import { loadScenariosFromFile, tryLoadDefaultScenarios, generateSampleScenariosYaml, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
23
+ import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, generateSampleWorkflowYaml, DEFAULT_WORKFLOWS_FILE, } from '../../workflow/loader.js';
24
+ import { WORKFLOW } from '../../constants.js';
25
+ import * as output from '../output.js';
26
+ import { StreamingDisplay } from '../output.js';
27
+ import { suppressLogs, restoreLogLevel } from '../../logging/logger.js';
28
/**
 * Lookup table from the persona names accepted on the command line to the
 * built-in persona objects imported from the persona module.
 *
 * Key order matters: `Object.values()` / `Object.keys()` over this table
 * drive both the `all` expansion and the list of names printed in the
 * unknown-persona warning.
 */
const PERSONA_MAP = {
    // Default persona, selected by the name "technical".
    technical: DEFAULT_PERSONA,
    // Security-testing persona.
    security: securityTesterPersona,
    // QA-engineer persona.
    qa: qaEngineerPersona,
    // Novice-user persona.
    novice: noviceUserPersona,
};
37
/**
 * Named configuration bundles selectable with `-p, --preset <name>`.
 *
 * Every entry carries:
 *  - `personas`:     the persona objects used for the run,
 *  - `maxQuestions`: the question count for the preset (presumably applied
 *                    per tool, as the descriptions state - confirm against
 *                    the consumer of this table),
 *  - `description`:  the human-readable summary shown to the user.
 *
 * Key order is preserved because the preset names are listed to the user
 * via `Object.keys(PRESETS)`.
 */
const PRESETS = {
    docs: {
        personas: [
            DEFAULT_PERSONA,
        ],
        maxQuestions: 3,
        description: 'Documentation-focused: Technical Writer persona, 3 questions/tool (~$0.02, ~2 min)',
    },
    security: {
        personas: [
            DEFAULT_PERSONA,
            securityTesterPersona,
        ],
        maxQuestions: 3,
        description: 'Security audit: Technical + Security personas, 3 questions/tool (~$0.05, ~3 min)',
    },
    thorough: {
        personas: [
            DEFAULT_PERSONA,
            securityTesterPersona,
            qaEngineerPersona,
            noviceUserPersona,
        ],
        maxQuestions: 5,
        description: 'Comprehensive: All 4 personas, 5 questions/tool (~$0.12, ~8 min)',
    },
    ci: {
        personas: [
            DEFAULT_PERSONA,
        ],
        maxQuestions: 1,
        description: 'Structural-only: Zero LLM, free, deterministic (for CI/CD)',
    },
};
59
/**
 * Detect if running in a CI environment.
 *
 * Looks at the generic `CI` / `CONTINUOUS_INTEGRATION` flags and at the
 * provider-specific variables set by GitHub Actions, GitLab CI, CircleCI,
 * Jenkins, Travis and Buildkite.
 *
 * A variable only counts as an indicator when it holds a non-blank value
 * that is not an explicit opt-out (`false` or `0`, case-insensitive).
 * Environment variables are always strings, so a plain truthiness check
 * would treat `CI=false` as "running in CI".
 *
 * @returns {boolean} `true` when at least one CI indicator is set.
 */
function isCI() {
    const indicatorNames = [
        'CI',
        'CONTINUOUS_INTEGRATION',
        'GITHUB_ACTIONS',
        'GITLAB_CI',
        'CIRCLECI',
        'JENKINS_URL',
        'TRAVIS',
        'BUILDKITE',
    ];
    return indicatorNames.some((name) => {
        const rawValue = process.env[name];
        if (!rawValue) {
            return false;
        }
        const normalized = rawValue.trim().toLowerCase();
        // Blank, "false" and "0" are explicit "not CI" values.
        return normalized !== '' && normalized !== 'false' && normalized !== '0';
    });
}
72
/**
 * Check if running in an interactive terminal.
 *
 * Interactive means that both stdout and stdin are attached to a TTY and
 * that no CI environment was detected by `isCI()`.
 *
 * `isTTY` is `undefined` (not `false`) on streams that are not terminals,
 * so the stream checks are coerced explicitly to guarantee a strict boolean
 * result, matching `isCI()`.
 *
 * @returns {boolean} `true` only for an interactive, non-CI terminal session.
 */
function isInteractiveTTY() {
    const hasTerminal = Boolean(process.stdout.isTTY && process.stdin.isTTY);
    return hasTerminal && !isCI();
}
78
/**
 * Parse persona list from CLI option.
 *
 * The list is comma-separated; each entry is trimmed and lower-cased before
 * it is looked up in `PERSONA_MAP`. The keyword `all` (in any casing, alone
 * or as one entry of the list) expands to every persona in `PERSONA_MAP`.
 *
 * - Empty entries (e.g. from a trailing comma) are ignored.
 * - Unknown names produce a warning through `output.warn` and are skipped.
 * - Only own keys of `PERSONA_MAP` are accepted, so inherited names such as
 *   `constructor` are reported as unknown instead of being returned.
 * - Each persona is returned at most once, in order of first mention.
 *
 * @param {string} personaList - Raw value of the `--personas` option.
 * @returns {Array} The selected personas, or `[DEFAULT_PERSONA]` when
 *   nothing valid was selected.
 */
function parsePersonas(personaList) {
    const requestedNames = String(personaList ?? '')
        .split(',')
        .map((entry) => entry.trim().toLowerCase())
        .filter((entry) => entry !== '');
    // A Set keeps insertion order and removes repeated selections.
    const selected = new Set();
    for (const name of requestedNames) {
        if (name === 'all') {
            for (const persona of Object.values(PERSONA_MAP)) {
                selected.add(persona);
            }
            continue;
        }
        // Own-property check: a plain `PERSONA_MAP[name]` lookup would also
        // resolve names inherited from Object.prototype.
        const persona = Object.prototype.hasOwnProperty.call(PERSONA_MAP, name)
            ? PERSONA_MAP[name]
            : undefined;
        if (persona) {
            selected.add(persona);
        }
        else {
            output.warn(`Unknown persona: ${name}. Available: ${Object.keys(PERSONA_MAP).join(', ')}, all`);
        }
    }
    return selected.size > 0 ? [...selected] : [DEFAULT_PERSONA];
}
98
/**
 * Extract server context from command and arguments.
 * Looks for common patterns like directory paths that indicate server constraints.
 *
 * The server type is guessed from case-insensitive substrings of the full
 * command line, checked in this order: filesystem, database
 * (postgres/mysql/sqlite), git, then a generic fallback.
 *
 * Arguments that look like absolute paths are collected as candidate
 * allowed directories. POSIX paths (`/data`), Windows drive paths
 * (`C:\data`, `C:/data`) and UNC paths (`\\server\share`) are recognised,
 * so path arguments are detected on Windows as well.
 *
 * @param {string} command - Executable used to start the server.
 * @param {string[]} args - Arguments passed to the server; a missing value
 *   is treated as an empty list.
 * @returns {{allowedDirectories: string[], constraints: string[], hints: string[]}}
 *   Context derived from the command line.
 */
function extractServerContextFromArgs(command, args) {
    const argList = args ?? [];
    const context = {
        allowedDirectories: [],
        constraints: [],
        hints: [],
    };
    // Lower-cased once so every server-type check is case-insensitive.
    const fullCommand = `${command} ${argList.join(' ')}`.toLowerCase();
    const mentions = (...needles) => needles.some((needle) => fullCommand.includes(needle));
    // Leading "/" (POSIX), "<drive>:\" or "<drive>:/" (Windows), "\\" (UNC).
    // Option flags such as "--foo" never match any of these prefixes.
    const absolutePathPattern = /^(?:\/|[A-Za-z]:[\\/]|\\\\)/;
    const pathArgs = argList.filter((arg) => absolutePathPattern.test(arg));
    if (mentions('filesystem', 'file-system')) {
        // Filesystem servers - directory arguments are the allowed roots
        context.allowedDirectories = pathArgs;
        if (pathArgs.length > 0) {
            context.hints.push(`Filesystem server with allowed directories: ${pathArgs.join(', ')}`);
        }
        context.constraints.push('Operations limited to specified directories');
    }
    else if (mentions('postgres', 'mysql', 'sqlite')) {
        // Database servers - no directory information is extracted
        context.hints.push('Database server - SQL operations expected');
        context.constraints.push('Database operations only');
    }
    else if (mentions('git')) {
        // Git servers
        context.allowedDirectories = pathArgs;
        context.hints.push('Git server - repository operations expected');
    }
    else {
        // Generic case - any path arguments are potential allowed directories
        context.allowedDirectories = pathArgs;
    }
    return context;
}
136
+ export const interviewCommand = new Command('interview')
137
+ .description('Test an MCP server: structural drift detection (--structural) or full LLM interview')
138
+ .argument('[command]', 'Command to start the MCP server')
139
+ .argument('[args...]', 'Arguments to pass to the server')
140
+ .option('-o, --output <dir>', 'Output directory', '.')
141
+ .option('-c, --config <path>', 'Path to config file')
142
+ .option('--model <model>', 'LLM model to use')
143
+ .option('--max-questions <n>', 'Max questions per tool')
144
+ .option('--timeout <ms>', 'Timeout for tool calls in milliseconds', String(INTERVIEW.CLI_TIMEOUT))
145
+ .option('--json', 'Also output JSON report')
146
+ .option('--verbose', 'Verbose output')
147
+ .option('--debug', 'Debug MCP protocol')
148
+ .option('--save-baseline [path]', 'Save baseline for drift detection (default: bellwether-baseline.json)')
149
+ .option('--compare-baseline <path>', 'Compare against existing baseline')
150
+ .option('--fail-on-drift', 'Exit with error if behavioral drift detected')
151
+ .option('--cloud-format', 'Save baseline in cloud-ready format')
152
+ .option('--estimate-cost', 'Estimate cost before running interview')
153
+ .option('--show-cost', 'Show cost summary after interview')
154
+ .option('-i, --interactive', 'Run in interactive mode with prompts')
155
+ .option('-q, --quick', 'Quick mode for CI: 1 question per tool')
156
+ .option('-Q, --quality', 'Use premium LLM models for higher quality output')
157
+ .option('-p, --preset <name>', 'Use a preset configuration: docs, security, thorough, ci')
158
+ .option('--personas <list>', 'Comma-separated persona list: technical,security,qa,novice,all', 'technical')
159
+ .option('--security', 'Include security testing persona (shorthand for --personas technical,security)')
160
+ .option('--transport <type>', 'Transport type: stdio, sse, streamable-http', 'stdio')
161
+ .option('--url <url>', 'URL for remote MCP server (requires --transport sse or streamable-http)')
162
+ .option('--session-id <id>', 'Session ID for remote server authentication')
163
+ .option('--scenarios <path>', 'Path to custom test scenarios YAML file')
164
+ .option('--scenarios-only', 'Only run custom scenarios (skip LLM-generated questions)')
165
+ .option('--init-scenarios', 'Generate a sample bellwether-tests.yaml file and exit')
166
+ .option('--stream', 'Enable streaming output to show LLM responses in real-time')
167
+ .option('--quiet', 'Suppress streaming output (use with --stream to only log final results)')
168
+ .option('--parallel-personas', 'Run persona interviews in parallel for faster execution')
169
+ .option('--persona-concurrency <n>', `Max concurrent persona interviews (default: ${INTERVIEW.DEFAULT_PERSONA_CONCURRENCY}, requires --parallel-personas)`, String(INTERVIEW.DEFAULT_PERSONA_CONCURRENCY))
170
+ .option('--show-metrics', 'Show detailed metrics after interview (token usage, timing, costs)')
171
+ .option('--fallback', 'Enable automatic Ollama fallback if primary LLM provider fails')
172
+ .option('--max-tokens <n>', 'Maximum total tokens to use (prevents runaway costs)')
173
+ .option('--cache', 'Enable response caching to avoid redundant tool calls and LLM analysis (default: enabled)')
174
+ .option('--no-cache', 'Disable response caching')
175
+ .option('--resource-timeout <ms>', `Timeout for resource reads in milliseconds (default: ${INTERVIEW.RESOURCE_TIMEOUT})`, String(INTERVIEW.RESOURCE_TIMEOUT))
176
+ .option('--workflows <path>', 'Path to workflow definitions YAML file')
177
+ .option('--discover-workflows', 'Enable LLM-based workflow discovery')
178
+ .option('--max-workflows <n>', `Maximum workflows to discover (default: ${WORKFLOW.MAX_DISCOVERED_WORKFLOWS})`, String(WORKFLOW.MAX_DISCOVERED_WORKFLOWS))
179
+ .option('--init-workflows', 'Generate a sample bellwether-workflows.yaml file and exit')
180
+ .option('--workflow-state-tracking', 'Enable state tracking during workflow execution')
181
+ .option('--ci, --structural', 'Structural-only mode: zero LLM, free, deterministic drift detection (equivalent to --preset ci --yes --quiet)')
182
+ .option('-y, --yes', 'Skip confirmation prompts')
183
+ .action(async (command, args, options) => {
184
+ // Handle --init-scenarios: generate sample file and exit
185
+ if (options.initScenarios) {
186
+ const outputPath = options.scenarios ?? DEFAULT_SCENARIOS_FILE;
187
+ const content = generateSampleScenariosYaml();
188
+ writeFileSync(outputPath, content);
189
+ output.info(`Generated sample scenarios file: ${outputPath}`);
190
+ output.info('\nEdit this file to add custom test scenarios for your MCP server.');
191
+ output.info('Then run: bellwether interview <command> --scenarios ' + outputPath);
192
+ return;
193
+ }
194
+ // Handle --init-workflows: generate sample file and exit
195
+ if (options.initWorkflows) {
196
+ const outputPath = options.workflows ?? 'bellwether-workflows.yaml';
197
+ const content = generateSampleWorkflowYaml();
198
+ writeFileSync(outputPath, content);
199
+ output.info(`Generated sample workflows file: ${outputPath}`);
200
+ output.info('\nEdit this file to define custom workflow tests for your MCP server.');
201
+ output.info('Then run: bellwether interview <command> --workflows ' + outputPath);
202
+ return;
203
+ }
204
+ // Handle --ci / --structural composite flag: apply ci preset, yes, and quiet options
205
+ const isCiMode = options.ci === true || options.structural === true;
206
+ if (isCiMode) {
207
+ options.preset = 'ci';
208
+ options.yes = true;
209
+ options.quiet = true;
210
+ }
211
+ // Load configuration
212
+ const config = loadConfig(options.config);
213
+ // Handle interactive mode
214
+ let interactiveConfig;
215
+ if (options.interactive || !command) {
216
+ // If no command provided, enter interactive mode
217
+ if (!command && !options.interactive) {
218
+ output.info('No server command provided. Entering interactive mode...\n');
219
+ }
220
+ interactiveConfig = await promptForConfig(config, command, args);
221
+ displayConfigSummary(interactiveConfig);
222
+ // Update command and args from interactive config
223
+ command = interactiveConfig.serverCommand;
224
+ args = interactiveConfig.serverArgs;
225
+ }
226
+ // Ensure we have a command at this point
227
+ if (!command) {
228
+ output.error('Error: Server command is required.');
229
+ output.error('Usage: bellwether interview <command> [args...] or bellwether interview --interactive');
230
+ process.exit(1);
231
+ }
232
+ // Determine model: --quality uses premium models, otherwise defaults (now budget-friendly)
233
+ const isQualityMode = options.quality;
234
+ const model = options.model
235
+ ?? (isQualityMode ? PREMIUM_MODELS[config.llm.provider] : undefined)
236
+ ?? config.llm.model;
237
+ // Handle preset configurations
238
+ let presetConfig;
239
+ if (options.preset) {
240
+ presetConfig = PRESETS[options.preset.toLowerCase()];
241
+ if (!presetConfig) {
242
+ output.error(`Unknown preset: ${options.preset}`);
243
+ output.error(`Available presets: ${Object.keys(PRESETS).join(', ')}`);
244
+ output.error('\nPreset descriptions:');
245
+ for (const [name, cfg] of Object.entries(PRESETS)) {
246
+ output.error(` ${name}: ${cfg.description}`);
247
+ }
248
+ process.exit(1);
249
+ }
250
+ output.info(`Using preset: ${options.preset} (${presetConfig.description})\n`);
251
+ }
252
+ // Quick mode: 1 question per tool for fast CI runs
253
+ // Preset overrides quick mode if specified
254
+ const maxQuestions = presetConfig?.maxQuestions
255
+ ?? (options.quick
256
+ ? 1
257
+ : (interactiveConfig?.maxQuestions
258
+ ?? (options.maxQuestions ? parseInt(options.maxQuestions, 10) : config.interview.maxQuestionsPerTool)));
259
+ const timeout = options.timeout
260
+ ? parseInt(options.timeout, 10)
261
+ : config.interview.timeout;
262
+ const outputDir = interactiveConfig?.outputDir ?? options.output ?? config.output.outputDir ?? '.';
263
+ // Determine personas: preset > --security > --personas > config.yaml > default
264
+ let selectedPersonas;
265
+ if (presetConfig) {
266
+ selectedPersonas = presetConfig.personas;
267
+ }
268
+ else if (options.security) {
269
+ selectedPersonas = parsePersonas('technical,security');
270
+ }
271
+ else if (options.personas) {
272
+ selectedPersonas = parsePersonas(options.personas);
273
+ }
274
+ else if (config.interview.personas) {
275
+ // Support both string ("technical,security") and array (["technical", "security"]) formats
276
+ const personaList = Array.isArray(config.interview.personas)
277
+ ? config.interview.personas.join(',')
278
+ : config.interview.personas;
279
+ selectedPersonas = parsePersonas(personaList);
280
+ }
281
+ else {
282
+ selectedPersonas = parsePersonas('technical');
283
+ }
284
+ // Determine output format
285
+ const wantsJson = interactiveConfig
286
+ ? (interactiveConfig.outputFormat === 'json' || interactiveConfig.outputFormat === 'both')
287
+ : (options.json || config.output.format === 'json' || config.output.format === 'both');
288
+ // Determine baseline options
289
+ const shouldSaveBaseline = interactiveConfig?.saveBaseline ?? !!options.saveBaseline;
290
+ const baselinePath = interactiveConfig?.baselinePath
291
+ ?? (typeof options.saveBaseline === 'string' ? options.saveBaseline : undefined);
292
+ const compareBaselinePath = interactiveConfig?.compareBaseline ?? options.compareBaseline;
293
+ // Display startup banner with all settings
294
+ const serverCommand = `${command} ${args.join(' ')}`;
295
+ const personaNames = selectedPersonas.map((p) => p.name);
296
+ const banner = formatStartupBanner({
297
+ serverCommand,
298
+ provider: config.llm.provider,
299
+ model,
300
+ isQuality: isQualityMode,
301
+ personas: personaNames,
302
+ questionsPerTool: maxQuestions,
303
+ });
304
+ output.info(banner);
305
+ output.newline();
306
+ // Validate transport options
307
+ const transportType = options.transport;
308
+ const isRemoteTransport = transportType === 'sse' || transportType === 'streamable-http';
309
+ if (isRemoteTransport && !options.url) {
310
+ output.error(`Error: --url is required when using --transport ${transportType}`);
311
+ process.exit(1);
312
+ }
313
+ if (options.url && !isRemoteTransport) {
314
+ output.error('Error: --url requires --transport sse or --transport streamable-http');
315
+ process.exit(1);
316
+ }
317
+ // Initialize cost tracker for real usage tracking
318
+ const costTracker = new CostTracker(model);
319
+ // Initialize metrics collector for comprehensive observability
320
+ resetMetricsCollector();
321
+ const metricsCollector = getMetricsCollector();
322
+ metricsCollector.startInterview();
323
+ // Initialize cache for tool responses and LLM analysis
324
+ // Cache is enabled by default unless --no-cache is specified
325
+ const cacheEnabled = options.cache !== false;
326
+ resetGlobalCache();
327
+ const cache = getGlobalCache({ enabled: cacheEnabled });
328
+ if (cacheEnabled) {
329
+ output.info('Response caching enabled');
330
+ }
331
+ // Initialize clients
332
+ const mcpClient = new MCPClient({
333
+ timeout,
334
+ debug: options.debug,
335
+ transport: transportType,
336
+ });
337
+ let llmClient;
338
+ // Create usage callback for cost and metrics tracking
339
/**
 * Usage hook handed to the LLM clients as `onUsage`.
 *
 * Each reported pair of token counts is added to the cost tracker and then
 * recorded in the metrics collector under the configured provider and the
 * resolved model, tagged as an 'llm_call'.
 *
 * @param {number} inputTokens - Input token count reported for the call.
 * @param {number} outputTokens - Output token count reported for the call.
 */
const onUsageCallback = (inputTokens, outputTokens) => {
    // Cost accounting first, then the metrics record.
    costTracker.addUsage(inputTokens, outputTokens);
    const { provider } = config.llm;
    metricsCollector.recordTokenUsage(provider, model, inputTokens, outputTokens, 'llm_call');
};
344
+ try {
345
+ // Use the LLM factory to create the appropriate provider client
346
+ const baseLLMClient = createLLMClient({
347
+ provider: config.llm.provider,
348
+ model,
349
+ apiKey: config.llm.apiKey,
350
+ apiKeyEnvVar: config.llm.apiKeyEnvVar,
351
+ baseUrl: config.llm.baseUrl,
352
+ onUsage: onUsageCallback,
353
+ });
354
+ // Wrap with fallback client if enabled
355
+ if (options.fallback) {
356
+ output.info('Fallback mode enabled - will use Ollama if primary provider fails');
357
+ llmClient = new FallbackLLMClient({
358
+ providers: [
359
+ {
360
+ provider: config.llm.provider,
361
+ model,
362
+ apiKey: config.llm.apiKey,
363
+ apiKeyEnvVar: config.llm.apiKeyEnvVar,
364
+ baseUrl: config.llm.baseUrl,
365
+ },
366
+ ],
367
+ useOllamaFallback: true,
368
+ onUsage: onUsageCallback,
369
+ });
370
+ }
371
+ else {
372
+ llmClient = baseLLMClient;
373
+ }
374
+ // Wrap with token budget enforcement if max-tokens specified
375
+ if (options.maxTokens) {
376
+ const maxTokens = parseInt(options.maxTokens, 10);
377
+ if (isNaN(maxTokens) || maxTokens < 1000) {
378
+ output.error('Invalid --max-tokens value: must be a positive integer >= 1000');
379
+ process.exit(1);
380
+ }
381
+ output.info(`Token budget enabled: ${maxTokens.toLocaleString()} tokens max`);
382
+ llmClient = withTokenBudget(llmClient, {
383
+ maxTotalTokens: maxTokens,
384
+ onBudgetWarning: (used, total, pct) => {
385
+ output.warn(`Token budget warning: ${pct.toFixed(0)}% used (${used.toLocaleString()}/${total.toLocaleString()})`);
386
+ },
387
+ onBudgetExceeded: (used, total) => {
388
+ output.error(`Token budget exceeded: ${used.toLocaleString()}/${total.toLocaleString()} tokens`);
389
+ },
390
+ });
391
+ }
392
+ }
393
+ catch (error) {
394
+ output.error('Failed to initialize LLM client: ' + (error instanceof Error ? error.message : String(error)));
395
+ output.error(`\nProvider: ${config.llm.provider}`);
396
+ output.error('Make sure the appropriate API key environment variable is set:');
397
+ output.error(' - OpenAI: OPENAI_API_KEY');
398
+ output.error(' - Anthropic: ANTHROPIC_API_KEY');
399
+ output.error(' - Ollama: No API key needed (ensure Ollama is running)');
400
+ process.exit(1);
401
+ }
402
+ // Determine streaming early so we can suppress logs before MCP connection
403
+ const enableStreaming = options.stream && !options.quiet;
404
+ if (enableStreaming) {
405
+ // Suppress JSON logs during streaming to keep output clean
406
+ suppressLogs();
407
+ }
408
+ try {
409
+ // Connect to MCP server
410
+ if (isRemoteTransport) {
411
+ output.info(`Connecting to remote MCP server via ${transportType}...`);
412
+ await mcpClient.connectRemote(options.url, {
413
+ transport: transportType,
414
+ sessionId: options.sessionId,
415
+ });
416
+ }
417
+ else {
418
+ output.info('Connecting to MCP server...');
419
+ await mcpClient.connect(command, args);
420
+ }
421
+ // Discovery phase
422
+ output.info('Discovering capabilities...');
423
+ const discovery = await discover(mcpClient, command, args);
424
+ const resourceCount = discovery.resources?.length ?? 0;
425
+ const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
426
+ if (resourceCount > 0) {
427
+ discoveryParts.push(`${resourceCount} resources`);
428
+ }
429
+ output.info(`Found ${discoveryParts.join(', ')}\n`);
430
+ // Update metrics with discovery counts
431
+ metricsCollector.updateInterviewCounters({
432
+ toolsDiscovered: discovery.tools.length,
433
+ personasUsed: selectedPersonas.length,
434
+ });
435
+ if (discovery.tools.length === 0) {
436
+ output.info('No tools found. Nothing to interview.');
437
+ metricsCollector.endInterview();
438
+ await mcpClient.disconnect();
439
+ return;
440
+ }
441
+ // Check if scenarios file exists for optimization suggestions
442
+ const defaultScenariosPath = join(outputDir, DEFAULT_SCENARIOS_FILE);
443
+ const hasScenariosFile = !!(options.scenarios || existsSync(defaultScenariosPath));
444
+ // Cost and time estimation (shown by default unless --quiet or --ci)
445
+ // In CI mode, cost is near-zero since we skip LLM calls
446
+ const costEstimate = isCiMode
447
+ ? {
448
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
449
+ costUSD: 0,
450
+ model,
451
+ breakdown: { inputCost: 0, outputCost: 0 },
452
+ }
453
+ : estimateInterviewCost(model, discovery.tools.length, maxQuestions, selectedPersonas.length);
454
+ const timeEstimate = estimateInterviewTime(discovery.tools.length, maxQuestions, selectedPersonas.length, options.parallelPersonas, config.llm.provider, discovery.prompts.length, resourceCount, isCiMode // structuralOnly in CI mode
455
+ );
456
+ // Show cost/time estimate (unless quiet mode)
457
+ if (!options.quiet) {
458
+ output.info(formatCostAndTimeEstimate(costEstimate, timeEstimate));
459
+ // Show optimization suggestions if cost is significant
460
+ const optimizationContext = {
461
+ estimatedCost: costEstimate.costUSD,
462
+ toolCount: discovery.tools.length,
463
+ personaCount: selectedPersonas.length,
464
+ isParallelPersonas: !!options.parallelPersonas,
465
+ isPremiumModel: !!options.quality,
466
+ isUsingCiPreset: options.preset === 'ci' || isCiMode,
467
+ hasScenariosFile,
468
+ };
469
+ const suggestions = suggestOptimizations(optimizationContext);
470
+ if (suggestions.length > 0) {
471
+ output.newline();
472
+ output.info(formatOptimizationSuggestions(suggestions));
473
+ }
474
+ output.newline();
475
+ }
476
+ // Legacy --estimate-cost flag shows detailed breakdown
477
+ if (options.estimateCost) {
478
+ output.info(formatCostEstimate(costEstimate));
479
+ output.newline();
480
+ }
481
+ // Confirmation prompt for high-cost runs in interactive terminals
482
+ if (costEstimate.costUSD > COST_THRESHOLDS.CONFIRMATION_THRESHOLD &&
483
+ isInteractiveTTY() &&
484
+ !options.yes &&
485
+ !isCiMode) {
486
+ const readline = await import('readline');
487
+ const rl = readline.createInterface({
488
+ input: process.stdin,
489
+ output: process.stdout,
490
+ });
491
+ const answer = await new Promise((resolve) => {
492
+ rl.question(`Estimated cost is ~$${costEstimate.costUSD.toFixed(2)}. Continue? [Y/n] `, resolve);
493
+ });
494
+ rl.close();
495
+ const normalizedAnswer = answer.trim().toLowerCase();
496
+ if (normalizedAnswer === 'n' || normalizedAnswer === 'no') {
497
+ output.info('Interview cancelled.');
498
+ await mcpClient.disconnect();
499
+ return;
500
+ }
501
+ }
502
+ // Load custom scenarios if provided
503
+ // In --ci mode, skip auto-loading scenarios for fast runs (use --scenarios to include them)
504
+ let customScenarios;
505
+ if (options.scenarios) {
506
+ try {
507
+ customScenarios = loadScenariosFromFile(options.scenarios);
508
+ output.info(`Loaded ${customScenarios.toolScenarios.length} tool scenarios, ${customScenarios.promptScenarios.length} prompt scenarios from ${options.scenarios}`);
509
+ }
510
+ catch (error) {
511
+ output.error(`Failed to load scenarios: ${error instanceof Error ? error.message : error}`);
512
+ process.exit(1);
513
+ }
514
+ }
515
+ else if (!isCiMode) {
516
+ // Try loading default scenarios file from output directory (skip in CI mode for speed)
517
+ const defaultScenarios = tryLoadDefaultScenarios(outputDir);
518
+ if (defaultScenarios) {
519
+ customScenarios = defaultScenarios;
520
+ output.info(`Auto-loaded ${customScenarios.toolScenarios.length} tool scenarios from ${DEFAULT_SCENARIOS_FILE}`);
521
+ }
522
+ }
523
+ // Build workflow configuration
524
+ let workflowConfig;
525
+ if (options.workflows || options.discoverWorkflows) {
526
+ workflowConfig = {
527
+ discoverWorkflows: options.discoverWorkflows,
528
+ maxDiscoveredWorkflows: options.maxWorkflows
529
+ ? parseInt(options.maxWorkflows, 10)
530
+ : WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
531
+ enableStateTracking: options.workflowStateTracking,
532
+ };
533
+ // Load workflows from file if provided
534
+ if (options.workflows) {
535
+ try {
536
+ const workflows = loadWorkflowsFromFile(options.workflows);
537
+ workflowConfig.workflows = workflows;
538
+ workflowConfig.workflowsFile = options.workflows;
539
+ output.info(`Loaded ${workflows.length} workflow(s) from ${options.workflows}`);
540
+ }
541
+ catch (error) {
542
+ output.error(`Failed to load workflows: ${error instanceof Error ? error.message : error}`);
543
+ process.exit(1);
544
+ }
545
+ }
546
+ if (options.discoverWorkflows) {
547
+ output.info('Workflow discovery enabled - will analyze tools for workflow patterns');
548
+ }
549
+ }
550
+ else if (!isCiMode) {
551
+ // Try auto-loading default workflows file from output directory (skip in CI mode for speed)
552
+ // Similar to how scenarios are auto-loaded from bellwether-tests.yaml
553
+ const defaultWorkflows = tryLoadDefaultWorkflows(outputDir);
554
+ if (defaultWorkflows && defaultWorkflows.length > 0) {
555
+ workflowConfig = {
556
+ discoverWorkflows: false,
557
+ maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
558
+ enableStateTracking: options.workflowStateTracking,
559
+ workflows: defaultWorkflows,
560
+ workflowsFile: `${outputDir}/${DEFAULT_WORKFLOWS_FILE}`,
561
+ };
562
+ output.info(`Auto-loaded ${defaultWorkflows.length} workflow(s) from ${DEFAULT_WORKFLOWS_FILE}`);
563
+ }
564
+ }
565
+ // Set up streaming display if enabled
566
+ let streamingDisplay = null;
567
+ let streamingCallbacks;
568
+ if (enableStreaming) {
569
+ streamingDisplay = new StreamingDisplay({
570
+ style: 'dim',
571
+ maxWidth: 100,
572
+ });
573
// Prefix builders for the streaming display, keyed by operation type (the text
// before the first ':' of the operation string). Each builder receives the
// segment after that ':' (possibly undefined) and returns the line prefix.
const promptPrefix = (subject) => (subject
    ? `\n Processing prompt ${subject}... `
    : '\n Processing prompt... ');
const resourcePrefix = (subject) => (subject
    ? `\n Processing resource ${subject}... `
    : '\n Processing resource... ');
const streamingPrefixBuilders = new Map([
    ['generate-questions', (subject) => (subject
        ? `\n Generating questions for ${subject}... `
        : '\n Generating questions... ')],
    ['analyze', (subject) => (subject
        ? `\n Analyzing ${subject}... `
        : '\n Analyzing... ')],
    ['synthesize-tool', (subject) => (subject
        ? `\n Synthesizing profile for ${subject}... `
        : '\n Synthesizing profile... ')],
    // The overall synthesis never names a subject.
    ['synthesize-overall', () => '\n Synthesizing overall findings... '],
    ['generate-prompt-questions', promptPrefix],
    ['analyze-prompt', promptPrefix],
    ['synthesize-prompt', promptPrefix],
    ['generate-resource-questions', resourcePrefix],
    ['analyze-resource', resourcePrefix],
    ['synthesize-resource', resourcePrefix],
]);
// Callbacks that route streaming events to the streaming display.
streamingCallbacks = {
    /**
     * Opens a new streamed section with a human-readable prefix derived from
     * the operation string ("<type>" or "<type>:<subject>").
     */
    onStart(operation, _context) {
        const [opType, subject] = operation.split(':');
        const buildPrefix = streamingPrefixBuilders.get(opType);
        // Unknown operation types fall back to a generic label.
        const prefix = buildPrefix ? buildPrefix(subject) : '\n Processing... ';
        streamingDisplay?.start(prefix);
    },
    /** Forwards a streamed chunk to the display. */
    onChunk(chunk, _operation) {
        streamingDisplay?.write(chunk);
    },
    /** Closes the current streamed section with a completion marker. */
    onComplete(_text, _operation) {
        streamingDisplay?.finish(' [done]');
    },
    /** Aborts the current streamed section, showing the error message. */
    onError(error, _operation) {
        streamingDisplay?.abort(`[error: ${error.message}]`);
    },
};
618
+ output.info('Streaming mode enabled - showing LLM output in real-time\n');
619
+ }
620
+ // Parse and validate persona concurrency
621
+ let personaConcurrency;
622
+ if (options.personaConcurrency) {
623
+ personaConcurrency = parseInt(options.personaConcurrency, 10);
624
+ if (isNaN(personaConcurrency) || personaConcurrency < 1) {
625
+ output.error('Invalid --persona-concurrency value: must be a positive integer');
626
+ process.exit(1);
627
+ }
628
+ if (personaConcurrency > INTERVIEW.MAX_PERSONA_CONCURRENCY) {
629
+ output.warn(`High persona concurrency (${personaConcurrency}) may cause rate limiting or memory issues`);
630
+ }
631
+ }
632
+ // Parse resource timeout option
633
+ const resourceTimeout = options.resourceTimeout
634
+ ? parseInt(options.resourceTimeout, 10)
635
+ : undefined;
636
+ // Interview phase
637
+ const interviewer = new Interviewer(llmClient, {
638
+ maxQuestionsPerTool: maxQuestions,
639
+ timeout,
640
+ skipErrorTests: config.interview.skipErrorTests ?? false,
641
+ model,
642
+ personas: selectedPersonas,
643
+ customScenarios,
644
+ customScenariosOnly: options.scenariosOnly,
645
+ enableStreaming,
646
+ streamingCallbacks,
647
+ parallelPersonas: options.parallelPersonas,
648
+ personaConcurrency,
649
+ cache,
650
+ resourceTimeout,
651
+ workflowConfig,
652
+ structuralOnly: isCiMode, // Fast CI mode: skip LLM calls for speed
653
+ });
654
+ // Extract server context from command line arguments
655
+ const serverContext = extractServerContextFromArgs(command, args);
656
+ if (serverContext.allowedDirectories && serverContext.allowedDirectories.length > 0) {
657
+ output.info(`Detected allowed directories: ${serverContext.allowedDirectories.join(', ')}`);
658
+ }
659
+ interviewer.setServerContext(serverContext);
660
+ // Set up progress display - disable progress bar when streaming to avoid display conflicts
661
+ const progressBar = new InterviewProgressBar({ enabled: !options.verbose && !enableStreaming });
662
/**
 * Progress listener passed to `interviewer.interview(...)`.
 *
 * With `options.verbose` set, each phase is reported as a log line through
 * `output.info`; otherwise the phases only drive `progressBar`
 * (start / update / stop).
 *
 * @param {object} progress - Progress snapshot. The fields read here are
 *   `phase`, the `total*` and `*Completed` counters, `currentPersona`,
 *   `currentTool` and `currentWorkflow`.
 */
const progressCallback = (progress) => {
    const { phase } = progress;
    // Shared by both modes: prompt and resource totals default to 0 when absent.
    const startProgressBar = () => {
        progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0);
    };
    if (!options.verbose) {
        // Use progress bar for non-verbose mode
        if (phase === 'starting') {
            startProgressBar();
        }
        else if (['interviewing', 'prompts', 'resources', 'workflows'].includes(phase)) {
            progressBar.update(progress);
        }
        else if (phase === 'complete' || phase === 'synthesizing') {
            progressBar.stop();
        }
        return;
    }
    // Verbose mode: one log line per progress event.
    if (phase === 'starting') {
        output.info('Starting interview...');
        startProgressBar();
    }
    else if (phase === 'interviewing') {
        output.info(`[${progress.currentPersona}] Interviewing: ${progress.currentTool} (${progress.toolsCompleted + 1}/${progress.totalTools})`);
    }
    else if (phase === 'prompts') {
        // currentTool carries a 'prompt:' prefix in this phase; strip it for display.
        const promptName = progress.currentTool?.replace('prompt:', '') ?? '...';
        const position = (progress.promptsCompleted ?? 0) + 1;
        output.info(`Interviewing prompt: ${promptName} (${position}/${progress.totalPrompts ?? 0})`);
    }
    else if (phase === 'resources') {
        // currentTool carries a 'resource:' prefix in this phase; strip it for display.
        const resourceName = progress.currentTool?.replace('resource:', '') ?? '...';
        const position = (progress.resourcesCompleted ?? 0) + 1;
        output.info(`Interviewing resource: ${resourceName} (${position}/${progress.totalResources ?? 0})`);
    }
    else if (phase === 'workflows') {
        if (progress.currentWorkflow) {
            const position = (progress.workflowsCompleted ?? 0) + 1;
            output.info(`Executing workflow: ${progress.currentWorkflow} (${position}/${progress.totalWorkflows})`);
        }
        else {
            output.info('Executing workflows...');
        }
    }
    else if (phase === 'synthesizing') {
        output.info('Synthesizing findings...');
    }
    else if (phase === 'complete') {
        output.info('Interview complete!');
    }
};
707
+ output.info('Starting interview...\n');
708
+ const result = await interviewer.interview(mcpClient, discovery, progressCallback);
709
+ // Ensure progress bar is stopped
710
+ progressBar.stop();
711
+ if (!options.verbose) {
712
+ output.newline();
713
+ }
714
+ // Generate documentation
715
+ output.info('Generating documentation...');
716
+ // Ensure output directory exists
717
+ mkdirSync(outputDir, { recursive: true });
718
+ const agentsMd = generateAgentsMd(result);
719
+ const agentsMdPath = join(outputDir, 'AGENTS.md');
720
+ writeFileSync(agentsMdPath, agentsMd);
721
+ output.info(`Written: ${agentsMdPath}`);
722
+ if (wantsJson) {
723
+ const jsonReport = generateJsonReport(result);
724
+ const jsonPath = join(outputDir, 'bellwether-report.json');
725
+ writeFileSync(jsonPath, jsonReport);
726
+ output.info(`Written: ${jsonPath}`);
727
+ }
728
+ // End metrics tracking
729
+ const interviewMetrics = metricsCollector.endInterview();
730
+ output.info('\nInterview complete!');
731
+ output.info(`Duration: ${(result.metadata.durationMs / 1000).toFixed(1)}s`);
732
+ output.info(`Tool calls: ${result.metadata.toolCallCount} (${result.metadata.errorCount} errors)`);
733
+ // Display scenario results summary if scenarios were run
734
+ if (result.scenarioResults && result.scenarioResults.length > 0) {
735
+ const passed = result.scenarioResults.filter(r => r.passed).length;
736
+ const failed = result.scenarioResults.length - passed;
737
+ const statusIcon = failed === 0 ? '\u2713' : '\u2717';
738
+ output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
739
+ // Show failed scenarios
740
+ if (failed > 0) {
741
+ output.info('\nFailed scenarios:');
742
+ for (const scenarioResult of result.scenarioResults.filter(r => !r.passed)) {
743
+ const scenario = scenarioResult.scenario;
744
+ const toolOrPrompt = 'tool' in scenario ? scenario.tool : scenario.prompt;
745
+ output.info(` - ${toolOrPrompt}: ${scenario.description}`);
746
+ if (scenarioResult.error) {
747
+ output.info(` Error: ${scenarioResult.error}`);
748
+ }
749
+ for (const assertion of scenarioResult.assertionResults.filter(a => !a.passed)) {
750
+ output.info(` Assertion failed: ${assertion.error}`);
751
+ }
752
+ }
753
+ }
754
+ }
755
+ // Display workflow results summary if workflows were executed
756
+ if (result.workflowResults && result.workflowResults.length > 0) {
757
+ const successful = result.workflowResults.filter(wr => wr.success).length;
758
+ const failed = result.workflowResults.length - successful;
759
+ const statusIcon = failed === 0 ? '\u2713' : '\u2717';
760
+ output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
761
+ // Show failed workflows
762
+ if (failed > 0) {
763
+ output.info('\nFailed workflows:');
764
+ for (const wr of result.workflowResults.filter(w => !w.success)) {
765
+ output.info(` - ${wr.workflow.name}: ${wr.failureReason ?? 'Unknown error'}`);
766
+ if (wr.failedStepIndex !== undefined) {
767
+ const failedStep = wr.workflow.steps[wr.failedStepIndex];
768
+ output.info(` Failed at step ${wr.failedStepIndex + 1}: ${failedStep?.tool ?? 'unknown'}`);
769
+ }
770
+ }
771
+ }
772
+ // Show workflow metadata summary
773
+ if (result.metadata.workflows) {
774
+ const wfMeta = result.metadata.workflows;
775
+ if (wfMeta.discoveredCount > 0) {
776
+ output.info(` Discovered: ${wfMeta.discoveredCount} workflow(s)`);
777
+ }
778
+ if (wfMeta.loadedCount > 0) {
779
+ output.info(` Loaded from file: ${wfMeta.loadedCount} workflow(s)`);
780
+ }
781
+ }
782
+ }
783
+ // Show cost summary if requested (uses real token counts from API responses)
784
+ if (options.showCost || options.estimateCost) {
785
+ output.info('\n' + costTracker.formatSummary());
786
+ }
787
+ // Show detailed metrics if requested
788
+ if (options.showMetrics && interviewMetrics) {
789
+ output.info('\n--- Interview Metrics ---');
790
+ output.info(`Tools discovered: ${interviewMetrics.toolsDiscovered}`);
791
+ output.info(`Personas used: ${interviewMetrics.personasUsed}`);
792
+ output.info(`LLM calls made: ${interviewMetrics.llmCallsMade}`);
793
+ output.info(`Total input tokens: ${interviewMetrics.totalInputTokens.toLocaleString()}`);
794
+ output.info(`Total output tokens: ${interviewMetrics.totalOutputTokens.toLocaleString()}`);
795
+ if (interviewMetrics.totalDurationMs) {
796
+ output.info(`Total duration: ${(interviewMetrics.totalDurationMs / 1000).toFixed(1)}s`);
797
+ }
798
+ if (interviewMetrics.totalCostUSD > 0) {
799
+ output.info(`Estimated cost: $${interviewMetrics.totalCostUSD.toFixed(4)}`);
800
+ }
801
+ }
802
+ // Show cache statistics if caching is enabled
803
+ if (cacheEnabled) {
804
+ const cacheStats = cache.getStats();
805
+ const totalCacheOps = cacheStats.hits + cacheStats.misses;
806
+ if (totalCacheOps > 0) {
807
+ output.info('\n--- Cache Statistics ---');
808
+ output.info(`Cache hits: ${cacheStats.hits}`);
809
+ output.info(`Cache misses: ${cacheStats.misses}`);
810
+ output.info(`Hit rate: ${cacheStats.hitRate.toFixed(1)}%`);
811
+ output.info(`Entries stored: ${cacheStats.entries}`);
812
+ if (cacheStats.hits > 0) {
813
+ output.info(`Estimated savings: ${cacheStats.hits} LLM/tool calls avoided`);
814
+ }
815
+ }
816
+ }
817
+ // Save baseline if requested
818
+ if (shouldSaveBaseline) {
819
+ const serverCommand = `${command} ${args.join(' ')}`;
820
+ const finalBaselinePath = baselinePath ?? join(outputDir, 'bellwether-baseline.json');
821
+ if (options.cloudFormat) {
822
+ // Save in cloud-ready format
823
+ const cloudBaseline = createCloudBaseline(result, serverCommand);
824
+ writeFileSync(finalBaselinePath, JSON.stringify(cloudBaseline, null, 2));
825
+ output.info(`\nCloud baseline saved: ${finalBaselinePath}`);
826
+ }
827
+ else {
828
+ // Save in local format
829
+ const baselineMode = isCiMode ? 'structural' : 'full';
830
+ const baseline = createBaseline(result, serverCommand, baselineMode);
831
+ saveBaseline(baseline, finalBaselinePath);
832
+ output.info(`\nBaseline saved: ${finalBaselinePath} (mode: ${baselineMode})`);
833
+ }
834
+ }
835
+ // Compare against baseline if requested
836
+ if (compareBaselinePath) {
837
+ if (!existsSync(compareBaselinePath)) {
838
+ output.error(`\nBaseline file not found: ${compareBaselinePath}`);
839
+ process.exit(1);
840
+ }
841
+ const serverCommand = `${command} ${args.join(' ')}`;
842
+ const previousBaseline = loadBaseline(compareBaselinePath);
843
+ const baselineMode = isCiMode ? 'structural' : 'full';
844
+ const currentBaseline = createBaseline(result, serverCommand, baselineMode);
845
+ // Note baseline mode mismatch
846
+ if (previousBaseline.mode && previousBaseline.mode !== baselineMode) {
847
+ output.info(`Note: Baseline mode changed (${previousBaseline.mode} -> ${baselineMode})`);
848
+ }
849
+ const diff = compareBaselines(previousBaseline, currentBaseline, {});
850
+ output.info('\n--- Drift Report ---');
851
+ output.info(formatDiffText(diff));
852
+ const shouldFailOnDrift = options.failOnDrift ?? config.drift?.failOnDrift ?? false;
853
+ if (shouldFailOnDrift) {
854
+ if (diff.severity === 'breaking') {
855
+ output.error('\nBreaking changes detected!');
856
+ process.exit(1);
857
+ }
858
+ else if (diff.severity === 'warning') {
859
+ output.warn('\nWarning-level changes detected.');
860
+ process.exit(1);
861
+ }
862
+ }
863
+ }
864
+ }
865
+ catch (error) {
866
+ const errorMessage = error instanceof Error ? error.message : String(error);
867
+ output.error('\n--- Interview Failed ---');
868
+ output.error(`Error: ${errorMessage}`);
869
+ // Provide helpful context for common errors
870
+ if (errorMessage.includes('ECONNREFUSED') || errorMessage.includes('Connection refused')) {
871
+ output.error('\nPossible causes:');
872
+ output.error(' - The MCP server is not running');
873
+ output.error(' - The server address/port is incorrect');
874
+ output.error(' - A firewall is blocking the connection');
875
+ }
876
+ else if (errorMessage.includes('timeout') || errorMessage.includes('Timeout')) {
877
+ output.error('\nPossible causes:');
878
+ output.error(' - The MCP server is taking too long to respond');
879
+ output.error(' - Try increasing --timeout value');
880
+ output.error(' - The server may be overloaded or stuck');
881
+ }
882
+ else if (errorMessage.includes('ENOENT') || errorMessage.includes('not found')) {
883
+ output.error('\nPossible causes:');
884
+ output.error(' - The server command was not found');
885
+ output.error(' - Check that the command is installed and in PATH');
886
+ output.error(' - Try using an absolute path to the server executable');
887
+ }
888
+ else if (errorMessage.includes('API') || errorMessage.includes('API_KEY')) {
889
+ output.error('\nPossible causes:');
890
+ output.error(' - Missing or invalid API key');
891
+ output.error(' - Set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable');
892
+ output.error(' - Or configure apiKeyEnvVar in bellwether.yaml');
893
+ }
894
+ process.exit(1);
895
+ }
896
+ finally {
897
+ // Restore log level if it was suppressed for streaming
898
+ // (restoreLogLevel is safe to call even if logs weren't suppressed)
899
+ restoreLogLevel();
900
+ await mcpClient.disconnect();
901
+ }
902
+ });
903
+ //# sourceMappingURL=interview.js.map