@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,291 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.10.0] - 2026-01-24
6
+
7
+ ### Features
8
+
9
+ - **Smart test value generation**: New intelligent value generator that produces semantically valid test inputs by:
10
+ - Recognizing patterns in field names (dates, emails, URLs, phone numbers, IDs, etc.)
11
+ - Respecting JSON Schema `format` fields
12
+ - Generating syntactically correct values more likely to be accepted by real tools
13
+ - **Stateful testing**: Tests can now share outputs between tool calls
14
+ - Tool responses are parsed and stored in a shared state map
15
+ - Subsequent tool calls can inject values from prior outputs (e.g., IDs created by one tool used by another)
16
+ - Configurable via `check.statefulTesting.enabled` and `check.statefulTesting.shareOutputsBetweenTools`
17
+ - Maximum chain length configurable via `check.statefulTesting.maxChainLength`
18
+ - **Rate limiting**: Token bucket rate limiter for tool calls
19
+ - Configurable requests per second and burst limits
20
+ - Exponential or linear backoff strategies
21
+ - Automatic retry on rate limit errors
22
+ - Enabled via `check.rateLimit.enabled` in config
23
+ - **Response assertions**: Semantic validation of tool responses
24
+ - Automatic schema inference from successful responses
25
+ - Configurable strict mode for assertion failures
26
+ - Assertion results tracked per interaction and aggregated per tool
27
+ - Enabled via `check.assertions.enabled` in config
28
+ - **External service detection enhancements**: Improved detection with confidence levels
29
+ - `confirmed`: Error messages from the service were observed
30
+ - `likely`: Strong evidence from tool name/description patterns
31
+ - `possible`: Weak evidence, partial matches
32
+ - Evidence breakdown for transparency (fromErrorMessage, fromToolName, fromDescription)
33
+ - Service configuration status tracking (configured, sandboxAvailable, mockAvailable)
34
+ - **Warmup runs**: Skip initial runs before timing samples to account for cold starts
35
+ - Configurable 0-5 warmup runs via `check.warmupRuns`
36
+ - **Config validation warnings**: Non-blocking warnings for configuration issues
37
+ - Displayed before check runs without failing
38
+ - Helps catch common misconfigurations early
39
+ - **Tool-by-tool progress reporting**: Live progress shows reliability and timing for each tool as it completes
40
+
41
+ ### Enhanced CONTRACT.md Output
42
+
43
+ - **Quick Reference table enhancements**: Now includes P50 latency and confidence indicators
44
+ - **Metrics legend section**: Explains confidence levels and reliability calculations
45
+ - **Validation testing section**: Separate metrics for validation tests vs happy-path tests
46
+ - **Issues detected section**: Aggregated summary of detected issues across tools
47
+ - **Stateful testing section**: Shows state sharing relationships between tools
48
+ - **External service configuration section**: Documents detected external services and their status
49
+ - **Response assertions section**: Documents inferred schemas and assertion rules
50
+ - **Skipped tool handling**: Tools skipped due to missing external service config are documented
51
+
52
+ ### Configuration Changes
53
+
54
+ - **New config options**:
55
+ - `check.warmupRuns` - Number of warmup runs before timing (default: 0)
56
+ - `check.smartTestValues` - Enable smart value generation (default: true)
57
+ - `check.statefulTesting.*` - Stateful testing configuration
58
+ - `check.externalServices.*` - External service handling (skip/mock/test modes)
59
+ - `check.assertions.*` - Response assertion configuration
60
+ - `check.rateLimit.*` - Rate limiting configuration
61
+ - `check.metrics.countValidationAsSuccess` - Count validation rejections as success (default: true)
62
+ - `check.metrics.separateValidationMetrics` - Separate validation from happy-path metrics (default: true)
63
+ - `baseline.savePath` - Separate path for saving baselines (default: `.bellwether/bellwether-baseline.json`)
64
+ - **Changed defaults**:
65
+ - `check.sampling.minSamples`: 3 → 10 (more samples for statistical confidence)
66
+ - `check.sampling.targetConfidence`: 'medium' → 'low' (match the lower sample count)
67
+ - `workflows.autoGenerate`: true → false (explicit opt-in for workflow discovery)
68
+ - `workflows.requireSuccessfulDependencies`: new option (default: true)
69
+ - **Parallel testing + stateful testing**: Parallel mode automatically disabled when stateful testing is enabled (state sharing requires sequential execution)
70
+
71
+ ### GitHub Action
72
+
73
+ - **Simplified inputs**: Removed CLI-flag-style inputs that are now config-only:
74
+ - Removed: `fail-on-drift`, `parallel`, `parallel-workers`, `incremental`, `incremental-cache-hours`, `performance-threshold`, `security`
75
+ - These are now configured in `bellwether.yaml` only
76
+ - **Improved config path handling**: Action now properly resolves config paths and copies existing configs when needed
77
+ - **New exit code**: Added exit code 5 for low-confidence results
78
+ - **Updated output descriptions**: Clarified severity levels and exit codes
79
+
80
+ ### Documentation
81
+
82
+ - **README updates**: Added documentation for previously undocumented commands:
83
+ - `auth add <provider>` and `auth remove <provider>` for managing LLM API keys
84
+ - `baseline accept` command for accepting drift as intentional
85
+ - `contract show` command for displaying generated CONTRACT.md
86
+ - `teams current` command for showing active team
87
+ - **Website documentation**: Updated guides for configuration, CI/CD, workflows, and output formats
88
+
89
+ ### Fixes
90
+
91
+ - **Fixed `-p` flag conflict**: Removed `-p` short flag from `init --preset` to avoid conflict with `upload -p/--project`. Use `--preset` with the `init` command instead
92
+ - **Fixed stdio transport write error handling**: Added error handling for `output.write()` in stdio transport to properly emit errors when subprocess pipe breaks (EPIPE)
93
+ - **Fixed watch command signal handler cleanup**: Signal handlers (SIGINT/SIGTERM) are now properly removed on cleanup to prevent handler accumulation
94
+ - **Added debug logging to silent catches**: Silent catch blocks in Ollama client now log debug messages for better troubleshooting
95
+ - **Fixed minSamples override**: User's `minSamples` config is now respected exactly instead of being overridden by `targetConfidence` minimum
96
+
97
+ ## [0.9.0] - 2026-01-23
98
+
99
+ ### Documentation
100
+
101
+ - **Full documentation alignment**: Updated CLI docs, website guides, and README to match the config-first workflow and current command structure
102
+ - **New CLI references**: Added documentation for `bellwether golden` and `bellwether contract`
103
+ - **Cloud + registry updates**: Clarified config requirements, defaults, and registry overrides across cloud/registry pages
104
+
105
+ ### GitHub Action
106
+
107
+ - **Action docs refresh**: Updated inputs, examples, and output filenames to match current action behavior
108
+ - **Config-first guidance**: Clarified config requirements and output directory expectations
109
+
110
+ ### Developer Experience
111
+
112
+ - **Comprehensive .env example**: Added registry URL override and updated guidance for environment configuration
113
+
114
+ ## [0.8.1] - 2026-01-22
115
+
116
+ ### Features
117
+
118
+ - **Expanded credential resolution**: API keys can now be loaded from `.env` files
119
+ - Project `.env` file (`./.env` in current working directory)
120
+ - Global `.env` file (`~/.bellwether/.env`)
121
+ - Resolution order: config → custom env var → standard env var → project .env → global .env → keychain
122
+ - `bellwether auth status` now shows which `.env` file provided the key
123
+
124
+ ### Fixes
125
+
126
+ - **Fixed check mode LLM dependency**: Check mode no longer creates an LLM orchestrator, removing unnecessary dependency on LLM configuration for schema-only validation
127
+ - **Fixed parallel tool testing config**: The `parallelTools` config flag is now properly respected; when disabled, uses sequential execution (concurrency=1)
128
+ - **Fixed `baselineExists()` for directories**: Now correctly returns `false` for directories instead of `true`
129
+ - **Fixed stdio transport error handling**: Invalid JSON in newline-delimited mode now emits an error event for consistent behavior with Content-Length mode
130
+ - **Fixed baseline-accept command tests**: Resolved 13 failing tests in `baseline-accept.test.ts`
131
+ - Fixed schema hash mismatches by using computed `'empty'` hash for tools with empty interactions
132
+ - Fixed integrity hash verification by computing valid hashes with `recalculateIntegrityHash()`
133
+ - Fixed property order in test baselines to match Zod schema order (required for deterministic JSON serialization)
134
+ - Fixed report path from `.bellwether/bellwether-check.json` to `bellwether-check.json`
135
+ - Added missing `responseFingerprint` field to baseline fixtures to match `createBaseline()` output
136
+
137
+ ## [0.8.0] - 2026-01-22
138
+
139
+ ### Features
140
+
141
+ - **Granular exit codes**: Check command now returns semantic exit codes for CI/CD:
142
+ - `0` = Clean (no changes)
143
+ - `1` = Info-level changes (non-breaking)
144
+ - `2` = Warning-level changes
145
+ - `3` = Breaking changes
146
+ - `4` = Runtime error
147
+ - **JUnit/SARIF output formats**: New `--format` option supports `junit` and `sarif` for CI integration
148
+ - JUnit XML for Jenkins, GitLab CI, CircleCI test reporting
149
+ - SARIF 2.1.0 for GitHub Code Scanning with rule IDs BWH001-BWH004
150
+ - **Configurable severity thresholds**: New `baseline.severity` config section
151
+ - `minimumSeverity` - Filter changes below a severity level
152
+ - `failOnSeverity` - CI failure threshold
153
+ - `suppressWarnings` - Hide warning-level changes
154
+ - `aspectOverrides` - Custom severity per change aspect
155
+ - **Parallel tool testing**: New `--parallel` and `--parallel-workers` options for faster checks
156
+ - Tests tools concurrently with configurable worker count (1-10)
157
+ - Uses mutex for MCP client serialization
158
+ - **Incremental checking**: New `--incremental` option to only test tools with changed schemas
159
+ - Compares current schemas against baseline
160
+ - Reuses cached fingerprints for unchanged tools
161
+ - Significantly faster for large servers
162
+ - **Performance regression detection**: Track and compare tool latency
163
+ - Captures P50/P95 latency and success rate per tool
164
+ - New `--performance-threshold` option (default: 10%)
165
+ - Flags tools with latency regression exceeding threshold
166
+ - **Enhanced CONTRACT.md**: Richer generated documentation
167
+ - Quick reference table with success rates
168
+ - Performance baseline section with latency metrics
169
+ - Example usage from successful interactions (up to 2 per tool)
170
+ - Categorized error patterns (Permission, NotFound, Validation, Timeout, Network)
171
+ - Error summary section aggregating patterns across tools
172
+ - **Detailed schema diff**: Property-level schema change detection
173
+ - Wired existing `compareSchemas()` into baseline comparison
174
+ - Shows specific property additions, removals, and type changes
175
+ - **Edge case handling**: Improved robustness for enterprise workloads
176
+ - Circular reference detection in schemas
177
+ - Unicode normalization for property names
178
+ - Binary content detection
179
+ - Payload size limits (1MB schema, 10MB baseline, 5MB response)
180
+
181
+ ### Configuration
182
+
183
+ - New `check:` section in `bellwether.yaml`:
184
+ ```yaml
185
+ check:
186
+   incremental: false
187
+   incrementalCacheHours: 168
188
+   parallel: false
189
+   parallelWorkers: 4
190
+   performanceThreshold: 10
191
+ ```
192
+ - New `baseline.severity:` section for configurable thresholds
193
+ - CI preset now enables parallel testing by default
194
+
195
+ ### CLI Options
196
+
197
+ - `--format <fmt>` - Output format: text, json, compact, github, markdown, junit, sarif
198
+ - `--parallel` - Enable parallel tool testing
199
+ - `--parallel-workers <n>` - Number of concurrent workers (1-10)
200
+ - `--incremental` - Only test tools with changed schemas
201
+ - `--incremental-cache-hours <hours>` - How long cached results stay valid for incremental checking (in hours)
202
+ - `--performance-threshold <n>` - Performance regression threshold (%)
203
+ - `--min-severity <level>` - Minimum severity to report
204
+ - `--fail-on-severity <level>` - CI failure threshold
205
+
206
+ ### Documentation
207
+
208
+ - Updated all CLI documentation with new options
209
+ - Added output formats guide with JUnit/SARIF examples
210
+ - Added parallel and incremental checking documentation
211
+ - Updated CI/CD guide with new exit codes and severity thresholds
212
+ - Updated baselines documentation with performance metrics
213
+ - Updated GitHub Action documentation with new inputs/outputs
214
+
215
+ ## [0.7.1] - 2026-01-22
216
+
217
+ ### Improvements
218
+
219
+ - **Reduced npm package size**: Excluded source maps from published package (682 kB → 445 kB, 35% smaller)
220
+ - **Added CHANGELOG.md to package**: Now included in npm package for version history visibility
221
+
222
+ ### Fixes
223
+
224
+ - Replaced `console.warn()` with structured logger in baseline loading for consistent log level filtering
225
+ - Removed unused function parameters in `cloud/client.ts` and `baseline/deprecation-tracker.ts`
226
+
227
+ ## [0.7.0] - 2026-01-21
228
+
229
+ ### Features
230
+
231
+ - **Drift acceptance workflow**: New `baseline accept` command to accept detected drift as intentional with full audit trail
232
+ - Records who, when, why, and what changes were accepted
233
+ - `--reason` option to document why drift was accepted
234
+ - `--accepted-by` option to record who accepted the drift (for CI/CD bots)
235
+ - `--dry-run` option to preview acceptance without writing
236
+ - `--force` flag required for accepting breaking changes
237
+ - **Accept drift during check**: New `--accept-drift` and `--accept-reason` flags for the check command to accept drift in one step
238
+ - **Acceptance metadata in baselines**: Baselines now include optional `acceptance` field with full audit trail for compliance and team visibility
239
+
240
+ ### Fixes
241
+
242
+ - Fixed Date deserialization for `acceptance.acceptedAt` when loading baselines from JSON
243
+
244
+ ### Documentation
245
+
246
+ - Added `baseline accept` subcommand documentation
247
+ - Updated `check` command docs with `--accept-drift` and `--accept-reason` options
248
+ - Added acceptance workflow options to CI/CD integration guide
249
+
250
+ ## [0.6.1] - 2026-01-21
251
+
252
+ ### Features
253
+
254
+ - **Verify command cloud submission**: Added `--project` option to submit verification results directly to Bellwether Cloud
255
+ - **Progress display**: Added progress bar for verification runs showing interview progress
256
+
257
+ ### Changes
258
+
259
+ - **Default LLM models updated**: Changed OpenAI default to `gpt-4.1-nano` (budget-friendly, non-reasoning) and Ollama default to `qwen3:8b`
260
+ - **Preset providers updated**: Security and thorough presets now use Anthropic provider by default
261
+ - **Verify command**: Now requires config file; added `--config` option for explicit config path
262
+
263
+ ### Documentation
264
+
265
+ - Added `cloud/diff.md` documentation for comparing baseline versions
266
+ - Updated documentation across all CLI commands with improved examples
267
+ - Enhanced verify command documentation with cloud submission examples
268
+
269
+ ### Fixes
270
+
271
+ - Fixed test mocks to match updated default models and configurations
272
+
273
+ ## [0.6.0] - 2026-01-20
274
+
275
+ Initial public beta release of Bellwether CLI.
276
+
277
+ ### Features
278
+
279
+ - **Two testing modes**: `bellwether check` for free, deterministic schema validation and `bellwether explore` for LLM-powered behavioral exploration
280
+ - **Check mode**: Zero-cost structural drift detection without LLM dependencies, generates `CONTRACT.md`
281
+ - **Explore mode**: Multi-persona exploration with OpenAI, Anthropic, or Ollama, generates `AGENTS.md`
282
+ - **Four built-in personas**: Technical Writer, Security Tester, QA Engineer, and Novice User for comprehensive coverage
283
+ - **Baseline management**: Save, compare, and track schema changes over time with `bellwether baseline` commands
284
+ - **Drift detection**: Catch breaking changes before production with configurable severity levels
285
+ - **Workflow testing**: Define multi-step tool sequences with assertions and argument mapping
286
+ - **Custom scenarios**: YAML-based test definitions for repeatable validation
287
+ - **Watch mode**: Continuous testing during development with `bellwether watch`
288
+ - **MCP Registry integration**: Search and discover MCP servers with `bellwether registry`
289
+ - **Cloud integration**: Team collaboration, history tracking, and CI/CD support via Bellwether Cloud
290
+ - **Secure credential storage**: System keychain integration for API keys with `bellwether auth`
291
+ - **Multiple transports**: Support for stdio, SSE, and streamable-http MCP connections
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dotset Labs LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.