@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,636 @@
1
+ /**
2
+ * Golden Output Testing - Capture and compare expected tool outputs.
3
+ *
4
+ * Golden outputs provide a reference for expected tool behavior,
5
+ * enabling detection of semantic changes that schema validation
6
+ * might miss (e.g., different category names, changed formats).
7
+ */
8
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
9
+ import { join, dirname } from 'path';
10
+ import { createHash } from 'crypto';
11
+ import { PATHS } from '../constants.js';
12
// Constants

/** Schema version stamped on the store and on every captured golden output. */
const GOLDEN_STORE_VERSION = 1;
/** Last-resort directory used when no output or cache directory is configured. */
const DEFAULT_GOLDEN_DIR = '.bellwether/golden';
/** File name of the golden store inside the chosen directory. */
const DEFAULT_GOLDEN_FILE = 'bellwether-golden.json';

// Common timestamp patterns to normalize
const TIMESTAMP_PATTERNS = [
    /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g, // ISO 8601
    // Unix timestamps (seconds or milliseconds). The digit lookarounds make the
    // pattern match only a complete run of 10-13 digits; without them a longer
    // numeric value (e.g. a 16-digit identifier) was partially rewritten into
    // "<TIMESTAMP>" followed by its leftover digits.
    /(?<!\d)\d{10,13}(?!\d)/g,
];

// Common UUID patterns (8-4-4-4-12 hexadecimal groups, case-insensitive)
const UUID_PATTERN = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
23
/**
 * Resolve the location of the golden output store file.
 *
 * The directory is the first truthy value among `outputDir`,
 * `PATHS.DEFAULT_CACHE_DIR` and `DEFAULT_GOLDEN_DIR`, checked in that order.
 *
 * @param {string} [outputDir] - Directory explicitly requested by the caller.
 * @returns {string} The chosen directory joined with the store file name.
 */
export function getGoldenStorePath(outputDir) {
    let baseDir = outputDir;
    if (!baseDir) {
        baseDir = PATHS.DEFAULT_CACHE_DIR;
    }
    if (!baseDir) {
        baseDir = DEFAULT_GOLDEN_DIR;
    }
    return join(baseDir, DEFAULT_GOLDEN_FILE);
}
30
/**
 * Load the golden output store from disk.
 *
 * When the file does not exist, a fresh empty store is returned (nothing is
 * written to disk). An existing file must contain a JSON object with an
 * `outputs` array; anything else is rejected here so that callers never
 * receive a value on which `store.outputs` operations would crash.
 *
 * @param {string} storePath - Path of the golden store file.
 * @returns {object} The parsed store (`version`, `outputs`, `lastUpdated`).
 * @throws {Error} If the file is not valid JSON, does not have the store
 *   shape, or declares a version newer than `GOLDEN_STORE_VERSION`.
 */
export function loadGoldenStore(storePath) {
    if (!existsSync(storePath)) {
        return {
            version: GOLDEN_STORE_VERSION,
            outputs: [],
            lastUpdated: new Date().toISOString(),
        };
    }
    let store;
    try {
        store = JSON.parse(readFileSync(storePath, 'utf-8'));
    }
    catch (error) {
        if (error instanceof SyntaxError) {
            throw new Error(`Invalid golden store file: ${storePath}`);
        }
        // Read failures (permissions, directory instead of file, ...) pass through untouched.
        throw error;
    }
    // Valid JSON is not necessarily a valid store: `null`, `[]`, `{}` or a
    // non-array `outputs` must fail with the same clear message as a syntax error.
    const hasStoreShape = store !== null &&
        typeof store === 'object' &&
        !Array.isArray(store) &&
        Array.isArray(store.outputs);
    if (!hasStoreShape) {
        throw new Error(`Invalid golden store file: ${storePath}`);
    }
    // Validate version compatibility
    if (store.version > GOLDEN_STORE_VERSION) {
        throw new Error(`Golden store version ${store.version} is newer than supported version ${GOLDEN_STORE_VERSION}`);
    }
    return store;
}
57
/**
 * Persist the golden output store as pretty-printed JSON.
 *
 * Creates the parent directory (recursively) when it is missing and stamps
 * `store.lastUpdated` with the current time. The stamp is written onto the
 * object passed in, so the caller's store is mutated.
 *
 * @param {object} store - Store object to serialize.
 * @param {string} storePath - Destination file path.
 * @returns {void}
 */
export function saveGoldenStore(store, storePath) {
    const parentDir = dirname(storePath);
    const parentMissing = !existsSync(parentDir);
    if (parentMissing) {
        mkdirSync(parentDir, { recursive: true });
    }
    store.lastUpdated = new Date().toISOString();
    const serialized = JSON.stringify(store, null, 2);
    writeFileSync(storePath, serialized);
}
68
/**
 * Build a golden output record from a tool response.
 *
 * Only the first `text` entry of `response.content` is captured; when there
 * is none, the raw output is the empty string. For JSON output the inferred
 * structure and flattened key/value pairs are stored alongside the raw text.
 *
 * @param {string} toolName - Name of the tool that produced the response.
 * @param {object} inputArgs - Arguments the tool was called with.
 * @param {object} response - Tool response exposing a `content` array.
 * @param {object} [options] - Optional `mode`, `allowedDrift`,
 *   `normalizeTimestamps`, `normalizeUuids` and `description`.
 * @returns {object} The golden output record.
 */
export function createGoldenOutput(toolName, inputArgs, response, options = {}) {
    const firstText = response.content.find((part) => part.type === 'text');
    let raw = '';
    if (firstText && 'text' in firstText) {
        raw = String(firstText.text);
    }
    const contentType = detectContentType(raw);
    const contentHash = computeContentHash(raw);
    // `structure` and `keyValues` stay undefined unless the output is JSON.
    const output = {
        raw,
        contentType,
        contentHash,
        structure: undefined,
        keyValues: undefined,
    };
    if (contentType === 'json') {
        try {
            const parsed = JSON.parse(raw);
            output.structure = inferJsonStructure(parsed);
            output.keyValues = extractKeyValues(parsed);
        }
        catch {
            // Not valid JSON despite looking like it
        }
    }
    const tolerance = {
        mode: options.mode || 'structural',
        allowedDrift: options.allowedDrift || [],
        // Both normalizations default to on; only null/undefined trigger the default.
        normalizeTimestamps: options.normalizeTimestamps ?? true,
        normalizeUuids: options.normalizeUuids ?? true,
    };
    return {
        toolName,
        capturedAt: new Date().toISOString(),
        inputArgs,
        output,
        tolerance,
        description: options.description,
        version: GOLDEN_STORE_VERSION,
    };
}
113
/**
 * Insert or replace a golden output in the store and write the store back.
 *
 * An existing entry is replaced when it has the same tool name and its
 * `inputArgs` serialize (via `JSON.stringify`) to the same string as the new
 * entry's arguments; otherwise the new entry is appended.
 *
 * @param {object} golden - Golden output record to store.
 * @param {string} storePath - Path of the golden store file.
 * @returns {void}
 */
export function saveGoldenOutput(golden, storePath) {
    const store = loadGoldenStore(storePath);
    const isSameCapture = (entry) => entry.toolName === golden.toolName &&
        JSON.stringify(entry.inputArgs) === JSON.stringify(golden.inputArgs);
    const slot = store.outputs.findIndex(isSameCapture);
    if (slot < 0) {
        store.outputs.push(golden);
    }
    else {
        store.outputs[slot] = golden;
    }
    saveGoldenStore(store, storePath);
}
129
/**
 * Look up a golden output for a tool.
 *
 * With `inputArgs`, the entry must also have arguments whose `JSON.stringify`
 * form equals that of `inputArgs`. Without it, the first entry recorded for
 * the tool is returned.
 *
 * @param {string} toolName - Tool to look up.
 * @param {string} storePath - Path of the golden store file.
 * @param {object} [inputArgs] - Arguments identifying a specific capture.
 * @returns {object|undefined} The matching golden output, if any.
 */
export function getGoldenOutput(toolName, storePath, inputArgs) {
    const { outputs } = loadGoldenStore(storePath);
    const matches = inputArgs
        ? (entry) => entry.toolName === toolName &&
            JSON.stringify(entry.inputArgs) === JSON.stringify(inputArgs)
        : (entry) => entry.toolName === toolName;
    return outputs.find(matches);
}
141
/**
 * Return every golden output recorded in the store.
 *
 * @param {string} storePath - Path of the golden store file.
 * @returns {object[]} The `outputs` array of the loaded store.
 */
export function listGoldenOutputs(storePath) {
    const { outputs } = loadGoldenStore(storePath);
    return outputs;
}
148
/**
 * Remove golden outputs from the store.
 *
 * With `inputArgs`, only entries for the tool whose arguments serialize to
 * the same JSON string are removed; without it, every entry for the tool is
 * removed. The store is rewritten only when something was actually removed.
 *
 * @param {string} toolName - Tool whose golden outputs should be removed.
 * @param {string} storePath - Path of the golden store file.
 * @param {object} [inputArgs] - Arguments identifying a specific capture.
 * @returns {boolean} `true` when at least one entry was removed.
 */
export function deleteGoldenOutput(toolName, storePath, inputArgs) {
    const store = loadGoldenStore(storePath);
    const countBefore = store.outputs.length;
    const shouldRemove = inputArgs
        ? (entry) => entry.toolName === toolName &&
            JSON.stringify(entry.inputArgs) === JSON.stringify(inputArgs)
        : (entry) => entry.toolName === toolName;
    store.outputs = store.outputs.filter((entry) => !shouldRemove(entry));
    const removedAny = store.outputs.length < countBefore;
    if (!removedAny) {
        return false;
    }
    saveGoldenStore(store, storePath);
    return true;
}
167
/**
 * Compare a current tool response against a stored golden output.
 *
 * The comparison strategy follows `golden.tolerance.mode`:
 * - `exact`: the normalized texts must be identical.
 * - `structural`: JSON goldens are compared with `compareJsonStructure` on the
 *   raw texts; other content is compared line by line on the normalized texts.
 * - `semantic`: JSON goldens that carry `keyValues` are compared with
 *   `compareSemanticValues`; anything else is compared line by line on the
 *   normalized texts.
 * Any other mode records no differences.
 *
 * @param {object} golden - Stored golden output.
 * @param {object} currentResponse - Current response exposing a `content` array.
 * @returns {object} Comparison result; `differences` lists every difference,
 *   while `passed`, `severity` and `summary` consider only the ones not
 *   flagged as `allowed`.
 */
export function compareWithGolden(golden, currentResponse) {
    // Only the first text entry of the response takes part in the comparison.
    const firstText = currentResponse.content.find((part) => part.type === 'text');
    const currentRaw = firstText && 'text' in firstText ? String(firstText.text) : '';
    const differences = [];
    const { tolerance, output } = golden;
    const mode = tolerance.mode;
    // IMPORTANT: UUIDs are normalized BEFORE timestamps, because the timestamp
    // patterns can match numeric portions of UUIDs.
    const normalize = (text) => {
        let result = text;
        if (tolerance.normalizeUuids) {
            result = normalizeUuids(result);
        }
        if (tolerance.normalizeTimestamps) {
            result = normalizeTimestamps(result);
        }
        return result;
    };
    const goldenNormalized = normalize(output.raw);
    const currentNormalized = normalize(currentRaw);
    const goldenIsJson = output.contentType === 'json';
    if (mode === 'exact') {
        if (goldenNormalized !== currentNormalized) {
            differences.push({
                type: 'changed',
                path: '$',
                expected: truncateForDisplay(goldenNormalized),
                actual: truncateForDisplay(currentNormalized),
                allowed: false,
                description: 'Output content differs',
            });
        }
    }
    else if (mode === 'structural') {
        const structuralDiffs = goldenIsJson
            ? compareJsonStructure(output.raw, currentRaw, tolerance.allowedDrift)
            : compareLines(goldenNormalized, currentNormalized); // non-JSON: line-by-line
        differences.push(...structuralDiffs);
    }
    else if (mode === 'semantic') {
        const semanticDiffs = goldenIsJson && output.keyValues
            ? compareSemanticValues(output.keyValues, extractKeyValuesFromRaw(currentRaw), tolerance.allowedDrift)
            : compareLines(goldenNormalized, currentNormalized); // no key values: line-by-line
        differences.push(...semanticDiffs);
    }
    // Allowed differences are reported but never fail the comparison.
    const disallowedDiffs = differences.filter((diff) => !diff.allowed);
    const severity = determineSeverity(disallowedDiffs, mode);
    const passed = disallowedDiffs.length === 0;
    const summary = generateComparisonSummary(disallowedDiffs, mode);
    return {
        toolName: golden.toolName,
        passed,
        severity,
        mode,
        goldenCapturedAt: golden.capturedAt,
        differences,
        summary,
    };
}
239
/**
 * Compare every golden output in the store against a fresh tool response.
 *
 * All tool calls are started together and awaited with `Promise.all`. A
 * failing tool call does not reject the batch; it is reported as a failed,
 * `breaking` comparison result for that tool.
 *
 * The function is `async` so that a failure to load the store is delivered
 * through the returned promise as a rejection, rather than being thrown
 * synchronously from a function whose contract is to return a promise.
 *
 * @param {string} storePath - Path of the golden store file.
 * @param {(toolName: string, inputArgs: object) => Promise<object>} getToolResponse
 *   Callback that invokes the tool and resolves with its response.
 * @returns {Promise<object[]>} One comparison result per golden output, in
 *   store order. Rejects if the store cannot be loaded.
 */
export async function compareAllGoldens(storePath, getToolResponse) {
    const store = loadGoldenStore(storePath);
    return Promise.all(store.outputs.map(async (golden) => {
        try {
            const response = await getToolResponse(golden.toolName, golden.inputArgs);
            return compareWithGolden(golden, response);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            return {
                toolName: golden.toolName,
                passed: false,
                severity: 'breaking',
                mode: golden.tolerance.mode,
                goldenCapturedAt: golden.capturedAt,
                differences: [{
                        type: 'changed',
                        path: '$',
                        expected: 'successful response',
                        actual: `error: ${reason}`,
                        allowed: false,
                        description: 'Tool call failed',
                    }],
                summary: `Tool call failed: ${reason}`,
            };
        }
    }));
}
269
+ // Helper functions
270
/**
 * Classify raw output as `'json'`, `'markdown'` or `'text'`.
 *
 * Output counts as JSON only when, after trimming, it is wrapped in `{}` or
 * `[]` and actually parses. The Markdown check has no multiline flag, so its
 * `^` anchors apply to the start of the trimmed output only.
 *
 * @param {string} raw - Raw output text.
 * @returns {'json'|'markdown'|'text'} Detected content type.
 */
function detectContentType(raw) {
    const text = raw.trim();
    const wrappedAsObject = text.startsWith('{') && text.endsWith('}');
    const wrappedAsArray = text.startsWith('[') && text.endsWith(']');
    if (wrappedAsObject || wrappedAsArray) {
        try {
            JSON.parse(text);
            return 'json';
        }
        catch {
            // Looks like JSON but does not parse; keep checking.
        }
    }
    // Leading heading, leading emphasis, an inline link anywhere, or a leading code fence.
    const markdownHints = /^#|^\*{1,3}[^*]|\[.*\]\(.*\)|^```/;
    return markdownHints.test(text) ? 'markdown' : 'text';
}
292
/**
 * Fingerprint content for quick comparison.
 *
 * @param {string} content - Text to hash.
 * @returns {string} The first 16 hexadecimal characters of the SHA-256 digest.
 */
function computeContentHash(content) {
    const hasher = createHash('sha256');
    hasher.update(content);
    const fullDigest = hasher.digest('hex');
    return fullDigest.slice(0, 16);
}
298
/**
 * Describe the shape of a JSON value using types only, never values.
 *
 * Arrays are described by their first element (`any` when empty), and
 * anything nested deeper than 10 levels is reported as `any`.
 *
 * @param {*} value - Parsed JSON value.
 * @param {number} [depth=0] - Current nesting depth (used by the recursion).
 * @returns {object} A descriptor with `type` and, where applicable, `items`
 *   or `properties`.
 */
function inferJsonStructure(value, depth = 0) {
    // Depth cap keeps the recursion bounded.
    if (depth > 10) {
        return { type: 'any' };
    }
    if (value === null) {
        return { type: 'null' };
    }
    if (Array.isArray(value)) {
        const items = value.length === 0
            ? { type: 'any' }
            : inferJsonStructure(value[0], depth + 1);
        return { type: 'array', items };
    }
    if (typeof value !== 'object') {
        return { type: typeof value };
    }
    const properties = {};
    Object.keys(value).forEach((key) => {
        properties[key] = inferJsonStructure(value[key], depth + 1);
    });
    return { type: 'object', properties };
}
320
/**
 * Flatten a value into path -> leaf-value pairs for semantic comparison.
 *
 * Object members are addressed as `parent.key`. Arrays contribute a
 * `.length` entry plus their first three items, addressed as `parent[i]`.
 * A leaf reached with an empty path (e.g. a top-level primitive) is not
 * recorded.
 *
 * @param {*} value - Value to flatten.
 * @param {string} [prefix=''] - Path of `value` within the overall document.
 * @param {object} [result={}] - Accumulator, filled in place and returned.
 * @returns {object} The accumulator.
 */
function extractKeyValues(value, prefix = '', result = {}) {
    const isContainer = value !== null && typeof value === 'object';
    if (!isContainer) {
        // Leaf: null, undefined or a primitive.
        if (prefix) {
            result[prefix] = value;
        }
        return result;
    }
    if (Array.isArray(value)) {
        const lengthKey = prefix ? `${prefix}.length` : 'length';
        result[lengthKey] = value.length;
        // Only the first few items are sampled.
        const sample = value.slice(0, 3);
        sample.forEach((item, index) => {
            const itemPath = prefix ? `${prefix}[${index}]` : `[${index}]`;
            extractKeyValues(item, itemPath, result);
        });
        return result;
    }
    for (const key of Object.keys(value)) {
        const childPath = prefix ? `${prefix}.${key}` : key;
        extractKeyValues(value[key], childPath, result);
    }
    return result;
}
350
/**
 * Flatten raw output into key/value pairs.
 *
 * The raw text is parsed as JSON and flattened with extractKeyValues. If
 * anything in that step throws, the whole text is returned under a single
 * `raw` key.
 *
 * @param {string} raw - Raw output text.
 * @returns {Object<string, unknown>} Flattened key/value map, or `{ raw }`.
 */
function extractKeyValuesFromRaw(raw) {
    let keyValues;
    try {
        keyValues = extractKeyValues(JSON.parse(raw));
    }
    catch {
        keyValues = { raw };
    }
    return keyValues;
}
362
/**
 * Replace every match of each pattern in TIMESTAMP_PATTERNS with the
 * `<TIMESTAMP>` placeholder, applying the patterns in order.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} Text with the matches replaced.
 */
function normalizeTimestamps(content) {
    return [...TIMESTAMP_PATTERNS].reduce(
        (text, pattern) => text.replace(pattern, '<TIMESTAMP>'),
        content
    );
}
372
/**
 * Replace matches of UUID_PATTERN in the content with the `<UUID>` placeholder.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} Text with the matches replaced.
 */
function normalizeUuids(content) {
    const placeholder = '<UUID>';
    return content.replace(UUID_PATTERN, placeholder);
}
378
/**
 * Structurally compare two raw JSON documents.
 *
 * Both inputs are parsed and walked with compareObjects starting at path `$`.
 * Any exception raised along the way (including a golden document that does
 * not parse) is reported as a single non-allowed 'changed' difference at `$`.
 *
 * @param {string} goldenRaw - Raw golden output.
 * @param {string} currentRaw - Raw current output.
 * @param {string[]} allowedPaths - Drift patterns passed on to compareObjects.
 * @returns {object[]} Differences found.
 */
function compareJsonStructure(goldenRaw, currentRaw, allowedPaths) {
    const found = [];
    try {
        compareObjects(JSON.parse(goldenRaw), JSON.parse(currentRaw), '$', allowedPaths, found);
    }
    catch {
        const parseFailure = {
            type: 'changed',
            path: '$',
            expected: 'valid JSON',
            actual: 'invalid JSON',
            allowed: false,
            description: 'Current output is not valid JSON',
        };
        found.push(parseFailure);
    }
    return found;
}
400
/**
 * Recursively compare two parsed JSON values for structural differences.
 *
 * Differences are appended to `differences`; nothing is returned.
 * - A type mismatch is recorded as 'type_changed' and stops the descent.
 * - For objects, keys present on only one side are recorded as 'added' or
 *   'removed'; shared keys are compared recursively.
 * - For arrays, a length mismatch is recorded as 'value_changed' at
 *   `<path>.length`, then elements are compared up to the shorter length.
 * - Primitive values of the same type are not compared.
 *
 * @param {unknown} golden - Expected value.
 * @param {unknown} current - Actual value.
 * @param {string} path - Path of the values being compared (root is `$`).
 * @param {string[]} allowedPaths - Allowed-drift patterns.
 * @param {object[]} differences - Output accumulator, mutated in place.
 * @returns {void}
 */
function compareObjects(golden, current, path, allowedPaths, differences) {
    const allowed = isPathAllowed(path, allowedPaths);
    // Type comparison
    const goldenType = getType(golden);
    const currentType = getType(current);
    if (goldenType !== currentType) {
        differences.push({
            type: 'type_changed',
            path,
            expected: goldenType,
            actual: currentType,
            allowed,
            description: `Type changed from ${goldenType} to ${currentType}`,
        });
        return;
    }
    // Object comparison
    if (goldenType === 'object' && golden !== null && current !== null) {
        const goldenObj = golden;
        const currentObj = current;
        // Own-property test: the `in` operator also sees inherited members, so a
        // data key such as "constructor" or "toString" would look present on both sides.
        const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
        const allKeys = new Set([...Object.keys(goldenObj), ...Object.keys(currentObj)]);
        for (const key of allKeys) {
            const childPath = `${path}.${key}`;
            const keyAllowed = isPathAllowed(childPath, allowedPaths);
            if (!hasOwn(goldenObj, key)) {
                differences.push({
                    type: 'added',
                    path: childPath,
                    actual: truncateForDisplay(currentObj[key]),
                    allowed: keyAllowed,
                    description: `Field "${key}" was added`,
                });
            }
            else if (!hasOwn(currentObj, key)) {
                differences.push({
                    type: 'removed',
                    path: childPath,
                    expected: truncateForDisplay(goldenObj[key]),
                    allowed: keyAllowed,
                    description: `Field "${key}" was removed`,
                });
            }
            else {
                compareObjects(goldenObj[key], currentObj[key], childPath, allowedPaths, differences);
            }
        }
        return;
    }
    // Array comparison
    if (goldenType === 'array') {
        const goldenArr = golden;
        const currentArr = current;
        if (goldenArr.length !== currentArr.length) {
            differences.push({
                type: 'value_changed',
                path: `${path}.length`,
                expected: goldenArr.length,
                actual: currentArr.length,
                allowed,
                description: `Array length changed from ${goldenArr.length} to ${currentArr.length}`,
            });
        }
        // Compare elements up to the shorter length
        const minLength = Math.min(goldenArr.length, currentArr.length);
        for (let i = 0; i < minLength; i++) {
            compareObjects(goldenArr[i], currentArr[i], `${path}[${i}]`, allowedPaths, differences);
        }
        return;
    }
    // Primitives of the same type: structural mode does not compare values.
}
475
/**
 * Compare two flattened key/value maps.
 *
 * Keys present on only one side are recorded as 'added' or 'removed'. Keys
 * present on both sides are recorded as 'value_changed' when their string
 * forms differ. Each difference carries whether its key matches an
 * allowed-drift pattern.
 *
 * @param {Object<string, unknown>} goldenValues - Expected key/value map.
 * @param {Object<string, unknown>} currentValues - Actual key/value map.
 * @param {string[]} allowedPaths - Allowed-drift patterns.
 * @returns {object[]} Differences found.
 */
function compareSemanticValues(goldenValues, currentValues, allowedPaths) {
    // Own-property test: the `in` operator also sees inherited members, so a
    // key such as "constructor" would wrongly count as present on both sides.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const differences = [];
    const allKeys = new Set([...Object.keys(goldenValues), ...Object.keys(currentValues)]);
    for (const key of allKeys) {
        const allowed = isPathAllowed(key, allowedPaths);
        if (!hasOwn(goldenValues, key)) {
            differences.push({
                type: 'added',
                path: key,
                actual: truncateForDisplay(currentValues[key]),
                allowed,
                description: `Value "${key}" was added`,
            });
        }
        else if (!hasOwn(currentValues, key)) {
            differences.push({
                type: 'removed',
                path: key,
                expected: truncateForDisplay(goldenValues[key]),
                allowed,
                description: `Value "${key}" was removed`,
            });
        }
        else if (String(goldenValues[key]) !== String(currentValues[key])) {
            differences.push({
                type: 'value_changed',
                path: key,
                expected: truncateForDisplay(goldenValues[key]),
                actual: truncateForDisplay(currentValues[key]),
                allowed,
                description: `Value "${key}" changed`,
            });
        }
    }
    return differences;
}
514
/**
 * Compare two texts position by position, one line at a time.
 *
 * Both texts are split on `\n`. A line index that exists only in `current`
 * is 'added', only in `golden` is 'removed', and in both with different
 * text is 'changed'. Every difference is marked as not allowed.
 *
 * @param {string} golden - Expected text.
 * @param {string} current - Actual text.
 * @returns {object[]} Differences found, in line order.
 */
function compareLines(golden, current) {
    const expectedLines = golden.split('\n');
    const actualLines = current.split('\n');
    const lineCount = Math.max(expectedLines.length, actualLines.length);
    const differences = [];
    for (let index = 0; index < lineCount; index += 1) {
        const lineNumber = index + 1;
        const path = `line ${lineNumber}`;
        if (index >= expectedLines.length) {
            differences.push({
                type: 'added',
                path,
                actual: truncateForDisplay(actualLines[index]),
                allowed: false,
                description: `Line ${lineNumber} was added`,
            });
            continue;
        }
        if (index >= actualLines.length) {
            differences.push({
                type: 'removed',
                path,
                expected: truncateForDisplay(expectedLines[index]),
                allowed: false,
                description: `Line ${lineNumber} was removed`,
            });
            continue;
        }
        if (expectedLines[index] === actualLines[index]) {
            continue;
        }
        differences.push({
            type: 'changed',
            path,
            expected: truncateForDisplay(expectedLines[index]),
            actual: truncateForDisplay(actualLines[index]),
            allowed: false,
            description: `Line ${lineNumber} changed`,
        });
    }
    return differences;
}
554
/**
 * Check whether a path matches any allowed-drift pattern.
 *
 * Both the path and each pattern may be written JSONPath-style (`$.field`)
 * or plain (`field`); a leading `$` or `$.` is stripped before matching.
 * In a pattern, `*` matches one or more characters other than `.`; every
 * other character, including `[`, `]` and `.`, is matched literally. This
 * lets patterns such as `$.items[0].id` and `$.items[*].id` match the
 * array-index paths produced by the comparison helpers.
 *
 * @param {string} path - Path to test.
 * @param {string[] | null | undefined} allowedPaths - Allowed patterns; nullish means none.
 * @returns {boolean} True when at least one pattern matches the whole path.
 */
function isPathAllowed(path, allowedPaths) {
    if (allowedPaths == null) {
        return false;
    }
    const stripRoot = (text) => text.replace(/^\$\.?/, '');
    const normalizedPath = stripRoot(path);
    return allowedPaths.some((pattern) => {
        const source = stripRoot(pattern)
            // Escape every regex metacharacter so the pattern text is literal...
            .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
            // ...then turn each (now escaped) `*` back into a segment wildcard.
            .replace(/\\\*/g, '[^.]+');
        return new RegExp(`^${source}$`).test(normalizedPath);
    });
}
569
/**
 * Name the type of a value, telling `null` and arrays apart from plain objects.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {string} 'null', 'array', or the `typeof` result.
 */
function getType(value) {
    switch (true) {
        case value === null:
            return 'null';
        case Array.isArray(value):
            return 'array';
        default:
            return typeof value;
    }
}
579
/**
 * Render a value as a short string for display.
 *
 * Strings are used as is; other values are JSON-encoded. Values that JSON
 * cannot encode (JSON.stringify yields `undefined` for `undefined`,
 * functions and symbols, and throws for BigInt and cyclic structures) fall
 * back to `String(value)` instead of raising. Text longer than `maxLength`
 * is cut and suffixed with `...`.
 *
 * @param {unknown} value - Value to render.
 * @param {number} [maxLength=50] - Maximum length of the result, ellipsis included.
 * @returns {string} Display text.
 */
function truncateForDisplay(value, maxLength = 50) {
    let str;
    if (typeof value === 'string') {
        str = value;
    }
    else {
        try {
            str = JSON.stringify(value);
        }
        catch {
            str = undefined;
        }
        // JSON.stringify may legitimately return undefined; never call .length on that.
        if (typeof str !== 'string') {
            str = String(value);
        }
    }
    if (str.length <= maxLength) {
        return str;
    }
    // Clamp so a tiny maxLength cannot become a negative (from-the-end) slice index.
    return str.slice(0, Math.max(0, maxLength - 3)) + '...';
}
588
/**
 * Map a list of differences to an overall severity.
 *
 * - No differences: 'none'.
 * - 'exact' mode with any difference: 'breaking'.
 * - Any 'removed' or 'type_changed' difference: 'breaking'.
 * - Otherwise any 'added' difference: 'warning'.
 * - Otherwise: 'info'.
 *
 * Only each difference's `type` is read; the `allowed` flag is not
 * consulted here.
 *
 * @param {Array<{type: string}>} differences - Differences to grade.
 * @param {string} mode - Comparison mode.
 * @returns {'none' | 'breaking' | 'warning' | 'info'} Severity.
 */
function determineSeverity(differences, mode) {
    if (differences.length === 0) {
        return 'none';
    }
    if (mode === 'exact') {
        return 'breaking';
    }
    const seenTypes = new Set(differences.map((difference) => difference.type));
    if (seenTypes.has('removed') || seenTypes.has('type_changed')) {
        return 'breaking';
    }
    return seenTypes.has('added') ? 'warning' : 'info';
}
612
/**
 * Build a one-line, human-readable summary of a comparison.
 *
 * With no differences the summary states that the output matches the
 * golden in the given mode. Otherwise it gives the total followed by the
 * non-zero counts of added, removed, changed ('changed' plus
 * 'value_changed') and type-changed differences.
 *
 * @param {Array<{type: string}>} differences - Differences to summarize.
 * @param {string} mode - Comparison mode, used only in the "matches" message.
 * @returns {string} Summary text.
 */
function generateComparisonSummary(differences, mode) {
    const total = differences.length;
    if (total === 0) {
        return `Output matches golden (${mode} mode)`;
    }
    const tally = { added: 0, removed: 0, changed: 0, typeChanged: 0 };
    for (const difference of differences) {
        switch (difference.type) {
            case 'added':
                tally.added += 1;
                break;
            case 'removed':
                tally.removed += 1;
                break;
            case 'changed':
            case 'value_changed':
                tally.changed += 1;
                break;
            case 'type_changed':
                tally.typeChanged += 1;
                break;
            default:
                break;
        }
    }
    const labelled = [
        [tally.added, 'added'],
        [tally.removed, 'removed'],
        [tally.changed, 'changed'],
        [tally.typeChanged, 'type changes'],
    ];
    const parts = labelled
        .filter(([count]) => count > 0)
        .map(([count, label]) => `${count} ${label}`);
    return `${total} difference(s): ${parts.join(', ')}`;
}
636
+ //# sourceMappingURL=golden-output.js.map