@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,916 @@
1
+ /**
2
+ * Structural comparison for drift detection.
3
+ *
4
+ * Compares baselines using deterministic structural comparison:
5
+ * - Tool presence/absence
6
+ * - Schema changes (hash comparison)
7
+ * - Description changes (exact string comparison)
8
+ * - Workflow success/failure changes
9
+ *
10
+ * All comparisons are 100% deterministic - no LLM involvement.
11
+ */
12
+ import { createBaseline } from './saver.js';
13
+ import { compareFingerprints, compareErrorPatterns, } from './response-fingerprint.js';
14
+ import { analyzeErrorTrends } from './error-analyzer.js';
15
+ import { compareSecurityFingerprints } from '../security/security-tester.js';
16
+ import { compareSchemaEvolution } from './response-schema-tracker.js';
17
+ import { checkVersionCompatibility, BaselineVersionError, parseVersion, areVersionsCompatible, getCompatibilityWarning, } from './version.js';
18
+ import { compareSchemas } from './schema-compare.js';
19
+ import { PERFORMANCE_TRACKING } from '../constants.js';
20
+ import { hasReliableConfidence } from './performance-tracker.js';
21
+ import { compareDocumentationScores, scoreDocumentation } from './documentation-scorer.js';
22
/**
 * Diff a fresh interview result against a stored baseline.
 *
 * The interview result is first turned into a baseline with `createBaseline`;
 * the stored and freshly built baselines are then handed to `compareBaselines`.
 *
 * @param baseline - Stored baseline to compare against
 * @param current - Interview result that is converted into a baseline
 * @param serverCommand - Forwarded to `createBaseline` together with `current`
 * @param options - Comparison options, forwarded unchanged to `compareBaselines`
 * @returns Whatever `compareBaselines` returns for the two baselines
 */
export function compareWithBaseline(baseline, current, serverCommand, options = {}) {
    return compareBaselines(baseline, createBaseline(current, serverCommand), options);
}
29
/**
 * Compare two baselines directly.
 * Every reported change comes from structural, deterministic checks.
 *
 * @param previous - The baseline to compare against (source/old)
 * @param current - The current baseline (target/new)
 * @param options - Comparison options. Fields read here: `ignoreVersionMismatch`,
 *   `tools` (optional list of tool names the tool comparison is restricted to),
 *   `performanceThreshold`, `ignoreSecurityChanges` and `ignoreErrorPatternChanges`.
 *   The whole object is also passed on to the per-tool comparison.
 * @returns Diff result including version compatibility information
 * @throws BaselineVersionError if versions are incompatible and ignoreVersionMismatch is not set
 */
export function compareBaselines(previous, current, options = {}) {
    // Format-version gate: refuse to diff incompatible baselines unless told otherwise.
    const sourceVersion = parseVersion(previous.version);
    const targetVersion = parseVersion(current.version);
    const versionCompatibility = {
        compatible: areVersionsCompatible(sourceVersion, targetVersion),
        warning: getCompatibilityWarning(sourceVersion, targetVersion),
        sourceVersion: sourceVersion.raw,
        targetVersion: targetVersion.raw,
    };
    if (!(versionCompatibility.compatible || options.ignoreVersionMismatch)) {
        const message = [
            `Cannot compare baselines with incompatible format versions: v${sourceVersion.raw} vs v${targetVersion.raw}. `,
            `Use \`bellwether baseline migrate\` to upgrade the older baseline, `,
            `or use --ignore-version-mismatch to force comparison (results may be incorrect).`,
        ].join('');
        throw new BaselineVersionError(message, sourceVersion.raw, targetVersion.raw);
    }
    // Index both sides by tool name.
    const previousToolMap = new Map(previous.tools.map((entry) => [entry.name, entry]));
    const currentToolMap = new Map(current.tools.map((entry) => [entry.name, entry]));
    // A removal is reported when no tool filter is given or the filter names the tool.
    const inRemovalScope = (toolName) => !options.tools || options.tools.length === 0 || options.tools.includes(toolName);
    // A current tool is skipped only when a non-empty filter exists and omits it.
    const excludedByFilter = (toolName) => options.tools && options.tools.length > 0 && !options.tools.includes(toolName);
    // Tools present before but missing now.
    const toolsRemoved = [...previousToolMap.keys()].filter((toolName) => !currentToolMap.has(toolName) && inRemovalScope(toolName));
    // Tools that are new, or present on both sides with differences.
    const toolsAdded = [];
    const toolsModified = [];
    const behaviorChanges = [];
    for (const [toolName, currentTool] of currentToolMap) {
        if (excludedByFilter(toolName)) {
            continue;
        }
        const previousTool = previousToolMap.get(toolName);
        if (!previousTool) {
            toolsAdded.push(toolName);
            continue;
        }
        const toolDiff = compareTool(previousTool, currentTool, options);
        const isModified = toolDiff.changes.length > 0 ||
            toolDiff.schemaChanged ||
            toolDiff.descriptionChanged ||
            toolDiff.securityChanged ||
            toolDiff.responseSchemaEvolutionChanged;
        if (isModified) {
            toolsModified.push(toolDiff);
            behaviorChanges.push(...toolDiff.changes);
        }
    }
    // Workflow-level changes are appended after all per-tool changes.
    behaviorChanges.push(...compareWorkflows(previous.workflowSignatures || [], current.workflowSignatures || []));
    const { severity, breakingCount, warningCount, infoCount } = calculateSeverity(toolsAdded, toolsRemoved, behaviorChanges);
    const summary = generateSummary(toolsAdded, toolsRemoved, toolsModified, behaviorChanges, severity);
    // The remaining reports are built in place, in the order they appear below.
    return {
        toolsAdded,
        toolsRemoved,
        toolsModified,
        behaviorChanges,
        severity,
        breakingCount,
        warningCount,
        infoCount,
        summary,
        versionCompatibility,
        // Performance regression report, using the default threshold when none is supplied
        performanceReport: comparePerformanceData(previous, current, options.performanceThreshold ?? PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD),
        // Security diff report
        securityReport: compareSecurityData(previous, current, options.ignoreSecurityChanges ?? false),
        // Schema evolution report, derived from the modified tools
        schemaEvolutionReport: generateSchemaEvolutionReport(toolsModified, previous, current),
        // Error trend report
        errorTrendReport: generateErrorTrendReport(previous, current, options.ignoreErrorPatternChanges ?? false),
        // Documentation score comparison
        documentationScoreReport: compareDocumentationData(previous, current),
    };
}
122
/**
 * Compare the previous and current baseline entries of a single tool.
 *
 * Checks run in a fixed order (input schema, description, response structure,
 * error patterns, response schema evolution, security) and each one can be
 * switched off through the matching `ignore*` option. Response structure and
 * response schema evolution share `ignoreResponseStructureChanges`.
 *
 * @param previous - Tool entry from the older baseline
 * @param current - Tool entry from the newer baseline; its name labels every change
 * @param options - Comparison options (`ignoreSchemaChanges`, `ignoreDescriptionChanges`,
 *   `ignoreResponseStructureChanges`, `ignoreErrorPatternChanges`, `ignoreSecurityChanges`)
 * @returns The tool name, the ordered list of changes, one boolean flag per check,
 *   and the raw schema evolution diff (undefined when that check was skipped)
 */
function compareTool(previous, current, options) {
    const tool = current.name;
    const changes = [];
    // Append one change entry for this tool; field order matches the emitted object shape.
    const record = (aspect, before, after, severity, description) => {
        changes.push({ tool, aspect, before, after, severity, description });
    };
    // Risk level of a new security finding -> change severity.
    const severityForRisk = (riskLevel) => {
        if (riskLevel === 'critical' || riskLevel === 'high') {
            return 'breaking';
        }
        return riskLevel === 'medium' ? 'warning' : 'info';
    };
    const compareResponses = !options.ignoreResponseStructureChanges;
    let responseStructureChanged = false;
    let errorPatternsChanged = false;
    let responseSchemaEvolutionChanged = false;
    let securityChanged = false;
    let schemaEvolutionDiff;
    // Input schema: a hash mismatch flags the change, the detailed diff explains it.
    const schemaChanged = previous.schemaHash !== current.schemaHash && !options.ignoreSchemaChanges;
    if (schemaChanged) {
        const detailed = compareSchemas(previous.inputSchema, current.inputSchema);
        if (detailed.changes.length > 0) {
            // One entry per detailed schema change, with its own severity.
            for (const schemaChange of detailed.changes) {
                record('schema', formatSchemaChangeValue(schemaChange.before), formatSchemaChangeValue(schemaChange.after), schemaChange.breaking ? 'breaking' : 'warning', `${schemaChange.path}: ${schemaChange.description}`);
            }
        }
        else {
            // Hashes differ but no detailed change was produced
            // (can happen if inputSchema is missing on one or both sides).
            record('schema', `Schema hash: ${previous.schemaHash}`, `Schema hash: ${current.schemaHash}`, 'breaking', `Schema for ${tool} has changed`);
        }
    }
    // Description: exact string comparison, informational only.
    const descriptionChanged = previous.description !== current.description && !options.ignoreDescriptionChanges;
    if (descriptionChanged) {
        record('description', previous.description, current.description, 'info', `Description for ${tool} has changed`);
    }
    // Response structure fingerprints.
    if (compareResponses) {
        const fingerprintDiff = compareFingerprints(previous.responseFingerprint, current.responseFingerprint);
        if (!fingerprintDiff.identical) {
            responseStructureChanged = true;
            for (const change of fingerprintDiff.changes) {
                record('response_structure', change.before, change.after, change.breaking ? 'breaking' : 'warning', change.description);
            }
        }
    }
    // Error patterns: new ones are warnings, vanished ones are informational.
    if (!options.ignoreErrorPatternChanges) {
        const errorDiff = compareErrorPatterns(previous.errorPatterns, current.errorPatterns);
        if (errorDiff.behaviorChanged) {
            errorPatternsChanged = true;
            for (const added of errorDiff.added) {
                record('error_pattern', 'none', `${added.category}: ${added.example.slice(0, 50)}...`, 'warning', `New error pattern detected: ${added.category}`);
            }
            for (const removed of errorDiff.removed) {
                record('error_pattern', `${removed.category}: ${removed.example.slice(0, 50)}...`, 'none', 'info', `Error pattern no longer occurs: ${removed.category}`);
            }
        }
    }
    // Response schema evolution (gated by the same option as response structure).
    if (compareResponses) {
        schemaEvolutionDiff = compareSchemaEvolution(previous.responseSchemaEvolution, current.responseSchemaEvolution);
        if (schemaEvolutionDiff.structureChanged) {
            responseSchemaEvolutionChanged = true;
            const { fieldsRemoved, fieldsAdded, typeChanges, newRequired } = schemaEvolutionDiff;
            // Removed fields (breaking)
            if (fieldsRemoved.length > 0) {
                const removedList = fieldsRemoved.join(', ');
                record('response_schema_evolution', removedList, 'removed', 'breaking', `Response fields removed: ${removedList}`);
            }
            // Added fields (non-breaking)
            if (fieldsAdded.length > 0) {
                const addedList = fieldsAdded.join(', ');
                record('response_schema_evolution', 'none', addedList, 'info', `Response fields added: ${addedList}`);
            }
            // Type changes: backward-compatible ones are warnings, the rest breaking
            for (const typeChange of typeChanges) {
                record('response_schema_evolution', typeChange.previousType, typeChange.currentType, typeChange.backwardCompatible ? 'warning' : 'breaking', `Response field "${typeChange.field}" type changed: ${typeChange.previousType} → ${typeChange.currentType}`);
            }
            // Fields that became required (breaking)
            if (newRequired.length > 0) {
                record('response_schema_evolution', 'optional', 'required', 'breaking', `Response fields now required: ${newRequired.join(', ')}`);
            }
        }
    }
    // Security fingerprints.
    if (!options.ignoreSecurityChanges) {
        const securityDiff = compareSecurityFingerprints(previous.securityFingerprint, current.securityFingerprint);
        if (securityDiff.newFindings.length > 0 || securityDiff.resolvedFindings.length > 0) {
            securityChanged = true;
            // New findings: severity follows the finding's risk level
            for (const finding of securityDiff.newFindings) {
                record('security', 'no finding', `${finding.riskLevel}: ${finding.title}`, severityForRisk(finding.riskLevel), `New security finding: ${finding.title} (${finding.cweId})`);
            }
            // Resolved findings: always informational
            for (const finding of securityDiff.resolvedFindings) {
                record('security', `${finding.riskLevel}: ${finding.title}`, 'resolved', 'info', `Security finding resolved: ${finding.title} (${finding.cweId})`);
            }
        }
    }
    return {
        tool,
        changes,
        schemaChanged,
        descriptionChanged,
        responseStructureChanged,
        errorPatternsChanged,
        responseSchemaEvolutionChanged,
        securityChanged,
        schemaEvolutionDiff,
    };
}
314
/**
 * Render an arbitrary schema-change value as a short display string
 * for use in BehaviorChange entries.
 *
 * - `undefined` becomes `<none>` and `null` becomes `null`
 * - strings are returned untouched; numbers and booleans go through `String()`
 * - arrays show at most their first three items, followed by `...` when longer
 * - everything else is shown as compact JSON, cut to 50 characters
 *
 * @param value - Value taken from a schema diff
 * @returns Human-readable representation of the value
 */
function formatSchemaChangeValue(value) {
    if (value === undefined) {
        return '<none>';
    }
    if (value === null) {
        return 'null';
    }
    switch (typeof value) {
        case 'string':
            return value;
        case 'number':
        case 'boolean':
            return String(value);
        default:
            break;
    }
    if (Array.isArray(value)) {
        if (value.length === 0) {
            return '[]';
        }
        const preview = value
            .slice(0, 3)
            .map((item) => formatSchemaChangeValue(item))
            .join(', ');
        return value.length > 3 ? `[${preview}, ...]` : `[${preview}]`;
    }
    // Whatever is left gets a compact JSON rendering. If serialization throws
    // or yields no string, fall back to String().
    try {
        const serialized = JSON.stringify(value);
        if (serialized.length > 50) {
            return serialized.slice(0, 47) + '...';
        }
        return serialized;
    }
    catch {
        return String(value);
    }
}
343
/**
 * Detect workflows whose pass/fail outcome flipped between two runs.
 * Workflows are matched by `id`; a workflow present on only one side
 * produces no change entry.
 *
 * @param previous - Workflow results from the previous baseline
 * @param current - Workflow results from the current baseline
 * @returns Behavior changes: `breaking` when a workflow stopped succeeding,
 *          `info` when a previously failing workflow now succeeds
 */
function compareWorkflows(previous, current) {
    const previousById = new Map(previous.map((workflow) => [workflow.id, workflow]));
    const currentById = new Map(current.map((workflow) => [workflow.id, workflow]));
    const changes = [];
    currentById.forEach((now, id) => {
        const before = previousById.get(id);
        if (!before) {
            return;
        }
        if (before.succeeded && !now.succeeded) {
            changes.push({
                tool: now.name,
                aspect: 'error_handling',
                before: 'succeeded',
                after: 'failed',
                severity: 'breaking',
                description: `Workflow "${now.name}" now fails (previously succeeded)`,
            });
            return;
        }
        if (!before.succeeded && now.succeeded) {
            changes.push({
                tool: now.name,
                aspect: 'error_handling',
                before: 'failed',
                after: 'succeeded',
                severity: 'info',
                description: `Workflow "${now.name}" now succeeds (previously failed)`,
            });
        }
    });
    return changes;
}
374
/**
 * Tally changes per severity level and derive the overall severity.
 * Every removed tool counts as one breaking change and every added tool
 * as one info change; changes with any other severity value are not counted.
 *
 * @param toolsAdded - Names of tools that were added
 * @param toolsRemoved - Names of tools that were removed
 * @param changes - Behavior changes carrying a `severity` field
 * @returns The highest severity with a non-zero count (or `none`) and the three counts
 */
function calculateSeverity(toolsAdded, toolsRemoved, changes) {
    const tally = {
        breaking: toolsRemoved.length,
        warning: 0,
        info: toolsAdded.length,
    };
    for (const change of changes) {
        const level = change.severity;
        if (level === 'breaking' || level === 'warning' || level === 'info') {
            tally[level] += 1;
        }
    }
    // First level (most severe first) with at least one hit wins.
    const severity = ['breaking', 'warning', 'info'].find((level) => tally[level] > 0) ?? 'none';
    return {
        severity,
        breakingCount: tally.breaking,
        warningCount: tally.warning,
        infoCount: tally.info,
    };
}
403
/**
 * Build the one-line, human-readable summary of a diff.
 *
 * @param toolsAdded - Names of tools that were added
 * @param toolsRemoved - Names of tools that were removed
 * @param toolsModified - Per-tool diffs for modified tools (only the count is used)
 * @param changes - Behavior changes carrying a `severity` field
 * @param severity - Overall severity of the diff
 * @returns `No changes detected.` when severity is `none`; otherwise the
 *          applicable parts joined by `. ` and terminated with a period
 */
function generateSummary(toolsAdded, toolsRemoved, toolsModified, changes, severity) {
    if (severity === 'none') {
        return 'No changes detected.';
    }
    const countBySeverity = (level) => changes.filter((c) => c.severity === level).length;
    const parts = [];
    if (toolsRemoved.length > 0) {
        parts.push(`${toolsRemoved.length} tool(s) removed: ${toolsRemoved.join(', ')}`);
    }
    if (toolsAdded.length > 0) {
        parts.push(`${toolsAdded.length} tool(s) added: ${toolsAdded.join(', ')}`);
    }
    if (toolsModified.length > 0) {
        parts.push(`${toolsModified.length} tool(s) modified`);
    }
    const breakingChanges = countBySeverity('breaking');
    const warningChanges = countBySeverity('warning');
    if (breakingChanges > 0) {
        parts.push(`${breakingChanges} breaking change(s)`);
    }
    if (warningChanges > 0) {
        parts.push(`${warningChanges} warning(s)`);
    }
    // A diff can be non-"none" with nothing listed above (for example only
    // info-level workflow changes). Without this fallback the summary would
    // be the bare string ".".
    if (parts.length === 0) {
        const infoChanges = countBySeverity('info');
        parts.push(infoChanges > 0 ? `${infoChanges} informational change(s)` : 'Changes detected');
    }
    return `${parts.join('. ')}.`;
}
427
/**
 * Report whether a diff's overall severity is `breaking`.
 *
 * @param diff - Diff result exposing a `severity` field
 * @returns true only when `diff.severity` is exactly `breaking`
 */
export function hasBreakingChanges(diff) {
    const { severity } = diff;
    return severity === 'breaking';
}
430
/**
 * Report whether any behavior change in the diff concerns the `security` aspect.
 *
 * @param diff - Diff result exposing a `behaviorChanges` array
 * @returns true when at least one change has `aspect === 'security'`
 */
export function hasSecurityChanges(diff) {
    const isSecurityChange = ({ aspect }) => aspect === 'security';
    return diff.behaviorChanges.some(isSecurityChange);
}
433
/**
 * Select the behavior changes whose severity is at least `minSeverity`.
 * Levels rank, lowest first: none, info, warning, breaking.
 *
 * @param diff - Diff result exposing a `behaviorChanges` array
 * @param minSeverity - Lowest severity to keep
 * @returns New array holding the changes at or above the threshold
 */
export function filterByMinimumSeverity(diff, minSeverity) {
    const ranking = ['none', 'info', 'warning', 'breaking'];
    const rankOf = (level) => ranking.indexOf(level);
    const floor = rankOf(minSeverity);
    // change.severity is already one of the ranked levels, so it is ranked directly.
    return diff.behaviorChanges.filter((change) => rankOf(change.severity) >= floor);
}
441
/**
 * Severity levels ordered from least to most severe; a level's index is its rank.
 * Frozen so the shared ranking cannot be mutated at runtime.
 */
const SEVERITY_ORDER = Object.freeze(['none', 'info', 'warning', 'breaking']);
445
/**
 * Order two severity levels by their position in SEVERITY_ORDER.
 *
 * @param a - First severity level
 * @param b - Second severity level
 * @returns Negative when a ranks below b, positive when above, 0 when equal
 */
export function compareSeverity(a, b) {
    const [rankA, rankB] = [a, b].map((level) => SEVERITY_ORDER.indexOf(level));
    return rankA - rankB;
}
452
/**
 * Decide whether a severity is at least as severe as a threshold.
 *
 * @param severity - Severity being tested
 * @param threshold - Minimum severity required
 * @returns true when compareSeverity ranks `severity` at or above `threshold`
 */
export function severityMeetsThreshold(severity, threshold) {
    const delta = compareSeverity(severity, threshold);
    return delta >= 0;
}
458
/**
 * Resolve the effective severity of a behavior change, honoring an optional
 * per-aspect override table.
 *
 * @param change - Behavior change exposing `aspect` and `severity`
 * @param aspectOverrides - Optional map from aspect name to severity
 * @returns The override registered for the change's aspect, or the change's
 *          own severity when there is no table or no entry for that aspect
 */
export function applyAspectOverride(change, aspectOverrides) {
    const overridden = aspectOverrides ? aspectOverrides[change.aspect] : undefined;
    return overridden === undefined ? change.severity : overridden;
}
469
/**
 * Apply severity configuration to a diff result.
 * Returns a new diff with filtered/modified changes based on config; the
 * input diff is not mutated.
 *
 * Steps:
 *  1. Re-grade every behavior change through applyAspectOverride.
 *  2. Drop changes below `minimumSeverity`, and warnings when `suppressWarnings` is set.
 *  3. Keep a modified tool when it still has a change, or when its schema or
 *     description changed and that aspect is not overridden to `none`.
 *  4. Recompute counts, overall severity and summary from what remains.
 *
 * @param diff - Diff result to post-process
 * @param config - Optional settings: `minimumSeverity` (default `none`),
 *                 `suppressWarnings` (default false), `aspectOverrides`.
 *                 May be omitted entirely, in which case all defaults apply.
 * @returns Copy of `diff` with `behaviorChanges`, `toolsModified`, `severity`,
 *          the three counts and `summary` replaced
 */
export function applySeverityConfig(diff, config) {
    // Tolerate a missing config object instead of failing while destructuring.
    const { minimumSeverity = 'none', suppressWarnings = false, aspectOverrides } = config ?? {};
    const survivesFilters = (change) => severityMeetsThreshold(change.severity, minimumSeverity) &&
        !(suppressWarnings && change.severity === 'warning');
    const filteredChanges = diff.behaviorChanges
        .map((change) => ({ ...change, severity: applyAspectOverride(change, aspectOverrides) }))
        .filter(survivesFilters);
    // Filter toolsModified to only include those with remaining changes
    const toolsWithChanges = new Set(filteredChanges.map((c) => c.tool));
    const isSilenced = (aspect) => aspectOverrides?.[aspect] === 'none';
    const filteredToolsModified = diff.toolsModified.filter((td) => toolsWithChanges.has(td.tool) ||
        (td.schemaChanged && !isSilenced('schema')) ||
        (td.descriptionChanged && !isSilenced('description')));
    // Same tally the comparator uses when it first builds a diff.
    const { severity, breakingCount, warningCount, infoCount } = calculateSeverity(diff.toolsAdded, diff.toolsRemoved, filteredChanges);
    return {
        ...diff,
        behaviorChanges: filteredChanges,
        toolsModified: filteredToolsModified,
        severity,
        breakingCount,
        warningCount,
        infoCount,
        summary: generateSummary(diff.toolsAdded, diff.toolsRemoved, filteredToolsModified, filteredChanges, severity),
    };
}
538
/**
 * Decide whether a check should fail (non-zero exit) for the given diff.
 *
 * @param diff - Diff result exposing a `severity` field
 * @param failOnSeverity - Lowest severity that causes a failure (default `breaking`)
 * @returns true when the diff's severity meets or exceeds `failOnSeverity`
 */
export function shouldFailOnDiff(diff, failOnSeverity = 'breaking') {
    const { severity } = diff;
    return severityMeetsThreshold(severity, failOnSeverity);
}
545
/**
 * Check whether two baselines carry versions that can be compared.
 * Delegates to checkVersionCompatibility using each baseline's `version`
 * and forwards only the four fields listed below.
 *
 * @param baseline1 - First baseline
 * @param baseline2 - Second baseline
 * @returns `{ compatible, warning, sourceVersion, targetVersion }`
 */
export function checkBaselineVersionCompatibility(baseline1, baseline2) {
    const { compatible, warning, sourceVersion, targetVersion } = checkVersionCompatibility(baseline1.version, baseline2.version);
    return { compatible, warning, sourceVersion, targetVersion };
}
561
/**
 * Compare per-tool latency between two baselines.
 * A tool is reported as a regression when its p50 grew, relative to the
 * previous p50, by more than `threshold`. Confidence levels are carried along
 * so callers can judge how trustworthy each comparison is.
 *
 * @param previous - The previous baseline
 * @param current - The current baseline
 * @param threshold - Relative p50 increase that counts as a regression (e.g. 0.10 = 10% slower)
 * @returns Performance report, or undefined when no tool in the previous
 *          baseline carries a p50 value
 */
function comparePerformanceData(previous, current, threshold) {
    // Previous timings indexed by tool name; tools without a p50 are left out.
    const baselineByTool = new Map();
    for (const tool of previous.tools) {
        if (tool.baselineP50Ms === undefined) {
            continue;
        }
        baselineByTool.set(tool.name, {
            p50: tool.baselineP50Ms,
            p95: tool.baselineP95Ms ?? tool.baselineP50Ms,
            confidence: tool.performanceConfidence?.confidenceLevel,
        });
    }
    // Describe a confidence-level transition for one tool.
    const describeConfidenceShift = (toolName, previousLevel, currentLevel) => {
        const improved = (previousLevel === 'low' && currentLevel !== 'low') ||
            (previousLevel === 'medium' && currentLevel === 'high');
        const degraded = (previousLevel === 'high' && currentLevel !== 'high') ||
            (previousLevel === 'medium' && currentLevel === 'low');
        let verb = 'changed';
        if (improved) {
            verb = 'improved';
        }
        else if (degraded) {
            verb = 'degraded';
        }
        return {
            toolName,
            previousLevel,
            currentLevel,
            improved,
            degraded,
            summary: `Confidence ${verb} from ${previousLevel} to ${currentLevel}`,
        };
    };
    const regressions = [];
    const confidenceChanges = [];
    const lowConfidenceTools = [];
    let improvementCount = 0;
    for (const tool of current.tools) {
        const currentP50 = tool.baselineP50Ms;
        if (currentP50 === undefined) {
            continue; // nothing measured for this tool
        }
        const confidence = tool.performanceConfidence;
        // A tool without confidence data is treated as 'low' for level comparisons.
        const confidenceLevel = confidence?.confidenceLevel ?? 'low';
        if (confidence && !hasReliableConfidence(confidence)) {
            lowConfidenceTools.push(tool.name);
        }
        const baseline = baselineByTool.get(tool.name);
        if (!baseline) {
            continue; // new tool: no earlier timing to compare against
        }
        if (baseline.confidence && confidenceLevel !== baseline.confidence) {
            confidenceChanges.push(describeConfidenceShift(tool.name, baseline.confidence, confidenceLevel));
        }
        // Relative p50 change; a non-positive previous p50 is treated as "no change".
        const regressionPercent = baseline.p50 > 0 ? (currentP50 - baseline.p50) / baseline.p50 : 0;
        const isReliable = confidence !== undefined && hasReliableConfidence(confidence);
        if (regressionPercent > threshold) {
            regressions.push({
                toolName: tool.name,
                previousP50Ms: baseline.p50,
                currentP50Ms: currentP50,
                regressionPercent,
                exceedsThreshold: true,
                previousConfidence: baseline.confidence,
                currentConfidence: confidenceLevel,
                isReliable,
            });
        }
        else if (regressionPercent < -PERFORMANCE_TRACKING.WARNING_THRESHOLD) {
            // Faster by more than the tracking warning threshold.
            improvementCount += 1;
        }
    }
    // Without any previous timings there is nothing to report.
    if (baselineByTool.size === 0) {
        return undefined;
    }
    return {
        regressions,
        regressionCount: regressions.length,
        improvementCount,
        hasRegressions: regressions.length > 0,
        confidenceChanges: confidenceChanges.length > 0 ? confidenceChanges : undefined,
        lowConfidenceTools: lowConfidenceTools.length > 0 ? lowConfidenceTools : undefined,
    };
}
657
/**
 * Build a server-level security diff by pooling the findings of every tool
 * in each baseline.
 *
 * Findings are identified by `tool:category:cweId:parameter`. Risk scores are
 * averaged over the tools whose fingerprint is marked `tested`.
 *
 * @param previous - The previous baseline
 * @param current - The current baseline
 * @param ignoreSecurityChanges - Whether to skip security comparison
 * @returns Security diff report, or undefined when comparison is skipped or
 *          neither baseline contains a tested tool
 */
function compareSecurityData(previous, current, ignoreSecurityChanges) {
    if (ignoreSecurityChanges) {
        return undefined;
    }
    const wasTested = (tool) => tool.securityFingerprint?.tested;
    if (!previous.tools.some(wasTested) && !current.tools.some(wasTested)) {
        return undefined; // neither side has security data
    }
    // Pool all findings of a baseline into a Map keyed by finding identity.
    const indexFindings = (tools) => {
        const byKey = new Map();
        for (const tool of tools) {
            for (const finding of tool.securityFingerprint?.findings || []) {
                const key = `${finding.tool}:${finding.category}:${finding.cweId}:${finding.parameter}`;
                byKey.set(key, finding);
            }
        }
        return byKey;
    };
    // Mean risk score across tested tools; 0 when none were tested.
    const averageRisk = (tools) => {
        let total = 0;
        let tested = 0;
        for (const tool of tools) {
            if (tool.securityFingerprint?.tested) {
                total += tool.securityFingerprint.riskScore;
                tested += 1;
            }
        }
        return tested > 0 ? total / tested : 0;
    };
    // Findings of `source` whose key does not appear in `other`.
    const onlyIn = (source, other) => [...source]
        .filter(([key]) => !other.has(key))
        .map(([, finding]) => finding);
    const previousFindings = indexFindings(previous.tools);
    const currentFindings = indexFindings(current.tools);
    const newFindings = onlyIn(currentFindings, previousFindings);
    const resolvedFindings = onlyIn(previousFindings, currentFindings);
    const avgPreviousRisk = averageRisk(previous.tools);
    const avgCurrentRisk = averageRisk(current.tools);
    const riskScoreChange = avgCurrentRisk - avgPreviousRisk;
    // Assemble the textual summary
    const summaryParts = [];
    if (newFindings.length > 0) {
        const criticalHigh = newFindings.filter(({ riskLevel }) => riskLevel === 'critical' || riskLevel === 'high').length;
        if (criticalHigh > 0) {
            summaryParts.push(`${criticalHigh} critical/high severity findings detected`);
        }
        summaryParts.push(`${newFindings.length} new security finding(s)`);
    }
    if (resolvedFindings.length > 0) {
        summaryParts.push(`${resolvedFindings.length} finding(s) resolved`);
    }
    if (riskScoreChange > 0) {
        summaryParts.push(`risk score increased by ${riskScoreChange.toFixed(1)}`);
    }
    else if (riskScoreChange < 0) {
        summaryParts.push(`risk score decreased by ${Math.abs(riskScoreChange).toFixed(1)}`);
    }
    return {
        newFindings,
        resolvedFindings,
        previousRiskScore: Math.round(avgPreviousRisk),
        currentRiskScore: Math.round(avgCurrentRisk),
        riskScoreChange: Math.round(riskScoreChange),
        degraded: newFindings.length > 0 || riskScoreChange > 0,
        summary: summaryParts.length > 0 ? summaryParts.join('; ') : 'No security changes detected',
    };
}
759
/**
 * Generate schema evolution report from tool diffs.
 * Counts stable and unstable response schemas in the current baseline and
 * lists tools whose response structure changed or whose schema went from
 * stable to unstable.
 *
 * @param toolsModified - Tools with modifications (their `schemaEvolutionDiff` is consulted)
 * @param previous - The previous baseline
 * @param current - The current baseline
 * @returns Schema evolution report, or undefined when no tool in either
 *          baseline carries `responseSchemaEvolution` data
 */
function generateSchemaEvolutionReport(toolsModified, previous, current) {
    const hasEvolutionData = (tool) => tool.responseSchemaEvolution;
    if (!previous.tools.some(hasEvolutionData) && !current.tools.some(hasEvolutionData)) {
        return undefined; // No schema evolution data to compare
    }
    // Index once so the per-tool loop does constant-time lookups. The first
    // entry for a key wins, matching what Array.prototype.find would return.
    const indexFirstBy = (items, keyOf) => {
        const index = new Map();
        for (const item of items) {
            const key = keyOf(item);
            if (!index.has(key)) {
                index.set(key, item);
            }
        }
        return index;
    };
    const diffByTool = indexFirstBy(toolsModified, (td) => td.tool);
    const previousByName = indexFirstBy(previous.tools, (t) => t.name);
    const toolsWithIssues = [];
    let unstableCount = 0;
    let stableCount = 0;
    let structureChangedCount = 0;
    // Named so it does not shadow the exported hasBreakingChanges() function.
    let anyBreaking = false;
    for (const tool of current.tools) {
        const currEvolution = tool.responseSchemaEvolution;
        if (!currEvolution) {
            continue;
        }
        if (currEvolution.isStable) {
            stableCount++;
        }
        else {
            unstableCount++;
        }
        const evolutionDiff = diffByTool.get(tool.name)?.schemaEvolutionDiff;
        const prevEvolution = previousByName.get(tool.name)?.responseSchemaEvolution;
        // Only a schema known to have been stable before can "become" unstable.
        const becameUnstable = (prevEvolution?.isStable ?? false) && !currEvolution.isStable;
        if (evolutionDiff?.structureChanged) {
            structureChangedCount++;
            if (evolutionDiff.isBreaking) {
                anyBreaking = true;
            }
            toolsWithIssues.push({
                toolName: tool.name,
                becameUnstable,
                fieldsAdded: evolutionDiff.fieldsAdded,
                fieldsRemoved: evolutionDiff.fieldsRemoved,
                isBreaking: evolutionDiff.isBreaking,
                summary: evolutionDiff.summary,
            });
        }
        else if (becameUnstable && currEvolution.inconsistentFields.length > 0) {
            // No structural diff, but the schema turned unstable since the previous baseline.
            toolsWithIssues.push({
                toolName: tool.name,
                becameUnstable: true,
                fieldsAdded: [],
                fieldsRemoved: [],
                isBreaking: false,
                summary: `Schema became unstable: ${currEvolution.inconsistentFields.join(', ')}`,
            });
        }
    }
    return {
        toolsWithIssues,
        unstableCount,
        stableCount,
        structureChangedCount,
        hasBreakingChanges: anyBreaking,
    };
}
837
/**
 * Produce an error trend report for a pair of baselines by pooling the
 * error patterns of all tools on each side and handing both pools to
 * analyzeErrorTrends.
 *
 * @param previous - The previous baseline
 * @param current - The current baseline
 * @param ignoreErrorPatternChanges - Whether to skip error pattern comparison
 * @returns Result of analyzeErrorTrends, or undefined when comparison is
 *          skipped or no tool on either side has any error pattern
 */
function generateErrorTrendReport(previous, current, ignoreErrorPatternChanges) {
    if (ignoreErrorPatternChanges) {
        return undefined;
    }
    const hasPatterns = (tool) => tool.errorPatterns && tool.errorPatterns.length > 0;
    const anyPatterns = [previous, current].some((baseline) => baseline.tools.some(hasPatterns));
    if (!anyPatterns) {
        return undefined; // nothing to analyze on either side
    }
    // Flatten every tool's patterns into one list per baseline.
    const collectPatterns = (baseline) => baseline.tools.flatMap((tool) => tool.errorPatterns ?? []);
    return analyzeErrorTrends(collectPatterns(previous), collectPatterns(current));
}
861
/**
 * Compare documentation scores between baselines.
 *
 * The current score comes from `current.documentationScore`, or is derived
 * from the current baseline's tools when that field is absent. The previous
 * score is passed through exactly as stored, so it may be undefined.
 *
 * @param previous - The previous baseline
 * @param current - The current baseline
 * @returns Result of compareDocumentationScores, or undefined when no
 *          current score is available
 */
function compareDocumentationData(previous, current) {
    const currentScore = current.documentationScore ?? calculateDocScoreFromTools(current);
    const previousScore = previous.documentationScore;
    if (!currentScore) {
        return undefined;
    }
    const { overallScore, grade, toolCount } = currentScore;
    // The comparison helper takes a full score object, but only a summary is
    // available here: component scores are zero-filled and the lists left empty.
    const currentDocScore = {
        overallScore,
        grade,
        components: {
            descriptionCoverage: 0,
            descriptionQuality: 0,
            parameterDocumentation: 0,
            exampleCoverage: 0,
        },
        issues: [],
        suggestions: [],
        toolCount,
    };
    return compareDocumentationScores(previousScore, currentDocScore);
}
894
/**
 * Derive a documentation score summary from a baseline's tool list.
 * Used when the baseline carries no pre-calculated score.
 *
 * @param baseline - Baseline whose `tools` are scored
 * @returns `{ overallScore, grade, issueCount, toolCount }`, or undefined
 *          when the baseline has no tools
 */
function calculateDocScoreFromTools(baseline) {
    const fingerprints = baseline.tools;
    if (!fingerprints || fingerprints.length === 0) {
        return undefined;
    }
    // Reduce each fingerprint to the name/description/inputSchema triple that
    // is handed to the scorer; a missing schema becomes an empty object.
    const toMinimalTool = ({ name, description, inputSchema }) => ({
        name,
        description,
        inputSchema: inputSchema ?? {},
    });
    const { overallScore, grade, issues, toolCount } = scoreDocumentation(fingerprints.map(toMinimalTool));
    return {
        overallScore,
        grade,
        issueCount: issues.length,
        toolCount,
    };
}
916
+ //# sourceMappingURL=comparator.js.map