@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403)
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Drift Detection Evaluation Framework
3
+ *
4
+ * Provides tools for measuring the accuracy of semantic comparison
5
+ * algorithms used in behavioral drift detection.
6
+ *
7
+ * Usage:
8
+ * import { evaluate, formatEvaluationReport } from './evaluation';
9
+ * const result = evaluate();
10
+ * console.log(formatEvaluationReport(result));
11
+ */
12
+ export * from './types.js';
13
+ export * from './golden-dataset.js';
14
+ export * from './evaluator.js';
15
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Drift Detection Evaluation Framework
3
+ *
4
+ * Provides tools for measuring the accuracy of semantic comparison
5
+ * algorithms used in behavioral drift detection.
6
+ *
7
+ * Usage:
8
+ * import { evaluate, formatEvaluationReport } from './evaluation';
9
+ * const result = evaluate();
10
+ * console.log(formatEvaluationReport(result));
11
+ */
12
+ export * from './types.js';
13
+ export * from './golden-dataset.js';
14
+ export * from './evaluator.js';
15
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Types for the drift detection evaluation framework.
3
+ *
4
+ * This framework enables systematic measurement of semantic comparison
5
+ * accuracy, including precision, recall, and confidence calibration.
6
+ */
7
+ /**
8
+ * A labeled test case for evaluating semantic comparison accuracy.
9
+ */
10
+ export interface GoldenTestCase {
11
+ /** Unique identifier for this test case */
12
+ id: string;
13
+ /** Category of comparison being tested */
14
+ category: 'security' | 'limitation' | 'assertion';
15
+ /** First text to compare */
16
+ text1: string;
17
+ /** Second text to compare */
18
+ text2: string;
19
+ /** Tool name for context (affects fingerprinting) */
20
+ toolName: string;
21
+ /** Whether these should be considered semantically equivalent */
22
+ expectedMatch: boolean;
23
+ /** Expected confidence range (optional) */
24
+ expectedConfidence?: {
25
+ min: number;
26
+ max: number;
27
+ };
28
+ /** Human reasoning for why this is the expected outcome */
29
+ reasoning: string;
30
+ /** Source of this test case */
31
+ source: 'manual' | 'llm-generated' | 'production' | 'user-feedback';
32
+ /** Tags for filtering and analysis */
33
+ tags?: string[];
34
+ }
35
+ /**
36
+ * Result of evaluating a single test case.
37
+ */
38
+ export interface TestCaseResult {
39
+ /** The test case that was evaluated */
40
+ testCase: GoldenTestCase;
41
+ /** Whether the comparison returned match */
42
+ actualMatch: boolean;
43
+ /** The confidence score returned */
44
+ actualConfidence: number;
45
+ /** Whether this test passed */
46
+ passed: boolean;
47
+ /** Type of failure if not passed */
48
+ failureType?: 'false_positive' | 'false_negative' | 'confidence_out_of_range';
49
+ /** Time taken for this comparison (ms) */
50
+ durationMs: number;
51
+ /** Detailed confidence factors */
52
+ confidenceFactors?: Array<{
53
+ name: string;
54
+ weight: number;
55
+ value: number;
56
+ description: string;
57
+ }>;
58
+ }
59
+ /**
60
+ * Metrics for a specific category of comparisons.
61
+ */
62
+ export interface CategoryMetrics {
63
+ /** Category name */
64
+ category: string;
65
+ /** Number of test cases */
66
+ totalCases: number;
67
+ /** Accuracy for this category */
68
+ accuracy: number;
69
+ /** Precision for this category */
70
+ precision: number;
71
+ /** Recall for this category */
72
+ recall: number;
73
+ /** F1 score for this category */
74
+ f1Score: number;
75
+ /** Confusion matrix counts */
76
+ truePositives: number;
77
+ trueNegatives: number;
78
+ falsePositives: number;
79
+ falseNegatives: number;
80
+ }
81
+ /**
82
+ * Calibration bucket for analyzing confidence score accuracy.
83
+ */
84
+ export interface CalibrationBucket {
85
+ /** Range of predicted confidence scores */
86
+ predictedRange: {
87
+ min: number;
88
+ max: number;
89
+ };
90
+ /** Actual accuracy for predictions in this range */
91
+ actualAccuracy: number;
92
+ /** Number of samples in this bucket */
93
+ sampleCount: number;
94
+ /** Calibration error (|predicted - actual|) */
95
+ calibrationError: number;
96
+ }
97
+ /**
98
+ * Complete evaluation result for a drift detection algorithm.
99
+ */
100
+ export interface EvaluationResult {
101
+ /** Timestamp of evaluation */
102
+ timestamp: Date;
103
+ /** Algorithm version being evaluated */
104
+ algorithmVersion: string;
105
+ /** Dataset version used */
106
+ datasetVersion: string;
107
+ /** Total number of test cases */
108
+ totalCases: number;
109
+ /** Overall accuracy: (TP + TN) / Total */
110
+ accuracy: number;
111
+ /** Precision: TP / (TP + FP) - low false positive rate */
112
+ precision: number;
113
+ /** Recall: TP / (TP + FN) - catches real drift */
114
+ recall: number;
115
+ /** F1 Score: harmonic mean of precision and recall */
116
+ f1Score: number;
117
+ /** Correctly identified as matching */
118
+ truePositives: number;
119
+ /** Correctly identified as different */
120
+ trueNegatives: number;
121
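+ /** Incorrectly flagged as different (noise/false alarm). NOTE(review): truePositives is documented as "matching", so confirm which class counts as positive */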
122
+ falsePositives: number;
123
+ /** Missed real differences (dangerous) */
124
+ falseNegatives: number;
125
+ /** Average |predicted_confidence - actual_accuracy| */
126
+ calibrationError: number;
127
+ /** Mean squared error of probabilistic predictions */
128
+ brierScore: number;
129
+ /** Calibration buckets for detailed analysis */
130
+ calibrationBuckets: CalibrationBucket[];
131
+ /** Metrics per category */
132
+ categoryMetrics: CategoryMetrics[];
133
+ /** All individual test results */
134
+ testResults: TestCaseResult[];
135
+ /** Failed test cases for analysis */
136
+ failures: TestCaseResult[];
137
+ /** Total evaluation time (ms) */
138
+ totalDurationMs: number;
139
+ /** Average comparison time (ms) */
140
+ averageComparisonMs: number;
141
+ }
142
+ /**
143
+ * Summary for display/reporting.
144
+ */
145
+ export interface EvaluationSummary {
146
+ accuracy: string;
147
+ precision: string;
148
+ recall: string;
149
+ f1Score: string;
150
+ falsePositiveRate: string;
151
+ falseNegativeRate: string;
152
+ calibrationError: string;
153
+ totalCases: number;
154
+ passedCases: number;
155
+ failedCases: number;
156
+ }
157
+ /**
158
+ * Options for running evaluation.
159
+ */
160
+ export interface EvaluationOptions {
161
+ /** Filter to specific categories */
162
+ categories?: Array<'security' | 'limitation' | 'assertion'>;
163
+ /** Filter to specific tags */
164
+ tags?: string[];
165
+ /** Verbose output */
166
+ verbose?: boolean;
167
+ /** Include detailed confidence factors in results */
168
+ includeFactors?: boolean;
169
+ }
170
+ /**
171
+ * Semantic comparator interface for pluggable algorithms.
172
+ */
173
+ export interface SemanticComparator {
174
+ /** Compare two texts and return match result with confidence */
175
+ compare(text1: string, text2: string, toolName: string, category: 'security' | 'limitation' | 'assertion'): {
176
+ matches: boolean;
177
+ confidence: number;
178
+ factors?: Array<{
179
+ name: string;
180
+ weight: number;
181
+ value: number;
182
+ description: string;
183
+ }>;
184
+ };
185
+ }
186
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Types for the drift detection evaluation framework.
3
+ *
4
+ * This framework enables systematic measurement of semantic comparison
5
+ * accuracy, including precision, recall, and confidence calibration.
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,181 @@
1
+ /**
2
+ * External Dependency Detection
3
+ *
4
+ * Detects and categorizes errors from known external services (Plaid, Stripe, AWS, etc.)
5
+ * to distinguish between:
6
+ * - Environment misconfiguration (missing credentials)
7
+ * - External API failures (service down, rate limited)
8
+ * - Actual code bugs
9
+ *
10
+ * This helps users understand whether test failures are due to their MCP server code
11
+ * or external factors beyond their control.
12
+ */
13
+ import { EXTERNAL_DEPENDENCIES } from '../constants.js';
14
+ import type { ErrorPattern } from './response-fingerprint.js';
15
+ import type { ExternalServicesConfig } from '../interview/types.js';
16
+ /** Known external service names, derived from the keys of EXTERNAL_DEPENDENCIES.SERVICES */
17
+ export type ExternalServiceName = keyof typeof EXTERNAL_DEPENDENCIES.SERVICES;
18
+ /** Error source classification, derived from the keys of EXTERNAL_DEPENDENCIES.ERROR_SOURCES */
19
+ export type ErrorSource = keyof typeof EXTERNAL_DEPENDENCIES.ERROR_SOURCES;
20
+ /**
21
+ * Categorical confidence level for dependency detection.
22
+ * - 'confirmed': Actual error messages from the service were observed
23
+ * - 'likely': Strong evidence from tool name/description patterns
24
+ * - 'possible': Weak evidence, only partial matches
25
+ */
26
+ export type DependencyConfidenceLevel = 'confirmed' | 'likely' | 'possible';
27
+ /** Information about a detected external dependency */
28
+ export interface ExternalDependencyInfo {
29
+ /** Name of the external service (e.g., 'plaid', 'stripe') */
30
+ serviceName: ExternalServiceName;
31
+ /** Display name of the service (e.g., 'Plaid', 'Stripe') */
32
+ displayName: string;
33
+ /** Numeric confidence score of the detection (0-1); the categorical form is confidenceLevel */
34
+ confidence: number;
35
+ /**
36
+ * Categorical confidence, i.e. how strongly the evidence supports this dependency.
37
+ * - 'confirmed': Error message matched service-specific patterns
38
+ * - 'likely': Tool name/description strongly suggests this service
39
+ * - 'possible': Weak evidence, might be a false positive
40
+ */
41
+ confidenceLevel: DependencyConfidenceLevel;
42
+ /** Whether this appears to be a transient/temporary error */
43
+ isTransient: boolean;
44
+ /** Suggested remediation for this error */
45
+ remediation: string;
46
+ /** Matched patterns that led to detection */
47
+ matchedPatterns: string[];
48
+ /** Breakdown of which evidence sources contributed to the detection, for transparency */
49
+ evidence: {
50
+ /** True if error message patterns matched */
51
+ fromErrorMessage: boolean;
52
+ /** True if tool name patterns matched */
53
+ fromToolName: boolean;
54
+ /** True if tool description patterns matched */
55
+ fromDescription: boolean;
56
+ /** Number of actual errors attributed to this dependency */
57
+ actualErrorCount: number;
58
+ };
59
+ }
60
+ /** Configuration status for a known external service (the return type of getExternalServiceStatus) */
61
+ export interface ServiceStatus {
62
+ /** Name of the external service this status describes */
63
+ service: ExternalServiceName;
64
+ /** Whether the service is fully configured */
65
+ configured: boolean;
66
+ /** Missing credential keys or environment variables */
67
+ missingCredentials: string[];
68
+ /** Whether a sandbox is available */
69
+ sandboxAvailable: boolean;
70
+ /** Whether a mock response is available */
71
+ mockAvailable: boolean;
72
+ }
73
+ /** Result of classifying the source of a single error (the return type of categorizeErrorSource) */
74
+ export interface ExternalDependencyAnalysis {
75
+ /** The error source classification */
76
+ source: ErrorSource;
77
+ /** Detected external dependency info (if source is 'external_dependency') */
78
+ dependency?: ExternalDependencyInfo;
79
+ /** Whether the error appears transient */
80
+ isTransient: boolean;
81
+ /** Human-readable explanation of the classification */
82
+ explanation: string;
83
+ /** Suggested remediation; optional, so it may be absent */
84
+ remediation?: string;
85
+ }
86
+ /** Summary of external dependencies across all tools (the return type of analyzeExternalDependencies) */
87
+ export interface ExternalDependencySummary {
88
+ /** Per-service summaries for the services detected across all tools, keyed by service name */
89
+ services: Map<ExternalServiceName, ExternalServiceSummary>;
90
+ /** Total number of external dependency errors */
91
+ totalExternalErrors: number;
92
+ /** Total number of environment configuration errors */
93
+ totalEnvironmentErrors: number;
94
+ /** Total number of likely code bugs */
95
+ totalCodeBugErrors: number;
96
+ /** Total number of unclassified errors */
97
+ totalUnknownErrors: number;
98
+ /** Tools affected by external dependencies: string key (presumably the tool name; confirm) mapped to the services involved */
99
+ affectedTools: Map<string, ExternalServiceName[]>;
100
+ }
101
+ /** Aggregated summary for a single external service (the value type of ExternalDependencySummary.services) */
102
+ export interface ExternalServiceSummary {
103
+ /** Display name of the service */
104
+ displayName: string;
105
+ /** Number of errors from this service */
106
+ errorCount: number;
107
+ /** Number of confirmed errors (from error message patterns) */
108
+ confirmedErrorCount: number;
109
+ /** Tools that use this service (confirmed from errors) */
110
+ confirmedTools: string[];
111
+ /** Tools that likely use this service (from name/description only) */
112
+ detectedTools: string[];
113
+ /** All tools associated with this service (presumably confirmedTools plus detectedTools; TODO confirm) */
114
+ tools: string[];
115
+ /** Whether errors appear to be transient */
116
+ hasTransientErrors: boolean;
117
+ /** Primary remediation suggestion */
118
+ remediation: string;
119
+ /** Highest categorical confidence level recorded for this service */
120
+ highestConfidenceLevel: DependencyConfidenceLevel;
121
+ }
122
+ /**
123
+ * Detect whether an error message indicates an external dependency.
124
+ *
125
+ * @param errorMessage - The error message to analyze
126
+ * @param toolName - Optional tool name, used as additional context
127
+ * @param toolDescription - Optional tool description, used as additional context
128
+ * @returns External dependency info if detected, null otherwise
129
+ */
130
+ export declare function detectExternalDependency(errorMessage: string, toolName?: string, toolDescription?: string): ExternalDependencyInfo | null;
131
+ /**
132
+ * Detect an external service dependency from the tool name and optional description alone (no error message is supplied); returns the dependency info, or null.
133
+ */
134
+ export declare function detectExternalServiceFromTool(toolName: string, toolDescription?: string): ExternalDependencyInfo | null;
135
+ /**
136
+ * Report the configuration status of an external service as a ServiceStatus, optionally taking an ExternalServicesConfig.
137
+ */
138
+ export declare function getExternalServiceStatus(serviceName: ExternalServiceName, config?: ExternalServicesConfig): ServiceStatus;
139
+ /**
140
+ * Categorize the source of an error.
141
+ *
142
+ * @param errorMessage - The error message to analyze
143
+ * @param toolName - Optional tool name, used as additional context
144
+ * @param toolDescription - Optional tool description, used as additional context
145
+ * @returns Analysis of the error source: classification, transience, explanation, and optional dependency info and remediation
146
+ */
147
+ export declare function categorizeErrorSource(errorMessage: string, toolName?: string, toolDescription?: string): ExternalDependencyAnalysis;
148
+ /**
149
+ * Check whether an error appears to be transient (temporary), judging from its message alone.
150
+ *
151
+ * @param errorMessage - The error message to check
152
+ * @returns True if the error appears transient
153
+ */
154
+ export declare function isTransientError(errorMessage: string): boolean;
155
+ /**
156
+ * Analyze the error patterns of multiple tools and generate a summary.
157
+ *
158
+ * @param errors - Array of entries, each with a tool name, an optional tool description, and that tool's error patterns
159
+ * @returns Summary of external dependencies
160
+ */
161
+ export declare function analyzeExternalDependencies(errors: Array<{
162
+ toolName: string;
163
+ toolDescription?: string;
164
+ patterns: ErrorPattern[];
165
+ }>): ExternalDependencySummary;
166
+ /**
167
+ * Format an external dependency summary as a string for display.
168
+ *
169
+ * @param summary - The summary to format
170
+ * @param useColors - Whether to use ANSI colors (optional; the default is not visible in this declaration)
171
+ * @returns Formatted string
172
+ */
173
+ export declare function formatExternalDependencySummary(summary: ExternalDependencySummary, useColors?: boolean): string;
174
+ /**
175
+ * Generate a Markdown table for external dependencies.
176
+ *
177
+ * @param summary - The summary to format
178
+ * @returns Markdown string
179
+ */
180
+ export declare function formatExternalDependenciesMarkdown(summary: ExternalDependencySummary): string;
181
+ //# sourceMappingURL=external-dependency-detector.d.ts.map