@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,606 @@
1
+ /**
2
+ * AI Agent Compatibility Scoring.
3
+ *
4
+ * Evaluates how well an MCP server is designed for AI agent consumption.
5
+ * Scores tools based on description clarity, parameter naming, error quality,
6
+ * example completeness, workflow documentation, and response predictability.
7
+ */
8
+ import { AI_COMPATIBILITY } from '../constants.js';
9
/**
 * Calculate AI compatibility score for a set of tools.
 *
 * Runs the six component scorers over the inputs, rounds the sum of their
 * weighted scores into the overall score, derives a grade from it, scores
 * every tool individually and finally generates recommendations.
 *
 * @param inputs - Array of per-tool scoring inputs.
 * @returns The value of createEmptyScore() when `inputs` is empty; otherwise an
 *   object with `overall`, `grade`, `breakdown`, `recommendations` and `toolScores`.
 */
export function calculateAICompatibilityScore(inputs) {
    // Nothing to score: delegate to the empty-result factory.
    if (inputs.length === 0) {
        return createEmptyScore();
    }
    // Component scores, evaluated in the same order they are reported.
    const breakdown = {
        descriptionClarity: scoreDescriptionClarity(inputs),
        parameterNaming: scoreParameterNaming(inputs),
        errorMessageQuality: scoreErrorMessageQuality(inputs),
        exampleCompleteness: scoreExampleCompleteness(inputs),
        workflowDocumentation: scoreWorkflowDocumentation(inputs),
        responsePredictability: scoreResponsePredictability(inputs),
    };
    // Left-to-right sum of the weighted component scores, rounded once at the end.
    const weightedTotal = [
        breakdown.descriptionClarity,
        breakdown.parameterNaming,
        breakdown.errorMessageQuality,
        breakdown.exampleCompleteness,
        breakdown.workflowDocumentation,
        breakdown.responsePredictability,
    ]
        .map((component) => component.weightedScore)
        .reduce((sum, value) => sum + value);
    const overall = Math.round(weightedTotal);
    const grade = calculateGrade(overall);
    // Per-tool scores
    const toolScores = inputs.map((entry) => calculateToolScore(entry));
    // The recommendation generator receives its own copy of the breakdown object.
    const recommendations = generateRecommendations({ ...breakdown }, inputs, toolScores);
    return { overall, grade, breakdown, recommendations, toolScores };
}
58
/**
 * Score description clarity across all tools.
 *
 * Every tool earns points for its description length (the GOOD_LENGTH tier, or
 * failing that the MIN_LENGTH tier) and for matching the action-verb, purpose
 * and input/output patterns from AI_COMPATIBILITY.DESCRIPTION. The per-tool
 * total is capped at 100 and the component score is the rounded average.
 *
 * @param inputs - Array of scoring inputs; only `tool.description` is read
 *   (a missing description is treated as an empty string).
 * @returns `{ score, weight, weightedScore, notes }`. For an empty `inputs`
 *   array the score and weighted score are 0 rather than NaN.
 */
function scoreDescriptionClarity(inputs) {
    const weight = AI_COMPATIBILITY.WEIGHTS.descriptionClarity;
    const notes = [];
    // Guard against dividing by zero below: without this an empty tool list
    // would produce NaN for both score and weightedScore.
    if (inputs.length === 0) {
        return { score: 0, weight, weightedScore: 0, notes };
    }
    let totalScore = 0;
    let shortDescriptionCount = 0;
    let missingActionVerbCount = 0;
    for (const { tool } of inputs) {
        let toolScore = 0;
        const description = tool.description || '';
        // Length tiers are mutually exclusive: the higher tier wins.
        if (description.length >= AI_COMPATIBILITY.DESCRIPTION.GOOD_LENGTH) {
            toolScore += AI_COMPATIBILITY.DESCRIPTION.POINTS.GOOD_LENGTH;
        }
        else if (description.length >= AI_COMPATIBILITY.DESCRIPTION.MIN_LENGTH) {
            toolScore += AI_COMPATIBILITY.DESCRIPTION.POINTS.MIN_LENGTH;
        }
        else {
            // Below the minimum length: counted for the summary note.
            shortDescriptionCount++;
        }
        // Action verb check; the result feeds both the score and the summary note,
        // so the pattern is evaluated only once per tool.
        if (AI_COMPATIBILITY.DESCRIPTION.ACTION_VERB_PATTERN.test(description)) {
            toolScore += AI_COMPATIBILITY.DESCRIPTION.POINTS.ACTION_VERB;
        }
        else {
            missingActionVerbCount++;
        }
        // Purpose explanation
        if (AI_COMPATIBILITY.DESCRIPTION.PURPOSE_PATTERN.test(description)) {
            toolScore += AI_COMPATIBILITY.DESCRIPTION.POINTS.PURPOSE;
        }
        // Input/output mentions
        if (AI_COMPATIBILITY.DESCRIPTION.IO_PATTERN.test(description)) {
            toolScore += AI_COMPATIBILITY.DESCRIPTION.POINTS.IO_MENTION;
        }
        totalScore += Math.min(toolScore, 100);
    }
    const score = Math.round(totalScore / inputs.length);
    // Notes on common issues
    if (shortDescriptionCount > 0) {
        notes.push(`${shortDescriptionCount} tool(s) have short descriptions (<${AI_COMPATIBILITY.DESCRIPTION.MIN_LENGTH} chars)`);
    }
    // Only flagged when more than half of the tools are affected.
    if (missingActionVerbCount > inputs.length / 2) {
        notes.push('Many tools lack action verbs in descriptions');
    }
    // Positive note only when nothing was flagged above.
    if (notes.length === 0 && score >= 80) {
        notes.push('Good description clarity across tools');
    }
    return {
        score,
        weight,
        weightedScore: score * weight,
        notes,
    };
}
110
/**
 * Score parameter naming quality.
 *
 * A parameter counts as well named when it is not listed in
 * AI_COMPATIBILITY.PARAMETER.BAD_NAMES (compared in lower case), is at least
 * MIN_NAME_LENGTH characters long, and is written in snake_case or camelCase.
 * The score is the rounded percentage of well-named parameters; when no
 * parameters are found at all the score is 100.
 *
 * @param inputs - Array of scoring inputs; reads `tool.name` and
 *   `tool.inputSchema.properties`.
 * @returns `{ score, weight, weightedScore, notes }`.
 */
function scoreParameterNaming(inputs) {
    const snakeCasePattern = /^[a-z][a-z0-9]*(_[a-z0-9]+)*$/;
    const camelCasePattern = /^[a-z][a-zA-Z0-9]*$/;
    const weight = AI_COMPATIBILITY.WEIGHTS.parameterNaming;
    const genericNames = [];
    let seenCount = 0;
    let wellNamedCount = 0;
    inputs.forEach(({ tool }) => {
        const properties = tool.inputSchema?.properties;
        // Tools without a schema or without properties contribute nothing.
        if (!properties) {
            return;
        }
        Object.keys(properties).forEach((name) => {
            seenCount += 1;
            // Generic names are collected for the note and never count as good.
            if (AI_COMPATIBILITY.PARAMETER.BAD_NAMES.includes(name.toLowerCase())) {
                genericNames.push(`${tool.name}.${name}`);
                return;
            }
            const longEnough = name.length >= AI_COMPATIBILITY.PARAMETER.MIN_NAME_LENGTH;
            const recognisedCasing = snakeCasePattern.test(name) || camelCasePattern.test(name);
            if (longEnough && recognisedCasing) {
                wellNamedCount += 1;
            }
        });
    });
    let score = 100;
    if (seenCount > 0) {
        score = Math.round((wellNamedCount / seenCount) * 100);
    }
    const notes = [];
    if (genericNames.length > 0) {
        // Show at most three offenders and summarise the rest.
        const shown = genericNames.slice(0, 3).join(', ');
        const remainder = genericNames.length > 3 ? ` and ${genericNames.length - 3} more` : '';
        notes.push(`Generic names found: ${shown}${remainder}`);
    }
    if (score >= 90) {
        notes.push('Excellent parameter naming conventions');
    }
    else if (score < 60) {
        notes.push('Many parameters have non-descriptive names');
    }
    return { score, weight, weightedScore: score * weight, notes };
}
163
/**
 * Score error message quality.
 *
 * Each observed error pattern is judged on three indicators applied to its
 * `example` text: minimum length, a match of ACTIONABLE_PATTERN and a match of
 * REMEDIATION_PATTERN. Two or more indicators make a "good" error; a non-empty
 * message with none is recorded as poor. The score is the rounded percentage
 * of good errors, or AI_COMPATIBILITY.ERROR.DEFAULT_SCORE when no error
 * patterns were observed.
 *
 * @param inputs - Array of scoring inputs; reads the optional `errorPatterns` list.
 * @returns `{ score, weight, weightedScore, notes }`.
 */
function scoreErrorMessageQuality(inputs) {
    const weight = AI_COMPATIBILITY.WEIGHTS.errorMessageQuality;
    const lackingGuidance = [];
    let observed = 0;
    let wellFormed = 0;
    for (const entry of inputs) {
        // A missing pattern list is treated as "no errors observed" for this tool.
        for (const pattern of entry.errorPatterns || []) {
            observed += 1;
            const message = pattern.example || '';
            // All three indicators are always evaluated; the count of hits is the quality.
            const indicators = [
                message.length >= AI_COMPATIBILITY.ERROR.MIN_MESSAGE_LENGTH,
                AI_COMPATIBILITY.ERROR.ACTIONABLE_PATTERN.test(message),
                AI_COMPATIBILITY.ERROR.REMEDIATION_PATTERN.test(message),
            ];
            const hits = indicators.filter(Boolean).length;
            if (hits >= 2) {
                wellFormed += 1;
            }
            else if (hits === 0 && message.length > 0) {
                // Keep only a short excerpt of the offending message.
                lackingGuidance.push(message.slice(0, 50));
            }
        }
    }
    const sawErrors = observed > 0;
    // Without any observed errors the configured default score applies.
    const score = sawErrors
        ? Math.round((wellFormed / observed) * 100)
        : AI_COMPATIBILITY.ERROR.DEFAULT_SCORE;
    const notes = [];
    if (!sawErrors) {
        notes.push('No error patterns observed (run with more test cases)');
    }
    else if (lackingGuidance.length > 0) {
        notes.push(`${lackingGuidance.length} error message(s) lack actionable guidance`);
    }
    if (sawErrors && score >= 80) {
        notes.push('Error messages provide good guidance');
    }
    return { score, weight, weightedScore: score * weight, notes };
}
221
/**
 * Score example completeness.
 *
 * A tool is considered to have examples when its fingerprint carries a
 * `responseFingerprint`. An example counts as truncated when the JSON form of
 * that fingerprint contains '...' or 'truncated'. The score combines the share
 * of tools with examples (COVERAGE_WEIGHT) and the share of non-truncated
 * examples (QUALITY_WEIGHT); with no examples at all the truncation penalty is 0.
 *
 * @param inputs - Array of scoring inputs; reads the optional `fingerprint`.
 * @returns `{ score, weight, weightedScore, notes }`.
 */
function scoreExampleCompleteness(inputs) {
    const weight = AI_COMPATIBILITY.WEIGHTS.exampleCompleteness;
    // Response fingerprints of the tools that actually captured response data.
    const captured = inputs
        .map((entry) => entry.fingerprint?.responseFingerprint)
        .filter((responseFingerprint) => Boolean(responseFingerprint));
    const withExamples = captured.length;
    // Truncation is detected textually on the serialized fingerprint.
    const truncated = captured.filter((responseFingerprint) => {
        const serialized = JSON.stringify(responseFingerprint);
        return serialized.includes('...') || serialized.includes('truncated');
    }).length;
    let coverage = 0;
    if (inputs.length > 0) {
        coverage = withExamples / inputs.length;
    }
    let truncationPenalty = 0;
    if (withExamples > 0) {
        truncationPenalty = truncated / withExamples;
    }
    // Weighted blend of coverage and (1 - truncation), scaled to 0-100.
    const score = Math.round((coverage * AI_COMPATIBILITY.EXAMPLE.COVERAGE_WEIGHT +
        (1 - truncationPenalty) * AI_COMPATIBILITY.EXAMPLE.QUALITY_WEIGHT) * 100);
    const notes = [];
    if (truncated > 0) {
        notes.push(`${truncated} tool(s) have truncated examples`);
    }
    if (coverage < 0.5) {
        notes.push('Less than half of tools have captured examples');
    }
    else if (coverage === 1 && truncated === 0) {
        notes.push('Full example coverage with no truncation');
    }
    return { score, weight, weightedScore: score * weight, notes };
}
264
/**
 * Score workflow documentation quality.
 *
 * Counts the tools whose lower-cased description matches
 * AI_COMPATIBILITY.WORKFLOW.SEQUENCE_PATTERN and those matching
 * DEPENDENCY_PATTERN. The score starts at 50 and gains up to 25 points for
 * each of the two ratios, so tools without any workflow hints still score 50.
 *
 * @param inputs - Array of scoring inputs; only `tool.description` is read.
 * @returns `{ score, weight, weightedScore, notes }`.
 */
function scoreWorkflowDocumentation(inputs) {
    const weight = AI_COMPATIBILITY.WEIGHTS.workflowDocumentation;
    const toolCount = inputs.length;
    // Patterns are applied to lower-cased descriptions; a missing one becomes ''.
    const descriptions = inputs.map(({ tool }) => (tool.description || '').toLowerCase());
    const sequenceHints = descriptions
        .filter((text) => AI_COMPATIBILITY.WORKFLOW.SEQUENCE_PATTERN.test(text))
        .length;
    const dependencyHints = descriptions
        .filter((text) => AI_COMPATIBILITY.WORKFLOW.DEPENDENCY_PATTERN.test(text))
        .length;
    const sequenceRatio = toolCount > 0 ? sequenceHints / toolCount : 0;
    const dependencyRatio = toolCount > 0 ? dependencyHints / toolCount : 0;
    // Base of 50 so that sets of independent tools are not penalised heavily.
    const score = Math.round(Math.min(100, 50 + sequenceRatio * 25 + dependencyRatio * 25));
    const notes = [];
    if (sequenceHints > 0) {
        notes.push(`${sequenceHints} tool(s) describe execution sequences`);
    }
    if (dependencyHints > 0) {
        notes.push(`${dependencyHints} tool(s) mention dependencies`);
    }
    // The suggestion is only raised for servers with more than three tools.
    if (score < 60 && toolCount > 3) {
        notes.push('Consider adding workflow guidance to descriptions');
    }
    return { score, weight, weightedScore: score * weight, notes };
}
306
+ /**
307
+ * Score response predictability.
308
+ * Checks for: schema stability, consistent structure, type consistency.
309
+ */
310
+ function scoreResponsePredictability(inputs) {
311
+ const weight = AI_COMPATIBILITY.WEIGHTS.responsePredictability;
312
+ const notes = [];
313
+ let stableTools = 0;
314
+ const unstableTools = [];
315
+ for (const { tool, schemaEvolution } of inputs) {
316
+ if (!schemaEvolution) {
317
+ // No evolution data - assume stable
318
+ stableTools++;
319
+ continue;
320
+ }
321
+ // Check stability based on schema evolution data
322
+ if (schemaEvolution.isStable) {
323
+ stableTools++;
324
+ }
325
+ else {
326
+ unstableTools.push(tool.name);
327
+ }
328
+ }
329
+ const score = inputs.length > 0
330
+ ? Math.round((stableTools / inputs.length) * 100)
331
+ : AI_COMPATIBILITY.RESPONSE.DEFAULT_SCORE;
332
+ if (unstableTools.length > 0) {
333
+ const displayTools = unstableTools.slice(0, 3).join(', ');
334
+ const more = unstableTools.length > 3 ? ` and ${unstableTools.length - 3} more` : '';
335
+ notes.push(`Unstable responses: ${displayTools}${more}`);
336
+ }
337
+ if (score >= 90) {
338
+ notes.push('Highly predictable response structures');
339
+ }
340
+ return {
341
+ score,
342
+ weight,
343
+ weightedScore: score * weight,
344
+ notes,
345
+ };
346
+ }
347
+ /**
348
+ * Calculate score for a single tool.
349
+ */
350
+ function calculateToolScore(input) {
351
+ const issues = [];
352
+ let score = 100;
353
+ const { tool, errorPatterns, schemaEvolution } = input;
354
+ const description = tool.description || '';
355
+ // Description issues
356
+ if (description.length < AI_COMPATIBILITY.DESCRIPTION.MIN_LENGTH) {
357
+ score -= 15;
358
+ issues.push('Short or missing description');
359
+ }
360
+ else if (!AI_COMPATIBILITY.DESCRIPTION.ACTION_VERB_PATTERN.test(description)) {
361
+ score -= 5;
362
+ issues.push('Description lacks action verb');
363
+ }
364
+ // Parameter issues
365
+ const schema = tool.inputSchema;
366
+ if (schema?.properties) {
367
+ for (const paramName of Object.keys(schema.properties)) {
368
+ if (AI_COMPATIBILITY.PARAMETER.BAD_NAMES.includes(paramName.toLowerCase())) {
369
+ score -= 10;
370
+ issues.push(`Generic parameter name: ${paramName}`);
371
+ break; // Only penalize once per tool
372
+ }
373
+ }
374
+ }
375
+ // Error quality issues
376
+ if (errorPatterns && errorPatterns.length > 0) {
377
+ const poorErrors = errorPatterns.filter(e => {
378
+ const msg = e.example || '';
379
+ return msg.length < AI_COMPATIBILITY.ERROR.MIN_MESSAGE_LENGTH ||
380
+ (!AI_COMPATIBILITY.ERROR.ACTIONABLE_PATTERN.test(msg) &&
381
+ !AI_COMPATIBILITY.ERROR.REMEDIATION_PATTERN.test(msg));
382
+ });
383
+ if (poorErrors.length > errorPatterns.length / 2) {
384
+ score -= 10;
385
+ issues.push('Error messages lack guidance');
386
+ }
387
+ }
388
+ // Response stability issues
389
+ if (schemaEvolution && !schemaEvolution.isStable) {
390
+ score -= 15;
391
+ issues.push('Response structure is unstable');
392
+ }
393
+ return {
394
+ toolName: tool.name,
395
+ score: Math.max(0, score),
396
+ issues,
397
+ };
398
+ }
399
+ /**
400
+ * Generate actionable recommendations based on scores.
401
+ */
402
+ function generateRecommendations(breakdown, inputs, toolScores) {
403
+ const recommendations = [];
404
+ let priority = 1;
405
+ // Recommend based on lowest-scoring components
406
+ const components = Object.entries(breakdown);
407
+ const sortedComponents = components.sort((a, b) => a[1].score - b[1].score);
408
+ for (const [category, component] of sortedComponents) {
409
+ if (component.score >= AI_COMPATIBILITY.RECOMMENDATION_THRESHOLD)
410
+ continue;
411
+ const affectedTools = toolScores
412
+ .filter(t => t.issues.some(i => isIssueRelatedToCategory(i, category)))
413
+ .map(t => t.toolName);
414
+ const recommendation = createRecommendation(category, component, affectedTools, priority++, inputs);
415
+ if (recommendation) {
416
+ recommendations.push(recommendation);
417
+ }
418
+ // Limit recommendations
419
+ if (recommendations.length >= AI_COMPATIBILITY.MAX_RECOMMENDATIONS) {
420
+ break;
421
+ }
422
+ }
423
+ return recommendations;
424
+ }
425
/**
 * Check if an issue is related to a scoring category.
 *
 * The match is a case-insensitive substring search of the issue text
 * against a fixed keyword list per category.
 *
 * @param issue - Issue text as produced by the per-tool scoring
 * @param category - Scoring category name (e.g. 'parameterNaming')
 * @returns True when the issue mentions any keyword of the category;
 *          false for categories that have no keyword list
 */
function isIssueRelatedToCategory(issue, category) {
    const categoryKeywords = {
        descriptionClarity: ['description', 'action verb'],
        parameterNaming: ['parameter', 'name', 'generic'],
        errorMessageQuality: ['error', 'message', 'guidance'],
        exampleCompleteness: ['example', 'truncated'],
        workflowDocumentation: ['workflow', 'sequence', 'dependency'],
        responsePredictability: ['response', 'unstable', 'structure'],
    };
    // Own-property check: a name such as 'constructor' or 'toString' would otherwise
    // resolve to an inherited Object.prototype member and break the `.some` call.
    const keywords = Object.prototype.hasOwnProperty.call(categoryKeywords, category)
        ? categoryKeywords[category]
        : [];
    // Lower-case once instead of once per keyword
    const normalizedIssue = issue.toLowerCase();
    return keywords.some(kw => normalizedIssue.includes(kw));
}
440
+ /**
441
+ * Create a specific recommendation for a category.
442
+ */
443
+ function createRecommendation(category, component, affectedTools, priority, inputs) {
444
+ const potentialImprovement = Math.round((100 - component.score) * component.weight);
445
+ switch (category) {
446
+ case 'descriptionClarity': {
447
+ const shortDescTools = inputs
448
+ .filter(i => (i.tool.description || '').length < AI_COMPATIBILITY.DESCRIPTION.MIN_LENGTH)
449
+ .map(i => i.tool.name);
450
+ return {
451
+ priority,
452
+ category,
453
+ title: 'Improve tool descriptions',
454
+ description: `Add clear, action-oriented descriptions (${AI_COMPATIBILITY.DESCRIPTION.MIN_LENGTH}+ chars) that explain what each tool does and when to use it.`,
455
+ affectedTools: shortDescTools.length > 0 ? shortDescTools : affectedTools,
456
+ potentialImprovement,
457
+ };
458
+ }
459
+ case 'parameterNaming':
460
+ return {
461
+ priority,
462
+ category,
463
+ title: 'Use descriptive parameter names',
464
+ description: 'Replace generic names (data, value, input) with specific, semantic names (transactionData, accountId, searchQuery).',
465
+ affectedTools,
466
+ potentialImprovement,
467
+ };
468
+ case 'errorMessageQuality':
469
+ return {
470
+ priority,
471
+ category,
472
+ title: 'Add remediation hints to errors',
473
+ description: 'Include suggestions for fixing errors (e.g., "Invalid date format. Expected: YYYY-MM-DD").',
474
+ affectedTools,
475
+ potentialImprovement,
476
+ };
477
+ case 'exampleCompleteness':
478
+ return {
479
+ priority,
480
+ category,
481
+ title: 'Expand examples',
482
+ description: 'Run with --full-examples to capture complete output samples for AI agent reference.',
483
+ affectedTools,
484
+ potentialImprovement,
485
+ };
486
+ case 'workflowDocumentation':
487
+ return {
488
+ priority,
489
+ category,
490
+ title: 'Document tool workflows',
491
+ description: 'Add sequence/dependency hints to descriptions (e.g., "Call after create_user" or "Requires valid access_token").',
492
+ affectedTools,
493
+ potentialImprovement,
494
+ };
495
+ case 'responsePredictability':
496
+ return {
497
+ priority,
498
+ category,
499
+ title: 'Stabilize response structures',
500
+ description: 'Ensure tools return consistent field names and types across calls.',
501
+ affectedTools,
502
+ potentialImprovement,
503
+ };
504
+ default:
505
+ return null;
506
+ }
507
+ }
508
+ /**
509
+ * Calculate letter grade from score.
510
+ */
511
+ function calculateGrade(score) {
512
+ if (score >= AI_COMPATIBILITY.GRADE_THRESHOLDS.A)
513
+ return 'A';
514
+ if (score >= AI_COMPATIBILITY.GRADE_THRESHOLDS.B)
515
+ return 'B';
516
+ if (score >= AI_COMPATIBILITY.GRADE_THRESHOLDS.C)
517
+ return 'C';
518
+ if (score >= AI_COMPATIBILITY.GRADE_THRESHOLDS.D)
519
+ return 'D';
520
+ return 'F';
521
+ }
522
/**
 * Build the placeholder result used when a server exposes no tools.
 *
 * Overall score is 0 with grade 'F'; every category gets its own zeroed
 * component carrying a single 'No tools available' note, and there are no
 * recommendations or per-tool scores.
 */
function createEmptyScore() {
    const categories = [
        'descriptionClarity',
        'parameterNaming',
        'errorMessageQuality',
        'exampleCompleteness',
        'workflowDocumentation',
        'responsePredictability',
    ];
    // A fresh component (and notes array) per category so callers can mutate one safely
    const breakdown = Object.fromEntries(categories.map(category => [
        category,
        { score: 0, weight: 0, weightedScore: 0, notes: ['No tools available'] },
    ]));
    return {
        overall: 0,
        grade: 'F',
        breakdown,
        recommendations: [],
        toolScores: [],
    };
}
547
+ /**
548
+ * Generate markdown documentation for AI compatibility score.
549
+ */
550
+ export function generateAICompatibilityMarkdown(score) {
551
+ const lines = [];
552
+ lines.push('## AI Agent Compatibility');
553
+ lines.push('');
554
+ lines.push(`**Overall Score: ${score.overall}/100 (Grade ${score.grade})**`);
555
+ lines.push('');
556
+ // Breakdown table
557
+ lines.push('| Factor | Score | Weight | Notes |');
558
+ lines.push('|--------|-------|--------|-------|');
559
+ const components = [
560
+ { name: 'Description Clarity', key: 'descriptionClarity' },
561
+ { name: 'Parameter Naming', key: 'parameterNaming' },
562
+ { name: 'Error Messages', key: 'errorMessageQuality' },
563
+ { name: 'Example Completeness', key: 'exampleCompleteness' },
564
+ { name: 'Workflow Docs', key: 'workflowDocumentation' },
565
+ { name: 'Response Predictability', key: 'responsePredictability' },
566
+ ];
567
+ for (const { name, key } of components) {
568
+ const component = score.breakdown[key];
569
+ const weightPercent = Math.round(component.weight * 100);
570
+ const notes = component.notes.join('; ') || '-';
571
+ lines.push(`| ${name} | ${component.score}/100 | ${weightPercent}% | ${notes} |`);
572
+ }
573
+ lines.push('');
574
+ // Recommendations
575
+ if (score.recommendations.length > 0) {
576
+ lines.push('### Improvement Recommendations');
577
+ lines.push('');
578
+ for (const rec of score.recommendations) {
579
+ lines.push(`${rec.priority}. **${rec.title}** - ${rec.description}`);
580
+ if (rec.affectedTools && rec.affectedTools.length > 0) {
581
+ const tools = rec.affectedTools.slice(0, 5).map(t => `\`${t}\``).join(', ');
582
+ const more = rec.affectedTools.length > 5 ? ` (+${rec.affectedTools.length - 5} more)` : '';
583
+ lines.push(` - Affects: ${tools}${more}`);
584
+ }
585
+ }
586
+ lines.push('');
587
+ }
588
+ // Low-scoring tools
589
+ const lowScoreTools = score.toolScores.filter(t => t.score < 70);
590
+ if (lowScoreTools.length > 0) {
591
+ lines.push('### Tools Needing Attention');
592
+ lines.push('');
593
+ lines.push('| Tool | Score | Issues |');
594
+ lines.push('|------|-------|--------|');
595
+ for (const tool of lowScoreTools.slice(0, 10)) {
596
+ const issues = tool.issues.slice(0, 2).join('; ') || '-';
597
+ lines.push(`| \`${tool.toolName}\` | ${tool.score}/100 | ${issues} |`);
598
+ }
599
+ if (lowScoreTools.length > 10) {
600
+ lines.push(`| ... | ... | ${lowScoreTools.length - 10} more tools below 70 |`);
601
+ }
602
+ lines.push('');
603
+ }
604
+ return lines.join('\n');
605
+ }
606
+ //# sourceMappingURL=ai-compatibility-scorer.js.map
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Confidence Calibration for Drift Detection
3
+ *
4
+ * Calibrates raw confidence scores to match actual accuracy.
5
+ * A calibrated confidence of 80% means the algorithm is correct ~80% of the time
6
+ * when it reports that confidence level.
7
+ *
8
+ * Calibration is based on evaluation against the golden dataset.
9
+ */
10
+ /**
11
+ * One entry of a calibration model: the accuracy expected for confidence values in the half-open range [min, max).
12
+ */
13
+ export interface CalibrationBucket {
14
+ /** Minimum confidence in this bucket (inclusive) */
15
+ min: number;
16
+ /** Maximum confidence in this bucket (exclusive) */
17
+ max: number;
18
+ /** Calibrated accuracy for this bucket (scale is not visible in this declaration; presumably the same as min/max - confirm against the implementation) */
19
+ calibratedAccuracy: number;
20
+ /** Number of samples used to calculate this bucket */
21
+ sampleCount: number;
22
+ }
23
+ /**
24
+ * Default calibration model (an array of CalibrationBucket entries) based on golden dataset evaluation.
25
+ *
26
+ * These values should be updated as the algorithm improves.
27
+ * Current baseline: v1.0.1 (50 test cases)
28
+ */
29
+ export declare const DEFAULT_CALIBRATION_MODEL: CalibrationBucket[];
30
+ /**
31
+ * Calibrate a raw confidence score to reflect actual accuracy.
32
+ *
33
+ * @param rawScore - Raw confidence score (0-100)
34
+ * @param model - Optional calibration model to use (defaults to DEFAULT_CALIBRATION_MODEL)
35
+ * @returns Calibrated confidence score (presumably on the same 0-100 scale as rawScore; confirm against the implementation)
36
+ */
37
+ export declare function calibrateConfidence(rawScore: number, model?: CalibrationBucket[]): number;
38
+ /**
39
+ * Format confidence score with calibration information.
40
+ *
41
+ * @param rawScore - Raw confidence score
42
+ * @param showRaw - Optional; whether to show raw score alongside calibrated (the default is not visible in this declaration)
43
+ * @returns Formatted string
44
+ */
45
+ export declare function formatCalibratedConfidence(rawScore: number, showRaw?: boolean): string;
46
+ /**
47
+ * Get the confidence label ('high', 'medium', 'low' or 'very-low') for a raw score, based on its calibrated score.
48
+ */
49
+ export declare function getCalibratedConfidenceLabel(rawScore: number): 'high' | 'medium' | 'low' | 'very-low';
50
+ /**
51
+ * Check if a calibrated confidence meets a threshold.
52
+ *
53
+ * @param rawScore - Raw confidence score (calibrated before the comparison, per the description above)
54
+ * @param threshold - Minimum required calibrated confidence (assumed to be on the calibrated score's scale - TODO confirm)
55
+ * @returns True if calibrated confidence meets threshold
56
+ */
57
+ export declare function meetsCalibratedThreshold(rawScore: number, threshold: number): boolean;
58
+ /**
59
+ * Update calibration model based on evaluation results.
60
+ * This recalculates accuracy for each bucket from test results.
61
+ *
62
+ * @param results - Array of {predictedConfidence, wasCorrect} pairs, one per evaluated prediction
63
+ * @returns Updated calibration model as an array of CalibrationBucket entries
64
+ */
65
+ export declare function updateCalibrationModel(results: Array<{
66
+ predictedConfidence: number;
67
+ wasCorrect: boolean;
68
+ }>): CalibrationBucket[];
69
+ /**
70
+ * Calculate calibration error (ECE - Expected Calibration Error).
71
+ * Lower is better. 0 = perfectly calibrated.
72
+ *
73
+ * @param model - Calibration model (array of CalibrationBucket entries) to evaluate
74
+ * @returns ECE as a percentage (0-100)
75
+ */
76
+ export declare function calculateCalibrationError(model: CalibrationBucket[]): number;
77
+ //# sourceMappingURL=calibration.d.ts.map