@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,1681 @@
1
+ import { analyzeDependencies, calculateDependencyStats, generateDependencyMarkdown, } from '../baseline/dependency-analyzer.js';
2
+ import { getSchemaStabilityGrade } from '../baseline/response-schema-tracker.js';
3
+ import { getGradeIndicator } from '../baseline/documentation-scorer.js';
4
+ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock, } from '../utils/index.js';
5
+ import { smartTruncate, getExampleLength } from '../utils/smart-truncate.js';
6
+ import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
7
+ import { analyzeExternalDependencies, formatExternalDependenciesMarkdown, } from '../baseline/external-dependency-detector.js';
8
+ import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, } from '../constants.js';
9
/**
 * Generate CONTRACT.md documentation from check results.
 * Enhanced with examples, error patterns, and performance data.
 * Used by: bellwether check
 *
 * The document is assembled as an array of Markdown lines, in this order:
 * header, overview, capabilities, quick reference table, metrics legend,
 * validation testing, detected issues, performance baseline, security
 * baseline, workflow testing, stateful testing, dependency analysis,
 * semantic types, schema stability, error analysis, external dependencies,
 * external service configuration, response assertions, documentation
 * quality, per-tool details, prompts, resources, error summary, footer.
 * Optional sections are emitted only when their input is present and the
 * corresponding section generator returns non-empty content.
 *
 * @param {object} result - Check result. Reads `discovery`, `toolProfiles`
 *   and `metadata` (`startTime`, `durationMs`, `statefulTesting`,
 *   `externalServices`).
 * @param {object} [options] - Optional rendering inputs. Reads
 *   `securityFingerprints`, `semanticInferences`, `schemaEvolution`,
 *   `errorAnalysisSummaries` (each checked via `.size`), `documentationScore`,
 *   `workflowResults`, `countValidationAsSuccess` (default `true`),
 *   `separateValidationMetrics` (default `true`), `fullExamples`
 *   (default `false`), `exampleLength`, `maxExamplesPerTool`
 *   (default `EXAMPLE_OUTPUT.DEFAULT_EXAMPLES_PER_TOOL`) and
 *   `includeDependencyAnalysis` (default `true`). `targetConfidence` is
 *   accepted for forward compatibility but is not used yet.
 * @returns {string} The Markdown document, with lines joined by '\n'.
 */
export function generateContractMd(result, options) {
    // Quick Reference descriptions are cut to this many characters.
    const QUICK_REF_DESCRIPTION_CHARS = 50;
    // At most this many failed assertions are listed per tool.
    const MAX_LISTED_ASSERTION_FAILURES = 3;
    const lines = [];
    const { discovery, toolProfiles, metadata } = result;
    const securityFingerprints = options?.securityFingerprints;
    const semanticInferences = options?.semanticInferences;
    const schemaEvolution = options?.schemaEvolution;
    const errorAnalysisSummaries = options?.errorAnalysisSummaries;
    const documentationScore = options?.documentationScore;
    const workflowResults = options?.workflowResults;
    const countValidationAsSuccess = options?.countValidationAsSuccess ?? true;
    const separateValidationMetrics = options?.separateValidationMetrics ?? true;
    // Example output configuration
    const fullExamples = options?.fullExamples ?? false;
    const exampleLength = getExampleLength(fullExamples, options?.exampleLength);
    const maxExamplesPerTool = options?.maxExamplesPerTool ?? EXAMPLE_OUTPUT.DEFAULT_EXAMPLES_PER_TOOL;
    // Append a multi-line section only when it actually produced content.
    const appendSection = (section) => {
        if (section.length > 0) {
            lines.push(...section);
        }
    };
    // Header
    lines.push(`# ${discovery.serverInfo.name}`);
    lines.push('');
    lines.push(`> Generated by [Bellwether](https://github.com/dotsetlabs/bellwether) on ${formatDateISO(metadata.startTime)}`);
    lines.push('');
    // Overview
    lines.push('## Overview');
    lines.push('');
    lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
    lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
    lines.push('');
    const performanceMetrics = calculatePerformanceMetrics(toolProfiles);
    const performanceByTool = new Map(performanceMetrics.map(metric => [metric.toolName, metric]));
    // Index profiles by tool name once instead of scanning toolProfiles for
    // every tool. The first profile with a given name wins, matching the
    // semantics of Array.prototype.find.
    const profileByTool = new Map();
    for (const candidate of toolProfiles) {
        if (!profileByTool.has(candidate.name)) {
            profileByTool.set(candidate.name, candidate);
        }
    }
    // Capabilities summary
    lines.push('## Capabilities');
    lines.push('');
    if (discovery.capabilities.tools) {
        lines.push(`- **Tools:** ${discovery.tools.length} available`);
    }
    if (discovery.capabilities.prompts) {
        lines.push(`- **Prompts:** ${discovery.prompts.length} available`);
    }
    if (discovery.capabilities.resources) {
        lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
    }
    if (discovery.capabilities.logging) {
        lines.push('- **Logging:** Supported');
    }
    lines.push('');
    // Quick Reference section with performance data
    if (toolProfiles.length > 0) {
        lines.push('## Quick Reference');
        lines.push('');
        lines.push('| Tool | Parameters | Reliability | P50 | Confidence | Description |');
        lines.push('|------|------------|-------------|-----|------------|-------------|');
        for (const tool of discovery.tools) {
            const params = extractParameters(tool.inputSchema);
            const desc = tool.description?.substring(0, QUICK_REF_DESCRIPTION_CHARS) || 'No description';
            const descDisplay = tool.description && tool.description.length > QUICK_REF_DESCRIPTION_CHARS
                ? desc + '...'
                : desc;
            const profile = profileByTool.get(tool.name);
            const perf = performanceByTool.get(tool.name);
            const successRate = calculateToolSuccessRate(profile, {
                countValidationAsSuccess,
                separateValidationMetrics,
            });
            const p50Display = perf ? `${perf.p50Ms}ms` : '-';
            const confidenceDisplay = formatConfidenceIndicator(perf?.confidence?.confidenceLevel);
            lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${successRate} | ${p50Display} | ${confidenceDisplay} | ${escapeTableCell(descDisplay)} |`);
        }
        lines.push('');
    }
    appendSection(generateMetricsLegendSection());
    appendSection(generateValidationTestingSection(toolProfiles));
    appendSection(generateIssuesDetectedSection(toolProfiles));
    // Performance Baseline section
    appendSection(generateContractPerformanceSection(toolProfiles, performanceMetrics));
    // Security Baseline section (if security testing was performed)
    if (securityFingerprints && securityFingerprints.size > 0) {
        appendSection(generateContractSecuritySection(securityFingerprints));
    }
    // Workflow Testing section (if workflow testing was performed)
    if (workflowResults && workflowResults.length > 0) {
        appendSection(generateWorkflowTestingSection(workflowResults));
    }
    // Stateful Testing section (if enabled)
    appendSection(generateStatefulTestingSection(toolProfiles, metadata.statefulTesting));
    // Dependency Analysis section (auto-generated from tools)
    const includeDependencyAnalysis = options?.includeDependencyAnalysis ?? true;
    if (includeDependencyAnalysis && discovery.tools.length >= 2) {
        const depGraph = analyzeDependencies(discovery.tools);
        if (depGraph.edges.length > 0) {
            const depStats = calculateDependencyStats(depGraph);
            // The dependency section is pushed as a single entry, not spread.
            lines.push(generateDependencyMarkdown(depGraph, depStats));
        }
    }
    // Semantic Types section (if semantic inferences were discovered)
    if (semanticInferences && semanticInferences.size > 0) {
        appendSection(generateSemanticTypesSection(semanticInferences));
    }
    // Schema Stability section (if schema evolution data available)
    if (schemaEvolution && schemaEvolution.size > 0) {
        appendSection(generateSchemaStabilitySection(schemaEvolution));
    }
    // Error Analysis section (if error analysis summaries available)
    if (errorAnalysisSummaries && errorAnalysisSummaries.size > 0) {
        appendSection(generateErrorAnalysisSection(errorAnalysisSummaries));
    }
    // External Dependencies section - analyze errors for external service patterns
    const externalDepAnalysis = analyzeToolsForExternalDependencies(toolProfiles, discovery.tools);
    if (externalDepAnalysis && externalDepAnalysis.services.size > 0) {
        const externalDepSection = formatExternalDependenciesMarkdown(externalDepAnalysis);
        if (externalDepSection.length > 0) {
            // Pushed as a single entry, followed by a separating blank line.
            lines.push(externalDepSection);
            lines.push('');
        }
    }
    // External service configuration section (from config handling)
    appendSection(generateExternalServiceConfigSection(metadata.externalServices));
    // Response Assertions section
    appendSection(generateResponseAssertionsSection(toolProfiles));
    // Documentation Quality section (if documentation score available)
    if (documentationScore) {
        appendSection(generateDocumentationQualitySection(documentationScore));
    }
    // Tools section with examples and error patterns
    if (discovery.tools.length > 0) {
        lines.push('## Tools');
        lines.push('');
        for (const tool of discovery.tools) {
            const profile = profileByTool.get(tool.name);
            lines.push(`### ${tool.name}`);
            lines.push('');
            lines.push(tool.description || 'No description available.');
            lines.push('');
            if (profile?.skipped) {
                lines.push(`*Skipped:* ${profile.skipReason ?? 'External service not configured.'}`);
                lines.push('');
            }
            if (profile?.mocked) {
                const serviceLabel = profile.mockService ? ` (${profile.mockService})` : '';
                lines.push(`*Mocked response used${serviceLabel}.*`);
                lines.push('');
            }
            if (profile?.assertionSummary) {
                lines.push(`*Response assertions:* ${profile.assertionSummary.passed}/${profile.assertionSummary.total} passed`);
                const failures = collectAssertionFailures(profile);
                if (failures.length > 0) {
                    lines.push('Failed assertions:');
                    for (const failure of failures.slice(0, MAX_LISTED_ASSERTION_FAILURES)) {
                        lines.push(`- ${failure}`);
                    }
                    if (failures.length > MAX_LISTED_ASSERTION_FAILURES) {
                        lines.push(`- ... and ${failures.length - MAX_LISTED_ASSERTION_FAILURES} more`);
                    }
                }
                // Always end the assertion note with a blank line. Without it,
                // a summary with no failures would run into the following
                // content as part of the same Markdown paragraph.
                lines.push('');
            }
            if (tool.inputSchema) {
                lines.push('**Input Schema:**');
                const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
                lines.push('```json');
                lines.push(schemaJson.content);
                lines.push('```');
                lines.push('');
            }
            // Add example usage from successful interactions
            appendSection(generateToolExamples(profile, maxExamplesPerTool, exampleLength));
            // Add error patterns if any were observed
            appendSection(generateToolErrorPatterns(profile));
        }
    }
    // Prompts section
    if (discovery.prompts.length > 0) {
        lines.push('## Prompts');
        lines.push('');
        for (const prompt of discovery.prompts) {
            lines.push(`### ${prompt.name}`);
            lines.push('');
            if (prompt.description) {
                lines.push(prompt.description);
                lines.push('');
            }
            if (prompt.arguments && prompt.arguments.length > 0) {
                lines.push('**Arguments:**');
                for (const arg of prompt.arguments) {
                    const required = arg.required ? ' (required)' : '';
                    lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
                }
                lines.push('');
            }
        }
    }
    // Resources section
    const resources = discovery.resources ?? [];
    if (resources.length > 0) {
        lines.push('## Resources');
        lines.push('');
        for (const resource of resources) {
            lines.push(`### ${resource.name}`);
            lines.push('');
            lines.push(`**URI:** \`${resource.uri}\``);
            if (resource.mimeType) {
                lines.push(`**MIME Type:** ${resource.mimeType}`);
            }
            lines.push('');
            if (resource.description) {
                lines.push(resource.description);
                lines.push('');
            }
        }
    }
    // Error Summary section
    appendSection(generateErrorSummarySection(toolProfiles));
    // Metadata footer
    lines.push('---');
    lines.push('');
    lines.push(`*Schema validation completed in ${formatDuration(metadata.durationMs)}.*`);
    return lines.join('\n');
}
279
/**
 * Calculate detailed reliability metrics for a tool.
 *
 * Mocked interactions are excluded. Each remaining interaction is bucketed by
 * `question.expectedOutcome` (defaulting to 'success'):
 *   - 'error'   -> validation test; it passes when an error was observed.
 *   - 'success' -> happy-path test; it passes when no error was observed.
 *   - otherwise -> counted as a happy-path test that always passes.
 * An error counts as observed when the interaction has `error`, the response
 * has `isError`, or the first text content item satisfies `looksLikeError`.
 *
 * @param {object} profile - Tool profile with an `interactions` array (may be null/undefined).
 * @param {object} options - Reads `countValidationAsSuccess` and `separateValidationMetrics`.
 * @returns {object|null} Counts plus percentage rates, or null when there is
 *   no profile or no non-mocked interaction.
 */
function calculateReliabilityMetrics(profile, options) {
    if (!profile) {
        return null;
    }
    const liveInteractions = profile.interactions.filter(entry => !entry.mocked);
    const total = liveInteractions.length;
    if (total === 0) {
        return null;
    }
    const tally = {
        happyPathSuccesses: 0,
        happyPathTotal: 0,
        validationSuccesses: 0,
        validationTotal: 0,
    };
    liveInteractions.forEach((entry) => {
        const expectation = entry.question.expectedOutcome ?? 'success';
        const flaggedError = entry.error || entry.response?.isError;
        const textPart = entry.response?.content?.find(part => part.type === 'text');
        const textLooksBad = textPart && 'text' in textPart && looksLikeError(String(textPart.text));
        const failed = Boolean(flaggedError || textLooksBad);
        switch (expectation) {
            case 'error':
                // Validation test: rejecting the input is the correct outcome.
                tally.validationTotal += 1;
                if (failed) {
                    tally.validationSuccesses += 1;
                }
                break;
            case 'success':
                // Happy-path test: a clean response is the correct outcome.
                tally.happyPathTotal += 1;
                if (!failed) {
                    tally.happyPathSuccesses += 1;
                }
                break;
            default:
                // 'either': any outcome is acceptable.
                tally.happyPathTotal += 1;
                tally.happyPathSuccesses += 1;
        }
    });
    // Percentage of `part` in `whole`, or `fallback` when nothing was counted.
    const percent = (part, whole, fallback) => (whole > 0 ? (part / whole) * 100 : fallback);
    const creditedValidation = options.countValidationAsSuccess ? tally.validationSuccesses : 0;
    const reliabilityRate = percent(tally.happyPathSuccesses + creditedValidation, total, 0);
    const happyPathRate = percent(tally.happyPathSuccesses, tally.happyPathTotal, 100);
    const validationRate = options.separateValidationMetrics
        ? percent(tally.validationSuccesses, tally.validationTotal, 100)
        : 100;
    return {
        total,
        happyPathSuccesses: tally.happyPathSuccesses,
        happyPathTotal: tally.happyPathTotal,
        validationSuccesses: tally.validationSuccesses,
        validationTotal: tally.validationTotal,
        reliabilityRate,
        happyPathRate,
        validationRate,
    };
}
340
/**
 * Render a tool's reliability rate as "<symbol> <whole percent>%".
 *
 * The rate comes from `calculateReliabilityMetrics`; the symbol is chosen by
 * comparing it against the thresholds in `RELIABILITY_DISPLAY`.
 *
 * @param {object} profile - Tool profile forwarded to `calculateReliabilityMetrics`.
 * @param {object} options - Options forwarded to `calculateReliabilityMetrics`.
 * @returns {string} The formatted rate, or '-' when no metrics are available.
 */
function calculateToolSuccessRate(profile, options) {
    const reliability = calculateReliabilityMetrics(profile, options);
    if (!reliability) {
        return '-';
    }
    const percentage = reliability.reliabilityRate;
    let symbol;
    if (percentage >= RELIABILITY_DISPLAY.HIGH_THRESHOLD) {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.PASS;
    }
    else if (percentage >= RELIABILITY_DISPLAY.MEDIUM_THRESHOLD) {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.WARN;
    }
    else {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.FAIL;
    }
    return `${symbol} ${percentage.toFixed(0)}%`;
}
358
/**
 * Format a confidence level as "<indicator> <level>".
 *
 * @param {string} level - Key into `CONFIDENCE_INDICATORS`; may be empty/undefined.
 * @returns {string} The indicator followed by the level, or '-' when no level is given.
 */
function formatConfidenceIndicator(level) {
    return level ? `${CONFIDENCE_INDICATORS[level]} ${level}` : '-';
}
365
/**
 * Build the "Metrics Legend" markdown section.
 *
 * The table explains the reliability symbols from `RELIABILITY_DISPLAY` and
 * the confidence indicators from `CONFIDENCE_INDICATORS`, followed by a short
 * definition of the reliability score.
 *
 * @returns {string[]} Markdown lines, ending with a blank line.
 */
function generateMetricsLegendSection() {
    const { PASS, WARN, FAIL } = RELIABILITY_DISPLAY.SYMBOLS;
    const { high, medium, low } = CONFIDENCE_INDICATORS;
    return [
        '## Metrics Legend',
        '',
        '| Symbol | Meaning |',
        '|--------|---------|',
        `| ${PASS} | All tests passed as expected |`,
        `| ${WARN} | Some unexpected behavior |`,
        `| ${FAIL} | Critical issues detected |`,
        `| ${high} | High confidence in performance metrics |`,
        `| ${medium} | Medium confidence in performance metrics |`,
        `| ${low} | Low confidence in performance metrics |`,
        '',
        '**Reliability Score**: Percentage of tests where the tool behaved as expected',
        '(correct success or correct rejection of invalid input).',
        '',
    ];
}
383
/**
 * Build the "Validation Testing" markdown table.
 *
 * Each profile is summarized into three buckets ('input', 'type', 'required')
 * via `summarizeValidationBucket`. When no bucket of any profile contains a
 * test, nothing is rendered. Otherwise every profile gets one table row.
 *
 * @param {object[]} profiles - Tool profiles; each needs `name` and whatever
 *   `summarizeValidationBucket` reads.
 * @returns {string[]} Markdown lines, or an empty array when there are no validation tests.
 */
function generateValidationTestingSection(profiles) {
    const perTool = profiles.map((profile) => ({
        profile,
        buckets: {
            input: summarizeValidationBucket(profile, 'input'),
            type: summarizeValidationBucket(profile, 'type'),
            required: summarizeValidationBucket(profile, 'required'),
        },
    }));
    const anyValidationTests = perTool.some(({ buckets }) => Object.values(buckets).some(({ total }) => total > 0));
    if (!anyValidationTests) {
        return [];
    }
    const rows = perTool.map(({ profile, buckets }) => {
        const name = escapeTableCell(profile.name);
        const inputCell = formatValidationStatus(buckets.input);
        const typeCell = formatValidationStatus(buckets.type);
        const requiredCell = formatValidationStatus(buckets.required);
        return `| \`${name}\` | ${inputCell} | ${typeCell} | ${requiredCell} |`;
    });
    return [
        '## Validation Testing',
        '',
        '| Tool | Input Validation | Type Checking | Required Params |',
        '|------|------------------|---------------|-----------------|',
        ...rows,
        '',
    ];
}
411
/**
 * Build the "Issues Detected" markdown section.
 *
 * Only non-mocked interactions that carry an `outcomeAssessment` marked as
 * not correct are reported:
 *   - expected 'error' but got 'success'  -> critical ("accepts invalid input")
 *   - expected 'success' but got 'error'  -> warning ("failed on valid input")
 *   - anything else                       -> warning ("returned unexpected outcome")
 * Each group lists at most `DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT` entries and
 * then a line with the number of omitted ones.
 *
 * @param {object[]} profiles - Tool profiles with `name` and `interactions`.
 * @returns {string[]} Markdown lines; always includes the section heading.
 */
function generateIssuesDetectedSection(profiles) {
    const critical = [];
    const warnings = [];
    profiles.forEach((profile) => {
        profile.interactions
            .filter(item => !item.mocked && item.outcomeAssessment && !item.outcomeAssessment.correct)
            .forEach((item) => {
                const { expected, actual } = item.outcomeAssessment;
                const description = item.question.description;
                const label = `\`${escapeTableCell(profile.name)}\``;
                if (expected === 'error' && actual === 'success') {
                    critical.push(`${label} accepts invalid input: ${description}`);
                    return;
                }
                if (expected === 'success' && actual === 'error') {
                    warnings.push(`${label} failed on valid input: ${description}`);
                    return;
                }
                warnings.push(`${label} returned unexpected outcome: ${description}`);
            });
    });
    const section = ['## Issues Detected', ''];
    if (critical.length === 0 && warnings.length === 0) {
        section.push(`${RELIABILITY_DISPLAY.SYMBOLS.PASS} No issues detected in validation or happy-path behavior.`);
        section.push('');
        return section;
    }
    // Render one capped bullet list under the given heading; empty groups render nothing.
    const appendGroup = (heading, issues) => {
        if (issues.length === 0) {
            return;
        }
        section.push(heading);
        issues
            .slice(0, DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT)
            .forEach(issue => section.push(`- ${issue}`));
        if (issues.length > DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT) {
            section.push(`- ... ${issues.length - DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT} more`);
        }
        section.push('');
    };
    appendGroup('### Critical', critical);
    appendGroup('### Warnings', warnings);
    return section;
}
464
/**
 * Count the validation tests of a profile that fall into one bucket.
 *
 * An interaction is counted when it is not mocked, its question expects an
 * 'error' outcome, and `classifyValidationBucket` assigns the question to
 * `bucket`. It is counted as passed when `outcomeAssessment.correct` is truthy.
 *
 * @param {object} profile - Tool profile with an `interactions` array.
 * @param {string} bucket - Bucket name to match against `classifyValidationBucket`.
 * @returns {{total: number, passed: number}} Number of matching tests and how many passed.
 */
function summarizeValidationBucket(profile, bucket) {
    const matching = profile.interactions.filter((entry) => !entry.mocked &&
        entry.question.expectedOutcome === 'error' &&
        classifyValidationBucket(entry.question) === bucket);
    const passed = matching.filter(entry => entry.outcomeAssessment?.correct).length;
    return { total: matching.length, passed };
}
485
/**
 * Classify a validation question into a bucket based on its description.
 *
 * The description is matched case-insensitively, in this order:
 *   - contains "missing" or "required"            -> 'required'
 *   - contains "type", "coercion" or "format"     -> 'type'
 *   - anything else (including no description)    -> 'input'
 *
 * @param {object} question - Question object; `description` is read if present.
 * @returns {'required'|'type'|'input'} The bucket name.
 */
function classifyValidationBucket(question) {
    // A question without a description falls through to the generic bucket
    // instead of throwing on `.toLowerCase()`.
    const description = String(question?.description ?? '').toLowerCase();
    if (/missing|required/.test(description)) {
        return 'required';
    }
    // "invalid type" needs no alternative of its own: "type" already matches it.
    if (/type|coercion|format/.test(description)) {
        return 'type';
    }
    return 'input';
}
495
/**
 * Format a validation bucket summary as a table cell.
 *
 * @param {{total: number, passed: number}} bucket - Counts for one bucket.
 * @returns {string} '-' when the bucket has no tests, otherwise a symbol from
 *   `RELIABILITY_DISPLAY.SYMBOLS` followed by Pass / Fail / Partial and the counts.
 */
function formatValidationStatus(bucket) {
    const { total, passed } = bucket;
    if (total === 0) {
        return '-';
    }
    const symbols = RELIABILITY_DISPLAY.SYMBOLS;
    if (passed === total) {
        return `${symbols.PASS} Pass (${passed}/${total})`;
    }
    return passed === 0
        ? `${symbols.FAIL} Fail (0/${total})`
        : `${symbols.WARN} Partial (${passed}/${total})`;
}
507
/**
 * Generate performance baseline section for CONTRACT.md.
 *
 * Renders, in order: a per-tool latency table, an optional low-confidence
 * warning block, and an optional collapsed table with confidence details plus
 * a legend. Nothing is rendered unless at least one tool has two or more calls.
 *
 * @param {object[]} profiles - Tool profiles; only used to compute metrics when
 *   `metricsOverride` is not supplied.
 * @param {object[]} [metricsOverride] - Precomputed per-tool metrics. Each entry
 *   is read for `toolName`, `callCount`, `errorRate`, `p50Ms`, `p95Ms` and an
 *   optional `confidence` object.
 * @returns {string[]} Markdown lines, or an empty array when there is no meaningful data.
 */
function generateContractPerformanceSection(profiles, metricsOverride) {
    const metrics = metricsOverride ?? calculatePerformanceMetrics(profiles);
    // Only show if we have meaningful data (some tool was called at least twice).
    if (metrics.length === 0 || !metrics.some(m => m.callCount >= 2)) {
        return [];
    }
    // CV thresholds are stored as fractions. Formatting through toFixed keeps
    // floating-point noise (e.g. 0.28 * 100 = 28.000000000000004) out of the
    // report and makes the legend agree with the warning text.
    const asPercent = (fraction) => `${(fraction * 100).toFixed(0)}%`;
    const lines = [];
    lines.push('## Performance Baseline');
    lines.push('');
    lines.push('Response time metrics observed during schema validation:');
    lines.push('');
    lines.push('| Tool | Calls | P50 | P95 | Happy Path % | Confidence |');
    lines.push('|------|-------|-----|-----|--------------|------------|');
    for (const m of metrics) {
        const successRate = ((1 - m.errorRate) * 100).toFixed(0);
        const successEmoji = m.errorRate < 0.1 ? '✓' : m.errorRate < 0.5 ? '⚠' : '✗';
        const confidenceDisplay = formatConfidenceDisplay(m.confidence);
        // Guard against 0 calls edge case - show N/A for latency metrics
        const p50Display = m.callCount > 0 ? `${m.p50Ms}ms` : 'N/A';
        const p95Display = m.callCount > 0 ? `${m.p95Ms}ms` : 'N/A';
        lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.callCount} | ${p50Display} | ${p95Display} | ${successEmoji} ${successRate}% | ${confidenceDisplay} |`);
    }
    lines.push('');
    // Show low confidence warning if any tools have low confidence
    const lowConfidenceTools = metrics.filter(m => m.confidence?.confidenceLevel === 'low');
    if (lowConfidenceTools.length > 0) {
        // Categorize low confidence by reason
        const minSamples = PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES;
        const lowSampleTools = lowConfidenceTools.filter(m => (m.confidence?.successfulSamples ?? 0) < minSamples);
        const highVariabilityTools = lowConfidenceTools.filter(m => (m.confidence?.successfulSamples ?? 0) >= minSamples &&
            (m.confidence?.coefficientOfVariation ?? 0) > PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV);
        lines.push(`> **⚠️ Low Confidence**: ${lowConfidenceTools.length} tool(s) have low statistical confidence.`);
        if (lowSampleTools.length > 0) {
            lines.push(`> - ${lowSampleTools.length} tool(s) have insufficient happy path samples (need ${minSamples}+)`);
        }
        if (highVariabilityTools.length > 0) {
            lines.push(`> - ${highVariabilityTools.length} tool(s) have high response time variability (CV > ${asPercent(PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV)})`);
        }
        lines.push('> Run with `--warmup-runs 3` and `--max-questions 5` for more reliable baselines.');
        lines.push('');
    }
    // Add confidence summary section (collapsed)
    const hasConfidenceData = metrics.some(m => m.confidence);
    if (hasConfidenceData) {
        lines.push('<details>');
        lines.push('<summary>Confidence Metrics Details</summary>');
        lines.push('');
        lines.push('| Tool | Happy Path | Validation | Total | Std Dev | CV | Level |');
        lines.push('|------|------------|------------|-------|---------|-----|-------|');
        for (const m of metrics) {
            if (!m.confidence) {
                continue;
            }
            // Guard against impossible metrics: 0 samples shouldn't have stdDev/CV
            const successfulSamples = m.confidence.successfulSamples ?? m.confidence.sampleCount;
            const validationSamples = m.confidence.validationSamples ?? 0;
            const totalTests = m.confidence.totalTests ?? m.confidence.sampleCount;
            // Use confidence.standardDeviation (from successful samples) for consistency with CV
            const roundedStdDev = Math.round(m.confidence.standardDeviation);
            const stdDevDisplay = successfulSamples > 0 ? `${roundedStdDev}ms` : 'N/A';
            // When stdDev rounds to 0ms, showing high CV is misleading (sub-millisecond noise)
            // In this case, display ~0% to indicate the variability is below measurement threshold
            const rawCV = m.confidence.coefficientOfVariation * 100;
            const cvDisplay = successfulSamples > 0
                ? (roundedStdDev === 0 && rawCV > 1 ? '~0%' : `${rawCV.toFixed(1)}%`)
                : 'N/A';
            const levelLabel = PERFORMANCE_CONFIDENCE.LABELS[m.confidence.confidenceLevel];
            lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${successfulSamples} | ${validationSamples} | ${totalTests} | ${stdDevDisplay} | ${cvDisplay} | ${levelLabel} |`);
        }
        lines.push('');
        lines.push('**Legend:**');
        lines.push(`- **Happy Path**: Successful tests with expected outcome "success" (used for confidence)`);
        lines.push(`- **Validation**: Tests with expected outcome "error" (not used for performance confidence)`);
        lines.push(`- HIGH: ${PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES}+ happy path samples, CV ≤ ${asPercent(PERFORMANCE_CONFIDENCE.HIGH.MAX_CV)}`);
        lines.push(`- MEDIUM: ${PERFORMANCE_CONFIDENCE.MEDIUM.MIN_SAMPLES}+ happy path samples, CV ≤ ${asPercent(PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV)}`);
        lines.push('- LOW: Insufficient happy path samples or high variability');
        lines.push('');
        lines.push('</details>');
        lines.push('');
    }
    return lines;
}
594
/**
 * Format a confidence object as "<indicator> <label>" for a table cell.
 *
 * @param {object} confidence - Object with a `confidenceLevel` key; may be null/undefined.
 * @returns {string} Indicator and label looked up in `PERFORMANCE_CONFIDENCE`,
 *   or '-' when no confidence object is given.
 */
function formatConfidenceDisplay(confidence) {
    if (!confidence) {
        return '-';
    }
    const level = confidence.confidenceLevel;
    const { INDICATORS, LABELS } = PERFORMANCE_CONFIDENCE;
    return `${INDICATORS[level]} ${LABELS[level]}`;
}
605
/**
 * Generate Security Baseline section for CONTRACT.md.
 *
 * Only fingerprints flagged `tested` contribute. The section contains a
 * summary table, then either a clean-status line (no findings) or: a table and
 * collapsed details for critical/high findings, a collapsed table for
 * medium/low/info findings, and per-tool risk scores sorted highest first.
 *
 * @param {Map<string, object>} fingerprints - Tool name to fingerprint; each
 *   fingerprint is read for `tested`, `riskScore` and `findings`.
 * @returns {string[]} Markdown lines, or an empty array when no tool was tested.
 */
function generateContractSecuritySection(fingerprints) {
    const findings = [];
    let testedCount = 0;
    let riskSum = 0;
    for (const [toolName, fingerprint] of fingerprints) {
        if (!fingerprint.tested) {
            continue;
        }
        testedCount += 1;
        riskSum += fingerprint.riskScore;
        for (const finding of fingerprint.findings) {
            findings.push({ ...finding, toolName });
        }
    }
    if (testedCount === 0) {
        return [];
    }
    const hasLevel = (...levels) => (finding) => levels.includes(finding.riskLevel);
    const tableRow = (emoji, finding) => `| ${emoji} ${finding.riskLevel} | \`${escapeTableCell(finding.tool)}\` | ${escapeTableCell(finding.title)} | ${finding.cweId} |`;
    const report = [
        '## Security Baseline',
        '',
        `Security testing performed on ${testedCount} tools.`,
        '',
        // Summary table
        '| Metric | Value |',
        '|--------|-------|',
        `| Tools Tested | ${testedCount} |`,
        `| Total Findings | ${findings.length} |`,
        `| Average Risk Score | ${Math.round(riskSum / testedCount)}/100 |`,
    ];
    // Severity rows appear only for levels that actually have findings.
    const severityRows = [
        ['Critical', 'critical'],
        ['High', 'high'],
        ['Medium', 'medium'],
    ];
    for (const [label, level] of severityRows) {
        const count = findings.filter(hasLevel(level)).length;
        if (count > 0) {
            report.push(`| ${label} Findings | ${count} |`);
        }
    }
    report.push('');
    // Nothing found: report the clean status and stop.
    if (findings.length === 0) {
        report.push('✅ No security vulnerabilities detected during testing.');
        report.push('');
        return report;
    }
    const severe = findings.filter(hasLevel('critical', 'high'));
    if (severe.length > 0) {
        report.push('### Critical and High Severity Findings', '');
        report.push('| Risk | Tool | Finding | CWE |');
        report.push('|------|------|---------|-----|');
        severe.forEach((finding) => {
            report.push(tableRow(finding.riskLevel === 'critical' ? '🔴' : '🟠', finding));
        });
        report.push('');
        // Detailed findings
        report.push('<details>');
        report.push('<summary>Finding Details</summary>');
        report.push('');
        severe.forEach((finding) => {
            report.push(`#### ${finding.title}`);
            report.push('');
            report.push(`**Tool:** \`${finding.tool}\``);
            report.push(`**Parameter:** \`${finding.parameter}\``);
            report.push(`**Risk Level:** ${finding.riskLevel.toUpperCase()}`);
            report.push(`**CWE:** ${finding.cweId}`);
            report.push('');
            report.push(finding.description);
            report.push('');
            report.push('**Remediation:**');
            report.push(finding.remediation);
            report.push('');
        });
        report.push('</details>');
        report.push('');
    }
    // Lower severities go into a collapsed table.
    const minor = findings.filter(hasLevel('medium', 'low', 'info'));
    if (minor.length > 0) {
        report.push('<details>');
        report.push(`<summary>Medium/Low Severity Findings (${minor.length})</summary>`);
        report.push('');
        report.push('| Risk | Tool | Finding | CWE |');
        report.push('|------|------|---------|-----|');
        minor.forEach((finding) => {
            report.push(tableRow(finding.riskLevel === 'medium' ? '🟡' : '🔵', finding));
        });
        report.push('');
        report.push('</details>');
        report.push('');
    }
    // Per-tool risk scores, highest risk first.
    const scoreEmojiFor = (score) => {
        if (score >= 70) {
            return '🔴';
        }
        if (score >= 40) {
            return '🟠';
        }
        return score >= 20 ? '🟡' : '🟢';
    };
    report.push('### Tool Risk Scores');
    report.push('');
    report.push('| Tool | Risk Score | Findings |');
    report.push('|------|------------|----------|');
    Array.from(fingerprints.entries())
        .filter(([, fingerprint]) => fingerprint.tested)
        .map(([name, fingerprint]) => ({
            name,
            riskScore: fingerprint.riskScore,
            findingCount: fingerprint.findings.length,
        }))
        .sort((left, right) => right.riskScore - left.riskScore)
        .forEach(({ name, riskScore, findingCount }) => {
            report.push(`| \`${escapeTableCell(name)}\` | ${scoreEmojiFor(riskScore)} ${riskScore}/100 | ${findingCount} |`);
        });
    report.push('');
    return report;
}
726
/**
 * Generate Workflow Testing section for CONTRACT.md.
 *
 * Produces a summary table, a one-row-per-workflow results table, and then a
 * subsection per workflow with its step table, the failure reason (when it
 * failed and a reason is present), a collapsed mermaid data-flow graph and a
 * collapsed state-change table (each only when data is present).
 *
 * @param {object[]} results - Workflow results; each is read for `success`,
 *   `durationMs`, `steps`, `workflow`, `failureReason`, `dataFlow` and `stateTracking`.
 * @returns {string[]} Markdown lines, or an empty array when there are no results.
 */
function generateWorkflowTestingSection(results) {
    if (results.length === 0) {
        return [];
    }
    const passedStepCount = (result) => result.steps.filter(step => step.success).length;
    const totals = { passed: 0, plannedSteps: 0, passedSteps: 0, durationMs: 0 };
    for (const result of results) {
        if (result.success) {
            totals.passed += 1;
        }
        totals.plannedSteps += result.workflow.steps.length;
        totals.passedSteps += passedStepCount(result);
        totals.durationMs += result.durationMs;
    }
    const out = [
        '## Workflow Testing',
        '',
        'Multi-step workflow tests validate tool chains and state transitions.',
        '',
        // Summary table
        '| Metric | Value |',
        '|--------|-------|',
        `| Workflows | ${results.length} |`,
        `| Passed | ${totals.passed} |`,
        `| Failed | ${results.length - totals.passed} |`,
        `| Total Steps | ${totals.plannedSteps} |`,
        `| Steps Passed | ${totals.passedSteps} |`,
        `| Total Duration | ${formatDuration(totals.durationMs)} |`,
        '',
        // Results table
        '### Results',
        '',
        '| Workflow | Status | Steps | Duration |',
        '|----------|--------|-------|----------|',
    ];
    results.forEach((result) => {
        const verdict = result.success ? '✓ Passed' : '✗ Failed';
        const ratio = `${passedStepCount(result)}/${result.workflow.steps.length}`;
        const elapsed = formatDuration(result.durationMs);
        out.push(`| ${escapeTableCell(result.workflow.name)} | ${verdict} | ${ratio} | ${elapsed} |`);
    });
    out.push('');
    // Note shown in the step table: error text, failed-assertion count, or the optional marker.
    const noteFor = (stepResult, step) => {
        if (stepResult.success) {
            return step.optional ? '(optional)' : '';
        }
        if (stepResult.error) {
            return escapeTableCell(truncateString(stepResult.error, 40));
        }
        if (stepResult.assertionResults?.some(assertion => !assertion.passed)) {
            const failedCount = stepResult.assertionResults.filter(assertion => !assertion.passed).length;
            return `${failedCount} assertion(s) failed`;
        }
        return '';
    };
    // Details for each workflow
    results.forEach((result) => {
        const { workflow } = result;
        out.push(`### ${result.success ? '✓' : '✗'} ${workflow.name}`);
        out.push('');
        out.push(`**ID:** \`${workflow.id}\``);
        if (workflow.description) {
            out.push(`**Description:** ${workflow.description}`);
        }
        out.push(`**Expected Outcome:** ${workflow.expectedOutcome}`);
        out.push('');
        // Step details table
        out.push('| Step | Tool | Status | Duration | Notes |');
        out.push('|------|------|--------|----------|-------|');
        result.steps.forEach((stepResult, index) => {
            const step = workflow.steps[index];
            const outcome = stepResult.success ? '✓ Pass' : '✗ Fail';
            const elapsed = formatDuration(stepResult.durationMs);
            const notes = noteFor(stepResult, step);
            out.push(`| ${index + 1} | \`${escapeTableCell(step.tool)}\` | ${outcome} | ${elapsed} | ${notes} |`);
        });
        out.push('');
        // Failure reason, when the workflow failed and one was recorded.
        if (!result.success && result.failureReason) {
            out.push('**Failure:**');
            out.push(`> ${result.failureReason}`);
            out.push('');
        }
        // Data flow graph, when edges were recorded.
        if (result.dataFlow && result.dataFlow.length > 0) {
            out.push('<details>');
            out.push('<summary>Data Flow</summary>');
            out.push('');
            out.push('```mermaid');
            out.push('graph LR');
            result.dataFlow.forEach((edge) => {
                const source = mermaidLabel(`Step ${edge.fromStep + 1}`);
                const target = mermaidLabel(`Step ${edge.toStep + 1}`);
                const param = mermaidLabel(edge.targetParam);
                out.push(` ${source} -->|${param}| ${target}`);
            });
            out.push('```');
            out.push('');
            out.push('</details>');
            out.push('');
        }
        // State changes, when any were tracked.
        const changes = result.stateTracking?.changes;
        if (changes && changes.length > 0) {
            out.push('<details>');
            out.push('<summary>State Changes</summary>');
            out.push('');
            out.push('| Step | Type | Path |');
            out.push('|------|------|------|');
            changes.forEach((change) => {
                out.push(`| ${change.causedByStep + 1} | ${change.type} | \`${escapeTableCell(change.path)}\` |`);
            });
            out.push('');
            out.push('</details>');
            out.push('');
        }
    });
    return out;
}
843
/**
 * Truncate a string to a maximum length with ellipsis.
 *
 * The result never exceeds `maxLength` characters. When the limit is too
 * small to hold the three-character ellipsis, the string is cut hard without
 * one. (Previously a limit below 3 produced a negative slice index and a
 * result longer than the limit.)
 *
 * @param {string} str - Text to shorten.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string} `str` unchanged when it fits, otherwise a shortened copy.
 */
function truncateString(str, maxLength) {
    if (str.length <= maxLength) {
        return str;
    }
    const ellipsis = '...';
    if (maxLength < ellipsis.length) {
        // No room for the ellipsis; negative limits yield an empty string.
        return str.slice(0, Math.max(0, maxLength));
    }
    return `${str.slice(0, maxLength - ellipsis.length)}${ellipsis}`;
}
851
/**
 * Generate Semantic Types section for CONTRACT.md.
 *
 * Only inferences with confidence >= 0.5 and a type other than 'unknown' are
 * reported. The section has a summary table grouped by semantic type (most
 * parameters first, at most three parameters listed per type) followed by one
 * table per tool. The per-tool tables are wrapped in a collapsed block when
 * more than five inferences are reported.
 *
 * @param {Map<string, object[]>} inferences - Tool name to its inferences;
 *   each inference is read for `paramName`, `inferredType` and `confidence`.
 * @returns {string[]} Markdown lines, or an empty array when nothing qualifies.
 */
function generateSemanticTypesSection(inferences) {
    const lines = [];
    // Collect all inferences with sufficient confidence
    const allInferences = [];
    for (const [toolName, toolInferences] of inferences) {
        for (const inference of toolInferences) {
            if (inference.confidence >= 0.5 && inference.inferredType !== 'unknown') {
                allInferences.push({ ...inference, toolName });
            }
        }
    }
    if (allInferences.length === 0) {
        return [];
    }
    lines.push('## Semantic Types');
    lines.push('');
    lines.push('Parameters with inferred semantic types for enhanced validation:');
    lines.push('');
    // Group by semantic type
    const byType = new Map();
    for (const inf of allInferences) {
        const existing = byType.get(inf.inferredType) ?? [];
        existing.push({
            toolName: inf.toolName,
            paramName: inf.paramName,
            confidence: inf.confidence,
        });
        byType.set(inf.inferredType, existing);
    }
    // Sort by number of parameters (most common types first)
    const sortedTypes = Array.from(byType.entries())
        .sort((a, b) => b[1].length - a[1].length);
    lines.push('| Type | Parameters | Expected Format |');
    lines.push('|------|------------|-----------------|');
    for (const [type, params] of sortedTypes) {
        const displayName = SEMANTIC_VALIDATION.TYPE_DISPLAY_NAMES[type] ?? type;
        const exampleValue = SEMANTIC_VALIDATION.EXAMPLE_VALUES[type] ?? '';
        // Format parameters as tool.param. Names are escaped like every other
        // table cell in this section so a "|" in a name cannot split the row.
        const paramList = params
            .slice(0, 3)
            .map(p => `\`${escapeTableCell(`${p.toolName}.${p.paramName}`)}\``)
            .join(', ');
        const moreCount = params.length > 3 ? ` +${params.length - 3} more` : '';
        // An empty code span renders as stray backticks; show a dash instead.
        const exampleDisplay = exampleValue === '' ? '-' : `\`${exampleValue}\``;
        lines.push(`| ${displayName} | ${paramList}${moreCount} | ${exampleDisplay} |`);
    }
    lines.push('');
    // Detailed list (collapsed when long)
    const collapseDetails = allInferences.length > 5;
    if (collapseDetails) {
        lines.push('<details>');
        lines.push('<summary>All Inferred Semantic Types</summary>');
        lines.push('');
    }
    // Group by tool
    const byTool = new Map();
    for (const inf of allInferences) {
        const existing = byTool.get(inf.toolName) ?? [];
        existing.push(inf);
        byTool.set(inf.toolName, existing);
    }
    for (const [toolName, toolInferences] of byTool) {
        lines.push(`### ${toolName}`);
        lines.push('');
        lines.push('| Parameter | Type | Confidence |');
        lines.push('|-----------|------|------------|');
        for (const inf of toolInferences) {
            const displayName = SEMANTIC_VALIDATION.TYPE_DISPLAY_NAMES[inf.inferredType] ?? inf.inferredType;
            const confidenceDisplay = `${Math.round(inf.confidence * 100)}%`;
            lines.push(`| \`${escapeTableCell(inf.paramName)}\` | ${displayName} | ${confidenceDisplay} |`);
        }
        lines.push('');
    }
    if (collapseDetails) {
        lines.push('</details>');
        lines.push('');
    }
    return lines;
}
932
+ /**
933
+ * Generate Schema Stability section for CONTRACT.md.
934
+ * Documents response schema consistency and stability across tools.
935
+ */
936
+ function generateSchemaStabilitySection(schemaEvolution) {
937
+ const lines = [];
938
+ // Collect tools with meaningful schema data
939
+ const toolsWithSchemas = [];
940
+ for (const [toolName, evolution] of schemaEvolution) {
941
+ if (evolution.sampleCount > 0) {
942
+ const grade = getSchemaStabilityGrade(evolution);
943
+ toolsWithSchemas.push({ name: toolName, evolution, grade });
944
+ }
945
+ }
946
+ if (toolsWithSchemas.length === 0) {
947
+ return [];
948
+ }
949
+ lines.push('## Schema Stability');
950
+ lines.push('');
951
+ lines.push('Response schema consistency metrics for tools with sufficient test samples:');
952
+ lines.push('');
953
+ // Summary stats
954
+ const stableCount = toolsWithSchemas.filter(t => t.evolution.isStable).length;
955
+ const unstableCount = toolsWithSchemas.length - stableCount;
956
+ const avgConfidence = toolsWithSchemas.reduce((sum, t) => sum + t.evolution.stabilityConfidence, 0) / toolsWithSchemas.length;
957
+ lines.push('| Metric | Value |');
958
+ lines.push('|--------|-------|');
959
+ lines.push(`| Tools Analyzed | ${toolsWithSchemas.length} |`);
960
+ lines.push(`| Stable Schemas | ${stableCount} |`);
961
+ lines.push(`| Unstable Schemas | ${unstableCount} |`);
962
+ lines.push(`| Avg Confidence | ${Math.round(avgConfidence * 100)}% |`);
963
+ lines.push('');
964
+ // Overall status
965
+ if (stableCount === toolsWithSchemas.length) {
966
+ lines.push('✅ All tested tools have consistent response schemas.');
967
+ lines.push('');
968
+ }
969
+ else if (unstableCount > 0) {
970
+ lines.push(`⚠️ ${unstableCount} tool(s) have inconsistent response schemas.`);
971
+ lines.push('');
972
+ }
973
+ // Per-tool table
974
+ lines.push('| Tool | Grade | Stability | Confidence | Samples | Issues |');
975
+ lines.push('|------|-------|-----------|------------|---------|--------|');
976
+ // Sort by grade (worst first, then by name)
977
+ const gradeOrder = { 'F': 0, 'D': 1, 'C': 2, 'B': 3, 'A': 4, 'N/A': 5 };
978
+ const sortedTools = [...toolsWithSchemas].sort((a, b) => {
979
+ const gradeCompare = gradeOrder[a.grade] - gradeOrder[b.grade];
980
+ if (gradeCompare !== 0)
981
+ return gradeCompare;
982
+ return a.name.localeCompare(b.name);
983
+ });
984
+ for (const { name, evolution, grade } of sortedTools) {
985
+ const gradeEmoji = getGradeEmoji(grade);
986
+ const stabilityStatus = evolution.isStable
987
+ ? SCHEMA_EVOLUTION.STABILITY_LABELS.STABLE
988
+ : SCHEMA_EVOLUTION.STABILITY_LABELS.UNSTABLE;
989
+ const confidenceDisplay = `${Math.round(evolution.stabilityConfidence * 100)}%`;
990
+ const issues = evolution.inconsistentFields.length > 0
991
+ ? evolution.inconsistentFields.slice(0, 2).join(', ') +
992
+ (evolution.inconsistentFields.length > 2 ? ` +${evolution.inconsistentFields.length - 2}` : '')
993
+ : '-';
994
+ lines.push(`| \`${escapeTableCell(name)}\` | ${gradeEmoji} ${grade} | ${stabilityStatus} | ${confidenceDisplay} | ${evolution.sampleCount} | ${escapeTableCell(issues)} |`);
995
+ }
996
+ lines.push('');
997
+ // Detailed breakdown for unstable tools
998
+ const unstableTools = sortedTools.filter(t => !t.evolution.isStable && t.evolution.inconsistentFields.length > 0);
999
+ if (unstableTools.length > 0) {
1000
+ lines.push('<details>');
1001
+ lines.push('<summary>Unstable Schema Details</summary>');
1002
+ lines.push('');
1003
+ for (const { name, evolution } of unstableTools) {
1004
+ lines.push(`### ${name}`);
1005
+ lines.push('');
1006
+ lines.push(`**Inconsistent Fields:** ${evolution.inconsistentFields.join(', ')}`);
1007
+ lines.push('');
1008
+ lines.push('These fields appear inconsistently across responses, indicating the tool may return');
1009
+ lines.push('different structures depending on input or state.');
1010
+ lines.push('');
1011
+ }
1012
+ lines.push('</details>');
1013
+ lines.push('');
1014
+ }
1015
+ // Grade legend
1016
+ lines.push('<details>');
1017
+ lines.push('<summary>Grade Legend</summary>');
1018
+ lines.push('');
1019
+ lines.push(`- **A**: ${SCHEMA_EVOLUTION.GRADE_THRESHOLDS.A * 100}%+ stability confidence`);
1020
+ lines.push(`- **B**: ${SCHEMA_EVOLUTION.GRADE_THRESHOLDS.B * 100}%+ stability confidence`);
1021
+ lines.push(`- **C**: ${SCHEMA_EVOLUTION.GRADE_THRESHOLDS.C * 100}%+ stability confidence`);
1022
+ lines.push(`- **D**: ${SCHEMA_EVOLUTION.GRADE_THRESHOLDS.D * 100}%+ stability confidence`);
1023
+ lines.push('- **F**: Below minimum threshold');
1024
+ lines.push(`- **N/A**: Insufficient samples (< ${SCHEMA_EVOLUTION.MIN_SAMPLES_FOR_STABILITY})`);
1025
+ lines.push('');
1026
+ lines.push('</details>');
1027
+ lines.push('');
1028
+ return lines;
1029
+ }
1030
/**
 * Get emoji for stability grade.
 *
 * @param grade - Stability grade: 'A', 'B', 'C', 'D', 'F' or 'N/A'.
 * @returns Coloured-circle emoji for the grade. Any unrecognised grade gets
 *   the same neutral marker as 'N/A' instead of `undefined`, so callers that
 *   interpolate the result never render the literal text "undefined".
 */
function getGradeEmoji(grade) {
    switch (grade) {
        case 'A':
        case 'B':
            return '🟢';
        case 'C':
            return '🟡';
        case 'D':
            return '🟠';
        case 'F':
            return '🔴';
        case 'N/A':
        default:
            // Unknown grades are shown like "not enough data".
            return '⚪';
    }
}
1043
/**
 * Build the "Error Analysis" section of CONTRACT.md.
 *
 * Emits, in order: headline metrics, a per-category breakdown, a per-tool
 * table, a collapsed list of root causes / remediations, and a static legend.
 *
 * @param summaries - Iterable of `[toolName, summary]` pairs. Each summary is
 *   read for `totalErrors`, `transientErrors`, `categoryCounts`,
 *   `topRemediations`, `topRootCauses` and `relatedParameters`.
 * @returns Markdown lines, or an empty array when no tool recorded an error.
 */
function generateErrorAnalysisSection(summaries) {
    // Only tools that failed at least once are reported.
    const failing = [];
    for (const [toolName, summary] of summaries) {
        if (summary.totalErrors > 0) {
            failing.push({ name: toolName, summary });
        }
    }
    if (failing.length === 0) {
        return [];
    }
    // Headline numbers, gathered in a single pass.
    let errorTotal = 0;
    let transientTotal = 0;
    const distinctCategories = new Set();
    for (const { summary } of failing) {
        errorTotal = errorTotal + summary.totalErrors;
        transientTotal = transientTotal + summary.transientErrors;
        for (const categoryKey of summary.categoryCounts.keys()) {
            distinctCategories.add(categoryKey);
        }
    }
    const out = [
        '## Error Analysis',
        '',
        'Enhanced error analysis with root causes and remediation suggestions:',
        '',
        '| Metric | Value |',
        '|--------|-------|',
        `| Tools with Errors | ${failing.length} |`,
        `| Total Errors | ${errorTotal} |`,
        `| Error Categories | ${distinctCategories.size} |`,
        `| Transient Errors | ${transientTotal} |`,
        '',
    ];
    // Category totals across every failing tool.
    const totalsByCategory = new Map();
    for (const { summary } of failing) {
        for (const [categoryKey, occurrences] of summary.categoryCounts) {
            const soFar = totalsByCategory.get(categoryKey) ?? 0;
            totalsByCategory.set(categoryKey, soFar + occurrences);
        }
    }
    if (totalsByCategory.size > 0) {
        out.push('### Error Categories');
        out.push('');
        out.push('| Category | Count | Description |');
        out.push('|----------|-------|-------------|');
        // Most frequent category first.
        const ranked = [...totalsByCategory.entries()].sort((left, right) => right[1] - left[1]);
        for (const [categoryKey, occurrences] of ranked) {
            const displayName = ERROR_ANALYSIS.CATEGORY_LABELS[categoryKey] ?? categoryKey;
            const icon = getCategoryEmoji(categoryKey);
            const description = escapeTableCell(formatCategoryDescription(categoryKey));
            out.push(`| ${icon} ${displayName} | ${occurrences} | ${description} |`);
        }
        out.push('');
    }
    // One row per tool, noisiest tool first.
    out.push('### By Tool');
    out.push('');
    out.push('| Tool | Total | Transient | Top Category | Remediation |');
    out.push('|------|-------|-----------|--------------|-------------|');
    const byErrorCount = Array.from(failing).sort((left, right) => right.summary.totalErrors - left.summary.totalErrors);
    for (const { name, summary } of byErrorCount) {
        const dominant = getTopCategory(summary.categoryCounts);
        let dominantLabel = '-';
        if (dominant) {
            dominantLabel = ERROR_ANALYSIS.CATEGORY_LABELS[dominant] ?? dominant;
        }
        const firstFix = summary.topRemediations[0] ?? '-';
        // Keep the table narrow: cap the remediation text at 50 characters.
        const shortFix = firstFix.length > 50 ? `${firstFix.slice(0, 47)}...` : firstFix;
        out.push(`| \`${escapeTableCell(name)}\` | ${summary.totalErrors} | ${summary.transientErrors} | ${dominantLabel} | ${escapeTableCell(shortFix)} |`);
    }
    out.push('');
    // Collapsed per-tool detail, limited to the configured number of tools.
    const withFixes = byErrorCount.filter((entry) => entry.summary.topRemediations.length > 0);
    if (withFixes.length > 0) {
        out.push('<details>');
        out.push('<summary>Remediation Suggestions</summary>');
        out.push('');
        const shown = withFixes.slice(0, ERROR_ANALYSIS.MAX_REMEDIATIONS_DISPLAY);
        for (const { name, summary } of shown) {
            out.push(`### ${name}`);
            out.push('');
            if (summary.topRootCauses.length > 0) {
                out.push('**Root Causes:**');
                for (const rootCause of summary.topRootCauses) {
                    out.push(`- ${rootCause}`);
                }
                out.push('');
            }
            if (summary.topRemediations.length > 0) {
                out.push('**Suggested Remediations:**');
                for (const fix of summary.topRemediations) {
                    out.push(`- ${fix}`);
                }
                out.push('');
            }
            if (summary.relatedParameters.length > 0) {
                out.push(`**Related Parameters:** ${summary.relatedParameters.join(', ')}`);
                out.push('');
            }
        }
        out.push('</details>');
        out.push('');
    }
    // Static legend explaining the category names.
    out.push('<details>');
    out.push('<summary>Category Legend</summary>');
    out.push('');
    out.push('- **Validation Error (400)**: Client sent invalid input that failed validation');
    out.push('- **Authentication Error (401)**: Missing or invalid authentication credentials');
    out.push('- **Not Found (404)**: Requested resource does not exist');
    out.push('- **Conflict (409)**: Request conflicts with current state');
    out.push('- **Rate Limited (429)**: Too many requests, retry after delay');
    out.push('- **Server Error (5xx)**: Internal server error, may be transient');
    out.push('');
    out.push('</details>');
    out.push('');
    return out;
}
1166
/**
 * Pick the emoji shown next to an error category.
 *
 * @param category - Error category key such as 'server_error'.
 * @returns The matching emoji, or '❓' for any category not listed here.
 */
function getCategoryEmoji(category) {
    const emojiByCategory = new Map([
        ['client_error_validation', '⚠️'],
        ['client_error_auth', '🔐'],
        ['client_error_not_found', '🔍'],
        ['client_error_conflict', '💥'],
        ['client_error_rate_limit', '⏱️'],
        ['server_error', '🔥'],
    ]);
    if (emojiByCategory.has(category)) {
        return emojiByCategory.get(category);
    }
    return '❓';
}
1180
/**
 * Describe an error category in one short, human-readable sentence.
 *
 * @param category - Error category key such as 'client_error_auth'.
 * @returns The description, or 'Unknown error category' for unlisted keys.
 */
function formatCategoryDescription(category) {
    const descriptions = new Map([
        ['client_error_validation', 'Invalid input or missing required parameters'],
        ['client_error_auth', 'Authentication or authorization failure'],
        ['client_error_not_found', 'Resource not found or does not exist'],
        ['client_error_conflict', 'Conflict with current resource state'],
        ['client_error_rate_limit', 'Rate limit exceeded, retry after delay'],
        ['server_error', 'Internal server error, may be transient'],
    ]);
    return descriptions.has(category)
        ? descriptions.get(category)
        : 'Unknown error category';
}
1201
/**
 * Find the category with the highest count.
 *
 * @param counts - Iterable of `[category, count]` pairs (e.g. a Map).
 * @returns The category whose count is largest; the first one wins on ties.
 *   Returns `undefined` when there are no entries or no count exceeds zero.
 */
function getTopCategory(counts) {
    // [category, count] of the current leader; a count must beat 0 to lead.
    let leader = [undefined, 0];
    for (const [candidate, occurrences] of counts) {
        if (occurrences > leader[1]) {
            leader = [candidate, occurrences];
        }
    }
    return leader[0];
}
1215
/**
 * Build the "Documentation Quality" section of CONTRACT.md.
 *
 * @param score - Documentation score; read for `grade`, `overallScore`,
 *   `components`, `issues` and `suggestions`.
 * @returns Markdown lines: overall score, component table, issue summary
 *   (with per-issue details only when there are at most 10 issues),
 *   suggestions, and a collapsed grade-threshold legend.
 */
function generateDocumentationQualitySection(score) {
    const out = [];
    const emit = (...rows) => {
        out.push(...rows);
    };
    emit('## Documentation Quality', '');
    // Overall score with grade badge.
    const badge = getGradeIndicator(score.grade);
    emit(`**Overall Score:** ${badge} ${score.overallScore}/100 (${score.grade})`, '');
    // One row per scoring component, alongside its configured weight.
    emit('### Score Components', '');
    emit('| Component | Score | Weight |');
    emit('|-----------|-------|--------|');
    const weights = DOCUMENTATION_SCORING.WEIGHTS;
    const componentRows = [
        ['Description Coverage', 'descriptionCoverage'],
        ['Description Quality', 'descriptionQuality'],
        ['Parameter Documentation', 'parameterDocumentation'],
        ['Example Coverage', 'exampleCoverage'],
    ];
    for (const [label, key] of componentRows) {
        emit(`| ${label} | ${score.components[key]}% | ${(weights[key] * 100).toFixed(0)}% |`);
    }
    emit('');
    if (score.issues.length > 0) {
        emit('### Issues', '');
        // Bucket issues by type, keeping first-seen order.
        const buckets = new Map();
        for (const issue of score.issues) {
            if (!buckets.has(issue.type)) {
                buckets.set(issue.type, []);
            }
            buckets.get(issue.type).push(issue);
        }
        emit('| Issue Type | Count | Severity |');
        emit('|------------|-------|----------|');
        for (const [issueType, members] of buckets) {
            // The first issue of a type stands in for the type's severity.
            const severity = members[0].severity;
            let marker = '🔵';
            if (severity === 'error') {
                marker = '🔴';
            }
            else if (severity === 'warning') {
                marker = '🟡';
            }
            emit(`| ${formatIssueTypeLabel(issueType)} | ${members.length} | ${marker} ${severity} |`);
        }
        emit('');
        // Per-issue details are listed only for short issue lists.
        if (score.issues.length <= 10) {
            emit('<details>', '<summary>Issue Details</summary>', '');
            for (const issue of score.issues) {
                emit(`- **${issue.tool}**: ${issue.message}`);
            }
            emit('', '</details>', '');
        }
    }
    if (score.suggestions.length > 0) {
        emit('### Improvement Suggestions', '');
        for (const tip of score.suggestions) {
            emit(`- ${tip}`);
        }
        emit('');
    }
    // Grade thresholds reference.
    const limits = DOCUMENTATION_SCORING.GRADE_THRESHOLDS;
    emit('<details>', '<summary>Grade Thresholds</summary>', '');
    emit(`- **A**: ${limits.A}+`);
    emit(`- **B**: ${limits.B}-${limits.A - 1}`);
    emit(`- **C**: ${limits.C}-${limits.B - 1}`);
    emit(`- **D**: ${limits.D}-${limits.C - 1}`);
    emit(`- **F**: Below ${limits.D}`);
    emit('', '</details>', '');
    return out;
}
1295
/**
 * Turn an issue-type key into a display label.
 *
 * @param type - Snake-case issue type, e.g. 'missing_param_description'.
 * @returns The curated label for known types; otherwise each
 *   underscore-separated word is capitalised and the words joined by spaces.
 */
function formatIssueTypeLabel(type) {
    const curatedLabels = new Map([
        ['missing_description', 'Missing Description'],
        ['short_description', 'Short Description'],
        ['missing_param_description', 'Missing Parameter Description'],
        ['no_examples', 'No Examples'],
    ]);
    if (curatedLabels.has(type)) {
        return curatedLabels.get(type);
    }
    const words = type.split('_');
    const capitalised = words.map((word) => word.charAt(0).toUpperCase() + word.slice(1));
    return capitalised.join(' ');
}
1312
/**
 * Generate example usage for a tool from successful interactions.
 *
 * An interaction qualifies when it has no `error`, its response is not
 * flagged `isError`, and it carries a non-empty text content item that
 * `looksLikeError` does not reject. Examples are de-duplicated by their
 * JSON-serialised arguments.
 *
 * The arguments are recorded as "seen" only once the interaction is known to
 * yield a usable example. Recording them earlier would let an interaction
 * with empty or missing text block a later, valid interaction that happens to
 * use the same arguments.
 *
 * @param profile - Tool profile with interactions
 * @param maxExamples - Maximum number of examples to include
 * @param maxExampleLength - Maximum length for each example response (uses smart truncation)
 * @returns Markdown lines for the examples, or an empty array if none qualify
 */
function generateToolExamples(profile, maxExamples, maxExampleLength = EXAMPLE_OUTPUT.DEFAULT_LENGTH) {
    if (!profile || profile.interactions.length === 0) {
        return [];
    }
    // Text of the first 'text' content item, or undefined when there is none.
    const readResponseText = (interaction) => {
        const textContent = interaction.response?.content?.find(c => c.type === 'text');
        return textContent && 'text' in textContent ? String(textContent.text) : undefined;
    };
    const examples = [];
    const seenArgsHashes = new Set();
    for (const interaction of profile.interactions) {
        if (examples.length >= maxExamples)
            break;
        if (interaction.error || interaction.response?.isError)
            continue;
        const responseText = readResponseText(interaction);
        // Nothing to show, or the text itself reads like an error message.
        if (!responseText || looksLikeError(responseText))
            continue;
        // De-duplicate only among interactions that can actually be shown.
        const argsHash = JSON.stringify(interaction.question.args);
        if (seenArgsHashes.has(argsHash))
            continue;
        seenArgsHashes.add(argsHash);
        // Use smart truncation to preserve structure
        const truncated = smartTruncate(responseText, { maxLength: maxExampleLength });
        examples.push({
            args: interaction.question.args,
            response: truncated.content,
            wasTruncated: truncated.wasTruncated,
        });
    }
    if (examples.length === 0) {
        return [];
    }
    const lines = [];
    lines.push(`**Example${examples.length > 1 ? 's' : ''}:**`);
    lines.push('');
    examples.forEach((example, index) => {
        // Number the examples only when there is more than one.
        if (examples.length > 1) {
            lines.push(`*Example ${index + 1}:*`);
        }
        // Show input
        lines.push('Input:');
        const inputJson = validateJsonForCodeBlock(example.args);
        lines.push('```json');
        lines.push(inputJson.content);
        lines.push('```');
        // Show output (with truncation note if applicable)
        lines.push(example.wasTruncated ? 'Output (truncated):' : 'Output:');
        const outputJson = validateJsonForCodeBlock(example.response);
        lines.push('```');
        lines.push(outputJson.content);
        lines.push('```');
        lines.push('');
    });
    return lines;
}
1389
/**
 * Build the per-tool "Error Patterns" list.
 *
 * Non-mocked interactions that failed (explicit `error`, `isError` response,
 * or response text that `looksLikeError`) are grouped by `categorizeError`.
 * One sample message per category is printed, cut to 100 characters.
 *
 * @param profile - Tool profile with interactions
 * @returns Markdown lines, or an empty array when there is nothing to report
 */
function generateToolErrorPatterns(profile) {
    if (!profile || profile.interactions.length === 0) {
        return [];
    }
    // category -> up to two sample messages, in first-seen order
    const samplesByCategory = new Map();
    for (const interaction of profile.interactions) {
        if (interaction.mocked) {
            continue;
        }
        const textPart = interaction.response?.content?.find((part) => part.type === 'text');
        const responseText = textPart && 'text' in textPart ? String(textPart.text) : '';
        const failed = interaction.error || interaction.response?.isError || looksLikeError(responseText);
        if (!failed) {
            continue;
        }
        // Prefer the explicit error; fall back to the response text.
        const errorContent = interaction.error || responseText;
        if (!errorContent) {
            continue;
        }
        const category = categorizeError(errorContent);
        const samples = samplesByCategory.get(category) || [];
        if (samples.length < 2) {
            samples.push(errorContent.length > 100 ? errorContent.slice(0, 97) + '...' : errorContent);
        }
        samplesByCategory.set(category, samples);
    }
    if (samplesByCategory.size === 0) {
        return [];
    }
    const out = ['**Error Patterns:**', ''];
    for (const [category, samples] of samplesByCategory) {
        out.push(`- **${category}**: ${samples[0]}`);
    }
    out.push('');
    return out;
}
1431
/**
 * Classify an error message by keyword.
 *
 * Rules are tried in order and the first match wins, so a message such as
 * "missing required field" is reported as NotFound rather than Validation.
 *
 * @param errorText - Raw error message
 * @returns 'Permission', 'NotFound', 'Validation', 'Timeout', 'Network', or
 *   'Other' when no rule matches
 */
function categorizeError(errorText) {
    const rules = [
        ['Permission', /permission|denied|not allowed|forbidden|unauthorized/i],
        ['NotFound', /not found|does not exist|no such|cannot find|missing/i],
        ['Validation', /invalid|validation|required|must be|expected|type error/i],
        ['Timeout', /timeout|timed out|deadline/i],
        ['Network', /connect|network|econnrefused|socket/i],
    ];
    const haystack = errorText.toLowerCase();
    const firstMatch = rules.find(([, pattern]) => pattern.test(haystack));
    return firstMatch ? firstMatch[0] : 'Other';
}
1453
/**
 * Generate error summary section aggregating errors across all tools.
 *
 * Non-mocked interactions that failed (explicit `error`, `isError` response,
 * or response text that `looksLikeError`) are counted per `categorizeError`
 * category, together with the set of tools that produced them.
 *
 * Tool names are passed through `escapeTableCell` before being placed in the
 * table, as the other tables in this file do, so a name containing a pipe
 * cannot break the row.
 *
 * @param profiles - Tool profiles with interaction data
 * @returns Markdown lines, or an empty array when no errors were observed
 */
function generateErrorSummarySection(profiles) {
    // category -> { count, tools }
    const categoryStats = new Map();
    for (const profile of profiles) {
        for (const interaction of profile.interactions) {
            if (interaction.mocked) {
                continue;
            }
            const textContent = interaction.response?.content?.find(c => c.type === 'text');
            const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
            const isError = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!isError)
                continue;
            // Prefer the explicit error; fall back to the response text.
            const errorContent = interaction.error || responseText;
            if (!errorContent)
                continue;
            const category = categorizeError(errorContent);
            let stats = categoryStats.get(category);
            if (!stats) {
                stats = { count: 0, tools: new Set() };
                categoryStats.set(category, stats);
            }
            stats.count++;
            stats.tools.add(profile.name);
        }
    }
    if (categoryStats.size === 0) {
        return [];
    }
    const lines = [];
    lines.push('## Error Patterns Summary');
    lines.push('');
    lines.push('Errors observed during schema validation:');
    lines.push('');
    lines.push('| Category | Count | Affected Tools |');
    lines.push('|----------|-------|----------------|');
    for (const [category, { count, tools }] of categoryStats) {
        // Name at most three tools; summarise the rest as "+N more".
        const toolList = Array.from(tools)
            .slice(0, 3)
            .map(t => `\`${escapeTableCell(t)}\``)
            .join(', ');
        const more = tools.size > 3 ? ` +${tools.size - 3} more` : '';
        lines.push(`| ${category} | ${count} | ${toolList}${more} |`);
    }
    lines.push('');
    return lines;
}
1501
/**
 * Analyze tool profiles for external dependency errors.
 *
 * For every profile, failed non-mocked interactions are grouped into
 * patterns keyed by `<category>:<first 50 characters of the message>`.
 * Profiles that produced at least one pattern are handed to
 * `analyzeExternalDependencies`.
 *
 * @param profiles - Tool profiles with interaction data
 * @param tools - MCPTool definitions for description context
 * @returns Whatever `analyzeExternalDependencies` returns, or null when no
 *   profile produced an error pattern
 */
function analyzeToolsForExternalDependencies(profiles, tools) {
    const collected = [];
    for (const profile of profiles) {
        // pattern key -> { category, count, example }
        const buckets = new Map();
        for (const interaction of profile.interactions) {
            if (interaction.mocked) {
                continue;
            }
            const textPart = interaction.response?.content?.find((part) => part.type === 'text');
            const responseText = textPart && 'text' in textPart ? String(textPart.text) : '';
            const failed = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!failed) {
                continue;
            }
            // Prefer the explicit error; fall back to the response text.
            const errorContent = interaction.error || responseText;
            if (!errorContent) {
                continue;
            }
            // Simple categorization for pattern hashing
            const category = categorizeError(errorContent);
            const patternKey = `${category}:${errorContent.slice(0, 50)}`;
            const bucket = buckets.get(patternKey);
            if (bucket) {
                bucket.count++;
            }
            else {
                buckets.set(patternKey, { category, count: 1, example: errorContent });
            }
        }
        if (buckets.size === 0) {
            continue;
        }
        // Convert to ErrorPattern format
        const patterns = [];
        for (const [patternKey, bucket] of buckets) {
            patterns.push({
                category: mapCategoryToErrorCategory(bucket.category),
                patternHash: patternKey,
                example: bucket.example,
                count: bucket.count,
            });
        }
        const matchingTool = tools.find((candidate) => candidate.name === profile.name);
        collected.push({
            toolName: profile.name,
            toolDescription: matchingTool?.description,
            patterns,
        });
    }
    return collected.length === 0 ? null : analyzeExternalDependencies(collected);
}
1564
/**
 * Translate a display category into the ErrorPattern category value.
 *
 * Matching ignores case. 'Network', 'Other' and anything unrecognised all
 * map to 'unknown'.
 *
 * @param category - Category name such as 'NotFound'
 * @returns 'permission', 'not_found', 'validation', 'timeout' or 'unknown'
 */
function mapCategoryToErrorCategory(category) {
    const known = new Map([
        ['permission', 'permission'],
        ['notfound', 'not_found'],
        ['validation', 'validation'],
        ['timeout', 'timeout'],
    ]);
    const normalised = category.toLowerCase();
    return known.has(normalised) ? known.get(normalised) : 'unknown';
}
1583
/**
 * Build the "Stateful Testing" section.
 *
 * @param toolProfiles - Tool profiles; only those with `dependencyInfo` are
 *   listed, ordered by `dependencyInfo.sequencePosition` (missing counts as 0)
 * @param summary - Stateful testing summary; read for `enabled`, `toolCount`
 *   and `dependencyCount`
 * @returns Markdown lines with a dependency table and, when there are between
 *   1 and 50 dependency edges, a Mermaid graph. Empty when stateful testing
 *   is not enabled or no profile carries dependency info.
 */
function generateStatefulTestingSection(toolProfiles, summary) {
    if (!summary?.enabled) {
        return [];
    }
    const positionOf = (profile) => profile.dependencyInfo?.sequencePosition ?? 0;
    const ordered = toolProfiles
        .filter((profile) => profile.dependencyInfo)
        .sort((left, right) => positionOf(left) - positionOf(right));
    if (ordered.length === 0) {
        return [];
    }
    const out = [
        '## Stateful Testing',
        '',
        `Stateful testing executed across ${summary.toolCount} tool(s) with ${summary.dependencyCount} dependency edge(s).`,
        '',
        '| Tool | Sequence | Depends On |',
        '|------|----------|------------|',
    ];
    // Collect graph edges while writing the table rows.
    const edges = [];
    for (const profile of ordered) {
        const upstream = profile.dependencyInfo?.dependsOn;
        const dependsOnText = upstream?.length ? upstream.join(', ') : 'None';
        out.push(`| \`${escapeTableCell(profile.name)}\` | ${positionOf(profile)} | ${escapeTableCell(dependsOnText)} |`);
        (upstream ?? []).forEach((dependency) => {
            edges.push({ from: dependency, to: profile.name });
        });
    }
    out.push('');
    // The diagram is drawn only for 1 to 50 edges.
    const drawGraph = edges.length > 0 && edges.length <= 50;
    if (drawGraph) {
        out.push('```mermaid');
        out.push('graph TD');
        for (const { from, to } of edges) {
            out.push(` ${mermaidLabel(from)} --> ${mermaidLabel(to)}`);
        }
        out.push('```');
        out.push('');
    }
    return out;
}
1618
/**
 * Build the "External Service Setup" section.
 *
 * @param summary - External services summary; read for `mode` and
 *   `unconfiguredServices`
 * @returns Markdown lines with one remediation bullet per unconfigured
 *   service that has an entry in `EXTERNAL_DEPENDENCIES.SERVICES` (services
 *   without an entry are skipped). Empty when there is no summary or no
 *   unconfigured service.
 */
function generateExternalServiceConfigSection(summary) {
    const nothingToReport = !summary || summary.unconfiguredServices.length === 0;
    if (nothingToReport) {
        return [];
    }
    const out = [
        '## External Service Setup',
        '',
        `Mode: \`${summary.mode}\``,
        '',
    ];
    for (const serviceName of summary.unconfiguredServices) {
        const service = EXTERNAL_DEPENDENCIES.SERVICES[serviceName];
        if (service) {
            out.push(`- **${service.name}**: ${service.remediation}`);
        }
    }
    out.push('');
    return out;
}
1635
/**
 * Build the "Response Assertions" section.
 *
 * @param toolProfiles - Tool profiles; only those with an `assertionSummary`
 *   are listed
 * @returns Markdown lines: a passed/failed table, followed by up to three
 *   distinct failure messages for each tool with failures. Empty when no
 *   profile has an assertion summary.
 */
function generateResponseAssertionsSection(toolProfiles) {
    const asserted = toolProfiles.filter((profile) => profile.assertionSummary);
    if (asserted.length === 0) {
        return [];
    }
    const out = [
        '## Response Assertions',
        '',
        '| Tool | Passed | Failed |',
        '|------|--------|--------|',
    ];
    for (const profile of asserted) {
        const counts = profile.assertionSummary;
        out.push(`| \`${escapeTableCell(profile.name)}\` | ${counts.passed} | ${counts.failed} |`);
    }
    out.push('');
    const failing = asserted.filter((profile) => (profile.assertionSummary?.failed ?? 0) > 0);
    if (failing.length === 0) {
        return out;
    }
    out.push('### Assertion Failures');
    out.push('');
    for (const profile of failing) {
        const messages = collectAssertionFailures(profile);
        // Show at most three messages and mark any overflow.
        const shown = messages.slice(0, 3).join('; ');
        const overflow = messages.length > 3 ? ' ...' : '';
        out.push(`- \`${profile.name}\`: ${shown}${overflow}`);
    }
    out.push('');
    return out;
}
1661
/**
 * List the distinct assertion-failure messages recorded for a tool.
 *
 * Mocked interactions are ignored. A failed result is rendered as
 * "<type>: <message>" when it has a message, and "<type> failed" otherwise.
 *
 * @param profile - Tool profile with interactions
 * @returns Unique failure messages in first-seen order
 */
function collectAssertionFailures(profile) {
    const unique = new Set();
    for (const interaction of profile.interactions) {
        if (interaction.mocked) {
            continue;
        }
        const results = interaction.assertionResults ?? [];
        for (const { passed, type, message } of results) {
            if (!passed) {
                unique.add(message ? `${type}: ${message}` : `${type} failed`);
            }
        }
    }
    return [...unique];
}
1676
+ /**
1677
+ * Generate AGENTS.md documentation from explore results.
1678
+ * Full LLM-powered behavioral documentation with persona findings.
1679
+ * Used by: bellwether explore
1680
+ */
1681
+ //# sourceMappingURL=contract.js.map