@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403) hide show
  1. package/CHANGELOG.md +291 -0
  2. package/LICENSE +21 -0
  3. package/README.md +739 -0
  4. package/dist/auth/credentials.d.ts +64 -0
  5. package/dist/auth/credentials.js +218 -0
  6. package/dist/auth/index.d.ts +6 -0
  7. package/dist/auth/index.js +6 -0
  8. package/dist/auth/keychain.d.ts +64 -0
  9. package/dist/auth/keychain.js +268 -0
  10. package/dist/baseline/ab-testing.d.ts +80 -0
  11. package/dist/baseline/ab-testing.js +236 -0
  12. package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
  13. package/dist/baseline/ai-compatibility-scorer.js +606 -0
  14. package/dist/baseline/calibration.d.ts +77 -0
  15. package/dist/baseline/calibration.js +136 -0
  16. package/dist/baseline/category-matching.d.ts +85 -0
  17. package/dist/baseline/category-matching.js +289 -0
  18. package/dist/baseline/change-impact-analyzer.d.ts +98 -0
  19. package/dist/baseline/change-impact-analyzer.js +592 -0
  20. package/dist/baseline/comparator.d.ts +64 -0
  21. package/dist/baseline/comparator.js +916 -0
  22. package/dist/baseline/confidence.d.ts +55 -0
  23. package/dist/baseline/confidence.js +122 -0
  24. package/dist/baseline/converter.d.ts +61 -0
  25. package/dist/baseline/converter.js +585 -0
  26. package/dist/baseline/dependency-analyzer.d.ts +89 -0
  27. package/dist/baseline/dependency-analyzer.js +567 -0
  28. package/dist/baseline/deprecation-tracker.d.ts +133 -0
  29. package/dist/baseline/deprecation-tracker.js +322 -0
  30. package/dist/baseline/diff.d.ts +55 -0
  31. package/dist/baseline/diff.js +1584 -0
  32. package/dist/baseline/documentation-scorer.d.ts +205 -0
  33. package/dist/baseline/documentation-scorer.js +466 -0
  34. package/dist/baseline/embeddings.d.ts +118 -0
  35. package/dist/baseline/embeddings.js +251 -0
  36. package/dist/baseline/error-analyzer.d.ts +198 -0
  37. package/dist/baseline/error-analyzer.js +721 -0
  38. package/dist/baseline/evaluation/evaluator.d.ts +42 -0
  39. package/dist/baseline/evaluation/evaluator.js +323 -0
  40. package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
  41. package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
  42. package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
  43. package/dist/baseline/evaluation/golden-dataset.js +717 -0
  44. package/dist/baseline/evaluation/index.d.ts +15 -0
  45. package/dist/baseline/evaluation/index.js +15 -0
  46. package/dist/baseline/evaluation/types.d.ts +186 -0
  47. package/dist/baseline/evaluation/types.js +8 -0
  48. package/dist/baseline/external-dependency-detector.d.ts +181 -0
  49. package/dist/baseline/external-dependency-detector.js +524 -0
  50. package/dist/baseline/golden-output.d.ts +162 -0
  51. package/dist/baseline/golden-output.js +636 -0
  52. package/dist/baseline/health-scorer.d.ts +174 -0
  53. package/dist/baseline/health-scorer.js +451 -0
  54. package/dist/baseline/incremental-checker.d.ts +97 -0
  55. package/dist/baseline/incremental-checker.js +174 -0
  56. package/dist/baseline/index.d.ts +31 -0
  57. package/dist/baseline/index.js +42 -0
  58. package/dist/baseline/migration-generator.d.ts +137 -0
  59. package/dist/baseline/migration-generator.js +554 -0
  60. package/dist/baseline/migrations.d.ts +60 -0
  61. package/dist/baseline/migrations.js +197 -0
  62. package/dist/baseline/performance-tracker.d.ts +214 -0
  63. package/dist/baseline/performance-tracker.js +577 -0
  64. package/dist/baseline/pr-comment-generator.d.ts +117 -0
  65. package/dist/baseline/pr-comment-generator.js +546 -0
  66. package/dist/baseline/response-fingerprint.d.ts +127 -0
  67. package/dist/baseline/response-fingerprint.js +728 -0
  68. package/dist/baseline/response-schema-tracker.d.ts +129 -0
  69. package/dist/baseline/response-schema-tracker.js +420 -0
  70. package/dist/baseline/risk-scorer.d.ts +54 -0
  71. package/dist/baseline/risk-scorer.js +434 -0
  72. package/dist/baseline/saver.d.ts +89 -0
  73. package/dist/baseline/saver.js +554 -0
  74. package/dist/baseline/scenario-generator.d.ts +151 -0
  75. package/dist/baseline/scenario-generator.js +905 -0
  76. package/dist/baseline/schema-compare.d.ts +86 -0
  77. package/dist/baseline/schema-compare.js +557 -0
  78. package/dist/baseline/schema-evolution.d.ts +189 -0
  79. package/dist/baseline/schema-evolution.js +467 -0
  80. package/dist/baseline/semantic.d.ts +203 -0
  81. package/dist/baseline/semantic.js +908 -0
  82. package/dist/baseline/synonyms.d.ts +60 -0
  83. package/dist/baseline/synonyms.js +386 -0
  84. package/dist/baseline/telemetry.d.ts +165 -0
  85. package/dist/baseline/telemetry.js +294 -0
  86. package/dist/baseline/test-pruner.d.ts +120 -0
  87. package/dist/baseline/test-pruner.js +387 -0
  88. package/dist/baseline/types.d.ts +449 -0
  89. package/dist/baseline/types.js +5 -0
  90. package/dist/baseline/version.d.ts +138 -0
  91. package/dist/baseline/version.js +206 -0
  92. package/dist/cache/index.d.ts +5 -0
  93. package/dist/cache/index.js +5 -0
  94. package/dist/cache/response-cache.d.ts +151 -0
  95. package/dist/cache/response-cache.js +287 -0
  96. package/dist/ci/index.d.ts +60 -0
  97. package/dist/ci/index.js +342 -0
  98. package/dist/cli/commands/auth.d.ts +12 -0
  99. package/dist/cli/commands/auth.js +352 -0
  100. package/dist/cli/commands/badge.d.ts +3 -0
  101. package/dist/cli/commands/badge.js +74 -0
  102. package/dist/cli/commands/baseline-accept.d.ts +15 -0
  103. package/dist/cli/commands/baseline-accept.js +178 -0
  104. package/dist/cli/commands/baseline-migrate.d.ts +12 -0
  105. package/dist/cli/commands/baseline-migrate.js +164 -0
  106. package/dist/cli/commands/baseline.d.ts +14 -0
  107. package/dist/cli/commands/baseline.js +449 -0
  108. package/dist/cli/commands/beta.d.ts +10 -0
  109. package/dist/cli/commands/beta.js +231 -0
  110. package/dist/cli/commands/check.d.ts +11 -0
  111. package/dist/cli/commands/check.js +820 -0
  112. package/dist/cli/commands/cloud/badge.d.ts +3 -0
  113. package/dist/cli/commands/cloud/badge.js +74 -0
  114. package/dist/cli/commands/cloud/diff.d.ts +6 -0
  115. package/dist/cli/commands/cloud/diff.js +79 -0
  116. package/dist/cli/commands/cloud/history.d.ts +6 -0
  117. package/dist/cli/commands/cloud/history.js +102 -0
  118. package/dist/cli/commands/cloud/link.d.ts +9 -0
  119. package/dist/cli/commands/cloud/link.js +119 -0
  120. package/dist/cli/commands/cloud/login.d.ts +7 -0
  121. package/dist/cli/commands/cloud/login.js +499 -0
  122. package/dist/cli/commands/cloud/projects.d.ts +6 -0
  123. package/dist/cli/commands/cloud/projects.js +44 -0
  124. package/dist/cli/commands/cloud/shared.d.ts +7 -0
  125. package/dist/cli/commands/cloud/shared.js +42 -0
  126. package/dist/cli/commands/cloud/teams.d.ts +8 -0
  127. package/dist/cli/commands/cloud/teams.js +169 -0
  128. package/dist/cli/commands/cloud/upload.d.ts +8 -0
  129. package/dist/cli/commands/cloud/upload.js +181 -0
  130. package/dist/cli/commands/contract.d.ts +11 -0
  131. package/dist/cli/commands/contract.js +280 -0
  132. package/dist/cli/commands/discover.d.ts +3 -0
  133. package/dist/cli/commands/discover.js +82 -0
  134. package/dist/cli/commands/eval.d.ts +9 -0
  135. package/dist/cli/commands/eval.js +187 -0
  136. package/dist/cli/commands/explore.d.ts +11 -0
  137. package/dist/cli/commands/explore.js +437 -0
  138. package/dist/cli/commands/feedback.d.ts +9 -0
  139. package/dist/cli/commands/feedback.js +174 -0
  140. package/dist/cli/commands/golden.d.ts +12 -0
  141. package/dist/cli/commands/golden.js +407 -0
  142. package/dist/cli/commands/history.d.ts +10 -0
  143. package/dist/cli/commands/history.js +202 -0
  144. package/dist/cli/commands/init.d.ts +9 -0
  145. package/dist/cli/commands/init.js +219 -0
  146. package/dist/cli/commands/interview.d.ts +3 -0
  147. package/dist/cli/commands/interview.js +903 -0
  148. package/dist/cli/commands/link.d.ts +10 -0
  149. package/dist/cli/commands/link.js +169 -0
  150. package/dist/cli/commands/login.d.ts +7 -0
  151. package/dist/cli/commands/login.js +499 -0
  152. package/dist/cli/commands/preset.d.ts +33 -0
  153. package/dist/cli/commands/preset.js +297 -0
  154. package/dist/cli/commands/profile.d.ts +33 -0
  155. package/dist/cli/commands/profile.js +286 -0
  156. package/dist/cli/commands/registry.d.ts +11 -0
  157. package/dist/cli/commands/registry.js +146 -0
  158. package/dist/cli/commands/shared.d.ts +79 -0
  159. package/dist/cli/commands/shared.js +196 -0
  160. package/dist/cli/commands/teams.d.ts +8 -0
  161. package/dist/cli/commands/teams.js +169 -0
  162. package/dist/cli/commands/test.d.ts +9 -0
  163. package/dist/cli/commands/test.js +500 -0
  164. package/dist/cli/commands/upload.d.ts +8 -0
  165. package/dist/cli/commands/upload.js +223 -0
  166. package/dist/cli/commands/validate-config.d.ts +6 -0
  167. package/dist/cli/commands/validate-config.js +35 -0
  168. package/dist/cli/commands/verify.d.ts +11 -0
  169. package/dist/cli/commands/verify.js +283 -0
  170. package/dist/cli/commands/watch.d.ts +12 -0
  171. package/dist/cli/commands/watch.js +253 -0
  172. package/dist/cli/index.d.ts +3 -0
  173. package/dist/cli/index.js +178 -0
  174. package/dist/cli/interactive.d.ts +47 -0
  175. package/dist/cli/interactive.js +216 -0
  176. package/dist/cli/output/terminal-reporter.d.ts +19 -0
  177. package/dist/cli/output/terminal-reporter.js +104 -0
  178. package/dist/cli/output.d.ts +226 -0
  179. package/dist/cli/output.js +438 -0
  180. package/dist/cli/utils/env.d.ts +5 -0
  181. package/dist/cli/utils/env.js +14 -0
  182. package/dist/cli/utils/progress.d.ts +59 -0
  183. package/dist/cli/utils/progress.js +206 -0
  184. package/dist/cli/utils/server-context.d.ts +10 -0
  185. package/dist/cli/utils/server-context.js +36 -0
  186. package/dist/cloud/auth.d.ts +144 -0
  187. package/dist/cloud/auth.js +374 -0
  188. package/dist/cloud/client.d.ts +24 -0
  189. package/dist/cloud/client.js +65 -0
  190. package/dist/cloud/http-client.d.ts +38 -0
  191. package/dist/cloud/http-client.js +215 -0
  192. package/dist/cloud/index.d.ts +23 -0
  193. package/dist/cloud/index.js +25 -0
  194. package/dist/cloud/mock-client.d.ts +107 -0
  195. package/dist/cloud/mock-client.js +545 -0
  196. package/dist/cloud/types.d.ts +515 -0
  197. package/dist/cloud/types.js +15 -0
  198. package/dist/config/defaults.d.ts +160 -0
  199. package/dist/config/defaults.js +169 -0
  200. package/dist/config/loader.d.ts +24 -0
  201. package/dist/config/loader.js +122 -0
  202. package/dist/config/template.d.ts +42 -0
  203. package/dist/config/template.js +647 -0
  204. package/dist/config/validator.d.ts +2112 -0
  205. package/dist/config/validator.js +658 -0
  206. package/dist/constants/cloud.d.ts +107 -0
  207. package/dist/constants/cloud.js +110 -0
  208. package/dist/constants/core.d.ts +521 -0
  209. package/dist/constants/core.js +556 -0
  210. package/dist/constants/testing.d.ts +1283 -0
  211. package/dist/constants/testing.js +1568 -0
  212. package/dist/constants.d.ts +10 -0
  213. package/dist/constants.js +10 -0
  214. package/dist/contract/index.d.ts +6 -0
  215. package/dist/contract/index.js +5 -0
  216. package/dist/contract/validator.d.ts +177 -0
  217. package/dist/contract/validator.js +574 -0
  218. package/dist/cost/index.d.ts +6 -0
  219. package/dist/cost/index.js +5 -0
  220. package/dist/cost/tracker.d.ts +134 -0
  221. package/dist/cost/tracker.js +313 -0
  222. package/dist/discovery/discovery.d.ts +16 -0
  223. package/dist/discovery/discovery.js +173 -0
  224. package/dist/discovery/types.d.ts +51 -0
  225. package/dist/discovery/types.js +2 -0
  226. package/dist/docs/agents.d.ts +3 -0
  227. package/dist/docs/agents.js +995 -0
  228. package/dist/docs/contract.d.ts +51 -0
  229. package/dist/docs/contract.js +1681 -0
  230. package/dist/docs/generator.d.ts +4 -0
  231. package/dist/docs/generator.js +4 -0
  232. package/dist/docs/html-reporter.d.ts +9 -0
  233. package/dist/docs/html-reporter.js +757 -0
  234. package/dist/docs/index.d.ts +10 -0
  235. package/dist/docs/index.js +11 -0
  236. package/dist/docs/junit-reporter.d.ts +18 -0
  237. package/dist/docs/junit-reporter.js +210 -0
  238. package/dist/docs/report.d.ts +14 -0
  239. package/dist/docs/report.js +44 -0
  240. package/dist/docs/sarif-reporter.d.ts +19 -0
  241. package/dist/docs/sarif-reporter.js +335 -0
  242. package/dist/docs/shared.d.ts +35 -0
  243. package/dist/docs/shared.js +162 -0
  244. package/dist/docs/templates.d.ts +12 -0
  245. package/dist/docs/templates.js +76 -0
  246. package/dist/errors/index.d.ts +6 -0
  247. package/dist/errors/index.js +6 -0
  248. package/dist/errors/retry.d.ts +92 -0
  249. package/dist/errors/retry.js +323 -0
  250. package/dist/errors/types.d.ts +321 -0
  251. package/dist/errors/types.js +584 -0
  252. package/dist/index.d.ts +32 -0
  253. package/dist/index.js +32 -0
  254. package/dist/interview/dependency-resolver.d.ts +11 -0
  255. package/dist/interview/dependency-resolver.js +32 -0
  256. package/dist/interview/interviewer.d.ts +232 -0
  257. package/dist/interview/interviewer.js +1939 -0
  258. package/dist/interview/mock-response-generator.d.ts +7 -0
  259. package/dist/interview/mock-response-generator.js +102 -0
  260. package/dist/interview/orchestrator.d.ts +237 -0
  261. package/dist/interview/orchestrator.js +1296 -0
  262. package/dist/interview/rate-limiter.d.ts +15 -0
  263. package/dist/interview/rate-limiter.js +55 -0
  264. package/dist/interview/response-validator.d.ts +10 -0
  265. package/dist/interview/response-validator.js +132 -0
  266. package/dist/interview/schema-inferrer.d.ts +8 -0
  267. package/dist/interview/schema-inferrer.js +71 -0
  268. package/dist/interview/schema-test-generator.d.ts +71 -0
  269. package/dist/interview/schema-test-generator.js +834 -0
  270. package/dist/interview/smart-value-generator.d.ts +155 -0
  271. package/dist/interview/smart-value-generator.js +554 -0
  272. package/dist/interview/stateful-test-runner.d.ts +19 -0
  273. package/dist/interview/stateful-test-runner.js +106 -0
  274. package/dist/interview/types.d.ts +561 -0
  275. package/dist/interview/types.js +2 -0
  276. package/dist/llm/anthropic.d.ts +41 -0
  277. package/dist/llm/anthropic.js +355 -0
  278. package/dist/llm/client.d.ts +123 -0
  279. package/dist/llm/client.js +42 -0
  280. package/dist/llm/factory.d.ts +38 -0
  281. package/dist/llm/factory.js +145 -0
  282. package/dist/llm/fallback.d.ts +140 -0
  283. package/dist/llm/fallback.js +379 -0
  284. package/dist/llm/index.d.ts +18 -0
  285. package/dist/llm/index.js +15 -0
  286. package/dist/llm/ollama.d.ts +37 -0
  287. package/dist/llm/ollama.js +330 -0
  288. package/dist/llm/openai.d.ts +25 -0
  289. package/dist/llm/openai.js +320 -0
  290. package/dist/llm/token-budget.d.ts +161 -0
  291. package/dist/llm/token-budget.js +395 -0
  292. package/dist/logging/logger.d.ts +70 -0
  293. package/dist/logging/logger.js +130 -0
  294. package/dist/metrics/collector.d.ts +106 -0
  295. package/dist/metrics/collector.js +547 -0
  296. package/dist/metrics/index.d.ts +7 -0
  297. package/dist/metrics/index.js +7 -0
  298. package/dist/metrics/prometheus.d.ts +20 -0
  299. package/dist/metrics/prometheus.js +241 -0
  300. package/dist/metrics/types.d.ts +209 -0
  301. package/dist/metrics/types.js +5 -0
  302. package/dist/persona/builtins.d.ts +54 -0
  303. package/dist/persona/builtins.js +219 -0
  304. package/dist/persona/index.d.ts +8 -0
  305. package/dist/persona/index.js +8 -0
  306. package/dist/persona/loader.d.ts +30 -0
  307. package/dist/persona/loader.js +190 -0
  308. package/dist/persona/types.d.ts +144 -0
  309. package/dist/persona/types.js +5 -0
  310. package/dist/persona/validation.d.ts +94 -0
  311. package/dist/persona/validation.js +332 -0
  312. package/dist/prompts/index.d.ts +5 -0
  313. package/dist/prompts/index.js +5 -0
  314. package/dist/prompts/templates.d.ts +180 -0
  315. package/dist/prompts/templates.js +431 -0
  316. package/dist/registry/client.d.ts +49 -0
  317. package/dist/registry/client.js +191 -0
  318. package/dist/registry/index.d.ts +7 -0
  319. package/dist/registry/index.js +6 -0
  320. package/dist/registry/types.d.ts +140 -0
  321. package/dist/registry/types.js +6 -0
  322. package/dist/scenarios/evaluator.d.ts +43 -0
  323. package/dist/scenarios/evaluator.js +206 -0
  324. package/dist/scenarios/index.d.ts +10 -0
  325. package/dist/scenarios/index.js +9 -0
  326. package/dist/scenarios/loader.d.ts +20 -0
  327. package/dist/scenarios/loader.js +285 -0
  328. package/dist/scenarios/types.d.ts +153 -0
  329. package/dist/scenarios/types.js +8 -0
  330. package/dist/security/index.d.ts +17 -0
  331. package/dist/security/index.js +18 -0
  332. package/dist/security/payloads.d.ts +61 -0
  333. package/dist/security/payloads.js +268 -0
  334. package/dist/security/security-tester.d.ts +42 -0
  335. package/dist/security/security-tester.js +582 -0
  336. package/dist/security/types.d.ts +166 -0
  337. package/dist/security/types.js +8 -0
  338. package/dist/transport/base-transport.d.ts +59 -0
  339. package/dist/transport/base-transport.js +38 -0
  340. package/dist/transport/http-transport.d.ts +67 -0
  341. package/dist/transport/http-transport.js +238 -0
  342. package/dist/transport/mcp-client.d.ts +141 -0
  343. package/dist/transport/mcp-client.js +496 -0
  344. package/dist/transport/sse-transport.d.ts +88 -0
  345. package/dist/transport/sse-transport.js +316 -0
  346. package/dist/transport/stdio-transport.d.ts +43 -0
  347. package/dist/transport/stdio-transport.js +238 -0
  348. package/dist/transport/types.d.ts +125 -0
  349. package/dist/transport/types.js +16 -0
  350. package/dist/utils/concurrency.d.ts +123 -0
  351. package/dist/utils/concurrency.js +213 -0
  352. package/dist/utils/formatters.d.ts +16 -0
  353. package/dist/utils/formatters.js +37 -0
  354. package/dist/utils/index.d.ts +8 -0
  355. package/dist/utils/index.js +8 -0
  356. package/dist/utils/jsonpath.d.ts +87 -0
  357. package/dist/utils/jsonpath.js +326 -0
  358. package/dist/utils/markdown.d.ts +113 -0
  359. package/dist/utils/markdown.js +265 -0
  360. package/dist/utils/network.d.ts +14 -0
  361. package/dist/utils/network.js +17 -0
  362. package/dist/utils/sanitize.d.ts +92 -0
  363. package/dist/utils/sanitize.js +191 -0
  364. package/dist/utils/semantic.d.ts +194 -0
  365. package/dist/utils/semantic.js +1051 -0
  366. package/dist/utils/smart-truncate.d.ts +94 -0
  367. package/dist/utils/smart-truncate.js +361 -0
  368. package/dist/utils/timeout.d.ts +153 -0
  369. package/dist/utils/timeout.js +205 -0
  370. package/dist/utils/yaml-parser.d.ts +58 -0
  371. package/dist/utils/yaml-parser.js +86 -0
  372. package/dist/validation/index.d.ts +32 -0
  373. package/dist/validation/index.js +32 -0
  374. package/dist/validation/semantic-test-generator.d.ts +50 -0
  375. package/dist/validation/semantic-test-generator.js +176 -0
  376. package/dist/validation/semantic-types.d.ts +66 -0
  377. package/dist/validation/semantic-types.js +94 -0
  378. package/dist/validation/semantic-validator.d.ts +38 -0
  379. package/dist/validation/semantic-validator.js +340 -0
  380. package/dist/verification/index.d.ts +6 -0
  381. package/dist/verification/index.js +5 -0
  382. package/dist/verification/types.d.ts +133 -0
  383. package/dist/verification/types.js +5 -0
  384. package/dist/verification/verifier.d.ts +30 -0
  385. package/dist/verification/verifier.js +309 -0
  386. package/dist/version.d.ts +19 -0
  387. package/dist/version.js +48 -0
  388. package/dist/workflow/auto-generator.d.ts +27 -0
  389. package/dist/workflow/auto-generator.js +513 -0
  390. package/dist/workflow/discovery.d.ts +40 -0
  391. package/dist/workflow/discovery.js +195 -0
  392. package/dist/workflow/executor.d.ts +82 -0
  393. package/dist/workflow/executor.js +611 -0
  394. package/dist/workflow/index.d.ts +10 -0
  395. package/dist/workflow/index.js +10 -0
  396. package/dist/workflow/loader.d.ts +24 -0
  397. package/dist/workflow/loader.js +194 -0
  398. package/dist/workflow/state-tracker.d.ts +98 -0
  399. package/dist/workflow/state-tracker.js +424 -0
  400. package/dist/workflow/types.d.ts +337 -0
  401. package/dist/workflow/types.js +5 -0
  402. package/package.json +94 -0
  403. package/schemas/bellwether-check.schema.json +651 -0
@@ -0,0 +1,995 @@
1
+ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock } from '../utils/index.js';
2
+ import { DISPLAY_LIMITS, MATH_FACTORS } from '../constants.js';
3
+ import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
4
/**
 * Detect likely server misconfiguration from the pattern of failed tool calls.
 *
 * A warning is only considered when at least 70% of tool calls failed. Two
 * signals are then checked, in order:
 *   1. More than half of the failed interactions mention an access or
 *      permission problem in their error or text response.
 *   2. Fallback: more than half of the profile limitations mention access
 *      restrictions AND the error rate exceeds 80%.
 *
 * @param {Array<object>} profiles - Tool profiles; each may carry an
 *   `interactions` array (entries with `error` and/or `response`) and a
 *   `limitations` array of strings. Missing arrays are treated as empty.
 * @param {object} metadata - Run metadata providing `toolCallCount` and
 *   `errorCount`.
 * @returns {string|null} A human-readable warning, or null when no
 *   configuration issue is detected.
 */
function detectConfigurationIssues(profiles, metadata) {
    // Patterns are built once per call instead of once per loop iteration.
    const accessErrorPattern = /access denied|outside.*(allowed|predefined)|not allowed|permission denied|cannot.*(access|read|write|create|list).*outside|restricted to.*(predefined|allowed)/i;
    const accessLimitationPattern = /outside.*(allowed|predefined)|restricted|access.*(denied|control)|cannot.*(access|read|write)/i;
    const filesystemHint = 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';

    const errorRate = metadata.errorCount / metadata.toolCallCount;
    // A zero, missing or non-numeric count yields NaN/Infinity. Without this
    // guard NaN slips past the `< 0.7` comparison (NaN comparisons are always
    // false) and a warning could be produced without any valid error rate.
    if (!Number.isFinite(errorRate) || errorRate < 0.7) {
        return null; // Less than 70% errors is probably fine
    }

    // Signal 1: look for access-related errors in the recorded interactions.
    let accessErrors = 0;
    let totalErrors = 0;
    for (const profile of profiles) {
        for (const interaction of profile.interactions ?? []) {
            // Only the first text content item of the response is inspected.
            const textContent = interaction.response?.content?.find(c => c.type === 'text');
            const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
            const errorText = interaction.error || '';
            const combined = errorText + responseText;
            // Count as error if flagged as error OR if the content looks like one.
            // looksLikeError comes from ./shared.js; it is only consulted when
            // neither explicit error flag is set.
            const isError = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!isError) {
                continue;
            }
            totalErrors++;
            if (accessErrorPattern.test(combined)) {
                accessErrors++;
            }
        }
    }
    if (totalErrors > 0 && accessErrors / totalErrors > 0.5) {
        return 'Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
            filesystemHint;
    }

    // Signal 2 (fallback): check the synthesized limitations for access patterns.
    let accessRelatedLimitations = 0;
    let totalLimitations = 0;
    for (const profile of profiles) {
        for (const limitation of profile.limitations ?? []) {
            totalLimitations++;
            if (accessLimitationPattern.test(limitation)) {
                accessRelatedLimitations++;
            }
        }
    }
    // Requires a stricter error rate (>80%) because limitations are indirect evidence.
    if (totalLimitations > 0 && accessRelatedLimitations / totalLimitations > 0.5 && errorRate > 0.8) {
        return 'Most tool calls failed, likely due to missing allowed directories configuration. ' +
            filesystemHint;
    }
    return null;
}
57
+ export function generateAgentsMd(result) {
58
+ const lines = [];
59
+ const { discovery, toolProfiles, summary, limitations, recommendations, metadata } = result;
60
+ // Header
61
+ lines.push(`# ${discovery.serverInfo.name}`);
62
+ lines.push('');
63
+ lines.push(`> Generated by [Bellwether](https://github.com/dotsetlabs/bellwether) on ${formatDateISO(metadata.startTime)}`);
64
+ lines.push('');
65
+ // Check for configuration issues (high error rate with access-related errors)
66
+ const configWarning = detectConfigurationIssues(toolProfiles, metadata);
67
+ if (configWarning) {
68
+ lines.push('> ⚠️ **Configuration Issue Detected**');
69
+ lines.push('>');
70
+ lines.push(`> ${configWarning}`);
71
+ lines.push('');
72
+ }
73
+ // Overview
74
+ lines.push('## Overview');
75
+ lines.push('');
76
+ lines.push(summary);
77
+ lines.push('');
78
+ lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
79
+ lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
80
+ // Show personas used
81
+ if (metadata.personas && metadata.personas.length > 0) {
82
+ const personaNames = metadata.personas.map(p => p.name).join(', ');
83
+ lines.push(`**Interview Personas:** ${personaNames}`);
84
+ }
85
+ lines.push('');
86
+ // Capabilities summary
87
+ lines.push('## Capabilities');
88
+ lines.push('');
89
+ if (discovery.capabilities.tools) {
90
+ lines.push(`- **Tools:** ${discovery.tools.length} available`);
91
+ }
92
+ if (discovery.capabilities.prompts) {
93
+ lines.push(`- **Prompts:** ${discovery.prompts.length} available`);
94
+ }
95
+ if (discovery.capabilities.resources) {
96
+ lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
97
+ }
98
+ if (discovery.capabilities.logging) {
99
+ lines.push('- **Logging:** Supported');
100
+ }
101
+ lines.push('');
102
+ // Extract common constraints across all tools
103
+ const { common: commonConstraints, byTool: toolSpecificConstraints } = extractCommonConstraints(toolProfiles);
104
+ // Quick Reference section
105
+ if (toolProfiles.length > 0) {
106
+ lines.push(...generateQuickReference(discovery.tools, toolProfiles));
107
+ }
108
+ // Global Constraints section (common restrictions)
109
+ if (commonConstraints.length > 0) {
110
+ lines.push(...generateGlobalConstraintsSection(commonConstraints));
111
+ }
112
+ // Security Considerations section (aggregated)
113
+ const securitySection = generateSecuritySection(toolProfiles);
114
+ if (securitySection.length > 0) {
115
+ lines.push(...securitySection);
116
+ }
117
+ // Performance section
118
+ const performanceSection = generatePerformanceSection(toolProfiles);
119
+ if (performanceSection.length > 0) {
120
+ lines.push(...performanceSection);
121
+ }
122
+ // Tools section
123
+ if (toolProfiles.length > 0) {
124
+ lines.push('## Tools');
125
+ lines.push('');
126
+ for (const profile of toolProfiles) {
127
+ lines.push(`### ${profile.name}`);
128
+ lines.push('');
129
+ lines.push(profile.description);
130
+ lines.push('');
131
+ // Find the original tool to get schema
132
+ const tool = discovery.tools.find(t => t.name === profile.name);
133
+ if (tool?.inputSchema) {
134
+ lines.push('**Input Schema:**');
135
+ // Validate JSON and escape for code block
136
+ const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
137
+ lines.push('```json');
138
+ lines.push(schemaJson.content);
139
+ lines.push('```');
140
+ lines.push('');
141
+ }
142
+ // Add sample response if we have successful calls
143
+ const sampleResponse = generateSampleResponse(profile);
144
+ if (sampleResponse.length > 0) {
145
+ lines.push(...sampleResponse);
146
+ }
147
+ if (profile.behavioralNotes.length > 0) {
148
+ lines.push('**Observed Behavior:**');
149
+ for (const note of profile.behavioralNotes) {
150
+ lines.push(`- ${note}`);
151
+ }
152
+ lines.push('');
153
+ }
154
+ // Only show tool-specific limitations (common ones are in Global Constraints)
155
+ const specificLimitations = toolSpecificConstraints.get(profile.name) ?? [];
156
+ if (specificLimitations.length > 0) {
157
+ lines.push('**Limitations:**');
158
+ for (const limitation of specificLimitations) {
159
+ lines.push(`- ${limitation}`);
160
+ }
161
+ lines.push('');
162
+ }
163
+ if (profile.securityNotes.length > 0) {
164
+ lines.push('**Security Considerations:**');
165
+ for (const note of profile.securityNotes) {
166
+ lines.push(`- ${note}`);
167
+ }
168
+ lines.push('');
169
+ }
170
+ // Findings by persona (if multiple personas used)
171
+ if (profile.findingsByPersona && profile.findingsByPersona.length > 1) {
172
+ lines.push('<details>');
173
+ lines.push('<summary><strong>Findings by Persona</strong></summary>');
174
+ lines.push('');
175
+ for (const personaFindings of profile.findingsByPersona) {
176
+ lines.push(`#### ${personaFindings.personaName}`);
177
+ lines.push('');
178
+ if (personaFindings.behavioralNotes.length > 0) {
179
+ for (const note of personaFindings.behavioralNotes) {
180
+ lines.push(`- ${note}`);
181
+ }
182
+ }
183
+ if (personaFindings.limitations.length > 0) {
184
+ lines.push('');
185
+ lines.push('*Limitations:*');
186
+ for (const limitation of personaFindings.limitations) {
187
+ lines.push(`- ${limitation}`);
188
+ }
189
+ }
190
+ if (personaFindings.securityNotes.length > 0) {
191
+ lines.push('');
192
+ lines.push('*Security:*');
193
+ for (const note of personaFindings.securityNotes) {
194
+ lines.push(`- ${note}`);
195
+ }
196
+ }
197
+ lines.push('');
198
+ }
199
+ lines.push('</details>');
200
+ lines.push('');
201
+ }
202
+ }
203
+ }
204
+ // Common Workflows section (summarized view of successful workflows)
205
+ if (result.workflowResults && result.workflowResults.length > 0) {
206
+ const successfulWorkflows = result.workflowResults.filter(wr => wr.success);
207
+ if (successfulWorkflows.length > 0) {
208
+ lines.push('## Common Workflows');
209
+ lines.push('');
210
+ lines.push('These workflows demonstrate recommended patterns for using tools together:');
211
+ lines.push('');
212
+ for (const wr of successfulWorkflows) {
213
+ const toolSequence = wr.workflow.steps.map(s => `\`${s.tool}\``).join(' → ');
214
+ lines.push(`### ${wr.workflow.name}`);
215
+ lines.push('');
216
+ lines.push(wr.workflow.description);
217
+ lines.push('');
218
+ lines.push(`**Tool Sequence:** ${toolSequence}`);
219
+ lines.push('');
220
+ // Generate mermaid workflow diagram
221
+ lines.push('```mermaid');
222
+ lines.push('flowchart LR');
223
+ for (let i = 0; i < wr.workflow.steps.length; i++) {
224
+ const step = wr.workflow.steps[i];
225
+ const nodeId = `S${i}`;
226
+ const nextNodeId = `S${i + 1}`;
227
+ // Use mermaidLabel to safely escape tool names
228
+ if (i === 0) {
229
+ lines.push(` ${nodeId}[${mermaidLabel(step.tool)}]`);
230
+ }
231
+ if (i < wr.workflow.steps.length - 1) {
232
+ const nextStep = wr.workflow.steps[i + 1];
233
+ lines.push(` ${nodeId} --> ${nextNodeId}[${mermaidLabel(nextStep.tool)}]`);
234
+ }
235
+ }
236
+ lines.push('```');
237
+ lines.push('');
238
+ }
239
+ }
240
+ }
241
+ // Detailed Workflows section (with full test results)
242
+ if (result.workflowResults && result.workflowResults.length > 0) {
243
+ lines.push('## Workflow Test Results');
244
+ lines.push('');
245
+ lines.push('The following workflows were tested to verify tool chaining behavior:');
246
+ lines.push('');
247
+ for (const workflowResult of result.workflowResults) {
248
+ const statusIcon = workflowResult.success ? '✅' : '❌';
249
+ lines.push(`### ${statusIcon} ${workflowResult.workflow.name}`);
250
+ lines.push('');
251
+ lines.push(workflowResult.workflow.description);
252
+ lines.push('');
253
+ if (workflowResult.summary) {
254
+ lines.push(`> ${workflowResult.summary}`);
255
+ lines.push('');
256
+ }
257
+ // Show steps
258
+ lines.push('**Steps:**');
259
+ lines.push('');
260
+ for (let i = 0; i < workflowResult.steps.length; i++) {
261
+ const stepResult = workflowResult.steps[i];
262
+ const stepIcon = stepResult.success ? '✓' : '✗';
263
+ lines.push(`${i + 1}. ${stepIcon} **${stepResult.step.tool}**: ${stepResult.step.description}`);
264
+ if (stepResult.analysis) {
265
+ lines.push(` - ${stepResult.analysis}`);
266
+ }
267
+ if (stepResult.error) {
268
+ lines.push(` - ⚠️ Error: ${stepResult.error}`);
269
+ }
270
+ }
271
+ lines.push('');
272
+ // Show data flow diagram if present
273
+ if (workflowResult.dataFlow && workflowResult.dataFlow.length > 0) {
274
+ lines.push('**Data Flow:**');
275
+ lines.push('');
276
+ lines.push('```mermaid');
277
+ lines.push('flowchart LR');
278
+ // Add nodes first - escape tool names for Mermaid
279
+ for (let i = 0; i < workflowResult.steps.length; i++) {
280
+ const stepResult = workflowResult.steps[i];
281
+ const status = stepResult.success ? ':::success' : ':::failure';
282
+ lines.push(` Step${i}[${mermaidLabel(stepResult.step.tool)}]${status}`);
283
+ }
284
+ // Add edges - escape parameter names
285
+ for (const edge of workflowResult.dataFlow) {
286
+ const param = mermaidLabel(edge.targetParam).replace(/"/g, '');
287
+ lines.push(` Step${edge.fromStep} -->|${param}| Step${edge.toStep}`);
288
+ }
289
+ // Add styling
290
+ lines.push(' classDef success fill:#90EE90');
291
+ lines.push(' classDef failure fill:#FFB6C1');
292
+ lines.push('```');
293
+ lines.push('');
294
+ }
295
+ }
296
+ }
297
+ // Prompts section - use profiles if available, otherwise basic listing
298
+ if (result.promptProfiles && result.promptProfiles.length > 0) {
299
+ lines.push('## Prompts');
300
+ lines.push('');
301
+ lines.push('Prompts are reusable templates that generate structured messages for LLM interactions.');
302
+ lines.push('');
303
+ for (const profile of result.promptProfiles) {
304
+ lines.push(`### ${profile.name}`);
305
+ lines.push('');
306
+ lines.push(profile.description);
307
+ lines.push('');
308
+ if (profile.arguments.length > 0) {
309
+ lines.push('**Arguments:**');
310
+ for (const arg of profile.arguments) {
311
+ const required = arg.required ? ' (required)' : '';
312
+ lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
313
+ }
314
+ lines.push('');
315
+ }
316
+ if (profile.exampleOutput) {
317
+ lines.push('**Example Output:**');
318
+ lines.push('```');
319
+ lines.push(profile.exampleOutput.length > DISPLAY_LIMITS.DOCS_EXAMPLE_LENGTH
320
+ ? profile.exampleOutput.substring(0, DISPLAY_LIMITS.DOCS_EXAMPLE_LENGTH) + '...'
321
+ : profile.exampleOutput);
322
+ lines.push('```');
323
+ lines.push('');
324
+ }
325
+ if (profile.behavioralNotes.length > 0) {
326
+ lines.push('**Observed Behavior:**');
327
+ for (const note of profile.behavioralNotes) {
328
+ lines.push(`- ${note}`);
329
+ }
330
+ lines.push('');
331
+ }
332
+ if (profile.limitations.length > 0) {
333
+ lines.push('**Limitations:**');
334
+ for (const limitation of profile.limitations) {
335
+ lines.push(`- ${limitation}`);
336
+ }
337
+ lines.push('');
338
+ }
339
+ }
340
+ }
341
+ else if (discovery.prompts.length > 0) {
342
+ // Fallback to basic listing if no profiles
343
+ lines.push('## Prompts');
344
+ lines.push('');
345
+ for (const prompt of discovery.prompts) {
346
+ lines.push(`### ${prompt.name}`);
347
+ lines.push('');
348
+ if (prompt.description) {
349
+ lines.push(prompt.description);
350
+ lines.push('');
351
+ }
352
+ if (prompt.arguments && prompt.arguments.length > 0) {
353
+ lines.push('**Arguments:**');
354
+ for (const arg of prompt.arguments) {
355
+ const required = arg.required ? ' (required)' : '';
356
+ lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
357
+ }
358
+ lines.push('');
359
+ }
360
+ }
361
+ }
362
+ // Resources section - use profiles if available, otherwise basic listing
363
+ if (result.resourceProfiles && result.resourceProfiles.length > 0) {
364
+ lines.push('## Resources');
365
+ lines.push('');
366
+ lines.push('Resources are data sources exposed by the server that can be read by clients.');
367
+ lines.push('');
368
+ for (const profile of result.resourceProfiles) {
369
+ lines.push(`### ${profile.name}`);
370
+ lines.push('');
371
+ lines.push(`**URI:** \`${profile.uri}\``);
372
+ if (profile.mimeType) {
373
+ lines.push(`**MIME Type:** ${profile.mimeType}`);
374
+ }
375
+ lines.push('');
376
+ lines.push(profile.description);
377
+ lines.push('');
378
+ if (profile.contentPreview) {
379
+ lines.push('**Content Preview:**');
380
+ lines.push('```');
381
+ lines.push(profile.contentPreview);
382
+ lines.push('```');
383
+ lines.push('');
384
+ }
385
+ if (profile.behavioralNotes.length > 0) {
386
+ lines.push('**Observed Behavior:**');
387
+ for (const note of profile.behavioralNotes) {
388
+ lines.push(`- ${note}`);
389
+ }
390
+ lines.push('');
391
+ }
392
+ if (profile.limitations.length > 0) {
393
+ lines.push('**Limitations:**');
394
+ for (const limitation of profile.limitations) {
395
+ lines.push(`- ${limitation}`);
396
+ }
397
+ lines.push('');
398
+ }
399
+ }
400
+ }
401
+ else if ((discovery.resources ?? []).length > 0) {
402
+ // Fallback to basic listing if no profiles
403
+ lines.push('## Resources');
404
+ lines.push('');
405
+ lines.push('Resources are data sources exposed by the server that can be read by clients.');
406
+ lines.push('');
407
+ for (const resource of discovery.resources ?? []) {
408
+ lines.push(`### ${resource.name}`);
409
+ lines.push('');
410
+ lines.push(`**URI:** \`${resource.uri}\``);
411
+ if (resource.mimeType) {
412
+ lines.push(`**MIME Type:** ${resource.mimeType}`);
413
+ }
414
+ lines.push('');
415
+ if (resource.description) {
416
+ lines.push(resource.description);
417
+ lines.push('');
418
+ }
419
+ }
420
+ }
421
+ // Overall limitations
422
+ if (limitations.length > 0) {
423
+ lines.push('## Known Limitations');
424
+ lines.push('');
425
+ for (const limitation of limitations) {
426
+ lines.push(`- ${limitation}`);
427
+ }
428
+ lines.push('');
429
+ }
430
+ // Recommendations
431
+ if (recommendations.length > 0) {
432
+ lines.push('## Recommendations');
433
+ lines.push('');
434
+ for (const rec of recommendations) {
435
+ lines.push(`- ${rec}`);
436
+ }
437
+ lines.push('');
438
+ }
439
+ // Behavioral Matrix (tool × persona findings)
440
+ if (metadata.personas && metadata.personas.length > 1 && toolProfiles.length > 0) {
441
+ const matrixSection = generateBehavioralMatrix(toolProfiles, metadata.personas);
442
+ if (matrixSection.length > 0) {
443
+ lines.push(...matrixSection);
444
+ }
445
+ }
446
+ // Metadata footer
447
+ lines.push('---');
448
+ lines.push('');
449
+ // Check if check mode (no LLM, deterministic)
450
+ const isCheckMode = metadata.model === 'check';
451
+ // Basic stats - simpler for check mode
452
+ let statsLine;
453
+ if (isCheckMode) {
454
+ statsLine = `*Check analysis completed in ${formatDuration(metadata.durationMs)}.*`;
455
+ lines.push(statsLine);
456
+ }
457
+ else {
458
+ // Explore mode: show more details
459
+ statsLine = `*Interview completed in ${formatDuration(metadata.durationMs)} with ${metadata.toolCallCount} tool interactions`;
460
+ // Add persona breakdown if multiple personas
461
+ if (metadata.personas && metadata.personas.length > 1) {
462
+ statsLine += '.*';
463
+ lines.push(statsLine);
464
+ lines.push('');
465
+ lines.push('**Persona Breakdown:**');
466
+ for (const persona of metadata.personas) {
467
+ lines.push(`- ${persona.name}: ${persona.questionsAsked} questions, ${persona.toolCallCount} calls`);
468
+ }
469
+ }
470
+ else {
471
+ statsLine += '.*';
472
+ lines.push(statsLine);
473
+ }
474
+ }
475
+ return lines.join('\n');
476
+ }
477
/**
 * Build the "Quick Reference" section: a table with one row per tool
 * (name, parameters, inferred return type), followed by an optional
 * "Example Usage" subsection.
 *
 * Examples are only emitted for tools where `generateExampleSnippet` yields a
 * truthy value; if no tool has one, the subsection is omitted entirely.
 *
 * @param {Array<object>} tools - Tools to list; `name` and `inputSchema` are read.
 * @param {Array<object>} profiles - Tool profiles, matched to tools by `name`
 *   (the first profile with a matching name wins).
 * @returns {string[]} Markdown lines for the section.
 */
function generateQuickReference(tools, profiles) {
    const profileFor = (tool) => profiles.find((candidate) => candidate.name === tool.name);
    const out = [
        '## Quick Reference',
        '',
        '| Tool | Parameters | Returns |',
        '|------|------------|---------|',
    ];
    for (const tool of tools) {
        const signature = extractParameters(tool.inputSchema);
        const returns = inferReturnTypeDetailed(profileFor(tool));
        // Every cell goes through escapeTableCell so special characters cannot break the table.
        out.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(signature)} | ${escapeTableCell(returns)} |`);
    }
    out.push('');
    // Keep only the tools for which a real (non-synthetic) example exists.
    const withExamples = tools
        .map((tool) => ({ tool, example: generateExampleSnippet(tool, profileFor(tool)) }))
        .filter(({ example }) => example);
    if (withExamples.length === 0) {
        return out;
    }
    out.push('### Example Usage', '', '*Examples shown are from successful tool calls during the interview.*', '');
    for (const { tool, example } of withExamples) {
        out.push(`#### ${tool.name}`, '');
        // Validate and escape the JSON before placing it in a fenced block.
        const checked = validateJsonForCodeBlock(example);
        out.push('```json', checked.content, '```', '');
    }
    return out;
}
521
/**
 * Guess a tool's return type from the wording of its description, for use
 * when no actual response could be observed.
 *
 * Rules are tried in order and the first matching one wins; every label ends
 * in "(expected)" to signal that it was inferred rather than observed.
 *
 * @param {string} description - Tool description text.
 * @returns {string|null} The inferred label, or null when no rule matches.
 */
function inferReturnTypeFromDescription(description) {
    const rules = [
        [/returns?\s+(the\s+)?base64[\s-]?encoded/i, 'base64 data (expected)'],
        [/returns?\s+(a\s+)?json/i, 'JSON (expected)'],
        [/returns?\s+(the\s+)?contents?\s+of/i, 'file content (expected)'],
        [/returns?\s+(a\s+)?list(ing)?\s+of/i, 'list (expected)'],
        [/returns?\s+(the\s+)?tree\s+(view|structure)/i, 'tree structure (expected)'],
        [/returns?\s+(detailed\s+)?metadata/i, 'metadata (expected)'],
        [/returns?\s+full\s+paths?/i, 'file paths (expected)'],
        [/returns?\s+(a\s+)?git-style\s+diff/i, 'diff output (expected)'],
        [/returns?\s+(a\s+)?recursive\s+tree/i, 'tree structure (expected)'],
        [/read\s+(the\s+)?(complete\s+)?contents/i, 'file content (expected)'],
        [/get\s+(a\s+)?(detailed\s+)?listing/i, 'directory listing (expected)'],
        [/retrieve\s+(detailed\s+)?metadata/i, 'metadata (expected)'],
        [/search\s+(for\s+)?files/i, 'file paths (expected)'],
        [/create\s+(a\s+)?new\s+(file|directory)/i, 'success/error status (expected)'],
        [/move\s+or\s+rename/i, 'success/error status (expected)'],
    ];
    const firstHit = rules.find(([matcher]) => matcher.test(description));
    return firstHit ? firstHit[1] : null;
}
549
/**
 * Describe a tool's return type in more detail for the Quick Reference table.
 *
 * The description is derived from the first interaction that has no error, a
 * non-error response, and whose first text item does not look like an error
 * message. When no such interaction exists, the tool description is used to
 * infer an "(expected)" type instead.
 *
 * @param {object|undefined} profile - Tool profile; `interactions` and
 *   `description` are read.
 * @returns {string} A short human-readable label, e.g. 'JSON array (3 items)',
 *   'multi-line text (12 lines)', 'base64 image', or 'unknown'.
 */
function inferReturnTypeDetailed(profile) {
    if (!profile || profile.interactions.length === 0) {
        return 'unknown';
    }
    // Look at successful interactions that don't have error-like content
    const successful = profile.interactions.find(i => {
        if (i.error || !i.response || i.response.isError)
            return false;
        const textContent = i.response.content?.find(c => c.type === 'text');
        if (textContent && 'text' in textContent) {
            if (looksLikeError(String(textContent.text)))
                return false;
        }
        return true;
    });
    if (!successful || !successful.response) {
        // No successful calls - fall back to what the description promises
        return inferReturnTypeFromDescription(profile.description) || 'unknown';
    }
    const content = successful.response.content;
    if (!content || content.length === 0) {
        return 'empty response';
    }
    const types = new Set(content.map(c => c.type));
    if (types.size !== 1) {
        return `mixed (${Array.from(types).join(', ')})`;
    }
    const type = content[0].type;
    if (type === 'image') {
        return 'base64 image';
    }
    if (type !== 'text') {
        return type;
    }
    // Coerce defensively: a non-string `text` payload would otherwise make
    // `.startsWith` throw. Missing/falsy text is treated as empty, as before.
    const rawText = content[0].text;
    const text = typeof rawText === 'string' ? rawText : String(rawText || '');
    // Try to identify JSON structure
    if (text.startsWith('{')) {
        try {
            const allKeys = Object.keys(JSON.parse(text));
            if (allKeys.length === 0) {
                return 'JSON object';
            }
            // Show at most three keys and mark the rest with an ellipsis.
            const shown = allKeys.slice(0, 3).join(', ');
            return `JSON object {${shown}${allKeys.length > 3 ? ', ...' : ''}}`;
        }
        catch {
            // Starts like JSON but does not parse.
            return 'JSON-like text';
        }
    }
    if (text.startsWith('[')) {
        try {
            const parsed = JSON.parse(text);
            if (Array.isArray(parsed)) {
                return `JSON array (${parsed.length} items)`;
            }
        }
        catch {
            return 'JSON-like text';
        }
    }
    // Plain text: distinguish multi-line, long, and short payloads
    if (text.includes('\n')) {
        return `multi-line text (${text.split('\n').length} lines)`;
    }
    if (text.length > 100) {
        return `text (${text.length} chars)`;
    }
    return 'text';
}
627
/**
 * Produce a JSON example invocation for a tool, taken from a real call.
 *
 * Synthetic examples are never generated: the snippet is built from the
 * arguments of the first interaction that has no error, a non-error response,
 * and whose first text item does not look like an error message.
 *
 * @param {object} tool - Tool definition; only `name` is read.
 * @param {object|undefined} profile - Tool profile holding `interactions`.
 * @returns {string|null} Pretty-printed JSON `{ tool, arguments }`, or null
 *   when there is no profile, no interaction, or no qualifying call.
 */
function generateExampleSnippet(tool, profile) {
    if (!profile || profile.interactions.length === 0) {
        return null;
    }
    const isCleanSuccess = (interaction) => {
        const { error, response } = interaction;
        if (error || !response || response.isError) {
            return false;
        }
        const textPart = response.content?.find((part) => part.type === 'text');
        const hasText = Boolean(textPart) && 'text' in textPart;
        // A nominally successful response whose text reads like an error is rejected.
        return !(hasText && looksLikeError(String(textPart.text)));
    };
    const chosen = profile.interactions.find(isCleanSuccess);
    if (!chosen) {
        return null;
    }
    const payload = {
        tool: tool.name,
        arguments: chosen.question.args,
    };
    return JSON.stringify(payload, null, 2);
}
656
/**
 * Build a "Sample Response" block for a tool from a real successful call.
 *
 * The first interaction that has no error, a non-error response, and whose
 * first text item does not look like an error message is used. Its text is
 * cut to 500 characters before being handed to `validateJsonForCodeBlock`.
 *
 * @param {object} profile - Tool profile; `interactions` is read.
 * @returns {string[]} Markdown lines, or an empty array when there is no
 *   qualifying interaction or it carries no non-empty text content.
 */
function generateSampleResponse(profile) {
    const MAX_SAMPLE_LENGTH = 500;
    const isTextPart = (part) => part.type === 'text';
    const chosen = profile.interactions.find((interaction) => {
        if (interaction.error || !interaction.response || interaction.response.isError) {
            return false;
        }
        // Reject responses that succeeded nominally but read like an error.
        const part = interaction.response.content?.find(isTextPart);
        return !(part && 'text' in part && looksLikeError(String(part.text)));
    });
    if (!chosen || !chosen.response?.content) {
        return [];
    }
    const textPart = chosen.response.content.find(isTextPart);
    if (!textPart || !('text' in textPart)) {
        return [];
    }
    const fullText = String(textPart.text);
    if (fullText.length === 0) {
        return [];
    }
    // Truncate very long responses before formatting.
    const wasTruncated = fullText.length > MAX_SAMPLE_LENGTH;
    const shownText = wasTruncated ? fullText.substring(0, MAX_SAMPLE_LENGTH) : fullText;
    const formatted = validateJsonForCodeBlock(shownText, {
        maxLength: wasTruncated ? MAX_SAMPLE_LENGTH : undefined,
        truncationIndicator: ' ...',
    });
    return ['**Sample Response:**', '```', formatted.content, '```', ''];
}
704
/**
 * Split tool limitations into constraints shared by many tools and
 * constraints that remain specific to individual tools.
 *
 * Limitations are grouped either by a known pattern (all matches share one
 * label) or, failing that, by their normalized text. Each group is counted at
 * most once per tool. A group is "common" when it occurs in at least
 * max(MATH_FACTORS.MIN_COMMON_CONSTRAINT_THRESHOLD, floor(toolCount / 2)) tools.
 *
 * Every limitation that does not belong to a common group stays in its
 * tool's list; only exact repeats (after normalization) within one tool are
 * collapsed.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `limitations` are read.
 * @returns {{ common: string[], byTool: Map<string, string[]> }} Display text
 *   of the common constraints, and the remaining limitations per tool name.
 *   A tool whose limitations were all common maps to an empty array; a tool
 *   without limitations has no entry.
 */
function extractCommonConstraints(profiles) {
    // group key -> { count: number of tools in the group, display: text shown for it }
    const constraintCounts = new Map();
    const toolConstraints = new Map();
    const normalizeConstraint = (c) => {
        return c.toLowerCase()
            .replace(/['"`]/g, '')
            .replace(/\s+/g, ' ')
            .trim();
    };
    // Common patterns that should be grouped
    const commonPatterns = [
        { pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i, label: 'Directory access restrictions apply' },
        { pattern: /path.*restriction|access denied.*path|outside.*allowed/i, label: 'Path access is restricted' },
        { pattern: /requires.*parameter|parameter.*required|missing.*parameter/i, label: 'Validates required parameters' },
    ];
    // Resolve the group a limitation belongs to (first matching pattern wins).
    const groupOf = (limitation) => {
        const grouped = commonPatterns.find(({ pattern }) => pattern.test(limitation));
        return grouped
            ? { key: grouped.label, display: grouped.label }
            : { key: normalizeConstraint(limitation), display: limitation };
    };
    for (const profile of profiles) {
        const countedKeys = new Set();
        const seenTexts = new Set();
        const ownLimitations = [];
        for (const limitation of profile.limitations) {
            const { key, display } = groupOf(limitation);
            // Count each group once per tool so one tool cannot make it "common".
            if (!countedKeys.has(key)) {
                countedKeys.add(key);
                const entry = constraintCounts.get(key);
                if (entry) {
                    entry.count++;
                }
                else {
                    constraintCounts.set(key, { count: 1, display });
                }
            }
            // Keep the limitation on the tool for now; common ones are removed
            // below once the threshold is known.
            const text = normalizeConstraint(limitation);
            if (!seenTexts.has(text)) {
                seenTexts.add(text);
                ownLimitations.push(limitation);
            }
        }
        if (ownLimitations.length > 0) {
            toolConstraints.set(profile.name, ownLimitations);
        }
    }
    // Groups present in at least this many tools are treated as common
    const threshold = Math.max(MATH_FACTORS.MIN_COMMON_CONSTRAINT_THRESHOLD, Math.floor(profiles.length / 2));
    const common = [];
    const commonKeys = new Set();
    for (const [key, { count, display }] of constraintCounts) {
        if (count >= threshold) {
            common.push(display);
            commonKeys.add(key);
        }
    }
    // Remove common constraints from per-tool lists
    if (commonKeys.size > 0) {
        for (const [toolName, constraints] of toolConstraints) {
            toolConstraints.set(toolName, constraints.filter(c => !commonKeys.has(groupOf(c).key)));
        }
    }
    return { common, byTool: toolConstraints };
}
781
/**
 * Render the "Global Constraints" section listing restrictions shared across tools.
 *
 * @param {string[]} common - Constraint descriptions to list.
 * @returns {string[]} Markdown lines, or an empty array when `common` is empty.
 */
function generateGlobalConstraintsSection(common) {
    if (common.length === 0) {
        return [];
    }
    const bullets = common.map((constraint) => `- ${constraint}`);
    return [
        '## Global Constraints',
        '',
        'The following restrictions apply to all tools:',
        '',
        ...bullets,
        '',
    ];
}
799
/**
 * Render the "Security Considerations" section.
 *
 * Every security note of every profile is classified with
 * `classifySecuritySeverity` and listed under a heading for its severity
 * (critical, then warning, then info). Headings with no findings are omitted.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `securityNotes` are read.
 * @returns {string[]} Markdown lines, or an empty array when no profile has security notes.
 */
function generateSecuritySection(profiles) {
    const findings = profiles.flatMap((profile) => profile.securityNotes.map((note) => ({
        tool: profile.name,
        note,
        severity: classifySecuritySeverity(note),
    })));
    if (findings.length === 0) {
        return [];
    }
    const out = ['## Security Considerations', ''];
    // Severity buckets in the order they are presented.
    const sections = [
        ['critical', '### Critical Issues'],
        ['warning', '### Warnings'],
        ['info', '### Informational'],
    ];
    for (const [severity, heading] of sections) {
        const bucket = findings.filter((finding) => finding.severity === severity);
        if (bucket.length === 0) {
            continue;
        }
        out.push(heading, '');
        out.push(...bucket.map((finding) => `- **${finding.tool}**: ${finding.note}`));
        out.push('');
    }
    return out;
}
847
/**
 * Classify a security note as 'critical', 'warning', or 'info' based on the
 * keywords it contains (case-insensitive). Critical takes precedence over
 * warning; anything matching neither is 'info'.
 *
 * @param {string} note - Security note text.
 * @returns {'critical'|'warning'|'info'} Severity bucket for the note.
 */
function classifySecuritySeverity(note) {
    const lowerNote = note.toLowerCase();
    // Phrases are matched as substrings ('injection' also covers 'sql injection').
    const criticalPhrases = ['injection', 'remote code', 'arbitrary code', 'command execution'];
    // Short acronyms must stand alone as words: a plain substring test for
    // 'rce' also fires on "resource", "source", "enforce", etc.
    const criticalAcronyms = /\b(?:rce|xss)\b/;
    const warningKeywords = ['risk', 'vulnerab', 'dangerous', 'unsafe', 'leak', 'exposure', 'sensitive'];
    if (criticalAcronyms.test(lowerNote) || criticalPhrases.some(kw => lowerNote.includes(kw))) {
        return 'critical';
    }
    if (warningKeywords.some(kw => lowerNote.includes(kw))) {
        return 'warning';
    }
    return 'info';
}
862
/**
 * Generate Performance section for AGENTS.md.
 *
 * Emits a response-time table for every metric returned by
 * `calculatePerformanceMetrics`, an optional timing-breakdown table for tools
 * that report both `avgToolMs` and `avgAnalysisMs`, and an insights list
 * covering slow tools (avg > 1s), tools whose time is dominated by LLM
 * analysis (tool execution < 30% of total), and unreliable tools
 * (error rate > 30%).
 *
 * @param {Array<object>} profiles - Tool profiles passed to `calculatePerformanceMetrics`.
 * @returns {string[]} Markdown lines, or an empty array when there are no metrics.
 */
function generatePerformanceSection(profiles) {
    const metrics = calculatePerformanceMetrics(profiles);
    if (metrics.length === 0) {
        return [];
    }
    const hasBreakdown = (m) => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined;
    // Fraction of the average time spent executing the tool itself; null when
    // no time was recorded, in which case no meaningful share exists.
    const toolShare = (m) => (m.avgMs > 0 ? m.avgToolMs / m.avgMs : null);
    const lines = [];
    lines.push('## Performance');
    lines.push('');
    lines.push('Response time metrics from interview sessions (in milliseconds):');
    lines.push('');
    lines.push('| Tool | Calls | Avg | P50 | P95 | Max | Error Rate |');
    lines.push('|------|-------|-----|-----|-----|-----|------------|');
    for (const m of metrics) {
        const errorPct = (m.errorRate * 100).toFixed(0);
        // Error rates above 50% are rendered in bold
        const errorDisplay = m.errorRate > 0.5 ? `**${errorPct}%**` : `${errorPct}%`;
        // Escape table cell content
        lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.callCount} | ${m.avgMs}ms | ${m.p50Ms}ms | ${m.p95Ms}ms | ${m.maxMs}ms | ${errorDisplay} |`);
    }
    lines.push('');
    // Add timing breakdown if separate timing data is available
    const metricsWithBreakdown = metrics.filter(hasBreakdown);
    if (metricsWithBreakdown.length > 0) {
        lines.push('### Timing Breakdown');
        lines.push('');
        lines.push('Breakdown of total time into tool execution (MCP) and LLM analysis:');
        lines.push('');
        lines.push('| Tool | Total Avg | Tool Exec | LLM Analysis | Tool % |');
        lines.push('|------|-----------|-----------|--------------|--------|');
        for (const m of metricsWithBreakdown) {
            const share = toolShare(m);
            const toolPct = share === null ? 0 : Math.round(share * 100);
            lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.avgMs}ms | ${m.avgToolMs}ms | ${m.avgAnalysisMs}ms | ${toolPct}% |`);
        }
        lines.push('');
    }
    // Add performance insights
    const slowTools = metrics.filter(m => m.avgMs > 1000);
    const unreliableTools = metrics.filter(m => m.errorRate > 0.3);
    // Tool execution below 30% of the total means LLM analysis is above 70%.
    // Tools with no recorded time are skipped: with a 0ms total there is
    // nothing for the analysis to dominate.
    const llmDominatedTools = metricsWithBreakdown.filter(m => {
        const share = toolShare(m);
        return share !== null && share < 0.3;
    });
    if (slowTools.length > 0 || unreliableTools.length > 0 || llmDominatedTools.length > 0) {
        lines.push('### Performance Insights');
        lines.push('');
        if (slowTools.length > 0) {
            lines.push('**Slow Tools** (avg > 1s):');
            for (const tool of slowTools) {
                // Include breakdown if available
                if (hasBreakdown(tool)) {
                    lines.push(`- \`${tool.toolName}\`: ${tool.avgMs}ms average (tool: ${tool.avgToolMs}ms, analysis: ${tool.avgAnalysisMs}ms)`);
                }
                else {
                    lines.push(`- \`${tool.toolName}\`: ${tool.avgMs}ms average`);
                }
            }
            lines.push('');
        }
        if (llmDominatedTools.length > 0) {
            lines.push('**LLM Analysis Dominated** (tool execution < 30% of total):');
            lines.push('');
            lines.push('These timings are dominated by LLM analysis rather than actual tool execution:');
            for (const tool of llmDominatedTools) {
                lines.push(`- \`${tool.toolName}\`: tool exec ${tool.avgToolMs}ms vs analysis ${tool.avgAnalysisMs}ms`);
            }
            lines.push('');
        }
        if (unreliableTools.length > 0) {
            lines.push('**Unreliable Tools** (error rate > 30%):');
            for (const tool of unreliableTools) {
                const errorPct = (tool.errorRate * 100).toFixed(0);
                lines.push(`- \`${tool.toolName}\`: ${errorPct}% error rate`);
            }
            lines.push('');
        }
    }
    return lines;
}
945
/**
 * Render the "Behavioral Matrix": a table with one row per tool and one
 * column per persona, each cell summarizing how many findings that persona
 * produced for that tool.
 *
 * A cell shows a warning icon when the persona recorded any limitations or
 * security notes, a check mark otherwise, and '-' when the persona has no
 * findings entry for the tool.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `findingsByPersona` are read.
 * @param {Array<object>} personas - Personas; `id` and `name` are read.
 * @returns {string[]} Markdown lines, or an empty array when no profile has per-persona findings.
 */
function generateBehavioralMatrix(profiles, personas) {
    const anyPersonaFindings = profiles.some((profile) => profile.findingsByPersona && profile.findingsByPersona.length > 0);
    if (!anyPersonaFindings) {
        return [];
    }
    const toRow = (cells) => `| ${cells.join(' | ')} |`;
    const summarize = (profile, persona) => {
        const entry = profile.findingsByPersona?.find((candidate) => candidate.personaId === persona.id);
        if (!entry) {
            return '-';
        }
        const { behavioralNotes, limitations, securityNotes } = entry;
        const total = behavioralNotes.length + limitations.length + securityNotes.length;
        const flagged = securityNotes.length > 0 || limitations.length > 0;
        return `${flagged ? '⚠️' : '✓'} ${total} finding${total !== 1 ? 's' : ''}`;
    };
    // Persona names are escaped in case they contain special characters.
    const headerCells = ['Tool', ...personas.map((persona) => escapeTableCell(persona.name))];
    const bodyRows = profiles.map((profile) => toRow([
        escapeTableCell(profile.name),
        ...personas.map((persona) => summarize(profile, persona)),
    ]));
    return [
        '## Behavioral Matrix',
        '',
        'Summary of findings by tool and persona:',
        '',
        toRow(headerCells),
        toRow(headerCells.map(() => '---')),
        ...bodyRows,
        '',
        // Collapsible legend explaining the cell icons
        '<details>',
        '<summary>Matrix Legend</summary>',
        '',
        '- ✓ = No warnings or limitations found',
        '- ⚠️ = Warnings or limitations detected',
        '- Numbers indicate total findings (behavioral notes + limitations + security notes)',
        '',
        '</details>',
        '',
    ];
}
995
+ //# sourceMappingURL=agents.js.map