@tyvm/knowhow 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504) hide show
  1. package/autodoc/plugins/downloader/downloader.mdx +2 -2
  2. package/benchmarks/.dockerignore +7 -0
  3. package/benchmarks/README.md +166 -0
  4. package/benchmarks/docker/Dockerfile +68 -0
  5. package/benchmarks/example-config.yml +27 -0
  6. package/benchmarks/jest.config.js +13 -0
  7. package/benchmarks/package-lock.json +4297 -0
  8. package/benchmarks/package.json +39 -0
  9. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +2814 -0
  10. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +2014 -0
  11. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +3121 -0
  12. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +98 -0
  13. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +3256 -0
  14. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +4298 -0
  15. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3031 -0
  16. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +2990 -0
  17. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3256 -0
  18. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3007 -0
  19. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +3256 -0
  20. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +3036 -0
  21. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +3280 -0
  22. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +1920 -0
  23. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +3281 -0
  24. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +2896 -0
  25. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3011 -0
  26. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +3003 -0
  27. package/benchmarks/scripts/build-and-run.sh +47 -0
  28. package/benchmarks/scripts/clone-exercism.sh +92 -0
  29. package/benchmarks/scripts/validate.sh +48 -0
  30. package/benchmarks/src/__tests__/runner.test.ts +27 -0
  31. package/benchmarks/src/cli.ts +90 -0
  32. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +64 -0
  33. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +183 -0
  34. package/benchmarks/src/evaluators/index.ts +3 -0
  35. package/benchmarks/src/evaluators/types.ts +22 -0
  36. package/benchmarks/src/index.ts +3 -0
  37. package/benchmarks/src/providers.ts +13 -0
  38. package/benchmarks/src/runner.ts +824 -0
  39. package/benchmarks/src/types.ts +63 -0
  40. package/benchmarks/tsconfig.json +19 -0
  41. package/jest.config.js +2 -1
  42. package/leaderboard/README.md +148 -0
  43. package/leaderboard/app/api/benchmark-data/route.ts +131 -0
  44. package/leaderboard/app/api/benchmark-detail/route.ts +172 -0
  45. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +501 -0
  46. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +375 -0
  47. package/leaderboard/app/globals.css +27 -0
  48. package/leaderboard/app/layout.tsx +21 -0
  49. package/leaderboard/app/page.tsx +170 -0
  50. package/leaderboard/components/LeaderboardTable.tsx +168 -0
  51. package/leaderboard/components/PerformanceChart.tsx +109 -0
  52. package/leaderboard/next-env.d.ts +5 -0
  53. package/leaderboard/next.config.js +4 -0
  54. package/leaderboard/package-lock.json +6363 -0
  55. package/leaderboard/package.json +28 -0
  56. package/leaderboard/postcss.config.js +6 -0
  57. package/leaderboard/tailwind.config.js +17 -0
  58. package/leaderboard/tsconfig.json +28 -0
  59. package/leaderboard/types/benchmark.ts +67 -0
  60. package/leaderboard/utils/dataProcessor.ts +33 -0
  61. package/package.json +2 -1
  62. package/src/agents/base/base.ts +182 -24
  63. package/src/agents/base/prompt.ts +28 -0
  64. package/src/agents/index.ts +3 -0
  65. package/src/agents/patcher/patcher.ts +6 -4
  66. package/src/agents/setup/setup.ts +56 -0
  67. package/src/agents/tools/agentCall.ts +6 -2
  68. package/src/agents/tools/aiClient.ts +74 -8
  69. package/src/agents/tools/execCommand.ts +13 -14
  70. package/src/agents/tools/executeScript/README.md +16 -0
  71. package/src/agents/tools/index.ts +2 -0
  72. package/src/agents/tools/list.ts +73 -16
  73. package/src/agents/tools/startAgentTask.ts +109 -0
  74. package/src/agents/tools/textSearch.ts +1 -1
  75. package/src/agents/tools/visionTool.ts +31 -2
  76. package/src/agents/tools/ycmd/client.ts +608 -0
  77. package/src/agents/tools/ycmd/definitions.ts +294 -0
  78. package/src/agents/tools/ycmd/detection.ts +211 -0
  79. package/src/agents/tools/ycmd/index.ts +11 -0
  80. package/src/agents/tools/ycmd/installer.ts +251 -0
  81. package/src/agents/tools/ycmd/server.ts +535 -0
  82. package/src/agents/tools/ycmd/serverManager.ts +316 -0
  83. package/src/agents/tools/ycmd/tools/completion.ts +113 -0
  84. package/src/agents/tools/ycmd/tools/diagnostics.ts +155 -0
  85. package/src/agents/tools/ycmd/tools/getLocations.ts +173 -0
  86. package/src/agents/tools/ycmd/tools/goto.ts +169 -0
  87. package/src/agents/tools/ycmd/tools/refactor.ts +204 -0
  88. package/src/agents/tools/ycmd/tools/signature.ts +174 -0
  89. package/src/agents/tools/ycmd/tools/start.ts +95 -0
  90. package/src/agents/tools/ycmd/utils/pathUtils.ts +59 -0
  91. package/src/ai.ts +15 -0
  92. package/src/chat/CliChatService.ts +277 -0
  93. package/src/chat/modules/AgentModule.ts +985 -0
  94. package/src/chat/modules/AskModule.ts +98 -0
  95. package/src/chat/modules/BaseChatModule.ts +66 -0
  96. package/src/chat/modules/InternalChatModule.ts +174 -0
  97. package/src/chat/modules/SearchModule.ts +166 -0
  98. package/src/chat/modules/SetupModule.ts +185 -0
  99. package/src/chat/modules/SystemModule.ts +120 -0
  100. package/src/chat/modules/VoiceModule.ts +70 -0
  101. package/src/chat/modules/index.js +5 -0
  102. package/src/chat/types.ts +97 -0
  103. package/src/chat.ts +9 -1
  104. package/src/chat2.ts +62 -0
  105. package/src/cli.ts +264 -35
  106. package/src/clients/anthropic.ts +14 -7
  107. package/src/clients/gemini.ts +15 -7
  108. package/src/clients/http.ts +17 -7
  109. package/src/clients/index.ts +117 -4
  110. package/src/clients/knowhow.ts +7 -2
  111. package/src/clients/knowhowMcp.ts +118 -0
  112. package/src/clients/openai.ts +32 -8
  113. package/src/clients/types.ts +1 -0
  114. package/src/clients/xai.ts +17 -5
  115. package/src/config.ts +30 -5
  116. package/src/conversion.ts +4 -1
  117. package/src/login.ts +26 -9
  118. package/src/microphone.ts +0 -1
  119. package/src/plugins/downloader/downloader.ts +191 -49
  120. package/src/plugins/downloader/plugin.ts +3 -1
  121. package/src/plugins/plugins.ts +3 -0
  122. package/src/processors/CustomVariables.ts +425 -0
  123. package/src/processors/HarmonyToolProcessor.ts +264 -0
  124. package/src/processors/XmlToolCallProcessor.ts +533 -0
  125. package/src/processors/index.ts +3 -0
  126. package/src/prompts/KnowhowConfigExamples.ts +376 -0
  127. package/src/services/KnowhowClient.ts +49 -3
  128. package/src/services/Mcp.ts +42 -3
  129. package/src/services/McpServer.ts +14 -4
  130. package/src/services/McpWebsocketTransport.ts +21 -7
  131. package/src/services/MessageProcessor.ts +10 -5
  132. package/src/services/index.ts +5 -0
  133. package/src/services/script-execution/ScriptExecutor.ts +34 -1
  134. package/src/services/types.ts +17 -14
  135. package/src/types.ts +17 -0
  136. package/src/utils/index.ts +138 -0
  137. package/tests/XmlToolCallProcessor.test.ts +468 -0
  138. package/tests/manual/ycmd/debug_diagnostics_test.ts +127 -0
  139. package/tests/manual/ycmd/fixtures/debug_diagnostics.ts +26 -0
  140. package/tests/manual/ycmd/fixtures/file_change_test.ts +17 -0
  141. package/tests/manual/ycmd/minimal_advanced_test.ts +108 -0
  142. package/tests/manual/ycmd/simple_diagnostics_test.ts +61 -0
  143. package/tests/manual/ycmd/simple_test.ts +74 -0
  144. package/tests/manual/ycmd/test-typescript-sample.ts +34 -0
  145. package/tests/manual/ycmd/test_advanced_features.ts +407 -0
  146. package/tests/manual/ycmd/test_advanced_with_tools.ts +320 -0
  147. package/tests/manual/ycmd/test_comprehensive_typescript.ts +179 -0
  148. package/tests/manual/ycmd/test_diagnostics_file_changes.ts +249 -0
  149. package/tests/manual/ycmd/test_diagnostics_fix.ts +99 -0
  150. package/tests/manual/ycmd/test_diagnostics_simple.ts +100 -0
  151. package/tests/manual/ycmd/test_diagnostics_timing.ts +120 -0
  152. package/tests/manual/ycmd/test_discover_commands.ts +310 -0
  153. package/tests/manual/ycmd/test_endpoints.ts +115 -0
  154. package/tests/manual/ycmd/test_final_comprehensive.ts +218 -0
  155. package/tests/manual/ycmd/test_final_validation.ts +150 -0
  156. package/tests/manual/ycmd/test_implementation.js +42 -0
  157. package/tests/manual/ycmd/test_individual_ycmd_tool.ts +39 -0
  158. package/tests/manual/ycmd/test_server_manager.ts +52 -0
  159. package/tests/manual/ycmd/test_simple_debug.ts +86 -0
  160. package/tests/manual/ycmd/test_tsserver_workflow.js +83 -0
  161. package/tests/manual/ycmd/test_tsserver_workflow.ts +122 -0
  162. package/tests/manual/ycmd/test_typescript_simple.ts +48 -0
  163. package/tests/manual/ycmd/test_typescript_ycmd.ts +105 -0
  164. package/tests/manual/ycmd/test_workspace_config.ts +90 -0
  165. package/tests/manual/ycmd/test_ycmd_auto_start.ts +137 -0
  166. package/tests/manual/ycmd/test_ycmd_comprehensive.ts +73 -0
  167. package/tests/manual/ycmd/test_ycmd_connection.py +10 -0
  168. package/tests/manual/ycmd/test_ycmd_direct.ts +142 -0
  169. package/tests/manual/ycmd/test_ycmd_experiment.ts +48 -0
  170. package/tests/manual/ycmd/test_ycmd_final.ts +200 -0
  171. package/tests/manual/ycmd/test_ycmd_fixed.py +18 -0
  172. package/tests/manual/ycmd/test_ycmd_integration.ts +112 -0
  173. package/tests/manual/ycmd/test_ycmd_simple.ts +45 -0
  174. package/tests/manual/ycmd/test_ycmd_usage.py +27 -0
  175. package/tests/manual/ycmd/working_simple_test.ts +134 -0
  176. package/ts_build/src/agents/base/base.d.ts +15 -1
  177. package/ts_build/src/agents/base/base.js +121 -20
  178. package/ts_build/src/agents/base/base.js.map +1 -1
  179. package/ts_build/src/agents/base/prompt.d.ts +1 -1
  180. package/ts_build/src/agents/base/prompt.js +28 -0
  181. package/ts_build/src/agents/base/prompt.js.map +1 -1
  182. package/ts_build/src/agents/index.d.ts +2 -0
  183. package/ts_build/src/agents/index.js +2 -0
  184. package/ts_build/src/agents/index.js.map +1 -1
  185. package/ts_build/src/agents/patcher/patcher.js +6 -3
  186. package/ts_build/src/agents/patcher/patcher.js.map +1 -1
  187. package/ts_build/src/agents/setup/setup.d.ts +8 -0
  188. package/ts_build/src/agents/setup/setup.js +59 -0
  189. package/ts_build/src/agents/setup/setup.js.map +1 -0
  190. package/ts_build/src/agents/tools/agentCall.js +5 -2
  191. package/ts_build/src/agents/tools/agentCall.js.map +1 -1
  192. package/ts_build/src/agents/tools/aiClient.d.ts +6 -5
  193. package/ts_build/src/agents/tools/aiClient.js +37 -6
  194. package/ts_build/src/agents/tools/aiClient.js.map +1 -1
  195. package/ts_build/src/agents/tools/execCommand.d.ts +2 -2
  196. package/ts_build/src/agents/tools/execCommand.js +5 -6
  197. package/ts_build/src/agents/tools/execCommand.js.map +1 -1
  198. package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
  199. package/ts_build/src/agents/tools/index.d.ts +2 -0
  200. package/ts_build/src/agents/tools/index.js +2 -0
  201. package/ts_build/src/agents/tools/index.js.map +1 -1
  202. package/ts_build/src/agents/tools/list.js +66 -16
  203. package/ts_build/src/agents/tools/list.js.map +1 -1
  204. package/ts_build/src/agents/tools/startAgentTask.d.ts +13 -0
  205. package/ts_build/src/agents/tools/startAgentTask.js +74 -0
  206. package/ts_build/src/agents/tools/startAgentTask.js.map +1 -0
  207. package/ts_build/src/agents/tools/startChatTask.d.ts +13 -0
  208. package/ts_build/src/agents/tools/startChatTask.js +73 -0
  209. package/ts_build/src/agents/tools/startChatTask.js.map +1 -0
  210. package/ts_build/src/agents/tools/textSearch.js +1 -1
  211. package/ts_build/src/agents/tools/textSearch.js.map +1 -1
  212. package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
  213. package/ts_build/src/agents/tools/visionTool.js +23 -3
  214. package/ts_build/src/agents/tools/visionTool.js.map +1 -1
  215. package/ts_build/src/agents/tools/ycmd/client.d.ts +93 -0
  216. package/ts_build/src/agents/tools/ycmd/client.js +355 -0
  217. package/ts_build/src/agents/tools/ycmd/client.js.map +1 -0
  218. package/ts_build/src/agents/tools/ycmd/definitions.d.ts +345 -0
  219. package/ts_build/src/agents/tools/ycmd/definitions.js +298 -0
  220. package/ts_build/src/agents/tools/ycmd/definitions.js.map +1 -0
  221. package/ts_build/src/agents/tools/ycmd/detection.d.ts +11 -0
  222. package/ts_build/src/agents/tools/ycmd/detection.js +175 -0
  223. package/ts_build/src/agents/tools/ycmd/detection.js.map +1 -0
  224. package/ts_build/src/agents/tools/ycmd/index.d.ts +8 -0
  225. package/ts_build/src/agents/tools/ycmd/index.js +20 -0
  226. package/ts_build/src/agents/tools/ycmd/index.js.map +1 -0
  227. package/ts_build/src/agents/tools/ycmd/installer.d.ts +19 -0
  228. package/ts_build/src/agents/tools/ycmd/installer.js +196 -0
  229. package/ts_build/src/agents/tools/ycmd/installer.js.map +1 -0
  230. package/ts_build/src/agents/tools/ycmd/server.d.ts +35 -0
  231. package/ts_build/src/agents/tools/ycmd/server.js +363 -0
  232. package/ts_build/src/agents/tools/ycmd/server.js.map +1 -0
  233. package/ts_build/src/agents/tools/ycmd/serverManager.d.ts +39 -0
  234. package/ts_build/src/agents/tools/ycmd/serverManager.js +210 -0
  235. package/ts_build/src/agents/tools/ycmd/serverManager.js.map +1 -0
  236. package/ts_build/src/agents/tools/ycmd/tools/completion.d.ts +22 -0
  237. package/ts_build/src/agents/tools/ycmd/tools/completion.js +72 -0
  238. package/ts_build/src/agents/tools/ycmd/tools/completion.js.map +1 -0
  239. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.d.ts +42 -0
  240. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js +88 -0
  241. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js.map +1 -0
  242. package/ts_build/src/agents/tools/ycmd/tools/getLocations.d.ts +22 -0
  243. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js +142 -0
  244. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js.map +1 -0
  245. package/ts_build/src/agents/tools/ycmd/tools/goto.d.ts +20 -0
  246. package/ts_build/src/agents/tools/ycmd/tools/goto.js +101 -0
  247. package/ts_build/src/agents/tools/ycmd/tools/goto.js.map +1 -0
  248. package/ts_build/src/agents/tools/ycmd/tools/refactor.d.ts +32 -0
  249. package/ts_build/src/agents/tools/ycmd/tools/refactor.js +123 -0
  250. package/ts_build/src/agents/tools/ycmd/tools/refactor.js.map +1 -0
  251. package/ts_build/src/agents/tools/ycmd/tools/signature.d.ts +25 -0
  252. package/ts_build/src/agents/tools/ycmd/tools/signature.js +110 -0
  253. package/ts_build/src/agents/tools/ycmd/tools/signature.js.map +1 -0
  254. package/ts_build/src/agents/tools/ycmd/tools/start.d.ts +17 -0
  255. package/ts_build/src/agents/tools/ycmd/tools/start.js +65 -0
  256. package/ts_build/src/agents/tools/ycmd/tools/start.js.map +1 -0
  257. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.d.ts +4 -0
  258. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js +67 -0
  259. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js.map +1 -0
  260. package/ts_build/src/ai.d.ts +1 -0
  261. package/ts_build/src/ai.js +40 -1
  262. package/ts_build/src/ai.js.map +1 -1
  263. package/ts_build/src/chat/ChatCommandHandler.d.ts +36 -0
  264. package/ts_build/src/chat/ChatCommandHandler.js +268 -0
  265. package/ts_build/src/chat/ChatCommandHandler.js.map +1 -0
  266. package/ts_build/src/chat/ChatInputManager.d.ts +22 -0
  267. package/ts_build/src/chat/ChatInputManager.js +85 -0
  268. package/ts_build/src/chat/ChatInputManager.js.map +1 -0
  269. package/ts_build/src/chat/ChatManager.d.ts +49 -0
  270. package/ts_build/src/chat/ChatManager.js +271 -0
  271. package/ts_build/src/chat/ChatManager.js.map +1 -0
  272. package/ts_build/src/chat/ChatSession.d.ts +32 -0
  273. package/ts_build/src/chat/ChatSession.js +3 -0
  274. package/ts_build/src/chat/ChatSession.js.map +1 -0
  275. package/ts_build/src/chat/ChatSessionManager.d.ts +19 -0
  276. package/ts_build/src/chat/ChatSessionManager.js +188 -0
  277. package/ts_build/src/chat/ChatSessionManager.js.map +1 -0
  278. package/ts_build/src/chat/ChatStateManager.d.ts +58 -0
  279. package/ts_build/src/chat/ChatStateManager.js +156 -0
  280. package/ts_build/src/chat/ChatStateManager.js.map +1 -0
  281. package/ts_build/src/chat/CliChatService.d.ts +35 -0
  282. package/ts_build/src/chat/CliChatService.js +201 -0
  283. package/ts_build/src/chat/CliChatService.js.map +1 -0
  284. package/ts_build/src/chat/InterruptibleInput.d.ts +20 -0
  285. package/ts_build/src/chat/InterruptibleInput.js +109 -0
  286. package/ts_build/src/chat/InterruptibleInput.js.map +1 -0
  287. package/ts_build/src/chat/interfaces/ChatModule.d.ts +6 -0
  288. package/ts_build/src/chat/interfaces/ChatModule.js +3 -0
  289. package/ts_build/src/chat/interfaces/ChatModule.js.map +1 -0
  290. package/ts_build/src/chat/modules/AgentModule.d.ts +57 -0
  291. package/ts_build/src/chat/modules/AgentModule.js +709 -0
  292. package/ts_build/src/chat/modules/AgentModule.js.map +1 -0
  293. package/ts_build/src/chat/modules/AskModule.d.ts +10 -0
  294. package/ts_build/src/chat/modules/AskModule.js +63 -0
  295. package/ts_build/src/chat/modules/AskModule.js.map +1 -0
  296. package/ts_build/src/chat/modules/BaseChatModule.d.ts +14 -0
  297. package/ts_build/src/chat/modules/BaseChatModule.js +32 -0
  298. package/ts_build/src/chat/modules/BaseChatModule.js.map +1 -0
  299. package/ts_build/src/chat/modules/InternalChatModule.d.ts +24 -0
  300. package/ts_build/src/chat/modules/InternalChatModule.js +127 -0
  301. package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -0
  302. package/ts_build/src/chat/modules/SearchModule.d.ts +12 -0
  303. package/ts_build/src/chat/modules/SearchModule.js +119 -0
  304. package/ts_build/src/chat/modules/SearchModule.js.map +1 -0
  305. package/ts_build/src/chat/modules/SetupModule.d.ts +15 -0
  306. package/ts_build/src/chat/modules/SetupModule.js +147 -0
  307. package/ts_build/src/chat/modules/SetupModule.js.map +1 -0
  308. package/ts_build/src/chat/modules/SystemModule.d.ts +14 -0
  309. package/ts_build/src/chat/modules/SystemModule.js +90 -0
  310. package/ts_build/src/chat/modules/SystemModule.js.map +1 -0
  311. package/ts_build/src/chat/modules/VoiceModule.d.ts +11 -0
  312. package/ts_build/src/chat/modules/VoiceModule.js +57 -0
  313. package/ts_build/src/chat/modules/VoiceModule.js.map +1 -0
  314. package/ts_build/src/chat/types.d.ts +83 -0
  315. package/ts_build/src/chat/types.js +3 -0
  316. package/ts_build/src/chat/types.js.map +1 -0
  317. package/ts_build/src/chat.js +7 -1
  318. package/ts_build/src/chat.js.map +1 -1
  319. package/ts_build/src/chat2.d.ts +3 -0
  320. package/ts_build/src/chat2.js +47 -0
  321. package/ts_build/src/chat2.js.map +1 -0
  322. package/ts_build/src/cli.js +218 -37
  323. package/ts_build/src/cli.js.map +1 -1
  324. package/ts_build/src/clients/anthropic.d.ts +5 -2
  325. package/ts_build/src/clients/anthropic.js +12 -7
  326. package/ts_build/src/clients/anthropic.js.map +1 -1
  327. package/ts_build/src/clients/gemini.d.ts +6 -3
  328. package/ts_build/src/clients/gemini.js +13 -7
  329. package/ts_build/src/clients/gemini.js.map +1 -1
  330. package/ts_build/src/clients/http.d.ts +1 -0
  331. package/ts_build/src/clients/http.js +12 -5
  332. package/ts_build/src/clients/http.js.map +1 -1
  333. package/ts_build/src/clients/index.d.ts +10 -0
  334. package/ts_build/src/clients/index.js +74 -4
  335. package/ts_build/src/clients/index.js.map +1 -1
  336. package/ts_build/src/clients/knowhow.d.ts +3 -1
  337. package/ts_build/src/clients/knowhow.js +8 -2
  338. package/ts_build/src/clients/knowhow.js.map +1 -1
  339. package/ts_build/src/clients/knowhowMcp.d.ts +20 -0
  340. package/ts_build/src/clients/knowhowMcp.js +86 -0
  341. package/ts_build/src/clients/knowhowMcp.js.map +1 -0
  342. package/ts_build/src/clients/openai.d.ts +5 -2
  343. package/ts_build/src/clients/openai.js +29 -8
  344. package/ts_build/src/clients/openai.js.map +1 -1
  345. package/ts_build/src/clients/types.d.ts +1 -0
  346. package/ts_build/src/clients/xai.d.ts +5 -2
  347. package/ts_build/src/clients/xai.js +15 -5
  348. package/ts_build/src/clients/xai.js.map +1 -1
  349. package/ts_build/src/config.js +24 -3
  350. package/ts_build/src/config.js.map +1 -1
  351. package/ts_build/src/conversion.js +6 -4
  352. package/ts_build/src/conversion.js.map +1 -1
  353. package/ts_build/src/login.d.ts +1 -1
  354. package/ts_build/src/login.js +21 -7
  355. package/ts_build/src/login.js.map +1 -1
  356. package/ts_build/src/microphone.js.map +1 -1
  357. package/ts_build/src/plugins/downloader/downloader.d.ts +7 -5
  358. package/ts_build/src/plugins/downloader/downloader.js +147 -44
  359. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  360. package/ts_build/src/plugins/downloader/plugin.js +5 -3
  361. package/ts_build/src/plugins/downloader/plugin.js.map +1 -1
  362. package/ts_build/src/plugins/plugins.js +3 -0
  363. package/ts_build/src/plugins/plugins.js.map +1 -1
  364. package/ts_build/src/processors/CustomVariables.d.ts +32 -0
  365. package/ts_build/src/processors/CustomVariables.js +297 -0
  366. package/ts_build/src/processors/CustomVariables.js.map +1 -0
  367. package/ts_build/src/processors/HarmonyToolProcessor.d.ts +15 -0
  368. package/ts_build/src/processors/HarmonyToolProcessor.js +154 -0
  369. package/ts_build/src/processors/HarmonyToolProcessor.js.map +1 -0
  370. package/ts_build/src/processors/XmlToolCallProcessor.d.ts +14 -0
  371. package/ts_build/src/processors/XmlToolCallProcessor.js +357 -0
  372. package/ts_build/src/processors/XmlToolCallProcessor.js.map +1 -0
  373. package/ts_build/src/processors/index.d.ts +3 -0
  374. package/ts_build/src/processors/index.js +7 -1
  375. package/ts_build/src/processors/index.js.map +1 -1
  376. package/ts_build/src/prompts/KnowhowConfigExamples.d.ts +2 -0
  377. package/ts_build/src/prompts/KnowhowConfigExamples.js +379 -0
  378. package/ts_build/src/prompts/KnowhowConfigExamples.js.map +1 -0
  379. package/ts_build/src/services/KnowhowClient.d.ts +22 -0
  380. package/ts_build/src/services/KnowhowClient.js +14 -2
  381. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  382. package/ts_build/src/services/Mcp.d.ts +1 -0
  383. package/ts_build/src/services/Mcp.js +20 -3
  384. package/ts_build/src/services/Mcp.js.map +1 -1
  385. package/ts_build/src/services/McpServer.d.ts +1 -1
  386. package/ts_build/src/services/McpServer.js +8 -4
  387. package/ts_build/src/services/McpServer.js.map +1 -1
  388. package/ts_build/src/services/McpWebsocketTransport.js +17 -7
  389. package/ts_build/src/services/McpWebsocketTransport.js.map +1 -1
  390. package/ts_build/src/services/MessageProcessor.d.ts +1 -1
  391. package/ts_build/src/services/MessageProcessor.js +4 -4
  392. package/ts_build/src/services/MessageProcessor.js.map +1 -1
  393. package/ts_build/src/services/index.d.ts +2 -0
  394. package/ts_build/src/services/index.js +4 -0
  395. package/ts_build/src/services/index.js.map +1 -1
  396. package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +1 -0
  397. package/ts_build/src/services/script-execution/ScriptExecutor.js +23 -0
  398. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  399. package/ts_build/src/services/types.d.ts +2 -6
  400. package/ts_build/src/services/types.js +4 -4
  401. package/ts_build/src/services/types.js.map +1 -1
  402. package/ts_build/src/types.d.ts +11 -0
  403. package/ts_build/src/types.js +8 -0
  404. package/ts_build/src/types.js.map +1 -1
  405. package/ts_build/src/utils/index.d.ts +2 -0
  406. package/ts_build/src/utils/index.js +102 -1
  407. package/ts_build/src/utils/index.js.map +1 -1
  408. package/ts_build/tests/XmlToolCallProcessor.test.d.ts +1 -0
  409. package/ts_build/tests/XmlToolCallProcessor.test.js +376 -0
  410. package/ts_build/tests/XmlToolCallProcessor.test.js.map +1 -0
  411. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.d.ts +1 -0
  412. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js +114 -0
  413. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js.map +1 -0
  414. package/ts_build/tests/manual/ycmd/minimal_advanced_test.d.ts +2 -0
  415. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js +104 -0
  416. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js.map +1 -0
  417. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.d.ts +1 -0
  418. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js +74 -0
  419. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js.map +1 -0
  420. package/ts_build/tests/manual/ycmd/simple_test.d.ts +2 -0
  421. package/ts_build/tests/manual/ycmd/simple_test.js +82 -0
  422. package/ts_build/tests/manual/ycmd/simple_test.js.map +1 -0
  423. package/ts_build/tests/manual/ycmd/test-typescript-sample.d.ts +14 -0
  424. package/ts_build/tests/manual/ycmd/test-typescript-sample.js +20 -0
  425. package/ts_build/tests/manual/ycmd/test-typescript-sample.js.map +1 -0
  426. package/ts_build/tests/manual/ycmd/test_advanced_features.d.ts +2 -0
  427. package/ts_build/tests/manual/ycmd/test_advanced_features.js +297 -0
  428. package/ts_build/tests/manual/ycmd/test_advanced_features.js.map +1 -0
  429. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.d.ts +3 -0
  430. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js +262 -0
  431. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js.map +1 -0
  432. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.d.ts +2 -0
  433. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js +186 -0
  434. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js.map +1 -0
  435. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.d.ts +1 -0
  436. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js +174 -0
  437. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js.map +1 -0
  438. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.d.ts +2 -0
  439. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js +106 -0
  440. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js.map +1 -0
  441. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.d.ts +1 -0
  442. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js +104 -0
  443. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js.map +1 -0
  444. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.d.ts +1 -0
  445. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js +119 -0
  446. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js.map +1 -0
  447. package/ts_build/tests/manual/ycmd/test_discover_commands.d.ts +2 -0
  448. package/ts_build/tests/manual/ycmd/test_discover_commands.js +243 -0
  449. package/ts_build/tests/manual/ycmd/test_discover_commands.js.map +1 -0
  450. package/ts_build/tests/manual/ycmd/test_endpoints.d.ts +2 -0
  451. package/ts_build/tests/manual/ycmd/test_endpoints.js +120 -0
  452. package/ts_build/tests/manual/ycmd/test_endpoints.js.map +1 -0
  453. package/ts_build/tests/manual/ycmd/test_final_comprehensive.d.ts +2 -0
  454. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js +221 -0
  455. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js.map +1 -0
  456. package/ts_build/tests/manual/ycmd/test_final_validation.d.ts +2 -0
  457. package/ts_build/tests/manual/ycmd/test_final_validation.js +160 -0
  458. package/ts_build/tests/manual/ycmd/test_final_validation.js.map +1 -0
  459. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.d.ts +2 -0
  460. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js +37 -0
  461. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js.map +1 -0
  462. package/ts_build/tests/manual/ycmd/test_server_manager.d.ts +1 -0
  463. package/ts_build/tests/manual/ycmd/test_server_manager.js +38 -0
  464. package/ts_build/tests/manual/ycmd/test_server_manager.js.map +1 -0
  465. package/ts_build/tests/manual/ycmd/test_simple_debug.d.ts +2 -0
  466. package/ts_build/tests/manual/ycmd/test_simple_debug.js +99 -0
  467. package/ts_build/tests/manual/ycmd/test_simple_debug.js.map +1 -0
  468. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.d.ts +1 -0
  469. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js +128 -0
  470. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js.map +1 -0
  471. package/ts_build/tests/manual/ycmd/test_typescript_simple.d.ts +1 -0
  472. package/ts_build/tests/manual/ycmd/test_typescript_simple.js +66 -0
  473. package/ts_build/tests/manual/ycmd/test_typescript_simple.js.map +1 -0
  474. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.d.ts +1 -0
  475. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js +105 -0
  476. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js.map +1 -0
  477. package/ts_build/tests/manual/ycmd/test_workspace_config.d.ts +1 -0
  478. package/ts_build/tests/manual/ycmd/test_workspace_config.js +89 -0
  479. package/ts_build/tests/manual/ycmd/test_workspace_config.js.map +1 -0
  480. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.d.ts +2 -0
  481. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js +130 -0
  482. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js.map +1 -0
  483. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.d.ts +1 -0
  484. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js +83 -0
  485. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js.map +1 -0
  486. package/ts_build/tests/manual/ycmd/test_ycmd_direct.d.ts +2 -0
  487. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js +149 -0
  488. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js.map +1 -0
  489. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.d.ts +15 -0
  490. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js +58 -0
  491. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js.map +1 -0
  492. package/ts_build/tests/manual/ycmd/test_ycmd_final.d.ts +2 -0
  493. package/ts_build/tests/manual/ycmd/test_ycmd_final.js +195 -0
  494. package/ts_build/tests/manual/ycmd/test_ycmd_final.js.map +1 -0
  495. package/ts_build/tests/manual/ycmd/test_ycmd_integration.d.ts +3 -0
  496. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js +110 -0
  497. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js.map +1 -0
  498. package/ts_build/tests/manual/ycmd/test_ycmd_simple.d.ts +2 -0
  499. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js +36 -0
  500. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js.map +1 -0
  501. package/ts_build/tests/manual/ycmd/working_simple_test.d.ts +2 -0
  502. package/ts_build/tests/manual/ycmd/working_simple_test.js +134 -0
  503. package/ts_build/tests/manual/ycmd/working_simple_test.js.map +1 -0
  504. package/tsconfig.json +3 -1
@@ -0,0 +1,63 @@
1
+ import { TestResult } from './evaluators/types';
2
+
3
/**
 * Settings for a benchmark run; stored alongside the run's output in
 * `BenchmarkResults.config`.
 */
export interface BenchmarkConfig {
  language: string;
  maxExercises: number;
  model: string;
  provider: string;
  /** Agent type to use (default: 'Patcher'). */
  agent?: string;
  limits: BenchmarkLimits;
  outputFile: string;
}
12
+
13
/** Limits carried in `BenchmarkConfig.limits`. */
export interface BenchmarkLimits {
  maxTurns: number;
  /** Time limit, in seconds. */
  maxTime: number;
  /** Cost limit, in dollars. */
  maxCost: number;
}
18
+
19
/** Outcome recorded for one exercise of a benchmark run. */
export interface ExerciseResult {
  exerciseName: string;
  status:
    | 'success'
    | 'failure'
    | 'timeout'
    | 'cost_limit'
    | 'turn_limit';
  /** Actual test execution results, when available. */
  testResult?: TestResult;
  turns: number;
  /** Elapsed time, in seconds. */
  timeElapsed: number;
  /** Cost, in dollars. */
  cost: number;
  startTime: Date;
  endTime: Date;
  errorMessage?: string;
  finalOutput?: string;
}
31
+
32
/**
 * Complete output of a benchmark run: the configuration used, the
 * per-exercise results, and aggregate summary figures.
 */
export interface BenchmarkResults {
  config: BenchmarkConfig;
  exercises: ExerciseResult[];
  summary: {
    totalExercises: number;
    /** Exercises that had evaluatable tests. */
    testableExercises: number;
    /** Exercises where all tests passed. */
    testsPassedCount: number;
    /** Exercises where some tests failed. */
    testsFailedCount: number;
    /** Percentage of testable exercises where tests passed. */
    testPassRate: number;
    /** Original success rate (agent thinks it succeeded). */
    agentSuccessRate: number;
    successCount: number;
    failureCount: number;
    timeoutCount: number;
    costLimitCount: number;
    turnLimitCount: number;
    totalTime: number;
    totalCost: number;
    averageTurns: number;
    averageTime: number;
    successRate: number;
  };
  startTime: Date;
  endTime: Date;
}
56
+
57
/** Descriptor of a benchmark exercise. */
export interface Exercise {
  name: string;
  path: string;
  description?: string;
  hasTests: boolean;
  files: string[];
}
@@ -0,0 +1,19 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "commonjs",
5
+ "lib": ["ES2020"],
6
+ "outDir": "./dist",
7
+ "rootDir": "./src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "resolveJsonModule": true,
13
+ "declaration": true,
14
+ "declarationMap": true,
15
+ "sourceMap": true
16
+ },
17
+ "include": ["src/**/*"],
18
+ "exclude": ["node_modules", "dist", "**/*.test.ts"]
19
+ }
package/jest.config.js CHANGED
@@ -14,5 +14,6 @@ module.exports = {
14
14
  testEnvironment: 'node',
15
15
  testRegex: '/tests/.*\.(test|spec)?\.(ts|tsx|js)$',
16
16
  moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
17
- modulePathIgnorePatterns: ["ts_build"]
17
+ modulePathIgnorePatterns: ["ts_build", "benchmarks"],
18
+ testPathIgnorePatterns: ["<rootDir>/benchmarks/"]
18
19
  };
@@ -0,0 +1,148 @@
1
+ # Benchmark Results Leaderboard
2
+
3
+ A Next.js application to display and analyze benchmark results from coding exercise evaluations.
4
+
5
+ ## Features
6
+
7
+ - **Interactive Leaderboard**: Sortable table showing model performance metrics
8
+ - **Data Visualization**: Charts comparing success rates and cost vs performance
9
+ - **Model Comparison**: Detailed statistics for each model/provider/language combination
10
+ - **Responsive Design**: Works on desktop and mobile devices
11
+ - **Real-time Data**: Automatically loads latest benchmark results
12
+
13
+ ## Getting Started
14
+
15
+ ### Prerequisites
16
+
17
+ - Node.js 18+
18
+ - npm or yarn
19
+
20
+ ### Installation
21
+
22
+ 1. Install dependencies:
23
+ ```bash
24
+ npm install
25
+ ```
26
+
27
+ 2. Run the development server:
28
+ ```bash
29
+ npm run dev
30
+ ```
31
+
32
+ 3. Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
33
+
34
+ ### Data Source
35
+
36
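+ The application reads benchmark results from the JSON files under `../benchmarks/results/`; nested subdirectories are searched recursively and every `.json` file with the expected structure is loaded. Make sure to run benchmarks first to generate data.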
37
+
38
+ Expected file structure:
39
+ ```
40
+ benchmarks/
41
+ results/
42
+ results.json # Main results file
43
+ # Additional result files can be added here
44
+ ```
45
+
46
+ ## Project Structure
47
+
48
+ ```
49
+ leaderboard/
50
+ ├── app/
51
+ │ ├── globals.css # Global styles
52
+ │ ├── layout.tsx # Root layout
53
+ │ └── page.tsx # Main page
54
+ ├── components/
55
+ │ ├── LeaderboardTable.tsx # Sortable results table
56
+ │ └── PerformanceChart.tsx # Data visualization
57
+ ├── types/
58
+ │ └── benchmark.ts # TypeScript interfaces
59
+ ├── utils/
60
+ │ └── dataProcessor.ts # Data loading and aggregation
61
+ └── package.json
62
+ ```
63
+
64
+ ## Available Scripts
65
+
66
+ - `npm run dev` - Start development server
67
+ - `npm run build` - Build for production
68
+ - `npm run start` - Start production server
69
+ - `npm run lint` - Run ESLint
70
+
71
+ ## Metrics Displayed
72
+
73
+ - **Success Rate**: Percentage of exercises completed successfully
74
+ - **Total Exercises**: Number of exercises attempted
75
+ - **Average Cost**: Mean cost per exercise in USD
76
+ - **Average Time**: Mean time per exercise in seconds
77
+ - **Average Turns**: Mean number of agent turns per exercise
78
+ - **Total Runs**: Number of benchmark runs for this model/language
79
+
80
+ ## Charts
81
+
82
+ 1. **Success Rate Comparison**: Bar chart showing top 10 models by success rate
83
+ 2. **Cost vs Performance**: Scatter plot comparing cost efficiency vs success rate
84
+
85
+ ## Customization
86
+
87
+ ### Adding New Data Sources
88
+
89
+ Modify `utils/dataProcessor.ts` to load additional result files or change the aggregation logic.
90
+
91
+ ### Styling
92
+
93
+ The application uses Tailwind CSS. Modify component styles directly in the JSX files or update `globals.css` for global changes.
94
+
95
+ ### Adding New Charts
96
+
97
+ Use the Recharts library to create additional visualizations in `components/PerformanceChart.tsx`.
98
+
99
+ ## Deployment
100
+
101
+ ### Build for Production
102
+
103
+ ```bash
104
+ npm run build
105
+ npm run start
106
+ ```
107
+
108
+ ### Deploy to Vercel
109
+
110
+ The easiest way to deploy is using the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme).
111
+
112
+ ## Data Format
113
+
114
+ The application expects benchmark results in this format:
115
+
116
+ ```json
117
+ {
118
+ "config": {
119
+ "language": "javascript",
120
+ "model": "claude-sonnet-4",
121
+ "provider": "anthropic",
122
+ "maxExercises": 1,
123
+ "limits": {
124
+ "maxTurns": 20,
125
+ "maxTime": 300,
126
+ "maxCost": 1
127
+ }
128
+ },
129
+ "exercises": [
130
+ {
131
+ "exerciseName": "accumulate",
132
+ "status": "success",
133
+ "turns": 1,
134
+ "timeElapsed": 46.668,
135
+ "cost": 0.090424,
136
+ "startTime": "2025-08-02T07:26:04.029Z",
137
+ "endTime": "2025-08-02T07:26:50.697Z"
138
+ }
139
+ ],
140
+ "summary": {
141
+ "totalExercises": 1,
142
+ "successCount": 1,
143
+ "totalTime": 46.668,
144
+ "totalCost": 0.090424,
145
+ "successRate": 1
146
+ }
147
+ }
148
+ ```
@@ -0,0 +1,131 @@
1
+ import { NextRequest, NextResponse } from 'next/server';
2
+ import { BenchmarkResults, LeaderboardEntry } from '@/types/benchmark';
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+
6
/**
 * GET handler for the leaderboard endpoint.
 *
 * Loads every benchmark result file, aggregates the runs into one entry per
 * model/provider/language combination, and responds with the entries as JSON.
 *
 * If loading or aggregation throws, the error is logged. Outside production a
 * single placeholder entry is returned so the UI can be developed without
 * benchmark data; in production a 500 response is returned instead, so that
 * fabricated numbers are never presented as real results.
 *
 * @param request Incoming request (currently unused).
 * @returns JSON response containing `LeaderboardEntry[]`, or `{ error }` with
 *          status 500 when loading fails in production.
 */
export async function GET(request: NextRequest) {
  try {
    const results = await loadAllBenchmarkResults();
    return NextResponse.json(aggregateResults(results));
  } catch (error) {
    console.error('Error loading benchmark results:', error);

    if (process.env.NODE_ENV === 'production') {
      return NextResponse.json(
        { error: 'Failed to load benchmark results' },
        { status: 500 }
      );
    }

    // Development-only placeholder so the page still renders something.
    const mockData: LeaderboardEntry[] = [
      {
        model: 'sample-model',
        provider: 'sample-provider',
        language: 'javascript',
        successRate: 85.5,
        totalExercises: 6,
        averageCost: 0.05,
        averageTime: 145.2,
        averageTurns: 12.4,
        totalRuns: 1,
        lastRun: new Date().toISOString()
      }
    ];

    return NextResponse.json(mockData);
  }
}
34
+
35
+ // Recursive function to find JSON files in nested directories
36
/**
 * Collects the paths of all `.json` files found under `dir`, descending into
 * subdirectories.
 *
 * @param dir Directory to scan.
 * @returns Paths (each built by joining onto `dir`) of every JSON file found;
 *          an empty array when `dir` cannot be read.
 */
function findBenchmarkFiles(dir: string): string[] {
  try {
    return fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const entryPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        return findBenchmarkFiles(entryPath);
      }

      const isJsonFile = entry.isFile() && entry.name.endsWith('.json');
      return isJsonFile ? [entryPath] : [];
    });
  } catch (error) {
    // Unreadable directories are deliberately skipped.
    return [];
  }
}
59
+
60
/**
 * Loads every benchmark result stored under `../benchmarks/results`, resolved
 * relative to the process working directory.
 *
 * A file is accepted only when its parsed JSON has truthy `config`, `summary`
 * and `exercises` properties. Files that fail to read or parse are logged and
 * skipped.
 *
 * @returns The accepted results; an empty array (after a warning) when the
 *          results directory does not exist.
 */
async function loadAllBenchmarkResults(): Promise<BenchmarkResults[]> {
  const resultsPath = path.join(process.cwd(), '..', 'benchmarks', 'results');

  if (!fs.existsSync(resultsPath)) {
    console.warn('Benchmark results directory not found:', resultsPath);
    return [];
  }

  const loaded: BenchmarkResults[] = [];

  // The recursive search copes with both the old and new directory layouts.
  for (const filePath of findBenchmarkFiles(resultsPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
      const looksLikeResult = parsed.config && parsed.summary && parsed.exercises;

      if (looksLikeResult) {
        loaded.push(parsed);
      }
    } catch (error) {
      console.error(`Error loading result file ${filePath}:`, error);
    }
  }

  return loaded;
}
88
+
89
/**
 * Extracts the per-run performance figures shown on the leaderboard.
 *
 * @param result A single benchmark run.
 * @returns The metric fields of a leaderboard entry derived from `result.summary`.
 */
function summarizeRun(result: BenchmarkResults) {
  const { summary } = result;

  return {
    successRate: summary.successRate * 100, // Convert from decimal to percentage
    totalExercises: summary.totalExercises,
    // A run with no exercises would otherwise divide by zero and produce NaN,
    // which JSON serializes as null.
    averageCost:
      summary.totalExercises > 0 ? summary.totalCost / summary.totalExercises : 0,
    averageTime: summary.averageTime,
    averageTurns: summary.averageTurns
  };
}

/**
 * Collapses benchmark runs into one leaderboard entry per
 * model/provider/language combination.
 *
 * Every run increments the entry's `totalRuns`, but the displayed metrics and
 * `lastRun` always come from the run with the most recent `endTime`.
 *
 * @param results All loaded benchmark runs, in any order.
 * @returns One entry per distinct combination, in first-seen order.
 */
function aggregateResults(results: BenchmarkResults[]): LeaderboardEntry[] {
  const entriesMap = new Map<string, LeaderboardEntry>();

  for (const result of results) {
    const { model, provider, language } = result.config;
    // JSON-encode the tuple so hyphenated names (e.g. "a-b" + "c" vs "a" + "b-c")
    // cannot collapse into the same key.
    const key = JSON.stringify([model, provider, language]);
    const existing = entriesMap.get(key);

    if (!existing) {
      entriesMap.set(key, {
        model,
        provider,
        language,
        ...summarizeRun(result),
        totalRuns: 1,
        lastRun: result.endTime
      });
      continue;
    }

    existing.totalRuns += 1;

    // Compare parsed instants rather than raw values so the ordering does not
    // depend on how the timestamps happen to be formatted.
    const isMoreRecent =
      new Date(result.endTime).getTime() > new Date(existing.lastRun).getTime();

    if (isMoreRecent) {
      Object.assign(existing, summarizeRun(result), { lastRun: result.endTime });
    }
  }

  return Array.from(entriesMap.values());
}
@@ -0,0 +1,172 @@
1
+ import { NextRequest, NextResponse } from 'next/server';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { BenchmarkResults } from '@/types/benchmark';
5
+
6
+ // Recursive function to find JSON files in nested directories
7
/**
 * Walks `dir` and its subdirectories, gathering the path of every file whose
 * name ends in `.json`.
 *
 * @param dir Directory to scan.
 * @returns Paths of the JSON files found, or an empty array when `dir` cannot
 *          be read.
 */
function findBenchmarkFiles(dir: string): string[] {
  try {
    return fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const entryPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        return findBenchmarkFiles(entryPath);
      }

      const isJsonFile = entry.isFile() && entry.name.endsWith('.json');
      return isJsonFile ? [entryPath] : [];
    });
  } catch (error) {
    // Unreadable directories are deliberately skipped.
    return [];
  }
}
30
+
31
/**
 * GET handler returning detailed benchmark results for one
 * model/provider/language combination.
 *
 * Query parameters:
 * - `model`, `provider`, `language` (required): matched exactly against the
 *   `config` of each result file.
 * - `timestamp` (optional): `endTime` of a specific run. When a run with that
 *   exact `endTime` exists it is returned as `latest` with an empty `history`;
 *   when none matches, the handler falls through to the default response.
 *
 * Responses:
 * - 400 when a required parameter is missing.
 * - 404 when the results directory is absent, no file matches the parameters,
 *   or no matching file has the expected structure.
 * - 200 with `{ latest, history, totalRuns }` otherwise.
 * - 500 when an unexpected error occurs.
 */
export async function GET(request: NextRequest) {
  const { searchParams } = new URL(request.url);
  const model = searchParams.get('model');
  const provider = searchParams.get('provider');
  const language = searchParams.get('language');
  const timestamp = searchParams.get('timestamp'); // Optional parameter to get specific run

  if (!model || !provider || !language) {
    return NextResponse.json(
      { error: 'Missing required parameters: model, provider, language' },
      { status: 400 }
    );
  }

  try {
    // Look for benchmark result files in the results directory
    const resultsDir = path.join(process.cwd(), '..', 'benchmarks', 'results');

    if (!fs.existsSync(resultsDir)) {
      return NextResponse.json(
        { error: 'Results directory not found' },
        { status: 404 }
      );
    }

    // Read and parse each file exactly once, both to test it against the
    // requested model/provider/language and to collect it.
    const allResults: BenchmarkResults[] = [];
    let matchingFileCount = 0;

    for (const filePath of findBenchmarkFiles(resultsDir)) {
      let data: any;
      try {
        data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
      } catch (parseError) {
        // Unreadable or malformed files are treated as non-matching.
        continue;
      }

      const config = data?.config;
      const isMatch =
        Boolean(config) &&
        config.model === model &&
        config.provider === provider &&
        config.language === language;

      if (!isMatch) {
        continue;
      }

      matchingFileCount += 1;

      // Validate that we have the expected structure
      if (data.exercises && data.summary) {
        allResults.push(data);
      }
    }

    if (matchingFileCount === 0) {
      return NextResponse.json(
        { error: 'No benchmark results found for the specified model, provider, and language' },
        { status: 404 }
      );
    }

    if (allResults.length === 0) {
      return NextResponse.json(
        { error: 'No valid benchmark results found' },
        { status: 404 }
      );
    }

    // Sort results by endTime (most recent first)
    allResults.sort((a, b) => new Date(b.endTime).getTime() - new Date(a.endTime).getTime());

    // If timestamp is provided, return that specific run
    if (timestamp) {
      const specificRun = allResults.find(result => result.endTime === timestamp);

      if (specificRun) {
        return NextResponse.json({
          latest: specificRun,
          history: [], // Don't need history for specific run view
          totalRuns: allResults.length
        });
      }
    }

    // Summarize every run, newest first. The run returned as `latest` is
    // included here as well.
    // NOTE(review): earlier comments said the latest run was skipped, but the
    // code never did so; confirm with the consumer which behaviour is intended.
    const historicalRuns = allResults.map(result => {
      const { summary } = result;

      return {
        endTime: result.endTime,
        successRate: summary.successRate * 100, // Convert to percentage
        totalExercises: summary.totalExercises,
        totalCost: summary.totalCost,
        averageTime: summary.averageTime,
        averageTurns: summary.averageTurns,
        // Include commit info if available
        commitHash: result.commitHash || 'unknown',
        // Average cost per exercise; 0 for runs with no exercises, which would
        // otherwise yield NaN (serialized as null).
        averageCost:
          summary.totalExercises > 0 ? summary.totalCost / summary.totalExercises : 0
      };
    });

    return NextResponse.json({
      // Latest detailed benchmark data
      latest: allResults[0],
      // Historical performance summary
      history: historicalRuns,
      // Total number of runs
      totalRuns: allResults.length
    });
  } catch (error) {
    console.error('Error reading benchmark detail:', error);
    return NextResponse.json(
      { error: 'Internal server error' },
      { status: 500 }
    );
  }
}