@tyvm/knowhow 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504)
  1. package/autodoc/plugins/downloader/downloader.mdx +2 -2
  2. package/benchmarks/.dockerignore +7 -0
  3. package/benchmarks/README.md +166 -0
  4. package/benchmarks/docker/Dockerfile +68 -0
  5. package/benchmarks/example-config.yml +27 -0
  6. package/benchmarks/jest.config.js +13 -0
  7. package/benchmarks/package-lock.json +4297 -0
  8. package/benchmarks/package.json +39 -0
  9. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +2814 -0
  10. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +2014 -0
  11. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +3121 -0
  12. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +98 -0
  13. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +3256 -0
  14. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +4298 -0
  15. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3031 -0
  16. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +2990 -0
  17. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3256 -0
  18. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3007 -0
  19. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +3256 -0
  20. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +3036 -0
  21. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +3280 -0
  22. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +1920 -0
  23. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +3281 -0
  24. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +2896 -0
  25. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3011 -0
  26. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +3003 -0
  27. package/benchmarks/scripts/build-and-run.sh +47 -0
  28. package/benchmarks/scripts/clone-exercism.sh +92 -0
  29. package/benchmarks/scripts/validate.sh +48 -0
  30. package/benchmarks/src/__tests__/runner.test.ts +27 -0
  31. package/benchmarks/src/cli.ts +90 -0
  32. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +64 -0
  33. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +183 -0
  34. package/benchmarks/src/evaluators/index.ts +3 -0
  35. package/benchmarks/src/evaluators/types.ts +22 -0
  36. package/benchmarks/src/index.ts +3 -0
  37. package/benchmarks/src/providers.ts +13 -0
  38. package/benchmarks/src/runner.ts +824 -0
  39. package/benchmarks/src/types.ts +63 -0
  40. package/benchmarks/tsconfig.json +19 -0
  41. package/jest.config.js +2 -1
  42. package/leaderboard/README.md +148 -0
  43. package/leaderboard/app/api/benchmark-data/route.ts +131 -0
  44. package/leaderboard/app/api/benchmark-detail/route.ts +172 -0
  45. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +501 -0
  46. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +375 -0
  47. package/leaderboard/app/globals.css +27 -0
  48. package/leaderboard/app/layout.tsx +21 -0
  49. package/leaderboard/app/page.tsx +170 -0
  50. package/leaderboard/components/LeaderboardTable.tsx +168 -0
  51. package/leaderboard/components/PerformanceChart.tsx +109 -0
  52. package/leaderboard/next-env.d.ts +5 -0
  53. package/leaderboard/next.config.js +4 -0
  54. package/leaderboard/package-lock.json +6363 -0
  55. package/leaderboard/package.json +28 -0
  56. package/leaderboard/postcss.config.js +6 -0
  57. package/leaderboard/tailwind.config.js +17 -0
  58. package/leaderboard/tsconfig.json +28 -0
  59. package/leaderboard/types/benchmark.ts +67 -0
  60. package/leaderboard/utils/dataProcessor.ts +33 -0
  61. package/package.json +2 -1
  62. package/src/agents/base/base.ts +182 -24
  63. package/src/agents/base/prompt.ts +28 -0
  64. package/src/agents/index.ts +3 -0
  65. package/src/agents/patcher/patcher.ts +6 -4
  66. package/src/agents/setup/setup.ts +56 -0
  67. package/src/agents/tools/agentCall.ts +6 -2
  68. package/src/agents/tools/aiClient.ts +74 -8
  69. package/src/agents/tools/execCommand.ts +13 -14
  70. package/src/agents/tools/executeScript/README.md +16 -0
  71. package/src/agents/tools/index.ts +2 -0
  72. package/src/agents/tools/list.ts +73 -16
  73. package/src/agents/tools/startAgentTask.ts +109 -0
  74. package/src/agents/tools/textSearch.ts +1 -1
  75. package/src/agents/tools/visionTool.ts +31 -2
  76. package/src/agents/tools/ycmd/client.ts +608 -0
  77. package/src/agents/tools/ycmd/definitions.ts +294 -0
  78. package/src/agents/tools/ycmd/detection.ts +211 -0
  79. package/src/agents/tools/ycmd/index.ts +11 -0
  80. package/src/agents/tools/ycmd/installer.ts +251 -0
  81. package/src/agents/tools/ycmd/server.ts +535 -0
  82. package/src/agents/tools/ycmd/serverManager.ts +316 -0
  83. package/src/agents/tools/ycmd/tools/completion.ts +113 -0
  84. package/src/agents/tools/ycmd/tools/diagnostics.ts +155 -0
  85. package/src/agents/tools/ycmd/tools/getLocations.ts +173 -0
  86. package/src/agents/tools/ycmd/tools/goto.ts +169 -0
  87. package/src/agents/tools/ycmd/tools/refactor.ts +204 -0
  88. package/src/agents/tools/ycmd/tools/signature.ts +174 -0
  89. package/src/agents/tools/ycmd/tools/start.ts +95 -0
  90. package/src/agents/tools/ycmd/utils/pathUtils.ts +59 -0
  91. package/src/ai.ts +15 -0
  92. package/src/chat/CliChatService.ts +277 -0
  93. package/src/chat/modules/AgentModule.ts +985 -0
  94. package/src/chat/modules/AskModule.ts +98 -0
  95. package/src/chat/modules/BaseChatModule.ts +66 -0
  96. package/src/chat/modules/InternalChatModule.ts +174 -0
  97. package/src/chat/modules/SearchModule.ts +166 -0
  98. package/src/chat/modules/SetupModule.ts +185 -0
  99. package/src/chat/modules/SystemModule.ts +120 -0
  100. package/src/chat/modules/VoiceModule.ts +70 -0
  101. package/src/chat/modules/index.js +5 -0
  102. package/src/chat/types.ts +97 -0
  103. package/src/chat.ts +9 -1
  104. package/src/chat2.ts +62 -0
  105. package/src/cli.ts +264 -35
  106. package/src/clients/anthropic.ts +14 -7
  107. package/src/clients/gemini.ts +15 -7
  108. package/src/clients/http.ts +17 -7
  109. package/src/clients/index.ts +117 -4
  110. package/src/clients/knowhow.ts +7 -2
  111. package/src/clients/knowhowMcp.ts +118 -0
  112. package/src/clients/openai.ts +32 -8
  113. package/src/clients/types.ts +1 -0
  114. package/src/clients/xai.ts +17 -5
  115. package/src/config.ts +30 -5
  116. package/src/conversion.ts +4 -1
  117. package/src/login.ts +26 -9
  118. package/src/microphone.ts +0 -1
  119. package/src/plugins/downloader/downloader.ts +191 -49
  120. package/src/plugins/downloader/plugin.ts +3 -1
  121. package/src/plugins/plugins.ts +3 -0
  122. package/src/processors/CustomVariables.ts +425 -0
  123. package/src/processors/HarmonyToolProcessor.ts +264 -0
  124. package/src/processors/XmlToolCallProcessor.ts +533 -0
  125. package/src/processors/index.ts +3 -0
  126. package/src/prompts/KnowhowConfigExamples.ts +376 -0
  127. package/src/services/KnowhowClient.ts +49 -3
  128. package/src/services/Mcp.ts +42 -3
  129. package/src/services/McpServer.ts +14 -4
  130. package/src/services/McpWebsocketTransport.ts +21 -7
  131. package/src/services/MessageProcessor.ts +10 -5
  132. package/src/services/index.ts +5 -0
  133. package/src/services/script-execution/ScriptExecutor.ts +34 -1
  134. package/src/services/types.ts +17 -14
  135. package/src/types.ts +17 -0
  136. package/src/utils/index.ts +138 -0
  137. package/tests/XmlToolCallProcessor.test.ts +468 -0
  138. package/tests/manual/ycmd/debug_diagnostics_test.ts +127 -0
  139. package/tests/manual/ycmd/fixtures/debug_diagnostics.ts +26 -0
  140. package/tests/manual/ycmd/fixtures/file_change_test.ts +17 -0
  141. package/tests/manual/ycmd/minimal_advanced_test.ts +108 -0
  142. package/tests/manual/ycmd/simple_diagnostics_test.ts +61 -0
  143. package/tests/manual/ycmd/simple_test.ts +74 -0
  144. package/tests/manual/ycmd/test-typescript-sample.ts +34 -0
  145. package/tests/manual/ycmd/test_advanced_features.ts +407 -0
  146. package/tests/manual/ycmd/test_advanced_with_tools.ts +320 -0
  147. package/tests/manual/ycmd/test_comprehensive_typescript.ts +179 -0
  148. package/tests/manual/ycmd/test_diagnostics_file_changes.ts +249 -0
  149. package/tests/manual/ycmd/test_diagnostics_fix.ts +99 -0
  150. package/tests/manual/ycmd/test_diagnostics_simple.ts +100 -0
  151. package/tests/manual/ycmd/test_diagnostics_timing.ts +120 -0
  152. package/tests/manual/ycmd/test_discover_commands.ts +310 -0
  153. package/tests/manual/ycmd/test_endpoints.ts +115 -0
  154. package/tests/manual/ycmd/test_final_comprehensive.ts +218 -0
  155. package/tests/manual/ycmd/test_final_validation.ts +150 -0
  156. package/tests/manual/ycmd/test_implementation.js +42 -0
  157. package/tests/manual/ycmd/test_individual_ycmd_tool.ts +39 -0
  158. package/tests/manual/ycmd/test_server_manager.ts +52 -0
  159. package/tests/manual/ycmd/test_simple_debug.ts +86 -0
  160. package/tests/manual/ycmd/test_tsserver_workflow.js +83 -0
  161. package/tests/manual/ycmd/test_tsserver_workflow.ts +122 -0
  162. package/tests/manual/ycmd/test_typescript_simple.ts +48 -0
  163. package/tests/manual/ycmd/test_typescript_ycmd.ts +105 -0
  164. package/tests/manual/ycmd/test_workspace_config.ts +90 -0
  165. package/tests/manual/ycmd/test_ycmd_auto_start.ts +137 -0
  166. package/tests/manual/ycmd/test_ycmd_comprehensive.ts +73 -0
  167. package/tests/manual/ycmd/test_ycmd_connection.py +10 -0
  168. package/tests/manual/ycmd/test_ycmd_direct.ts +142 -0
  169. package/tests/manual/ycmd/test_ycmd_experiment.ts +48 -0
  170. package/tests/manual/ycmd/test_ycmd_final.ts +200 -0
  171. package/tests/manual/ycmd/test_ycmd_fixed.py +18 -0
  172. package/tests/manual/ycmd/test_ycmd_integration.ts +112 -0
  173. package/tests/manual/ycmd/test_ycmd_simple.ts +45 -0
  174. package/tests/manual/ycmd/test_ycmd_usage.py +27 -0
  175. package/tests/manual/ycmd/working_simple_test.ts +134 -0
  176. package/ts_build/src/agents/base/base.d.ts +15 -1
  177. package/ts_build/src/agents/base/base.js +121 -20
  178. package/ts_build/src/agents/base/base.js.map +1 -1
  179. package/ts_build/src/agents/base/prompt.d.ts +1 -1
  180. package/ts_build/src/agents/base/prompt.js +28 -0
  181. package/ts_build/src/agents/base/prompt.js.map +1 -1
  182. package/ts_build/src/agents/index.d.ts +2 -0
  183. package/ts_build/src/agents/index.js +2 -0
  184. package/ts_build/src/agents/index.js.map +1 -1
  185. package/ts_build/src/agents/patcher/patcher.js +6 -3
  186. package/ts_build/src/agents/patcher/patcher.js.map +1 -1
  187. package/ts_build/src/agents/setup/setup.d.ts +8 -0
  188. package/ts_build/src/agents/setup/setup.js +59 -0
  189. package/ts_build/src/agents/setup/setup.js.map +1 -0
  190. package/ts_build/src/agents/tools/agentCall.js +5 -2
  191. package/ts_build/src/agents/tools/agentCall.js.map +1 -1
  192. package/ts_build/src/agents/tools/aiClient.d.ts +6 -5
  193. package/ts_build/src/agents/tools/aiClient.js +37 -6
  194. package/ts_build/src/agents/tools/aiClient.js.map +1 -1
  195. package/ts_build/src/agents/tools/execCommand.d.ts +2 -2
  196. package/ts_build/src/agents/tools/execCommand.js +5 -6
  197. package/ts_build/src/agents/tools/execCommand.js.map +1 -1
  198. package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
  199. package/ts_build/src/agents/tools/index.d.ts +2 -0
  200. package/ts_build/src/agents/tools/index.js +2 -0
  201. package/ts_build/src/agents/tools/index.js.map +1 -1
  202. package/ts_build/src/agents/tools/list.js +66 -16
  203. package/ts_build/src/agents/tools/list.js.map +1 -1
  204. package/ts_build/src/agents/tools/startAgentTask.d.ts +13 -0
  205. package/ts_build/src/agents/tools/startAgentTask.js +74 -0
  206. package/ts_build/src/agents/tools/startAgentTask.js.map +1 -0
  207. package/ts_build/src/agents/tools/startChatTask.d.ts +13 -0
  208. package/ts_build/src/agents/tools/startChatTask.js +73 -0
  209. package/ts_build/src/agents/tools/startChatTask.js.map +1 -0
  210. package/ts_build/src/agents/tools/textSearch.js +1 -1
  211. package/ts_build/src/agents/tools/textSearch.js.map +1 -1
  212. package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
  213. package/ts_build/src/agents/tools/visionTool.js +23 -3
  214. package/ts_build/src/agents/tools/visionTool.js.map +1 -1
  215. package/ts_build/src/agents/tools/ycmd/client.d.ts +93 -0
  216. package/ts_build/src/agents/tools/ycmd/client.js +355 -0
  217. package/ts_build/src/agents/tools/ycmd/client.js.map +1 -0
  218. package/ts_build/src/agents/tools/ycmd/definitions.d.ts +345 -0
  219. package/ts_build/src/agents/tools/ycmd/definitions.js +298 -0
  220. package/ts_build/src/agents/tools/ycmd/definitions.js.map +1 -0
  221. package/ts_build/src/agents/tools/ycmd/detection.d.ts +11 -0
  222. package/ts_build/src/agents/tools/ycmd/detection.js +175 -0
  223. package/ts_build/src/agents/tools/ycmd/detection.js.map +1 -0
  224. package/ts_build/src/agents/tools/ycmd/index.d.ts +8 -0
  225. package/ts_build/src/agents/tools/ycmd/index.js +20 -0
  226. package/ts_build/src/agents/tools/ycmd/index.js.map +1 -0
  227. package/ts_build/src/agents/tools/ycmd/installer.d.ts +19 -0
  228. package/ts_build/src/agents/tools/ycmd/installer.js +196 -0
  229. package/ts_build/src/agents/tools/ycmd/installer.js.map +1 -0
  230. package/ts_build/src/agents/tools/ycmd/server.d.ts +35 -0
  231. package/ts_build/src/agents/tools/ycmd/server.js +363 -0
  232. package/ts_build/src/agents/tools/ycmd/server.js.map +1 -0
  233. package/ts_build/src/agents/tools/ycmd/serverManager.d.ts +39 -0
  234. package/ts_build/src/agents/tools/ycmd/serverManager.js +210 -0
  235. package/ts_build/src/agents/tools/ycmd/serverManager.js.map +1 -0
  236. package/ts_build/src/agents/tools/ycmd/tools/completion.d.ts +22 -0
  237. package/ts_build/src/agents/tools/ycmd/tools/completion.js +72 -0
  238. package/ts_build/src/agents/tools/ycmd/tools/completion.js.map +1 -0
  239. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.d.ts +42 -0
  240. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js +88 -0
  241. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js.map +1 -0
  242. package/ts_build/src/agents/tools/ycmd/tools/getLocations.d.ts +22 -0
  243. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js +142 -0
  244. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js.map +1 -0
  245. package/ts_build/src/agents/tools/ycmd/tools/goto.d.ts +20 -0
  246. package/ts_build/src/agents/tools/ycmd/tools/goto.js +101 -0
  247. package/ts_build/src/agents/tools/ycmd/tools/goto.js.map +1 -0
  248. package/ts_build/src/agents/tools/ycmd/tools/refactor.d.ts +32 -0
  249. package/ts_build/src/agents/tools/ycmd/tools/refactor.js +123 -0
  250. package/ts_build/src/agents/tools/ycmd/tools/refactor.js.map +1 -0
  251. package/ts_build/src/agents/tools/ycmd/tools/signature.d.ts +25 -0
  252. package/ts_build/src/agents/tools/ycmd/tools/signature.js +110 -0
  253. package/ts_build/src/agents/tools/ycmd/tools/signature.js.map +1 -0
  254. package/ts_build/src/agents/tools/ycmd/tools/start.d.ts +17 -0
  255. package/ts_build/src/agents/tools/ycmd/tools/start.js +65 -0
  256. package/ts_build/src/agents/tools/ycmd/tools/start.js.map +1 -0
  257. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.d.ts +4 -0
  258. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js +67 -0
  259. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js.map +1 -0
  260. package/ts_build/src/ai.d.ts +1 -0
  261. package/ts_build/src/ai.js +40 -1
  262. package/ts_build/src/ai.js.map +1 -1
  263. package/ts_build/src/chat/ChatCommandHandler.d.ts +36 -0
  264. package/ts_build/src/chat/ChatCommandHandler.js +268 -0
  265. package/ts_build/src/chat/ChatCommandHandler.js.map +1 -0
  266. package/ts_build/src/chat/ChatInputManager.d.ts +22 -0
  267. package/ts_build/src/chat/ChatInputManager.js +85 -0
  268. package/ts_build/src/chat/ChatInputManager.js.map +1 -0
  269. package/ts_build/src/chat/ChatManager.d.ts +49 -0
  270. package/ts_build/src/chat/ChatManager.js +271 -0
  271. package/ts_build/src/chat/ChatManager.js.map +1 -0
  272. package/ts_build/src/chat/ChatSession.d.ts +32 -0
  273. package/ts_build/src/chat/ChatSession.js +3 -0
  274. package/ts_build/src/chat/ChatSession.js.map +1 -0
  275. package/ts_build/src/chat/ChatSessionManager.d.ts +19 -0
  276. package/ts_build/src/chat/ChatSessionManager.js +188 -0
  277. package/ts_build/src/chat/ChatSessionManager.js.map +1 -0
  278. package/ts_build/src/chat/ChatStateManager.d.ts +58 -0
  279. package/ts_build/src/chat/ChatStateManager.js +156 -0
  280. package/ts_build/src/chat/ChatStateManager.js.map +1 -0
  281. package/ts_build/src/chat/CliChatService.d.ts +35 -0
  282. package/ts_build/src/chat/CliChatService.js +201 -0
  283. package/ts_build/src/chat/CliChatService.js.map +1 -0
  284. package/ts_build/src/chat/InterruptibleInput.d.ts +20 -0
  285. package/ts_build/src/chat/InterruptibleInput.js +109 -0
  286. package/ts_build/src/chat/InterruptibleInput.js.map +1 -0
  287. package/ts_build/src/chat/interfaces/ChatModule.d.ts +6 -0
  288. package/ts_build/src/chat/interfaces/ChatModule.js +3 -0
  289. package/ts_build/src/chat/interfaces/ChatModule.js.map +1 -0
  290. package/ts_build/src/chat/modules/AgentModule.d.ts +57 -0
  291. package/ts_build/src/chat/modules/AgentModule.js +709 -0
  292. package/ts_build/src/chat/modules/AgentModule.js.map +1 -0
  293. package/ts_build/src/chat/modules/AskModule.d.ts +10 -0
  294. package/ts_build/src/chat/modules/AskModule.js +63 -0
  295. package/ts_build/src/chat/modules/AskModule.js.map +1 -0
  296. package/ts_build/src/chat/modules/BaseChatModule.d.ts +14 -0
  297. package/ts_build/src/chat/modules/BaseChatModule.js +32 -0
  298. package/ts_build/src/chat/modules/BaseChatModule.js.map +1 -0
  299. package/ts_build/src/chat/modules/InternalChatModule.d.ts +24 -0
  300. package/ts_build/src/chat/modules/InternalChatModule.js +127 -0
  301. package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -0
  302. package/ts_build/src/chat/modules/SearchModule.d.ts +12 -0
  303. package/ts_build/src/chat/modules/SearchModule.js +119 -0
  304. package/ts_build/src/chat/modules/SearchModule.js.map +1 -0
  305. package/ts_build/src/chat/modules/SetupModule.d.ts +15 -0
  306. package/ts_build/src/chat/modules/SetupModule.js +147 -0
  307. package/ts_build/src/chat/modules/SetupModule.js.map +1 -0
  308. package/ts_build/src/chat/modules/SystemModule.d.ts +14 -0
  309. package/ts_build/src/chat/modules/SystemModule.js +90 -0
  310. package/ts_build/src/chat/modules/SystemModule.js.map +1 -0
  311. package/ts_build/src/chat/modules/VoiceModule.d.ts +11 -0
  312. package/ts_build/src/chat/modules/VoiceModule.js +57 -0
  313. package/ts_build/src/chat/modules/VoiceModule.js.map +1 -0
  314. package/ts_build/src/chat/types.d.ts +83 -0
  315. package/ts_build/src/chat/types.js +3 -0
  316. package/ts_build/src/chat/types.js.map +1 -0
  317. package/ts_build/src/chat.js +7 -1
  318. package/ts_build/src/chat.js.map +1 -1
  319. package/ts_build/src/chat2.d.ts +3 -0
  320. package/ts_build/src/chat2.js +47 -0
  321. package/ts_build/src/chat2.js.map +1 -0
  322. package/ts_build/src/cli.js +218 -37
  323. package/ts_build/src/cli.js.map +1 -1
  324. package/ts_build/src/clients/anthropic.d.ts +5 -2
  325. package/ts_build/src/clients/anthropic.js +12 -7
  326. package/ts_build/src/clients/anthropic.js.map +1 -1
  327. package/ts_build/src/clients/gemini.d.ts +6 -3
  328. package/ts_build/src/clients/gemini.js +13 -7
  329. package/ts_build/src/clients/gemini.js.map +1 -1
  330. package/ts_build/src/clients/http.d.ts +1 -0
  331. package/ts_build/src/clients/http.js +12 -5
  332. package/ts_build/src/clients/http.js.map +1 -1
  333. package/ts_build/src/clients/index.d.ts +10 -0
  334. package/ts_build/src/clients/index.js +74 -4
  335. package/ts_build/src/clients/index.js.map +1 -1
  336. package/ts_build/src/clients/knowhow.d.ts +3 -1
  337. package/ts_build/src/clients/knowhow.js +8 -2
  338. package/ts_build/src/clients/knowhow.js.map +1 -1
  339. package/ts_build/src/clients/knowhowMcp.d.ts +20 -0
  340. package/ts_build/src/clients/knowhowMcp.js +86 -0
  341. package/ts_build/src/clients/knowhowMcp.js.map +1 -0
  342. package/ts_build/src/clients/openai.d.ts +5 -2
  343. package/ts_build/src/clients/openai.js +29 -8
  344. package/ts_build/src/clients/openai.js.map +1 -1
  345. package/ts_build/src/clients/types.d.ts +1 -0
  346. package/ts_build/src/clients/xai.d.ts +5 -2
  347. package/ts_build/src/clients/xai.js +15 -5
  348. package/ts_build/src/clients/xai.js.map +1 -1
  349. package/ts_build/src/config.js +24 -3
  350. package/ts_build/src/config.js.map +1 -1
  351. package/ts_build/src/conversion.js +6 -4
  352. package/ts_build/src/conversion.js.map +1 -1
  353. package/ts_build/src/login.d.ts +1 -1
  354. package/ts_build/src/login.js +21 -7
  355. package/ts_build/src/login.js.map +1 -1
  356. package/ts_build/src/microphone.js.map +1 -1
  357. package/ts_build/src/plugins/downloader/downloader.d.ts +7 -5
  358. package/ts_build/src/plugins/downloader/downloader.js +147 -44
  359. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  360. package/ts_build/src/plugins/downloader/plugin.js +5 -3
  361. package/ts_build/src/plugins/downloader/plugin.js.map +1 -1
  362. package/ts_build/src/plugins/plugins.js +3 -0
  363. package/ts_build/src/plugins/plugins.js.map +1 -1
  364. package/ts_build/src/processors/CustomVariables.d.ts +32 -0
  365. package/ts_build/src/processors/CustomVariables.js +297 -0
  366. package/ts_build/src/processors/CustomVariables.js.map +1 -0
  367. package/ts_build/src/processors/HarmonyToolProcessor.d.ts +15 -0
  368. package/ts_build/src/processors/HarmonyToolProcessor.js +154 -0
  369. package/ts_build/src/processors/HarmonyToolProcessor.js.map +1 -0
  370. package/ts_build/src/processors/XmlToolCallProcessor.d.ts +14 -0
  371. package/ts_build/src/processors/XmlToolCallProcessor.js +357 -0
  372. package/ts_build/src/processors/XmlToolCallProcessor.js.map +1 -0
  373. package/ts_build/src/processors/index.d.ts +3 -0
  374. package/ts_build/src/processors/index.js +7 -1
  375. package/ts_build/src/processors/index.js.map +1 -1
  376. package/ts_build/src/prompts/KnowhowConfigExamples.d.ts +2 -0
  377. package/ts_build/src/prompts/KnowhowConfigExamples.js +379 -0
  378. package/ts_build/src/prompts/KnowhowConfigExamples.js.map +1 -0
  379. package/ts_build/src/services/KnowhowClient.d.ts +22 -0
  380. package/ts_build/src/services/KnowhowClient.js +14 -2
  381. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  382. package/ts_build/src/services/Mcp.d.ts +1 -0
  383. package/ts_build/src/services/Mcp.js +20 -3
  384. package/ts_build/src/services/Mcp.js.map +1 -1
  385. package/ts_build/src/services/McpServer.d.ts +1 -1
  386. package/ts_build/src/services/McpServer.js +8 -4
  387. package/ts_build/src/services/McpServer.js.map +1 -1
  388. package/ts_build/src/services/McpWebsocketTransport.js +17 -7
  389. package/ts_build/src/services/McpWebsocketTransport.js.map +1 -1
  390. package/ts_build/src/services/MessageProcessor.d.ts +1 -1
  391. package/ts_build/src/services/MessageProcessor.js +4 -4
  392. package/ts_build/src/services/MessageProcessor.js.map +1 -1
  393. package/ts_build/src/services/index.d.ts +2 -0
  394. package/ts_build/src/services/index.js +4 -0
  395. package/ts_build/src/services/index.js.map +1 -1
  396. package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +1 -0
  397. package/ts_build/src/services/script-execution/ScriptExecutor.js +23 -0
  398. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  399. package/ts_build/src/services/types.d.ts +2 -6
  400. package/ts_build/src/services/types.js +4 -4
  401. package/ts_build/src/services/types.js.map +1 -1
  402. package/ts_build/src/types.d.ts +11 -0
  403. package/ts_build/src/types.js +8 -0
  404. package/ts_build/src/types.js.map +1 -1
  405. package/ts_build/src/utils/index.d.ts +2 -0
  406. package/ts_build/src/utils/index.js +102 -1
  407. package/ts_build/src/utils/index.js.map +1 -1
  408. package/ts_build/tests/XmlToolCallProcessor.test.d.ts +1 -0
  409. package/ts_build/tests/XmlToolCallProcessor.test.js +376 -0
  410. package/ts_build/tests/XmlToolCallProcessor.test.js.map +1 -0
  411. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.d.ts +1 -0
  412. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js +114 -0
  413. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js.map +1 -0
  414. package/ts_build/tests/manual/ycmd/minimal_advanced_test.d.ts +2 -0
  415. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js +104 -0
  416. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js.map +1 -0
  417. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.d.ts +1 -0
  418. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js +74 -0
  419. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js.map +1 -0
  420. package/ts_build/tests/manual/ycmd/simple_test.d.ts +2 -0
  421. package/ts_build/tests/manual/ycmd/simple_test.js +82 -0
  422. package/ts_build/tests/manual/ycmd/simple_test.js.map +1 -0
  423. package/ts_build/tests/manual/ycmd/test-typescript-sample.d.ts +14 -0
  424. package/ts_build/tests/manual/ycmd/test-typescript-sample.js +20 -0
  425. package/ts_build/tests/manual/ycmd/test-typescript-sample.js.map +1 -0
  426. package/ts_build/tests/manual/ycmd/test_advanced_features.d.ts +2 -0
  427. package/ts_build/tests/manual/ycmd/test_advanced_features.js +297 -0
  428. package/ts_build/tests/manual/ycmd/test_advanced_features.js.map +1 -0
  429. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.d.ts +3 -0
  430. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js +262 -0
  431. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js.map +1 -0
  432. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.d.ts +2 -0
  433. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js +186 -0
  434. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js.map +1 -0
  435. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.d.ts +1 -0
  436. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js +174 -0
  437. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js.map +1 -0
  438. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.d.ts +2 -0
  439. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js +106 -0
  440. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js.map +1 -0
  441. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.d.ts +1 -0
  442. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js +104 -0
  443. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js.map +1 -0
  444. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.d.ts +1 -0
  445. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js +119 -0
  446. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js.map +1 -0
  447. package/ts_build/tests/manual/ycmd/test_discover_commands.d.ts +2 -0
  448. package/ts_build/tests/manual/ycmd/test_discover_commands.js +243 -0
  449. package/ts_build/tests/manual/ycmd/test_discover_commands.js.map +1 -0
  450. package/ts_build/tests/manual/ycmd/test_endpoints.d.ts +2 -0
  451. package/ts_build/tests/manual/ycmd/test_endpoints.js +120 -0
  452. package/ts_build/tests/manual/ycmd/test_endpoints.js.map +1 -0
  453. package/ts_build/tests/manual/ycmd/test_final_comprehensive.d.ts +2 -0
  454. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js +221 -0
  455. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js.map +1 -0
  456. package/ts_build/tests/manual/ycmd/test_final_validation.d.ts +2 -0
  457. package/ts_build/tests/manual/ycmd/test_final_validation.js +160 -0
  458. package/ts_build/tests/manual/ycmd/test_final_validation.js.map +1 -0
  459. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.d.ts +2 -0
  460. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js +37 -0
  461. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js.map +1 -0
  462. package/ts_build/tests/manual/ycmd/test_server_manager.d.ts +1 -0
  463. package/ts_build/tests/manual/ycmd/test_server_manager.js +38 -0
  464. package/ts_build/tests/manual/ycmd/test_server_manager.js.map +1 -0
  465. package/ts_build/tests/manual/ycmd/test_simple_debug.d.ts +2 -0
  466. package/ts_build/tests/manual/ycmd/test_simple_debug.js +99 -0
  467. package/ts_build/tests/manual/ycmd/test_simple_debug.js.map +1 -0
  468. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.d.ts +1 -0
  469. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js +128 -0
  470. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js.map +1 -0
  471. package/ts_build/tests/manual/ycmd/test_typescript_simple.d.ts +1 -0
  472. package/ts_build/tests/manual/ycmd/test_typescript_simple.js +66 -0
  473. package/ts_build/tests/manual/ycmd/test_typescript_simple.js.map +1 -0
  474. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.d.ts +1 -0
  475. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js +105 -0
  476. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js.map +1 -0
  477. package/ts_build/tests/manual/ycmd/test_workspace_config.d.ts +1 -0
  478. package/ts_build/tests/manual/ycmd/test_workspace_config.js +89 -0
  479. package/ts_build/tests/manual/ycmd/test_workspace_config.js.map +1 -0
  480. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.d.ts +2 -0
  481. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js +130 -0
  482. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js.map +1 -0
  483. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.d.ts +1 -0
  484. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js +83 -0
  485. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js.map +1 -0
  486. package/ts_build/tests/manual/ycmd/test_ycmd_direct.d.ts +2 -0
  487. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js +149 -0
  488. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js.map +1 -0
  489. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.d.ts +15 -0
  490. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js +58 -0
  491. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js.map +1 -0
  492. package/ts_build/tests/manual/ycmd/test_ycmd_final.d.ts +2 -0
  493. package/ts_build/tests/manual/ycmd/test_ycmd_final.js +195 -0
  494. package/ts_build/tests/manual/ycmd/test_ycmd_final.js.map +1 -0
  495. package/ts_build/tests/manual/ycmd/test_ycmd_integration.d.ts +3 -0
  496. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js +110 -0
  497. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js.map +1 -0
  498. package/ts_build/tests/manual/ycmd/test_ycmd_simple.d.ts +2 -0
  499. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js +36 -0
  500. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js.map +1 -0
  501. package/ts_build/tests/manual/ycmd/working_simple_test.d.ts +2 -0
  502. package/ts_build/tests/manual/ycmd/working_simple_test.js +134 -0
  503. package/ts_build/tests/manual/ycmd/working_simple_test.js.map +1 -0
  504. package/tsconfig.json +3 -1
@@ -0,0 +1,501 @@
1
+ "use client";
2
+
3
+ import { useState, useEffect } from "react";
4
+ import { useParams, useRouter, useSearchParams } from "next/navigation";
5
+ import { BenchmarkResults, ExerciseResult } from "@/types/benchmark";
6
+ import {
7
+ formatCurrency,
8
+ formatTime,
9
+ formatPercentage,
10
+ } from "@/utils/dataProcessor";
11
+
12
/**
 * Summary of one past benchmark run, as listed in the "Historical
 * Performance" table of this page.
 */
interface HistoricalRun {
  // --- Identity of the run ---
  /** Parsed with `new Date(...)` for display and sent back as the `timestamp` query value. */
  endTime: string;
  /** Commit identifier; the table shows only its first 8 characters. */
  commitHash: string;

  // --- Outcome ---
  /**
   * Rendered through `formatPercentage` without scaling.
   * NOTE(review): the latest run's rate is multiplied by 100 before
   * formatting — confirm which scale the API uses for history entries.
   */
  successRate: number;
  /** Number of exercises in the run. */
  totalExercises: number;

  // --- Resource usage ---
  /** Not rendered by this page; presumably the summed cost of the run — confirm against the API. */
  totalCost: number;
  /** Rendered through `formatCurrency`. */
  averageCost: number;
  /** Rendered through `formatTime`. */
  averageTime: number;
  /** Rendered with one decimal place. */
  averageTurns: number;
}
22
+
23
/** Shape of the JSON payload this page reads from `/api/benchmark-detail`. */
interface DetailResponse {
  /**
   * Full results of the run being displayed.
   * NOTE(review): presumably the run matching `timestamp` when one is
   * requested, despite the name — confirm against the API route.
   */
  latest: BenchmarkResults;
  /** Run summaries rendered in the historical table; the section is hidden when empty. */
  history: HistoricalRun[];
  /** Run count shown in the historical section's subtitle. */
  totalRuns: number;
}
28
+
29
/** Tailwind classes shared by every status badge, whatever its colour. */
const STATUS_BADGE_BASE_CLASSES = "px-2 py-1 text-xs font-medium rounded-full";

/** Tailwind classes shared by every header cell of both tables on this page. */
const TABLE_HEADER_CELL_CLASSES =
  "px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider";

/** Tailwind classes for body cells rendered in the darker text colour. */
const STRONG_CELL_CLASSES = "px-6 py-4 whitespace-nowrap text-sm text-gray-900";

/** Tailwind classes for body cells rendered in the muted text colour. */
const MUTED_CELL_CLASSES = "px-6 py-4 whitespace-nowrap text-sm text-gray-500";

/** Tailwind classes for the white bordered panels used throughout the page. */
const PANEL_CLASSES = "bg-white rounded-lg shadow-sm border border-gray-200";

/** Column titles of the per-exercise results table, in display order. */
const EXERCISE_TABLE_COLUMNS: readonly string[] = [
  "Exercise",
  "Status",
  "Pass / Total",
  "Time",
  "Cost",
  "Turns",
  "Output",
];

/** Column titles of the historical runs table, in display order. */
const HISTORY_TABLE_COLUMNS: readonly string[] = [
  "Run Date",
  "Success Rate",
  "Exercises",
  "Avg Cost",
  "Avg Time",
  "Avg Turns",
  "Commit",
];

/**
 * Returns the Tailwind class string for a status badge.
 *
 * @param status - One of "success", "failure", "timeout", "cost_limit" or
 *   "turn_limit"; any other value gets the neutral gray badge.
 * @returns The base badge classes followed by the colour classes.
 */
function getStatusBadge(status: string): string {
  switch (status) {
    case "success":
      return `${STATUS_BADGE_BASE_CLASSES} bg-green-100 text-green-800`;
    case "failure":
      return `${STATUS_BADGE_BASE_CLASSES} bg-red-100 text-red-800`;
    case "timeout":
      return `${STATUS_BADGE_BASE_CLASSES} bg-yellow-100 text-yellow-800`;
    case "cost_limit":
      return `${STATUS_BADGE_BASE_CLASSES} bg-orange-100 text-orange-800`;
    case "turn_limit":
      return `${STATUS_BADGE_BASE_CLASSES} bg-purple-100 text-purple-800`;
    default:
      return `${STATUS_BADGE_BASE_CLASSES} bg-gray-100 text-gray-800`;
  }
}

/**
 * Returns the emoji shown next to a status badge.
 *
 * @param status - Same vocabulary as {@link getStatusBadge}; unknown values
 *   map to a question mark.
 */
function getStatusIcon(status: string): string {
  switch (status) {
    case "success":
      return "✅";
    case "failure":
      return "❌";
    case "timeout":
      return "⏰";
    case "cost_limit":
      return "💰";
    case "turn_limit":
      return "🔄";
    default:
      return "❓";
  }
}

/** Renders a table header row with one identically styled cell per title. */
function TableHead({ columns }: { columns: readonly string[] }) {
  return (
    <thead className="bg-gray-50">
      <tr>
        {columns.map((column) => (
          <th key={column} className={TABLE_HEADER_CELL_CLASSES}>
            {column}
          </th>
        ))}
      </tr>
    </thead>
  );
}

/**
 * Detail page for one model / provider / language combination.
 *
 * Reads `model`, `provider` and `language` from the route params and an
 * optional `timestamp` from the query string, fetches
 * `/api/benchmark-detail`, and renders summary cards, the per-exercise
 * table, the list of historical runs and the run configuration. While the
 * request is in flight a spinner is shown; on failure (or an empty payload)
 * an error panel with a "Go Back" button is shown.
 */
export default function ModelDetailPage() {
  const params = useParams();
  const router = useRouter();
  const searchParams = useSearchParams();
  const [detailData, setDetailData] = useState<DetailResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const model = decodeURIComponent(params.model as string);
  const provider = decodeURIComponent(params.provider as string);
  const language = decodeURIComponent(params.language as string);
  const timestamp = searchParams.get("timestamp");

  useEffect(() => {
    // Lets the cleanup cancel this request so that a slow, superseded
    // response can never overwrite the data of a newer one.
    const controller = new AbortController();

    async function fetchDetailData() {
      // Reset per-request state: without this a previous failure would keep
      // the error panel on screen even after a later successful fetch, and
      // stale data would stay visible while a different run is loading.
      setLoading(true);
      setError(null);

      try {
        // URLSearchParams encodes every value, including the timestamp
        // (a raw "+" or "&" in it would otherwise corrupt the query).
        const query = new URLSearchParams({ model, provider, language });
        if (timestamp) {
          query.set("timestamp", timestamp);
        }

        const response = await fetch(
          `/api/benchmark-detail?${query.toString()}`,
          { signal: controller.signal }
        );
        if (!response.ok) {
          throw new Error("Failed to fetch benchmark details");
        }

        const data: DetailResponse = await response.json();
        if (controller.signal.aborted) {
          return;
        }
        setDetailData(data);
      } catch (err) {
        // An aborted request is not an error worth surfacing.
        if (controller.signal.aborted) {
          return;
        }
        setError(err instanceof Error ? err.message : "An error occurred");
      } finally {
        // Only the request that is still current may clear the spinner.
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    }

    fetchDetailData();

    return () => controller.abort();
  }, [model, provider, language, timestamp]);

  // Path of this page without any query string.
  const detailPath = `/details/${encodeURIComponent(
    model
  )}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}`;

  /** Navigates to this same page, selecting the run with the given end time. */
  const loadHistoricalRun = (runTimestamp: string) => {
    router.push(
      `${detailPath}?timestamp=${encodeURIComponent(runTimestamp)}`
    );
  };

  /** Navigates to this same page without a timestamp, i.e. the latest run. */
  const backToLatestRun = () => {
    router.push(detailPath);
  };

  if (loading) {
    return (
      <div className="min-h-screen bg-gray-50 flex items-center justify-center">
        <div className="text-center">
          <div className="animate-spin rounded-full h-32 w-32 border-b-2 border-blue-500 mx-auto"></div>
          <p className="mt-4 text-gray-600">Loading benchmark details...</p>
        </div>
      </div>
    );
  }

  if (error || !detailData) {
    return (
      <div className="min-h-screen bg-gray-50 flex items-center justify-center">
        <div className="text-center">
          <div className="text-red-500 text-6xl mb-4">⚠️</div>
          <h3 className="text-lg font-medium text-gray-900 mb-2">
            Error Loading Details
          </h3>
          <p className="text-gray-500 mb-4">
            {error || "Benchmark data not found"}
          </p>
          <button
            onClick={() => router.back()}
            className="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-md"
          >
            Go Back
          </button>
        </div>
      </div>
    );
  }

  // The run to display. NOTE(review): presumably the API returns the run
  // matching `timestamp` in `latest` when one is requested — confirm.
  const benchmarkData = detailData.latest;
  // Same truthiness test as the fetch above, so an empty `?timestamp=` does
  // not show the historical banner on top of the latest run's data.
  const isHistoricalView = Boolean(timestamp);

  const summaryCards = [
    {
      label: "Success Rate",
      icon: "%",
      iconBackground: "bg-green-500",
      value: formatPercentage(benchmarkData.summary.successRate * 100),
    },
    {
      label: "Total Exercises",
      icon: "E",
      iconBackground: "bg-blue-500",
      value: benchmarkData.summary.totalExercises,
    },
    {
      label: "Total Cost",
      icon: "$",
      iconBackground: "bg-yellow-500",
      value: formatCurrency(benchmarkData.summary.totalCost),
    },
    {
      label: "Total Time",
      icon: "T",
      iconBackground: "bg-purple-500",
      value: formatTime(benchmarkData.summary.totalTime),
    },
  ];

  const configurationRows = [
    { label: "Model:", value: benchmarkData.config.model },
    { label: "Provider:", value: benchmarkData.config.provider },
    { label: "Language:", value: benchmarkData.config.language },
  ];

  const limitRows = [
    { label: "Max Turns:", value: benchmarkData.config.limits.maxTurns },
    {
      label: "Max Time:",
      value: formatTime(benchmarkData.config.limits.maxTime),
    },
    {
      label: "Max Cost:",
      value: formatCurrency(benchmarkData.config.limits.maxCost),
    },
  ];

  return (
    <div className="min-h-screen bg-gray-50">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
        {/* Header */}
        <div className="mb-8">
          <button
            onClick={() => router.push("/")}
            className="mb-4 text-blue-600 hover:text-blue-800 flex items-center"
          >
            ← Back to Leaderboard
          </button>
          <h1 className="text-3xl font-bold text-gray-900">
            Benchmark Details: {model}
          </h1>
          <p className="mt-2 text-gray-600">
            Provider: {provider} • Language: {language}
          </p>
          {isHistoricalView && (
            <div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-md">
              <p className="text-blue-800 text-sm">
                Viewing historical run from{" "}
                {new Date(benchmarkData.endTime).toLocaleString()}
              </p>
              <button
                onClick={backToLatestRun}
                className="mt-2 text-blue-600 hover:text-blue-800 text-sm underline"
              >
                ← Back to latest run
              </button>
            </div>
          )}
        </div>

        {/* Summary Cards */}
        <div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
          {summaryCards.map((card) => (
            <div
              key={card.label}
              className="bg-white p-6 rounded-lg shadow-sm border border-gray-200"
            >
              <div className="flex items-center">
                <div className="flex-shrink-0">
                  <div
                    className={`w-8 h-8 ${card.iconBackground} rounded-md flex items-center justify-center`}
                  >
                    <span className="text-white font-bold">{card.icon}</span>
                  </div>
                </div>
                <div className="ml-4">
                  <p className="text-sm font-medium text-gray-500">
                    {card.label}
                  </p>
                  <p className="text-2xl font-semibold text-gray-900">
                    {card.value}
                  </p>
                </div>
              </div>
            </div>
          ))}
        </div>

        {/* Exercise Results Table */}
        <div className={PANEL_CLASSES}>
          <div className="px-6 py-4 border-b border-gray-200">
            <h2 className="text-xl font-semibold text-gray-900">
              Exercise Results
            </h2>
            <p className="mt-1 text-sm text-gray-500">
              Detailed breakdown of each exercise performance
            </p>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full">
              <TableHead columns={EXERCISE_TABLE_COLUMNS} />
              <tbody className="bg-white divide-y divide-gray-200">
                {benchmarkData.exercises.map((exercise) => {
                  const testResult = exercise.testResult;
                  const passed = Boolean(testResult?.success);
                  // Derive the status key once so the icon and the badge
                  // always agree; the badge helper expects a status string,
                  // not the raw boolean.
                  const status = passed ? "success" : "failure";

                  return (
                    <tr
                      key={exercise.exerciseName}
                      className="hover:bg-gray-50"
                    >
                      <td className="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
                        <button
                          onClick={() =>
                            router.push(
                              `/exercise/${encodeURIComponent(
                                model
                              )}/${encodeURIComponent(
                                provider
                              )}/${encodeURIComponent(
                                language
                              )}/${encodeURIComponent(exercise.exerciseName)}`
                            )
                          }
                          className="text-blue-600 hover:text-blue-800 hover:underline text-left"
                          title="Click to view detailed exercise results"
                        >
                          {exercise.exerciseName}
                        </button>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <div className="flex items-center">
                          <span className="mr-2">{getStatusIcon(status)}</span>
                          <span className={getStatusBadge(status)}>
                            {passed ? "Pass" : "Fail"}
                          </span>
                        </div>
                      </td>
                      <td className={MUTED_CELL_CLASSES}>
                        {/* Show a dash instead of a bare " / " when the
                            exercise has no test result. */}
                        {testResult?.passed ?? "-"} /{" "}
                        {testResult?.total ?? "-"}{" "}
                        <div>
                          {testResult?.skipped
                            ? `(${testResult.skipped} skipped)`
                            : ""}
                        </div>
                      </td>
                      <td className={MUTED_CELL_CLASSES}>
                        {formatTime(exercise.timeElapsed)}
                      </td>
                      <td className={MUTED_CELL_CLASSES}>
                        {formatCurrency(exercise.cost)}
                      </td>
                      <td className={MUTED_CELL_CLASSES}>{exercise.turns}</td>
                      <td className="px-6 py-4 text-sm text-gray-500 max-w-xs truncate">
                        {exercise.finalOutput?.slice(0, 100) || "-"}
                      </td>
                    </tr>
                  );
                })}
              </tbody>
            </table>
          </div>
        </div>

        {/* Historical Performance Section */}
        {detailData.history.length > 0 && (
          <div className={`mt-8 ${PANEL_CLASSES}`}>
            <div className="px-6 py-4 border-b border-gray-200">
              <h2 className="text-xl font-semibold text-gray-900">
                Historical Performance
              </h2>
              <p className="mt-1 text-sm text-gray-500">
                Previous runs for this model/provider/language combination (
                {detailData.totalRuns} total runs)
              </p>
            </div>
            <div className="overflow-x-auto">
              <table className="min-w-full">
                <TableHead columns={HISTORY_TABLE_COLUMNS} />
                <tbody className="bg-white divide-y divide-gray-200">
                  {detailData.history.map((run, index) => (
                    <tr
                      key={`${run.endTime}-${index}`}
                      className="hover:bg-gray-50 cursor-pointer"
                      onClick={() => loadHistoricalRun(run.endTime)}
                      title="Click to view detailed results for this run"
                    >
                      <td className={STRONG_CELL_CLASSES}>
                        {new Date(run.endTime).toLocaleDateString()}
                      </td>
                      <td className={STRONG_CELL_CLASSES}>
                        {/* NOTE(review): the summary card multiplies its
                            success rate by 100 before formatting, this cell
                            does not — confirm the history values are
                            already on the scale formatPercentage expects. */}
                        {formatPercentage(run.successRate)}
                      </td>
                      <td className={STRONG_CELL_CLASSES}>
                        {run.totalExercises}
                      </td>
                      <td className={STRONG_CELL_CLASSES}>
                        {formatCurrency(run.averageCost)}
                      </td>
                      <td className={STRONG_CELL_CLASSES}>
                        {formatTime(run.averageTime)}
                      </td>
                      <td className={STRONG_CELL_CLASSES}>
                        {run.averageTurns.toFixed(1)}
                      </td>
                      <td className={`${MUTED_CELL_CLASSES} font-mono`}>
                        {run.commitHash.slice(0, 8)}
                      </td>
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>
          </div>
        )}

        {/* Run Information */}
        <div className={`mt-8 ${PANEL_CLASSES} p-6`}>
          <h3 className="text-lg font-semibold text-gray-900 mb-4">
            Run Information
          </h3>
          <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
            <div>
              <h4 className="text-sm font-medium text-gray-500 mb-2">
                Configuration
              </h4>
              <dl className="space-y-1">
                {configurationRows.map((row) => (
                  <div key={row.label} className="flex">
                    <dt className="text-sm text-gray-500 w-24">{row.label}</dt>
                    <dd className="text-sm text-gray-900">{row.value}</dd>
                  </div>
                ))}
                {benchmarkData.config.agent && (
                  <div className="flex">
                    <dt className="text-sm text-gray-500 w-24">Agent:</dt>
                    <dd className="text-sm text-gray-900">
                      {benchmarkData.config.agent}
                    </dd>
                  </div>
                )}
              </dl>
            </div>
            <div>
              <h4 className="text-sm font-medium text-gray-500 mb-2">Limits</h4>
              <dl className="space-y-1">
                {limitRows.map((row) => (
                  <div key={row.label} className="flex">
                    <dt className="text-sm text-gray-500 w-24">{row.label}</dt>
                    <dd className="text-sm text-gray-900">{row.value}</dd>
                  </div>
                ))}
              </dl>
            </div>
          </div>
          <div className="mt-4 pt-4 border-t border-gray-200">
            <div className="flex justify-between text-sm text-gray-500">
              <span>
                Started: {new Date(benchmarkData.startTime).toLocaleString()}
              </span>
              <span>
                Completed: {new Date(benchmarkData.endTime).toLocaleString()}
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}