@tyvm/knowhow 0.0.32 → 0.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (508)
  1. package/autodoc/plugins/downloader/downloader.mdx +2 -2
  2. package/benchmarks/.dockerignore +7 -0
  3. package/benchmarks/README.md +166 -0
  4. package/benchmarks/docker/Dockerfile +68 -0
  5. package/benchmarks/example-config.yml +27 -0
  6. package/benchmarks/jest.config.js +13 -0
  7. package/benchmarks/package-lock.json +4297 -0
  8. package/benchmarks/package.json +39 -0
  9. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +2814 -0
  10. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +2014 -0
  11. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +3121 -0
  12. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +98 -0
  13. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +3256 -0
  14. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +4298 -0
  15. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3031 -0
  16. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +2990 -0
  17. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3256 -0
  18. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3007 -0
  19. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +3256 -0
  20. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +3036 -0
  21. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +3280 -0
  22. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +1920 -0
  23. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +3281 -0
  24. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +2896 -0
  25. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3011 -0
  26. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +3003 -0
  27. package/benchmarks/scripts/build-and-run.sh +47 -0
  28. package/benchmarks/scripts/clone-exercism.sh +92 -0
  29. package/benchmarks/scripts/validate.sh +48 -0
  30. package/benchmarks/src/__tests__/runner.test.ts +27 -0
  31. package/benchmarks/src/cli.ts +90 -0
  32. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +64 -0
  33. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +183 -0
  34. package/benchmarks/src/evaluators/index.ts +3 -0
  35. package/benchmarks/src/evaluators/types.ts +22 -0
  36. package/benchmarks/src/index.ts +3 -0
  37. package/benchmarks/src/providers.ts +13 -0
  38. package/benchmarks/src/runner.ts +824 -0
  39. package/benchmarks/src/types.ts +63 -0
  40. package/benchmarks/tsconfig.json +19 -0
  41. package/jest.config.js +2 -1
  42. package/leaderboard/README.md +148 -0
  43. package/leaderboard/app/api/benchmark-data/route.ts +131 -0
  44. package/leaderboard/app/api/benchmark-detail/route.ts +172 -0
  45. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +501 -0
  46. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +375 -0
  47. package/leaderboard/app/globals.css +27 -0
  48. package/leaderboard/app/layout.tsx +21 -0
  49. package/leaderboard/app/page.tsx +170 -0
  50. package/leaderboard/components/LeaderboardTable.tsx +168 -0
  51. package/leaderboard/components/PerformanceChart.tsx +109 -0
  52. package/leaderboard/next-env.d.ts +5 -0
  53. package/leaderboard/next.config.js +4 -0
  54. package/leaderboard/package-lock.json +6363 -0
  55. package/leaderboard/package.json +28 -0
  56. package/leaderboard/postcss.config.js +6 -0
  57. package/leaderboard/tailwind.config.js +17 -0
  58. package/leaderboard/tsconfig.json +28 -0
  59. package/leaderboard/types/benchmark.ts +67 -0
  60. package/leaderboard/utils/dataProcessor.ts +33 -0
  61. package/package.json +2 -1
  62. package/src/agents/base/base.ts +147 -21
  63. package/src/agents/base/prompt.ts +28 -0
  64. package/src/agents/index.ts +3 -0
  65. package/src/agents/patcher/patcher.ts +6 -4
  66. package/src/agents/setup/setup.ts +56 -0
  67. package/src/agents/tools/agentCall.ts +6 -2
  68. package/src/agents/tools/aiClient.ts +74 -8
  69. package/src/agents/tools/execCommand.ts +13 -14
  70. package/src/agents/tools/executeScript/README.md +16 -0
  71. package/src/agents/tools/index.ts +2 -0
  72. package/src/agents/tools/list.ts +73 -16
  73. package/src/agents/tools/startAgentTask.ts +109 -0
  74. package/src/agents/tools/textSearch.ts +1 -1
  75. package/src/agents/tools/visionTool.ts +31 -2
  76. package/src/agents/tools/ycmd/client.ts +608 -0
  77. package/src/agents/tools/ycmd/definitions.ts +294 -0
  78. package/src/agents/tools/ycmd/detection.ts +211 -0
  79. package/src/agents/tools/ycmd/index.ts +11 -0
  80. package/src/agents/tools/ycmd/installer.ts +251 -0
  81. package/src/agents/tools/ycmd/server.ts +535 -0
  82. package/src/agents/tools/ycmd/serverManager.ts +316 -0
  83. package/src/agents/tools/ycmd/tools/completion.ts +113 -0
  84. package/src/agents/tools/ycmd/tools/diagnostics.ts +155 -0
  85. package/src/agents/tools/ycmd/tools/getLocations.ts +173 -0
  86. package/src/agents/tools/ycmd/tools/goto.ts +169 -0
  87. package/src/agents/tools/ycmd/tools/refactor.ts +204 -0
  88. package/src/agents/tools/ycmd/tools/signature.ts +174 -0
  89. package/src/agents/tools/ycmd/tools/start.ts +95 -0
  90. package/src/agents/tools/ycmd/utils/pathUtils.ts +59 -0
  91. package/src/ai.ts +15 -0
  92. package/src/chat/CliChatService.ts +277 -0
  93. package/src/chat/modules/AgentModule.ts +980 -0
  94. package/src/chat/modules/AskModule.ts +98 -0
  95. package/src/chat/modules/BaseChatModule.ts +66 -0
  96. package/src/chat/modules/InternalChatModule.ts +174 -0
  97. package/src/chat/modules/SearchModule.ts +166 -0
  98. package/src/chat/modules/SetupModule.ts +185 -0
  99. package/src/chat/modules/SystemModule.ts +120 -0
  100. package/src/chat/modules/VoiceModule.ts +70 -0
  101. package/src/chat/modules/index.js +5 -0
  102. package/src/chat/types.ts +97 -0
  103. package/src/chat.ts +9 -1
  104. package/src/chat2.ts +62 -0
  105. package/src/cli.ts +264 -35
  106. package/src/clients/anthropic.ts +14 -7
  107. package/src/clients/gemini.ts +15 -7
  108. package/src/clients/http.ts +17 -7
  109. package/src/clients/index.ts +117 -4
  110. package/src/clients/knowhow.ts +7 -2
  111. package/src/clients/knowhowMcp.ts +118 -0
  112. package/src/clients/openai.ts +32 -8
  113. package/src/clients/types.ts +1 -0
  114. package/src/clients/xai.ts +17 -5
  115. package/src/config.ts +30 -5
  116. package/src/conversion.ts +4 -1
  117. package/src/embeddings.ts +79 -23
  118. package/src/login.ts +26 -9
  119. package/src/microphone.ts +0 -1
  120. package/src/plugins/downloader/downloader.ts +72 -24
  121. package/src/plugins/downloader/plugin.ts +3 -1
  122. package/src/plugins/plugins.ts +3 -0
  123. package/src/processors/CustomVariables.ts +425 -0
  124. package/src/processors/HarmonyToolProcessor.ts +264 -0
  125. package/src/processors/XmlToolCallProcessor.ts +533 -0
  126. package/src/processors/index.ts +3 -0
  127. package/src/prompts/KnowhowConfigExamples.ts +376 -0
  128. package/src/services/KnowhowClient.ts +49 -3
  129. package/src/services/Mcp.ts +42 -3
  130. package/src/services/McpServer.ts +14 -4
  131. package/src/services/McpWebsocketTransport.ts +21 -7
  132. package/src/services/MessageProcessor.ts +10 -5
  133. package/src/services/index.ts +5 -0
  134. package/src/services/script-execution/ScriptExecutor.ts +34 -1
  135. package/src/services/types.ts +17 -14
  136. package/src/types.ts +17 -0
  137. package/src/utils/index.ts +138 -0
  138. package/tests/XmlToolCallProcessor.test.ts +468 -0
  139. package/tests/manual/ycmd/debug_diagnostics_test.ts +127 -0
  140. package/tests/manual/ycmd/fixtures/debug_diagnostics.ts +26 -0
  141. package/tests/manual/ycmd/fixtures/file_change_test.ts +17 -0
  142. package/tests/manual/ycmd/minimal_advanced_test.ts +108 -0
  143. package/tests/manual/ycmd/simple_diagnostics_test.ts +61 -0
  144. package/tests/manual/ycmd/simple_test.ts +74 -0
  145. package/tests/manual/ycmd/test-typescript-sample.ts +34 -0
  146. package/tests/manual/ycmd/test_advanced_features.ts +407 -0
  147. package/tests/manual/ycmd/test_advanced_with_tools.ts +320 -0
  148. package/tests/manual/ycmd/test_comprehensive_typescript.ts +179 -0
  149. package/tests/manual/ycmd/test_diagnostics_file_changes.ts +249 -0
  150. package/tests/manual/ycmd/test_diagnostics_fix.ts +99 -0
  151. package/tests/manual/ycmd/test_diagnostics_simple.ts +100 -0
  152. package/tests/manual/ycmd/test_diagnostics_timing.ts +120 -0
  153. package/tests/manual/ycmd/test_discover_commands.ts +310 -0
  154. package/tests/manual/ycmd/test_endpoints.ts +115 -0
  155. package/tests/manual/ycmd/test_final_comprehensive.ts +218 -0
  156. package/tests/manual/ycmd/test_final_validation.ts +150 -0
  157. package/tests/manual/ycmd/test_implementation.js +42 -0
  158. package/tests/manual/ycmd/test_individual_ycmd_tool.ts +39 -0
  159. package/tests/manual/ycmd/test_server_manager.ts +52 -0
  160. package/tests/manual/ycmd/test_simple_debug.ts +86 -0
  161. package/tests/manual/ycmd/test_tsserver_workflow.js +83 -0
  162. package/tests/manual/ycmd/test_tsserver_workflow.ts +122 -0
  163. package/tests/manual/ycmd/test_typescript_simple.ts +48 -0
  164. package/tests/manual/ycmd/test_typescript_ycmd.ts +105 -0
  165. package/tests/manual/ycmd/test_workspace_config.ts +90 -0
  166. package/tests/manual/ycmd/test_ycmd_auto_start.ts +137 -0
  167. package/tests/manual/ycmd/test_ycmd_comprehensive.ts +73 -0
  168. package/tests/manual/ycmd/test_ycmd_connection.py +10 -0
  169. package/tests/manual/ycmd/test_ycmd_direct.ts +142 -0
  170. package/tests/manual/ycmd/test_ycmd_experiment.ts +48 -0
  171. package/tests/manual/ycmd/test_ycmd_final.ts +200 -0
  172. package/tests/manual/ycmd/test_ycmd_fixed.py +18 -0
  173. package/tests/manual/ycmd/test_ycmd_integration.ts +112 -0
  174. package/tests/manual/ycmd/test_ycmd_simple.ts +45 -0
  175. package/tests/manual/ycmd/test_ycmd_usage.py +27 -0
  176. package/tests/manual/ycmd/working_simple_test.ts +134 -0
  177. package/ts_build/src/agents/base/base.d.ts +14 -1
  178. package/ts_build/src/agents/base/base.js +91 -17
  179. package/ts_build/src/agents/base/base.js.map +1 -1
  180. package/ts_build/src/agents/base/prompt.d.ts +1 -1
  181. package/ts_build/src/agents/base/prompt.js +28 -0
  182. package/ts_build/src/agents/base/prompt.js.map +1 -1
  183. package/ts_build/src/agents/index.d.ts +2 -0
  184. package/ts_build/src/agents/index.js +2 -0
  185. package/ts_build/src/agents/index.js.map +1 -1
  186. package/ts_build/src/agents/patcher/patcher.js +6 -3
  187. package/ts_build/src/agents/patcher/patcher.js.map +1 -1
  188. package/ts_build/src/agents/setup/setup.d.ts +8 -0
  189. package/ts_build/src/agents/setup/setup.js +59 -0
  190. package/ts_build/src/agents/setup/setup.js.map +1 -0
  191. package/ts_build/src/agents/tools/agentCall.js +5 -2
  192. package/ts_build/src/agents/tools/agentCall.js.map +1 -1
  193. package/ts_build/src/agents/tools/aiClient.d.ts +6 -5
  194. package/ts_build/src/agents/tools/aiClient.js +37 -6
  195. package/ts_build/src/agents/tools/aiClient.js.map +1 -1
  196. package/ts_build/src/agents/tools/execCommand.d.ts +2 -2
  197. package/ts_build/src/agents/tools/execCommand.js +5 -6
  198. package/ts_build/src/agents/tools/execCommand.js.map +1 -1
  199. package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
  200. package/ts_build/src/agents/tools/index.d.ts +2 -0
  201. package/ts_build/src/agents/tools/index.js +2 -0
  202. package/ts_build/src/agents/tools/index.js.map +1 -1
  203. package/ts_build/src/agents/tools/list.js +66 -16
  204. package/ts_build/src/agents/tools/list.js.map +1 -1
  205. package/ts_build/src/agents/tools/startAgentTask.d.ts +13 -0
  206. package/ts_build/src/agents/tools/startAgentTask.js +74 -0
  207. package/ts_build/src/agents/tools/startAgentTask.js.map +1 -0
  208. package/ts_build/src/agents/tools/startChatTask.d.ts +13 -0
  209. package/ts_build/src/agents/tools/startChatTask.js +73 -0
  210. package/ts_build/src/agents/tools/startChatTask.js.map +1 -0
  211. package/ts_build/src/agents/tools/textSearch.js +1 -1
  212. package/ts_build/src/agents/tools/textSearch.js.map +1 -1
  213. package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
  214. package/ts_build/src/agents/tools/visionTool.js +23 -3
  215. package/ts_build/src/agents/tools/visionTool.js.map +1 -1
  216. package/ts_build/src/agents/tools/ycmd/client.d.ts +93 -0
  217. package/ts_build/src/agents/tools/ycmd/client.js +355 -0
  218. package/ts_build/src/agents/tools/ycmd/client.js.map +1 -0
  219. package/ts_build/src/agents/tools/ycmd/definitions.d.ts +345 -0
  220. package/ts_build/src/agents/tools/ycmd/definitions.js +298 -0
  221. package/ts_build/src/agents/tools/ycmd/definitions.js.map +1 -0
  222. package/ts_build/src/agents/tools/ycmd/detection.d.ts +11 -0
  223. package/ts_build/src/agents/tools/ycmd/detection.js +175 -0
  224. package/ts_build/src/agents/tools/ycmd/detection.js.map +1 -0
  225. package/ts_build/src/agents/tools/ycmd/index.d.ts +8 -0
  226. package/ts_build/src/agents/tools/ycmd/index.js +20 -0
  227. package/ts_build/src/agents/tools/ycmd/index.js.map +1 -0
  228. package/ts_build/src/agents/tools/ycmd/installer.d.ts +19 -0
  229. package/ts_build/src/agents/tools/ycmd/installer.js +196 -0
  230. package/ts_build/src/agents/tools/ycmd/installer.js.map +1 -0
  231. package/ts_build/src/agents/tools/ycmd/server.d.ts +35 -0
  232. package/ts_build/src/agents/tools/ycmd/server.js +363 -0
  233. package/ts_build/src/agents/tools/ycmd/server.js.map +1 -0
  234. package/ts_build/src/agents/tools/ycmd/serverManager.d.ts +39 -0
  235. package/ts_build/src/agents/tools/ycmd/serverManager.js +210 -0
  236. package/ts_build/src/agents/tools/ycmd/serverManager.js.map +1 -0
  237. package/ts_build/src/agents/tools/ycmd/tools/completion.d.ts +22 -0
  238. package/ts_build/src/agents/tools/ycmd/tools/completion.js +72 -0
  239. package/ts_build/src/agents/tools/ycmd/tools/completion.js.map +1 -0
  240. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.d.ts +42 -0
  241. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js +88 -0
  242. package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js.map +1 -0
  243. package/ts_build/src/agents/tools/ycmd/tools/getLocations.d.ts +22 -0
  244. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js +142 -0
  245. package/ts_build/src/agents/tools/ycmd/tools/getLocations.js.map +1 -0
  246. package/ts_build/src/agents/tools/ycmd/tools/goto.d.ts +20 -0
  247. package/ts_build/src/agents/tools/ycmd/tools/goto.js +101 -0
  248. package/ts_build/src/agents/tools/ycmd/tools/goto.js.map +1 -0
  249. package/ts_build/src/agents/tools/ycmd/tools/refactor.d.ts +32 -0
  250. package/ts_build/src/agents/tools/ycmd/tools/refactor.js +123 -0
  251. package/ts_build/src/agents/tools/ycmd/tools/refactor.js.map +1 -0
  252. package/ts_build/src/agents/tools/ycmd/tools/signature.d.ts +25 -0
  253. package/ts_build/src/agents/tools/ycmd/tools/signature.js +110 -0
  254. package/ts_build/src/agents/tools/ycmd/tools/signature.js.map +1 -0
  255. package/ts_build/src/agents/tools/ycmd/tools/start.d.ts +17 -0
  256. package/ts_build/src/agents/tools/ycmd/tools/start.js +65 -0
  257. package/ts_build/src/agents/tools/ycmd/tools/start.js.map +1 -0
  258. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.d.ts +4 -0
  259. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js +67 -0
  260. package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js.map +1 -0
  261. package/ts_build/src/ai.d.ts +1 -0
  262. package/ts_build/src/ai.js +40 -1
  263. package/ts_build/src/ai.js.map +1 -1
  264. package/ts_build/src/chat/ChatCommandHandler.d.ts +36 -0
  265. package/ts_build/src/chat/ChatCommandHandler.js +268 -0
  266. package/ts_build/src/chat/ChatCommandHandler.js.map +1 -0
  267. package/ts_build/src/chat/ChatInputManager.d.ts +22 -0
  268. package/ts_build/src/chat/ChatInputManager.js +85 -0
  269. package/ts_build/src/chat/ChatInputManager.js.map +1 -0
  270. package/ts_build/src/chat/ChatManager.d.ts +49 -0
  271. package/ts_build/src/chat/ChatManager.js +271 -0
  272. package/ts_build/src/chat/ChatManager.js.map +1 -0
  273. package/ts_build/src/chat/ChatSession.d.ts +32 -0
  274. package/ts_build/src/chat/ChatSession.js +3 -0
  275. package/ts_build/src/chat/ChatSession.js.map +1 -0
  276. package/ts_build/src/chat/ChatSessionManager.d.ts +19 -0
  277. package/ts_build/src/chat/ChatSessionManager.js +188 -0
  278. package/ts_build/src/chat/ChatSessionManager.js.map +1 -0
  279. package/ts_build/src/chat/ChatStateManager.d.ts +58 -0
  280. package/ts_build/src/chat/ChatStateManager.js +156 -0
  281. package/ts_build/src/chat/ChatStateManager.js.map +1 -0
  282. package/ts_build/src/chat/CliChatService.d.ts +35 -0
  283. package/ts_build/src/chat/CliChatService.js +201 -0
  284. package/ts_build/src/chat/CliChatService.js.map +1 -0
  285. package/ts_build/src/chat/InterruptibleInput.d.ts +20 -0
  286. package/ts_build/src/chat/InterruptibleInput.js +109 -0
  287. package/ts_build/src/chat/InterruptibleInput.js.map +1 -0
  288. package/ts_build/src/chat/interfaces/ChatModule.d.ts +6 -0
  289. package/ts_build/src/chat/interfaces/ChatModule.js +3 -0
  290. package/ts_build/src/chat/interfaces/ChatModule.js.map +1 -0
  291. package/ts_build/src/chat/modules/AgentModule.d.ts +56 -0
  292. package/ts_build/src/chat/modules/AgentModule.js +705 -0
  293. package/ts_build/src/chat/modules/AgentModule.js.map +1 -0
  294. package/ts_build/src/chat/modules/AskModule.d.ts +10 -0
  295. package/ts_build/src/chat/modules/AskModule.js +63 -0
  296. package/ts_build/src/chat/modules/AskModule.js.map +1 -0
  297. package/ts_build/src/chat/modules/BaseChatModule.d.ts +14 -0
  298. package/ts_build/src/chat/modules/BaseChatModule.js +32 -0
  299. package/ts_build/src/chat/modules/BaseChatModule.js.map +1 -0
  300. package/ts_build/src/chat/modules/InternalChatModule.d.ts +24 -0
  301. package/ts_build/src/chat/modules/InternalChatModule.js +127 -0
  302. package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -0
  303. package/ts_build/src/chat/modules/SearchModule.d.ts +12 -0
  304. package/ts_build/src/chat/modules/SearchModule.js +119 -0
  305. package/ts_build/src/chat/modules/SearchModule.js.map +1 -0
  306. package/ts_build/src/chat/modules/SetupModule.d.ts +15 -0
  307. package/ts_build/src/chat/modules/SetupModule.js +147 -0
  308. package/ts_build/src/chat/modules/SetupModule.js.map +1 -0
  309. package/ts_build/src/chat/modules/SystemModule.d.ts +14 -0
  310. package/ts_build/src/chat/modules/SystemModule.js +90 -0
  311. package/ts_build/src/chat/modules/SystemModule.js.map +1 -0
  312. package/ts_build/src/chat/modules/VoiceModule.d.ts +11 -0
  313. package/ts_build/src/chat/modules/VoiceModule.js +57 -0
  314. package/ts_build/src/chat/modules/VoiceModule.js.map +1 -0
  315. package/ts_build/src/chat/types.d.ts +83 -0
  316. package/ts_build/src/chat/types.js +3 -0
  317. package/ts_build/src/chat/types.js.map +1 -0
  318. package/ts_build/src/chat.js +7 -1
  319. package/ts_build/src/chat.js.map +1 -1
  320. package/ts_build/src/chat2.d.ts +3 -0
  321. package/ts_build/src/chat2.js +47 -0
  322. package/ts_build/src/chat2.js.map +1 -0
  323. package/ts_build/src/cli.js +218 -37
  324. package/ts_build/src/cli.js.map +1 -1
  325. package/ts_build/src/clients/anthropic.d.ts +5 -2
  326. package/ts_build/src/clients/anthropic.js +12 -7
  327. package/ts_build/src/clients/anthropic.js.map +1 -1
  328. package/ts_build/src/clients/gemini.d.ts +6 -3
  329. package/ts_build/src/clients/gemini.js +13 -7
  330. package/ts_build/src/clients/gemini.js.map +1 -1
  331. package/ts_build/src/clients/http.d.ts +1 -0
  332. package/ts_build/src/clients/http.js +12 -5
  333. package/ts_build/src/clients/http.js.map +1 -1
  334. package/ts_build/src/clients/index.d.ts +10 -0
  335. package/ts_build/src/clients/index.js +74 -4
  336. package/ts_build/src/clients/index.js.map +1 -1
  337. package/ts_build/src/clients/knowhow.d.ts +3 -1
  338. package/ts_build/src/clients/knowhow.js +8 -2
  339. package/ts_build/src/clients/knowhow.js.map +1 -1
  340. package/ts_build/src/clients/knowhowMcp.d.ts +20 -0
  341. package/ts_build/src/clients/knowhowMcp.js +86 -0
  342. package/ts_build/src/clients/knowhowMcp.js.map +1 -0
  343. package/ts_build/src/clients/openai.d.ts +5 -2
  344. package/ts_build/src/clients/openai.js +29 -8
  345. package/ts_build/src/clients/openai.js.map +1 -1
  346. package/ts_build/src/clients/types.d.ts +1 -0
  347. package/ts_build/src/clients/xai.d.ts +5 -2
  348. package/ts_build/src/clients/xai.js +15 -5
  349. package/ts_build/src/clients/xai.js.map +1 -1
  350. package/ts_build/src/config.js +24 -3
  351. package/ts_build/src/config.js.map +1 -1
  352. package/ts_build/src/conversion.js +6 -4
  353. package/ts_build/src/conversion.js.map +1 -1
  354. package/ts_build/src/embeddings.d.ts +2 -1
  355. package/ts_build/src/embeddings.js +62 -17
  356. package/ts_build/src/embeddings.js.map +1 -1
  357. package/ts_build/src/login.d.ts +1 -1
  358. package/ts_build/src/login.js +21 -7
  359. package/ts_build/src/login.js.map +1 -1
  360. package/ts_build/src/microphone.js.map +1 -1
  361. package/ts_build/src/plugins/downloader/downloader.d.ts +4 -5
  362. package/ts_build/src/plugins/downloader/downloader.js +55 -26
  363. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  364. package/ts_build/src/plugins/downloader/plugin.js +5 -3
  365. package/ts_build/src/plugins/downloader/plugin.js.map +1 -1
  366. package/ts_build/src/plugins/plugins.js +3 -0
  367. package/ts_build/src/plugins/plugins.js.map +1 -1
  368. package/ts_build/src/processors/CustomVariables.d.ts +32 -0
  369. package/ts_build/src/processors/CustomVariables.js +297 -0
  370. package/ts_build/src/processors/CustomVariables.js.map +1 -0
  371. package/ts_build/src/processors/HarmonyToolProcessor.d.ts +15 -0
  372. package/ts_build/src/processors/HarmonyToolProcessor.js +154 -0
  373. package/ts_build/src/processors/HarmonyToolProcessor.js.map +1 -0
  374. package/ts_build/src/processors/XmlToolCallProcessor.d.ts +14 -0
  375. package/ts_build/src/processors/XmlToolCallProcessor.js +357 -0
  376. package/ts_build/src/processors/XmlToolCallProcessor.js.map +1 -0
  377. package/ts_build/src/processors/index.d.ts +3 -0
  378. package/ts_build/src/processors/index.js +7 -1
  379. package/ts_build/src/processors/index.js.map +1 -1
  380. package/ts_build/src/prompts/KnowhowConfigExamples.d.ts +2 -0
  381. package/ts_build/src/prompts/KnowhowConfigExamples.js +379 -0
  382. package/ts_build/src/prompts/KnowhowConfigExamples.js.map +1 -0
  383. package/ts_build/src/services/KnowhowClient.d.ts +22 -0
  384. package/ts_build/src/services/KnowhowClient.js +14 -2
  385. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  386. package/ts_build/src/services/Mcp.d.ts +1 -0
  387. package/ts_build/src/services/Mcp.js +20 -3
  388. package/ts_build/src/services/Mcp.js.map +1 -1
  389. package/ts_build/src/services/McpServer.d.ts +1 -1
  390. package/ts_build/src/services/McpServer.js +8 -4
  391. package/ts_build/src/services/McpServer.js.map +1 -1
  392. package/ts_build/src/services/McpWebsocketTransport.js +17 -7
  393. package/ts_build/src/services/McpWebsocketTransport.js.map +1 -1
  394. package/ts_build/src/services/MessageProcessor.d.ts +1 -1
  395. package/ts_build/src/services/MessageProcessor.js +4 -4
  396. package/ts_build/src/services/MessageProcessor.js.map +1 -1
  397. package/ts_build/src/services/index.d.ts +2 -0
  398. package/ts_build/src/services/index.js +4 -0
  399. package/ts_build/src/services/index.js.map +1 -1
  400. package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +1 -0
  401. package/ts_build/src/services/script-execution/ScriptExecutor.js +23 -0
  402. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  403. package/ts_build/src/services/types.d.ts +2 -6
  404. package/ts_build/src/services/types.js +4 -4
  405. package/ts_build/src/services/types.js.map +1 -1
  406. package/ts_build/src/types.d.ts +11 -0
  407. package/ts_build/src/types.js +8 -0
  408. package/ts_build/src/types.js.map +1 -1
  409. package/ts_build/src/utils/index.d.ts +2 -0
  410. package/ts_build/src/utils/index.js +102 -1
  411. package/ts_build/src/utils/index.js.map +1 -1
  412. package/ts_build/tests/XmlToolCallProcessor.test.d.ts +1 -0
  413. package/ts_build/tests/XmlToolCallProcessor.test.js +376 -0
  414. package/ts_build/tests/XmlToolCallProcessor.test.js.map +1 -0
  415. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.d.ts +1 -0
  416. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js +114 -0
  417. package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js.map +1 -0
  418. package/ts_build/tests/manual/ycmd/minimal_advanced_test.d.ts +2 -0
  419. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js +104 -0
  420. package/ts_build/tests/manual/ycmd/minimal_advanced_test.js.map +1 -0
  421. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.d.ts +1 -0
  422. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js +74 -0
  423. package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js.map +1 -0
  424. package/ts_build/tests/manual/ycmd/simple_test.d.ts +2 -0
  425. package/ts_build/tests/manual/ycmd/simple_test.js +82 -0
  426. package/ts_build/tests/manual/ycmd/simple_test.js.map +1 -0
  427. package/ts_build/tests/manual/ycmd/test-typescript-sample.d.ts +14 -0
  428. package/ts_build/tests/manual/ycmd/test-typescript-sample.js +20 -0
  429. package/ts_build/tests/manual/ycmd/test-typescript-sample.js.map +1 -0
  430. package/ts_build/tests/manual/ycmd/test_advanced_features.d.ts +2 -0
  431. package/ts_build/tests/manual/ycmd/test_advanced_features.js +297 -0
  432. package/ts_build/tests/manual/ycmd/test_advanced_features.js.map +1 -0
  433. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.d.ts +3 -0
  434. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js +262 -0
  435. package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js.map +1 -0
  436. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.d.ts +2 -0
  437. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js +186 -0
  438. package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js.map +1 -0
  439. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.d.ts +1 -0
  440. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js +174 -0
  441. package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js.map +1 -0
  442. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.d.ts +2 -0
  443. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js +106 -0
  444. package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js.map +1 -0
  445. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.d.ts +1 -0
  446. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js +104 -0
  447. package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js.map +1 -0
  448. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.d.ts +1 -0
  449. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js +119 -0
  450. package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js.map +1 -0
  451. package/ts_build/tests/manual/ycmd/test_discover_commands.d.ts +2 -0
  452. package/ts_build/tests/manual/ycmd/test_discover_commands.js +243 -0
  453. package/ts_build/tests/manual/ycmd/test_discover_commands.js.map +1 -0
  454. package/ts_build/tests/manual/ycmd/test_endpoints.d.ts +2 -0
  455. package/ts_build/tests/manual/ycmd/test_endpoints.js +120 -0
  456. package/ts_build/tests/manual/ycmd/test_endpoints.js.map +1 -0
  457. package/ts_build/tests/manual/ycmd/test_final_comprehensive.d.ts +2 -0
  458. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js +221 -0
  459. package/ts_build/tests/manual/ycmd/test_final_comprehensive.js.map +1 -0
  460. package/ts_build/tests/manual/ycmd/test_final_validation.d.ts +2 -0
  461. package/ts_build/tests/manual/ycmd/test_final_validation.js +160 -0
  462. package/ts_build/tests/manual/ycmd/test_final_validation.js.map +1 -0
  463. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.d.ts +2 -0
  464. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js +37 -0
  465. package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js.map +1 -0
  466. package/ts_build/tests/manual/ycmd/test_server_manager.d.ts +1 -0
  467. package/ts_build/tests/manual/ycmd/test_server_manager.js +38 -0
  468. package/ts_build/tests/manual/ycmd/test_server_manager.js.map +1 -0
  469. package/ts_build/tests/manual/ycmd/test_simple_debug.d.ts +2 -0
  470. package/ts_build/tests/manual/ycmd/test_simple_debug.js +99 -0
  471. package/ts_build/tests/manual/ycmd/test_simple_debug.js.map +1 -0
  472. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.d.ts +1 -0
  473. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js +128 -0
  474. package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js.map +1 -0
  475. package/ts_build/tests/manual/ycmd/test_typescript_simple.d.ts +1 -0
  476. package/ts_build/tests/manual/ycmd/test_typescript_simple.js +66 -0
  477. package/ts_build/tests/manual/ycmd/test_typescript_simple.js.map +1 -0
  478. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.d.ts +1 -0
  479. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js +105 -0
  480. package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js.map +1 -0
  481. package/ts_build/tests/manual/ycmd/test_workspace_config.d.ts +1 -0
  482. package/ts_build/tests/manual/ycmd/test_workspace_config.js +89 -0
  483. package/ts_build/tests/manual/ycmd/test_workspace_config.js.map +1 -0
  484. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.d.ts +2 -0
  485. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js +130 -0
  486. package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js.map +1 -0
  487. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.d.ts +1 -0
  488. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js +83 -0
  489. package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js.map +1 -0
  490. package/ts_build/tests/manual/ycmd/test_ycmd_direct.d.ts +2 -0
  491. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js +149 -0
  492. package/ts_build/tests/manual/ycmd/test_ycmd_direct.js.map +1 -0
  493. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.d.ts +15 -0
  494. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js +58 -0
  495. package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js.map +1 -0
  496. package/ts_build/tests/manual/ycmd/test_ycmd_final.d.ts +2 -0
  497. package/ts_build/tests/manual/ycmd/test_ycmd_final.js +195 -0
  498. package/ts_build/tests/manual/ycmd/test_ycmd_final.js.map +1 -0
  499. package/ts_build/tests/manual/ycmd/test_ycmd_integration.d.ts +3 -0
  500. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js +110 -0
  501. package/ts_build/tests/manual/ycmd/test_ycmd_integration.js.map +1 -0
  502. package/ts_build/tests/manual/ycmd/test_ycmd_simple.d.ts +2 -0
  503. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js +36 -0
  504. package/ts_build/tests/manual/ycmd/test_ycmd_simple.js.map +1 -0
  505. package/ts_build/tests/manual/ycmd/working_simple_test.d.ts +2 -0
  506. package/ts_build/tests/manual/ycmd/working_simple_test.js +134 -0
  507. package/ts_build/tests/manual/ycmd/working_simple_test.js.map +1 -0
  508. package/tsconfig.json +3 -1
@@ -0,0 +1,47 @@
1
#!/bin/bash

# Build and run Knowhow benchmarks
# Usage: ./build-and-run.sh [command] [options...]
#
# Builds the benchmark Docker image, makes sure the host results directory
# exists, and then runs the image with every argument forwarded to the
# container. When called without arguments the image is still built and a
# usage summary is printed instead of running anything.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
RESULTS_DIR="$PROJECT_ROOT/benchmarks/results"
ENV_FILE="$PROJECT_ROOT/benchmarks/.env"

echo "🏗️ Building Knowhow benchmark container..."

# Build the Docker container (build context is the project root)
docker build -f "$PROJECT_ROOT/benchmarks/docker/Dockerfile" -t knowhow-bench "$PROJECT_ROOT"

echo "✅ Container built successfully!"

# Create results directory if it doesn't exist
mkdir -p "$RESULTS_DIR"

# If no arguments provided, show usage
if [ $# -eq 0 ]; then
  echo ""
  echo "Usage: $0 <command> [options...]"
  echo ""
  echo "Examples:"
  echo " $0 setup --language javascript --count 5"
  echo " $0 run --language javascript --count 5 --model gpt-4o-mini"
  echo " $0 run --language python --count 10 --provider anthropic --model claude-3-sonnet-20240229"
  echo ""
  exit 0
fi

echo "🚀 Running benchmarks..."

# `docker run --env-file` fails when the file does not exist, so the flag is
# only passed when the optional .env file is actually present.
ENV_FILE_ARGS=()
if [ -f "$ENV_FILE" ]; then
  ENV_FILE_ARGS=(--env-file "$ENV_FILE")
fi

# Run the container with all provided arguments. The bare `-e NAME` flags
# forward the variables from the calling shell's environment when set.
docker run --rm \
  -v "$RESULTS_DIR:/app/benchmarks/results" \
  -e OPENAI_KEY \
  -e ANTHROPIC_API_KEY \
  -e GEMINI_API_KEY \
  -e XAI_API_KEY \
  "${ENV_FILE_ARGS[@]}" \
  knowhow-bench "$@"

echo "✅ Benchmarks completed!"
echo "📊 Results available in: benchmarks/results/"
@@ -0,0 +1,92 @@
1
#!/bin/bash

# Clone Exercism exercises for benchmarking
# Based on Aider's clone-exercism.sh approach
#
# Usage: clone-exercism.sh [language] [max-exercises]
#   language       Exercism track to clone (default: javascript)
#   max-exercises  Upper bound on exercises copied into the filtered
#                  directory (default: 10); must be a non-negative integer

set -e

# Configuration
EXERCISM_REPO="https://github.com/exercism/problem-specifications.git"
LANGUAGE=${1:-"javascript"}  # Default to JavaScript
MAX_EXERCISES=${2:-10}       # Default to 10 exercises

# Reject non-numeric counts up front instead of failing later inside `[ -ge ]`.
case "$MAX_EXERCISES" in
  ''|*[!0-9]*)
    echo "Error: maximum exercises must be a non-negative integer, got: $MAX_EXERCISES" >&2
    exit 1
    ;;
esac

# Use different paths for local vs container
if [ -n "$CONTAINER" ]; then
  EXERCISES_DIR="/app/exercises"
else
  EXERCISES_DIR="$(cd "$(dirname "$0")/.." && pwd)/exercises"
fi

echo "Cloning Exercism exercises for language: $LANGUAGE"
echo "Maximum exercises: $MAX_EXERCISES"
echo "Target directory: $EXERCISES_DIR"

# Create exercises directory if it doesn't exist
mkdir -p "$EXERCISES_DIR"

# Clone the problem specifications repo if not already cloned
if [ ! -d "$EXERCISES_DIR/problem-specifications" ]; then
  echo "Cloning Exercism problem specifications..."
  git clone "$EXERCISM_REPO" "$EXERCISES_DIR/problem-specifications"
fi

# Clone the language track
LANGUAGE_REPO="https://github.com/exercism/${LANGUAGE}.git"
LANGUAGE_DIR="$EXERCISES_DIR/$LANGUAGE"

if [ ! -d "$LANGUAGE_DIR" ]; then
  echo "Cloning $LANGUAGE track..."
  git clone "$LANGUAGE_REPO" "$LANGUAGE_DIR"
fi

# Find exercises with both problem specification and language implementation
echo "Finding exercises with both specification and implementation..."

SPEC_DIR="$EXERCISES_DIR/problem-specifications/exercises"
IMPL_DIR="$LANGUAGE_DIR/exercises"

# Create filtered exercises directory (always rebuilt from scratch)
FILTERED_DIR="$EXERCISES_DIR/filtered"
if [ -d "$FILTERED_DIR" ]; then
  echo "Removing existing filtered directory: $FILTERED_DIR"
  rm -rf "$FILTERED_DIR"
fi
mkdir -p "$FILTERED_DIR"

count=0
# Iterate with a glob rather than parsing `ls`; glob results are already
# sorted and names are never word-split.
for spec_path in "$SPEC_DIR"/*; do
  # An unmatched glob stays literal; skip it (also covers a missing SPEC_DIR).
  [ -e "$spec_path" ] || continue

  if [ "$count" -ge "$MAX_EXERCISES" ]; then
    break
  fi

  exercise="$(basename "$spec_path")"

  # Tracks keep exercises either under exercises/practice/<name> or
  # directly under exercises/<name>; the practice layout wins when both exist.
  if [ -d "$IMPL_DIR/practice/$exercise" ]; then
    impl_path="$IMPL_DIR/practice/$exercise"
  elif [ -d "$IMPL_DIR/$exercise" ]; then
    impl_path="$IMPL_DIR/$exercise"
  else
    continue
  fi

  echo "Found exercise: $exercise"

  # Create exercise directory
  exercise_dir="$FILTERED_DIR/$exercise"
  mkdir -p "$exercise_dir"

  # Copy problem specification
  if [ -f "$SPEC_DIR/$exercise/description.md" ]; then
    cp "$SPEC_DIR/$exercise/description.md" "$exercise_dir/"
  fi

  if [ -f "$SPEC_DIR/$exercise/metadata.yml" ]; then
    cp "$SPEC_DIR/$exercise/metadata.yml" "$exercise_dir/"
  fi

  # Copy language implementation. The `*` glob does not match dot-entries,
  # so hidden files and directories are not copied.
  cp -r "$impl_path"/* "$exercise_dir/"

  count=$((count + 1))
done

echo "Successfully set up $count exercises in $FILTERED_DIR"
echo "Ready for benchmarking!"
@@ -0,0 +1,48 @@
1
#!/bin/bash

# Simple validation script to test the benchmark setup
# This runs without the full Docker setup for quick validation
#
# Each step runs with its output captured; the output is only shown when the
# step fails, so a failing build or test run can actually be diagnosed.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BENCHMARK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Runs the given command quietly. On failure, replays the captured output on
# stderr and returns the command's exit status (which aborts the script
# because of `set -e`).
run_quiet() {
  local log
  local status=0
  log="$(mktemp)"
  "$@" > "$log" 2>&1 || status=$?
  if [ "$status" -ne 0 ]; then
    echo " ❌ Command failed (exit $status): $*" >&2
    cat "$log" >&2
  fi
  rm -f "$log"
  return "$status"
}

echo "🔍 Validating Knowhow Benchmarks setup..."

# 1. Check that benchmarks can be built
echo "1. Building benchmarks package..."
cd "$BENCHMARK_DIR"
run_quiet npm run build
echo " ✅ Build successful"

# 2. Check that tests pass
echo "2. Running tests..."
run_quiet npm test
echo " ✅ Tests passed"

# 3. Check that CLI can show help
echo "3. Testing CLI..."
run_quiet node dist/cli.js --help
echo " ✅ CLI working"

# 4. Check that Docker can build (optional - requires Docker)
if command -v docker &> /dev/null; then
  echo "4. Testing Docker build..."
  cd "$(dirname "$BENCHMARK_DIR")"
  run_quiet docker build -f benchmarks/docker/Dockerfile -t knowhow-bench-test .
  echo " ✅ Docker build successful"

  # Clean up test image; best-effort, a failed removal must not fail validation
  docker rmi knowhow-bench-test > /dev/null 2>&1 || true
else
  echo "4. Skipping Docker test (Docker not available)"
fi

echo ""
echo "🎉 All validations passed!"
echo ""
echo "Ready to run benchmarks. Example usage:"
echo " ./scripts/build-and-run.sh setup --language javascript --count 5"
echo " ./scripts/build-and-run.sh run --language javascript --count 5 --model gpt-4o-mini"
echo ""
@@ -0,0 +1,27 @@
1
+ import { BenchmarkRunner } from '../runner';
2
+ import { BenchmarkConfig } from '../types';
3
+
4
describe('BenchmarkRunner', () => {
  // Shared fixture: a complete, valid configuration for constructing runners.
  const baseConfig: BenchmarkConfig = {
    language: 'javascript',
    maxExercises: 5,
    model: 'gpt-4o-mini',
    provider: 'openai',
    limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
    outputFile: 'test-results.json',
  };

  // Every test builds its own runner so no state leaks between cases.
  const buildRunner = (): BenchmarkRunner => new BenchmarkRunner(baseConfig);

  it('should create a BenchmarkRunner instance', () => {
    expect(buildRunner()).toBeInstanceOf(BenchmarkRunner);
  });

  it('should have the correct configuration', () => {
    const runner = buildRunner();
    // Bracket access reaches the `config` member for inspection in the test.
    expect(runner['config']).toEqual(baseConfig);
  });
});
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Command } from "commander";
4
+ import { BenchmarkRunner } from "./runner";
5
+ import { BenchmarkConfig } from "./types";
6
+ import chalk from "chalk";
7
+
8
/**
 * Parses a numeric CLI option as a base-10 integer.
 *
 * @param name - Option name, used only in the error message.
 * @param raw - Raw string value received from the command line.
 * @returns The parsed integer.
 * @throws Error when the value does not start with a valid integer.
 */
function parseIntegerOption(name: string, raw: string): number {
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value)) {
    throw new Error(`Invalid value for ${name}: expected an integer, got "${raw}"`);
  }
  return value;
}

/**
 * Parses a numeric CLI option as a floating point number.
 *
 * @param name - Option name, used only in the error message.
 * @param raw - Raw string value received from the command line.
 * @returns The parsed number.
 * @throws Error when the value does not start with a valid number.
 */
function parseNumberOption(name: string, raw: string): number {
  const value = Number.parseFloat(raw);
  if (Number.isNaN(value)) {
    throw new Error(`Invalid value for ${name}: expected a number, got "${raw}"`);
  }
  return value;
}

const program = new Command();

program
  .name("knowhow-bench")
  .description("Benchmark Knowhow terminal agent against coding exercises")
  .version("0.0.1");

// `run`: execute the benchmark suite with the limits given on the command line.
program
  .command("run")
  .description("Run benchmarks against Exercism exercises")
  .option(
    "-l, --language <language>",
    "Programming language to test",
    "javascript"
  )
  .option("-c, --count <count>", "Maximum number of exercises to run", "10")
  .option("-m, --model <model>", "AI model to use", "gpt-4o-mini")
  .option("-p, --provider <provider>", "AI provider to use", "openai")
  .option("--max-turns <turns>", "Maximum turns per exercise", "30")
  .option("--max-time <seconds>", "Maximum time per exercise in seconds", "300")
  .option("--max-cost <dollars>", "Maximum cost per exercise in dollars", "1.0")
  .option("--output <file>", "Output file for results", "results.json")
  .action(async (options) => {
    try {
      console.log(chalk.blue("🚀 Starting Knowhow benchmarks..."));

      // Numeric options arrive as strings; an unparsable value throws here
      // and is reported by the catch below instead of reaching the runner
      // as NaN.
      const config: BenchmarkConfig = {
        language: options.language,
        maxExercises: parseIntegerOption("--count", options.count),
        model: options.model,
        provider: options.provider,
        limits: {
          maxTurns: parseIntegerOption("--max-turns", options.maxTurns),
          maxTime: parseIntegerOption("--max-time", options.maxTime),
          maxCost: parseNumberOption("--max-cost", options.maxCost),
        },
        outputFile: options.output,
      };

      const runner = new BenchmarkRunner(config);
      await runner.run();

      console.log(chalk.green("✅ Benchmarks completed successfully!"));
      process.exit(0);
    } catch (error) {
      console.error(chalk.red("❌ Benchmark failed:"), error);
      process.exit(1);
    }
  });

// `setup`: only prepares the exercises; model, provider and limits are
// placeholders required by the config type.
program
  .command("setup")
  .description("Set up exercises for benchmarking")
  .option(
    "-l, --language <language>",
    "Programming language to setup",
    "javascript"
  )
  .option("-c, --count <count>", "Maximum number of exercises to setup", "10")
  .action(async (options) => {
    try {
      console.log(chalk.blue("📦 Setting up exercises..."));

      const runner = new BenchmarkRunner({
        language: options.language,
        maxExercises: parseIntegerOption("--count", options.count),
        model: "gpt-4o-mini", // Dummy values for setup
        provider: "openai",
        limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
        outputFile: "results.json",
      });

      await runner.setupExercises();

      console.log(chalk.green("✅ Exercises setup completed!"));
      process.exit(0);
    } catch (error) {
      console.error(chalk.red("❌ Setup failed:"), error);
      process.exit(1);
    }
  });

program.parse();
@@ -0,0 +1,64 @@
1
+ import { ExerciseEvaluator, TestResult, TestEvaluationResult } from './types';
2
+ import { JavaScriptEvaluator } from './JavaScriptEvaluator';
3
+
4
/**
 * Keeps an ordered list of exercise evaluators and dispatches each exercise
 * to the first registered evaluator that reports it can handle the path.
 */
export class EvaluatorRegistry {
  private evaluators: ExerciseEvaluator[] = [];

  /** Creates a registry pre-populated with the JavaScript evaluator. */
  constructor() {
    this.registerEvaluator(new JavaScriptEvaluator());
  }

  /** Appends an evaluator; earlier registrations take precedence on lookup. */
  registerEvaluator(evaluator: ExerciseEvaluator): void {
    this.evaluators.push(evaluator);
  }

  /**
   * Looks up the evaluator responsible for an exercise directory.
   *
   * @returns The first evaluator whose `canEvaluate` accepts the path, or
   *          `null` when none does.
   */
  evalForExercise(exercisePath: string): ExerciseEvaluator | null {
    for (const candidate of this.evaluators) {
      if (candidate.canEvaluate(exercisePath)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Runs the matching evaluator against an exercise.
   *
   * @returns `null` when no evaluator matches; otherwise the evaluation
   *          result. An evaluator that throws yields a zeroed, unsuccessful
   *          result carrying the error text rather than a rejection.
   */
  async evaluateExercise(exercisePath: string, exerciseName: string): Promise<TestEvaluationResult | null> {
    const chosen = this.evalForExercise(exercisePath);

    if (chosen === null) {
      console.warn(`No evaluator found for exercise: ${exerciseName} at ${exercisePath}`);
      return null;
    }

    const evaluatedBy = chosen.language;

    try {
      console.log(`Evaluating ${exerciseName} using ${chosen.language} evaluator...`);
      const testResult = await chosen.evaluate(exercisePath);
      return { exerciseName, testResult, evaluatedBy };
    } catch (error) {
      console.error(`Error evaluating exercise ${exerciseName}:`, error);

      // Report the failure as a result so callers still get an entry.
      const reason = error instanceof Error ? error.message : String(error);
      return {
        exerciseName,
        testResult: {
          passed: 0,
          failed: 0,
          total: 0,
          success: false,
          output: '',
          errorMessage: `Evaluation failed: ${reason}`
        },
        evaluatedBy
      };
    }
  }

  /** Lists the `language` of every registered evaluator, in registration order. */
  getAvailableEvaluators(): string[] {
    return Array.from(this.evaluators, (entry) => entry.language);
  }

  /** Reports whether any registered evaluator accepts the exercise path. */
  canEvaluateExercise(exercisePath: string): boolean {
    return this.evalForExercise(exercisePath) !== null;
  }
}
@@ -0,0 +1,183 @@
1
+ import { ExerciseEvaluator, TestResult } from './types';
2
+ import { execSync } from 'child_process';
3
+ import * as fs from 'fs';
4
+ import * as path from 'path';
5
+
6
/**
 * Evaluates a JavaScript exercise by running its test command in the
 * exercise directory and turning the runner's output into a `TestResult`.
 *
 * Jest's `--json` report is preferred; when no JSON report can be found the
 * plain-text summary is parsed instead.
 */
export class JavaScriptEvaluator implements ExerciseEvaluator {
  language = 'javascript';

  /**
   * Decides whether a directory looks like a JavaScript exercise with tests.
   *
   * @param exercisePath - Directory of the exercise.
   * @returns `true` when a readable `package.json` exists and either it
   *          declares a test script / jest dependency / jest config, or a
   *          `jest.config.js`/`jest.config.json` file sits next to it.
   */
  canEvaluate(exercisePath: string): boolean {
    const packageJsonPath = path.join(exercisePath, 'package.json');

    if (!fs.existsSync(packageJsonPath)) {
      return false;
    }

    try {
      const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));

      // Check if there's a test script or jest configuration
      return !!(
        packageJson.scripts?.test ||
        packageJson.devDependencies?.jest ||
        packageJson.dependencies?.jest ||
        packageJson.jest ||
        fs.existsSync(path.join(exercisePath, 'jest.config.js')) ||
        fs.existsSync(path.join(exercisePath, 'jest.config.json'))
      );
    } catch (error) {
      // Unreadable or malformed package.json: treat as not evaluable.
      return false;
    }
  }

  /**
   * Installs dependencies when `node_modules` is missing, runs the tests and
   * parses the outcome.
   *
   * @param exercisePath - Directory of the exercise.
   * @returns The parsed result. Failures never reject: a failing test run is
   *          parsed from the captured output, and any other error yields a
   *          zeroed, unsuccessful result carrying the error message.
   */
  async evaluate(exercisePath: string): Promise<TestResult> {
    try {
      // First try to install dependencies if node_modules doesn't exist
      const nodeModulesPath = path.join(exercisePath, 'node_modules');
      if (!fs.existsSync(nodeModulesPath)) {
        try {
          execSync('npm install', {
            cwd: exercisePath,
            stdio: 'pipe',
            timeout: 60000 // 60 second timeout
          });
        } catch (installError) {
          // Continue anyway, maybe dependencies are not needed
          console.warn(`Failed to install dependencies in ${exercisePath}:`, installError);
        }
      }

      // Try to run tests with JSON output
      let command = 'npm test';

      // Check if we can use Jest directly with JSON reporter
      const packageJsonPath = path.join(exercisePath, 'package.json');
      if (fs.existsSync(packageJsonPath)) {
        const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));

        // If jest is available, use it directly with JSON reporter
        if (packageJson.devDependencies?.jest || packageJson.dependencies?.jest) {
          command = 'npx jest --json --verbose';
        } else if (packageJson.scripts?.test) {
          // Try to modify the test script to include JSON output
          const testScript = packageJson.scripts.test;
          if (testScript.includes('jest')) {
            command = `${testScript} --json --verbose`;
          }
        }
      }

      // NOTE(review): on success execSync only returns stdout, so a runner
      // that writes its report to stderr is not visible on this path.
      const output = execSync(command, {
        cwd: exercisePath,
        stdio: 'pipe',
        encoding: 'utf8',
        timeout: 120000 // 2 minute timeout for tests
      });

      return this.parseJestOutput(output);

    } catch (error: unknown) {
      const failure = error as
        | { stdout?: unknown; stderr?: unknown; message?: string }
        | null
        | undefined;

      // Jest exits with non-zero code when tests fail, so we need to parse
      // the output. Both streams are considered: the JSON report is on
      // stdout while the human-readable summary may only be on stderr.
      const captured = [failure?.stdout, failure?.stderr]
        .map((chunk) => (chunk == null ? '' : String(chunk)))
        .filter((chunk) => chunk.length > 0)
        .join('\n');

      if (captured) {
        return this.parseJestOutput(captured);
      }

      return {
        passed: 0,
        failed: 0,
        total: 0,
        success: false,
        output: failure?.message || 'Test execution failed',
        errorMessage: failure?.message,
        details: error
      };
    }
  }

  /**
   * Extracts a Jest `--json` report from mixed output.
   *
   * Scans line by line for the first line that starts with `{` and mentions
   * `"success"` or `"numTotalTests"`; falls back to text parsing when no such
   * line exists or it is not valid JSON.
   */
  private parseJestOutput(output: string): TestResult {
    try {
      const jsonLine = output
        .split('\n')
        .map((line) => line.trim())
        .find(
          (line) =>
            line.startsWith('{') &&
            (line.includes('"success"') || line.includes('"numTotalTests"'))
        );

      if (jsonLine) {
        const result = JSON.parse(jsonLine);

        return {
          passed: result.numPassedTests || 0,
          failed: result.numFailedTests || 0,
          total: result.numTotalTests || 0,
          skipped: result.numPendingTests || 0,
          success: result.success || false,
          output: output,
          details: result
        };
      }
    } catch (error) {
      // Fall back to text parsing
    }

    return this.parseTextOutput(output);
  }

  /**
   * Derives counts from plain-text runner output.
   *
   * Order of preference:
   *  1. the `Tests:` summary line, read field by field so that any
   *     combination of failed/skipped/passed/total is understood and the
   *     `Test Suites:` line is never mistaken for test counts;
   *  2. the first `N passed` / `N failed` / `N total` found anywhere;
   *  3. counting `✓`/`PASS` and `✗`/`FAIL` markers.
   *
   * `success` requires at least one test and no failures.
   */
  private parseTextOutput(output: string): TestResult {
    let passed = 0;
    let failed = 0;
    let total = 0;
    let skipped: number | undefined;

    // Reads "<number> <label>" from a summary line, e.g. "5 passed".
    const countOf = (text: string, label: string): number | undefined => {
      const match = text.match(new RegExp(`(\\d+)\\s+${label}\\b`));
      return match ? parseInt(match[1], 10) : undefined;
    };

    // "Tests:" cannot match the "Test Suites:" line, which precedes it.
    const summary = output.match(/Tests:[ \t]+([^\r\n]*)/);
    if (summary) {
      const line = summary[1];
      passed = countOf(line, 'passed') ?? 0;
      failed = countOf(line, 'failed') ?? 0;
      skipped = countOf(line, 'skipped');
      total = countOf(line, 'total') ?? passed + failed + (skipped ?? 0);
    }

    if (total === 0) {
      // No usable summary line: look for counts anywhere in the output.
      const passedMatch = output.match(/(\d+) passed/);
      const failedMatch = output.match(/(\d+) failed/);
      const totalMatch = output.match(/(\d+) total/);

      if (passedMatch) passed = parseInt(passedMatch[1], 10);
      if (failedMatch) failed = parseInt(failedMatch[1], 10);
      if (totalMatch) total = parseInt(totalMatch[1], 10);

      if (total === 0) {
        // Last resort: count individual pass/fail markers.
        const testResults = output.match(/✓|✗|PASS|FAIL/g);
        if (testResults) {
          total = testResults.length;
          passed = testResults.filter(r => r === '✓' || r === 'PASS').length;
          failed = total - passed;
        }
      }
    }

    const success = failed === 0 && total > 0;

    return {
      passed,
      failed,
      total,
      ...(skipped !== undefined ? { skipped } : {}),
      success,
      output,
      errorMessage: success ? undefined : 'Some tests failed'
    };
  }
}
@@ -0,0 +1,3 @@
1
+ export * from './types';
2
+ export * from './JavaScriptEvaluator';
3
+ export * from './EvaluatorRegistry';
@@ -0,0 +1,22 @@
1
/** Outcome of running one exercise's test suite. */
export interface TestResult {
  /** Number of tests that passed. */
  passed: number;
  /** Number of tests that failed. */
  failed: number;
  /** Total number of tests counted. */
  total: number;
  /** Number of skipped tests, when the runner reported it. */
  skipped?: number;
  /** Overall verdict for the run. */
  success: boolean;
  /** Captured runner output (or an error message when nothing ran). */
  output: string;
  /** Human-readable failure description, absent on success. */
  errorMessage?: string;
  /** Raw test runner output */
  details?: any;
}
11
+
12
/** Contract for a language-specific test runner. */
export interface ExerciseEvaluator {
  /** Identifier of the language this evaluator handles. */
  language: string;

  /** Reports whether the exercise at `exercisePath` can be handled. */
  canEvaluate(exercisePath: string): boolean;

  /** Runs the exercise's tests and resolves with the parsed result. */
  evaluate(exercisePath: string): Promise<TestResult>;
}
17
+
18
/** A `TestResult` paired with the exercise it belongs to. */
export interface TestEvaluationResult {
  /** Name of the evaluated exercise. */
  exerciseName: string;
  /** Parsed outcome of the test run. */
  testResult: TestResult;
  /** Which evaluator was used */
  evaluatedBy: string;
}
@@ -0,0 +1,3 @@
1
+ export { BenchmarkRunner } from './runner';
2
+ export * from './types';
3
+ import 'dotenv/config'
@@ -0,0 +1,13 @@
1
+ import { AIClient, HttpClient } from "../../ts_build/src/clients";
2
+
3
/**
 * Registers an HTTP-backed client for a provider and loads its models.
 *
 * @param provider - Name under which the client is registered.
 * @param url - Passed to the `HttpClient` constructor.
 * @param headers - Passed to the `HttpClient` constructor.
 * @param clients - Client registry that receives the new client.
 * @returns Resolves once `loadProviderModels` has completed for the provider.
 */
export async function registerProvider(
  provider: string,
  url: string,
  headers: Record<string, string>,
  clients: AIClient
): Promise<void> {
  // Register first so the model lookup that follows can find the client.
  clients.registerClient(provider, new HttpClient(url, headers));

  await clients.loadProviderModels(provider);
}