@tyvm/knowhow 0.0.32 → 0.0.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/autodoc/plugins/downloader/downloader.mdx +2 -2
- package/benchmarks/.dockerignore +7 -0
- package/benchmarks/README.md +166 -0
- package/benchmarks/docker/Dockerfile +68 -0
- package/benchmarks/example-config.yml +27 -0
- package/benchmarks/jest.config.js +13 -0
- package/benchmarks/package-lock.json +4297 -0
- package/benchmarks/package.json +39 -0
- package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +2814 -0
- package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +2014 -0
- package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +3121 -0
- package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +98 -0
- package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +3256 -0
- package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +4298 -0
- package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3031 -0
- package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +2990 -0
- package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +3256 -0
- package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3007 -0
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +3256 -0
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +3036 -0
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +3280 -0
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +1920 -0
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +3281 -0
- package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +2896 -0
- package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +3011 -0
- package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +3003 -0
- package/benchmarks/scripts/build-and-run.sh +47 -0
- package/benchmarks/scripts/clone-exercism.sh +92 -0
- package/benchmarks/scripts/validate.sh +48 -0
- package/benchmarks/src/__tests__/runner.test.ts +27 -0
- package/benchmarks/src/cli.ts +90 -0
- package/benchmarks/src/evaluators/EvaluatorRegistry.ts +64 -0
- package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +183 -0
- package/benchmarks/src/evaluators/index.ts +3 -0
- package/benchmarks/src/evaluators/types.ts +22 -0
- package/benchmarks/src/index.ts +3 -0
- package/benchmarks/src/providers.ts +13 -0
- package/benchmarks/src/runner.ts +824 -0
- package/benchmarks/src/types.ts +63 -0
- package/benchmarks/tsconfig.json +19 -0
- package/jest.config.js +2 -1
- package/leaderboard/README.md +148 -0
- package/leaderboard/app/api/benchmark-data/route.ts +131 -0
- package/leaderboard/app/api/benchmark-detail/route.ts +172 -0
- package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +501 -0
- package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +375 -0
- package/leaderboard/app/globals.css +27 -0
- package/leaderboard/app/layout.tsx +21 -0
- package/leaderboard/app/page.tsx +170 -0
- package/leaderboard/components/LeaderboardTable.tsx +168 -0
- package/leaderboard/components/PerformanceChart.tsx +109 -0
- package/leaderboard/next-env.d.ts +5 -0
- package/leaderboard/next.config.js +4 -0
- package/leaderboard/package-lock.json +6363 -0
- package/leaderboard/package.json +28 -0
- package/leaderboard/postcss.config.js +6 -0
- package/leaderboard/tailwind.config.js +17 -0
- package/leaderboard/tsconfig.json +28 -0
- package/leaderboard/types/benchmark.ts +67 -0
- package/leaderboard/utils/dataProcessor.ts +33 -0
- package/package.json +2 -1
- package/src/agents/base/base.ts +147 -21
- package/src/agents/base/prompt.ts +28 -0
- package/src/agents/index.ts +3 -0
- package/src/agents/patcher/patcher.ts +6 -4
- package/src/agents/setup/setup.ts +56 -0
- package/src/agents/tools/agentCall.ts +6 -2
- package/src/agents/tools/aiClient.ts +74 -8
- package/src/agents/tools/execCommand.ts +13 -14
- package/src/agents/tools/executeScript/README.md +16 -0
- package/src/agents/tools/index.ts +2 -0
- package/src/agents/tools/list.ts +73 -16
- package/src/agents/tools/startAgentTask.ts +109 -0
- package/src/agents/tools/textSearch.ts +1 -1
- package/src/agents/tools/visionTool.ts +31 -2
- package/src/agents/tools/ycmd/client.ts +608 -0
- package/src/agents/tools/ycmd/definitions.ts +294 -0
- package/src/agents/tools/ycmd/detection.ts +211 -0
- package/src/agents/tools/ycmd/index.ts +11 -0
- package/src/agents/tools/ycmd/installer.ts +251 -0
- package/src/agents/tools/ycmd/server.ts +535 -0
- package/src/agents/tools/ycmd/serverManager.ts +316 -0
- package/src/agents/tools/ycmd/tools/completion.ts +113 -0
- package/src/agents/tools/ycmd/tools/diagnostics.ts +155 -0
- package/src/agents/tools/ycmd/tools/getLocations.ts +173 -0
- package/src/agents/tools/ycmd/tools/goto.ts +169 -0
- package/src/agents/tools/ycmd/tools/refactor.ts +204 -0
- package/src/agents/tools/ycmd/tools/signature.ts +174 -0
- package/src/agents/tools/ycmd/tools/start.ts +95 -0
- package/src/agents/tools/ycmd/utils/pathUtils.ts +59 -0
- package/src/ai.ts +15 -0
- package/src/chat/CliChatService.ts +277 -0
- package/src/chat/modules/AgentModule.ts +980 -0
- package/src/chat/modules/AskModule.ts +98 -0
- package/src/chat/modules/BaseChatModule.ts +66 -0
- package/src/chat/modules/InternalChatModule.ts +174 -0
- package/src/chat/modules/SearchModule.ts +166 -0
- package/src/chat/modules/SetupModule.ts +185 -0
- package/src/chat/modules/SystemModule.ts +120 -0
- package/src/chat/modules/VoiceModule.ts +70 -0
- package/src/chat/modules/index.js +5 -0
- package/src/chat/types.ts +97 -0
- package/src/chat.ts +9 -1
- package/src/chat2.ts +62 -0
- package/src/cli.ts +264 -35
- package/src/clients/anthropic.ts +14 -7
- package/src/clients/gemini.ts +15 -7
- package/src/clients/http.ts +17 -7
- package/src/clients/index.ts +117 -4
- package/src/clients/knowhow.ts +7 -2
- package/src/clients/knowhowMcp.ts +118 -0
- package/src/clients/openai.ts +32 -8
- package/src/clients/types.ts +1 -0
- package/src/clients/xai.ts +17 -5
- package/src/config.ts +30 -5
- package/src/conversion.ts +4 -1
- package/src/embeddings.ts +79 -23
- package/src/login.ts +26 -9
- package/src/microphone.ts +0 -1
- package/src/plugins/downloader/downloader.ts +72 -24
- package/src/plugins/downloader/plugin.ts +3 -1
- package/src/plugins/plugins.ts +3 -0
- package/src/processors/CustomVariables.ts +425 -0
- package/src/processors/HarmonyToolProcessor.ts +264 -0
- package/src/processors/XmlToolCallProcessor.ts +533 -0
- package/src/processors/index.ts +3 -0
- package/src/prompts/KnowhowConfigExamples.ts +376 -0
- package/src/services/KnowhowClient.ts +49 -3
- package/src/services/Mcp.ts +42 -3
- package/src/services/McpServer.ts +14 -4
- package/src/services/McpWebsocketTransport.ts +21 -7
- package/src/services/MessageProcessor.ts +10 -5
- package/src/services/index.ts +5 -0
- package/src/services/script-execution/ScriptExecutor.ts +34 -1
- package/src/services/types.ts +17 -14
- package/src/types.ts +17 -0
- package/src/utils/index.ts +138 -0
- package/tests/XmlToolCallProcessor.test.ts +468 -0
- package/tests/manual/ycmd/debug_diagnostics_test.ts +127 -0
- package/tests/manual/ycmd/fixtures/debug_diagnostics.ts +26 -0
- package/tests/manual/ycmd/fixtures/file_change_test.ts +17 -0
- package/tests/manual/ycmd/minimal_advanced_test.ts +108 -0
- package/tests/manual/ycmd/simple_diagnostics_test.ts +61 -0
- package/tests/manual/ycmd/simple_test.ts +74 -0
- package/tests/manual/ycmd/test-typescript-sample.ts +34 -0
- package/tests/manual/ycmd/test_advanced_features.ts +407 -0
- package/tests/manual/ycmd/test_advanced_with_tools.ts +320 -0
- package/tests/manual/ycmd/test_comprehensive_typescript.ts +179 -0
- package/tests/manual/ycmd/test_diagnostics_file_changes.ts +249 -0
- package/tests/manual/ycmd/test_diagnostics_fix.ts +99 -0
- package/tests/manual/ycmd/test_diagnostics_simple.ts +100 -0
- package/tests/manual/ycmd/test_diagnostics_timing.ts +120 -0
- package/tests/manual/ycmd/test_discover_commands.ts +310 -0
- package/tests/manual/ycmd/test_endpoints.ts +115 -0
- package/tests/manual/ycmd/test_final_comprehensive.ts +218 -0
- package/tests/manual/ycmd/test_final_validation.ts +150 -0
- package/tests/manual/ycmd/test_implementation.js +42 -0
- package/tests/manual/ycmd/test_individual_ycmd_tool.ts +39 -0
- package/tests/manual/ycmd/test_server_manager.ts +52 -0
- package/tests/manual/ycmd/test_simple_debug.ts +86 -0
- package/tests/manual/ycmd/test_tsserver_workflow.js +83 -0
- package/tests/manual/ycmd/test_tsserver_workflow.ts +122 -0
- package/tests/manual/ycmd/test_typescript_simple.ts +48 -0
- package/tests/manual/ycmd/test_typescript_ycmd.ts +105 -0
- package/tests/manual/ycmd/test_workspace_config.ts +90 -0
- package/tests/manual/ycmd/test_ycmd_auto_start.ts +137 -0
- package/tests/manual/ycmd/test_ycmd_comprehensive.ts +73 -0
- package/tests/manual/ycmd/test_ycmd_connection.py +10 -0
- package/tests/manual/ycmd/test_ycmd_direct.ts +142 -0
- package/tests/manual/ycmd/test_ycmd_experiment.ts +48 -0
- package/tests/manual/ycmd/test_ycmd_final.ts +200 -0
- package/tests/manual/ycmd/test_ycmd_fixed.py +18 -0
- package/tests/manual/ycmd/test_ycmd_integration.ts +112 -0
- package/tests/manual/ycmd/test_ycmd_simple.ts +45 -0
- package/tests/manual/ycmd/test_ycmd_usage.py +27 -0
- package/tests/manual/ycmd/working_simple_test.ts +134 -0
- package/ts_build/src/agents/base/base.d.ts +14 -1
- package/ts_build/src/agents/base/base.js +91 -17
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/base/prompt.d.ts +1 -1
- package/ts_build/src/agents/base/prompt.js +28 -0
- package/ts_build/src/agents/base/prompt.js.map +1 -1
- package/ts_build/src/agents/index.d.ts +2 -0
- package/ts_build/src/agents/index.js +2 -0
- package/ts_build/src/agents/index.js.map +1 -1
- package/ts_build/src/agents/patcher/patcher.js +6 -3
- package/ts_build/src/agents/patcher/patcher.js.map +1 -1
- package/ts_build/src/agents/setup/setup.d.ts +8 -0
- package/ts_build/src/agents/setup/setup.js +59 -0
- package/ts_build/src/agents/setup/setup.js.map +1 -0
- package/ts_build/src/agents/tools/agentCall.js +5 -2
- package/ts_build/src/agents/tools/agentCall.js.map +1 -1
- package/ts_build/src/agents/tools/aiClient.d.ts +6 -5
- package/ts_build/src/agents/tools/aiClient.js +37 -6
- package/ts_build/src/agents/tools/aiClient.js.map +1 -1
- package/ts_build/src/agents/tools/execCommand.d.ts +2 -2
- package/ts_build/src/agents/tools/execCommand.js +5 -6
- package/ts_build/src/agents/tools/execCommand.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
- package/ts_build/src/agents/tools/index.d.ts +2 -0
- package/ts_build/src/agents/tools/index.js +2 -0
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +66 -16
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/startAgentTask.d.ts +13 -0
- package/ts_build/src/agents/tools/startAgentTask.js +74 -0
- package/ts_build/src/agents/tools/startAgentTask.js.map +1 -0
- package/ts_build/src/agents/tools/startChatTask.d.ts +13 -0
- package/ts_build/src/agents/tools/startChatTask.js +73 -0
- package/ts_build/src/agents/tools/startChatTask.js.map +1 -0
- package/ts_build/src/agents/tools/textSearch.js +1 -1
- package/ts_build/src/agents/tools/textSearch.js.map +1 -1
- package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
- package/ts_build/src/agents/tools/visionTool.js +23 -3
- package/ts_build/src/agents/tools/visionTool.js.map +1 -1
- package/ts_build/src/agents/tools/ycmd/client.d.ts +93 -0
- package/ts_build/src/agents/tools/ycmd/client.js +355 -0
- package/ts_build/src/agents/tools/ycmd/client.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/definitions.d.ts +345 -0
- package/ts_build/src/agents/tools/ycmd/definitions.js +298 -0
- package/ts_build/src/agents/tools/ycmd/definitions.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/detection.d.ts +11 -0
- package/ts_build/src/agents/tools/ycmd/detection.js +175 -0
- package/ts_build/src/agents/tools/ycmd/detection.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/index.d.ts +8 -0
- package/ts_build/src/agents/tools/ycmd/index.js +20 -0
- package/ts_build/src/agents/tools/ycmd/index.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/installer.d.ts +19 -0
- package/ts_build/src/agents/tools/ycmd/installer.js +196 -0
- package/ts_build/src/agents/tools/ycmd/installer.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/server.d.ts +35 -0
- package/ts_build/src/agents/tools/ycmd/server.js +363 -0
- package/ts_build/src/agents/tools/ycmd/server.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/serverManager.d.ts +39 -0
- package/ts_build/src/agents/tools/ycmd/serverManager.js +210 -0
- package/ts_build/src/agents/tools/ycmd/serverManager.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/completion.d.ts +22 -0
- package/ts_build/src/agents/tools/ycmd/tools/completion.js +72 -0
- package/ts_build/src/agents/tools/ycmd/tools/completion.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/diagnostics.d.ts +42 -0
- package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js +88 -0
- package/ts_build/src/agents/tools/ycmd/tools/diagnostics.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/getLocations.d.ts +22 -0
- package/ts_build/src/agents/tools/ycmd/tools/getLocations.js +142 -0
- package/ts_build/src/agents/tools/ycmd/tools/getLocations.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/goto.d.ts +20 -0
- package/ts_build/src/agents/tools/ycmd/tools/goto.js +101 -0
- package/ts_build/src/agents/tools/ycmd/tools/goto.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/refactor.d.ts +32 -0
- package/ts_build/src/agents/tools/ycmd/tools/refactor.js +123 -0
- package/ts_build/src/agents/tools/ycmd/tools/refactor.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/signature.d.ts +25 -0
- package/ts_build/src/agents/tools/ycmd/tools/signature.js +110 -0
- package/ts_build/src/agents/tools/ycmd/tools/signature.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/tools/start.d.ts +17 -0
- package/ts_build/src/agents/tools/ycmd/tools/start.js +65 -0
- package/ts_build/src/agents/tools/ycmd/tools/start.js.map +1 -0
- package/ts_build/src/agents/tools/ycmd/utils/pathUtils.d.ts +4 -0
- package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js +67 -0
- package/ts_build/src/agents/tools/ycmd/utils/pathUtils.js.map +1 -0
- package/ts_build/src/ai.d.ts +1 -0
- package/ts_build/src/ai.js +40 -1
- package/ts_build/src/ai.js.map +1 -1
- package/ts_build/src/chat/ChatCommandHandler.d.ts +36 -0
- package/ts_build/src/chat/ChatCommandHandler.js +268 -0
- package/ts_build/src/chat/ChatCommandHandler.js.map +1 -0
- package/ts_build/src/chat/ChatInputManager.d.ts +22 -0
- package/ts_build/src/chat/ChatInputManager.js +85 -0
- package/ts_build/src/chat/ChatInputManager.js.map +1 -0
- package/ts_build/src/chat/ChatManager.d.ts +49 -0
- package/ts_build/src/chat/ChatManager.js +271 -0
- package/ts_build/src/chat/ChatManager.js.map +1 -0
- package/ts_build/src/chat/ChatSession.d.ts +32 -0
- package/ts_build/src/chat/ChatSession.js +3 -0
- package/ts_build/src/chat/ChatSession.js.map +1 -0
- package/ts_build/src/chat/ChatSessionManager.d.ts +19 -0
- package/ts_build/src/chat/ChatSessionManager.js +188 -0
- package/ts_build/src/chat/ChatSessionManager.js.map +1 -0
- package/ts_build/src/chat/ChatStateManager.d.ts +58 -0
- package/ts_build/src/chat/ChatStateManager.js +156 -0
- package/ts_build/src/chat/ChatStateManager.js.map +1 -0
- package/ts_build/src/chat/CliChatService.d.ts +35 -0
- package/ts_build/src/chat/CliChatService.js +201 -0
- package/ts_build/src/chat/CliChatService.js.map +1 -0
- package/ts_build/src/chat/InterruptibleInput.d.ts +20 -0
- package/ts_build/src/chat/InterruptibleInput.js +109 -0
- package/ts_build/src/chat/InterruptibleInput.js.map +1 -0
- package/ts_build/src/chat/interfaces/ChatModule.d.ts +6 -0
- package/ts_build/src/chat/interfaces/ChatModule.js +3 -0
- package/ts_build/src/chat/interfaces/ChatModule.js.map +1 -0
- package/ts_build/src/chat/modules/AgentModule.d.ts +56 -0
- package/ts_build/src/chat/modules/AgentModule.js +705 -0
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -0
- package/ts_build/src/chat/modules/AskModule.d.ts +10 -0
- package/ts_build/src/chat/modules/AskModule.js +63 -0
- package/ts_build/src/chat/modules/AskModule.js.map +1 -0
- package/ts_build/src/chat/modules/BaseChatModule.d.ts +14 -0
- package/ts_build/src/chat/modules/BaseChatModule.js +32 -0
- package/ts_build/src/chat/modules/BaseChatModule.js.map +1 -0
- package/ts_build/src/chat/modules/InternalChatModule.d.ts +24 -0
- package/ts_build/src/chat/modules/InternalChatModule.js +127 -0
- package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -0
- package/ts_build/src/chat/modules/SearchModule.d.ts +12 -0
- package/ts_build/src/chat/modules/SearchModule.js +119 -0
- package/ts_build/src/chat/modules/SearchModule.js.map +1 -0
- package/ts_build/src/chat/modules/SetupModule.d.ts +15 -0
- package/ts_build/src/chat/modules/SetupModule.js +147 -0
- package/ts_build/src/chat/modules/SetupModule.js.map +1 -0
- package/ts_build/src/chat/modules/SystemModule.d.ts +14 -0
- package/ts_build/src/chat/modules/SystemModule.js +90 -0
- package/ts_build/src/chat/modules/SystemModule.js.map +1 -0
- package/ts_build/src/chat/modules/VoiceModule.d.ts +11 -0
- package/ts_build/src/chat/modules/VoiceModule.js +57 -0
- package/ts_build/src/chat/modules/VoiceModule.js.map +1 -0
- package/ts_build/src/chat/types.d.ts +83 -0
- package/ts_build/src/chat/types.js +3 -0
- package/ts_build/src/chat/types.js.map +1 -0
- package/ts_build/src/chat.js +7 -1
- package/ts_build/src/chat.js.map +1 -1
- package/ts_build/src/chat2.d.ts +3 -0
- package/ts_build/src/chat2.js +47 -0
- package/ts_build/src/chat2.js.map +1 -0
- package/ts_build/src/cli.js +218 -37
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.d.ts +5 -2
- package/ts_build/src/clients/anthropic.js +12 -7
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/gemini.d.ts +6 -3
- package/ts_build/src/clients/gemini.js +13 -7
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/http.d.ts +1 -0
- package/ts_build/src/clients/http.js +12 -5
- package/ts_build/src/clients/http.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +10 -0
- package/ts_build/src/clients/index.js +74 -4
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/knowhow.d.ts +3 -1
- package/ts_build/src/clients/knowhow.js +8 -2
- package/ts_build/src/clients/knowhow.js.map +1 -1
- package/ts_build/src/clients/knowhowMcp.d.ts +20 -0
- package/ts_build/src/clients/knowhowMcp.js +86 -0
- package/ts_build/src/clients/knowhowMcp.js.map +1 -0
- package/ts_build/src/clients/openai.d.ts +5 -2
- package/ts_build/src/clients/openai.js +29 -8
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/types.d.ts +1 -0
- package/ts_build/src/clients/xai.d.ts +5 -2
- package/ts_build/src/clients/xai.js +15 -5
- package/ts_build/src/clients/xai.js.map +1 -1
- package/ts_build/src/config.js +24 -3
- package/ts_build/src/config.js.map +1 -1
- package/ts_build/src/conversion.js +6 -4
- package/ts_build/src/conversion.js.map +1 -1
- package/ts_build/src/embeddings.d.ts +2 -1
- package/ts_build/src/embeddings.js +62 -17
- package/ts_build/src/embeddings.js.map +1 -1
- package/ts_build/src/login.d.ts +1 -1
- package/ts_build/src/login.js +21 -7
- package/ts_build/src/login.js.map +1 -1
- package/ts_build/src/microphone.js.map +1 -1
- package/ts_build/src/plugins/downloader/downloader.d.ts +4 -5
- package/ts_build/src/plugins/downloader/downloader.js +55 -26
- package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
- package/ts_build/src/plugins/downloader/plugin.js +5 -3
- package/ts_build/src/plugins/downloader/plugin.js.map +1 -1
- package/ts_build/src/plugins/plugins.js +3 -0
- package/ts_build/src/plugins/plugins.js.map +1 -1
- package/ts_build/src/processors/CustomVariables.d.ts +32 -0
- package/ts_build/src/processors/CustomVariables.js +297 -0
- package/ts_build/src/processors/CustomVariables.js.map +1 -0
- package/ts_build/src/processors/HarmonyToolProcessor.d.ts +15 -0
- package/ts_build/src/processors/HarmonyToolProcessor.js +154 -0
- package/ts_build/src/processors/HarmonyToolProcessor.js.map +1 -0
- package/ts_build/src/processors/XmlToolCallProcessor.d.ts +14 -0
- package/ts_build/src/processors/XmlToolCallProcessor.js +357 -0
- package/ts_build/src/processors/XmlToolCallProcessor.js.map +1 -0
- package/ts_build/src/processors/index.d.ts +3 -0
- package/ts_build/src/processors/index.js +7 -1
- package/ts_build/src/processors/index.js.map +1 -1
- package/ts_build/src/prompts/KnowhowConfigExamples.d.ts +2 -0
- package/ts_build/src/prompts/KnowhowConfigExamples.js +379 -0
- package/ts_build/src/prompts/KnowhowConfigExamples.js.map +1 -0
- package/ts_build/src/services/KnowhowClient.d.ts +22 -0
- package/ts_build/src/services/KnowhowClient.js +14 -2
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/Mcp.d.ts +1 -0
- package/ts_build/src/services/Mcp.js +20 -3
- package/ts_build/src/services/Mcp.js.map +1 -1
- package/ts_build/src/services/McpServer.d.ts +1 -1
- package/ts_build/src/services/McpServer.js +8 -4
- package/ts_build/src/services/McpServer.js.map +1 -1
- package/ts_build/src/services/McpWebsocketTransport.js +17 -7
- package/ts_build/src/services/McpWebsocketTransport.js.map +1 -1
- package/ts_build/src/services/MessageProcessor.d.ts +1 -1
- package/ts_build/src/services/MessageProcessor.js +4 -4
- package/ts_build/src/services/MessageProcessor.js.map +1 -1
- package/ts_build/src/services/index.d.ts +2 -0
- package/ts_build/src/services/index.js +4 -0
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +1 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js +23 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
- package/ts_build/src/services/types.d.ts +2 -6
- package/ts_build/src/services/types.js +4 -4
- package/ts_build/src/services/types.js.map +1 -1
- package/ts_build/src/types.d.ts +11 -0
- package/ts_build/src/types.js +8 -0
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/index.d.ts +2 -0
- package/ts_build/src/utils/index.js +102 -1
- package/ts_build/src/utils/index.js.map +1 -1
- package/ts_build/tests/XmlToolCallProcessor.test.d.ts +1 -0
- package/ts_build/tests/XmlToolCallProcessor.test.js +376 -0
- package/ts_build/tests/XmlToolCallProcessor.test.js.map +1 -0
- package/ts_build/tests/manual/ycmd/debug_diagnostics_test.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js +114 -0
- package/ts_build/tests/manual/ycmd/debug_diagnostics_test.js.map +1 -0
- package/ts_build/tests/manual/ycmd/minimal_advanced_test.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/minimal_advanced_test.js +104 -0
- package/ts_build/tests/manual/ycmd/minimal_advanced_test.js.map +1 -0
- package/ts_build/tests/manual/ycmd/simple_diagnostics_test.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js +74 -0
- package/ts_build/tests/manual/ycmd/simple_diagnostics_test.js.map +1 -0
- package/ts_build/tests/manual/ycmd/simple_test.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/simple_test.js +82 -0
- package/ts_build/tests/manual/ycmd/simple_test.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test-typescript-sample.d.ts +14 -0
- package/ts_build/tests/manual/ycmd/test-typescript-sample.js +20 -0
- package/ts_build/tests/manual/ycmd/test-typescript-sample.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_advanced_features.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_advanced_features.js +297 -0
- package/ts_build/tests/manual/ycmd/test_advanced_features.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_advanced_with_tools.d.ts +3 -0
- package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js +262 -0
- package/ts_build/tests/manual/ycmd/test_advanced_with_tools.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js +186 -0
- package/ts_build/tests/manual/ycmd/test_comprehensive_typescript.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js +174 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_file_changes.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_fix.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js +106 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_fix.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_simple.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js +104 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_simple.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_timing.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js +119 -0
- package/ts_build/tests/manual/ycmd/test_diagnostics_timing.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_discover_commands.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_discover_commands.js +243 -0
- package/ts_build/tests/manual/ycmd/test_discover_commands.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_endpoints.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_endpoints.js +120 -0
- package/ts_build/tests/manual/ycmd/test_endpoints.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_final_comprehensive.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_final_comprehensive.js +221 -0
- package/ts_build/tests/manual/ycmd/test_final_comprehensive.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_final_validation.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_final_validation.js +160 -0
- package/ts_build/tests/manual/ycmd/test_final_validation.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js +37 -0
- package/ts_build/tests/manual/ycmd/test_individual_ycmd_tool.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_server_manager.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_server_manager.js +38 -0
- package/ts_build/tests/manual/ycmd/test_server_manager.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_simple_debug.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_simple_debug.js +99 -0
- package/ts_build/tests/manual/ycmd/test_simple_debug.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_tsserver_workflow.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js +128 -0
- package/ts_build/tests/manual/ycmd/test_tsserver_workflow.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_typescript_simple.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_typescript_simple.js +66 -0
- package/ts_build/tests/manual/ycmd/test_typescript_simple.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_typescript_ycmd.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js +105 -0
- package/ts_build/tests/manual/ycmd/test_typescript_ycmd.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_workspace_config.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_workspace_config.js +89 -0
- package/ts_build/tests/manual/ycmd/test_workspace_config.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js +130 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_auto_start.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.d.ts +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js +83 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_comprehensive.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_direct.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_direct.js +149 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_direct.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_experiment.d.ts +15 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js +58 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_experiment.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_final.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_final.js +195 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_final.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_integration.d.ts +3 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_integration.js +110 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_integration.js.map +1 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_simple.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_simple.js +36 -0
- package/ts_build/tests/manual/ycmd/test_ycmd_simple.js.map +1 -0
- package/ts_build/tests/manual/ycmd/working_simple_test.d.ts +2 -0
- package/ts_build/tests/manual/ycmd/working_simple_test.js +134 -0
- package/ts_build/tests/manual/ycmd/working_simple_test.js.map +1 -0
- package/tsconfig.json +3 -1
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/bin/bash

# Build and run Knowhow benchmarks
# Usage: ./build-and-run.sh [command] [options...]
#
# Builds the `knowhow-bench` Docker image from benchmarks/docker/Dockerfile
# (using the project root as build context), makes sure the host-side results
# directory exists, and then runs the image with every argument given to this
# script forwarded unchanged to the container.
#
# With no arguments the image is still built, then usage is printed and the
# script exits 0 without running the container.
#
# Provider credentials reach the container in two ways:
#   * benchmarks/.env, when that file exists, via `--env-file`
#   * OPENAI_KEY / ANTHROPIC_API_KEY / GEMINI_API_KEY / XAI_API_KEY, when they
#     are set in the calling environment, via `-e NAME`

# Abort on the first failing command (e.g. a failed `docker build`).
set -e

# Resolve paths from this script's own location so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
RESULTS_DIR="$PROJECT_ROOT/benchmarks/results"
ENV_FILE="$PROJECT_ROOT/benchmarks/.env"

echo "🏗️ Building Knowhow benchmark container..."

# Build the Docker container
docker build -f "$PROJECT_ROOT/benchmarks/docker/Dockerfile" -t knowhow-bench "$PROJECT_ROOT"

echo "✅ Container built successfully!"

# Create results directory if it doesn't exist
mkdir -p "$RESULTS_DIR"

# If no arguments provided, show usage
if [ $# -eq 0 ]; then
echo ""
echo "Usage: $0 <command> [options...]"
echo ""
echo "Examples:"
echo " $0 setup --language javascript --count 5"
echo " $0 run --language javascript --count 5 --model gpt-4o-mini"
echo " $0 run --language python --count 10 --provider anthropic --model claude-3-sonnet-20240229"
echo ""
exit 0
fi

echo "🚀 Running benchmarks..."

# Assemble the `docker run` options in an array so optional flags can be
# added without breaking quoting. The results directory is bind-mounted so
# output written by the container is visible on the host.
DOCKER_ARGS=(--rm -v "$RESULTS_DIR:/app/benchmarks/results")

# `docker run --env-file` fails when the file is missing, so only pass it
# when benchmarks/.env is actually present.
if [ -f "$ENV_FILE" ]; then
DOCKER_ARGS+=(--env-file "$ENV_FILE")
fi

# Forward only the API keys that are set in the calling environment. A bare
# `-e NAME` for a variable that is not exported leaves it unset in the
# container (per the docker run reference), which could mask a value that the
# env file provides.
for KEY_NAME in OPENAI_KEY ANTHROPIC_API_KEY GEMINI_API_KEY XAI_API_KEY; do
if [ -n "${!KEY_NAME+x}" ]; then
DOCKER_ARGS+=(-e "$KEY_NAME")
fi
done

# Run the container with all provided arguments
docker run "${DOCKER_ARGS[@]}" knowhow-bench "$@"

echo "✅ Benchmarks completed!"
echo "📊 Results available in: benchmarks/results/"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/bin/bash

# Clone Exercism exercises for benchmarking
# Based on Aider's clone-exercism.sh approach
#
# Usage: clone-exercism.sh [language] [max_exercises]
#   language       Exercism track to clone (default: javascript)
#   max_exercises  Maximum number of exercises to stage (default: 10)
#
# The script clones the shared problem specifications and the language track,
# then stages every exercise that exists in both into "<exercises>/filtered",
# stopping once max_exercises have been staged.

set -e

# Configuration
EXERCISM_REPO="https://github.com/exercism/problem-specifications.git"
LANGUAGE=${1:-"javascript"} # Default to JavaScript
MAX_EXERCISES=${2:-10}      # Default to 10 exercises

# Reject anything that is not a plain non-negative integer up front; the
# numeric comparison in the loop below would otherwise fail mid-run.
case "$MAX_EXERCISES" in
  ''|*[!0-9]*)
    echo "Error: maximum exercises must be a non-negative integer, got: $MAX_EXERCISES" >&2
    exit 1
    ;;
esac

# Use different paths for local vs container
if [ -n "$CONTAINER" ]; then
  EXERCISES_DIR="/app/exercises"
else
  EXERCISES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/exercises"
fi

echo "Cloning Exercism exercises for language: $LANGUAGE"
echo "Maximum exercises: $MAX_EXERCISES"
echo "Target directory: $EXERCISES_DIR"

# Create exercises directory if it doesn't exist
mkdir -p "$EXERCISES_DIR"

# Clone the problem specifications repo if not already cloned
if [ ! -d "$EXERCISES_DIR/problem-specifications" ]; then
  echo "Cloning Exercism problem specifications..."
  git clone "$EXERCISM_REPO" "$EXERCISES_DIR/problem-specifications"
fi

# Clone the language track
LANGUAGE_REPO="https://github.com/exercism/${LANGUAGE}.git"
LANGUAGE_DIR="$EXERCISES_DIR/$LANGUAGE"

if [ ! -d "$LANGUAGE_DIR" ]; then
  echo "Cloning $LANGUAGE track..."
  git clone "$LANGUAGE_REPO" "$LANGUAGE_DIR"
fi

# Find exercises with both problem specification and language implementation
echo "Finding exercises with both specification and implementation..."

SPEC_DIR="$EXERCISES_DIR/problem-specifications/exercises"
IMPL_DIR="$LANGUAGE_DIR/exercises"

# Create filtered exercises directory (always rebuilt from scratch)
FILTERED_DIR="$EXERCISES_DIR/filtered"
if [ -d "$FILTERED_DIR" ]; then
  echo "Removing existing filtered directory: $FILTERED_DIR"
  rm -rf "$FILTERED_DIR"
fi
mkdir -p "$FILTERED_DIR"

count=0
# Iterate with a glob instead of parsing `ls` output; glob results are
# already sorted and are not subject to word splitting.
for spec_path in "$SPEC_DIR"/*/; do
  # When nothing matches, the pattern is left as a literal string; skip it.
  [ -d "$spec_path" ] || continue

  if [ "$count" -ge "$MAX_EXERCISES" ]; then
    break
  fi

  exercise="$(basename "$spec_path")"

  # Tracks keep exercises either under exercises/practice/<name> or directly
  # under exercises/<name>; the practice layout takes precedence.
  if [ -d "$IMPL_DIR/practice/$exercise" ]; then
    impl_path="$IMPL_DIR/practice/$exercise"
  elif [ -d "$IMPL_DIR/$exercise" ]; then
    impl_path="$IMPL_DIR/$exercise"
  else
    continue
  fi

  echo "Found exercise: $exercise"

  # Create exercise directory
  exercise_dir="$FILTERED_DIR/$exercise"
  mkdir -p "$exercise_dir"

  # Copy problem specification
  for spec_file in description.md metadata.yml; do
    if [ -f "$SPEC_DIR/$exercise/$spec_file" ]; then
      cp "$SPEC_DIR/$exercise/$spec_file" "$exercise_dir/"
    fi
  done

  # Copy language implementation. The `*` glob does not match dotfiles, so
  # hidden entries in the track's exercise directory are not copied.
  cp -r "$impl_path"/* "$exercise_dir/"

  count=$((count + 1))
done

echo "Successfully set up $count exercises in $FILTERED_DIR"
echo "Ready for benchmarking!"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/bin/bash

# Simple validation script to test the benchmark setup
# This runs without the full Docker setup for quick validation
#
# Each step runs quietly; when a step fails, its captured output is printed
# so the cause of the failure is visible, and the script exits with that
# step's exit status.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BENCHMARK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# run_step <success message> <command> [args...]
# Runs the command with stdout/stderr captured to a temporary file. Prints the
# success message when it succeeds; otherwise dumps the captured output to
# stderr and exits with the command's status.
run_step() {
  local success_message="$1"
  shift

  local log_file
  log_file="$(mktemp)"

  if "$@" > "$log_file" 2>&1; then
    rm -f "$log_file"
    echo " ✅ $success_message"
  else
    local status=$?
    echo " ❌ Command failed (exit $status): $*" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit "$status"
  fi
}

echo "🔍 Validating Knowhow Benchmarks setup..."

# 1. Check that benchmarks can be built
echo "1. Building benchmarks package..."
cd "$BENCHMARK_DIR"
run_step "Build successful" npm run build

# 2. Check that tests pass
echo "2. Running tests..."
run_step "Tests passed" npm test

# 3. Check that CLI can show help
echo "3. Testing CLI..."
run_step "CLI working" node dist/cli.js --help

# 4. Check that Docker can build (optional - requires Docker)
if command -v docker &> /dev/null; then
  echo "4. Testing Docker build..."
  # The Dockerfile path is given relative to the parent of the benchmarks directory.
  cd "$(dirname "$BENCHMARK_DIR")"
  run_step "Docker build successful" \
    docker build -f benchmarks/docker/Dockerfile -t knowhow-bench-test .

  # Clean up test image; a cleanup failure must not fail the validation.
  docker rmi knowhow-bench-test > /dev/null 2>&1 || true
else
  echo "4. Skipping Docker test (Docker not available)"
fi

echo ""
echo "🎉 All validations passed!"
echo ""
echo "Ready to run benchmarks. Example usage:"
echo " ./scripts/build-and-run.sh setup --language javascript --count 5"
echo " ./scripts/build-and-run.sh run --language javascript --count 5 --model gpt-4o-mini"
echo ""
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { BenchmarkRunner } from '../runner';
|
|
2
|
+
import { BenchmarkConfig } from '../types';
|
|
3
|
+
|
|
4
|
+
describe('BenchmarkRunner', () => {
  // Fixture shared by every case: a small JavaScript run with explicit limits.
  const mockConfig: BenchmarkConfig = {
    language: 'javascript',
    maxExercises: 5,
    model: 'gpt-4o-mini',
    provider: 'openai',
    limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
    outputFile: 'test-results.json',
  };

  /** Builds a fresh runner from the shared fixture. */
  const createRunner = (): BenchmarkRunner => new BenchmarkRunner(mockConfig);

  it('should create a BenchmarkRunner instance', () => {
    expect(createRunner()).toBeInstanceOf(BenchmarkRunner);
  });

  it('should have the correct configuration', () => {
    // Bracket access reads the runner's `config` member for inspection.
    const subject = createRunner();
    expect(subject['config']).toEqual(mockConfig);
  });
});
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { BenchmarkRunner } from "./runner";
|
|
5
|
+
import { BenchmarkConfig } from "./types";
|
|
6
|
+
import chalk from "chalk";
|
|
7
|
+
|
|
8
|
+
const program = new Command();

/**
 * Parses a command-line option value as a base-10 integer.
 *
 * @param flag - Option name, used only in the error message.
 * @param raw - Raw string received from the command line.
 * @returns The parsed integer.
 * @throws Error when `raw` does not begin with a valid integer.
 */
function parseIntegerOption(flag: string, raw: string): number {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `Invalid value for ${flag}: expected an integer but received "${raw}"`
    );
  }
  return parsed;
}

/**
 * Parses a command-line option value as a floating-point number.
 *
 * @param flag - Option name, used only in the error message.
 * @param raw - Raw string received from the command line.
 * @returns The parsed number.
 * @throws Error when `raw` does not begin with a valid number.
 */
function parseNumberOption(flag: string, raw: string): number {
  const parsed = Number.parseFloat(raw);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `Invalid value for ${flag}: expected a number but received "${raw}"`
    );
  }
  return parsed;
}

program
  .name("knowhow-bench")
  .description("Benchmark Knowhow terminal agent against coding exercises")
  .version("0.0.1");

// `run`: execute the benchmark with the limits given on the command line.
program
  .command("run")
  .description("Run benchmarks against Exercism exercises")
  .option(
    "-l, --language <language>",
    "Programming language to test",
    "javascript"
  )
  .option("-c, --count <count>", "Maximum number of exercises to run", "10")
  .option("-m, --model <model>", "AI model to use", "gpt-4o-mini")
  .option("-p, --provider <provider>", "AI provider to use", "openai")
  .option("--max-turns <turns>", "Maximum turns per exercise", "30")
  .option("--max-time <seconds>", "Maximum time per exercise in seconds", "300")
  .option("--max-cost <dollars>", "Maximum cost per exercise in dollars", "1.0")
  .option("--output <file>", "Output file for results", "results.json")
  .action(async (options) => {
    try {
      console.log(chalk.blue("🚀 Starting Knowhow benchmarks..."));

      // Numeric options arrive as strings; a malformed value throws here and
      // is reported by the catch block below instead of reaching the runner
      // as NaN.
      const config: BenchmarkConfig = {
        language: options.language,
        maxExercises: parseIntegerOption("--count", options.count),
        model: options.model,
        provider: options.provider,
        limits: {
          maxTurns: parseIntegerOption("--max-turns", options.maxTurns),
          maxTime: parseIntegerOption("--max-time", options.maxTime),
          maxCost: parseNumberOption("--max-cost", options.maxCost),
        },
        outputFile: options.output,
      };

      const runner = new BenchmarkRunner(config);
      await runner.run();

      console.log(chalk.green("✅ Benchmarks completed successfully!"));
      process.exit(0);
    } catch (error) {
      console.error(chalk.red("❌ Benchmark failed:"), error);
      process.exit(1);
    }
  });

// `setup`: prepare the exercises only; no benchmark is run.
program
  .command("setup")
  .description("Set up exercises for benchmarking")
  .option(
    "-l, --language <language>",
    "Programming language to setup",
    "javascript"
  )
  .option("-c, --count <count>", "Maximum number of exercises to setup", "10")
  .action(async (options) => {
    try {
      console.log(chalk.blue("📦 Setting up exercises..."));

      const runner = new BenchmarkRunner({
        language: options.language,
        maxExercises: parseIntegerOption("--count", options.count),
        model: "gpt-4o-mini", // Dummy values for setup
        provider: "openai",
        limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
        outputFile: "results.json",
      });

      await runner.setupExercises();

      console.log(chalk.green("✅ Exercises setup completed!"));
      process.exit(0);
    } catch (error) {
      console.error(chalk.red("❌ Setup failed:"), error);
      process.exit(1);
    }
  });

program.parse();
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { ExerciseEvaluator, TestResult, TestEvaluationResult } from './types';
|
|
2
|
+
import { JavaScriptEvaluator } from './JavaScriptEvaluator';
|
|
3
|
+
|
|
4
|
+
/**
 * Holds the known exercise evaluators and dispatches an exercise to the first
 * one that reports it can handle the exercise directory.
 */
export class EvaluatorRegistry {
  private evaluators: ExerciseEvaluator[] = [];

  /** Creates a registry with the JavaScript evaluator already registered. */
  constructor() {
    this.registerEvaluator(new JavaScriptEvaluator());
  }

  /** Appends an evaluator; evaluators are consulted in registration order. */
  registerEvaluator(evaluator: ExerciseEvaluator): void {
    this.evaluators.push(evaluator);
  }

  /**
   * Returns the first registered evaluator whose `canEvaluate` accepts the
   * given exercise path, or `null` when none does.
   */
  evalForExercise(exercisePath: string): ExerciseEvaluator | null {
    for (const candidate of this.evaluators) {
      if (candidate.canEvaluate(exercisePath)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Evaluates one exercise.
   *
   * @param exercisePath - Directory of the exercise to evaluate.
   * @param exerciseName - Name used in log messages and in the result.
   * @returns `null` when no evaluator accepts the exercise; otherwise the
   *   evaluation result. If the evaluator throws, a zero-count failed result
   *   carrying the error message is returned instead of rethrowing.
   */
  async evaluateExercise(exercisePath: string, exerciseName: string): Promise<TestEvaluationResult | null> {
    const chosen = this.evalForExercise(exercisePath);

    if (chosen === null) {
      console.warn(`No evaluator found for exercise: ${exerciseName} at ${exercisePath}`);
      return null;
    }

    const evaluatedBy = chosen.language;

    try {
      console.log(`Evaluating ${exerciseName} using ${chosen.language} evaluator...`);
      const testResult = await chosen.evaluate(exercisePath);
      return { exerciseName, testResult, evaluatedBy };
    } catch (error) {
      console.error(`Error evaluating exercise ${exerciseName}:`, error);

      // Report the failure as a result so callers still get an entry for
      // this exercise.
      const reason = error instanceof Error ? error.message : String(error);
      const failedResult: TestResult = {
        passed: 0,
        failed: 0,
        total: 0,
        success: false,
        output: '',
        errorMessage: `Evaluation failed: ${reason}`
      };
      return { exerciseName, testResult: failedResult, evaluatedBy };
    }
  }

  /** Lists the `language` of every registered evaluator, in registration order. */
  getAvailableEvaluators(): string[] {
    const languages: string[] = [];
    for (const registered of this.evaluators) {
      languages.push(registered.language);
    }
    return languages;
  }

  /** Reports whether at least one registered evaluator accepts the exercise path. */
  canEvaluateExercise(exercisePath: string): boolean {
    for (const registered of this.evaluators) {
      if (registered.canEvaluate(exercisePath)) {
        return true;
      }
    }
    return false;
  }
}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import { ExerciseEvaluator, TestResult } from './types';
|
|
2
|
+
import { execSync } from 'child_process';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
|
|
6
|
+
/**
 * Evaluates JavaScript exercises by running their test command in the
 * exercise directory and turning the runner output into a {@link TestResult}.
 */
export class JavaScriptEvaluator implements ExerciseEvaluator {
  language = 'javascript';

  /**
   * Reports whether the directory looks like a JavaScript exercise with tests:
   * it must contain a parseable package.json that declares a test script, a
   * jest dependency or a `jest` key, or sit next to a jest config file.
   */
  canEvaluate(exercisePath: string): boolean {
    const packageJsonPath = path.join(exercisePath, 'package.json');

    if (!fs.existsSync(packageJsonPath)) {
      return false;
    }

    try {
      const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));

      // Check if there's a test script or jest configuration
      return !!(
        packageJson.scripts?.test ||
        packageJson.devDependencies?.jest ||
        packageJson.dependencies?.jest ||
        packageJson.jest ||
        fs.existsSync(path.join(exercisePath, 'jest.config.js')) ||
        fs.existsSync(path.join(exercisePath, 'jest.config.json'))
      );
    } catch (error) {
      // An unreadable or malformed package.json means we cannot evaluate.
      return false;
    }
  }

  /**
   * Installs dependencies when `node_modules` is missing, runs the tests and
   * parses the outcome.
   *
   * @param exercisePath - Directory of the exercise to test.
   * @returns The parsed result. Failures are reported in the result rather
   *   than by rejecting the promise.
   */
  async evaluate(exercisePath: string): Promise<TestResult> {
    try {
      this.installDependenciesIfMissing(exercisePath);

      const output = execSync(this.resolveTestCommand(exercisePath), {
        cwd: exercisePath,
        stdio: 'pipe',
        encoding: 'utf8',
        timeout: 120000 // 2 minute timeout for tests
      });

      return this.parseJestOutput(output);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : undefined;

      // Jest exits with a non-zero code when tests fail, so the captured
      // streams usually still hold a report. Both streams are parsed because
      // the report may be on either one.
      const captured = [
        this.capturedStream(error, 'stdout'),
        this.capturedStream(error, 'stderr')
      ].filter(text => text.length > 0).join('\n');

      if (captured) {
        const parsed = this.parseJestOutput(captured);
        // Nothing parseable means the command itself failed; surface the
        // underlying error message rather than a generic one.
        return parsed.total === 0 && message
          ? { ...parsed, errorMessage: message }
          : parsed;
      }

      return {
        passed: 0,
        failed: 0,
        total: 0,
        success: false,
        output: message || 'Test execution failed',
        errorMessage: message,
        details: error
      };
    }
  }

  /** Runs `npm install` when the exercise has no node_modules; failures are only logged. */
  private installDependenciesIfMissing(exercisePath: string): void {
    if (fs.existsSync(path.join(exercisePath, 'node_modules'))) {
      return;
    }

    try {
      execSync('npm install', {
        cwd: exercisePath,
        stdio: 'pipe',
        timeout: 60000 // 60 second timeout
      });
    } catch (installError) {
      // Continue anyway, maybe dependencies are not needed
      console.warn(`Failed to install dependencies in ${exercisePath}:`, installError);
    }
  }

  /**
   * Picks the test command: Jest with the JSON reporter when jest is a
   * declared dependency, the package's own test script plus JSON flags when
   * that script mentions jest, and plain `npm test` otherwise.
   */
  private resolveTestCommand(exercisePath: string): string {
    const packageJsonPath = path.join(exercisePath, 'package.json');
    if (!fs.existsSync(packageJsonPath)) {
      return 'npm test';
    }

    const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));

    if (packageJson.devDependencies?.jest || packageJson.dependencies?.jest) {
      return 'npx jest --json --verbose';
    }

    // NOTE(review): the script text is executed directly rather than through
    // npm — confirm the jest binary is resolvable on PATH in that case.
    const testScript: unknown = packageJson.scripts?.test;
    if (typeof testScript === 'string' && testScript.includes('jest')) {
      return `${testScript} --json --verbose`;
    }

    return 'npm test';
  }

  /** Returns the named captured stream of a failed child process as text, or '' when absent. */
  private capturedStream(error: unknown, stream: 'stdout' | 'stderr'): string {
    if (typeof error !== 'object' || error === null) {
      return '';
    }
    const value = (error as Record<string, unknown>)[stream];
    if (typeof value === 'string') {
      return value;
    }
    return Buffer.isBuffer(value) ? value.toString('utf8') : '';
  }

  /**
   * Parses runner output, preferring a JSON report line (an object containing
   * `"success"` or `"numTotalTests"`) and falling back to text parsing when no
   * such line exists or it is not valid JSON. Never throws.
   */
  private parseJestOutput(output: string): TestResult {
    const jsonLine = output
      .split('\n')
      .map(line => line.trim())
      .find(line =>
        line.startsWith('{') &&
        (line.includes('"success"') || line.includes('"numTotalTests"'))
      );

    if (jsonLine) {
      try {
        const result = JSON.parse(jsonLine);
        const count = (value: unknown): number =>
          typeof value === 'number' && Number.isFinite(value) ? value : 0;

        return {
          passed: count(result.numPassedTests),
          failed: count(result.numFailedTests),
          total: count(result.numTotalTests),
          skipped: count(result.numPendingTests),
          success: result.success === true,
          output: output,
          details: result
        };
      } catch (error) {
        // Fall back to text parsing
      }
    }

    return this.parseTextOutput(output);
  }

  /**
   * Extracts counts from human-readable runner output.
   *
   * Counts come from the `Tests:` summary line when there is one, so that the
   * preceding `Test Suites:` line is not mistaken for test counts. Without a
   * summary line, the first `N passed` / `N failed` / `N total` anywhere in
   * the output is used, and as a last resort result markers are counted.
   */
  private parseTextOutput(output: string): TestResult {
    const summaryLine = output.match(/^\s*Tests:\s+(.+)$/m);
    const countSource = summaryLine?.[1] ?? output;

    const readCount = (pattern: RegExp): number => {
      const digits = countSource.match(pattern)?.[1];
      return digits === undefined ? 0 : parseInt(digits, 10);
    };

    let passed = readCount(/(\d+) passed/);
    let failed = readCount(/(\d+) failed/);
    let total = readCount(/(\d+) total/);

    if (total === 0) {
      // No usable summary: count per-test marks (Jest prints ✓ for a pass and
      // ✕ for a failure; ✗ is accepted too). Suite-level PASS/FAIL banners are
      // only used when there are no per-test marks, to avoid mixing the two.
      const markers = output.match(/[✓✗✕]/g) ?? output.match(/PASS|FAIL/g);
      if (markers) {
        total = markers.length;
        passed = markers.filter(marker => marker === '✓' || marker === 'PASS').length;
        failed = total - passed;
      }
    }

    const success = failed === 0 && total > 0;

    let errorMessage: string | undefined;
    if (!success) {
      errorMessage = total === 0
        ? 'No test results could be parsed from the output'
        : 'Some tests failed';
    }

    return {
      passed,
      failed,
      total,
      success,
      output,
      errorMessage
    };
  }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** Outcome of running one exercise's test suite. */
export interface TestResult {
  /** Whether the run as a whole counts as successful. */
  success: boolean;

  /** Number of tests that passed. */
  passed: number;
  /** Number of tests that failed. */
  failed: number;
  /** Total number of tests reported. */
  total: number;
  /** Number of skipped tests, when the runner reports it. */
  skipped?: number;

  /** Text output associated with the run. */
  output: string;
  /** Description of the failure, when there is one. */
  errorMessage?: string;
  /** Raw test runner output. */
  details?: any;
}
|
|
11
|
+
|
|
12
|
+
/** Contract for a language-specific exercise test runner. */
export interface ExerciseEvaluator {
  /** Identifier of the language this evaluator handles. */
  language: string;

  /**
   * Reports whether this evaluator can handle the exercise.
   * @param exercisePath - Directory of the exercise.
   */
  canEvaluate(exercisePath: string): boolean;

  /**
   * Runs the exercise's tests.
   * @param exercisePath - Directory of the exercise.
   * @returns A promise for the test outcome.
   */
  evaluate(exercisePath: string): Promise<TestResult>;
}
|
|
17
|
+
|
|
18
|
+
/** A test outcome labelled with the exercise it belongs to and the evaluator that produced it. */
export interface TestEvaluationResult {
  /** Name of the evaluated exercise. */
  exerciseName: string;
  /** Which evaluator was used. */
  evaluatedBy: string;
  /** Outcome of the test run. */
  testResult: TestResult;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { AIClient, HttpClient } from "../../ts_build/src/clients";
|
|
2
|
+
|
|
3
|
+
/**
 * Registers an HTTP-backed provider on the given client collection and then
 * loads that provider's models.
 *
 * @param provider - Name the new client is registered under.
 * @param url - URL passed to the `HttpClient` constructor.
 * @param headers - Headers passed to the `HttpClient` constructor.
 * @param clients - Client collection that receives the new client.
 * @returns A promise that resolves once the provider's models have loaded.
 */
export async function registerProvider(
  provider: string,
  url: string,
  headers: Record<string, string>,
  clients: AIClient
): Promise<void> {
  // Registration has to precede model loading, which looks the provider up by name.
  clients.registerClient(provider, new HttpClient(url, headers));
  await clients.loadProviderModels(provider);
}
|