@tyvm/knowhow 0.0.90 → 0.0.91

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. package/.depcheckrc +31 -0
  2. package/bin/knowhow.js +1 -1
  3. package/package.json +4 -32
  4. package/src/agents/tools/executeScript/index.ts +5 -0
  5. package/src/agents/tools/googleSearch.ts +2 -2
  6. package/src/agents/tools/index.ts +0 -3
  7. package/src/agents/tools/list.ts +0 -147
  8. package/src/agents/tools/loadWebpage.ts +3 -113
  9. package/src/auth/browserLogin.ts +10 -13
  10. package/src/cli.ts +63 -3
  11. package/src/clients/gemini.ts +96 -25
  12. package/src/clients/http.ts +7 -11
  13. package/src/clients/pricing/google.ts +122 -26
  14. package/src/conversion.ts +24 -54
  15. package/src/index.ts +8 -1
  16. package/src/login.ts +5 -6
  17. package/src/plugins/language.ts +0 -4
  18. package/src/plugins/plugins.ts +0 -14
  19. package/src/plugins/url.ts +31 -12
  20. package/src/services/GitHub.ts +2 -2
  21. package/src/services/KnowhowClient.ts +34 -34
  22. package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
  23. package/src/services/S3.ts +16 -16
  24. package/src/services/index.ts +4 -4
  25. package/src/services/modules/index.ts +10 -2
  26. package/src/services/modules/types.ts +5 -2
  27. package/src/services/script-execution/ScriptExecutor.ts +29 -10
  28. package/src/services/script-execution/ScriptPolicy.ts +6 -2
  29. package/src/types.ts +1 -0
  30. package/src/utils/http.ts +127 -0
  31. package/src/workers/auth/PasskeySetup.ts +7 -11
  32. package/tests/clients/AIClient.test.ts +24 -21
  33. package/tests/manual/file-edits/figma.test.ts +3 -70
  34. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
  35. package/tests/plugins/language/languagePlugin.test.ts +2 -0
  36. package/tests/processors/ToolResponseCache.test.ts +2 -2
  37. package/tests/test.spec.ts +0 -14
  38. package/tests/unit/modules/moduleLoading.test.ts +7 -4
  39. package/tests/unit/plugins/pluginLoading.test.ts +6 -6
  40. package/ts_build/package.json +4 -32
  41. package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
  42. package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
  43. package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
  44. package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
  45. package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
  46. package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
  47. package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
  48. package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
  49. package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
  50. package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
  51. package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
  52. package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
  53. package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
  54. package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
  55. package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
  56. package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
  57. package/ts_build/src/agents/tools/executeScript/index.js +4 -1
  58. package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
  59. package/ts_build/src/agents/tools/googleSearch.js +2 -2
  60. package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
  61. package/ts_build/src/agents/tools/index.d.ts +0 -3
  62. package/ts_build/src/agents/tools/index.js +0 -3
  63. package/ts_build/src/agents/tools/index.js.map +1 -1
  64. package/ts_build/src/agents/tools/list.js +0 -138
  65. package/ts_build/src/agents/tools/list.js.map +1 -1
  66. package/ts_build/src/agents/tools/loadWebpage.js +1 -89
  67. package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
  68. package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
  69. package/ts_build/src/auth/browserLogin.js +7 -7
  70. package/ts_build/src/auth/browserLogin.js.map +1 -1
  71. package/ts_build/src/cli.d.ts +1 -1
  72. package/ts_build/src/cli.js +47 -1
  73. package/ts_build/src/cli.js.map +1 -1
  74. package/ts_build/src/clients/gemini.d.ts +1 -73
  75. package/ts_build/src/clients/gemini.js +57 -19
  76. package/ts_build/src/clients/gemini.js.map +1 -1
  77. package/ts_build/src/clients/http.js +5 -9
  78. package/ts_build/src/clients/http.js.map +1 -1
  79. package/ts_build/src/clients/pricing/google.d.ts +17 -73
  80. package/ts_build/src/clients/pricing/google.js +47 -10
  81. package/ts_build/src/clients/pricing/google.js.map +1 -1
  82. package/ts_build/src/conversion.d.ts +1 -4
  83. package/ts_build/src/conversion.js +12 -27
  84. package/ts_build/src/conversion.js.map +1 -1
  85. package/ts_build/src/index.d.ts +4 -0
  86. package/ts_build/src/index.js +7 -1
  87. package/ts_build/src/index.js.map +1 -1
  88. package/ts_build/src/login.js +5 -4
  89. package/ts_build/src/login.js.map +1 -1
  90. package/ts_build/src/plugins/downloader/downloader.js +3 -3
  91. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  92. package/ts_build/src/plugins/language.js.map +1 -1
  93. package/ts_build/src/plugins/plugins.js +0 -14
  94. package/ts_build/src/plugins/plugins.js.map +1 -1
  95. package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
  96. package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
  97. package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
  98. package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
  99. package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
  100. package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
  101. package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
  102. package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
  103. package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
  104. package/ts_build/src/plugins/url.js +27 -8
  105. package/ts_build/src/plugins/url.js.map +1 -1
  106. package/ts_build/src/services/GitHub.js +2 -2
  107. package/ts_build/src/services/GitHub.js.map +1 -1
  108. package/ts_build/src/services/KnowhowClient.d.ts +29 -29
  109. package/ts_build/src/services/KnowhowClient.js +33 -33
  110. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  111. package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
  112. package/ts_build/src/services/MediaProcessorService.js +215 -0
  113. package/ts_build/src/services/MediaProcessorService.js.map +1 -0
  114. package/ts_build/src/services/S3.js +12 -18
  115. package/ts_build/src/services/S3.js.map +1 -1
  116. package/ts_build/src/services/index.d.ts +3 -2
  117. package/ts_build/src/services/index.js +3 -3
  118. package/ts_build/src/services/index.js.map +1 -1
  119. package/ts_build/src/services/modules/index.js +10 -2
  120. package/ts_build/src/services/modules/index.js.map +1 -1
  121. package/ts_build/src/services/modules/types.d.ts +5 -2
  122. package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
  123. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  124. package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
  125. package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
  126. package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
  127. package/ts_build/src/types.d.ts +1 -0
  128. package/ts_build/src/types.js +1 -0
  129. package/ts_build/src/types.js.map +1 -1
  130. package/ts_build/src/utils/http.d.ts +27 -0
  131. package/ts_build/src/utils/http.js +98 -0
  132. package/ts_build/src/utils/http.js.map +1 -0
  133. package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
  134. package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
  135. package/ts_build/tests/clients/AIClient.test.js +11 -14
  136. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  137. package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
  138. package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
  139. package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
  140. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
  141. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  142. package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
  143. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
  144. package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
  145. package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
  146. package/ts_build/tests/test.spec.js +0 -14
  147. package/ts_build/tests/test.spec.js.map +1 -1
  148. package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
  149. package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
  150. package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
  151. package/ts_build/tests/unit/modules/moduleLoading.test.js +6 -4
  152. package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
  153. package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
  154. package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
  155. package/benchmarks/.dockerignore +0 -7
  156. package/benchmarks/README.md +0 -166
  157. package/benchmarks/docker/Dockerfile +0 -68
  158. package/benchmarks/example-config.yml +0 -27
  159. package/benchmarks/jest.config.js +0 -13
  160. package/benchmarks/package-lock.json +0 -4297
  161. package/benchmarks/package.json +0 -39
  162. package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
  163. package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
  164. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
  165. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
  166. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
  167. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
  168. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
  169. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
  170. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
  171. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
  172. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
  173. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
  174. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
  175. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
  176. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
  177. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
  178. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
  179. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
  180. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
  181. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
  182. package/benchmarks/scripts/build-and-run.sh +0 -47
  183. package/benchmarks/scripts/clone-exercism.sh +0 -92
  184. package/benchmarks/scripts/validate.sh +0 -48
  185. package/benchmarks/src/__tests__/runner.test.ts +0 -27
  186. package/benchmarks/src/cli.ts +0 -90
  187. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
  188. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
  189. package/benchmarks/src/evaluators/index.ts +0 -3
  190. package/benchmarks/src/evaluators/types.ts +0 -22
  191. package/benchmarks/src/index.ts +0 -3
  192. package/benchmarks/src/providers.ts +0 -13
  193. package/benchmarks/src/runner.ts +0 -824
  194. package/benchmarks/src/types.ts +0 -63
  195. package/benchmarks/tsconfig.json +0 -19
  196. package/leaderboard/README.md +0 -148
  197. package/leaderboard/app/api/benchmark-data/route.ts +0 -131
  198. package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
  199. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
  200. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
  201. package/leaderboard/app/globals.css +0 -27
  202. package/leaderboard/app/layout.tsx +0 -21
  203. package/leaderboard/app/page.tsx +0 -170
  204. package/leaderboard/components/LeaderboardTable.tsx +0 -168
  205. package/leaderboard/components/PerformanceChart.tsx +0 -109
  206. package/leaderboard/next-env.d.ts +0 -5
  207. package/leaderboard/next.config.js +0 -4
  208. package/leaderboard/package-lock.json +0 -6363
  209. package/leaderboard/package.json +0 -28
  210. package/leaderboard/postcss.config.js +0 -6
  211. package/leaderboard/tailwind.config.js +0 -17
  212. package/leaderboard/tsconfig.json +0 -28
  213. package/leaderboard/types/benchmark.ts +0 -67
  214. package/leaderboard/utils/dataProcessor.ts +0 -33
  215. package/src/agents/tools/asana/definitions.ts +0 -199
  216. package/src/agents/tools/asana/index.ts +0 -108
  217. package/src/agents/tools/ast/astAppendNode.ts +0 -90
  218. package/src/agents/tools/ast/astDeleteNode.ts +0 -88
  219. package/src/agents/tools/ast/astEditNode.ts +0 -95
  220. package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
  221. package/src/agents/tools/ast/astListPaths.ts +0 -66
  222. package/src/agents/tools/ast/index.ts +0 -7
  223. package/src/agents/tools/github/definitions.ts +0 -89
  224. package/src/agents/tools/github/index.ts +0 -67
  225. package/src/chat-old.ts +0 -446
  226. package/src/plugins/asana.ts +0 -146
  227. package/src/plugins/downloader/plugin.ts +0 -103
  228. package/src/plugins/downloader/types.ts +0 -92
  229. package/src/plugins/figma.ts +0 -158
  230. package/src/plugins/github.ts +0 -219
  231. package/src/plugins/jira.ts +0 -115
  232. package/src/plugins/linear.ts +0 -230
  233. package/src/plugins/notion.ts +0 -179
  234. package/src/plugins/tree-sitter/editor.ts +0 -369
  235. package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
  236. package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
  237. package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
  238. package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
  239. package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
  240. package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
  241. package/src/plugins/tree-sitter/parser.ts +0 -470
  242. package/src/plugins/tree-sitter/simple-paths.ts +0 -467
  243. package/tests/tree-sitter/editor.test.ts +0 -113
  244. package/tests/tree-sitter/invalid.test.ts +0 -299
  245. package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
  246. package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
  247. package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
  248. package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
  249. package/tests/tree-sitter/paths/paths.test.ts +0 -170
  250. package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
  251. package/tests/tree-sitter/sample-after.ts +0 -48
  252. package/tests/tree-sitter/sample-before.ts +0 -25
  253. package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
  254. package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
  255. package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
  256. package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
  257. package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
  258. package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
  259. package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
  260. package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
  261. package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
  262. package/tests/tree-sitter/tree-sitter.test.ts +0 -251
@@ -1,47 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Build and run Knowhow benchmarks
4
- # Usage: ./build-and-run.sh [command] [options...]
5
-
6
- set -e
7
-
8
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9
- PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
10
-
11
- echo "🏗️ Building Knowhow benchmark container..."
12
-
13
- # Build the Docker container
14
- docker build -f "$PROJECT_ROOT/benchmarks/docker/Dockerfile" -t knowhow-bench "$PROJECT_ROOT"
15
-
16
- echo "✅ Container built successfully!"
17
-
18
- # Create results directory if it doesn't exist
19
- mkdir -p "$PROJECT_ROOT/benchmarks/results"
20
-
21
- # If no arguments provided, show usage
22
- if [ $# -eq 0 ]; then
23
- echo ""
24
- echo "Usage: $0 <command> [options...]"
25
- echo ""
26
- echo "Examples:"
27
- echo " $0 setup --language javascript --count 5"
28
- echo " $0 run --language javascript --count 5 --model gpt-4o-mini"
29
- echo " $0 run --language python --count 10 --provider anthropic --model claude-3-sonnet-20240229"
30
- echo ""
31
- exit 0
32
- fi
33
-
34
- echo "🚀 Running benchmarks..."
35
-
36
- # Run the container with all provided arguments
37
- docker run --rm \
38
- -v "$PROJECT_ROOT/benchmarks/results:/app/benchmarks/results" \
39
- -e OPENAI_KEY \
40
- -e ANTHROPIC_API_KEY \
41
- -e GEMINI_API_KEY \
42
- -e XAI_API_KEY \
43
- --env-file "$PROJECT_ROOT/benchmarks/.env" \
44
- knowhow-bench "$@"
45
-
46
- echo "✅ Benchmarks completed!"
47
- echo "📊 Results available in: benchmarks/results/"
@@ -1,92 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Clone Exercism exercises for benchmarking
4
- # Based on Aider's clone-exercism.sh approach
5
-
6
- set -e
7
-
8
- # Configuration
9
- EXERCISM_REPO="https://github.com/exercism/problem-specifications.git"
10
- LANGUAGE=${1:-"javascript"} # Default to JavaScript
11
- MAX_EXERCISES=${2:-10} # Default to 10 exercises
12
-
13
- # Use different paths for local vs container
14
- if [ -n "$CONTAINER" ]; then
15
- EXERCISES_DIR="/app/exercises"
16
- else
17
- EXERCISES_DIR="$(cd "$(dirname "$0")/.." && pwd)/exercises"
18
- fi
19
-
20
- echo "Cloning Exercism exercises for language: $LANGUAGE"
21
- echo "Maximum exercises: $MAX_EXERCISES"
22
- echo "Target directory: $EXERCISES_DIR"
23
-
24
- # Create exercises directory if it doesn't exist
25
- mkdir -p "$EXERCISES_DIR"
26
-
27
- # Clone the problem specifications repo if not already cloned
28
- if [ ! -d "$EXERCISES_DIR/problem-specifications" ]; then
29
- echo "Cloning Exercism problem specifications..."
30
- cd "$EXERCISES_DIR"
31
- git clone "$EXERCISM_REPO" problem-specifications
32
- fi
33
-
34
- # Clone the language track
35
- LANGUAGE_REPO="https://github.com/exercism/${LANGUAGE}.git"
36
- LANGUAGE_DIR="$EXERCISES_DIR/$LANGUAGE"
37
-
38
- if [ ! -d "$LANGUAGE_DIR" ]; then
39
- echo "Cloning $LANGUAGE track..."
40
- cd "$EXERCISES_DIR"
41
- git clone "$LANGUAGE_REPO" "$LANGUAGE"
42
- fi
43
-
44
- # Find exercises with both problem specification and language implementation
45
- echo "Finding exercises with both specification and implementation..."
46
-
47
- SPEC_DIR="$EXERCISES_DIR/problem-specifications/exercises"
48
- IMPL_DIR="$LANGUAGE_DIR/exercises"
49
-
50
- # Create filtered exercises directory
51
- FILTERED_DIR="$EXERCISES_DIR/filtered"
52
- if [ -d "$FILTERED_DIR" ]; then
53
- echo "Removing existing filtered directory: $FILTERED_DIR"
54
- rm -rf "$FILTERED_DIR"
55
- fi
56
- mkdir -p "$FILTERED_DIR"
57
-
58
- count=0
59
- for exercise in $(ls "$SPEC_DIR" 2>/dev/null | sort); do
60
- if [ $count -ge $MAX_EXERCISES ]; then
61
- break
62
- fi
63
-
64
- if [ -d "$IMPL_DIR/practice/$exercise" ] || [ -d "$IMPL_DIR/$exercise" ]; then
65
- echo "Found exercise: $exercise"
66
-
67
- # Create exercise directory
68
- exercise_dir="$FILTERED_DIR/$exercise"
69
- mkdir -p "$exercise_dir"
70
-
71
- # Copy problem specification
72
- if [ -f "$SPEC_DIR/$exercise/description.md" ]; then
73
- cp "$SPEC_DIR/$exercise/description.md" "$exercise_dir/"
74
- fi
75
-
76
- if [ -f "$SPEC_DIR/$exercise/metadata.yml" ]; then
77
- cp "$SPEC_DIR/$exercise/metadata.yml" "$exercise_dir/"
78
- fi
79
-
80
- # Copy language implementation
81
- if [ -d "$IMPL_DIR/practice/$exercise" ]; then
82
- cp -r "$IMPL_DIR/practice/$exercise"/* "$exercise_dir/"
83
- elif [ -d "$IMPL_DIR/$exercise" ]; then
84
- cp -r "$IMPL_DIR/$exercise"/* "$exercise_dir/"
85
- fi
86
-
87
- count=$((count + 1))
88
- fi
89
- done
90
-
91
- echo "Successfully set up $count exercises in $FILTERED_DIR"
92
- echo "Ready for benchmarking!"
@@ -1,48 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Simple validation script to test the benchmark setup
4
- # This runs without the full Docker setup for quick validation
5
-
6
- set -e
7
-
8
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9
- BENCHMARK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
10
-
11
- echo "🔍 Validating Knowhow Benchmarks setup..."
12
-
13
- # 1. Check that benchmarks can be built
14
- echo "1. Building benchmarks package..."
15
- cd "$BENCHMARK_DIR"
16
- npm run build > /dev/null 2>&1
17
- echo " ✅ Build successful"
18
-
19
- # 2. Check that tests pass
20
- echo "2. Running tests..."
21
- npm test > /dev/null 2>&1
22
- echo " ✅ Tests passed"
23
-
24
- # 3. Check that CLI can show help
25
- echo "3. Testing CLI..."
26
- node dist/cli.js --help > /dev/null 2>&1
27
- echo " ✅ CLI working"
28
-
29
- # 4. Check that Docker can build (optional - requires Docker)
30
- if command -v docker &> /dev/null; then
31
- echo "4. Testing Docker build..."
32
- cd "$(dirname "$BENCHMARK_DIR")"
33
- docker build -f benchmarks/docker/Dockerfile -t knowhow-bench-test . > /dev/null 2>&1
34
- echo " ✅ Docker build successful"
35
-
36
- # Clean up test image
37
- docker rmi knowhow-bench-test > /dev/null 2>&1
38
- else
39
- echo "4. Skipping Docker test (Docker not available)"
40
- fi
41
-
42
- echo ""
43
- echo "🎉 All validations passed!"
44
- echo ""
45
- echo "Ready to run benchmarks. Example usage:"
46
- echo " ./scripts/build-and-run.sh setup --language javascript --count 5"
47
- echo " ./scripts/build-and-run.sh run --language javascript --count 5 --model gpt-4o-mini"
48
- echo ""
@@ -1,27 +0,0 @@
1
- import { BenchmarkRunner } from '../runner';
2
- import { BenchmarkConfig } from '../types';
3
-
4
- describe('BenchmarkRunner', () => {
5
- const mockConfig: BenchmarkConfig = {
6
- language: 'javascript',
7
- maxExercises: 5,
8
- model: 'gpt-4o-mini',
9
- provider: 'openai',
10
- limits: {
11
- maxTurns: 20,
12
- maxTime: 300,
13
- maxCost: 1.0
14
- },
15
- outputFile: 'test-results.json'
16
- };
17
-
18
- it('should create a BenchmarkRunner instance', () => {
19
- const runner = new BenchmarkRunner(mockConfig);
20
- expect(runner).toBeInstanceOf(BenchmarkRunner);
21
- });
22
-
23
- it('should have the correct configuration', () => {
24
- const runner = new BenchmarkRunner(mockConfig);
25
- expect(runner['config']).toEqual(mockConfig);
26
- });
27
- });
@@ -1,90 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- import { Command } from "commander";
4
- import { BenchmarkRunner } from "./runner";
5
- import { BenchmarkConfig } from "./types";
6
- import chalk from "chalk";
7
-
8
- const program = new Command();
9
-
10
- program
11
- .name("knowhow-bench")
12
- .description("Benchmark Knowhow terminal agent against coding exercises")
13
- .version("0.0.1");
14
-
15
- program
16
- .command("run")
17
- .description("Run benchmarks against Exercism exercises")
18
- .option(
19
- "-l, --language <language>",
20
- "Programming language to test",
21
- "javascript"
22
- )
23
- .option("-c, --count <count>", "Maximum number of exercises to run", "10")
24
- .option("-m, --model <model>", "AI model to use", "gpt-4o-mini")
25
- .option("-p, --provider <provider>", "AI provider to use", "openai")
26
- .option("--max-turns <turns>", "Maximum turns per exercise", "30")
27
- .option("--max-time <seconds>", "Maximum time per exercise in seconds", "300")
28
- .option("--max-cost <dollars>", "Maximum cost per exercise in dollars", "1.0")
29
- .option("--output <file>", "Output file for results", "results.json")
30
- .action(async (options) => {
31
- try {
32
- console.log(chalk.blue("🚀 Starting Knowhow benchmarks..."));
33
-
34
- const config: BenchmarkConfig = {
35
- language: options.language,
36
- maxExercises: parseInt(options.count),
37
- model: options.model,
38
- provider: options.provider,
39
- limits: {
40
- maxTurns: parseInt(options.maxTurns),
41
- maxTime: parseInt(options.maxTime),
42
- maxCost: parseFloat(options.maxCost),
43
- },
44
- outputFile: options.output,
45
- };
46
-
47
- const runner = new BenchmarkRunner(config);
48
- await runner.run();
49
-
50
- console.log(chalk.green("✅ Benchmarks completed successfully!"));
51
- process.exit(0);
52
- } catch (error) {
53
- console.error(chalk.red("❌ Benchmark failed:"), error);
54
- process.exit(1);
55
- }
56
- });
57
-
58
- program
59
- .command("setup")
60
- .description("Set up exercises for benchmarking")
61
- .option(
62
- "-l, --language <language>",
63
- "Programming language to setup",
64
- "javascript"
65
- )
66
- .option("-c, --count <count>", "Maximum number of exercises to setup", "10")
67
- .action(async (options) => {
68
- try {
69
- console.log(chalk.blue("📦 Setting up exercises..."));
70
-
71
- const runner = new BenchmarkRunner({
72
- language: options.language,
73
- maxExercises: parseInt(options.count),
74
- model: "gpt-4o-mini", // Dummy values for setup
75
- provider: "openai",
76
- limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
77
- outputFile: "results.json",
78
- });
79
-
80
- await runner.setupExercises();
81
-
82
- console.log(chalk.green("✅ Exercises setup completed!"));
83
- process.exit(0);
84
- } catch (error) {
85
- console.error(chalk.red("❌ Setup failed:"), error);
86
- process.exit(1);
87
- }
88
- });
89
-
90
- program.parse();
@@ -1,64 +0,0 @@
1
- import { ExerciseEvaluator, TestResult, TestEvaluationResult } from './types';
2
- import { JavaScriptEvaluator } from './JavaScriptEvaluator';
3
-
4
- export class EvaluatorRegistry {
5
- private evaluators: ExerciseEvaluator[] = [];
6
-
7
- constructor() {
8
- // Register default evaluators
9
- this.registerEvaluator(new JavaScriptEvaluator());
10
- }
11
-
12
- registerEvaluator(evaluator: ExerciseEvaluator): void {
13
- this.evaluators.push(evaluator);
14
- }
15
-
16
- evalForExercise(exercisePath: string): ExerciseEvaluator | null {
17
- return this.evaluators.find(e => e.canEvaluate(exercisePath)) || null;
18
- }
19
-
20
- async evaluateExercise(exercisePath: string, exerciseName: string): Promise<TestEvaluationResult | null> {
21
- // Find the first evaluator that can handle this exercise
22
- const evaluator = this.evalForExercise(exercisePath);
23
-
24
- if (!evaluator) {
25
- console.warn(`No evaluator found for exercise: ${exerciseName} at ${exercisePath}`);
26
- return null;
27
- }
28
-
29
- try {
30
- console.log(`Evaluating ${exerciseName} using ${evaluator.language} evaluator...`);
31
- const testResult = await evaluator.evaluate(exercisePath);
32
-
33
- return {
34
- exerciseName,
35
- testResult,
36
- evaluatedBy: evaluator.language
37
- };
38
- } catch (error) {
39
- console.error(`Error evaluating exercise ${exerciseName}:`, error);
40
-
41
- // Return a failed test result instead of null
42
- return {
43
- exerciseName,
44
- testResult: {
45
- passed: 0,
46
- failed: 0,
47
- total: 0,
48
- success: false,
49
- output: '',
50
- errorMessage: `Evaluation failed: ${error instanceof Error ? error.message : String(error)}`
51
- },
52
- evaluatedBy: evaluator.language
53
- };
54
- }
55
- }
56
-
57
- getAvailableEvaluators(): string[] {
58
- return this.evaluators.map(e => e.language);
59
- }
60
-
61
- canEvaluateExercise(exercisePath: string): boolean {
62
- return this.evaluators.some(e => e.canEvaluate(exercisePath));
63
- }
64
- }
@@ -1,183 +0,0 @@
1
- import { ExerciseEvaluator, TestResult } from './types';
2
- import { execSync } from 'child_process';
3
- import * as fs from 'fs';
4
- import * as path from 'path';
5
-
6
- export class JavaScriptEvaluator implements ExerciseEvaluator {
7
- language = 'javascript';
8
-
9
- canEvaluate(exercisePath: string): boolean {
10
- // Check for package.json with test script or jest config
11
- const packageJsonPath = path.join(exercisePath, 'package.json');
12
-
13
- if (!fs.existsSync(packageJsonPath)) {
14
- return false;
15
- }
16
-
17
- try {
18
- const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
19
-
20
- // Check if there's a test script or jest configuration
21
- return !!(
22
- packageJson.scripts?.test ||
23
- packageJson.devDependencies?.jest ||
24
- packageJson.dependencies?.jest ||
25
- packageJson.jest ||
26
- fs.existsSync(path.join(exercisePath, 'jest.config.js')) ||
27
- fs.existsSync(path.join(exercisePath, 'jest.config.json'))
28
- );
29
- } catch (error) {
30
- return false;
31
- }
32
- }
33
-
34
- async evaluate(exercisePath: string): Promise<TestResult> {
35
- try {
36
- // First try to install dependencies if node_modules doesn't exist
37
- const nodeModulesPath = path.join(exercisePath, 'node_modules');
38
- if (!fs.existsSync(nodeModulesPath)) {
39
- try {
40
- execSync('npm install', {
41
- cwd: exercisePath,
42
- stdio: 'pipe',
43
- timeout: 60000 // 60 second timeout
44
- });
45
- } catch (installError) {
46
- // Continue anyway, maybe dependencies are not needed
47
- console.warn(`Failed to install dependencies in ${exercisePath}:`, installError);
48
- }
49
- }
50
-
51
- // Try to run tests with JSON output
52
- let command = 'npm test';
53
-
54
- // Check if we can use Jest directly with JSON reporter
55
- const packageJsonPath = path.join(exercisePath, 'package.json');
56
- if (fs.existsSync(packageJsonPath)) {
57
- const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
58
-
59
- // If jest is available, use it directly with JSON reporter
60
- if (packageJson.devDependencies?.jest || packageJson.dependencies?.jest) {
61
- command = 'npx jest --json --verbose';
62
- } else if (packageJson.scripts?.test) {
63
- // Try to modify the test script to include JSON output
64
- const testScript = packageJson.scripts.test;
65
- if (testScript.includes('jest')) {
66
- command = `${testScript} --json --verbose`;
67
- }
68
- }
69
- }
70
-
71
- const output = execSync(command, {
72
- cwd: exercisePath,
73
- stdio: 'pipe',
74
- encoding: 'utf8',
75
- timeout: 120000 // 2 minute timeout for tests
76
- });
77
-
78
- return this.parseJestOutput(output);
79
-
80
- } catch (error: any) {
81
- // Jest exits with non-zero code when tests fail, so we need to parse the output
82
- if (error.stdout) {
83
- try {
84
- return this.parseJestOutput(error.stdout);
85
- } catch (parseError) {
86
- // If JSON parsing fails, try to extract basic info from text output
87
- return this.parseTextOutput(error.stdout || error.stderr || '');
88
- }
89
- }
90
-
91
- return {
92
- passed: 0,
93
- failed: 0,
94
- total: 0,
95
- success: false,
96
- output: error.message || 'Test execution failed',
97
- errorMessage: error.message,
98
- details: error
99
- };
100
- }
101
- }
102
-
103
- private parseJestOutput(output: string): TestResult {
104
- try {
105
- // Try to find JSON output in the string
106
- const lines = output.split('\n');
107
- let jsonLine = '';
108
-
109
- for (const line of lines) {
110
- const trimmed = line.trim();
111
- if (trimmed.startsWith('{') && (trimmed.includes('"success"') || trimmed.includes('"numTotalTests"'))) {
112
- jsonLine = trimmed;
113
- break;
114
- }
115
- }
116
-
117
- if (jsonLine) {
118
- const result = JSON.parse(jsonLine);
119
-
120
- return {
121
- passed: result.numPassedTests || 0,
122
- failed: result.numFailedTests || 0,
123
- total: result.numTotalTests || 0,
124
- skipped: result.numPendingTests || 0,
125
- success: result.success || false,
126
- output: output,
127
- details: result
128
- };
129
- }
130
- } catch (error) {
131
- // Fall back to text parsing
132
- }
133
-
134
- return this.parseTextOutput(output);
135
- }
136
-
137
- private parseTextOutput(output: string): TestResult {
138
- // Try to parse Jest text output
139
- let passed = 0;
140
- let failed = 0;
141
- let total = 0;
142
- let success = false;
143
-
144
- // Look for Jest summary patterns
145
- const passedMatch = output.match(/(\d+) passed/);
146
- const failedMatch = output.match(/(\d+) failed/);
147
- const totalMatch = output.match(/(\d+) total/);
148
-
149
- if (passedMatch) passed = parseInt(passedMatch[1]);
150
- if (failedMatch) failed = parseInt(failedMatch[1]);
151
- if (totalMatch) total = parseInt(totalMatch[1]);
152
-
153
- // If we couldn't find specific numbers, try other patterns
154
- if (total === 0) {
155
- // Look for "Tests: " summary
156
- const testsMatch = output.match(/Tests:\s+(\d+)\s+failed,\s+(\d+)\s+passed,\s+(\d+)\s+total/);
157
- if (testsMatch) {
158
- failed = parseInt(testsMatch[1]);
159
- passed = parseInt(testsMatch[2]);
160
- total = parseInt(testsMatch[3]);
161
- } else {
162
- // Look for individual test results
163
- const testResults = output.match(/✓|✗|PASS|FAIL/g);
164
- if (testResults) {
165
- total = testResults.length;
166
- passed = testResults.filter(r => r === '✓' || r === 'PASS').length;
167
- failed = total - passed;
168
- }
169
- }
170
- }
171
-
172
- success = failed === 0 && total > 0;
173
-
174
- return {
175
- passed,
176
- failed,
177
- total,
178
- success,
179
- output,
180
- errorMessage: success ? undefined : 'Some tests failed'
181
- };
182
- }
183
- }
@@ -1,3 +0,0 @@
1
- export * from './types';
2
- export * from './JavaScriptEvaluator';
3
- export * from './EvaluatorRegistry';
@@ -1,22 +0,0 @@
1
- export interface TestResult {
2
- passed: number;
3
- failed: number;
4
- total: number;
5
- skipped?: number;
6
- success: boolean;
7
- output: string;
8
- errorMessage?: string;
9
- details?: any; // Raw test runner output
10
- }
11
-
12
- export interface ExerciseEvaluator {
13
- language: string;
14
- canEvaluate(exercisePath: string): boolean;
15
- evaluate(exercisePath: string): Promise<TestResult>;
16
- }
17
-
18
- export interface TestEvaluationResult {
19
- exerciseName: string;
20
- testResult: TestResult;
21
- evaluatedBy: string; // Which evaluator was used
22
- }
@@ -1,3 +0,0 @@
1
- export { BenchmarkRunner } from './runner';
2
- export * from './types';
3
- import 'dotenv/config'
@@ -1,13 +0,0 @@
1
- import { AIClient, HttpClient } from "../../ts_build/src/clients";
2
-
3
- export async function registerProvider(
4
- provider: string,
5
- url: string,
6
- headers: Record<string, string>,
7
- clients: AIClient
8
- ): Promise<void> {
9
- const client = new HttpClient(url, headers);
10
-
11
- clients.registerClient(provider, client);
12
- await clients.loadProviderModels(provider);
13
- }