@tyvm/knowhow 0.0.90 → 0.0.92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.depcheckrc +30 -0
- package/bin/knowhow.js +1 -1
- package/package.json +8 -34
- package/src/agents/configurable/ConfigAgent.ts +2 -2
- package/src/agents/tools/executeScript/index.ts +5 -0
- package/src/agents/tools/googleSearch.ts +2 -2
- package/src/agents/tools/index.ts +0 -3
- package/src/agents/tools/list.ts +0 -147
- package/src/agents/tools/loadWebpage.ts +3 -113
- package/src/auth/browserLogin.ts +10 -13
- package/src/chat/modules/AgentModule.ts +0 -1
- package/src/chat/types.ts +1 -1
- package/src/cli.ts +63 -3
- package/src/clients/gemini.ts +96 -25
- package/src/clients/http.ts +7 -11
- package/src/clients/pricing/google.ts +122 -26
- package/src/conversion.ts +24 -54
- package/src/index.ts +15 -20
- package/src/login.ts +5 -6
- package/src/plugins/language.ts +0 -4
- package/src/plugins/plugins.ts +0 -14
- package/src/plugins/url.ts +31 -12
- package/src/services/EmbeddingsService.ts +70 -0
- package/src/services/KnowhowClient.ts +34 -34
- package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
- package/src/services/S3.ts +19 -87
- package/src/services/index.ts +8 -8
- package/src/services/modules/index.ts +12 -3
- package/src/services/modules/types.ts +8 -2
- package/src/services/script-execution/ScriptExecutor.ts +29 -10
- package/src/services/script-execution/ScriptPolicy.ts +6 -2
- package/src/types.ts +1 -0
- package/src/utils/http.ts +127 -0
- package/src/workers/auth/PasskeySetup.ts +7 -11
- package/tests/clients/AIClient.test.ts +24 -21
- package/tests/manual/file-edits/figma.test.ts +3 -70
- package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
- package/tests/plugins/language/languagePlugin.test.ts +2 -0
- package/tests/processors/ToolResponseCache.test.ts +2 -2
- package/tests/test.spec.ts +0 -14
- package/tests/unit/modules/moduleLoading.test.ts +12 -4
- package/tests/unit/plugins/pluginLoading.test.ts +6 -6
- package/ts_build/package.json +8 -34
- package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
- package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
- package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
- package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
- package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
- package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
- package/ts_build/src/agents/tools/executeScript/index.js +4 -1
- package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
- package/ts_build/src/agents/tools/googleSearch.js +2 -2
- package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
- package/ts_build/src/agents/tools/index.d.ts +0 -3
- package/ts_build/src/agents/tools/index.js +0 -3
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +0 -138
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/loadWebpage.js +1 -89
- package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
- package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
- package/ts_build/src/auth/browserLogin.js +7 -7
- package/ts_build/src/auth/browserLogin.js.map +1 -1
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/chat/types.d.ts +1 -1
- package/ts_build/src/cli.d.ts +1 -1
- package/ts_build/src/cli.js +47 -1
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/gemini.d.ts +1 -73
- package/ts_build/src/clients/gemini.js +57 -19
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/http.js +5 -9
- package/ts_build/src/clients/http.js.map +1 -1
- package/ts_build/src/clients/pricing/google.d.ts +17 -73
- package/ts_build/src/clients/pricing/google.js +47 -10
- package/ts_build/src/clients/pricing/google.js.map +1 -1
- package/ts_build/src/conversion.d.ts +1 -4
- package/ts_build/src/conversion.js +12 -27
- package/ts_build/src/conversion.js.map +1 -1
- package/ts_build/src/index.d.ts +4 -0
- package/ts_build/src/index.js +15 -14
- package/ts_build/src/index.js.map +1 -1
- package/ts_build/src/login.js +5 -4
- package/ts_build/src/login.js.map +1 -1
- package/ts_build/src/plugins/downloader/downloader.js +3 -3
- package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
- package/ts_build/src/plugins/language.js.map +1 -1
- package/ts_build/src/plugins/plugins.js +0 -14
- package/ts_build/src/plugins/plugins.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
- package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
- package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
- package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
- package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
- package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
- package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
- package/ts_build/src/plugins/url.js +27 -8
- package/ts_build/src/plugins/url.js.map +1 -1
- package/ts_build/src/services/EmbeddingsService.d.ts +14 -0
- package/ts_build/src/services/EmbeddingsService.js +33 -0
- package/ts_build/src/services/EmbeddingsService.js.map +1 -0
- package/ts_build/src/services/GitHub.js +2 -2
- package/ts_build/src/services/GitHub.js.map +1 -1
- package/ts_build/src/services/KnowhowClient.d.ts +29 -29
- package/ts_build/src/services/KnowhowClient.js +33 -33
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
- package/ts_build/src/services/MediaProcessorService.js +215 -0
- package/ts_build/src/services/MediaProcessorService.js.map +1 -0
- package/ts_build/src/services/S3.d.ts +0 -4
- package/ts_build/src/services/S3.js +14 -60
- package/ts_build/src/services/S3.js.map +1 -1
- package/ts_build/src/services/index.d.ts +6 -5
- package/ts_build/src/services/index.js +6 -6
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/services/modules/index.js +12 -3
- package/ts_build/src/services/modules/index.js.map +1 -1
- package/ts_build/src/services/modules/types.d.ts +8 -2
- package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
- package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
- package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
- package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
- package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
- package/ts_build/src/types.d.ts +1 -0
- package/ts_build/src/types.js +1 -0
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/http.d.ts +27 -0
- package/ts_build/src/utils/http.js +98 -0
- package/ts_build/src/utils/http.js.map +1 -0
- package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
- package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
- package/ts_build/tests/clients/AIClient.test.js +11 -14
- package/ts_build/tests/clients/AIClient.test.js.map +1 -1
- package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
- package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
- package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
- package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
- package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
- package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
- package/ts_build/tests/test.spec.js +0 -14
- package/ts_build/tests/test.spec.js.map +1 -1
- package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
- package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
- package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
- package/ts_build/tests/unit/modules/moduleLoading.test.js +11 -4
- package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
- package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
- package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
- package/benchmarks/.dockerignore +0 -7
- package/benchmarks/README.md +0 -166
- package/benchmarks/docker/Dockerfile +0 -68
- package/benchmarks/example-config.yml +0 -27
- package/benchmarks/jest.config.js +0 -13
- package/benchmarks/package-lock.json +0 -4297
- package/benchmarks/package.json +0 -39
- package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
- package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
- package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
- package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
- package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
- package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
- package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
- package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
- package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
- package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
- package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
- package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
- package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
- package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
- package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
- package/benchmarks/scripts/build-and-run.sh +0 -47
- package/benchmarks/scripts/clone-exercism.sh +0 -92
- package/benchmarks/scripts/validate.sh +0 -48
- package/benchmarks/src/__tests__/runner.test.ts +0 -27
- package/benchmarks/src/cli.ts +0 -90
- package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
- package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
- package/benchmarks/src/evaluators/index.ts +0 -3
- package/benchmarks/src/evaluators/types.ts +0 -22
- package/benchmarks/src/index.ts +0 -3
- package/benchmarks/src/providers.ts +0 -13
- package/benchmarks/src/runner.ts +0 -824
- package/benchmarks/src/types.ts +0 -63
- package/benchmarks/tsconfig.json +0 -19
- package/leaderboard/README.md +0 -148
- package/leaderboard/app/api/benchmark-data/route.ts +0 -131
- package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
- package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
- package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
- package/leaderboard/app/globals.css +0 -27
- package/leaderboard/app/layout.tsx +0 -21
- package/leaderboard/app/page.tsx +0 -170
- package/leaderboard/components/LeaderboardTable.tsx +0 -168
- package/leaderboard/components/PerformanceChart.tsx +0 -109
- package/leaderboard/next-env.d.ts +0 -5
- package/leaderboard/next.config.js +0 -4
- package/leaderboard/package-lock.json +0 -6363
- package/leaderboard/package.json +0 -28
- package/leaderboard/postcss.config.js +0 -6
- package/leaderboard/tailwind.config.js +0 -17
- package/leaderboard/tsconfig.json +0 -28
- package/leaderboard/types/benchmark.ts +0 -67
- package/leaderboard/utils/dataProcessor.ts +0 -33
- package/src/agents/tools/asana/definitions.ts +0 -199
- package/src/agents/tools/asana/index.ts +0 -108
- package/src/agents/tools/ast/astAppendNode.ts +0 -90
- package/src/agents/tools/ast/astDeleteNode.ts +0 -88
- package/src/agents/tools/ast/astEditNode.ts +0 -95
- package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
- package/src/agents/tools/ast/astListPaths.ts +0 -66
- package/src/agents/tools/ast/index.ts +0 -7
- package/src/agents/tools/github/definitions.ts +0 -89
- package/src/agents/tools/github/index.ts +0 -67
- package/src/chat-old.ts +0 -446
- package/src/plugins/asana.ts +0 -146
- package/src/plugins/downloader/plugin.ts +0 -103
- package/src/plugins/downloader/types.ts +0 -92
- package/src/plugins/figma.ts +0 -158
- package/src/plugins/github.ts +0 -219
- package/src/plugins/jira.ts +0 -115
- package/src/plugins/linear.ts +0 -230
- package/src/plugins/notion.ts +0 -179
- package/src/plugins/tree-sitter/editor.ts +0 -369
- package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
- package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
- package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
- package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
- package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
- package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
- package/src/plugins/tree-sitter/parser.ts +0 -470
- package/src/plugins/tree-sitter/simple-paths.ts +0 -467
- package/src/services/GitHub.ts +0 -59
- package/tests/tree-sitter/editor.test.ts +0 -113
- package/tests/tree-sitter/invalid.test.ts +0 -299
- package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
- package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
- package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
- package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
- package/tests/tree-sitter/paths/paths.test.ts +0 -170
- package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
- package/tests/tree-sitter/sample-after.ts +0 -48
- package/tests/tree-sitter/sample-before.ts +0 -25
- package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
- package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
- package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
- package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
- package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
- package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
- package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
- package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
- package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
- package/tests/tree-sitter/tree-sitter.test.ts +0 -251
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
# Build and run Knowhow benchmarks
|
|
4
|
-
# Usage: ./build-and-run.sh [command] [options...]
|
|
5
|
-
|
|
6
|
-
set -e
|
|
7
|
-
|
|
8
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
9
|
-
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
10
|
-
|
|
11
|
-
echo "🏗️ Building Knowhow benchmark container..."
|
|
12
|
-
|
|
13
|
-
# Build the Docker container
|
|
14
|
-
docker build -f "$PROJECT_ROOT/benchmarks/docker/Dockerfile" -t knowhow-bench "$PROJECT_ROOT"
|
|
15
|
-
|
|
16
|
-
echo "✅ Container built successfully!"
|
|
17
|
-
|
|
18
|
-
# Create results directory if it doesn't exist
|
|
19
|
-
mkdir -p "$PROJECT_ROOT/benchmarks/results"
|
|
20
|
-
|
|
21
|
-
# If no arguments provided, show usage
|
|
22
|
-
if [ $# -eq 0 ]; then
|
|
23
|
-
echo ""
|
|
24
|
-
echo "Usage: $0 <command> [options...]"
|
|
25
|
-
echo ""
|
|
26
|
-
echo "Examples:"
|
|
27
|
-
echo " $0 setup --language javascript --count 5"
|
|
28
|
-
echo " $0 run --language javascript --count 5 --model gpt-4o-mini"
|
|
29
|
-
echo " $0 run --language python --count 10 --provider anthropic --model claude-3-sonnet-20240229"
|
|
30
|
-
echo ""
|
|
31
|
-
exit 0
|
|
32
|
-
fi
|
|
33
|
-
|
|
34
|
-
echo "🚀 Running benchmarks..."
|
|
35
|
-
|
|
36
|
-
# Run the container with all provided arguments
|
|
37
|
-
docker run --rm \
|
|
38
|
-
-v "$PROJECT_ROOT/benchmarks/results:/app/benchmarks/results" \
|
|
39
|
-
-e OPENAI_KEY \
|
|
40
|
-
-e ANTHROPIC_API_KEY \
|
|
41
|
-
-e GEMINI_API_KEY \
|
|
42
|
-
-e XAI_API_KEY \
|
|
43
|
-
--env-file "$PROJECT_ROOT/benchmarks/.env" \
|
|
44
|
-
knowhow-bench "$@"
|
|
45
|
-
|
|
46
|
-
echo "✅ Benchmarks completed!"
|
|
47
|
-
echo "📊 Results available in: benchmarks/results/"
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
# Clone Exercism exercises for benchmarking
|
|
4
|
-
# Based on Aider's clone-exercism.sh approach
|
|
5
|
-
|
|
6
|
-
set -e
|
|
7
|
-
|
|
8
|
-
# Configuration
|
|
9
|
-
EXERCISM_REPO="https://github.com/exercism/problem-specifications.git"
|
|
10
|
-
LANGUAGE=${1:-"javascript"} # Default to JavaScript
|
|
11
|
-
MAX_EXERCISES=${2:-10} # Default to 10 exercises
|
|
12
|
-
|
|
13
|
-
# Use different paths for local vs container
|
|
14
|
-
if [ -n "$CONTAINER" ]; then
|
|
15
|
-
EXERCISES_DIR="/app/exercises"
|
|
16
|
-
else
|
|
17
|
-
EXERCISES_DIR="$(cd "$(dirname "$0")/.." && pwd)/exercises"
|
|
18
|
-
fi
|
|
19
|
-
|
|
20
|
-
echo "Cloning Exercism exercises for language: $LANGUAGE"
|
|
21
|
-
echo "Maximum exercises: $MAX_EXERCISES"
|
|
22
|
-
echo "Target directory: $EXERCISES_DIR"
|
|
23
|
-
|
|
24
|
-
# Create exercises directory if it doesn't exist
|
|
25
|
-
mkdir -p "$EXERCISES_DIR"
|
|
26
|
-
|
|
27
|
-
# Clone the problem specifications repo if not already cloned
|
|
28
|
-
if [ ! -d "$EXERCISES_DIR/problem-specifications" ]; then
|
|
29
|
-
echo "Cloning Exercism problem specifications..."
|
|
30
|
-
cd "$EXERCISES_DIR"
|
|
31
|
-
git clone "$EXERCISM_REPO" problem-specifications
|
|
32
|
-
fi
|
|
33
|
-
|
|
34
|
-
# Clone the language track
|
|
35
|
-
LANGUAGE_REPO="https://github.com/exercism/${LANGUAGE}.git"
|
|
36
|
-
LANGUAGE_DIR="$EXERCISES_DIR/$LANGUAGE"
|
|
37
|
-
|
|
38
|
-
if [ ! -d "$LANGUAGE_DIR" ]; then
|
|
39
|
-
echo "Cloning $LANGUAGE track..."
|
|
40
|
-
cd "$EXERCISES_DIR"
|
|
41
|
-
git clone "$LANGUAGE_REPO" "$LANGUAGE"
|
|
42
|
-
fi
|
|
43
|
-
|
|
44
|
-
# Find exercises with both problem specification and language implementation
|
|
45
|
-
echo "Finding exercises with both specification and implementation..."
|
|
46
|
-
|
|
47
|
-
SPEC_DIR="$EXERCISES_DIR/problem-specifications/exercises"
|
|
48
|
-
IMPL_DIR="$LANGUAGE_DIR/exercises"
|
|
49
|
-
|
|
50
|
-
# Create filtered exercises directory
|
|
51
|
-
FILTERED_DIR="$EXERCISES_DIR/filtered"
|
|
52
|
-
if [ -d "$FILTERED_DIR" ]; then
|
|
53
|
-
echo "Removing existing filtered directory: $FILTERED_DIR"
|
|
54
|
-
rm -rf "$FILTERED_DIR"
|
|
55
|
-
fi
|
|
56
|
-
mkdir -p "$FILTERED_DIR"
|
|
57
|
-
|
|
58
|
-
count=0
|
|
59
|
-
for exercise in $(ls "$SPEC_DIR" 2>/dev/null | sort); do
|
|
60
|
-
if [ $count -ge $MAX_EXERCISES ]; then
|
|
61
|
-
break
|
|
62
|
-
fi
|
|
63
|
-
|
|
64
|
-
if [ -d "$IMPL_DIR/practice/$exercise" ] || [ -d "$IMPL_DIR/$exercise" ]; then
|
|
65
|
-
echo "Found exercise: $exercise"
|
|
66
|
-
|
|
67
|
-
# Create exercise directory
|
|
68
|
-
exercise_dir="$FILTERED_DIR/$exercise"
|
|
69
|
-
mkdir -p "$exercise_dir"
|
|
70
|
-
|
|
71
|
-
# Copy problem specification
|
|
72
|
-
if [ -f "$SPEC_DIR/$exercise/description.md" ]; then
|
|
73
|
-
cp "$SPEC_DIR/$exercise/description.md" "$exercise_dir/"
|
|
74
|
-
fi
|
|
75
|
-
|
|
76
|
-
if [ -f "$SPEC_DIR/$exercise/metadata.yml" ]; then
|
|
77
|
-
cp "$SPEC_DIR/$exercise/metadata.yml" "$exercise_dir/"
|
|
78
|
-
fi
|
|
79
|
-
|
|
80
|
-
# Copy language implementation
|
|
81
|
-
if [ -d "$IMPL_DIR/practice/$exercise" ]; then
|
|
82
|
-
cp -r "$IMPL_DIR/practice/$exercise"/* "$exercise_dir/"
|
|
83
|
-
elif [ -d "$IMPL_DIR/$exercise" ]; then
|
|
84
|
-
cp -r "$IMPL_DIR/$exercise"/* "$exercise_dir/"
|
|
85
|
-
fi
|
|
86
|
-
|
|
87
|
-
count=$((count + 1))
|
|
88
|
-
fi
|
|
89
|
-
done
|
|
90
|
-
|
|
91
|
-
echo "Successfully set up $count exercises in $FILTERED_DIR"
|
|
92
|
-
echo "Ready for benchmarking!"
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
# Simple validation script to test the benchmark setup
|
|
4
|
-
# This runs without the full Docker setup for quick validation
|
|
5
|
-
|
|
6
|
-
set -e
|
|
7
|
-
|
|
8
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
9
|
-
BENCHMARK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
10
|
-
|
|
11
|
-
echo "🔍 Validating Knowhow Benchmarks setup..."
|
|
12
|
-
|
|
13
|
-
# 1. Check that benchmarks can be built
|
|
14
|
-
echo "1. Building benchmarks package..."
|
|
15
|
-
cd "$BENCHMARK_DIR"
|
|
16
|
-
npm run build > /dev/null 2>&1
|
|
17
|
-
echo " ✅ Build successful"
|
|
18
|
-
|
|
19
|
-
# 2. Check that tests pass
|
|
20
|
-
echo "2. Running tests..."
|
|
21
|
-
npm test > /dev/null 2>&1
|
|
22
|
-
echo " ✅ Tests passed"
|
|
23
|
-
|
|
24
|
-
# 3. Check that CLI can show help
|
|
25
|
-
echo "3. Testing CLI..."
|
|
26
|
-
node dist/cli.js --help > /dev/null 2>&1
|
|
27
|
-
echo " ✅ CLI working"
|
|
28
|
-
|
|
29
|
-
# 4. Check that Docker can build (optional - requires Docker)
|
|
30
|
-
if command -v docker &> /dev/null; then
|
|
31
|
-
echo "4. Testing Docker build..."
|
|
32
|
-
cd "$(dirname "$BENCHMARK_DIR")"
|
|
33
|
-
docker build -f benchmarks/docker/Dockerfile -t knowhow-bench-test . > /dev/null 2>&1
|
|
34
|
-
echo " ✅ Docker build successful"
|
|
35
|
-
|
|
36
|
-
# Clean up test image
|
|
37
|
-
docker rmi knowhow-bench-test > /dev/null 2>&1
|
|
38
|
-
else
|
|
39
|
-
echo "4. Skipping Docker test (Docker not available)"
|
|
40
|
-
fi
|
|
41
|
-
|
|
42
|
-
echo ""
|
|
43
|
-
echo "🎉 All validations passed!"
|
|
44
|
-
echo ""
|
|
45
|
-
echo "Ready to run benchmarks. Example usage:"
|
|
46
|
-
echo " ./scripts/build-and-run.sh setup --language javascript --count 5"
|
|
47
|
-
echo " ./scripts/build-and-run.sh run --language javascript --count 5 --model gpt-4o-mini"
|
|
48
|
-
echo ""
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { BenchmarkRunner } from '../runner';
|
|
2
|
-
import { BenchmarkConfig } from '../types';
|
|
3
|
-
|
|
4
|
-
describe('BenchmarkRunner', () => {
|
|
5
|
-
const mockConfig: BenchmarkConfig = {
|
|
6
|
-
language: 'javascript',
|
|
7
|
-
maxExercises: 5,
|
|
8
|
-
model: 'gpt-4o-mini',
|
|
9
|
-
provider: 'openai',
|
|
10
|
-
limits: {
|
|
11
|
-
maxTurns: 20,
|
|
12
|
-
maxTime: 300,
|
|
13
|
-
maxCost: 1.0
|
|
14
|
-
},
|
|
15
|
-
outputFile: 'test-results.json'
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
it('should create a BenchmarkRunner instance', () => {
|
|
19
|
-
const runner = new BenchmarkRunner(mockConfig);
|
|
20
|
-
expect(runner).toBeInstanceOf(BenchmarkRunner);
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
it('should have the correct configuration', () => {
|
|
24
|
-
const runner = new BenchmarkRunner(mockConfig);
|
|
25
|
-
expect(runner['config']).toEqual(mockConfig);
|
|
26
|
-
});
|
|
27
|
-
});
|
package/benchmarks/src/cli.ts
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
import { Command } from "commander";
|
|
4
|
-
import { BenchmarkRunner } from "./runner";
|
|
5
|
-
import { BenchmarkConfig } from "./types";
|
|
6
|
-
import chalk from "chalk";
|
|
7
|
-
|
|
8
|
-
const program = new Command();
|
|
9
|
-
|
|
10
|
-
program
|
|
11
|
-
.name("knowhow-bench")
|
|
12
|
-
.description("Benchmark Knowhow terminal agent against coding exercises")
|
|
13
|
-
.version("0.0.1");
|
|
14
|
-
|
|
15
|
-
program
|
|
16
|
-
.command("run")
|
|
17
|
-
.description("Run benchmarks against Exercism exercises")
|
|
18
|
-
.option(
|
|
19
|
-
"-l, --language <language>",
|
|
20
|
-
"Programming language to test",
|
|
21
|
-
"javascript"
|
|
22
|
-
)
|
|
23
|
-
.option("-c, --count <count>", "Maximum number of exercises to run", "10")
|
|
24
|
-
.option("-m, --model <model>", "AI model to use", "gpt-4o-mini")
|
|
25
|
-
.option("-p, --provider <provider>", "AI provider to use", "openai")
|
|
26
|
-
.option("--max-turns <turns>", "Maximum turns per exercise", "30")
|
|
27
|
-
.option("--max-time <seconds>", "Maximum time per exercise in seconds", "300")
|
|
28
|
-
.option("--max-cost <dollars>", "Maximum cost per exercise in dollars", "1.0")
|
|
29
|
-
.option("--output <file>", "Output file for results", "results.json")
|
|
30
|
-
.action(async (options) => {
|
|
31
|
-
try {
|
|
32
|
-
console.log(chalk.blue("🚀 Starting Knowhow benchmarks..."));
|
|
33
|
-
|
|
34
|
-
const config: BenchmarkConfig = {
|
|
35
|
-
language: options.language,
|
|
36
|
-
maxExercises: parseInt(options.count),
|
|
37
|
-
model: options.model,
|
|
38
|
-
provider: options.provider,
|
|
39
|
-
limits: {
|
|
40
|
-
maxTurns: parseInt(options.maxTurns),
|
|
41
|
-
maxTime: parseInt(options.maxTime),
|
|
42
|
-
maxCost: parseFloat(options.maxCost),
|
|
43
|
-
},
|
|
44
|
-
outputFile: options.output,
|
|
45
|
-
};
|
|
46
|
-
|
|
47
|
-
const runner = new BenchmarkRunner(config);
|
|
48
|
-
await runner.run();
|
|
49
|
-
|
|
50
|
-
console.log(chalk.green("✅ Benchmarks completed successfully!"));
|
|
51
|
-
process.exit(0);
|
|
52
|
-
} catch (error) {
|
|
53
|
-
console.error(chalk.red("❌ Benchmark failed:"), error);
|
|
54
|
-
process.exit(1);
|
|
55
|
-
}
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
program
|
|
59
|
-
.command("setup")
|
|
60
|
-
.description("Set up exercises for benchmarking")
|
|
61
|
-
.option(
|
|
62
|
-
"-l, --language <language>",
|
|
63
|
-
"Programming language to setup",
|
|
64
|
-
"javascript"
|
|
65
|
-
)
|
|
66
|
-
.option("-c, --count <count>", "Maximum number of exercises to setup", "10")
|
|
67
|
-
.action(async (options) => {
|
|
68
|
-
try {
|
|
69
|
-
console.log(chalk.blue("📦 Setting up exercises..."));
|
|
70
|
-
|
|
71
|
-
const runner = new BenchmarkRunner({
|
|
72
|
-
language: options.language,
|
|
73
|
-
maxExercises: parseInt(options.count),
|
|
74
|
-
model: "gpt-4o-mini", // Dummy values for setup
|
|
75
|
-
provider: "openai",
|
|
76
|
-
limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
|
|
77
|
-
outputFile: "results.json",
|
|
78
|
-
});
|
|
79
|
-
|
|
80
|
-
await runner.setupExercises();
|
|
81
|
-
|
|
82
|
-
console.log(chalk.green("✅ Exercises setup completed!"));
|
|
83
|
-
process.exit(0);
|
|
84
|
-
} catch (error) {
|
|
85
|
-
console.error(chalk.red("❌ Setup failed:"), error);
|
|
86
|
-
process.exit(1);
|
|
87
|
-
}
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
program.parse();
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import { ExerciseEvaluator, TestResult, TestEvaluationResult } from './types';
|
|
2
|
-
import { JavaScriptEvaluator } from './JavaScriptEvaluator';
|
|
3
|
-
|
|
4
|
-
export class EvaluatorRegistry {
|
|
5
|
-
private evaluators: ExerciseEvaluator[] = [];
|
|
6
|
-
|
|
7
|
-
constructor() {
|
|
8
|
-
// Register default evaluators
|
|
9
|
-
this.registerEvaluator(new JavaScriptEvaluator());
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
registerEvaluator(evaluator: ExerciseEvaluator): void {
|
|
13
|
-
this.evaluators.push(evaluator);
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
evalForExercise(exercisePath: string): ExerciseEvaluator | null {
|
|
17
|
-
return this.evaluators.find(e => e.canEvaluate(exercisePath)) || null;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
async evaluateExercise(exercisePath: string, exerciseName: string): Promise<TestEvaluationResult | null> {
|
|
21
|
-
// Find the first evaluator that can handle this exercise
|
|
22
|
-
const evaluator = this.evalForExercise(exercisePath);
|
|
23
|
-
|
|
24
|
-
if (!evaluator) {
|
|
25
|
-
console.warn(`No evaluator found for exercise: ${exerciseName} at ${exercisePath}`);
|
|
26
|
-
return null;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
try {
|
|
30
|
-
console.log(`Evaluating ${exerciseName} using ${evaluator.language} evaluator...`);
|
|
31
|
-
const testResult = await evaluator.evaluate(exercisePath);
|
|
32
|
-
|
|
33
|
-
return {
|
|
34
|
-
exerciseName,
|
|
35
|
-
testResult,
|
|
36
|
-
evaluatedBy: evaluator.language
|
|
37
|
-
};
|
|
38
|
-
} catch (error) {
|
|
39
|
-
console.error(`Error evaluating exercise ${exerciseName}:`, error);
|
|
40
|
-
|
|
41
|
-
// Return a failed test result instead of null
|
|
42
|
-
return {
|
|
43
|
-
exerciseName,
|
|
44
|
-
testResult: {
|
|
45
|
-
passed: 0,
|
|
46
|
-
failed: 0,
|
|
47
|
-
total: 0,
|
|
48
|
-
success: false,
|
|
49
|
-
output: '',
|
|
50
|
-
errorMessage: `Evaluation failed: ${error instanceof Error ? error.message : String(error)}`
|
|
51
|
-
},
|
|
52
|
-
evaluatedBy: evaluator.language
|
|
53
|
-
};
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
getAvailableEvaluators(): string[] {
|
|
58
|
-
return this.evaluators.map(e => e.language);
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
canEvaluateExercise(exercisePath: string): boolean {
|
|
62
|
-
return this.evaluators.some(e => e.canEvaluate(exercisePath));
|
|
63
|
-
}
|
|
64
|
-
}
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
import { ExerciseEvaluator, TestResult } from './types';
|
|
2
|
-
import { execSync } from 'child_process';
|
|
3
|
-
import * as fs from 'fs';
|
|
4
|
-
import * as path from 'path';
|
|
5
|
-
|
|
6
|
-
export class JavaScriptEvaluator implements ExerciseEvaluator {
|
|
7
|
-
language = 'javascript';
|
|
8
|
-
|
|
9
|
-
canEvaluate(exercisePath: string): boolean {
|
|
10
|
-
// Check for package.json with test script or jest config
|
|
11
|
-
const packageJsonPath = path.join(exercisePath, 'package.json');
|
|
12
|
-
|
|
13
|
-
if (!fs.existsSync(packageJsonPath)) {
|
|
14
|
-
return false;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
try {
|
|
18
|
-
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
|
|
19
|
-
|
|
20
|
-
// Check if there's a test script or jest configuration
|
|
21
|
-
return !!(
|
|
22
|
-
packageJson.scripts?.test ||
|
|
23
|
-
packageJson.devDependencies?.jest ||
|
|
24
|
-
packageJson.dependencies?.jest ||
|
|
25
|
-
packageJson.jest ||
|
|
26
|
-
fs.existsSync(path.join(exercisePath, 'jest.config.js')) ||
|
|
27
|
-
fs.existsSync(path.join(exercisePath, 'jest.config.json'))
|
|
28
|
-
);
|
|
29
|
-
} catch (error) {
|
|
30
|
-
return false;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
async evaluate(exercisePath: string): Promise<TestResult> {
|
|
35
|
-
try {
|
|
36
|
-
// First try to install dependencies if node_modules doesn't exist
|
|
37
|
-
const nodeModulesPath = path.join(exercisePath, 'node_modules');
|
|
38
|
-
if (!fs.existsSync(nodeModulesPath)) {
|
|
39
|
-
try {
|
|
40
|
-
execSync('npm install', {
|
|
41
|
-
cwd: exercisePath,
|
|
42
|
-
stdio: 'pipe',
|
|
43
|
-
timeout: 60000 // 60 second timeout
|
|
44
|
-
});
|
|
45
|
-
} catch (installError) {
|
|
46
|
-
// Continue anyway, maybe dependencies are not needed
|
|
47
|
-
console.warn(`Failed to install dependencies in ${exercisePath}:`, installError);
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// Try to run tests with JSON output
|
|
52
|
-
let command = 'npm test';
|
|
53
|
-
|
|
54
|
-
// Check if we can use Jest directly with JSON reporter
|
|
55
|
-
const packageJsonPath = path.join(exercisePath, 'package.json');
|
|
56
|
-
if (fs.existsSync(packageJsonPath)) {
|
|
57
|
-
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
|
|
58
|
-
|
|
59
|
-
// If jest is available, use it directly with JSON reporter
|
|
60
|
-
if (packageJson.devDependencies?.jest || packageJson.dependencies?.jest) {
|
|
61
|
-
command = 'npx jest --json --verbose';
|
|
62
|
-
} else if (packageJson.scripts?.test) {
|
|
63
|
-
// Try to modify the test script to include JSON output
|
|
64
|
-
const testScript = packageJson.scripts.test;
|
|
65
|
-
if (testScript.includes('jest')) {
|
|
66
|
-
command = `${testScript} --json --verbose`;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const output = execSync(command, {
|
|
72
|
-
cwd: exercisePath,
|
|
73
|
-
stdio: 'pipe',
|
|
74
|
-
encoding: 'utf8',
|
|
75
|
-
timeout: 120000 // 2 minute timeout for tests
|
|
76
|
-
});
|
|
77
|
-
|
|
78
|
-
return this.parseJestOutput(output);
|
|
79
|
-
|
|
80
|
-
} catch (error: any) {
|
|
81
|
-
// Jest exits with non-zero code when tests fail, so we need to parse the output
|
|
82
|
-
if (error.stdout) {
|
|
83
|
-
try {
|
|
84
|
-
return this.parseJestOutput(error.stdout);
|
|
85
|
-
} catch (parseError) {
|
|
86
|
-
// If JSON parsing fails, try to extract basic info from text output
|
|
87
|
-
return this.parseTextOutput(error.stdout || error.stderr || '');
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
return {
|
|
92
|
-
passed: 0,
|
|
93
|
-
failed: 0,
|
|
94
|
-
total: 0,
|
|
95
|
-
success: false,
|
|
96
|
-
output: error.message || 'Test execution failed',
|
|
97
|
-
errorMessage: error.message,
|
|
98
|
-
details: error
|
|
99
|
-
};
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
private parseJestOutput(output: string): TestResult {
|
|
104
|
-
try {
|
|
105
|
-
// Try to find JSON output in the string
|
|
106
|
-
const lines = output.split('\n');
|
|
107
|
-
let jsonLine = '';
|
|
108
|
-
|
|
109
|
-
for (const line of lines) {
|
|
110
|
-
const trimmed = line.trim();
|
|
111
|
-
if (trimmed.startsWith('{') && (trimmed.includes('"success"') || trimmed.includes('"numTotalTests"'))) {
|
|
112
|
-
jsonLine = trimmed;
|
|
113
|
-
break;
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
if (jsonLine) {
|
|
118
|
-
const result = JSON.parse(jsonLine);
|
|
119
|
-
|
|
120
|
-
return {
|
|
121
|
-
passed: result.numPassedTests || 0,
|
|
122
|
-
failed: result.numFailedTests || 0,
|
|
123
|
-
total: result.numTotalTests || 0,
|
|
124
|
-
skipped: result.numPendingTests || 0,
|
|
125
|
-
success: result.success || false,
|
|
126
|
-
output: output,
|
|
127
|
-
details: result
|
|
128
|
-
};
|
|
129
|
-
}
|
|
130
|
-
} catch (error) {
|
|
131
|
-
// Fall back to text parsing
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return this.parseTextOutput(output);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
private parseTextOutput(output: string): TestResult {
|
|
138
|
-
// Try to parse Jest text output
|
|
139
|
-
let passed = 0;
|
|
140
|
-
let failed = 0;
|
|
141
|
-
let total = 0;
|
|
142
|
-
let success = false;
|
|
143
|
-
|
|
144
|
-
// Look for Jest summary patterns
|
|
145
|
-
const passedMatch = output.match(/(\d+) passed/);
|
|
146
|
-
const failedMatch = output.match(/(\d+) failed/);
|
|
147
|
-
const totalMatch = output.match(/(\d+) total/);
|
|
148
|
-
|
|
149
|
-
if (passedMatch) passed = parseInt(passedMatch[1]);
|
|
150
|
-
if (failedMatch) failed = parseInt(failedMatch[1]);
|
|
151
|
-
if (totalMatch) total = parseInt(totalMatch[1]);
|
|
152
|
-
|
|
153
|
-
// If we couldn't find specific numbers, try other patterns
|
|
154
|
-
if (total === 0) {
|
|
155
|
-
// Look for "Tests: " summary
|
|
156
|
-
const testsMatch = output.match(/Tests:\s+(\d+)\s+failed,\s+(\d+)\s+passed,\s+(\d+)\s+total/);
|
|
157
|
-
if (testsMatch) {
|
|
158
|
-
failed = parseInt(testsMatch[1]);
|
|
159
|
-
passed = parseInt(testsMatch[2]);
|
|
160
|
-
total = parseInt(testsMatch[3]);
|
|
161
|
-
} else {
|
|
162
|
-
// Look for individual test results
|
|
163
|
-
const testResults = output.match(/✓|✗|PASS|FAIL/g);
|
|
164
|
-
if (testResults) {
|
|
165
|
-
total = testResults.length;
|
|
166
|
-
passed = testResults.filter(r => r === '✓' || r === 'PASS').length;
|
|
167
|
-
failed = total - passed;
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
success = failed === 0 && total > 0;
|
|
173
|
-
|
|
174
|
-
return {
|
|
175
|
-
passed,
|
|
176
|
-
failed,
|
|
177
|
-
total,
|
|
178
|
-
success,
|
|
179
|
-
output,
|
|
180
|
-
errorMessage: success ? undefined : 'Some tests failed'
|
|
181
|
-
};
|
|
182
|
-
}
|
|
183
|
-
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
export interface TestResult {
|
|
2
|
-
passed: number;
|
|
3
|
-
failed: number;
|
|
4
|
-
total: number;
|
|
5
|
-
skipped?: number;
|
|
6
|
-
success: boolean;
|
|
7
|
-
output: string;
|
|
8
|
-
errorMessage?: string;
|
|
9
|
-
details?: any; // Raw test runner output
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export interface ExerciseEvaluator {
|
|
13
|
-
language: string;
|
|
14
|
-
canEvaluate(exercisePath: string): boolean;
|
|
15
|
-
evaluate(exercisePath: string): Promise<TestResult>;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
export interface TestEvaluationResult {
|
|
19
|
-
exerciseName: string;
|
|
20
|
-
testResult: TestResult;
|
|
21
|
-
evaluatedBy: string; // Which evaluator was used
|
|
22
|
-
}
|
package/benchmarks/src/index.ts
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { AIClient, HttpClient } from "../../ts_build/src/clients";
|
|
2
|
-
|
|
3
|
-
export async function registerProvider(
|
|
4
|
-
provider: string,
|
|
5
|
-
url: string,
|
|
6
|
-
headers: Record<string, string>,
|
|
7
|
-
clients: AIClient
|
|
8
|
-
): Promise<void> {
|
|
9
|
-
const client = new HttpClient(url, headers);
|
|
10
|
-
|
|
11
|
-
clients.registerClient(provider, client);
|
|
12
|
-
await clients.loadProviderModels(provider);
|
|
13
|
-
}
|