npm - @tyvm/knowhow - Versions diffs - 0.0.90 → 0.0.91 - Mend

@tyvm/knowhow 0.0.90 → 0.0.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (262) hide show

package/.depcheckrc +31 -0
package/bin/knowhow.js +1 -1
package/package.json +4 -32
package/src/agents/tools/executeScript/index.ts +5 -0
package/src/agents/tools/googleSearch.ts +2 -2
package/src/agents/tools/index.ts +0 -3
package/src/agents/tools/list.ts +0 -147
package/src/agents/tools/loadWebpage.ts +3 -113
package/src/auth/browserLogin.ts +10 -13
package/src/cli.ts +63 -3
package/src/clients/gemini.ts +96 -25
package/src/clients/http.ts +7 -11
package/src/clients/pricing/google.ts +122 -26
package/src/conversion.ts +24 -54
package/src/index.ts +8 -1
package/src/login.ts +5 -6
package/src/plugins/language.ts +0 -4
package/src/plugins/plugins.ts +0 -14
package/src/plugins/url.ts +31 -12
package/src/services/GitHub.ts +2 -2
package/src/services/KnowhowClient.ts +34 -34
package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
package/src/services/S3.ts +16 -16
package/src/services/index.ts +4 -4
package/src/services/modules/index.ts +10 -2
package/src/services/modules/types.ts +5 -2
package/src/services/script-execution/ScriptExecutor.ts +29 -10
package/src/services/script-execution/ScriptPolicy.ts +6 -2
package/src/types.ts +1 -0
package/src/utils/http.ts +127 -0
package/src/workers/auth/PasskeySetup.ts +7 -11
package/tests/clients/AIClient.test.ts +24 -21
package/tests/manual/file-edits/figma.test.ts +3 -70
package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
package/tests/plugins/language/languagePlugin.test.ts +2 -0
package/tests/processors/ToolResponseCache.test.ts +2 -2
package/tests/test.spec.ts +0 -14
package/tests/unit/modules/moduleLoading.test.ts +7 -4
package/tests/unit/plugins/pluginLoading.test.ts +6 -6
package/ts_build/package.json +4 -32
package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
package/ts_build/src/agents/tools/executeScript/index.js +4 -1
package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
package/ts_build/src/agents/tools/googleSearch.js +2 -2
package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
package/ts_build/src/agents/tools/index.d.ts +0 -3
package/ts_build/src/agents/tools/index.js +0 -3
package/ts_build/src/agents/tools/index.js.map +1 -1
package/ts_build/src/agents/tools/list.js +0 -138
package/ts_build/src/agents/tools/list.js.map +1 -1
package/ts_build/src/agents/tools/loadWebpage.js +1 -89
package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
package/ts_build/src/auth/browserLogin.js +7 -7
package/ts_build/src/auth/browserLogin.js.map +1 -1
package/ts_build/src/cli.d.ts +1 -1
package/ts_build/src/cli.js +47 -1
package/ts_build/src/cli.js.map +1 -1
package/ts_build/src/clients/gemini.d.ts +1 -73
package/ts_build/src/clients/gemini.js +57 -19
package/ts_build/src/clients/gemini.js.map +1 -1
package/ts_build/src/clients/http.js +5 -9
package/ts_build/src/clients/http.js.map +1 -1
package/ts_build/src/clients/pricing/google.d.ts +17 -73
package/ts_build/src/clients/pricing/google.js +47 -10
package/ts_build/src/clients/pricing/google.js.map +1 -1
package/ts_build/src/conversion.d.ts +1 -4
package/ts_build/src/conversion.js +12 -27
package/ts_build/src/conversion.js.map +1 -1
package/ts_build/src/index.d.ts +4 -0
package/ts_build/src/index.js +7 -1
package/ts_build/src/index.js.map +1 -1
package/ts_build/src/login.js +5 -4
package/ts_build/src/login.js.map +1 -1
package/ts_build/src/plugins/downloader/downloader.js +3 -3
package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
package/ts_build/src/plugins/language.js.map +1 -1
package/ts_build/src/plugins/plugins.js +0 -14
package/ts_build/src/plugins/plugins.js.map +1 -1
package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
package/ts_build/src/plugins/url.js +27 -8
package/ts_build/src/plugins/url.js.map +1 -1
package/ts_build/src/services/GitHub.js +2 -2
package/ts_build/src/services/GitHub.js.map +1 -1
package/ts_build/src/services/KnowhowClient.d.ts +29 -29
package/ts_build/src/services/KnowhowClient.js +33 -33
package/ts_build/src/services/KnowhowClient.js.map +1 -1
package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
package/ts_build/src/services/MediaProcessorService.js +215 -0
package/ts_build/src/services/MediaProcessorService.js.map +1 -0
package/ts_build/src/services/S3.js +12 -18
package/ts_build/src/services/S3.js.map +1 -1
package/ts_build/src/services/index.d.ts +3 -2
package/ts_build/src/services/index.js +3 -3
package/ts_build/src/services/index.js.map +1 -1
package/ts_build/src/services/modules/index.js +10 -2
package/ts_build/src/services/modules/index.js.map +1 -1
package/ts_build/src/services/modules/types.d.ts +5 -2
package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
package/ts_build/src/types.d.ts +1 -0
package/ts_build/src/types.js +1 -0
package/ts_build/src/types.js.map +1 -1
package/ts_build/src/utils/http.d.ts +27 -0
package/ts_build/src/utils/http.js +98 -0
package/ts_build/src/utils/http.js.map +1 -0
package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
package/ts_build/tests/clients/AIClient.test.js +11 -14
package/ts_build/tests/clients/AIClient.test.js.map +1 -1
package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
package/ts_build/tests/test.spec.js +0 -14
package/ts_build/tests/test.spec.js.map +1 -1
package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
package/ts_build/tests/unit/modules/moduleLoading.test.js +6 -4
package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
package/benchmarks/.dockerignore +0 -7
package/benchmarks/README.md +0 -166
package/benchmarks/docker/Dockerfile +0 -68
package/benchmarks/example-config.yml +0 -27
package/benchmarks/jest.config.js +0 -13
package/benchmarks/package-lock.json +0 -4297
package/benchmarks/package.json +0 -39
package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
package/benchmarks/scripts/build-and-run.sh +0 -47
package/benchmarks/scripts/clone-exercism.sh +0 -92
package/benchmarks/scripts/validate.sh +0 -48
package/benchmarks/src/__tests__/runner.test.ts +0 -27
package/benchmarks/src/cli.ts +0 -90
package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
package/benchmarks/src/evaluators/index.ts +0 -3
package/benchmarks/src/evaluators/types.ts +0 -22
package/benchmarks/src/index.ts +0 -3
package/benchmarks/src/providers.ts +0 -13
package/benchmarks/src/runner.ts +0 -824
package/benchmarks/src/types.ts +0 -63
package/benchmarks/tsconfig.json +0 -19
package/leaderboard/README.md +0 -148
package/leaderboard/app/api/benchmark-data/route.ts +0 -131
package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
package/leaderboard/app/globals.css +0 -27
package/leaderboard/app/layout.tsx +0 -21
package/leaderboard/app/page.tsx +0 -170
package/leaderboard/components/LeaderboardTable.tsx +0 -168
package/leaderboard/components/PerformanceChart.tsx +0 -109
package/leaderboard/next-env.d.ts +0 -5
package/leaderboard/next.config.js +0 -4
package/leaderboard/package-lock.json +0 -6363
package/leaderboard/package.json +0 -28
package/leaderboard/postcss.config.js +0 -6
package/leaderboard/tailwind.config.js +0 -17
package/leaderboard/tsconfig.json +0 -28
package/leaderboard/types/benchmark.ts +0 -67
package/leaderboard/utils/dataProcessor.ts +0 -33
package/src/agents/tools/asana/definitions.ts +0 -199
package/src/agents/tools/asana/index.ts +0 -108
package/src/agents/tools/ast/astAppendNode.ts +0 -90
package/src/agents/tools/ast/astDeleteNode.ts +0 -88
package/src/agents/tools/ast/astEditNode.ts +0 -95
package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
package/src/agents/tools/ast/astListPaths.ts +0 -66
package/src/agents/tools/ast/index.ts +0 -7
package/src/agents/tools/github/definitions.ts +0 -89
package/src/agents/tools/github/index.ts +0 -67
package/src/chat-old.ts +0 -446
package/src/plugins/asana.ts +0 -146
package/src/plugins/downloader/plugin.ts +0 -103
package/src/plugins/downloader/types.ts +0 -92
package/src/plugins/figma.ts +0 -158
package/src/plugins/github.ts +0 -219
package/src/plugins/jira.ts +0 -115
package/src/plugins/linear.ts +0 -230
package/src/plugins/notion.ts +0 -179
package/src/plugins/tree-sitter/editor.ts +0 -369
package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
package/src/plugins/tree-sitter/parser.ts +0 -470
package/src/plugins/tree-sitter/simple-paths.ts +0 -467
package/tests/tree-sitter/editor.test.ts +0 -113
package/tests/tree-sitter/invalid.test.ts +0 -299
package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
package/tests/tree-sitter/paths/paths.test.ts +0 -170
package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
package/tests/tree-sitter/sample-after.ts +0 -48
package/tests/tree-sitter/sample-before.ts +0 -25
package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
package/tests/tree-sitter/tree-sitter.test.ts +0 -251

package/benchmarks/scripts/build-and-run.sh DELETED Viewed

@@ -1,47 +0,0 @@
-#!/bin/bash
-# Build and run Knowhow benchmarks
-# Usage: ./build-and-run.sh [command] [options...]
-set -e
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
-echo "🏗️  Building Knowhow benchmark container..."
-# Build the Docker container
-docker build -f "$PROJECT_ROOT/benchmarks/docker/Dockerfile" -t knowhow-bench "$PROJECT_ROOT"
-echo "✅ Container built successfully!"
-# Create results directory if it doesn't exist
-mkdir -p "$PROJECT_ROOT/benchmarks/results"
-# If no arguments provided, show usage
-if [ $# -eq 0 ]; then
-    echo ""
-    echo "Usage: $0 <command> [options...]"
-    echo ""
-    echo "Examples:"
-    echo "  $0 setup --language javascript --count 5"
-    echo "  $0 run --language javascript --count 5 --model gpt-4o-mini"
-    echo "  $0 run --language python --count 10 --provider anthropic --model claude-3-sonnet-20240229"
-    echo ""
-    exit 0
-fi
-echo "🚀 Running benchmarks..."
-# Run the container with all provided arguments
-docker run --rm \
-    -v "$PROJECT_ROOT/benchmarks/results:/app/benchmarks/results" \
-    -e OPENAI_KEY \
-    -e ANTHROPIC_API_KEY \
-    -e GEMINI_API_KEY \
-    -e XAI_API_KEY \
-    --env-file "$PROJECT_ROOT/benchmarks/.env" \
-    knowhow-bench "$@"
-echo "✅ Benchmarks completed!"
-echo "📊 Results available in: benchmarks/results/"

package/benchmarks/scripts/clone-exercism.sh DELETED Viewed

@@ -1,92 +0,0 @@
-#!/bin/bash
-# Clone Exercism exercises for benchmarking
-# Based on Aider's clone-exercism.sh approach
-set -e
-# Configuration
-EXERCISM_REPO="https://github.com/exercism/problem-specifications.git"
-LANGUAGE=${1:-"javascript"}  # Default to JavaScript
-MAX_EXERCISES=${2:-10}       # Default to 10 exercises
-# Use different paths for local vs container
-if [ -n "$CONTAINER" ]; then
-    EXERCISES_DIR="/app/exercises"
-else
-    EXERCISES_DIR="$(cd "$(dirname "$0")/.." && pwd)/exercises"
-fi
-echo "Cloning Exercism exercises for language: $LANGUAGE"
-echo "Maximum exercises: $MAX_EXERCISES"
-echo "Target directory: $EXERCISES_DIR"
-# Create exercises directory if it doesn't exist
-mkdir -p "$EXERCISES_DIR"
-# Clone the problem specifications repo if not already cloned
-if [ ! -d "$EXERCISES_DIR/problem-specifications" ]; then
-    echo "Cloning Exercism problem specifications..."
-    cd "$EXERCISES_DIR"
-    git clone "$EXERCISM_REPO" problem-specifications
-fi
-# Clone the language track
-LANGUAGE_REPO="https://github.com/exercism/${LANGUAGE}.git"
-LANGUAGE_DIR="$EXERCISES_DIR/$LANGUAGE"
-if [ ! -d "$LANGUAGE_DIR" ]; then
-    echo "Cloning $LANGUAGE track..."
-    cd "$EXERCISES_DIR"
-    git clone "$LANGUAGE_REPO" "$LANGUAGE"
-fi
-# Find exercises with both problem specification and language implementation
-echo "Finding exercises with both specification and implementation..."
-SPEC_DIR="$EXERCISES_DIR/problem-specifications/exercises"
-IMPL_DIR="$LANGUAGE_DIR/exercises"
-# Create filtered exercises directory
-FILTERED_DIR="$EXERCISES_DIR/filtered"
-if [ -d "$FILTERED_DIR" ]; then
-    echo "Removing existing filtered directory: $FILTERED_DIR"
-    rm -rf "$FILTERED_DIR"
-fi
-mkdir -p "$FILTERED_DIR"
-count=0
-for exercise in $(ls "$SPEC_DIR" 2>/dev/null | sort); do
-    if [ $count -ge $MAX_EXERCISES ]; then
-        break
-    fi
-    if [ -d "$IMPL_DIR/practice/$exercise" ] || [ -d "$IMPL_DIR/$exercise" ]; then
-        echo "Found exercise: $exercise"
-        # Create exercise directory
-        exercise_dir="$FILTERED_DIR/$exercise"
-        mkdir -p "$exercise_dir"
-        # Copy problem specification
-        if [ -f "$SPEC_DIR/$exercise/description.md" ]; then
-            cp "$SPEC_DIR/$exercise/description.md" "$exercise_dir/"
-        fi
-        if [ -f "$SPEC_DIR/$exercise/metadata.yml" ]; then
-            cp "$SPEC_DIR/$exercise/metadata.yml" "$exercise_dir/"
-        fi
-        # Copy language implementation
-        if [ -d "$IMPL_DIR/practice/$exercise" ]; then
-            cp -r "$IMPL_DIR/practice/$exercise"/* "$exercise_dir/"
-        elif [ -d "$IMPL_DIR/$exercise" ]; then
-            cp -r "$IMPL_DIR/$exercise"/* "$exercise_dir/"
-        fi
-        count=$((count + 1))
-    fi
-done
-echo "Successfully set up $count exercises in $FILTERED_DIR"
-echo "Ready for benchmarking!"

package/benchmarks/scripts/validate.sh DELETED Viewed

@@ -1,48 +0,0 @@
-#!/bin/bash
-# Simple validation script to test the benchmark setup
-# This runs without the full Docker setup for quick validation
-set -e
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-BENCHMARK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
-echo "🔍 Validating Knowhow Benchmarks setup..."
-# 1. Check that benchmarks can be built
-echo "1. Building benchmarks package..."
-cd "$BENCHMARK_DIR"
-npm run build > /dev/null 2>&1
-echo "   ✅ Build successful"
-# 2. Check that tests pass
-echo "2. Running tests..."
-npm test > /dev/null 2>&1
-echo "   ✅ Tests passed"
-# 3. Check that CLI can show help
-echo "3. Testing CLI..."
-node dist/cli.js --help > /dev/null 2>&1
-echo "   ✅ CLI working"
-# 4. Check that Docker can build (optional - requires Docker)
-if command -v docker &> /dev/null; then
-    echo "4. Testing Docker build..."
-    cd "$(dirname "$BENCHMARK_DIR")"
-    docker build -f benchmarks/docker/Dockerfile -t knowhow-bench-test . > /dev/null 2>&1
-    echo "   ✅ Docker build successful"
-    # Clean up test image
-    docker rmi knowhow-bench-test > /dev/null 2>&1
-else
-    echo "4. Skipping Docker test (Docker not available)"
-fi
-echo ""
-echo "🎉 All validations passed!"
-echo ""
-echo "Ready to run benchmarks. Example usage:"
-echo "  ./scripts/build-and-run.sh setup --language javascript --count 5"
-echo "  ./scripts/build-and-run.sh run --language javascript --count 5 --model gpt-4o-mini"
-echo ""

package/benchmarks/src/__tests__/runner.test.ts DELETED Viewed

@@ -1,27 +0,0 @@
-import { BenchmarkRunner } from '../runner';
-import { BenchmarkConfig } from '../types';
-describe('BenchmarkRunner', () => {
-  const mockConfig: BenchmarkConfig = {
-    language: 'javascript',
-    maxExercises: 5,
-    model: 'gpt-4o-mini',
-    provider: 'openai',
-    limits: {
-      maxTurns: 20,
-      maxTime: 300,
-      maxCost: 1.0
-    },
-    outputFile: 'test-results.json'
-  };
-  it('should create a BenchmarkRunner instance', () => {
-    const runner = new BenchmarkRunner(mockConfig);
-    expect(runner).toBeInstanceOf(BenchmarkRunner);
-  });
-  it('should have the correct configuration', () => {
-    const runner = new BenchmarkRunner(mockConfig);
-    expect(runner['config']).toEqual(mockConfig);
-  });
-});

package/benchmarks/src/cli.ts DELETED Viewed

@@ -1,90 +0,0 @@
-#!/usr/bin/env node
-import { Command } from "commander";
-import { BenchmarkRunner } from "./runner";
-import { BenchmarkConfig } from "./types";
-import chalk from "chalk";
-const program = new Command();
-program
-  .name("knowhow-bench")
-  .description("Benchmark Knowhow terminal agent against coding exercises")
-  .version("0.0.1");
-program
-  .command("run")
-  .description("Run benchmarks against Exercism exercises")
-  .option(
-    "-l, --language <language>",
-    "Programming language to test",
-    "javascript"
-  )
-  .option("-c, --count <count>", "Maximum number of exercises to run", "10")
-  .option("-m, --model <model>", "AI model to use", "gpt-4o-mini")
-  .option("-p, --provider <provider>", "AI provider to use", "openai")
-  .option("--max-turns <turns>", "Maximum turns per exercise", "30")
-  .option("--max-time <seconds>", "Maximum time per exercise in seconds", "300")
-  .option("--max-cost <dollars>", "Maximum cost per exercise in dollars", "1.0")
-  .option("--output <file>", "Output file for results", "results.json")
-  .action(async (options) => {
-    try {
-      console.log(chalk.blue("🚀 Starting Knowhow benchmarks..."));
-      const config: BenchmarkConfig = {
-        language: options.language,
-        maxExercises: parseInt(options.count),
-        model: options.model,
-        provider: options.provider,
-        limits: {
-          maxTurns: parseInt(options.maxTurns),
-          maxTime: parseInt(options.maxTime),
-          maxCost: parseFloat(options.maxCost),
-        },
-        outputFile: options.output,
-      };
-      const runner = new BenchmarkRunner(config);
-      await runner.run();
-      console.log(chalk.green("✅ Benchmarks completed successfully!"));
-      process.exit(0);
-    } catch (error) {
-      console.error(chalk.red("❌ Benchmark failed:"), error);
-      process.exit(1);
-    }
-  });
-program
-  .command("setup")
-  .description("Set up exercises for benchmarking")
-  .option(
-    "-l, --language <language>",
-    "Programming language to setup",
-    "javascript"
-  )
-  .option("-c, --count <count>", "Maximum number of exercises to setup", "10")
-  .action(async (options) => {
-    try {
-      console.log(chalk.blue("📦 Setting up exercises..."));
-      const runner = new BenchmarkRunner({
-        language: options.language,
-        maxExercises: parseInt(options.count),
-        model: "gpt-4o-mini", // Dummy values for setup
-        provider: "openai",
-        limits: { maxTurns: 20, maxTime: 300, maxCost: 1.0 },
-        outputFile: "results.json",
-      });
-      await runner.setupExercises();
-      console.log(chalk.green("✅ Exercises setup completed!"));
-      process.exit(0);
-    } catch (error) {
-      console.error(chalk.red("❌ Setup failed:"), error);
-      process.exit(1);
-    }
-  });
-program.parse();

package/benchmarks/src/evaluators/EvaluatorRegistry.ts DELETED Viewed

@@ -1,64 +0,0 @@
-import { ExerciseEvaluator, TestResult, TestEvaluationResult } from './types';
-import { JavaScriptEvaluator } from './JavaScriptEvaluator';
-export class EvaluatorRegistry {
-  private evaluators: ExerciseEvaluator[] = [];
-  constructor() {
-    // Register default evaluators
-    this.registerEvaluator(new JavaScriptEvaluator());
-  }
-  registerEvaluator(evaluator: ExerciseEvaluator): void {
-    this.evaluators.push(evaluator);
-  }
-  evalForExercise(exercisePath: string): ExerciseEvaluator | null {
-    return this.evaluators.find(e => e.canEvaluate(exercisePath)) || null;
-  }
-  async evaluateExercise(exercisePath: string, exerciseName: string): Promise<TestEvaluationResult | null> {
-    // Find the first evaluator that can handle this exercise
-    const evaluator = this.evalForExercise(exercisePath);
-    if (!evaluator) {
-      console.warn(`No evaluator found for exercise: ${exerciseName} at ${exercisePath}`);
-      return null;
-    }
-    try {
-      console.log(`Evaluating ${exerciseName} using ${evaluator.language} evaluator...`);
-      const testResult = await evaluator.evaluate(exercisePath);
-      return {
-        exerciseName,
-        testResult,
-        evaluatedBy: evaluator.language
-      };
-    } catch (error) {
-      console.error(`Error evaluating exercise ${exerciseName}:`, error);
-      // Return a failed test result instead of null
-      return {
-        exerciseName,
-        testResult: {
-          passed: 0,
-          failed: 0,
-          total: 0,
-          success: false,
-          output: '',
-          errorMessage: `Evaluation failed: ${error instanceof Error ? error.message : String(error)}`
-        },
-        evaluatedBy: evaluator.language
-      };
-    }
-  }
-  getAvailableEvaluators(): string[] {
-    return this.evaluators.map(e => e.language);
-  }
-  canEvaluateExercise(exercisePath: string): boolean {
-    return this.evaluators.some(e => e.canEvaluate(exercisePath));
-  }
-}

package/benchmarks/src/evaluators/JavaScriptEvaluator.ts DELETED Viewed

@@ -1,183 +0,0 @@
-import { ExerciseEvaluator, TestResult } from './types';
-import { execSync } from 'child_process';
-import * as fs from 'fs';
-import * as path from 'path';
-export class JavaScriptEvaluator implements ExerciseEvaluator {
-  language = 'javascript';
-  canEvaluate(exercisePath: string): boolean {
-    // Check for package.json with test script or jest config
-    const packageJsonPath = path.join(exercisePath, 'package.json');
-    if (!fs.existsSync(packageJsonPath)) {
-      return false;
-    }
-    try {
-      const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
-      // Check if there's a test script or jest configuration
-      return !!(
-        packageJson.scripts?.test ||
-        packageJson.devDependencies?.jest ||
-        packageJson.dependencies?.jest ||
-        packageJson.jest ||
-        fs.existsSync(path.join(exercisePath, 'jest.config.js')) ||
-        fs.existsSync(path.join(exercisePath, 'jest.config.json'))
-      );
-    } catch (error) {
-      return false;
-    }
-  }
-  async evaluate(exercisePath: string): Promise<TestResult> {
-    try {
-      // First try to install dependencies if node_modules doesn't exist
-      const nodeModulesPath = path.join(exercisePath, 'node_modules');
-      if (!fs.existsSync(nodeModulesPath)) {
-        try {
-          execSync('npm install', {
-            cwd: exercisePath,
-            stdio: 'pipe',
-            timeout: 60000 // 60 second timeout
-          });
-        } catch (installError) {
-          // Continue anyway, maybe dependencies are not needed
-          console.warn(`Failed to install dependencies in ${exercisePath}:`, installError);
-        }
-      }
-      // Try to run tests with JSON output
-      let command = 'npm test';
-      // Check if we can use Jest directly with JSON reporter
-      const packageJsonPath = path.join(exercisePath, 'package.json');
-      if (fs.existsSync(packageJsonPath)) {
-        const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
-        // If jest is available, use it directly with JSON reporter
-        if (packageJson.devDependencies?.jest || packageJson.dependencies?.jest) {
-          command = 'npx jest --json --verbose';
-        } else if (packageJson.scripts?.test) {
-          // Try to modify the test script to include JSON output
-          const testScript = packageJson.scripts.test;
-          if (testScript.includes('jest')) {
-            command = `${testScript} --json --verbose`;
-          }
-        }
-      }
-      const output = execSync(command, {
-        cwd: exercisePath,
-        stdio: 'pipe',
-        encoding: 'utf8',
-        timeout: 120000 // 2 minute timeout for tests
-      });
-      return this.parseJestOutput(output);
-    } catch (error: any) {
-      // Jest exits with non-zero code when tests fail, so we need to parse the output
-      if (error.stdout) {
-        try {
-          return this.parseJestOutput(error.stdout);
-        } catch (parseError) {
-          // If JSON parsing fails, try to extract basic info from text output
-          return this.parseTextOutput(error.stdout || error.stderr || '');
-        }
-      }
-      return {
-        passed: 0,
-        failed: 0,
-        total: 0,
-        success: false,
-        output: error.message || 'Test execution failed',
-        errorMessage: error.message,
-        details: error
-      };
-    }
-  }
-  private parseJestOutput(output: string): TestResult {
-    try {
-      // Try to find JSON output in the string
-      const lines = output.split('\n');
-      let jsonLine = '';
-      for (const line of lines) {
-        const trimmed = line.trim();
-        if (trimmed.startsWith('{') && (trimmed.includes('"success"') || trimmed.includes('"numTotalTests"'))) {
-          jsonLine = trimmed;
-          break;
-        }
-      }
-      if (jsonLine) {
-        const result = JSON.parse(jsonLine);
-        return {
-          passed: result.numPassedTests || 0,
-          failed: result.numFailedTests || 0,
-          total: result.numTotalTests || 0,
-          skipped: result.numPendingTests || 0,
-          success: result.success || false,
-          output: output,
-          details: result
-        };
-      }
-    } catch (error) {
-      // Fall back to text parsing
-    }
-    return this.parseTextOutput(output);
-  }
-  private parseTextOutput(output: string): TestResult {
-    // Try to parse Jest text output
-    let passed = 0;
-    let failed = 0;
-    let total = 0;
-    let success = false;
-    // Look for Jest summary patterns
-    const passedMatch = output.match(/(\d+) passed/);
-    const failedMatch = output.match(/(\d+) failed/);
-    const totalMatch = output.match(/(\d+) total/);
-    if (passedMatch) passed = parseInt(passedMatch[1]);
-    if (failedMatch) failed = parseInt(failedMatch[1]);
-    if (totalMatch) total = parseInt(totalMatch[1]);
-    // If we couldn't find specific numbers, try other patterns
-    if (total === 0) {
-      // Look for "Tests: " summary
-      const testsMatch = output.match(/Tests:\s+(\d+)\s+failed,\s+(\d+)\s+passed,\s+(\d+)\s+total/);
-      if (testsMatch) {
-        failed = parseInt(testsMatch[1]);
-        passed = parseInt(testsMatch[2]);
-        total = parseInt(testsMatch[3]);
-      } else {
-        // Look for individual test results
-        const testResults = output.match(/✓|✗|PASS|FAIL/g);
-        if (testResults) {
-          total = testResults.length;
-          passed = testResults.filter(r => r === '✓' || r === 'PASS').length;
-          failed = total - passed;
-        }
-      }
-    }
-    success = failed === 0 && total > 0;
-    return {
-      passed,
-      failed,
-      total,
-      success,
-      output,
-      errorMessage: success ? undefined : 'Some tests failed'
-    };
-  }
-}

package/benchmarks/src/evaluators/index.ts DELETED Viewed

@@ -1,3 +0,0 @@
-export * from './types';
-export * from './JavaScriptEvaluator';
-export * from './EvaluatorRegistry';

package/benchmarks/src/evaluators/types.ts DELETED Viewed

@@ -1,22 +0,0 @@
-export interface TestResult {
-  passed: number;
-  failed: number;
-  total: number;
-  skipped?: number;
-  success: boolean;
-  output: string;
-  errorMessage?: string;
-  details?: any; // Raw test runner output
-}
-export interface ExerciseEvaluator {
-  language: string;
-  canEvaluate(exercisePath: string): boolean;
-  evaluate(exercisePath: string): Promise<TestResult>;
-}
-export interface TestEvaluationResult {
-  exerciseName: string;
-  testResult: TestResult;
-  evaluatedBy: string; // Which evaluator was used
-}

package/benchmarks/src/index.ts DELETED Viewed

@@ -1,3 +0,0 @@
-export { BenchmarkRunner } from './runner';
-export * from './types';
-import 'dotenv/config'

package/benchmarks/src/providers.ts DELETED Viewed

@@ -1,13 +0,0 @@
-import { AIClient, HttpClient } from "../../ts_build/src/clients";
-export async function registerProvider(
-  provider: string,
-  url: string,
-  headers: Record<string, string>,
-  clients: AIClient
-): Promise<void> {
-  const client = new HttpClient(url, headers);
-  clients.registerClient(provider, client);
-  await clients.loadProviderModels(provider);
-}