npm - claude-evolve - Versions diffs - 1.0.0 - Mend

claude-evolve 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/BRIEF.md +41 -0
package/bin/claude-evolve +4 -0
package/bin/claude-evolve-analyze +173 -0
package/bin/claude-evolve-ideate +206 -0
package/bin/claude-evolve-main +126 -0
package/bin/claude-evolve-run +248 -0
package/bin/claude-evolve-setup +55 -0
package/docs/CLAUDE-NOTES.md +57 -0
package/docs/IDEAS.md +168 -0
package/docs/PLAN.md +213 -0
package/docs/QUESTIONS.md +211 -0
package/lib/editor.sh +74 -0
package/package.json +20 -0
package/templates/BRIEF.md +21 -0
package/templates/algorithm.py +33 -0
package/templates/evaluator.py +76 -0

package/bin/claude-evolve-run ADDED Viewed

@@ -0,0 +1,248 @@
+#!/bin/bash
+# claude-evolve run - process the oldest pending candidate listed in
+# evolution/evolution.csv. Must be started from the directory that contains
+# the evolution/ workspace.
+# Usage: claude-evolve run [--timeout <sec>] [--help]
+set -e
+# Parse arguments
+timeout_seconds=""
+while [[ $# -gt 0 ]]; do
+  case $1 in
+  --timeout)
+    # Validate by pattern only. A numeric test such as "-eq 0" evaluates the
+    # value arithmetically, so 08 or 09 (invalid octal) would make bash print
+    # its own arithmetic error. Requiring at least one non-zero digit rejects
+    # a missing value, non-digits and 0/00/... in a single check.
+    if [[ ! ${2:-} =~ ^0*[1-9][0-9]*$ ]]; then
+      echo "[ERROR] --timeout requires a positive integer (seconds)" >&2
+      exit 1
+    fi
+    timeout_seconds="$2"
+    shift 2
+    ;;
+  --help)
+    cat <<EOF
+claude-evolve run - Execute evolution candidates
+USAGE:
+  claude-evolve run [OPTIONS]
+OPTIONS:
+  --timeout <sec>  Kill evaluator after specified seconds (default: no timeout)
+  --help           Show this help message
+DESCRIPTION:
+  Processes the oldest pending candidate from evolution.csv by:
+  1. Generating algorithm mutation using Claude
+  2. Running evaluator.py on the generated algorithm
+  3. Updating CSV with performance score and completion status
+  Use --timeout to prevent runaway evaluations from blocking progress.
+EOF
+    exit 0
+    ;;
+  *)
+    echo "[ERROR] Unknown option: $1" >&2
+    exit 1
+    ;;
+  esac
+done
+echo "[INFO] Starting evolution run..."
+if [[ -n $timeout_seconds ]]; then
+  echo "[INFO] Using timeout: ${timeout_seconds} seconds"
+fi
+# Validate workspace: the directory and both required files must already exist
+if [[ ! -d evolution ]]; then
+  echo "[ERROR] Evolution directory not found. Run 'claude-evolve setup' first." >&2
+  exit 1
+fi
+for file in evolution.csv evaluator.py; do
+  if [[ ! -f evolution/$file ]]; then
+    echo "[ERROR] $file not found. Run 'claude-evolve setup' first." >&2
+    exit 1
+  fi
+done
+# Find oldest empty row (pure shell)
+# Scans the data rows of evolution/evolution.csv (the header line is skipped)
+# and prints the 1-based file line number of the first row whose performance
+# and status columns are both empty.
+# Outputs: the line number on stdout
+# Returns: 0 when a pending row was found, 1 otherwise
+# Note: fields are split on every comma, so a description that itself contains
+# a comma shifts the later columns and such a row is not recognised as pending.
+find_empty_row() {
+  local row_num=2 # Line 1 is the header, so the first data row is line 2
+  local line perf status
+  # "|| [[ -n $line ]]" keeps the final row when the file lacks a trailing
+  # newline: read still fills the variable but returns non-zero at EOF.
+  while IFS= read -r line || [[ -n $line ]]; do
+    # Tolerate CRLF line endings; otherwise the status column would hold "\r"
+    # and a pending row would look non-empty.
+    line=${line%$'\r'}
+    # Blank lines are not candidates, but they still count as file lines.
+    if [[ -n $line ]]; then
+      IFS=, read -r _ _ _ perf status <<<"$line"
+      if [[ -z $perf && -z $status ]]; then
+        echo "$row_num"
+        return 0
+      fi
+    fi
+    row_num=$((row_num + 1))
+  done < <(tail -n +2 evolution/evolution.csv)
+  return 1
+}
+# Print one line of evolution/evolution.csv (pure shell)
+# Arguments: $1 - 1-based line number (line 1 is the header)
+# Outputs:   the requested line on stdout
+get_csv_row() {
+  local line_no="$1"
+  sed -n "${line_no}p" evolution/evolution.csv
+}
+# Update CSV row (pure shell with temp file)
+# Rewrites evolution/evolution.csv, replacing the performance and status
+# columns of one row and copying every other line through unchanged.
+# Arguments:
+#   $1 - 1-based file line number of the row to update (line 1 is the header)
+#   $2 - new performance value (may be empty)
+#   $3 - new status value
+# The result is written to evolution/evolution.csv.tmp and then moved over the
+# original file.
+# Note: the target row is split on every comma, so a description that contains
+# a comma is not handled.
+update_csv_row() {
+  local row_num="$1"
+  local performance="$2"
+  local status="$3"
+  local csv_file="evolution/evolution.csv"
+  local temp_file="${csv_file}.tmp"
+  local current_row=1
+  local line id based_on desc
+  # "|| [[ -n $line ]]" keeps a final line that has no trailing newline;
+  # without it that row would silently vanish from the rewritten file.
+  while IFS= read -r line || [[ -n $line ]]; do
+    if [[ $current_row -eq $row_num ]]; then
+      # Update this row: keep the first three columns, replace the rest
+      IFS=, read -r id based_on desc _ <<<"$line"
+      printf '%s\n' "$id,$based_on,$desc,$performance,$status"
+    else
+      # Keep the original line verbatim (no re-splitting, so blank lines and
+      # short rows are not turned into padded ",,,," rows)
+      printf '%s\n' "$line"
+    fi
+    current_row=$((current_row + 1))
+  done <"$csv_file" >"$temp_file"
+  mv "$temp_file" "$csv_file"
+}
+# Find next candidate
+if ! row_num=$(find_empty_row); then
+  echo "[ERROR] No empty rows found in CSV. Run 'claude-evolve ideate' to add candidates." >&2
+  exit 1
+fi
+# Get row data (fields are split on commas; performance/status are empty here)
+row_data=$(get_csv_row "$row_num")
+IFS=, read -r id based_on_id description performance status <<<"$row_data"
+# Clean up description (remove one leading and one trailing double quote)
+description=${description#\"}
+description=${description%\"}
+echo "[INFO] Processing candidate ID: $id"
+echo "[INFO] Description: $description"
+echo "[INFO] Based on ID: $based_on_id"
+# Set interrupt handler so Ctrl-C records the row as "interrupted"
+trap 'update_csv_row "$row_num" "" "interrupted"; echo "[INFO] Evolution interrupted"; exit 130' INT
+# Mark as running
+update_csv_row "$row_num" "" "running"
+# Determine parent algorithm: an empty or "0" parent ID means the base algorithm
+parent_file="evolution/algorithm.py"
+if [[ -n $based_on_id && $based_on_id != "0" ]]; then
+  parent_file="evolution/evolution_id${based_on_id}.py"
+  if [[ ! -f $parent_file ]]; then
+    echo "[ERROR] Parent algorithm file not found: $parent_file" >&2
+    update_csv_row "$row_num" "" "failed"
+    exit 1
+  fi
+fi
+echo "[INFO] Using parent algorithm: $parent_file"
+# Generate mutation
+output_file="evolution/evolution_id${id}.py"
+echo "[INFO] Generating algorithm mutation..."
+# Copy parent algorithm to output file first. The copy is checked explicitly:
+# under "set -e" a bare failing cp would exit and leave the row stuck in "running".
+if ! cp "$parent_file" "$output_file"; then
+  echo "[ERROR] Failed to copy $parent_file to $output_file" >&2
+  update_csv_row "$row_num" "" "failed"
+  exit 1
+fi
+echo "[INFO] Copied parent algorithm to: $output_file"
+# Check for claude CLI (CLAUDE_CMD overrides the executable name)
+claude_cmd="${CLAUDE_CMD:-claude}"
+if ! command -v "$claude_cmd" >/dev/null 2>&1; then
+  echo "[ERROR] Claude CLI not found. Please install claude-cli." >&2
+  update_csv_row "$row_num" "" "failed"
+  exit 1
+fi
+# Create mutation prompt
+prompt="You are an AI assistant helping to evolve algorithms through mutations. Please modify the Python algorithm file at $output_file based on the requested modification.
+CONTEXT:
+$(cat evolution/BRIEF.md 2>/dev/null || echo "No brief available")
+ALGORITHM FILE TO MODIFY: $output_file
+REQUESTED MODIFICATION:
+$description
+INSTRUCTIONS:
+1. Read the existing algorithm file at $output_file
+2. Apply the requested modification while preserving the core structure
+3. Ensure the modified algorithm maintains the same interface (function signatures)
+4. Include proper error handling and documentation
+5. Overwrite the file with your improved version
+6. Return ONLY the complete Python code without explanation
+The output should be a complete, executable Python file that builds upon the existing algorithm."
+# Generate mutation (the prompt is sent on stdin, the code is read from stdout)
+if ! generated_code=$(echo "$prompt" | "$claude_cmd"); then
+  echo "[ERROR] Claude failed to generate algorithm mutation" >&2
+  update_csv_row "$row_num" "" "failed"
+  exit 1
+fi
+# Refuse blank output instead of truncating the candidate file with it
+if [[ -z ${generated_code//[[:space:]]/} ]]; then
+  echo "[ERROR] Claude returned empty output; leaving $output_file unchanged" >&2
+  update_csv_row "$row_num" "" "failed"
+  exit 1
+fi
+# Save generated algorithm (overwrite the copied file)
+printf '%s\n' "$generated_code" >"$output_file"
+echo "[INFO] Updated algorithm: $output_file"
+# Run evaluator
+# evaluator.py receives the candidate file path as its only argument; its
+# stdout and stderr are captured together in eval_output.
+echo "[INFO] Running evaluation..."
+eval_output=""
+eval_exit_code=0
+if [[ -n $timeout_seconds ]]; then
+  echo "[INFO] Starting evaluation with ${timeout_seconds}s timeout..."
+  if eval_output=$(timeout "$timeout_seconds" python3 evolution/evaluator.py "$output_file" 2>&1); then
+    eval_exit_code=0
+  else
+    eval_exit_code=$?
+    # timeout(1) exits with status 124 when it had to kill the command
+    if [[ $eval_exit_code -eq 124 ]]; then
+      echo "[ERROR] Evaluation timed out after ${timeout_seconds} seconds" >&2
+      update_csv_row "$row_num" "" "timeout"
+      exit 1
+    fi
+  fi
+else
+  if eval_output=$(python3 evolution/evaluator.py "$output_file" 2>&1); then
+    eval_exit_code=0
+  else
+    eval_exit_code=$?
+  fi
+fi
+# Print the first numeric value stored under JSON key $1 in eval_output
+# (simple grep approach). Prints nothing when the key is absent or its value
+# is not a number. Accepts an optional leading minus sign and exponent, and
+# keeps only the first match so the result is always a single line.
+extract_metric() {
+  grep -oE "\"$1\"[[:space:]]*:[[:space:]]*-?[0-9.]+([eE][-+]?[0-9]+)?" <<<"$eval_output" |
+    head -n 1 | cut -d: -f2 | tr -d '[:space:]'
+}
+# Process results
+if [[ $eval_exit_code -eq 0 ]]; then
+  # Prefer the "score" field and fall back to "performance". Success is decided
+  # by whether a value was extracted: the pipeline's exit status is that of its
+  # last stage, so it cannot signal a missing field.
+  score=$(extract_metric score)
+  if [[ -z $score ]]; then
+    score=$(extract_metric performance)
+  fi
+  if [[ -n $score ]]; then
+    update_csv_row "$row_num" "$score" "completed"
+    echo "[INFO] ✓ Evaluation completed successfully"
+    echo "[INFO] Performance score: $score"
+  else
+    echo "[ERROR] No score found in evaluator output" >&2
+    echo "[ERROR] Output: $eval_output" >&2
+    update_csv_row "$row_num" "" "failed"
+    exit 1
+  fi
+else
+  echo "[ERROR] Evaluator failed with exit code $eval_exit_code" >&2
+  echo "[ERROR] Output: $eval_output" >&2
+  update_csv_row "$row_num" "" "failed"
+  exit 1
+fi
+echo "[INFO] Evolution cycle completed successfully!"

package/bin/claude-evolve-setup ADDED Viewed

@@ -0,0 +1,55 @@
+#!/bin/bash
+set -e
+# Get script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+# Source shared editor functions
+source "$PROJECT_ROOT/lib/editor.sh"
+echo "[INFO] Initializing evolution workspace..."
+# Create evolution directory
+if [[ ! -d evolution ]]; then
+  echo "[INFO] Creating evolution/ directory..."
+  mkdir -p evolution
+else
+  echo "[INFO] evolution/ directory already exists"
+fi
+# Copy template files
+for file in BRIEF.md algorithm.py evaluator.py; do
+  if [[ ! -f evolution/$file ]]; then
+    if [[ -f "$PROJECT_ROOT/templates/$file" ]]; then
+      echo "[INFO] Copying $file from templates..."
+      cp "$PROJECT_ROOT/templates/$file" "evolution/$file"
+    else
+      echo "[ERROR] Template file not found: $PROJECT_ROOT/templates/$file"
+      exit 1
+    fi
+  else
+    echo "[INFO] $file already exists, skipping"
+  fi
+done
+# Create CSV with header
+if [[ ! -f evolution/evolution.csv ]]; then
+  echo "[INFO] Creating evolution.csv with header..."
+  echo "id,basedOnId,description,performance,status" >evolution/evolution.csv
+else
+  echo "[INFO] evolution.csv already exists, skipping"
+fi
+# Open editor for BRIEF.md if interactive and file is new or empty
+if [[ -t 1 ]] && [[ ! -s evolution/BRIEF.md ]]; then
+  echo "[INFO] Opening BRIEF.md for editing..."
+  open_with_editor evolution/BRIEF.md
+fi
+echo "[INFO] Evolution workspace setup complete!"
+echo "[INFO] Next steps:"
+echo "[INFO]   1. Edit evolution/BRIEF.md to describe your optimization problem"
+echo "[INFO]   2. Customize evolution/evaluator.py for your evaluation criteria"
+echo "[INFO]   3. Run 'claude-evolve ideate' to generate initial candidates"

package/docs/CLAUDE-NOTES.md ADDED Viewed

@@ -0,0 +1,57 @@
+# Claude-Evolve – AI Working Notes
+These notes capture my current understanding of the project, the major design choices already fixed in the brief / Q&A, and the open items that still require clarification. They are **living notes** – feel free to edit or extend them during the implementation.
+## 1. Project Understanding
+1. **Purpose** – Provide a lightweight command-line tool (`claude-evolve`) that orchestrates an _algorithm-evolution_ workflow driven by Claude AI. The tool repeatedly:
+   • plans → develops a candidate → runs the evaluator → records the result → lets the user/AI propose the next mutation.
+2. **Inspiration** – It mirrors the successful `claude-fsd` package (software delivery), but targets algorithm R&D. The entire CLI is implemented as simple **Bash shell scripts** published as an **npm** package – no compiled binaries, no extra runtime besides Bash and Node.
+3. **Artifacts produced**
+   • `evolution/BRIEF.md`  – high-level goal of the algorithm being optimised
+   • `evolution/evolution.csv` – log of all candidates (id,basedOnId,description,performance,status)
+   • `evolution/evolution_details.md` – free-form explanation / commentary per candidate
+   • `evolution/evolution_idNNN.<ext>` – snapshot of the concrete algorithm evaluated
+4. **Evaluator contract** – An _executable_ (often Python, but not required) that receives the candidate file path as its sole argument and prints a **single-line JSON dict** to stdout, e.g. `{"score": 0.87}`. Claude-evolve reads the numeric `score` field (falling back to `performance` when `score` is absent) and records it as the candidate's "performance" (higher is better).
+## 2. Key Technical Decisions & Rationale
+• **Shell scripts in an npm package** – keeps the runtime guarantees identical to `claude-fsd`, leverages cross-platform Node installer, and avoids the overhead of compiling/packaging native binaries.
+• **LLM-driven search** – instead of classic genetic algorithms, we rely on Claude to suggest mutations based on the project history and metrics. The human operator can inject ideas at any point (`claude-evolve ideate`).
+• **File-system persistence** – CSV + Markdown files are trivial to diff and review in Git. Snapshotting each algorithm version guarantees perfect reproducibility.
+• **Single-metric MVP** – Start with exactly one performance number to keep the loop simple; extend to multi-metric later (post-MVP roadmap).
+• **Menu _and_ sub-commands** – An interactive menu for exploratory use, plus explicit sub-commands for CI automation, following `claude-fsd` precedent.
+• **Visualization as PNG via Node** – Node libraries (e.g. `chartjs-node-canvas`) generate a static PNG for `claude-evolve analyze`, sidestepping browser dependencies.
+• **Git-first workflow** – All artifacts (except large training artefacts / checkpoints) tracked in Git. Users work on feature branches; PRs reviewed like any other code change.
+• **Strict YAGNI** – Avoid prematurely implementing fancy features (branching selection strategies, cloud storage, etc.) until a real need emerges.
+## 3. Assumptions & Constraints
+1. `claude` CLI is installed and authenticated in the user’s environment.
+2. Users have a POSIX-style shell environment (bash/zsh/sh) and Node ≥16.
+3. Evaluations may be _slow_ and resource-intensive; scheduling and cost control are left to the evaluator implementation.
+4. The repository **should not** store large binary artefacts – evaluator is responsible for external storage if needed.
+5. Concurrency: MVP evaluates _one_ candidate at a time; optional parallelism (max-N background processes) is documented as a stretch goal.
+## 4. Areas Requiring Future Clarification
+• **Charting implementation** – exact Node library and minimum PNG spec (size, axis labels).
+• **Pre-commit policy** – exactly which linters (shellcheck, shfmt, prettier-markdown, …) are required.
+• **Timeout/Resource limits** – default wall-clock limit for an evaluation and how to surface that to the user.
+• **Multi-metric support** – data model changes (`evolution.csv`) once we decide to support >1 metric.
+• **Security/PII** – explicit organisational policy might evolve (currently "no constraints").
+• **Distribution** – npm org name, versioning scheme, release cadence.
+---
+These notes should evolve alongside the code. When a decision is implemented, reflect it here so future contributors can quickly understand the rationale.

package/docs/IDEAS.md ADDED Viewed

@@ -0,0 +1,168 @@
+# Claude-Evolve Future Ideas
+This file tracks potential enhancements and features that could be added to claude-evolve in the future.
+## CLI Enhancements
+### Interactive Menu Improvements
+- Add keyboard shortcuts (arrow keys) for menu navigation
+- Implement command search/filtering in interactive mode
+- Add history of recent commands in interactive menu
+### CLI Usability
+- Add shell completion support (bash, zsh, fish)
+- Implement command aliases (e.g., `claude-evolve i` for `ideate`)
+- Add progress bars for long-running operations
+- Colorized output with configurable themes
+- Implement timeout presets (--timeout-short, --timeout-medium, --timeout-long) for common use cases
+- Add timeout estimation based on historical evaluator performance
+- Create timeout warnings when approaching the limit during evaluation
+- Add configurable default timeout in project configuration file
+### Ideation Enhancements
+- Add a `--from-file` option to ideate command for bulk importing ideas
+- Implement idea similarity detection using embeddings or simple text comparison
+- Add progress bar for multi-idea generation
+- Create idea templates for common algorithm patterns
+- Add support for idea categories or tags for better organization
+- Implement idea rating/scoring before evaluation
+- Add interactive mode for refining AI-generated ideas
+- Cache BRIEF.md content to improve performance
+## Testing Framework Enhancements
+### Test Coverage
+- Add integration tests for template copying functionality
+- Implement test mocks for Claude API calls
+- Add performance/benchmark tests for CLI operations
+- Create end-to-end workflow tests
+- Add comprehensive unit tests for CSV manipulation functions in lib/common.sh
+- Fix run command implementation to resolve test failures (prioritize over environment blame)
+- Add tests for concurrent execution scenarios when parallel mode is implemented
+- Create stress tests for large CSV files and many candidates
+- Implement proper error handling in cmd_run to prevent silent failures
+- Add debugging output to understand why tests are failing in npm test environment
+### Test Infrastructure
+- Add test coverage reporting
+- Implement parallel test execution
+- Add visual regression testing for generated charts
+- Create test data generators and fixtures
+## Development Workflow
+### Code Quality
+- Add more sophisticated pre-commit hooks
+- Add pre-commit hook to run shellcheck and catch linting issues before commits
+- Implement automated dependency vulnerability scanning
+- Add code complexity analysis
+- Create automated documentation generation
+- Add automatic changelog generation from conventional commits
+- Implement semantic versioning based on conventional commit types
+- Consider adding commit message linting for conventional commit standards (✅ COMPLETED)
+- Add git hook integrity checks to prevent legacy hook conflicts
+- Implement automated commit message template generation for consistency
+### Build System
+- Add Docker containerization for consistent development environment
+- Implement cross-platform build verification
+- Add automated changelog generation
+- Create release automation workflows
+## Future Phase Ideas
+### Enhanced Error Handling
+- Implement structured error codes and recovery suggestions
+- Add error telemetry collection (with privacy controls)
+- Create error reproduction scripts for debugging
+- Add graceful degradation modes
+### Configuration System
+- Add configuration file support (.claude-evolve.json)
+- Implement environment-specific configurations
+- Add configuration validation and migration tools
+- Create configuration templates for common scenarios
+### Monitoring and Observability
+- Add execution time tracking and optimization suggestions
+- Implement resource usage monitoring (memory, CPU)
+- Create performance regression detection
+### Testing Infrastructure Improvements
+- **Automated Testing Matrix**: Set up GitHub Actions CI pipeline with multiple OS testing (Ubuntu, macOS, Windows WSL)
+- **Shell Script Coverage**: Implement code coverage reporting for shell scripts using tools like bashcov or kcov
+- **Performance Benchmarking**: Add automated performance tests to detect CLI execution speed regressions
+- **Integration Test Environments**: Create Docker-based test environments for consistent testing across platforms
+- **Test Data Management**: Implement test fixture management for reproducible testing scenarios
+- **Parallel Test Execution**: Optimize test suite execution time through parallel test running
+- **Test Result Reporting**: Add comprehensive test result reporting with trend analysis
+- **Mock Service Improvements**: Enhance Claude API mocking with more realistic response scenarios and error conditions
+- **Bats Environment Documentation**: Document the TMPDIR requirements for Bats tests in the README
+- **Cross-platform Test Compatibility**: Verify TMPDIR solution works across different platforms
+- **Test Runner Consolidation**: Consider whether to maintain both Bats and shell-based test runners
+### Enhanced Timeout Management
+- **Granular Timeout Controls**: Support timeout specification in minutes/hours (e.g., `--timeout 5m`, `--timeout 2h`)
+- **Process Group Management**: Implement proper process group cleanup to handle evaluators that spawn subprocesses
+- **Timeout Recovery Strategies**: Add automatic retry mechanisms for timeout scenarios with backoff logic
+- **Cross-platform Timeout**: Ensure consistent timeout behavior across Linux, macOS, and Windows WSL environments
+- **Timeout Monitoring**: Add real-time timeout countdown display during evaluation execution
+- **Smart Timeout Recommendations**: Analyze historical evaluation times to suggest optimal timeout values
+- Add execution analytics and insights
+- Implement CSV schema validation to catch column mismatch issues at runtime
+- Consider using a more robust CSV parsing library or approach to prevent manual column indexing errors
+## Architecture Improvements
+### Modularity
+- Extract common CLI patterns into reusable library
+- Implement plugin architecture for extensibility
+- Add support for custom command extensions
+- Create standardized interfaces for evaluators
+### Performance
+- Implement caching for frequently accessed data
+- Add lazy loading for heavy operations
+- Optimize JSON parsing and file operations
+- Create efficient batch processing modes
+## Documentation and User Experience
+### Documentation
+- Add man page generation
+- Create interactive tutorial mode
+- Implement contextual help system
+- Add troubleshooting guides and FAQ
+### User Experience
+- Add onboarding wizard for new projects
+- Implement project templates and examples
+- Create guided workflow suggestions
+- Add undo/rollback functionality for destructive operations
+## Repository Management
+### Branch Protection Enhancements
+- Consider adding required status checks once CI/CD is implemented in Phase 7
+- Evaluate enabling linear history requirement to simplify merge scenarios
+- Add automated branch protection rule updates when new CI checks are added
+- Implement branch protection rule validation/testing to ensure proper configuration
+- Consider adding protection for other important branches (develop, release branches)
+- Add monitoring/alerting for branch protection rule changes