npm - agentic-loop - Versions diffs - 3.10.2 → 3.11.0 - Mend

agentic-loop 3.10.2 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/.claude/commands/api.md +496 -0
package/.claude/commands/aws.md +408 -0
package/bin/ralph.sh +2 -1
package/package.json +2 -1
package/ralph/code-check.sh +307 -0
package/ralph/loop.sh +80 -27
package/ralph/prd-check.sh +498 -0
package/ralph/utils.sh +66 -351
package/templates/config/elixir.json +1 -1
package/templates/config/fastmcp.json +1 -1
package/templates/config/fullstack.json +1 -1
package/templates/config/go.json +1 -1
package/templates/config/minimal.json +1 -1
package/templates/config/node.json +1 -1
package/templates/config/python.json +1 -1
package/templates/config/rust.json +1 -1
package/ralph/verify.sh +0 -106

package/ralph/code-check.sh ADDED Viewed

@@ -0,0 +1,307 @@
+#!/usr/bin/env bash
+# shellcheck shell=bash
+#
+# code-check.sh - Code verification pipeline for Ralph autonomous development loop
+#
+# ============================================================================
+# OVERVIEW
+# ============================================================================
+# After Claude writes code for a story, this pipeline verifies the work before
+# marking it complete. If verification fails, context is saved and Claude
+# retries with knowledge of what went wrong.
+#
+# Philosophy: Claude handles complex verification (visual, UX, logic) using
+# MCP browser tools. Ralph handles deterministic checks (lint, tests, commands).
+#
+# ============================================================================
+# VERIFICATION PIPELINE (5 steps, fail-fast)
+# ============================================================================
+#
+#   [1/5] Lint checks      - ESLint, Ruff, golangci-lint, etc.
+#   [2/5] Tests            - Verify test files exist + run unit tests
+#   [3/5] PRD test steps   - Execute testSteps commands from prd.json
+#   [4/5] API smoke test   - Hit health endpoint (if configured)
+#   [5/5] Frontend smoke   - Load page, check for errors (if configured)
+#
+# Pipeline stops at first failure to save time.
+#
+# ============================================================================
+# FAILURE CONTEXT & LEARNING
+# ============================================================================
+# When verification fails, save_failure_context() ACCUMULATES errors across
+# retries (not just the last failure). This lets Claude see patterns:
+#
+#   === Attempt 1 failed for TASK-001 ===
+#   ERROR: relation "users" does not exist
+#   ---
+#   === Attempt 2 failed for TASK-001 ===
+#   ERROR: relation "users" does not exist
+#   ---
+#
+# Seeing "same error 3 times" signals a structural issue (missing migration,
+# wrong prerequisites) rather than a simple bug to fix.
+#
+# Context is:
+#   - Appended per attempt (not overwritten)
+#   - Trimmed to the most recent 200 lines to avoid huge prompts
+#   - Cleared when switching to a new story
+#   - Cleared on success
+#
+# STRUCTURAL ERROR DETECTION:
+# Some errors indicate structural issues (not code bugs) that can't be fixed
+# by retrying. These are detected and flagged with actionable suggestions:
+#
+#   - "column does not exist" → Suggest DB reset (schema mismatch)
+#   - "pending migration"     → Suggest running migrations
+#   - "connection refused"    → Suggest starting services
+#
+# This prevents infinite retry loops on issues that need manual intervention.
+#
+# ============================================================================
+# CONFIGURATION (via .ralph/config.json)
+# ============================================================================
+#
+#   .checks.lint        - Run linting (default: true)
+#   .checks.test        - Run tests: true, false, or "final" (last story only)
+#   .checks.requireTests - Require test files for new code (default: false)
+#   .api.baseUrl        - API URL for smoke tests
+#   .api.healthEndpoint - Health check path (default: /api/v1/health)
+#   .urls.frontend      - Frontend URL for smoke tests
+#
+# ============================================================================
+# MODULES
+# ============================================================================
+#   verify/lint.sh  - Linting and auto-fix (run_configured_checks)
+#   verify/tests.sh - Test existence + execution (verify_test_files_exist,
+#                     run_unit_tests, verify_prd_criteria)
+#   verify/api.sh   - API/frontend smoke tests (run_api_smoke_test,
+#                     run_frontend_smoke_test)
+#
+# DEPENDENCIES: Requires utils.sh to be sourced first (for get_config, print_*)
+#
+# ============================================================================
+# Source verification modules
+VERIFY_DIR="${RALPH_LIB:-$(dirname "${BASH_SOURCE[0]}")}"
+source "$VERIFY_DIR/verify/lint.sh"
+source "$VERIFY_DIR/verify/tests.sh"
+source "$VERIFY_DIR/verify/api.sh"
+run_verification() {
+  local story="$1"
+  echo ""
+  print_info "=== Verification: $story ==="
+  echo ""
+  # Get story type for targeted checks
+  local story_type
+  story_type=$(jq -r --arg id "$story" '.stories[] | select(.id==$id) | .type // "general"' "$RALPH_DIR/prd.json" 2>/dev/null)
+  export RALPH_STORY_TYPE="$story_type"
+  local failed=0
+  # ========================================
+  # STEP 1: Run lint checks
+  # ========================================
+  echo "  [1/5] Running lint checks..."
+  if ! run_configured_checks "$story_type"; then
+    failed=1
+  fi
+  # ========================================
+  # STEP 2: Verify tests exist + run them
+  # ========================================
+  if [[ $failed -eq 0 ]]; then
+    echo ""
+    echo "  [2/5] Running tests..."
+    # First check that test files exist for new code
+    if ! verify_test_files_exist; then
+      failed=1
+    elif ! run_unit_tests; then
+      failed=1
+    fi
+  fi
+  # ========================================
+  # STEP 3: Run PRD test steps
+  # ========================================
+  if [[ $failed -eq 0 ]]; then
+    echo ""
+    echo "  [3/5] Running PRD test steps..."
+    if ! verify_prd_criteria "$story"; then
+      failed=1
+    fi
+  fi
+  # ========================================
+  # STEP 4: API smoke test (if configured)
+  # ========================================
+  if [[ $failed -eq 0 ]]; then
+    if ! run_api_smoke_test "$story"; then
+      failed=1
+    fi
+  fi
+  # ========================================
+  # STEP 5: Frontend smoke test (if configured)
+  # ========================================
+  if [[ $failed -eq 0 ]]; then
+    if ! run_frontend_smoke_test "$story"; then
+      failed=1
+    fi
+  fi
+  # ========================================
+  # Final result
+  # ========================================
+  echo ""
+  if [[ $failed -eq 0 ]]; then
+    print_success "=== All verification passed ==="
+    return 0
+  else
+    print_error "=== Verification failed ==="
+    save_failure_context "$story"
+    return 1
+  fi
+}
+# ============================================================================
+# FAILURE CONTEXT
+# ============================================================================
+# Accumulates failure history across retries so Claude can identify patterns.
+# If the same error appears multiple times, it's likely a structural issue
+# (missing prerequisites, wrong approach) not a simple bug.
+#
+# Output format in .ralph/last_failure.txt:
+#   === Attempt 1 failed for STORY-ID ===
+#   <verification output>
+#   ---
+#   === Attempt 2 failed for STORY-ID ===
+#   <verification output>
+#   ---
+#
+# Globals:   RALPH_DIR (read)
+# Arguments: $1 - story id
+# Outputs:   appends to $RALPH_DIR/last_failure.txt; anything printed by
+#            _detect_structural_errors goes to stdout
+#
+save_failure_context() {
+  local story="$1"
+  local context_file="$RALPH_DIR/last_failure.txt"
+  # Get current attempt number from prd.json
+  local attempt
+  attempt=$(jq -r --arg id "$story" '.stories[] | select(.id==$id) | .retryCount // 1' "$RALPH_DIR/prd.json" 2>/dev/null || echo "1")
+  # jq exits 0 with no output when no story matches the id, so the `|| echo`
+  # fallback above does not fire in that case. Default explicitly so the
+  # attempt header always carries a number.
+  attempt="${attempt:-1}"
+  # Append to failure history (not overwrite)
+  {
+    echo ""
+    echo "=== Attempt $attempt failed for $story ==="
+    echo ""
+    if [[ -f "$RALPH_DIR/last_verification.log" ]]; then
+      # Shorter excerpt per attempt since we're accumulating
+      tail -n 50 "$RALPH_DIR/last_verification.log"
+    fi
+    echo ""
+    echo "---"
+  } >> "$context_file"
+  # Cap file size - keep last ~200 lines to avoid huge prompts
+  if [[ -f "$context_file" ]]; then
+    local line_count
+    line_count=$(wc -l < "$context_file" | tr -d ' ')
+    if [[ $line_count -gt 200 ]]; then
+      # Write to a sibling temp file first; the original is replaced only if
+      # tail succeeded.
+      tail -n 200 "$context_file" > "$context_file.tmp" && mv "$context_file.tmp" "$context_file"
+    fi
+  fi
+  # Detect structural errors and add actionable suggestions
+  _detect_structural_errors "$context_file"
+}
+# ============================================================================
+# STRUCTURAL ERROR DETECTION
+# ============================================================================
+# Detects error patterns that indicate structural issues (not code bugs).
+# These can't be fixed by retrying - they need specific actions like DB reset.
+#
+# Arguments: $1 - path to the accumulated failure-context file
+# Outputs:   human-readable guidance on stdout; ">>> STRUCTURAL ISSUE" marker
+#            lines appended to the context file (at most once per issue type
+#            while the marker is still present in the file)
+# Returns:   0 immediately when the context file does not exist
+#
+_detect_structural_errors() {
+  local context_file="$1"
+  [[ -f "$context_file" ]] || return 0
+  # Snapshot the file once, so markers appended below are not re-scanned by
+  # the later pattern checks.
+  local error_content
+  error_content=$(<"$context_file")
+  # NOTE: the patterns are matched via here-strings rather than
+  # `echo ... | grep -q`. grep -q exits on the first match, which can kill the
+  # writer with SIGPIPE on large content; when the caller runs with
+  # `set -o pipefail` that makes the whole pipeline report failure even though
+  # the pattern matched.
+  #
+  # Schema/column errors - suggest DB reset
+  # Only show if not already detected (avoid duplicate markers on retry)
+  if grep -qiE "(column.*does not exist|relation.*does not exist|no such column|unknown column|undefined column)" <<<"$error_content" && \
+     ! grep -q ">>> STRUCTURAL ISSUE: Database schema mismatch" "$context_file" 2>/dev/null; then
+    echo ""
+    print_warning "STRUCTURAL ISSUE DETECTED: Database schema mismatch"
+    echo ""
+    echo "  The test database is missing columns/tables that the code expects."
+    echo "  This usually happens when:"
+    echo "    - Migrations were added but test DB wasn't reset"
+    echo "    - Models were modified without running migrations"
+    echo ""
+    echo "  SUGGESTED FIX (don't retry code - fix the schema):"
+    local reset_cmd
+    reset_cmd=$(get_config '.commands.resetDb' "")
+    if [[ -n "$reset_cmd" ]]; then
+      echo "    $reset_cmd"
+    else
+      echo "    # Add to .ralph/config.json:"
+      echo "    {\"commands\": {\"resetDb\": \"npm run db:reset:test\"}}"
+      echo ""
+      echo "    # Or run manually:"
+      echo "    dropdb test_db && createdb test_db && alembic upgrade head"
+    fi
+    echo ""
+    # Append suggestion to failure context for Claude
+    {
+      echo ""
+      echo ">>> STRUCTURAL ISSUE: Database schema mismatch"
+      echo ">>> ACTION NEEDED: Reset test database, don't just retry code"
+      echo ">>> This is NOT a code bug - the test DB is missing schema changes"
+    } >> "$context_file"
+  fi
+  # Migration pending errors
+  if grep -qiE "(pending migration|migrations are pending|migrate your database|alembic.*head)" <<<"$error_content" && \
+     ! grep -q ">>> STRUCTURAL ISSUE: Pending migrations" "$context_file" 2>/dev/null; then
+    echo ""
+    print_warning "STRUCTURAL ISSUE DETECTED: Pending migrations"
+    echo ""
+    echo "  Migrations need to be applied before tests can run."
+    echo ""
+    echo "  SUGGESTED FIX:"
+    local migrate_cmd
+    migrate_cmd=$(get_config '.migrations.command' "alembic upgrade head")
+    echo "    $migrate_cmd"
+    echo ""
+    {
+      echo ""
+      echo ">>> STRUCTURAL ISSUE: Pending migrations"
+      echo ">>> ACTION NEEDED: Run migrations before retrying"
+    } >> "$context_file"
+  fi
+  # Connection refused - service not running
+  if grep -qiE "(connection refused|ECONNREFUSED|could not connect|connection error)" <<<"$error_content" && \
+     ! grep -q ">>> STRUCTURAL ISSUE: Service not running" "$context_file" 2>/dev/null; then
+    echo ""
+    print_warning "STRUCTURAL ISSUE DETECTED: Service not running"
+    echo ""
+    echo "  A required service (database, API, etc.) is not running."
+    echo ""
+    echo "  SUGGESTED FIX:"
+    local dev_cmd
+    dev_cmd=$(get_config '.commands.dev' "docker compose up -d")
+    echo "    $dev_cmd"
+    echo ""
+    {
+      echo ""
+      echo ">>> STRUCTURAL ISSUE: Service not running"
+      echo ">>> ACTION NEEDED: Start required services before retrying"
+    } >> "$context_file"
+  fi
+}

package/ralph/loop.sh CHANGED Viewed

@@ -178,7 +178,12 @@ run_loop() {
   local iteration=0
   local last_story=""
   local consecutive_failures=0
-  local max_story_retries=5
+  local consecutive_timeouts=0
+  local max_story_retries
+  local max_timeouts=5  # Skip after 5 consecutive timeouts (likely too large/complex)
+  # Default to 15 retries - generous enough for transient issues, catches infinite loops
+  # Override with config.json: "maxStoryRetries": 25
+  max_story_retries=$(get_config '.maxStoryRetries' "15")
   local total_attempts=0
   local skipped_stories=()
   local start_time
@@ -226,34 +231,58 @@ run_loop() {
     ((total_attempts++))
-    # Track repeated failures on same story
+    # Track repeated failures on same story (also load from prd.json for restart persistence)
     if [[ "$story" == "$last_story" ]]; then
       ((consecutive_failures++))
-      # Circuit breaker: skip to next story after max retries
-      if [[ $consecutive_failures -gt $max_story_retries ]]; then
-        print_error "Circuit breaker: $story failed $max_story_retries times, skipping to next story"
-        echo ""
-        echo "  Saved failure context to: $RALPH_DIR/failures/$story.txt"
-        mkdir -p "$RALPH_DIR/failures"
-        cp "$RALPH_DIR/last_failure.txt" "$RALPH_DIR/failures/$story.txt" 2>/dev/null || true
-        # Clear failure context so it doesn't leak into next story
-        rm -f "$RALPH_DIR/last_failure.txt"
-        skipped_stories+=("$story")
-        # Mark as skipped (not passed, but move on)
-        jq --arg id "$story" '(.stories[] | select(.id==$id)) |= . + {skipped: true}' "$RALPH_DIR/prd.json" > "$RALPH_DIR/prd.json.tmp" && mv "$RALPH_DIR/prd.json.tmp" "$RALPH_DIR/prd.json"
-        last_story=""
-        consecutive_failures=0
-        continue
-      fi
-      # Quick retry - no delay needed (Claude API isn't rate-limited)
-      print_warning "Retry $consecutive_failures/$max_story_retries for $story"
     else
-      consecutive_failures=1
+      # New story - clear failure history from previous story
+      rm -f "$RALPH_DIR/last_failure.txt"
+      # Load retry count from prd.json (persists across restarts)
+      consecutive_failures=$(jq -r --arg id "$story" '.stories[] | select(.id==$id) | .retryCount // 0' "$RALPH_DIR/prd.json")
+      consecutive_failures=$((consecutive_failures + 1))
+      consecutive_timeouts=0
       last_story="$story"
     fi
+    # Persist retry count to prd.json (survives restarts)
+    jq --arg id "$story" --argjson count "$consecutive_failures" \
+      '(.stories[] | select(.id==$id)) |= . + {retryCount: $count}' \
+      "$RALPH_DIR/prd.json" > "$RALPH_DIR/prd.json.tmp" && mv "$RALPH_DIR/prd.json.tmp" "$RALPH_DIR/prd.json"
+    # Circuit breaker: skip to next story after max retries (prevents infinite loops)
+    # Note: This is NOT meant to stop legitimate retrying - 15 attempts is generous.
+    # If a story consistently fails after this many tries, it likely needs manual review
+    # (vague test steps, missing prerequisites, or fundamentally broken requirements).
+    if [[ $consecutive_failures -gt $max_story_retries ]]; then
+      print_error "Story $story has failed $consecutive_failures times - likely needs manual review"
+      echo ""
+      echo "  This usually means:"
+      echo "    - Test steps are too vague or ambiguous"
+      echo "    - Missing prerequisites (DB setup, env vars, etc.)"
+      echo "    - Story scope is too large - consider breaking it up"
+      echo ""
+      echo "  Failure context saved to: $RALPH_DIR/failures/$story.txt"
+      mkdir -p "$RALPH_DIR/failures"
+      cp "$RALPH_DIR/last_failure.txt" "$RALPH_DIR/failures/$story.txt" 2>/dev/null || true
+      rm -f "$RALPH_DIR/last_failure.txt"
+      skipped_stories+=("$story")
+      jq --arg id "$story" '(.stories[] | select(.id==$id)) |= . + {skipped: true, skipReason: "exceeded max retries"}' "$RALPH_DIR/prd.json" > "$RALPH_DIR/prd.json.tmp" && mv "$RALPH_DIR/prd.json.tmp" "$RALPH_DIR/prd.json"
+      last_story=""
+      consecutive_failures=0
+      continue
+    fi
+    # Show retry status (but don't make it scary - retrying is normal!)
+    if [[ $consecutive_failures -gt 1 ]]; then
+      if [[ $consecutive_failures -le 3 ]]; then
+        print_info "Attempt $consecutive_failures for $story (normal - refining solution)"
+      elif [[ $consecutive_failures -le 8 ]]; then
+        print_warning "Attempt $consecutive_failures/$max_story_retries for $story"
+      else
+        print_warning "Attempt $consecutive_failures/$max_story_retries for $story (getting close to limit)"
+      fi
+    fi
     # 2. Session startup checklist (skip on retries)
     [[ $consecutive_failures -gt 1 ]] && startup_checklist "true" || startup_checklist "false"
@@ -373,18 +402,42 @@ run_loop() {
     fi
     if [[ $claude_exit_code -ne 0 ]]; then
-      print_warning "Claude session ended (timeout or error)"
-      log_progress "$story" "TIMEOUT" "Claude session ended after ${timeout_seconds}s"
+      ((consecutive_timeouts++))
+      print_warning "Claude session ended (timeout or error) - timeout $consecutive_timeouts/$max_timeouts"
+      log_progress "$story" "TIMEOUT" "Claude session ended after ${timeout_seconds}s (timeout $consecutive_timeouts)"
       rm -f "$prompt_file"
       # Session may be broken - reset for next attempt
       session_started=false
+      # Skip on repeated timeouts (story is too large/complex for single session)
+      if [[ $consecutive_timeouts -ge $max_timeouts ]]; then
+        print_error "Story $story timed out $max_timeouts times - needs to be broken up"
+        echo ""
+        echo "  Consecutive timeouts indicate the story is too large for a single"
+        echo "  Claude session (${timeout_seconds}s). Consider:"
+        echo "    - Breaking it into smaller, focused stories"
+        echo "    - Increasing maxSessionSeconds in config.json"
+        echo ""
+        mkdir -p "$RALPH_DIR/failures"
+        echo "Story $story timed out $max_timeouts consecutive times (${timeout_seconds}s each)" > "$RALPH_DIR/failures/$story.txt"
+        echo "Consider breaking this story into smaller pieces." >> "$RALPH_DIR/failures/$story.txt"
+        skipped_stories+=("$story")
+        jq --arg id "$story" '(.stories[] | select(.id==$id)) |= . + {skipped: true, skipReason: "repeated timeouts"}' "$RALPH_DIR/prd.json" > "$RALPH_DIR/prd.json.tmp" && mv "$RALPH_DIR/prd.json.tmp" "$RALPH_DIR/prd.json"
+        last_story=""
+        consecutive_failures=0
+        consecutive_timeouts=0
+        continue
+      fi
       # If running specific story, exit on failure
       [[ -n "$specific_story" ]] && return 1
       continue
     fi
+    # Reset timeout counter on successful Claude run
+    consecutive_timeouts=0
     rm -f "$prompt_file"
     session_started=true  # Mark session as active for subsequent stories
@@ -402,9 +455,9 @@ run_loop() {
     local verify_log="$RALPH_DIR/last_verification.log"
     set -o pipefail
     if run_verification "$story" 2>&1 | tee "$verify_log"; then
-      # Mark story as complete
+      # Mark story as complete and reset retry count
       update_json "$RALPH_DIR/prd.json" \
-        --arg id "$story" '(.stories[] | select(.id==$id) | .passes) = true'
+        --arg id "$story" '(.stories[] | select(.id==$id)) |= . + {passes: true, retryCount: 0}'
       # Clear failure context on success
       rm -f "$RALPH_DIR/last_failure.txt"