npm - bmalph - Versions diffs - 2.9.0 → 2.11.0 - Mend

bmalph 2.9.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +7 -7
package/dist/cli.js +2 -1
package/dist/commands/doctor-checks.js +28 -0
package/dist/commands/doctor.js +2 -1
package/dist/commands/run.js +34 -17
package/dist/installer/template-files.js +29 -0
package/dist/run/ralph-process.js +20 -3
package/dist/run/run-dashboard.js +6 -5
package/dist/transition/context-output.js +3 -5
package/dist/transition/context.js +54 -55
package/dist/transition/fix-plan-sync.js +3 -2
package/dist/transition/fix-plan.js +20 -0
package/dist/transition/section-patterns.js +1 -1
package/dist/transition/specs-index.js +2 -2
package/dist/utils/format-status.js +13 -0
package/dist/watch/renderer.js +2 -1
package/dist/watch/state-reader.js +2 -0
package/package.json +1 -1
package/ralph/drivers/DRIVER_INTERFACE.md +422 -0
package/ralph/drivers/codex.sh +2 -2
package/ralph/lib/response_analyzer.sh +114 -102
package/ralph/ralph_loop.sh +318 -31
package/ralph/templates/PROMPT.md +15 -8
package/ralph/templates/ralphrc.template +8 -3

package/ralph/lib/response_analyzer.sh CHANGED

@@ -762,14 +762,14 @@ parse_json_response() {
     local summary_has_no_work_pattern="false"
     if [[ "$response_shape" == "codex_jsonl" || "$response_shape" == "opencode_jsonl" || "$response_shape" == "cursor_stream_jsonl" ]] && [[ "$explicit_exit_signal_found" != "true" && -n "$summary" ]]; then
         for keyword in "${COMPLETION_KEYWORDS[@]}"; do
-            if echo "$summary" | grep -qi "$keyword"; then
+            if echo "$summary" | grep -qiw "$keyword"; then
                 summary_has_completion_keyword="true"
                 break
             fi
         done
         for pattern in "${NO_WORK_PATTERNS[@]}"; do
-            if echo "$summary" | grep -qi "$pattern"; then
+            if echo "$summary" | grep -qiw "$pattern"; then
                 summary_has_no_work_pattern="true"
                 break
             fi
@@ -817,14 +817,6 @@ parse_json_response() {
         has_completion_signal="true"
     fi
-    # Boost confidence based on structured data availability
-    if [[ "$has_result_field" == "true" ]]; then
-        confidence=$((confidence + 20))  # Structured response boost
-    fi
-    if [[ $progress_count -gt 0 ]]; then
-        confidence=$((confidence + progress_count * 5))  # Progress indicators boost
-    fi
     # Write normalized result using jq for safe JSON construction
     # String fields use --arg (auto-escapes), numeric/boolean use --argjson
     jq -n \
@@ -844,12 +836,14 @@ parse_json_response() {
         --argjson permission_denial_count "$permission_denial_count" \
         --argjson denied_commands "$denied_commands_json" \
         --arg tests_status "$tests_status" \
+        --argjson has_result_field "$has_result_field" \
         '{
             status: $status,
             exit_signal: $exit_signal,
             is_test_only: $is_test_only,
             is_stuck: $is_stuck,
             has_completion_signal: $has_completion_signal,
+            has_result_field: $has_result_field,
             files_modified: $files_modified,
             error_count: $error_count,
             summary: $summary,
@@ -888,6 +882,7 @@ analyze_response() {
     local has_progress=false
     local confidence_score=0
     local exit_signal=false
+    local format_confidence=0
     local work_summary=""
     local files_modified=0
     local tasks_completed_this_loop=0
@@ -920,6 +915,7 @@ analyze_response() {
             tasks_completed_this_loop=$(jq -r -j '.tasks_completed_this_loop // 0' "$json_parse_result_file" 2>/dev/null || echo "0")
             tests_status=$(jq -r -j '.tests_status // "UNKNOWN"' "$json_parse_result_file" 2>/dev/null || echo "UNKNOWN")
             local json_confidence=$(jq -r -j '.confidence' "$json_parse_result_file" 2>/dev/null || echo "0")
+            local json_has_result_field=$(jq -r -j '.has_result_field' "$json_parse_result_file" 2>/dev/null || echo "false")
             local session_id=$(jq -r -j '.session_id' "$json_parse_result_file" 2>/dev/null || echo "")
             # Extract permission denial fields (Issue #101)
@@ -933,11 +929,16 @@ analyze_response() {
                 [[ "${VERBOSE_PROGRESS:-}" == "true" ]] && echo "DEBUG: Persisted session ID: $session_id" >&2
             fi
-            # JSON parsing provides high confidence
+            # Separate format confidence from completion confidence (Issue #124)
+            if [[ "$json_has_result_field" == "true" ]]; then
+                format_confidence=100
+            else
+                format_confidence=80
+            fi
             if [[ "$exit_signal" == "true" ]]; then
                 confidence_score=100
             else
-                confidence_score=$((json_confidence + 50))
+                confidence_score=$json_confidence
             fi
             if [[ ! "$tasks_completed_this_loop" =~ ^-?[0-9]+$ ]]; then
@@ -993,6 +994,7 @@ analyze_response() {
                 --argjson is_stuck "$is_stuck" \
                 --argjson has_progress "$has_progress" \
                 --argjson files_modified "$files_modified" \
+                --argjson format_confidence "$format_confidence" \
                 --argjson confidence_score "$confidence_score" \
                 --argjson exit_signal "$exit_signal" \
                 --argjson tasks_completed_this_loop "$tasks_completed_this_loop" \
@@ -1013,6 +1015,7 @@ analyze_response() {
                         is_stuck: $is_stuck,
                         has_progress: $has_progress,
                         files_modified: $files_modified,
+                        format_confidence: $format_confidence,
                         confidence_score: $confidence_score,
                         exit_signal: $exit_signal,
                         tasks_completed_this_loop: $tasks_completed_this_loop,
@@ -1035,13 +1038,16 @@ analyze_response() {
     # Text parsing fallback (original logic)
-    # Track whether an explicit EXIT_SIGNAL was found in RALPH_STATUS block
-    # If explicit signal found, heuristics should NOT override Claude's intent
-    local explicit_exit_signal_found=false
-    # 1. Check for explicit structured output (if Claude follows schema)
+    # 1. Check for explicit structured output (RALPH_STATUS block)
+    # When a status block is present, it is authoritative — skip all heuristics.
+    # A structurally valid but field-empty block results in exit_signal=false,
+    # confidence=0 by design (AI produced a block but provided no signal).
+    local ralph_status_block_found=false
     local ralph_status_json=""
     if ralph_status_json=$(extract_ralph_status_block_json "$output_content" 2>/dev/null); then
+        ralph_status_block_found=true
+        format_confidence=70
         local status
         status=$(printf '%s' "$ralph_status_json" | jq -r -j '.status' 2>/dev/null)
         local exit_sig_found
@@ -1062,14 +1068,14 @@ analyze_response() {
         # If EXIT_SIGNAL is explicitly provided, respect it
         if [[ "$exit_sig_found" == "true" ]]; then
-            explicit_exit_signal_found=true
             if [[ "$exit_sig" == "true" ]]; then
                 has_completion_signal=true
                 exit_signal=true
                 confidence_score=100
             else
-                # Explicit EXIT_SIGNAL: false - Claude says to continue
+                # Explicit EXIT_SIGNAL: false — Claude says to continue
                 exit_signal=false
+                confidence_score=80
             fi
         elif [[ "$status" == "COMPLETE" ]]; then
             # No explicit EXIT_SIGNAL but STATUS is COMPLETE
@@ -1077,68 +1083,94 @@ analyze_response() {
             exit_signal=true
             confidence_score=100
         fi
+        # is_test_only and is_stuck stay false (defaults) — status block is authoritative
     fi
-    # 2. Detect completion keywords in natural language output
-    for keyword in "${COMPLETION_KEYWORDS[@]}"; do
-        if grep -qi "$keyword" "$output_file"; then
-            has_completion_signal=true
-            ((confidence_score+=10))
-            break
-        fi
-    done
+    if [[ "$ralph_status_block_found" != "true" ]]; then
+        # No status block found — fall back to heuristic analysis
+        format_confidence=30
-    # 3. Detect test-only loops
-    local test_command_count=0
-    local implementation_count=0
-    local error_count=0
+        # 2. Detect completion keywords in natural language output
+        for keyword in "${COMPLETION_KEYWORDS[@]}"; do
+            if grep -qiw "$keyword" "$output_file"; then
+                has_completion_signal=true
+                ((confidence_score+=10))
+                break
+            fi
+        done
-    test_command_count=$(grep -c -i "running tests\|npm test\|bats\|pytest\|jest" "$output_file" 2>/dev/null | head -1 || echo "0")
-    implementation_count=$(grep -c -i "implementing\|creating\|writing\|adding\|function\|class" "$output_file" 2>/dev/null | head -1 || echo "0")
+        # 3. Detect test-only loops
+        local test_command_count=0
+        local implementation_count=0
+        local error_count=0
-    # Strip whitespace and ensure it's a number
-    test_command_count=$(echo "$test_command_count" | tr -d '[:space:]')
-    implementation_count=$(echo "$implementation_count" | tr -d '[:space:]')
+        test_command_count=$(grep -c -i "running tests\|npm test\|bats\|pytest\|jest" "$output_file" 2>/dev/null | head -1 || echo "0")
+        implementation_count=$(grep -c -i "implementing\|creating\|writing\|adding\|function\|class" "$output_file" 2>/dev/null | head -1 || echo "0")
-    # Convert to integers with default fallback
-    test_command_count=${test_command_count:-0}
-    implementation_count=${implementation_count:-0}
-    test_command_count=$((test_command_count + 0))
-    implementation_count=$((implementation_count + 0))
+        # Strip whitespace and ensure it's a number
+        test_command_count=$(echo "$test_command_count" | tr -d '[:space:]')
+        implementation_count=$(echo "$implementation_count" | tr -d '[:space:]')
-    if [[ $test_command_count -gt 0 ]] && [[ $implementation_count -eq 0 ]]; then
-        is_test_only=true
-        work_summary="Test execution only, no implementation"
-    fi
+        # Convert to integers with default fallback
+        test_command_count=${test_command_count:-0}
+        implementation_count=${implementation_count:-0}
+        test_command_count=$((test_command_count + 0))
+        implementation_count=$((implementation_count + 0))
-    # 4. Detect stuck/error loops
-    # Use two-stage filtering to avoid counting JSON field names as errors
-    # Stage 1: Filter out JSON field patterns like "is_error": false
-    # Stage 2: Count actual error messages in specific contexts
-    # Pattern aligned with ralph_loop.sh to ensure consistent behavior
-    error_count=$(grep -v '"[^"]*error[^"]*":' "$output_file" 2>/dev/null | \
-                  grep -cE '(^Error:|^ERROR:|^error:|\]: error|Link: error|Error occurred|failed with error|[Ee]xception|Fatal|FATAL)' \
-                  2>/dev/null || echo "0")
-    error_count=$(echo "$error_count" | tr -d '[:space:]')
-    error_count=${error_count:-0}
-    error_count=$((error_count + 0))
+        if [[ $test_command_count -gt 0 ]] && [[ $implementation_count -eq 0 ]]; then
+            is_test_only=true
+            work_summary="Test execution only, no implementation"
+        fi
-    if [[ $error_count -gt 5 ]]; then
-        is_stuck=true
-    fi
+        # 4. Detect stuck/error loops
+        # Use two-stage filtering to avoid counting JSON field names as errors
+        # Stage 1: Filter out JSON field patterns like "is_error": false
+        # Stage 2: Count actual error messages in specific contexts
+        # Pattern aligned with ralph_loop.sh to ensure consistent behavior
+        error_count=$(grep -v '"[^"]*error[^"]*":' "$output_file" 2>/dev/null | \
+                      grep -cE '(^Error:|^ERROR:|^error:|\]: error|Link: error|Error occurred|failed with error|[Ee]xception|Fatal|FATAL)' \
+                      2>/dev/null || echo "0")
+        error_count=$(echo "$error_count" | tr -d '[:space:]')
+        error_count=${error_count:-0}
+        error_count=$((error_count + 0))
+        if [[ $error_count -gt 5 ]]; then
+            is_stuck=true
+        fi
-    # 5. Detect "nothing to do" patterns
-    for pattern in "${NO_WORK_PATTERNS[@]}"; do
-        if grep -qi "$pattern" "$output_file"; then
-            has_completion_signal=true
-            ((confidence_score+=15))
-            work_summary="No work remaining"
-            break
+        # 5. Detect "nothing to do" patterns
+        for pattern in "${NO_WORK_PATTERNS[@]}"; do
+            if grep -qiw "$pattern" "$output_file"; then
+                has_completion_signal=true
+                ((confidence_score+=15))
+                work_summary="No work remaining"
+                break
+            fi
+        done
+        # 7. Analyze output length trends (detect declining engagement)
+        if [[ -f "$RALPH_DIR/.last_output_length" ]]; then
+            local last_length
+            last_length=$(cat "$RALPH_DIR/.last_output_length")
+            if [[ "$last_length" -gt 0 ]]; then
+                local length_ratio=$((output_length * 100 / last_length))
+                if [[ $length_ratio -lt 50 ]]; then
+                    # Output is less than 50% of previous - possible completion
+                    ((confidence_score+=10))
+                fi
+            fi
         fi
-    done
-    # 6. Check for file changes (git integration)
-    # Fix #141: Detect both uncommitted changes AND committed changes
+        # 9. Determine exit signal based on confidence (heuristic)
+        if [[ $confidence_score -ge 40 || "$has_completion_signal" == "true" ]]; then
+            exit_signal=true
+        fi
+    fi
+    # Always persist output length for next iteration (both paths)
+    echo "$output_length" > "$RALPH_DIR/.last_output_length"
+    # 6. Check for file changes (git integration) — always runs
     if command -v git &>/dev/null && git rev-parse --git-dir >/dev/null 2>&1; then
         local loop_start_sha=""
         local current_sha=""
@@ -1170,23 +1202,15 @@ analyze_response() {
         if [[ $files_modified -gt 0 ]]; then
             has_progress=true
-            ((confidence_score+=20))
-        fi
-    fi
-    # 7. Analyze output length trends (detect declining engagement)
-    if [[ -f "$RALPH_DIR/.last_output_length" ]]; then
-        local last_length=$(cat "$RALPH_DIR/.last_output_length")
-        local length_ratio=$((output_length * 100 / last_length))
-        if [[ $length_ratio -lt 50 ]]; then
-            # Output is less than 50% of previous - possible completion
-            ((confidence_score+=10))
+            # Only boost completion confidence in heuristic path (Issue #124)
+            # RALPH_STATUS block is authoritative — git changes shouldn't inflate it
+            if [[ "$ralph_status_block_found" != "true" ]]; then
+                ((confidence_score+=20))
+            fi
         fi
     fi
-    echo "$output_length" > "$RALPH_DIR/.last_output_length"
-    # 8. Extract work summary from output
+    # 8. Extract work summary from output — always runs
     if [[ -z "$work_summary" ]]; then
         # Try to find summary in output
         work_summary=$(grep -i "summary\|completed\|implemented" "$output_file" | head -1 | cut -c 1-100)
@@ -1195,21 +1219,6 @@ analyze_response() {
         fi
     fi
-    # Explicit EXIT_SIGNAL=false means "continue working", so completion
-    # heuristics must not register a done signal.
-    if [[ "$explicit_exit_signal_found" == "true" && "$exit_signal" == "false" ]]; then
-        has_completion_signal=false
-    fi
-    # 9. Determine exit signal based on confidence (heuristic)
-    # IMPORTANT: Only apply heuristics if no explicit EXIT_SIGNAL was found in RALPH_STATUS
-    # Claude's explicit intent takes precedence over natural language pattern matching
-    if [[ "$explicit_exit_signal_found" != "true" ]]; then
-        if [[ $confidence_score -ge 40 || "$has_completion_signal" == "true" ]]; then
-            exit_signal=true
-        fi
-    fi
     local has_permission_denials=false
     local permission_denial_count=0
     local denied_commands_json='[]'
@@ -1232,6 +1241,7 @@ analyze_response() {
         --argjson is_stuck "$is_stuck" \
         --argjson has_progress "$has_progress" \
         --argjson files_modified "$files_modified" \
+        --argjson format_confidence "$format_confidence" \
         --argjson confidence_score "$confidence_score" \
         --argjson exit_signal "$exit_signal" \
         --argjson tasks_completed_this_loop "$tasks_completed_this_loop" \
@@ -1252,6 +1262,7 @@ analyze_response() {
                 is_stuck: $is_stuck,
                 has_progress: $has_progress,
                 files_modified: $files_modified,
+                format_confidence: $format_confidence,
                 confidence_score: $confidence_score,
                 exit_signal: $exit_signal,
                 tasks_completed_this_loop: $tasks_completed_this_loop,
@@ -1309,9 +1320,8 @@ update_exit_signals() {
     fi
     # Update completion_indicators array (only when Claude explicitly signals exit)
-    # Note: Previously used confidence >= 60, but JSON mode always has confidence >= 70
-    # due to deterministic scoring (+50 for JSON format, +20 for result field).
-    # This caused premature exits after 5 loops. Now we respect Claude's explicit intent.
+    # Note: Format confidence (parse quality) is separated from completion confidence
+    # since Issue #124. Only exit_signal drives completion indicators, not confidence score.
     local exit_signal=$(jq -r -j '.analysis.exit_signal // false' "$analysis_file")
     if [[ "$has_permission_denials" != "true" && "$has_progress_tracking_mismatch" != "true" && "$exit_signal" == "true" ]]; then
         signals=$(echo "$signals" | jq ".completion_indicators += [$loop_number]")
@@ -1338,6 +1348,7 @@ log_analysis_summary() {
     local loop=$(jq -r -j '.loop_number' "$analysis_file")
     local exit_sig=$(jq -r -j '.analysis.exit_signal' "$analysis_file")
+    local format_conf=$(jq -r -j '.analysis.format_confidence // 0' "$analysis_file")
     local confidence=$(jq -r -j '.analysis.confidence_score' "$analysis_file")
     local test_only=$(jq -r -j '.analysis.is_test_only' "$analysis_file")
     local files_changed=$(jq -r -j '.analysis.files_modified' "$analysis_file")
@@ -1347,7 +1358,8 @@ log_analysis_summary() {
     echo -e "${BLUE}║           Response Analysis - Loop #$loop                 ║${NC}"
     echo -e "${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"
     echo -e "${YELLOW}Exit Signal:${NC}      $exit_sig"
-    echo -e "${YELLOW}Confidence:${NC}       $confidence%"
+    echo -e "${YELLOW}Parse quality:${NC}    $format_conf%"
+    echo -e "${YELLOW}Completion:${NC}       $confidence%"
     echo -e "${YELLOW}Test Only:${NC}        $test_only"
     echo -e "${YELLOW}Files Changed:${NC}    $files_changed"
     echo -e "${YELLOW}Summary:${NC}          $summary"