bmalph 2.9.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -762,14 +762,14 @@ parse_json_response() {
762
762
  local summary_has_no_work_pattern="false"
763
763
  if [[ "$response_shape" == "codex_jsonl" || "$response_shape" == "opencode_jsonl" || "$response_shape" == "cursor_stream_jsonl" ]] && [[ "$explicit_exit_signal_found" != "true" && -n "$summary" ]]; then
764
764
  for keyword in "${COMPLETION_KEYWORDS[@]}"; do
765
- if echo "$summary" | grep -qi "$keyword"; then
765
+ if echo "$summary" | grep -qiw "$keyword"; then
766
766
  summary_has_completion_keyword="true"
767
767
  break
768
768
  fi
769
769
  done
770
770
 
771
771
  for pattern in "${NO_WORK_PATTERNS[@]}"; do
772
- if echo "$summary" | grep -qi "$pattern"; then
772
+ if echo "$summary" | grep -qiw "$pattern"; then
773
773
  summary_has_no_work_pattern="true"
774
774
  break
775
775
  fi
@@ -817,14 +817,6 @@ parse_json_response() {
817
817
  has_completion_signal="true"
818
818
  fi
819
819
 
820
- # Boost confidence based on structured data availability
821
- if [[ "$has_result_field" == "true" ]]; then
822
- confidence=$((confidence + 20)) # Structured response boost
823
- fi
824
- if [[ $progress_count -gt 0 ]]; then
825
- confidence=$((confidence + progress_count * 5)) # Progress indicators boost
826
- fi
827
-
828
820
  # Write normalized result using jq for safe JSON construction
829
821
  # String fields use --arg (auto-escapes), numeric/boolean use --argjson
830
822
  jq -n \
@@ -844,12 +836,14 @@ parse_json_response() {
844
836
  --argjson permission_denial_count "$permission_denial_count" \
845
837
  --argjson denied_commands "$denied_commands_json" \
846
838
  --arg tests_status "$tests_status" \
839
+ --argjson has_result_field "$has_result_field" \
847
840
  '{
848
841
  status: $status,
849
842
  exit_signal: $exit_signal,
850
843
  is_test_only: $is_test_only,
851
844
  is_stuck: $is_stuck,
852
845
  has_completion_signal: $has_completion_signal,
846
+ has_result_field: $has_result_field,
853
847
  files_modified: $files_modified,
854
848
  error_count: $error_count,
855
849
  summary: $summary,
@@ -888,6 +882,7 @@ analyze_response() {
888
882
  local has_progress=false
889
883
  local confidence_score=0
890
884
  local exit_signal=false
885
+ local format_confidence=0
891
886
  local work_summary=""
892
887
  local files_modified=0
893
888
  local tasks_completed_this_loop=0
@@ -920,6 +915,7 @@ analyze_response() {
920
915
  tasks_completed_this_loop=$(jq -r -j '.tasks_completed_this_loop // 0' "$json_parse_result_file" 2>/dev/null || echo "0")
921
916
  tests_status=$(jq -r -j '.tests_status // "UNKNOWN"' "$json_parse_result_file" 2>/dev/null || echo "UNKNOWN")
922
917
  local json_confidence=$(jq -r -j '.confidence' "$json_parse_result_file" 2>/dev/null || echo "0")
918
+ local json_has_result_field=$(jq -r -j '.has_result_field' "$json_parse_result_file" 2>/dev/null || echo "false")
923
919
  local session_id=$(jq -r -j '.session_id' "$json_parse_result_file" 2>/dev/null || echo "")
924
920
 
925
921
  # Extract permission denial fields (Issue #101)
@@ -933,11 +929,16 @@ analyze_response() {
933
929
  [[ "${VERBOSE_PROGRESS:-}" == "true" ]] && echo "DEBUG: Persisted session ID: $session_id" >&2
934
930
  fi
935
931
 
936
- # JSON parsing provides high confidence
932
+ # Separate format confidence from completion confidence (Issue #124)
933
+ if [[ "$json_has_result_field" == "true" ]]; then
934
+ format_confidence=100
935
+ else
936
+ format_confidence=80
937
+ fi
937
938
  if [[ "$exit_signal" == "true" ]]; then
938
939
  confidence_score=100
939
940
  else
940
- confidence_score=$((json_confidence + 50))
941
+ confidence_score=$json_confidence
941
942
  fi
942
943
 
943
944
  if [[ ! "$tasks_completed_this_loop" =~ ^-?[0-9]+$ ]]; then
@@ -993,6 +994,7 @@ analyze_response() {
993
994
  --argjson is_stuck "$is_stuck" \
994
995
  --argjson has_progress "$has_progress" \
995
996
  --argjson files_modified "$files_modified" \
997
+ --argjson format_confidence "$format_confidence" \
996
998
  --argjson confidence_score "$confidence_score" \
997
999
  --argjson exit_signal "$exit_signal" \
998
1000
  --argjson tasks_completed_this_loop "$tasks_completed_this_loop" \
@@ -1013,6 +1015,7 @@ analyze_response() {
1013
1015
  is_stuck: $is_stuck,
1014
1016
  has_progress: $has_progress,
1015
1017
  files_modified: $files_modified,
1018
+ format_confidence: $format_confidence,
1016
1019
  confidence_score: $confidence_score,
1017
1020
  exit_signal: $exit_signal,
1018
1021
  tasks_completed_this_loop: $tasks_completed_this_loop,
@@ -1035,13 +1038,16 @@ analyze_response() {
1035
1038
 
1036
1039
  # Text parsing fallback (original logic)
1037
1040
 
1038
- # Track whether an explicit EXIT_SIGNAL was found in RALPH_STATUS block
1039
- # If explicit signal found, heuristics should NOT override Claude's intent
1040
- local explicit_exit_signal_found=false
1041
-
1042
- # 1. Check for explicit structured output (if Claude follows schema)
1041
+ # 1. Check for explicit structured output (RALPH_STATUS block)
1042
+ # When a status block is present, it is authoritative — skip all heuristics.
1043
+ # A structurally valid but field-empty block results in exit_signal=false,
1044
+ # confidence=0 by design (AI produced a block but provided no signal).
1045
+ local ralph_status_block_found=false
1043
1046
  local ralph_status_json=""
1044
1047
  if ralph_status_json=$(extract_ralph_status_block_json "$output_content" 2>/dev/null); then
1048
+ ralph_status_block_found=true
1049
+ format_confidence=70
1050
+
1045
1051
  local status
1046
1052
  status=$(printf '%s' "$ralph_status_json" | jq -r -j '.status' 2>/dev/null)
1047
1053
  local exit_sig_found
@@ -1062,14 +1068,14 @@ analyze_response() {
1062
1068
 
1063
1069
  # If EXIT_SIGNAL is explicitly provided, respect it
1064
1070
  if [[ "$exit_sig_found" == "true" ]]; then
1065
- explicit_exit_signal_found=true
1066
1071
  if [[ "$exit_sig" == "true" ]]; then
1067
1072
  has_completion_signal=true
1068
1073
  exit_signal=true
1069
1074
  confidence_score=100
1070
1075
  else
1071
- # Explicit EXIT_SIGNAL: false - Claude says to continue
1076
+ # Explicit EXIT_SIGNAL: false — Claude says to continue
1072
1077
  exit_signal=false
1078
+ confidence_score=80
1073
1079
  fi
1074
1080
  elif [[ "$status" == "COMPLETE" ]]; then
1075
1081
  # No explicit EXIT_SIGNAL but STATUS is COMPLETE
@@ -1077,68 +1083,94 @@ analyze_response() {
1077
1083
  exit_signal=true
1078
1084
  confidence_score=100
1079
1085
  fi
1086
+ # is_test_only and is_stuck stay false (defaults) — status block is authoritative
1080
1087
  fi
1081
1088
 
1082
- # 2. Detect completion keywords in natural language output
1083
- for keyword in "${COMPLETION_KEYWORDS[@]}"; do
1084
- if grep -qi "$keyword" "$output_file"; then
1085
- has_completion_signal=true
1086
- ((confidence_score+=10))
1087
- break
1088
- fi
1089
- done
1089
+ if [[ "$ralph_status_block_found" != "true" ]]; then
1090
+ # No status block found — fall back to heuristic analysis
1091
+ format_confidence=30
1090
1092
 
1091
- # 3. Detect test-only loops
1092
- local test_command_count=0
1093
- local implementation_count=0
1094
- local error_count=0
1093
+ # 2. Detect completion keywords in natural language output
1094
+ for keyword in "${COMPLETION_KEYWORDS[@]}"; do
1095
+ if grep -qiw "$keyword" "$output_file"; then
1096
+ has_completion_signal=true
1097
+ ((confidence_score+=10))
1098
+ break
1099
+ fi
1100
+ done
1095
1101
 
1096
- test_command_count=$(grep -c -i "running tests\|npm test\|bats\|pytest\|jest" "$output_file" 2>/dev/null | head -1 || echo "0")
1097
- implementation_count=$(grep -c -i "implementing\|creating\|writing\|adding\|function\|class" "$output_file" 2>/dev/null | head -1 || echo "0")
1102
+ # 3. Detect test-only loops
1103
+ local test_command_count=0
1104
+ local implementation_count=0
1105
+ local error_count=0
1098
1106
 
1099
- # Strip whitespace and ensure it's a number
1100
- test_command_count=$(echo "$test_command_count" | tr -d '[:space:]')
1101
- implementation_count=$(echo "$implementation_count" | tr -d '[:space:]')
1107
+ test_command_count=$(grep -c -i "running tests\|npm test\|bats\|pytest\|jest" "$output_file" 2>/dev/null | head -1 || echo "0")
1108
+ implementation_count=$(grep -c -i "implementing\|creating\|writing\|adding\|function\|class" "$output_file" 2>/dev/null | head -1 || echo "0")
1102
1109
 
1103
- # Convert to integers with default fallback
1104
- test_command_count=${test_command_count:-0}
1105
- implementation_count=${implementation_count:-0}
1106
- test_command_count=$((test_command_count + 0))
1107
- implementation_count=$((implementation_count + 0))
1110
+ # Strip whitespace and ensure it's a number
1111
+ test_command_count=$(echo "$test_command_count" | tr -d '[:space:]')
1112
+ implementation_count=$(echo "$implementation_count" | tr -d '[:space:]')
1108
1113
 
1109
- if [[ $test_command_count -gt 0 ]] && [[ $implementation_count -eq 0 ]]; then
1110
- is_test_only=true
1111
- work_summary="Test execution only, no implementation"
1112
- fi
1114
+ # Convert to integers with default fallback
1115
+ test_command_count=${test_command_count:-0}
1116
+ implementation_count=${implementation_count:-0}
1117
+ test_command_count=$((test_command_count + 0))
1118
+ implementation_count=$((implementation_count + 0))
1113
1119
 
1114
- # 4. Detect stuck/error loops
1115
- # Use two-stage filtering to avoid counting JSON field names as errors
1116
- # Stage 1: Filter out JSON field patterns like "is_error": false
1117
- # Stage 2: Count actual error messages in specific contexts
1118
- # Pattern aligned with ralph_loop.sh to ensure consistent behavior
1119
- error_count=$(grep -v '"[^"]*error[^"]*":' "$output_file" 2>/dev/null | \
1120
- grep -cE '(^Error:|^ERROR:|^error:|\]: error|Link: error|Error occurred|failed with error|[Ee]xception|Fatal|FATAL)' \
1121
- 2>/dev/null || echo "0")
1122
- error_count=$(echo "$error_count" | tr -d '[:space:]')
1123
- error_count=${error_count:-0}
1124
- error_count=$((error_count + 0))
1120
+ if [[ $test_command_count -gt 0 ]] && [[ $implementation_count -eq 0 ]]; then
1121
+ is_test_only=true
1122
+ work_summary="Test execution only, no implementation"
1123
+ fi
1125
1124
 
1126
- if [[ $error_count -gt 5 ]]; then
1127
- is_stuck=true
1128
- fi
1125
+ # 4. Detect stuck/error loops
1126
+ # Use two-stage filtering to avoid counting JSON field names as errors
1127
+ # Stage 1: Filter out JSON field patterns like "is_error": false
1128
+ # Stage 2: Count actual error messages in specific contexts
1129
+ # Pattern aligned with ralph_loop.sh to ensure consistent behavior
1130
+ error_count=$(grep -v '"[^"]*error[^"]*":' "$output_file" 2>/dev/null | \
1131
+ grep -cE '(^Error:|^ERROR:|^error:|\]: error|Link: error|Error occurred|failed with error|[Ee]xception|Fatal|FATAL)' \
1132
+ 2>/dev/null || echo "0")
1133
+ error_count=$(echo "$error_count" | tr -d '[:space:]')
1134
+ error_count=${error_count:-0}
1135
+ error_count=$((error_count + 0))
1136
+
1137
+ if [[ $error_count -gt 5 ]]; then
1138
+ is_stuck=true
1139
+ fi
1129
1140
 
1130
- # 5. Detect "nothing to do" patterns
1131
- for pattern in "${NO_WORK_PATTERNS[@]}"; do
1132
- if grep -qi "$pattern" "$output_file"; then
1133
- has_completion_signal=true
1134
- ((confidence_score+=15))
1135
- work_summary="No work remaining"
1136
- break
1141
+ # 5. Detect "nothing to do" patterns
1142
+ for pattern in "${NO_WORK_PATTERNS[@]}"; do
1143
+ if grep -qiw "$pattern" "$output_file"; then
1144
+ has_completion_signal=true
1145
+ ((confidence_score+=15))
1146
+ work_summary="No work remaining"
1147
+ break
1148
+ fi
1149
+ done
1150
+
1151
+ # 7. Analyze output length trends (detect declining engagement)
1152
+ if [[ -f "$RALPH_DIR/.last_output_length" ]]; then
1153
+ local last_length
1154
+ last_length=$(cat "$RALPH_DIR/.last_output_length")
1155
+ if [[ "$last_length" -gt 0 ]]; then
1156
+ local length_ratio=$((output_length * 100 / last_length))
1157
+ if [[ $length_ratio -lt 50 ]]; then
1158
+ # Output is less than 50% of previous - possible completion
1159
+ ((confidence_score+=10))
1160
+ fi
1161
+ fi
1137
1162
  fi
1138
- done
1139
1163
 
1140
- # 6. Check for file changes (git integration)
1141
- # Fix #141: Detect both uncommitted changes AND committed changes
1164
+ # 9. Determine exit signal based on confidence (heuristic)
1165
+ if [[ $confidence_score -ge 40 || "$has_completion_signal" == "true" ]]; then
1166
+ exit_signal=true
1167
+ fi
1168
+ fi
1169
+
1170
+ # Always persist output length for next iteration (both paths)
1171
+ echo "$output_length" > "$RALPH_DIR/.last_output_length"
1172
+
1173
+ # 6. Check for file changes (git integration) — always runs
1142
1174
  if command -v git &>/dev/null && git rev-parse --git-dir >/dev/null 2>&1; then
1143
1175
  local loop_start_sha=""
1144
1176
  local current_sha=""
@@ -1170,23 +1202,15 @@ analyze_response() {
1170
1202
 
1171
1203
  if [[ $files_modified -gt 0 ]]; then
1172
1204
  has_progress=true
1173
- ((confidence_score+=20))
1174
- fi
1175
- fi
1176
-
1177
- # 7. Analyze output length trends (detect declining engagement)
1178
- if [[ -f "$RALPH_DIR/.last_output_length" ]]; then
1179
- local last_length=$(cat "$RALPH_DIR/.last_output_length")
1180
- local length_ratio=$((output_length * 100 / last_length))
1181
-
1182
- if [[ $length_ratio -lt 50 ]]; then
1183
- # Output is less than 50% of previous - possible completion
1184
- ((confidence_score+=10))
1205
+ # Only boost completion confidence in heuristic path (Issue #124)
1206
+ # RALPH_STATUS block is authoritative — git changes shouldn't inflate it
1207
+ if [[ "$ralph_status_block_found" != "true" ]]; then
1208
+ ((confidence_score+=20))
1209
+ fi
1185
1210
  fi
1186
1211
  fi
1187
- echo "$output_length" > "$RALPH_DIR/.last_output_length"
1188
1212
 
1189
- # 8. Extract work summary from output
1213
+ # 8. Extract work summary from output — always runs
1190
1214
  if [[ -z "$work_summary" ]]; then
1191
1215
  # Try to find summary in output
1192
1216
  work_summary=$(grep -i "summary\|completed\|implemented" "$output_file" | head -1 | cut -c 1-100)
@@ -1195,21 +1219,6 @@ analyze_response() {
1195
1219
  fi
1196
1220
  fi
1197
1221
 
1198
- # Explicit EXIT_SIGNAL=false means "continue working", so completion
1199
- # heuristics must not register a done signal.
1200
- if [[ "$explicit_exit_signal_found" == "true" && "$exit_signal" == "false" ]]; then
1201
- has_completion_signal=false
1202
- fi
1203
-
1204
- # 9. Determine exit signal based on confidence (heuristic)
1205
- # IMPORTANT: Only apply heuristics if no explicit EXIT_SIGNAL was found in RALPH_STATUS
1206
- # Claude's explicit intent takes precedence over natural language pattern matching
1207
- if [[ "$explicit_exit_signal_found" != "true" ]]; then
1208
- if [[ $confidence_score -ge 40 || "$has_completion_signal" == "true" ]]; then
1209
- exit_signal=true
1210
- fi
1211
- fi
1212
-
1213
1222
  local has_permission_denials=false
1214
1223
  local permission_denial_count=0
1215
1224
  local denied_commands_json='[]'
@@ -1232,6 +1241,7 @@ analyze_response() {
1232
1241
  --argjson is_stuck "$is_stuck" \
1233
1242
  --argjson has_progress "$has_progress" \
1234
1243
  --argjson files_modified "$files_modified" \
1244
+ --argjson format_confidence "$format_confidence" \
1235
1245
  --argjson confidence_score "$confidence_score" \
1236
1246
  --argjson exit_signal "$exit_signal" \
1237
1247
  --argjson tasks_completed_this_loop "$tasks_completed_this_loop" \
@@ -1252,6 +1262,7 @@ analyze_response() {
1252
1262
  is_stuck: $is_stuck,
1253
1263
  has_progress: $has_progress,
1254
1264
  files_modified: $files_modified,
1265
+ format_confidence: $format_confidence,
1255
1266
  confidence_score: $confidence_score,
1256
1267
  exit_signal: $exit_signal,
1257
1268
  tasks_completed_this_loop: $tasks_completed_this_loop,
@@ -1309,9 +1320,8 @@ update_exit_signals() {
1309
1320
  fi
1310
1321
 
1311
1322
  # Update completion_indicators array (only when Claude explicitly signals exit)
1312
- # Note: Previously used confidence >= 60, but JSON mode always has confidence >= 70
1313
- # due to deterministic scoring (+50 for JSON format, +20 for result field).
1314
- # This caused premature exits after 5 loops. Now we respect Claude's explicit intent.
1323
+ # Note: Format confidence (parse quality) is separated from completion confidence
1324
+ # since Issue #124. Only exit_signal drives completion indicators, not confidence score.
1315
1325
  local exit_signal=$(jq -r -j '.analysis.exit_signal // false' "$analysis_file")
1316
1326
  if [[ "$has_permission_denials" != "true" && "$has_progress_tracking_mismatch" != "true" && "$exit_signal" == "true" ]]; then
1317
1327
  signals=$(echo "$signals" | jq ".completion_indicators += [$loop_number]")
@@ -1338,6 +1348,7 @@ log_analysis_summary() {
1338
1348
 
1339
1349
  local loop=$(jq -r -j '.loop_number' "$analysis_file")
1340
1350
  local exit_sig=$(jq -r -j '.analysis.exit_signal' "$analysis_file")
1351
+ local format_conf=$(jq -r -j '.analysis.format_confidence // 0' "$analysis_file")
1341
1352
  local confidence=$(jq -r -j '.analysis.confidence_score' "$analysis_file")
1342
1353
  local test_only=$(jq -r -j '.analysis.is_test_only' "$analysis_file")
1343
1354
  local files_changed=$(jq -r -j '.analysis.files_modified' "$analysis_file")
@@ -1347,7 +1358,8 @@ log_analysis_summary() {
1347
1358
  echo -e "${BLUE}║ Response Analysis - Loop #$loop ║${NC}"
1348
1359
  echo -e "${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"
1349
1360
  echo -e "${YELLOW}Exit Signal:${NC} $exit_sig"
1350
- echo -e "${YELLOW}Confidence:${NC} $confidence%"
1361
+ echo -e "${YELLOW}Parse quality:${NC} $format_conf%"
1362
+ echo -e "${YELLOW}Completion:${NC} $confidence%"
1351
1363
  echo -e "${YELLOW}Test Only:${NC} $test_only"
1352
1364
  echo -e "${YELLOW}Files Changed:${NC} $files_changed"
1353
1365
  echo -e "${YELLOW}Summary:${NC} $summary"