@ai-dev-methodologies/rlp-desk 0.15.0 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3045,24 +3045,50 @@ main() {
3045
3045
  return 1
3046
3046
  fi
3047
3047
 
3048
- # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
3049
- # Bug #7 Fix-R cleanup: unlock 0o444 sentinels written by the previous
3050
- # iteration's reaper before rm so cleanup does not log permission noise.
3051
- _unlock_sentinel "$SIGNAL_FILE"
3052
- _unlock_sentinel "$VERDICT_FILE"
3053
- rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
3054
- rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
3055
-
3056
- # --- Clean previous claude session in panes (one-shot lifecycle) ---
3057
- # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
3058
- if (( ITERATION > 1 )); then
3059
- # Send C-c first (in case claude is mid-task), then /exit
3060
- tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
3061
- sleep 1
3062
- tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
3063
- sleep 2
3064
- # Wait for shell prompt before proceeding
3065
- wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
3048
+ # PR-A (Bug #10): operator-recovery hygiene check.
3049
+ # When the operator hand-rolls a `phase=verify` recovery (jq-patches
3050
+ # status.json, writes manual iter-signal.json + done-claim.json, deletes
3051
+ # the blocked sentinel), the leader MUST honor that work instead of
3052
+ # deleting the artifacts and resetting to phase=worker. Mirrors the
3053
+ # Node-side guard in src/node/runner/campaign-main-loop.mjs.
3054
+ local SKIP_NEXT_WORKER=0
3055
+ local LAST_PHASE=""
3056
+ if [[ -f "$STATUS_FILE" ]] && command -v jq >/dev/null 2>&1; then
3057
+ LAST_PHASE=$(jq -r '.phase // ""' "$STATUS_FILE" 2>/dev/null)
3058
+ fi
3059
+ if [[ "$LAST_PHASE" == "verify" ]]; then
3060
+ local _iter_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
3061
+ if _validate_operator_recovery_artifacts \
3062
+ "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$STATUS_FILE" "$_iter_prompt"; then
3063
+ log "[recovery] Resuming verify phase — operator manual recovery detected (iter=$ITERATION)"
3064
+ log_debug "[recovery] iter=$ITERATION skip_worker=true reason=manual_recovery_validated"
3065
+ SKIP_NEXT_WORKER=1
3066
+ else
3067
+ log "[recovery] phase=verify ignored: ${RECOVERY_FAIL_REASON}"
3068
+ log_debug "[recovery] iter=$ITERATION skip_worker=false reason=\"${RECOVERY_FAIL_REASON}\""
3069
+ fi
3070
+ fi
3071
+
3072
+ if (( ! SKIP_NEXT_WORKER )); then
3073
+ # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
3074
+ # Bug #7 Fix-R cleanup: unlock 0o444 sentinels written by the previous
3075
+ # iteration's reaper before rm so cleanup does not log permission noise.
3076
+ _unlock_sentinel "$SIGNAL_FILE"
3077
+ _unlock_sentinel "$VERDICT_FILE"
3078
+ rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
3079
+ rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
3080
+
3081
+ # --- Clean previous claude session in panes (one-shot lifecycle) ---
3082
+ # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
3083
+ if (( ITERATION > 1 )); then
3084
+ # Send C-c first (in case claude is mid-task), then /exit
3085
+ tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
3086
+ sleep 1
3087
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
3088
+ sleep 2
3089
+ # Wait for shell prompt before proceeding
3090
+ wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
3091
+ fi
3066
3092
  fi
3067
3093
 
3068
3094
  # Reset per-iteration state
@@ -3074,33 +3100,44 @@ main() {
3074
3100
  # --- US-004: detect PRD changes for live update + re-split ---
3075
3101
  check_prd_update
3076
3102
 
3077
- # --- governance.md s7 step 4: Build worker prompt + trigger ---
3078
- write_worker_trigger "$ITERATION"
3079
- local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
3080
-
3081
- # AC1: capture worker start timestamp
3103
+ # AC1: capture worker start timestamp (still set for downstream telemetry
3104
+ # even when the worker dispatch is skipped — recovery still consumes time).
3082
3105
  ITER_WORKER_START=$(date +%s)
3083
3106
 
3084
- update_status "worker" "running"
3107
+ local worker_launch=""
3108
+ if (( ! SKIP_NEXT_WORKER )); then
3109
+ # --- governance.md s7 step 4: Build worker prompt + trigger ---
3110
+ write_worker_trigger "$ITERATION"
3111
+ local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
3085
3112
 
3086
- # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
3087
- log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
3113
+ update_status "worker" "running"
3088
3114
 
3089
- local worker_launch
3090
- if [[ "$WORKER_ENGINE" = "codex" ]]; then
3091
- worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
3092
- if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
3093
- write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
3094
- update_status "blocked" "worker_start_failed"
3095
- return 1
3115
+ # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
3116
+ log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
3117
+
3118
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
3119
+ worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
3120
+ if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
3121
+ write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
3122
+ update_status "blocked" "worker_start_failed"
3123
+ return 1
3124
+ fi
3125
+ else
3126
+ worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
3127
+ if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
3128
+ write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
3129
+ update_status "blocked" "worker_start_failed"
3130
+ return 1
3131
+ fi
3096
3132
  fi
3097
3133
  else
3098
- worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
3099
- if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
3100
- write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
3101
- update_status "blocked" "worker_start_failed"
3102
- return 1
3103
- fi
3134
+ # PR-A (Bug #10): one-shot recovery path. The operator's iter-signal.json
3135
+ # is already on disk; polling below picks it up immediately and the loop
3136
+ # transitions cleanly into the verifier phase. Persist phase=verify so a
3137
+ # subsequent crash-and-relaunch sees the same contract. SKIP_NEXT_WORKER
3138
+ # is local to this iteration so iter-N+1 dispatches the worker normally.
3139
+ update_status "verify" "running"
3140
+ log "[recovery] Skipping worker dispatch for iter=$ITERATION (one-shot, honoring operator manual recovery)"
3104
3141
  fi
3105
3142
 
3106
3143
  # --- governance.md s7 step 5+6: Poll for Worker completion ---