@ai-dev-methodologies/rlp-desk 0.15.0 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/plans/bug-report-overhaul-backlog.md +49 -0
- package/docs/plans/bug-report-overhaul-v0.md +238 -0
- package/docs/plans/bug-report-overhaul-v1.md +319 -0
- package/docs/plans/native-agent-revert.md +184 -0
- package/docs/plans/strategic-review/rlp-desk-strategic-review.md +125 -0
- package/docs/plans/v0.15-stabilization-plan.md +178 -0
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +56 -46
- package/src/node/run.mjs +54 -7
- package/src/node/runner/campaign-main-loop.mjs +156 -12
- package/src/scripts/lib_ralph_desk.zsh +84 -0
- package/src/scripts/run_ralph_desk.zsh +76 -39
|
@@ -3045,24 +3045,50 @@ main() {
|
|
|
3045
3045
|
return 1
|
|
3046
3046
|
fi
|
|
3047
3047
|
|
|
3048
|
-
#
|
|
3049
|
-
#
|
|
3050
|
-
#
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3048
|
+
# PR-A (Bug #10): operator-recovery hygiene check.
|
|
3049
|
+
# When the operator hand-rolls a `phase=verify` recovery (jq-patches
|
|
3050
|
+
# status.json, writes manual iter-signal.json + done-claim.json, deletes
|
|
3051
|
+
# the blocked sentinel), the leader MUST honor that work instead of
|
|
3052
|
+
# deleting the artifacts and resetting to phase=worker. Mirrors the
|
|
3053
|
+
# Node-side guard in src/node/runner/campaign-main-loop.mjs.
|
|
3054
|
+
local SKIP_NEXT_WORKER=0
|
|
3055
|
+
local LAST_PHASE=""
|
|
3056
|
+
if [[ -f "$STATUS_FILE" ]] && command -v jq >/dev/null 2>&1; then
|
|
3057
|
+
LAST_PHASE=$(jq -r '.phase // ""' "$STATUS_FILE" 2>/dev/null)
|
|
3058
|
+
fi
|
|
3059
|
+
if [[ "$LAST_PHASE" == "verify" ]]; then
|
|
3060
|
+
local _iter_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
3061
|
+
if _validate_operator_recovery_artifacts \
|
|
3062
|
+
"$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$STATUS_FILE" "$_iter_prompt"; then
|
|
3063
|
+
log "[recovery] Resuming verify phase — operator manual recovery detected (iter=$ITERATION)"
|
|
3064
|
+
log_debug "[recovery] iter=$ITERATION skip_worker=true reason=manual_recovery_validated"
|
|
3065
|
+
SKIP_NEXT_WORKER=1
|
|
3066
|
+
else
|
|
3067
|
+
log "[recovery] phase=verify ignored: ${RECOVERY_FAIL_REASON}"
|
|
3068
|
+
log_debug "[recovery] iter=$ITERATION skip_worker=false reason=\"${RECOVERY_FAIL_REASON}\""
|
|
3069
|
+
fi
|
|
3070
|
+
fi
|
|
3071
|
+
|
|
3072
|
+
if (( ! SKIP_NEXT_WORKER )); then
|
|
3073
|
+
# --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
|
|
3074
|
+
# Bug #7 Fix-R cleanup: unlock 0o444 sentinels written by the previous
|
|
3075
|
+
# iteration's reaper before rm so cleanup does not log permission noise.
|
|
3076
|
+
_unlock_sentinel "$SIGNAL_FILE"
|
|
3077
|
+
_unlock_sentinel "$VERDICT_FILE"
|
|
3078
|
+
rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
|
|
3079
|
+
rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
|
|
3080
|
+
|
|
3081
|
+
# --- Clean previous claude session in panes (one-shot lifecycle) ---
|
|
3082
|
+
# Only needed from iteration 2 onwards (iteration 1 has fresh panes)
|
|
3083
|
+
if (( ITERATION > 1 )); then
|
|
3084
|
+
# Send C-c first (in case claude is mid-task), then /exit
|
|
3085
|
+
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
3086
|
+
sleep 1
|
|
3087
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
3088
|
+
sleep 2
|
|
3089
|
+
# Wait for shell prompt before proceeding
|
|
3090
|
+
wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
|
|
3091
|
+
fi
|
|
3066
3092
|
fi
|
|
3067
3093
|
|
|
3068
3094
|
# Reset per-iteration state
|
|
@@ -3074,33 +3100,44 @@ main() {
|
|
|
3074
3100
|
# --- US-004: detect PRD changes for live update + re-split ---
|
|
3075
3101
|
check_prd_update
|
|
3076
3102
|
|
|
3077
|
-
#
|
|
3078
|
-
|
|
3079
|
-
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
3080
|
-
|
|
3081
|
-
# AC1: capture worker start timestamp
|
|
3103
|
+
# AC1: capture worker start timestamp (still set for downstream telemetry
|
|
3104
|
+
# even when the worker dispatch is skipped — recovery still consumes time).
|
|
3082
3105
|
ITER_WORKER_START=$(date +%s)
|
|
3083
3106
|
|
|
3084
|
-
|
|
3107
|
+
local worker_launch=""
|
|
3108
|
+
if (( ! SKIP_NEXT_WORKER )); then
|
|
3109
|
+
# --- governance.md s7 step 4: Build worker prompt + trigger ---
|
|
3110
|
+
write_worker_trigger "$ITERATION"
|
|
3111
|
+
local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
|
|
3085
3112
|
|
|
3086
|
-
|
|
3087
|
-
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
3113
|
+
update_status "worker" "running"
|
|
3088
3114
|
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
if
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
|
|
3115
|
+
# --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
|
|
3116
|
+
log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
3117
|
+
|
|
3118
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
3119
|
+
worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
|
|
3120
|
+
if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
3121
|
+
write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
|
|
3122
|
+
update_status "blocked" "worker_start_failed"
|
|
3123
|
+
return 1
|
|
3124
|
+
fi
|
|
3125
|
+
else
|
|
3126
|
+
worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
|
|
3127
|
+
if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
|
|
3128
|
+
write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
|
|
3129
|
+
update_status "blocked" "worker_start_failed"
|
|
3130
|
+
return 1
|
|
3131
|
+
fi
|
|
3096
3132
|
fi
|
|
3097
3133
|
else
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3101
|
-
|
|
3102
|
-
|
|
3103
|
-
|
|
3134
|
+
# PR-A (Bug #10): one-shot recovery path. The operator's iter-signal.json
|
|
3135
|
+
# is already on disk; polling below picks it up immediately and the loop
|
|
3136
|
+
# transitions cleanly into the verifier phase. Persist phase=verify so a
|
|
3137
|
+
# subsequent crash-and-relaunch sees the same contract. SKIP_NEXT_WORKER
|
|
3138
|
+
# is local to this iteration so iter-N+1 dispatches the worker normally.
|
|
3139
|
+
update_status "verify" "running"
|
|
3140
|
+
log "[recovery] Skipping worker dispatch for iter=$ITERATION (one-shot, honoring operator manual recovery)"
|
|
3104
3141
|
fi
|
|
3105
3142
|
|
|
3106
3143
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|