@ai-dev-methodologies/rlp-desk 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/scripts/init_ralph_desk.zsh +1309 -0
- package/src/scripts/lib_ralph_desk.zsh +1218 -0
- package/src/scripts/run_ralph_desk.zsh +3490 -0
|
@@ -0,0 +1,1218 @@
|
|
|
1
|
+
# lib_ralph_desk.zsh — Shared business logic for RLP Desk runner
|
|
2
|
+
# SOURCED by run_ralph_desk.zsh. Do NOT execute directly.
|
|
3
|
+
#
|
|
4
|
+
# IMPORTANT: Must be sourced at file scope, not inside a function.
|
|
5
|
+
# typeset -A creates local arrays inside functions, breaking global state.
|
|
6
|
+
# Functions in this file read/write globals defined by the sourcing script.
|
|
7
|
+
|
|
8
|
+
# Load-time guard: a non-empty funcstack[2] is treated as "sourced from inside
# a function", which this library refuses (see the header notes on typeset -A).
[[ -z "${funcstack[2]:-}" ]] || {
  echo "FATAL: lib_ralph_desk.zsh must be sourced at file scope" >&2
  exit 1
}
|
|
12
|
+
|
|
13
|
+
# =============================================================================
|
|
14
|
+
# Utility Functions
|
|
15
|
+
# =============================================================================
|
|
16
|
+
|
|
17
|
+
log() {
  # Print all arguments as one timestamped line on stdout.
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
|
|
20
|
+
|
|
21
|
+
log_debug() {
  # Append a timestamped DEBUG line to $DEBUG_LOG, but only when the DEBUG
  # global evaluates to non-zero. Globals read: DEBUG, DEBUG_LOG.
  (( DEBUG )) || return 0

  local log_dir stamp
  log_dir=$(dirname "$DEBUG_LOG")
  # Best effort: the parent directory may not exist yet.
  mkdir -p "$log_dir" 2>/dev/null
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] DEBUG: $*" >> "$DEBUG_LOG"
}
|
|
27
|
+
|
|
28
|
+
log_error() {
  # Print all arguments as one timestamped ERROR line on stderr.
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
|
|
31
|
+
|
|
32
|
+
# build_claude_cmd() — centralized claude CLI command builder
|
|
33
|
+
# Single source of truth for all claude invocation flags (--mcp-config, DISABLE_OMC, --effort, etc.)
|
|
34
|
+
# Inspired by codex-plugin-cc companion pattern: CLI abstraction in one place.
|
|
35
|
+
# Args: $1=mode (tui|print) $2=model $3=prompt_file (print mode only) $4=output_log (print mode only) $5=effort (optional: low|medium|high|max)
|
|
36
|
+
# Output: complete command string on stdout
|
|
37
|
+
# Globals read: CLAUDE_BIN
|
|
38
|
+
build_claude_cmd() {
  # Build the command string used to launch the claude CLI.
  # Args:    $1 = mode (tui|print)
  #          $2 = model
  #          $3 = prompt_file (required in print mode)
  #          $4 = output_log  (print mode; output is piped through tee)
  #          $5 = effort      (optional)
  # Output:  complete command string on stdout.
  # Returns: 0 on success; 1 on unknown mode or print mode without prompt_file.
  # Globals read: CLAUDE_BIN, HOME, ROOT.
  local mode="$1"
  local model="$2"
  local prompt_file="${3:-}"
  local output_log="${4:-}"
  local effort="${5:-}"

  # Validate first so a bad call never produces a half-built command.
  case "$mode" in
    tui) ;;
    print)
      if [[ -z "$prompt_file" ]]; then
        # Without a file the generated `cat` would block reading stdin.
        echo "ERROR: build_claude_cmd print mode requires a prompt file" >&2
        return 1
      fi
      ;;
    *)
      echo "ERROR: build_claude_cmd unknown mode '$mode'" >&2
      return 1
      ;;
  esac

  # Opus models get the 1M-context beta header via ANTHROPIC_BETA.
  local _opus_beta=""
  case "$model" in
    opus|claude-opus-*) _opus_beta="ANTHROPIC_BETA='context-1m-2025-08-07' " ;;
  esac

  # ${(qq)var} single-quotes the value so brackets, spaces and embedded quotes
  # in model ids and paths survive re-parsing of the generated command.
  local _home_desk="$HOME/.claude/ralph-desk"
  local _add_dirs="--add-dir ${(qq)_home_desk} --add-dir ${(qq)ROOT}"
  local base="DISABLE_OMC=1 ${_opus_beta}$CLAUDE_BIN --model ${(qq)model} --mcp-config '{\"mcpServers\":{}}' --strict-mcp-config --dangerously-skip-permissions ${_add_dirs}"
  if [[ -n "$effort" ]]; then
    base="$base --effort $effort"
  fi

  if [[ "$mode" = "tui" ]]; then
    printf '%s\n' "$base"
    return 0
  fi

  # print mode: quote both paths exactly like the model and --add-dir values.
  # An empty output_log keeps the original bare `tee` (plain pass-through).
  local tee_target=""
  if [[ -n "$output_log" ]]; then
    tee_target=" ${(qq)output_log}"
  fi
  printf '%s\n' "$base -p \"\$(cat ${(qq)prompt_file})\" --output-format text 2>&1 | tee${tee_target}"
}
|
|
76
|
+
|
|
77
|
+
# parse_model_flag() — parse unified --worker-model / --verifier-model value
|
|
78
|
+
# Colon format: claude models (haiku/sonnet/opus) with effort → claude engine + effort
|
|
79
|
+
# codex models (gpt-*/spark) with reasoning → codex engine + reasoning
|
|
80
|
+
# plain name → claude engine (no effort override)
|
|
81
|
+
# Usage: parse_model_flag <value> <role>
|
|
82
|
+
# Output (stdout): "engine model [reasoning_or_effort]"
|
|
83
|
+
# e.g. "codex gpt-5.5 medium" | "claude opus max" | "claude sonnet"
|
|
84
|
+
# Returns: 0 on success, 1 on invalid format (error written to stderr)
|
|
85
|
+
parse_model_flag() {
  # Parse a --worker-model / --verifier-model value.
  # Args:    $1 = value ("model" or "model:level"), $2 = role for the error
  #          message (default: worker)
  # Output:  "engine model [level]" on stdout
  # Returns: 0 on success; 1 (message on stderr) when the value has more than
  #          one colon or an empty model/level around the colon.
  local value="$1"
  local role="${2:-worker}"

  # Count colons with parameter expansion instead of a tr|wc pipeline.
  local colons="${value//[^:]/}"
  local colon_count=${#colons}

  if (( colon_count > 1 )); then
    echo "ERROR: Invalid --${role}-model format '${value}'. Use 'model:effort' (claude) or 'model:reasoning' (codex)." >&2
    return 1
  fi

  if (( colon_count == 0 )); then
    # Plain name: claude engine, no effort override.
    printf '%s\n' "claude $value"
    return 0
  fi

  local model="${value%%:*}"
  local level="${value##*:}"
  if [[ -z "$model" || -z "$level" ]]; then
    # "opus:" or ":high" would otherwise emit a record with a missing field.
    echo "ERROR: Invalid --${role}-model format '${value}'. Use 'model:effort' (claude) or 'model:reasoning' (codex)." >&2
    return 1
  fi

  # Engine is inferred from the model name.
  case "$model" in
    haiku|sonnet|opus)
      printf '%s\n' "claude $model $level"
      ;;
    spark)
      # "spark" is shorthand for the full codex model id.
      printf '%s\n' "codex gpt-5.3-codex-spark $level"
      ;;
    *)
      printf '%s\n' "codex $model $level"
      ;;
  esac
}
|
|
113
|
+
|
|
114
|
+
# get_model_string() — return engine-appropriate model identifier string
|
|
115
|
+
# Claude: returns model name (e.g., "sonnet")
|
|
116
|
+
# Codex: returns model:reasoning (e.g., "gpt-5.5:high")
|
|
117
|
+
# Args: $1=engine (claude|codex) $2=model $3=codex_reasoning (optional)
|
|
118
|
+
# Output: model string on stdout
|
|
119
|
+
get_model_string() {
  # Render the model identifier for an engine.
  # Args:   $1 = engine, $2 = model, $3 = reasoning (optional)
  # Output: "model:reasoning" for codex with a non-empty reasoning level,
  #         otherwise just "model".
  local engine="$1" model="$2" reasoning="${3:-}"
  local rendered="$model"

  if [[ "$engine" = "codex" ]] && [[ -n "$reasoning" ]]; then
    rendered="${model}:${reasoning}"
  fi
  echo "$rendered"
}
|
|
130
|
+
|
|
131
|
+
# get_next_model() — return next model in Worker upgrade path, or empty at ceiling
|
|
132
|
+
# Usage: get_next_model <model_str>
|
|
133
|
+
# claude: "haiku"|"sonnet"|"opus"
|
|
134
|
+
# codex: "gpt-5.5:medium"|"gpt-5.5:high"|"gpt-5.5:xhigh"|"gpt-5.3-codex-spark:medium"|...
|
|
135
|
+
# Output: next model string, or empty string if at ceiling
|
|
136
|
+
get_next_model() {
  # Print the next rung of the Worker upgrade ladder for a model string, or an
  # empty line when the model is at its ceiling or is not recognised.
  # Args: $1 = current model string ("haiku", "gpt-5.5:medium", ...)
  local current="$1"
  local upgraded=""
  local family=""

  case "$current" in
    # Claude ladder: haiku -> sonnet -> opus (opus falls through as ceiling).
    haiku)  upgraded="sonnet" ;;
    sonnet) upgraded="opus" ;;
    # Both codex families share one reasoning ladder: low -> medium -> high -> xhigh.
    gpt-5.3-codex-spark:*|gpt-5.5:*)
      family="${current%%:*}"
      case "${current#*:}" in
        low)    upgraded="${family}:medium" ;;
        medium) upgraded="${family}:high" ;;
        high)   upgraded="${family}:xhigh" ;;
      esac
      ;;
  esac

  echo "$upgraded"
}
|
|
156
|
+
|
|
157
|
+
# check_model_upgrade() — evaluate and apply Worker model upgrade on repeated same-US failure
|
|
158
|
+
# Called in the fail verdict path. Upgrades Worker model when same US fails >= 2 consecutive times.
|
|
159
|
+
# Respects LOCK_WORKER_MODEL flag. Never modifies VERIFIER_MODEL.
|
|
160
|
+
# Usage: check_model_upgrade <us_id>
|
|
161
|
+
check_model_upgrade() {
  # Track consecutive failures of the same US and, after two in a row, move
  # the Worker to the next model returned by get_next_model.
  # Args:    $1 = id of the US that just failed
  # Returns: always 0
  # Globals read:    LOCK_WORKER_MODEL, WORKER_ENGINE, ITERATION
  # Globals written: _LAST_FAILED_US, _SAME_US_FAIL_COUNT, _MODEL_UPGRADED,
  #                  _ORIGINAL_WORKER_MODEL, _ORIGINAL_WORKER_CODEX_REASONING,
  #                  WORKER_MODEL, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING
  local current_us="$1"

  # Plain assignment rather than (( var++ )): the post-increment form returns
  # status 1 when the old value is 0 (i.e. right after the reset below), which
  # would abort a caller running with errexit.
  if [[ "$current_us" = "${_LAST_FAILED_US:-}" ]]; then
    _SAME_US_FAIL_COUNT=$(( ${_SAME_US_FAIL_COUNT:-0} + 1 ))
  else
    _SAME_US_FAIL_COUNT=1
    _LAST_FAILED_US="$current_us"
  fi

  # Respect --lock-worker-model: the counter is still tracked, no upgrade.
  if (( ${LOCK_WORKER_MODEL:-0} )); then
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=locked"
    return 0
  fi

  if (( _SAME_US_FAIL_COUNT < 2 )); then
    return 0
  fi

  # Only the codex engine reads WORKER_CODEX_MODEL. For claude, a populated
  # codex model name must not shadow WORKER_MODEL, otherwise get_next_model
  # sees an unknown id and reports "ceiling".
  local base_model="$WORKER_MODEL"
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    base_model="${WORKER_CODEX_MODEL:-$WORKER_MODEL}"
  fi

  local current_model_str
  current_model_str=$(get_model_string "$WORKER_ENGINE" "$base_model" "${WORKER_CODEX_REASONING:-}")

  local next_model
  next_model=$(get_next_model "$current_model_str")

  if [[ -z "$next_model" ]]; then
    # Already at ceiling: nothing to upgrade to.
    log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=false reason=already_max current=$current_model_str"
    return 0
  fi

  # Remember the starting point on the first upgrade only.
  if (( ${_MODEL_UPGRADED:-0} == 0 )); then
    _ORIGINAL_WORKER_MODEL="$WORKER_MODEL"
    _ORIGINAL_WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-}"
  fi
  _MODEL_UPGRADED=1

  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    # Codex strings are "model:reasoning"; split back into both globals.
    WORKER_CODEX_MODEL="${next_model%%:*}"
    WORKER_CODEX_REASONING="${next_model##*:}"
    WORKER_MODEL="$WORKER_CODEX_MODEL"
  else
    WORKER_MODEL="$next_model"
  fi

  log " Worker model upgraded: ${_ORIGINAL_WORKER_MODEL} → ${WORKER_MODEL} (same-US consecutive fail threshold)"
  log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
  log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
  _SAME_US_FAIL_COUNT=0 # restart the count for the upgraded model

  return 0
}
|
|
215
|
+
|
|
216
|
+
# record_us_failure() — track per-US cumulative failure count (dual counter, Option D)
|
|
217
|
+
# Unlike CONSECUTIVE_FAILURES which resets on pass, US_FAIL_HISTORY persists across phases.
|
|
218
|
+
# This enables prior-failure warnings when a US that struggled in per-US mode fails again in final verify.
|
|
219
|
+
# Usage: record_us_failure <us_id>
|
|
220
|
+
record_us_failure() {
  # Bump the cumulative failure count for a US in US_FAIL_HISTORY and warn
  # when that US had already failed before. Empty and "unknown" ids are ignored.
  # Args: $1 = US id. Returns: always 0.
  local us_id="$1"
  if [[ -z "$us_id" || "$us_id" = "unknown" ]]; then
    return 0
  fi

  local seen_before="${US_FAIL_HISTORY[$us_id]:-0}"
  local total=$(( seen_before + 1 ))
  US_FAIL_HISTORY[$us_id]=$total

  # Any earlier failure marks this US as fragile.
  if (( seen_before > 0 )); then
    log " [WARN] US $us_id has prior failure history (${total} total failures) — consider IL-2 AC quality re-assessment"
    log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${total}"
  fi

  return 0
}
|
|
235
|
+
|
|
236
|
+
# --- governance.md s7: Atomic file writes (tmux pattern) ---
|
|
237
|
+
# All file writes by the Leader use tmp+mv to prevent corruption.
|
|
238
|
+
atomic_write() {
  # Write stdin to a sibling temp file, then rename it over the target, so the
  # target is never observed half-written.
  # Args:    $1 = target path
  # Returns: 0 on success; non-zero if the write or the rename failed, in
  #          which case the target is left untouched and the temp file removed.
  local target="$1"
  local tmp="${target}.tmp.$$"

  if ! cat > "$tmp"; then
    # Never promote a partial file.
    rm -f -- "$tmp" 2>/dev/null
    return 1
  fi

  if ! mv -f -- "$tmp" "$target"; then
    rm -f -- "$tmp" 2>/dev/null
    return 1
  fi
}
|
|
244
|
+
|
|
245
|
+
# =============================================================================
|
|
246
|
+
# Scaffold Validation
|
|
247
|
+
# =============================================================================
|
|
248
|
+
|
|
249
|
+
validate_scaffold() {
  # Check that the four scaffold files exist, reporting every missing one, and
  # exit 1 if any is absent. On success, make sure $LOGS_DIR exists.
  # Globals read: WORKER_PROMPT_BASE, VERIFIER_PROMPT_BASE, CONTEXT_FILE,
  #               MEMORY_FILE, LOGS_DIR.
  local missing=0
  local label target

  for label in "Worker prompt" "Verifier prompt" "Context file" "Memory file"; do
    case "$label" in
      "Worker prompt")   target="$WORKER_PROMPT_BASE" ;;
      "Verifier prompt") target="$VERIFIER_PROMPT_BASE" ;;
      "Context file")    target="$CONTEXT_FILE" ;;
      "Memory file")     target="$MEMORY_FILE" ;;
    esac
    if [[ ! -f "$target" ]]; then
      log_error "$label not found: $target"
      missing=1
    fi
  done

  if (( missing != 0 )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
|
|
279
|
+
|
|
280
|
+
# =============================================================================
|
|
281
|
+
# Status Updates
|
|
282
|
+
# =============================================================================
|
|
283
|
+
|
|
284
|
+
# --- governance.md s7 step 8: Update status.json ---
|
|
285
|
+
update_status() {
  # Rewrite $STATUS_FILE with the current campaign state as a JSON object.
  # Args:    $1 = phase, $2 = last_result
  # Returns: 0 on success; 1 if the JSON could not be built (the existing
  #          status file is then left untouched).
  # Globals read: SLUG, BASELINE_COMMIT, ITERATION, MAX_ITER, WORKER_* and
  #   VERIFIER_* model/engine settings, VERIFY_MODE, CONSENSUS_MODE,
  #   CONSENSUS_SCOPE, CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT,
  #   CONSECUTIVE_FAILURES, VERIFIED_US (comma-separated), STATUS_FILE.
  local phase="$1"
  local last_result="$2"

  # jq does the string escaping; splicing raw values into a JSON template
  # yields invalid JSON as soon as a value contains a quote or a backslash.
  # Numeric fields go through --argjson so they stay JSON numbers.
  local payload
  payload=$(jq -n \
    --arg slug "${SLUG:-}" \
    --arg baseline_commit "${BASELINE_COMMIT:-none}" \
    --argjson iteration "${ITERATION:-0}" \
    --argjson max_iter "${MAX_ITER:-0}" \
    --arg phase "$phase" \
    --arg worker_model "${WORKER_MODEL:-}" \
    --arg verifier_model "${VERIFIER_MODEL:-}" \
    --arg worker_engine "${WORKER_ENGINE:-}" \
    --arg verifier_engine "${VERIFIER_ENGINE:-}" \
    --arg worker_codex_model "${WORKER_CODEX_MODEL:-}" \
    --arg worker_codex_reasoning "${WORKER_CODEX_REASONING:-}" \
    --arg verifier_codex_model "${VERIFIER_CODEX_MODEL:-}" \
    --arg verifier_codex_reasoning "${VERIFIER_CODEX_REASONING:-}" \
    --arg verify_mode "${VERIFY_MODE:-}" \
    --arg consensus_mode "${CONSENSUS_MODE:-}" \
    --arg last_result "$last_result" \
    --argjson consecutive_failures "${CONSECUTIVE_FAILURES:-0}" \
    --arg verified_us "${VERIFIED_US:-}" \
    --arg consensus_scope "${CONSENSUS_SCOPE:-}" \
    --argjson consensus_round "${CONSENSUS_ROUND:-0}" \
    --arg claude_verdict "${CLAUDE_VERDICT:-}" \
    --arg codex_verdict "${CODEX_VERDICT:-}" \
    --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{
      slug: $slug,
      baseline_commit: $baseline_commit,
      iteration: $iteration,
      max_iter: $max_iter,
      phase: $phase,
      worker_model: $worker_model,
      verifier_model: $verifier_model,
      worker_engine: $worker_engine,
      verifier_engine: $verifier_engine,
      worker_codex_model: $worker_codex_model,
      worker_codex_reasoning: $worker_codex_reasoning,
      verifier_codex_model: $verifier_codex_model,
      verifier_codex_reasoning: $verifier_codex_reasoning,
      verify_mode: $verify_mode,
      consensus_mode: $consensus_mode,
      last_result: $last_result,
      consecutive_failures: $consecutive_failures,
      verified_us: ($verified_us | if . == "" then [] else split(",") end)
    }
    + (if $consensus_mode != "off" then {
        consensus_scope: $consensus_scope,
        consensus_round: $consensus_round,
        claude_verdict: $claude_verdict,
        codex_verdict: $codex_verdict
      } else {} end)
    + {updated_at_utc: $updated_at_utc}') || {
    log_error "update_status: could not build status JSON (jq failed)"
    return 1
  }

  printf '%s\n' "$payload" | atomic_write "$STATUS_FILE"
}
|
|
327
|
+
|
|
328
|
+
# --- governance.md s7 step 8: Write result log ---
|
|
329
|
+
write_result_log() {
  # Write $LOGS_DIR/iter-NNN.result.md for one iteration: the result string,
  # a `git diff --stat HEAD` summary for $ROOT (plus up to 20 untracked files)
  # and a UTC timestamp.
  # Args: $1 = iteration number, $2 = result text
  local iter="$1"
  local result="$2"
  local padded
  padded=$(printf '%03d' "$iter")
  local report_path="$LOGS_DIR/iter-${padded}.result.md"

  local change_summary
  if git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    change_summary=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
  else
    change_summary="(no commits in repo — cannot diff)"
  fi

  # Untracked files never appear in `git diff`, so list them separately.
  local new_files
  new_files=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$new_files" ]]; then
    change_summary+=$'\n\nUntracked new files:\n'"${new_files}"
  fi

  {
    printf '%s\n' "# Iteration $iter Result" "" "## Status"
    echo "$result [leader-measured]"
    printf '%s\n' "" "## Files Changed" '```'
    echo "$change_summary"
    printf '%s\n' '```' "[git-measured]" "" "## Timestamp"
    date -u +%Y-%m-%dT%H:%M:%SZ
  } | atomic_write "$report_path"
}
|
|
366
|
+
|
|
367
|
+
# --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
|
|
368
|
+
archive_iter_artifacts() {
  # Copy the current done-claim and verdict files, when present, into
  # $LOGS_DIR under iteration-numbered names. Copy errors are silenced.
  # Args: $1 = iteration number
  local iter="$1"
  local tag
  tag=$(printf '%03d' "$iter")
  local dest_prefix="$LOGS_DIR/iter-${tag}"

  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    cp "$DONE_CLAIM_FILE" "${dest_prefix}-done-claim.json" 2>/dev/null
  fi
  if [[ -f "$VERDICT_FILE" ]]; then
    cp "$VERDICT_FILE" "${dest_prefix}-verify-verdict.json" 2>/dev/null
  fi
}
|
|
379
|
+
|
|
380
|
+
# --- US-024 (R12 P0): tmux pane / session lifecycle verification ---
|
|
381
|
+
# Returns 0 if pane is alive (#{pane_dead} == 0), non-zero otherwise.
|
|
382
|
+
# Empty pane id is treated as dead so callers don't have to pre-check.
|
|
383
|
+
_verify_pane_alive() {
  # Succeed only when tmux reports #{pane_dead} == 0 for the pane.
  # An empty pane id, or any tmux failure, counts as dead.
  # Args: $1 = tmux pane id
  local pane_id="$1"
  if [[ -z "$pane_id" ]]; then
    return 1
  fi

  local dead_flag
  dead_flag=$(tmux display-message -p -t "$pane_id" '#{pane_dead}' 2>/dev/null)
  if [[ "$dead_flag" == "0" ]]; then
    return 0
  fi
  return 1
}
|
|
390
|
+
# Returns 0 if the named tmux session is alive, non-zero otherwise.
|
|
391
|
+
_verify_session_alive() {
  # Succeed when `tmux has-session` finds the named session.
  # An empty name fails without calling tmux.
  # Args: $1 = tmux session name
  local session="$1"
  if [[ -n "$session" ]]; then
    tmux has-session -t "$session" 2>/dev/null
    return $?
  fi
  return 1
}
|
|
396
|
+
|
|
397
|
+
# --- US-022 (R10 P2-J): Normalized PRD US-list extractor ---
|
|
398
|
+
# Recognises `## US-005:`, `## US-005 -`, and bare `## US-005` headings.
|
|
399
|
+
# Returns one US-NNN per line, sorted unique.
|
|
400
|
+
_extract_prd_us_list() {
  # Print the distinct US ids named by `## US-NNN` headings in a PRD, one per
  # line, sorted. A heading counts only when the id is followed by whitespace,
  # ':', '-' or end of line. A missing file prints nothing and succeeds.
  # Args: $1 = PRD path
  local prd_file="$1"
  if [[ ! -f "$prd_file" ]]; then
    return 0
  fi

  local heading_re='^##[[:space:]]+US-[0-9]+([[:space:]:-]|$)'
  grep -oE "$heading_re" "$prd_file" 2>/dev/null \
    | sed -E 's/^##[[:space:]]+(US-[0-9]+).*$/\1/' \
    | sort -u
}
|
|
407
|
+
|
|
408
|
+
# --- US-022 (R10 P2-J): Quarantine stale iter-signal.json from prior mission ---
|
|
409
|
+
# Worker autonomy is preserved: the signal is moved (not deleted) to
|
|
410
|
+
# .sisyphus/quarantine/iter-signal.<epoch>.json so the operator can recover.
|
|
411
|
+
# Argument 4 (force) skips the PRD scope check and quarantines unconditionally
|
|
412
|
+
# (used for tests / invasive cleanups).
|
|
413
|
+
_quarantine_stale_signal() {
  # Move an iter-signal file whose us_id is not part of the current PRD into
  # <desk>/.sisyphus/quarantine/iter-signal.<epoch>.json.
  # Args:    $1 = signal file
  #          $2 = PRD file
  #          $3 = desk directory (default: $DESK)
  #          $4 = force; any value other than "", 0, false or no skips the
  #               us_id and PRD checks and quarantines unconditionally
  # Returns: always 0. A notice goes to stderr when a file was moved.
  local signal_file="$1"
  local prd_file="$2"
  local desk="${3:-${DESK:-}}"
  local force="${4:-}"
  [[ -f "$signal_file" ]] || return 0
  [[ -n "$desk" ]] || return 0

  local forced=1
  case "$force" in
    ""|0|false|no) forced=0 ;;
  esac

  # An unreadable signal (or missing jq) leaves stale_us empty.
  local stale_us
  stale_us=$(jq -r '.us_id // empty' "$signal_file" 2>/dev/null)

  if (( forced == 0 )); then
    # Signals without a concrete US id are left alone.
    if [[ -z "$stale_us" || "$stale_us" == "ALL" || "$stale_us" == "null" ]]; then
      return 0
    fi
    if [[ -f "$prd_file" ]]; then
      local prd_us_list
      prd_us_list=$(_extract_prd_us_list "$prd_file")
      # -F: compare the id literally rather than as a regular expression.
      if printf '%s\n' "$prd_us_list" | grep -qxF -- "$stale_us"; then
        return 0
      fi
    fi
  fi

  local qdir="$desk/.sisyphus/quarantine"
  mkdir -p "$qdir" 2>/dev/null
  local qfile="$qdir/iter-signal.$(date +%s).json"
  mv "$signal_file" "$qfile" 2>/dev/null && \
    echo "[lane] cross-mission stale us_id (${stale_us:-unknown}) — quarantined to $qfile" >&2
  return 0
}
|
|
436
|
+
|
|
437
|
+
# --- US-021 (R9 P2-I): Canonical block reason for consecutive_blocks counter ---
|
|
438
|
+
# Strips wrapper prefixes (hygiene_violated:, wrapped:) so the counter compares
|
|
439
|
+
# semantic reasons rather than surface labels. Truncates to 80 chars so noisy
|
|
440
|
+
# tail content doesn't fragment the same logical reason into different keys.
|
|
441
|
+
_canonical_block_reason() {
  # Normalise a block reason: drop one leading "hygiene_violated:" or
  # "wrapped:" prefix (plus following whitespace), then keep the first 80
  # characters of each line.
  # Args: $1 = raw reason
  local raw="$1"
  local prefix_re='^(hygiene_violated:[[:space:]]*|wrapped:[[:space:]]*)'
  echo "$raw" | sed -E "s/${prefix_re}//" | cut -c1-80
}
|
|
445
|
+
|
|
446
|
+
# --- US-018 (R6 P1-F): Test density enforcement (≥3 tests/AC) ---
|
|
447
|
+
# Counts ACs per US in the PRD (lines like `- AC1:`, `- AC2:`) and tests per US
|
|
448
|
+
# in the test-spec (lines like `### Test ` or `**T-`). Emits a warning when any
|
|
449
|
+
# US has < 3 tests / AC. Mode 'strict' returns non-zero so callers can `exit 1`.
|
|
450
|
+
# Reference: governance §7f.
|
|
451
|
+
_lint_test_density() {
  # Compare, per US, the number of `- ACn` bullets in the PRD with the number
  # of test headings (`### Test ` or `**T-`) in the test-spec, and warn when a
  # US has fewer than 3 tests per AC.
  # Args:    $1 = PRD file, $2 = test-spec file, $3 = mode (warn|strict,
  #          default warn)
  # Output:  warnings on stderr; one JSON line per offending US appended to
  #          test-density-audit.jsonl in $LOGS_DIR (or /tmp).
  # Returns: 1 only in strict mode when a US is below the threshold, else 0
  #          (also 0 when either input file is missing).
  local prd_file="$1"
  local spec_file="$2"
  local mode="${3:-warn}"
  local fail=0

  [[ -f "$prd_file" ]] || { echo "[lint] PRD missing: $prd_file" >&2; return 0; }
  [[ -f "$spec_file" ]] || { echo "[lint] test-spec missing: $spec_file" >&2; return 0; }

  local us_list
  us_list=$(grep -oE '^##[[:space:]]+US-[0-9]+' "$prd_file" 2>/dev/null | grep -oE 'US-[0-9]+' | sort -u)
  [[ -z "$us_list" ]] && return 0

  local audit_dir="${LOGS_DIR:-/tmp}"
  local audit_file="$audit_dir/test-density-audit.jsonl"
  [[ -d "$audit_dir" ]] || audit_file="/tmp/test-density-audit.jsonl"

  # Declare loop locals once, outside the loop: in zsh a bare `local name` for
  # an already-set local prints "name=value" on stdout (unless TYPESET_SILENT
  # is set), which polluted stdout from the second US onwards.
  local us ac_count test_count required msg
  while IFS= read -r us; do
    [[ -n "$us" ]] || continue

    # ACs inside this US section of the PRD; the section ends at the next
    # `## US-NNN` heading.
    ac_count=$(awk -v us="$us" '
      $0 ~ "^##[[:space:]]+"us"([[:space:]]|:|-|$)" { in_us=1; next }
      in_us && /^##[[:space:]]+US-[0-9]+/ { in_us=0 }
      in_us && /^[[:space:]]*-[[:space:]]+AC[0-9]+/ { c++ }
      END { print c+0 }
    ' "$prd_file")

    # Tests inside the matching US section of the test-spec.
    test_count=$(awk -v us="$us" '
      $0 ~ "^##[[:space:]]+"us"([[:space:]]|:|-|$)" { in_us=1; next }
      in_us && /^##[[:space:]]+US-[0-9]+/ { in_us=0 }
      in_us && (/^###[[:space:]]+Test[[:space:]]/ || /^\*\*T-/) { c++ }
      END { print c+0 }
    ' "$spec_file")

    # A US without ACs has nothing to measure against.
    if (( ac_count == 0 )); then
      continue
    fi

    required=$(( ac_count * 3 ))
    if (( test_count < required )); then
      fail=1
      msg="Test density warning: $us has $test_count tests for $ac_count ACs (ratio=$test_count/$ac_count, required >=3 per AC = $required)"
      echo "$msg" >&2
      printf '{"event":"test_density_warning","us_id":"%s","ac_count":%s,"test_count":%s,"required":%s,"timestamp":"%s"}\n' \
        "$us" "$ac_count" "$test_count" "$required" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$audit_file"
    fi
  done <<< "$us_list"

  if (( fail == 1 )); then
    if [[ "$mode" == "strict" ]]; then
      echo "[lint] Test density STRICT mode — exit 1 (run without --test-density-strict to continue)" >&2
      return 1
    fi
    echo "[lint] Test density WARN — see $audit_file (use --test-density-strict to fail init)" >&2
  fi
  return 0
}
|
|
508
|
+
|
|
509
|
+
# --- US-017 (R5 P0-D): Append A4 fallback audit entry ---
|
|
510
|
+
# Worker forgot to write iter-signal.json after done-claim → A4 fallback auto-generated a verify signal.
|
|
511
|
+
# This helper records the event for debugging context loss tracking.
|
|
512
|
+
# Per-mission ratio < 10% recommended (governance §1f).
|
|
513
|
+
_emit_a4_fallback_audit() {
  # Append one "a4_fallback" JSON line to a4-fallback-audit.jsonl in $LOGS_DIR
  # (or /tmp when LOGS_DIR is unset). Does nothing if that directory is missing.
  # Args: $1 = US id (default UNKNOWN), $2 = iteration (default 0),
  #       $3 = source (default inline)
  local us_id="${1:-UNKNOWN}"
  local iter="${2:-0}"
  local source_path="${3:-inline}"
  local audit_dir="${LOGS_DIR:-/tmp}"

  if [[ ! -d "$audit_dir" ]]; then
    return 0
  fi

  local audit_file="$audit_dir/a4-fallback-audit.jsonl"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo "")

  local record_fmt='{"event":"a4_fallback","iter":%s,"us_id":"%s","source":"%s","timestamp":"%s"}\n'
  printf "$record_fmt" "$iter" "$us_id" "$source_path" "$stamp" >> "$audit_file"
}
|
|
525
|
+
|
|
526
|
+
# --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
|
|
527
|
+
# Print an epoch-seconds value as a UTC ISO-8601 timestamp, or an empty line
# when no supported `date` form accepts it. `-d @N` (GNU/BusyBox) is tried
# first because GNU `date -r` takes a FILE rather than seconds; `-r N` is the
# BSD/macOS form.
_cost_log_iso8601() {
  local epoch="$1"
  date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || echo ""
}

# Print the size in bytes of a file, or 0 when it is not a regular file.
# The arithmetic expansion strips the padding some `wc` builds emit.
_cost_log_file_bytes() {
  local n=0
  [[ -f "$1" ]] && n=$(wc -c < "$1" 2>/dev/null || echo 0)
  echo $(( n ))
}

write_cost_log() {
  # Append one JSON line with a size-based token estimate and per-phase timing
  # for an iteration to $COST_LOG.
  # Args: $1 = iteration number
  # Globals read: LOGS_DIR, DONE_CLAIM_FILE, VERDICT_FILE, COST_LOG,
  #   COST_LOG_NOTE, ITER_WORKER_START/END, ITER_VERIFIER_START/END (epoch
  #   seconds), ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S.
  local iter="$1"
  local iter_padded
  iter_padded=$(printf '%03d' "$iter")

  local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
  local prompt_bytes claim_bytes verdict_bytes
  prompt_bytes=$(_cost_log_file_bytes "$worker_prompt_file")
  claim_bytes=$(_cost_log_file_bytes "${DONE_CLAIM_FILE:-}")
  verdict_bytes=$(_cost_log_file_bytes "${VERDICT_FILE:-}")

  # Estimate: 4 bytes per token.
  local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))

  # Per-phase timing. A missing END falls back to START (duration 0).
  local worker_start_time="" worker_end_time="" worker_duration_s=0
  local verifier_start_time="" verifier_end_time="" verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_start_time=$(_cost_log_iso8601 "$ITER_WORKER_START")
    worker_end_time=$(_cost_log_iso8601 "${ITER_WORKER_END:-$ITER_WORKER_START}")
    worker_duration_s=$(( ${ITER_WORKER_END:-$ITER_WORKER_START} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_start_time=$(_cost_log_iso8601 "$ITER_VERIFIER_START")
    verifier_end_time=$(_cost_log_iso8601 "${ITER_VERIFIER_END:-$ITER_VERIFIER_START}")
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$ITER_VERIFIER_START} - ITER_VERIFIER_START ))
  fi

  # Optional per-engine verifier durations (consensus mode).
  local consensus_fields=""
  if [[ -n "${ITER_VERIFIER_CLAUDE_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields},\"verifier_claude_duration_s\":${ITER_VERIFIER_CLAUDE_DURATION_S}"
  fi
  if [[ -n "${ITER_VERIFIER_CODEX_DURATION_S:-}" ]]; then
    consensus_fields="${consensus_fields},\"verifier_codex_duration_s\":${ITER_VERIFIER_CODEX_DURATION_S}"
  fi

  # Mark entries where no input file had any bytes, so they can be told apart
  # from broken logging. An explicit COST_LOG_NOTE wins.
  local cost_note="${COST_LOG_NOTE:-}"
  if [[ -z "$cost_note" ]] && (( prompt_bytes == 0 && claim_bytes == 0 && verdict_bytes == 0 )); then
    cost_note="no_actual_usage_recorded"
  fi

  printf '{"iteration":%s,"estimated_tokens":%s,"token_source":"estimated","prompt_bytes":%s,"claim_bytes":%s,"verdict_bytes":%s,"worker_start_time":"%s","worker_end_time":"%s","worker_duration_s":%s,"verifier_start_time":"%s","verifier_end_time":"%s","verifier_duration_s":%s%s,"note":"%s","timestamp":"%s"}\n' \
    "$iter" "$estimated_tokens" "$prompt_bytes" "$claim_bytes" "$verdict_bytes" \
    "$worker_start_time" "$worker_end_time" "$worker_duration_s" \
    "$verifier_start_time" "$verifier_end_time" "$verifier_duration_s" \
    "$consensus_fields" "$cost_note" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$COST_LOG"
}
|
|
573
|
+
|
|
574
|
+
# --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
|
|
575
|
+
write_campaign_jsonl() {
  # Append one compact JSON record describing an iteration to $CAMPAIGN_JSONL.
  # Args: $1 = iteration number, $2 = US id (default unknown),
  #       $3 = verdict (default unknown; used when CLAUDE_VERDICT is unset)
  # Globals read: ITER_WORKER_START/END, ITER_VERIFIER_START/END,
  #   US_FAIL_HISTORY (associative array), WORKER_MODEL, WORKER_ENGINE,
  #   VERIFIER_ENGINE, CLAUDE_VERDICT, CODEX_VERDICT, CONSENSUS_MODE,
  #   CONSECUTIVE_FAILURES, _MODEL_UPGRADED, ROOT, SLUG, CAMPAIGN_JSONL.
  local iter="$1"
  local us_id="${2:-unknown}"
  local verdict="${3:-unknown}"

  # A phase that has started but not ended is measured up to "now".
  local worker_duration_s=0
  local verifier_duration_s=0
  if [[ -n "${ITER_WORKER_START:-}" ]]; then
    worker_duration_s=$(( ${ITER_WORKER_END:-$(date +%s)} - ITER_WORKER_START ))
  fi
  if [[ -n "${ITER_VERIFIER_START:-}" ]]; then
    verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
  fi

  # Serialise the associative array as a JSON object. `key` and `first` are
  # declared local so the loop does not leak or clobber names in the sourcing
  # script.
  local us_fail_history_json="{}"
  local key
  local first=1
  if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
    us_fail_history_json="{"
    for key in "${(@k)US_FAIL_HISTORY}"; do
      (( first )) || us_fail_history_json+=","
      us_fail_history_json+="\"$key\":${US_FAIL_HISTORY[$key]}"
      first=0
    done
    us_fail_history_json+="}"
  fi

  jq -nc \
    --argjson iter "$iter" \
    --arg us_id "$us_id" \
    --arg worker_model "$WORKER_MODEL" \
    --arg worker_engine "$WORKER_ENGINE" \
    --arg verifier_engine "$VERIFIER_ENGINE" \
    --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
    --arg codex_verdict "${CODEX_VERDICT:-N/A}" \
    --arg consensus_mode "$CONSENSUS_MODE" \
    --argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
    --argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
    --argjson us_fail_history "$us_fail_history_json" \
    --argjson duration_worker_s "$worker_duration_s" \
    --argjson duration_verifier_s "$verifier_duration_s" \
    --arg project_root "$ROOT" \
    --arg slug "$SLUG" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
    >> "$CAMPAIGN_JSONL"
}
|
|
622
|
+
|
|
623
|
+
# --- AC4: Generate campaign-report.md on all terminal states ---
#
# generate_campaign_report — render the end-of-campaign markdown summary.
#
# Terminal state is derived from the sentinels: COMPLETE if $COMPLETE_SENTINEL
# exists, BLOCKED if $BLOCKED_SENTINEL exists, TIMEOUT otherwise.
#
# Globals read:    COMPLETE_SENTINEL, BLOCKED_SENTINEL, LOGS_DIR, START_TIME,
#                  BASELINE_COMMIT, ROOT, WITH_SELF_VERIFICATION,
#                  WITH_SELF_VERIFICATION_REQUESTED, SV_SKIPPED_REASON, SLUG,
#                  DESK, ITERATION, MAX_ITER, WORKER_MODEL, WORKER_ENGINE,
#                  VERIFIER_MODEL, VERIFIER_ENGINE, CONSENSUS_MODE,
#                  CONSENSUS_MODEL, FINAL_CONSENSUS_MODEL, VERIFIED_US,
#                  CONSECUTIVE_FAILURES, COST_LOG
# Globals written: the one-shot guard flag set immediately below
# Outputs:         $LOGS_DIR/campaign-report.md (through atomic_write); a
#                  pre-existing report is first renamed to campaign-report-vN.md
# Returns:         0 when the report was already generated in this run
generate_campaign_report() {
  # Guard: idempotent — only generate once per campaign run
  if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
  CAMPAIGN_REPORT_GENERATED=1

  local final_status="UNKNOWN"
  local blocked_reason=""
  local blocked_category=""
  if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
    # governance §1f BLOCKED Surfacing (4-channel): markdown sentinel +
    # JSON sidecar + status + console + report. Pull both Reason and
    # Category lines for the report. Tolerate legacy sentinels missing
    # either field (back-compat).
    blocked_reason=$(grep -m1 -E '^[Rr]eason:[[:space:]]*' "$BLOCKED_SENTINEL" 2>/dev/null \
      | sed -E 's/^[Rr]eason:[[:space:]]*//' \
      || true)
    blocked_category=$(grep -m1 -E '^[Cc]ategory:[[:space:]]*' "$BLOCKED_SENTINEL" 2>/dev/null \
      | sed -E 's/^[Cc]ategory:[[:space:]]*//' \
      || true)
  else final_status="TIMEOUT"; fi

  local report_file="$LOGS_DIR/campaign-report.md"

  # AC9: Version existing report before writing new one
  if [[ -f "$report_file" ]]; then
    local v=1
    while [[ -f "${report_file%.md}-v${v}.md" ]]; do (( v++ )); done
    mv "$report_file" "${report_file%.md}-v${v}.md"
  fi

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  # Diff against the recorded baseline when there is one, else against HEAD.
  local baseline_commit_val="${BASELINE_COMMIT:-none}"
  local files_changed=""
  if [[ "$baseline_commit_val" != "none" ]]; then
    files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
  elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
    files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
  else
    files_changed="(no commits in repo — cannot diff)"
  fi
  # Include untracked new files (capped at 20 entries)
  local untracked
  untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
  if [[ -n "$untracked" ]]; then
    files_changed="${files_changed}

Untracked new files:
${untracked}"
  fi

  local sv_summary=""
  if (( WITH_SELF_VERIFICATION )); then
    local sv_report
    sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
    if [[ -n "$sv_report" ]]; then
      sv_summary="See: $sv_report"
    else
      sv_summary="SV report generation pending — will be appended after this report."
    fi
  elif [[ "${WITH_SELF_VERIFICATION_REQUESTED:-0}" == "1" ]]; then
    sv_summary="N/A — --with-self-verification requested but skipped (reason: ${SV_SKIPPED_REASON:-unknown})"
  else
    sv_summary="N/A — --with-self-verification not enabled"
  fi

  # Everything written to stdout inside this group becomes the report body.
  {
    echo "# Campaign Report: $SLUG"
    echo ""
    echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
    echo ""
    echo "## Objective"
    local prd_file="$DESK/plans/prd-$SLUG.md"
    if [[ -f "$prd_file" ]]; then
      grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
    else
      echo "(PRD not found)"
    fi
    echo ""
    echo "## Execution Summary"
    echo "- Terminal state: $final_status"
    if [[ -n "$blocked_reason" ]]; then
      echo "- Blocked reason: $blocked_reason"
    fi
    if [[ -n "$blocked_category" ]]; then
      echo "- Blocked category: $blocked_category"
    fi
    echo "- Iterations run: $ITERATION / $MAX_ITER"
    echo "- Elapsed: ${elapsed}s"
    echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
    echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
    echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
    echo ""
    echo "## US Status"
    echo "- Verified: ${VERIFIED_US:-none}"
    echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
    echo ""
    echo "## Verification Results"
    # Loop variables are declared ONCE, before the loop: in zsh, repeating a
    # bare `local name` for a parameter that is already set prints
    # `name=value` on stdout, which here would leak into the report.
    local ri=1 iter_dc us_id
    while (( ri <= ITERATION )); do
      iter_dc="$LOGS_DIR/iter-$(printf '%03d' $ri)-done-claim.json"
      if [[ -f "$iter_dc" ]]; then
        us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
        echo "- $(basename "$iter_dc"): us_id=$us_id"
      fi
      (( ri++ ))
    done
    echo ""
    echo "## Issues Encountered"
    local fi_found=0
    local fi_i=1 fix_f
    while (( fi_i <= ITERATION )); do
      fix_f="$LOGS_DIR/iter-$(printf '%03d' $fi_i).fix-contract.md"
      if [[ -f "$fix_f" ]]; then
        echo "- $(basename "$fix_f")"
        fi_found=1
      fi
      (( fi_i++ ))
    done
    (( fi_found == 0 )) && echo "- None"
    echo ""
    echo "## Cost & Performance"
    if [[ -f "$COST_LOG" ]]; then
      local total_tokens=0 cost_line tokens
      while IFS= read -r cost_line; do
        tokens=$(echo "$cost_line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
        # jq prints nothing (and exits 0) for a blank line; anything that is
        # not a plain number would break or skew the arithmetic below.
        [[ "$tokens" =~ ^[0-9]+([.][0-9]+)?$ ]] || tokens=0
        total_tokens=$(( total_tokens + tokens ))
      done < "$COST_LOG"
      echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
      echo "- See: cost-log.jsonl for per-iteration breakdown"
    else
      echo "- No cost data available"
    fi
    echo ""
    echo "## SV Summary"
    echo "$sv_summary"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$files_changed"
    echo '```'
    echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
    echo ""
    echo "## Suggested Next Actions"
    if [[ "$final_status" == "COMPLETE" ]]; then
      echo "- Review verified US list and plan next feature campaign or next cycle"
      echo "- Consider re-run with --mode improve for quality refinement"
      echo "- Archive campaign artifacts and update project documentation"
    elif [[ "$final_status" == "BLOCKED" ]]; then
      echo "- Review PRD acceptance criteria for the failing US"
      echo "- Check circuit breaker history (consecutive failures: $CONSECUTIVE_FAILURES)"
      echo "- Consider relaxing verifier criteria if false-negative pattern detected"
    elif [[ "$final_status" == "TIMEOUT" ]]; then
      echo "- Increase --max-iter to allow more iterations for completion"
      echo "- Reduce scope by splitting remaining US into a follow-up campaign"
      echo "- Review last iteration done-claim for partial progress"
    fi
  } | atomic_write "$report_file"

  log "Campaign report written: $report_file"
}
|
|
791
|
+
|
|
792
|
+
# generate_sv_report — ask the claude CLI for a self-verification report.
#
# Runs `claude --print` in the background with a watchdog that kills it after
# ${_SV_TIMEOUT_SECS:-300} seconds. The outcome (success reference, timeout or
# failure note) is appended to $LOGS_DIR/campaign-report.md.
#
# Globals read:    WITH_SELF_VERIFICATION, TMUX, LOGS_DIR, _SV_TIMEOUT_SECS
# Globals written: the one-shot guard flag checked first below
# Outputs:         $LOGS_DIR/self-verification-report-NNN.md (next free NNN)
# Returns:         always 0 — every failure mode degrades gracefully
generate_sv_report() {
  # AC1-boundary: SV_REPORT_GENERATED guard (init + check + set = 3 occurrences)
  if (( SV_REPORT_GENERATED )); then return 0; fi

  # AC3-negative: early return when the WITH_SELF_VERIFICATION flag is not set
  if (( ! WITH_SELF_VERIFICATION )); then return 0; fi

  # Defense-in-depth: skip in tmux runner even if WITH_SELF_VERIFICATION leaks through
  # (claude --print hangs without TTY/stdin in tmux pane; SV is Agent-mode only)
  if [[ -n "${TMUX:-}" ]]; then
    log "SV report skipped: tmux runner detected (Agent-mode only feature)"
    return 0
  fi

  SV_REPORT_GENERATED=1

  # AC4: check claude CLI availability — graceful degradation, not exit 1
  if ! command -v claude &>/dev/null; then
    echo "SV report generation failed: claude CLI not found" >> "$LOGS_DIR/campaign-report.md"
    return 0
  fi

  # AC2: versioning — find next available sv_version slot (in logs dir)
  local sv_version=1
  while [[ -f "$LOGS_DIR/self-verification-report-$(printf '%03d' $sv_version).md" ]]; do
    (( sv_version++ ))
  done
  local sv_report_file="$LOGS_DIR/self-verification-report-$(printf '%03d' $sv_version).md"

  log "Generating SV report: $(basename "$sv_report_file")"

  # AC5: configurable timeout with in-process watchdog
  local _sv_timeout_secs="${_SV_TIMEOUT_SECS:-300}"
  local _sv_timeout_file="$LOGS_DIR/.sv_timeout_${$}.tmp"
  rm -f "$_sv_timeout_file"

  # Spawn claude CLI in background — write to sv_report_file
  # </dev/null prevents the spawned process from blocking on inherited stdin (e.g. tmux pane)
  claude --print "Analyze campaign artifacts in $LOGS_DIR and generate a self-verification report with sections: 1. Automated Validation Summary, 2. Failure Deep Dive, 3. Worker Process Quality, 4. Verifier Judgment Quality, 5. AC Lifecycle, 6. Test-Spec Adherence, 7. Patterns: Strengths & Weaknesses, 8. Recommendations for Next Cycle, 9. Cost & Performance, 10. Blind Spots." \
    </dev/null > "$sv_report_file" 2>/dev/null &
  local _sv_pid=$!

  # AC5: watchdog — signals timeout file THEN kills _sv_pid after _sv_timeout_secs.
  # The sleep runs as a child we `wait` on, and a TERM trap reaps it, so that
  # cancelling the watchdog does not leave an orphaned sleep behind for the
  # remainder of the timeout.
  local _sv_watchdog
  (
    _sv_sleep_pid=""
    trap '[[ -n "$_sv_sleep_pid" ]] && kill "$_sv_sleep_pid" 2>/dev/null; exit 0' TERM
    sleep "$_sv_timeout_secs" &
    _sv_sleep_pid=$!
    wait "$_sv_sleep_pid"
    if kill -0 "$_sv_pid" 2>/dev/null; then
      touch "$_sv_timeout_file"
      kill "$_sv_pid" 2>/dev/null
    fi
  ) &
  _sv_watchdog=$!

  wait "$_sv_pid"
  local _sv_exit=$?
  kill "$_sv_watchdog" 2>/dev/null
  wait "$_sv_watchdog" 2>/dev/null

  # AC5: detect timeout — exit code 124 or watchdog file present
  if [[ "$_sv_exit" == 124 ]] || [[ -f "$_sv_timeout_file" ]]; then
    rm -f "$_sv_timeout_file"
    local _timeout_msg="SV report generation TIMEOUT: exceeded ${_sv_timeout_secs}s"
    echo "$_timeout_msg" >> "$sv_report_file"
    echo "$_timeout_msg" >> "$LOGS_DIR/campaign-report.md"
    log "$_timeout_msg"
    return 0
  fi

  # A failed or silent claude run must not be advertised as a finished report.
  if (( _sv_exit != 0 )) || [[ ! -s "$sv_report_file" ]]; then
    local _fail_msg="SV report generation failed: claude exit status ${_sv_exit}"
    [[ -s "$sv_report_file" ]] || _fail_msg+=" (no report output)"
    echo "$_fail_msg" >> "$sv_report_file"
    echo "$_fail_msg" >> "$LOGS_DIR/campaign-report.md"
    log "$_fail_msg"
    return 0
  fi

  # On success: append reference to campaign-report (full path, cross-directory)
  echo "See: $sv_report_file" >> "$LOGS_DIR/campaign-report.md"
  log "SV report written: $sv_report_file"
  return 0
}
|
|
867
|
+
|
|
868
|
+
# =============================================================================
|
|
869
|
+
# Sentinel Writers
|
|
870
|
+
# =============================================================================
|
|
871
|
+
|
|
872
|
+
# --- governance.md s7: Only the Leader writes sentinels ---
|
|
873
|
+
# write_complete_sentinel SUMMARY [US_ID]
#
# Writes the COMPLETE sentinel through atomic_write and logs its path.
# US_ID falls back to $CURRENT_US and then to "ALL". The first line is
# "COMPLETE: <us_id>" — the same first-line contract as writeSentinel(complete)
# on the Node side, so wrappers can parse `head -1 | awk '{print $2}'`.
#
# Globals read: CURRENT_US, ITERATION, COMPLETE_SENTINEL
write_complete_sentinel() {
  local summary="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"

  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Assemble the whole document first, then hand it over in one write.
  local sentinel_body="COMPLETE: $us_id
Summary: $summary

# Campaign Complete

Completed at iteration $ITERATION.

Timestamp: $stamp"

  echo "$sentinel_body" | atomic_write "$COMPLETE_SENTINEL"
  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
|
|
889
|
+
|
|
890
|
+
# P1-D Cross-US dependency detection.
#
# _classify_cross_us_or_metric TEXT
#   Scans TEXT (a verdict summary or worker signal body) for cross-US
#   dependency tokens and prints "cross_us_dep" if any token matches,
#   "metric_failure" otherwise. governance.md §1f locks the token list —
#   keep the pattern below in sync with that section.
#     English: "depends on US-", "blocking US-", "awaits US-",
#              "post-iter US-", "requires US-<digits>", "cross-US"
#     Korean:  "US-NNN 산출물", "신규 US-", "post-iter"
_classify_cross_us_or_metric() {
  local text="$1"
  local verdict="metric_failure"

  # Start from the default and upgrade it on the first token hit.
  if echo "$text" | grep -qE 'depends on US-|blocking US-|awaits US-|post-iter US-|requires US-[0-9]+|cross-US|US-[0-9]+ 산출물|신규 US-|post-iter'; then
    verdict="cross_us_dep"
  fi

  echo "$verdict"
}
|
|
905
|
+
|
|
906
|
+
# P1-D Failure Taxonomy: derive `recoverable` from reason_category.
# governance.md §1f defines the 6 reason_category values (metric_failure,
# cross_us_dep, context_limit, infra_failure, repeat_axis, mission_abort).
# Wrappers MUST branch on reason_category; failure_category is diagnostic only.
#
# _blocked_recoverable_for_category CATEGORY
#   Prints "true" for metric_failure, cross_us_dep and infra_failure;
#   prints "false" for every other value, including unknown ones.
_blocked_recoverable_for_category() {
  local category="$1"

  if [[ "$category" == "metric_failure" \
     || "$category" == "cross_us_dep" \
     || "$category" == "infra_failure" ]]; then
    echo "true"
  else
    echo "false"
  fi
}
|
|
918
|
+
# _blocked_action_for_category CATEGORY
#   Prints the suggested_action for a reason_category:
#     metric_failure, cross_us_dep -> retry_after_fix
#     infra_failure                -> restart
#     context_limit, repeat_axis   -> next_mission_chain
#     mission_abort / anything else -> terminal_alert
_blocked_action_for_category() {
  # terminal_alert is both the mission_abort answer and the fallback.
  local action="terminal_alert"

  case "$1" in
    metric_failure|cross_us_dep) action="retry_after_fix" ;;
    infra_failure)               action="restart" ;;
    context_limit|repeat_axis)   action="next_mission_chain" ;;
  esac

  echo "$action"
}
|
|
927
|
+
|
|
928
|
+
# write_blocked_sentinel REASON [US_ID] [CATEGORY]
#
# Writes the BLOCKED JSON sidecar and then the markdown sentinel, and logs
# both paths.
#   REASON    free-text detail (reason_detail in JSON, "Reason:" in markdown)
#   US_ID     defaults to $CURRENT_US, then "ALL"
#   CATEGORY  reason_category, default metric_failure
#             (see governance.md §1f Failure Taxonomy for the 6-value enum)
#
# Globals read: CURRENT_US, BLOCKED_SENTINEL, DESK, SLUG, ITERATION
# Outputs:      ${BLOCKED_SENTINEL%.md}.json and $BLOCKED_SENTINEL, both
#               through atomic_write; one error line on stderr via log_error
write_blocked_sentinel() {
  local reason="$1"
  # Optional 2nd arg: us_id (defaults to ALL).
  local us_id="${2:-${CURRENT_US:-ALL}}"
  # Optional 3rd arg: reason_category (default metric_failure).
  local category="${3:-metric_failure}"
  local recoverable suggested_action json_path
  recoverable=$(_blocked_recoverable_for_category "$category")
  suggested_action=$(_blocked_action_for_category "$category")
  json_path="${BLOCKED_SENTINEL%.md}.json"
  local now_iso
  now_iso=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # US-020 R8 P1-H: Blocked exit hygiene auto-check.
  # Worker is required to update memory.md (Blocking History) and latest.md (Known Issues)
  # before signalling blocked. We compare those file mtimes against the sentinel write time;
  # if either is older than 5 minutes the worker skipped the hygiene step and we tag the
  # JSON sidecar so audit pipelines (governance §1f, 5th channel) can see it.
  local hygiene_violated=false
  local hygiene_now
  hygiene_now=$(date +%s 2>/dev/null || echo 0)
  if [[ -n "${DESK:-}" && -n "${SLUG:-}" && "$hygiene_now" -gt 0 ]]; then
    local mem_file="$DESK/memos/$SLUG-memory.md"
    local lat_file="$DESK/context/$SLUG-latest.md"
    # Declared once, outside the loop: a repeated bare `local` of an
    # already-set parameter makes zsh print `name=value` on stdout.
    local hf f_mt
    for hf in "$mem_file" "$lat_file"; do
      if [[ -f "$hf" ]]; then
        # GNU form first. Trying BSD `stat -f %m` first on GNU stat does not
        # fail cleanly: there -f means --file-system, so it prints filesystem
        # details on stdout before the fallback appends the real mtime.
        # BSD stat rejects -c without writing to stdout, so this order is safe.
        f_mt=$(stat -c %Y "$hf" 2>/dev/null) \
          || f_mt=$(stat -f %m "$hf" 2>/dev/null) \
          || f_mt=0
        # An unreadable mtime counts as epoch 0, i.e. as stale.
        [[ "$f_mt" =~ ^[0-9]+$ ]] || f_mt=0
        if (( hygiene_now - f_mt > 300 )); then
          hygiene_violated=true
          break
        fi
      fi
    done
  fi

  # P1-D Write Order Contract (governance.md §1f):
  #   1. JSON sidecar FIRST (wrapper-friendly, jq parseable).
  #   2. markdown sentinel SECOND (legacy, watched by older wrappers).
  # Invariant: markdown exists ⇒ JSON exists. Wrappers watch markdown,
  # then read JSON; if JSON not yet visible (rare), retry up to 5×50ms.
  # atomic_write provides per-file rename atomicity; cross-file ordering
  # is enforced by the explicit two-call sequence below.
  jq -n \
    --arg sv "2.0" \
    --arg slug "${SLUG:-unknown}" \
    --arg us_id "$us_id" \
    --argjson iter "${ITERATION:-0}" \
    --arg utc "$now_iso" \
    --arg category "$category" \
    --arg detail "$reason" \
    --argjson recoverable "$recoverable" \
    --arg action "$suggested_action" \
    --argjson hygiene "$hygiene_violated" \
    '{
      schema_version: $sv,
      slug: $slug,
      us_id: $us_id,
      blocked_at_iter: $iter,
      blocked_at_utc: $utc,
      reason_category: $category,
      reason_detail: $detail,
      failure_category: null,
      recoverable: $recoverable,
      suggested_action: $action,
      meta: { blocked_hygiene_violated: $hygiene }
    }' | atomic_write "$json_path"

  echo "BLOCKED: $us_id
Reason: $reason
Category: $category

# Campaign Blocked

Blocked at iteration $ITERATION.

Timestamp: $now_iso" | atomic_write "$BLOCKED_SENTINEL"

  log_error "Campaign BLOCKED: [$category] $reason"
  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
  log "BLOCKED sidecar written: $json_path"
}
|
|
1011
|
+
|
|
1012
|
+
# =============================================================================
|
|
1013
|
+
# PRD Tracking
|
|
1014
|
+
# =============================================================================
|
|
1015
|
+
|
|
1016
|
+
# --- US-004: Live PRD update helpers ---
|
|
1017
|
+
compute_prd_hash() {
|
|
1018
|
+
local prd_file="${PRD_FILE:-}"
|
|
1019
|
+
if [[ -z "$prd_file" && -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
|
|
1020
|
+
prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1021
|
+
fi
|
|
1022
|
+
if [[ -f "$prd_file" ]]; then
|
|
1023
|
+
md5 -q "$prd_file" 2>/dev/null || md5sum "$prd_file" 2>/dev/null | cut -d' ' -f1
|
|
1024
|
+
else
|
|
1025
|
+
echo ""
|
|
1026
|
+
fi
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
# count_prd_us — print the PRD's user-story ids as a sorted, de-duplicated,
# comma-separated list (e.g. "US-001,US-002").
#
# Ids are taken from lines starting with "### US-<digits>". The PRD path is
# resolved like compute_prd_hash does: $PRD_FILE, else
# $DESK/plans/prd-$SLUG.md. Prints an empty line when the file is missing.
count_prd_us() {
  local target="${PRD_FILE:-}"

  if [[ -z "$target" ]]; then
    if [[ -n "${DESK:-}" && -n "${SLUG:-}" ]]; then
      target="$DESK/plans/prd-$SLUG.md"
    fi
  fi

  if [[ ! -f "$target" ]]; then
    echo ""
    return
  fi

  # headings -> bare ids -> unique+sorted -> joined with commas, no trailing comma
  grep -oE '^### US-[0-9]+' "$target" 2>/dev/null \
    | sed 's/^### //' \
    | sort -u \
    | tr '\n' ',' \
    | sed 's/,$//'
}
|
|
1040
|
+
|
|
1041
|
+
# split_prd_by_us PRD_FILE SLUG
#
# Splits PRD_FILE into one file per user story, written next to it as
# prd-<SLUG>-US-<n>.md. A story starts at a "### US-<digits>:" heading and
# runs up to the next such heading; text before the first story is dropped.
# Does nothing (status 0) when the file is missing or has no "### US-" line.
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"
  local out_dir
  out_dir="$(dirname "$prd_file")"

  [[ -f "$prd_file" ]] || return 0

  # No story headings at all: nothing to split.
  grep -qE '^### US-' "$prd_file" 2>/dev/null || return 0

  awk -v dir="$out_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      # New story: finish the previous file and switch the target.
      if (target != "") close(target)
      match($0, /US-[0-9]+/)
      target = dir "/prd-" slug "-" substr($0, RSTART, RLENGTH) ".md"
    }
    target != "" { print > target }
  ' "$prd_file"
}
|
|
1065
|
+
|
|
1066
|
+
# split_test_spec_by_us TS_FILE SLUG
#
# Splits TS_FILE into one file per user story, written next to it as
# test-spec-<SLUG>-US-<n>.md. A story starts at a "## US-<digits>:" heading.
# Everything before the first story heading is the shared header and is
# copied to the top of every split file.
#
# Best-effort: always returns 0. Does nothing when the file is missing or
# contains no "## US-<digits>:" heading.
#
# The split is done in a single awk pass. Compared with a split-then-glob
# approach this (a) uses the same heading pattern for the guard and the
# split, so a heading without a colon can no longer pass the guard yet
# produce no files, (b) only touches files for stories present in TS_FILE —
# leftover split files from earlier runs are not re-prefixed with the header —
# and (c) needs no temporary header file.
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  # Same pattern as the awk rule below.
  grep -qE '^## US-[0-9]+:' "$ts_file" 2>/dev/null || return 0

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      out = dir "/test-spec-" slug "-" substr($0, RSTART, RLENGTH) ".md"
      # Start (or restart) the story file with the shared header.
      printf "%s", header > out
    }
    # Still before the first story: accumulate the shared header.
    out == "" { header = header $0 "\n"; next }
    { print > out }
  ' "$ts_file" || log_error "split_test_spec_by_us: failed to split $ts_file"

  return 0
}
|
|
1100
|
+
|
|
1101
|
+
# check_prd_update — detect a live PRD edit between iterations.
#
# Compares the current PRD hash with $PREV_PRD_HASH. On a change it sets
# _PRD_CHANGED=1, re-splits the PRD and test spec per user story, and
# replaces US_LIST with the current story list; otherwise _PRD_CHANGED=0.
# Either way PREV_PRD_HASH and PREV_PRD_US_LIST are updated and a debug
# line with the before/after hashes and story counts is logged.
#
# Globals read:    PREV_PRD_HASH, PREV_PRD_US_LIST, PRD_FILE, TEST_SPEC_FILE, SLUG
# Globals written: _PRD_CHANGED, US_LIST, PREV_PRD_HASH, PREV_PRD_US_LIST
check_prd_update() {
  local current_hash current_us_list us_count_prev us_count_now new_us
  current_hash=$(compute_prd_hash)
  current_us_list=$(count_prd_us)
  # `grep -c` already prints 0 when nothing matches but exits 1; `|| true`
  # only neutralises that status. (`|| echo 0` would append a second "0".)
  us_count_prev=$(printf '%s\n' "$PREV_PRD_US_LIST" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)
  us_count_now=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || true)

  _PRD_CHANGED=0

  if [[ "$current_hash" != "$PREV_PRD_HASH" ]]; then
    _PRD_CHANGED=1
    # Story ids present now but absent from the previous list, comma-joined.
    new_us=$(printf '%s\n' "$current_us_list" | tr ',' '\n' | awk -v prev="$PREV_PRD_US_LIST" '
      BEGIN {
        split(prev, p, ",")
        for (i in p) {
          seen[p[i]] = 1
        }
      }
      {
        if ($0 != "" && !seen[$0]) {
          if (out == "") out = $0
          else out = out "," $0
        }
      }
      END { print out }
    ')
    log_debug "prd_changed=true prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now} new_us=${new_us:-none}"
    split_prd_by_us "$PRD_FILE" "$SLUG"
    split_test_spec_by_us "$TEST_SPEC_FILE" "$SLUG"
    US_LIST="$current_us_list"
  else
    log_debug "prd_changed=false prd_hash_prev=${PREV_PRD_HASH:-none} prd_hash_now=${current_hash:-none} us_count_prev=${us_count_prev} us_count_now=${us_count_now}"
  fi

  PREV_PRD_HASH="$current_hash"
  PREV_PRD_US_LIST="$current_us_list"
}
|
|
1138
|
+
|
|
1139
|
+
# =============================================================================
|
|
1140
|
+
# Circuit Breakers: Stale Context Detection
|
|
1141
|
+
# =============================================================================
|
|
1142
|
+
|
|
1143
|
+
# --- governance.md s7 step 8: Stale context detection ---
|
|
1144
|
+
compute_context_hash() {
|
|
1145
|
+
# Hash context-latest.md + memory.md + verified_us from status.json
|
|
1146
|
+
# This prevents false stale detection when Worker updates memory but not context,
|
|
1147
|
+
# or when verified_us changes between iterations
|
|
1148
|
+
local hash_input=""
|
|
1149
|
+
if [[ -f "$CONTEXT_FILE" ]]; then
|
|
1150
|
+
hash_input+=$(md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1)
|
|
1151
|
+
fi
|
|
1152
|
+
local memory_file="$DESK/memos/${SLUG}-memory.md"
|
|
1153
|
+
if [[ -f "$memory_file" ]]; then
|
|
1154
|
+
hash_input+=$(md5 -q "$memory_file" 2>/dev/null || md5sum "$memory_file" 2>/dev/null | cut -d' ' -f1)
|
|
1155
|
+
fi
|
|
1156
|
+
if [[ -f "$STATUS_FILE" ]]; then
|
|
1157
|
+
hash_input+=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
|
|
1158
|
+
fi
|
|
1159
|
+
echo -n "$hash_input" | md5 -q 2>/dev/null || echo -n "$hash_input" | md5sum 2>/dev/null | cut -d' ' -f1
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
# check_stale_context — circuit breaker for iterations that change nothing.
#
# Compares the current context hash with $PREV_CONTEXT_HASH. An unchanged
# hash increments STALE_CONTEXT_COUNT; a changed hash resets it to 0.
# Returns 1 once the count reaches 3 (PREV_CONTEXT_HASH is left as-is in
# that case), otherwise stores the new hash and returns 0.
#
# Globals read/written: PREV_CONTEXT_HASH, STALE_CONTEXT_COUNT
check_stale_context() {
  local fresh_hash
  fresh_hash=$(compute_context_hash)

  # Context moved: clear the streak and remember the new hash.
  if [[ "$fresh_hash" != "$PREV_CONTEXT_HASH" ]]; then
    STALE_CONTEXT_COUNT=0
    PREV_CONTEXT_HASH="$fresh_hash"
    return 0
  fi

  (( STALE_CONTEXT_COUNT++ ))
  log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"

  if (( STALE_CONTEXT_COUNT >= 3 )); then
    log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
    return 1
  fi

  PREV_CONTEXT_HASH="$fresh_hash"
  return 0
}
|
|
1180
|
+
|
|
1181
|
+
# =============================================================================
|
|
1182
|
+
# Error Detection
|
|
1183
|
+
# =============================================================================
|
|
1184
|
+
|
|
1185
|
+
# --- US-003: API error detector using tmux pane buffer ---
|
|
1186
|
+
# is_api_error PANE_ID
#
# Captures the visible buffer of tmux pane PANE_ID and returns 0 if it
# contains an API error signature: a stand-alone "500" or "529" (not part of
# a longer digit run), or — case-insensitively — "overloaded",
# "too many requests" or "service unavailable". Returns 1 otherwise,
# including when the capture is empty or fails.
is_api_error() {
  local pane_id="$1"
  local captured
  captured=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)

  [[ -n "$captured" ]] || return 1

  # One grep pass with every signature as its own -e pattern.
  if echo "$captured" | grep -qiE \
      -e '(^|[^[:digit:]])500([^[:digit:]]|$)' \
      -e '(^|[^[:digit:]])529([^[:digit:]]|$)' \
      -e 'overloaded' \
      -e 'too many requests' \
      -e 'service unavailable'; then
    return 0
  fi
  return 1
}
|
|
1203
|
+
|
|
1204
|
+
# =============================================================================
|
|
1205
|
+
# Security Warning
|
|
1206
|
+
# =============================================================================
|
|
1207
|
+
|
|
1208
|
+
# print_security_warning — print the --dangerously-skip-permissions banner
# to stdout: a blank line, the boxed warning text, and a closing blank line.
print_security_warning() {
  local rule="================================================================"

  # One argument per output line.
  printf '%s\n' \
    "" \
    "$rule" \
    " WARNING: Running with --dangerously-skip-permissions" \
    "" \
    " The claude CLI will execute tools (file writes, shell commands)" \
    " without asking for confirmation. Only run this on code you" \
    " trust in an environment you control." \
    "$rule" \
    ""
}
|