prizmkit 1.1.21 → 1.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/bundled/VERSION.json +3 -3
  2. package/bundled/dev-pipeline/lib/heartbeat.sh +50 -7
  3. package/bundled/dev-pipeline/reset-bug.sh +21 -13
  4. package/bundled/dev-pipeline/reset-feature.sh +21 -13
  5. package/bundled/dev-pipeline/reset-refactor.sh +21 -13
  6. package/bundled/dev-pipeline/run-bugfix.sh +40 -2
  7. package/bundled/dev-pipeline/run-feature.sh +41 -1
  8. package/bundled/dev-pipeline/run-refactor.sh +40 -2
  9. package/bundled/dev-pipeline/scripts/detect-stuck.py +25 -14
  10. package/bundled/dev-pipeline/scripts/init-bugfix-pipeline.py +0 -5
  11. package/bundled/dev-pipeline/scripts/init-pipeline.py +0 -5
  12. package/bundled/dev-pipeline/scripts/init-refactor-pipeline.py +0 -5
  13. package/bundled/dev-pipeline/scripts/update-bug-status.py +40 -31
  14. package/bundled/dev-pipeline/scripts/update-feature-status.py +54 -60
  15. package/bundled/dev-pipeline/scripts/update-refactor-status.py +43 -34
  16. package/bundled/dev-pipeline/templates/bootstrap-tier1.md +50 -7
  17. package/bundled/dev-pipeline/templates/bootstrap-tier2.md +50 -7
  18. package/bundled/dev-pipeline/templates/bootstrap-tier3.md +50 -7
  19. package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +20 -0
  20. package/bundled/dev-pipeline/templates/sections/phase-browser-verification.md +84 -5
  21. package/bundled/dev-pipeline/templates/sections/phase-implement-agent.md +7 -0
  22. package/bundled/dev-pipeline/templates/sections/phase-implement-full.md +7 -0
  23. package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +7 -0
  24. package/bundled/dev-pipeline/tests/test_auto_skip.py +10 -3
  25. package/bundled/skills/_metadata.json +1 -1
  26. package/package.json +1 -1
@@ -1,5 +1,5 @@
1
1
  {
2
- "frameworkVersion": "1.1.21",
3
- "bundledAt": "2026-04-11T11:23:13.654Z",
4
- "bundledFrom": "5137496"
2
+ "frameworkVersion": "1.1.24",
3
+ "bundledAt": "2026-04-12T01:13:39.954Z",
4
+ "bundledFrom": "626fbf5"
5
5
  }
@@ -6,9 +6,14 @@
6
6
  # structured progress from progress.json (written by
7
7
  # parse-stream-progress.py) and fall back to tail-based monitoring.
8
8
  #
9
+ # When stale_kill_threshold is set (>0), the heartbeat monitor will
10
+ # automatically kill the AI CLI process if it shows no progress for
11
+ # the specified duration. This prevents sessions from hanging forever
12
+ # when the AI CLI process doesn't exit after completing its work.
13
+ #
9
14
  # Usage:
10
15
  # source "$SCRIPT_DIR/lib/heartbeat.sh"
11
- # start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$interval"
16
+ # start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$interval" ["$stale_kill_threshold"]
12
17
  # # ... wait for CLI to finish ...
13
18
  # stop_heartbeat "$_HEARTBEAT_PID"
14
19
  #
@@ -20,19 +25,23 @@
20
25
  # Sets _HEARTBEAT_PID to the background process PID.
21
26
  #
22
27
  # Arguments:
23
- # $1 - cli_pid PID of the AI CLI process to monitor
24
- # $2 - session_log Path to session.log
25
- # $3 - progress_json Path to progress.json (may not exist if stream-json disabled)
26
- # $4 - interval Heartbeat interval in seconds
28
+ # $1 - cli_pid PID of the AI CLI process to monitor
29
+ # $2 - session_log Path to session.log
30
+ # $3 - progress_json Path to progress.json (may not exist if stream-json disabled)
31
+ # $4 - interval Heartbeat interval in seconds
32
+ # $5 - stale_kill_threshold (optional) Seconds of no progress before auto-killing the process.
33
+ # 0 = disabled (default). Recommended: 900 (15 minutes).
27
34
  start_heartbeat() {
28
35
  local cli_pid="$1"
29
36
  local session_log="$2"
30
37
  local progress_json="$3"
31
38
  local heartbeat_interval="$4"
39
+ local stale_kill_threshold="${5:-0}"
32
40
 
33
41
  (
34
42
  local elapsed=0
35
43
  local prev_size=0
44
+ local stale_seconds=0
36
45
  while kill -0 "$cli_pid" 2>/dev/null; do
37
46
  sleep "$heartbeat_interval"
38
47
  elapsed=$((elapsed + heartbeat_interval))
@@ -48,6 +57,13 @@ start_heartbeat() {
48
57
  local growth=$((cur_size - prev_size))
49
58
  prev_size=$cur_size
50
59
 
60
+ # Track progress staleness (no log growth = stale)
61
+ if [[ $growth -eq 0 ]]; then
62
+ stale_seconds=$((stale_seconds + heartbeat_interval))
63
+ else
64
+ stale_seconds=0
65
+ fi
66
+
51
67
  local size_display
52
68
  if [[ $cur_size -gt 1048576 ]]; then
53
69
  size_display="$((cur_size / 1048576))MB"
@@ -67,6 +83,33 @@ start_heartbeat() {
67
83
  status_icon="${YELLOW}⏸${NC}"
68
84
  fi
69
85
 
86
+ # Stale-kill: auto-terminate process if no progress for too long
87
+ if [[ $stale_kill_threshold -gt 0 && $stale_seconds -ge $stale_kill_threshold ]]; then
88
+ local stale_mins=$((stale_seconds / 60))
89
+ echo -e " ${RED}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${RED}STALE-KILL: no progress for ${stale_mins}m (threshold: ${stale_kill_threshold}s)${NC}"
90
+ echo -e " ${RED}[HEARTBEAT]${NC} Killing AI CLI process $cli_pid (stale session)..."
91
+ kill -TERM "$cli_pid" 2>/dev/null || true
92
+ # Give process 10s to exit gracefully, then force kill
93
+ sleep 10
94
+ if kill -0 "$cli_pid" 2>/dev/null; then
95
+ echo -e " ${RED}[HEARTBEAT]${NC} Process still alive after SIGTERM, sending SIGKILL..."
96
+ kill -9 "$cli_pid" 2>/dev/null || true
97
+ fi
98
+ # Write stale-kill marker so spawn_and_wait_session knows this wasn't a crash
99
+ local _marker_dir
100
+ _marker_dir="$(dirname "$session_log")"
101
+ echo "{\"killed_at\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\", \"reason\": \"stale_session\", \"stale_seconds\": $stale_seconds, \"threshold\": $stale_kill_threshold}" > "$_marker_dir/stale-kill.json" 2>/dev/null || true
102
+ break
103
+ fi
104
+
105
+ # Build staleness hint for display
106
+ local stale_hint=""
107
+ if [[ $stale_kill_threshold -gt 0 && $stale_seconds -gt 0 ]]; then
108
+ local stale_mins=$((stale_seconds / 60))
109
+ local threshold_mins=$((stale_kill_threshold / 60))
110
+ stale_hint=" | stale: ${stale_mins}m/${threshold_mins}m"
111
+ fi
112
+
70
113
  # Try structured progress from progress.json
71
114
  if [[ -f "$progress_json" ]]; then
72
115
  local phase tool msgs tools_total
@@ -86,7 +129,7 @@ try:
86
129
  except Exception:
87
130
  sys.exit(1)
88
131
  " "$progress_json" 2>/dev/null) && {
89
- echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${phase}"
132
+ echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${phase}${stale_hint}"
90
133
  continue
91
134
  }
92
135
  fi
@@ -97,7 +140,7 @@ except Exception:
97
140
  last_activity=$(tail -20 "$session_log" 2>/dev/null | grep -v '^$' | tail -1 | cut -c1-80 || echo "")
98
141
  fi
99
142
 
100
- echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s elapsed | log: ${size_display} (+${growth}B) | ${last_activity}"
143
+ echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s elapsed | log: ${size_display} (+${growth}B) | ${last_activity}${stale_hint}"
101
144
  done
102
145
  ) &
103
146
  _HEARTBEAT_PID=$!
@@ -139,22 +139,20 @@ fi
139
139
  BUG_IDS=()
140
140
 
141
141
  if [[ -n "$FILTER_MODE" ]]; then
142
- # Filter by status from .prizmkit/state/bugfix/bugs/*/status.json
142
+ # Filter by status from bug-fix-list.json (single source of truth)
143
143
  while IFS= read -r bid; do
144
144
  [[ -n "$bid" ]] && BUG_IDS+=("$bid")
145
145
  done < <(python3 -c "
146
- import json, os, sys
147
- state_dir = '$STATE_DIR'
146
+ import json, sys
148
147
  filter_mode = '$FILTER_MODE'
149
- bugs_dir = os.path.join(state_dir, 'bugs')
150
- if not os.path.isdir(bugs_dir):
151
- sys.exit(0)
152
- for bid in sorted(os.listdir(bugs_dir)):
153
- status_file = os.path.join(bugs_dir, bid, 'status.json')
154
- if not os.path.isfile(status_file):
148
+ bug_list = '$BUG_LIST'
149
+ with open(bug_list) as f:
150
+ data = json.load(f)
151
+ for bug in data.get('bugs', []):
152
+ if not isinstance(bug, dict):
155
153
  continue
156
- with open(status_file) as f:
157
- status = json.load(f).get('status', '')
154
+ bid = bug.get('id', '')
155
+ status = bug.get('status', '')
158
156
  if filter_mode == 'auto_skipped' and status == 'auto_skipped':
159
157
  print(bid)
160
158
  elif filter_mode == 'failed' and status == 'failed':
@@ -244,13 +242,23 @@ sys.exit(1)
244
242
  echo -e "${BOLD}════════════════════════════════════════════════════${NC}"
245
243
 
246
244
  STATUS_FILE="$STATE_DIR/bugs/$CUR_BUG_ID/status.json"
245
+ # Read status from bug-fix-list.json (single source of truth)
246
+ CURRENT_STATUS=$(python3 -c "
247
+ import json, sys
248
+ with open('$BUG_LIST') as f:
249
+ data = json.load(f)
250
+ for bug in data.get('bugs', []):
251
+ if isinstance(bug, dict) and bug.get('id') == '$CUR_BUG_ID':
252
+ print(bug.get('status', '?'))
253
+ sys.exit(0)
254
+ print('?')
255
+ " 2>/dev/null || echo "?")
247
256
  if [[ -f "$STATUS_FILE" ]]; then
248
- CURRENT_STATUS=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('status','?'))")
249
257
  CURRENT_RETRY=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('retry_count',0))")
250
258
  SESSION_COUNT=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(len(d.get('sessions',[])))")
251
259
  log_info "Current status: $CURRENT_STATUS (retry $CURRENT_RETRY, $SESSION_COUNT sessions)"
252
260
  else
253
- log_info "No status file found (never executed)"
261
+ log_info "Current status: $CURRENT_STATUS (no runtime state file)"
254
262
  fi
255
263
 
256
264
  BUGFIX_DIR="$PROJECT_ROOT/.prizmkit/bugfix/$CUR_BUG_ID"
@@ -139,22 +139,20 @@ fi
139
139
  FEATURE_IDS=()
140
140
 
141
141
  if [[ -n "$FILTER_MODE" ]]; then
142
- # Filter by status from state/features/*/status.json
142
+ # Filter by status from feature-list.json (single source of truth)
143
143
  while IFS= read -r fid; do
144
144
  [[ -n "$fid" ]] && FEATURE_IDS+=("$fid")
145
145
  done < <(python3 -c "
146
- import json, os, sys
147
- state_dir = '$STATE_DIR'
146
+ import json, sys
148
147
  filter_mode = '$FILTER_MODE'
149
- features_dir = os.path.join(state_dir, 'features')
150
- if not os.path.isdir(features_dir):
151
- sys.exit(0)
152
- for fid in sorted(os.listdir(features_dir)):
153
- status_file = os.path.join(features_dir, fid, 'status.json')
154
- if not os.path.isfile(status_file):
148
+ feature_list = '$FEATURE_LIST'
149
+ with open(feature_list) as f:
150
+ data = json.load(f)
151
+ for feat in data.get('features', []):
152
+ if not isinstance(feat, dict):
155
153
  continue
156
- with open(status_file) as f:
157
- status = json.load(f).get('status', '')
154
+ fid = feat.get('id', '')
155
+ status = feat.get('status', '')
158
156
  if filter_mode == 'auto_skipped' and status == 'auto_skipped':
159
157
  print(fid)
160
158
  elif filter_mode == 'failed' and status == 'failed':
@@ -253,13 +251,23 @@ sys.exit(1)
253
251
  echo -e "${BOLD}════════════════════════════════════════════════════${NC}"
254
252
 
255
253
  STATUS_FILE="$STATE_DIR/features/$CUR_FEATURE_ID/status.json"
254
+ # Read status from feature-list.json (single source of truth)
255
+ CURRENT_STATUS=$(python3 -c "
256
+ import json, sys
257
+ with open('$FEATURE_LIST') as f:
258
+ data = json.load(f)
259
+ for feat in data.get('features', []):
260
+ if isinstance(feat, dict) and feat.get('id') == '$CUR_FEATURE_ID':
261
+ print(feat.get('status', '?'))
262
+ sys.exit(0)
263
+ print('?')
264
+ " 2>/dev/null || echo "?")
256
265
  if [[ -f "$STATUS_FILE" ]]; then
257
- CURRENT_STATUS=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('status','?'))")
258
266
  CURRENT_RETRY=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('retry_count',0))")
259
267
  SESSION_COUNT=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(len(d.get('sessions',[])))")
260
268
  log_info "Current status: $CURRENT_STATUS (retry $CURRENT_RETRY, $SESSION_COUNT sessions)"
261
269
  else
262
- log_info "No status file found (never executed)"
270
+ log_info "Current status: $CURRENT_STATUS (no runtime state file)"
263
271
  fi
264
272
 
265
273
  SPECS_DIR="$PROJECT_ROOT/.prizmkit/specs/$FEATURE_SLUG"
@@ -129,22 +129,20 @@ fi
129
129
  REFACTOR_IDS=()
130
130
 
131
131
  if [[ -n "$FILTER_MODE" ]]; then
132
- # Filter by status from .prizmkit/state/refactor/refactors/*/status.json
132
+ # Filter by status from refactor-list.json (single source of truth)
133
133
  while IFS= read -r rid; do
134
134
  [[ -n "$rid" ]] && REFACTOR_IDS+=("$rid")
135
135
  done < <(python3 -c "
136
- import json, os, sys
137
- state_dir = '$STATE_DIR'
136
+ import json, sys
138
137
  filter_mode = '$FILTER_MODE'
139
- refactors_dir = os.path.join(state_dir, 'refactors')
140
- if not os.path.isdir(refactors_dir):
141
- sys.exit(0)
142
- for rid in sorted(os.listdir(refactors_dir)):
143
- status_file = os.path.join(refactors_dir, rid, 'status.json')
144
- if not os.path.isfile(status_file):
138
+ refactor_list = '$REFACTOR_LIST'
139
+ with open(refactor_list) as f:
140
+ data = json.load(f)
141
+ for r in data.get('refactors', []):
142
+ if not isinstance(r, dict):
145
143
  continue
146
- with open(status_file) as f:
147
- status = json.load(f).get('status', '')
144
+ rid = r.get('id', '')
145
+ status = r.get('status', '')
148
146
  if filter_mode == 'auto_skipped' and status == 'auto_skipped':
149
147
  print(rid)
150
148
  elif filter_mode == 'failed' and status == 'failed':
@@ -242,13 +240,23 @@ sys.exit(1)
242
240
  echo -e "${BOLD}════════════════════════════════════════════════════${NC}"
243
241
 
244
242
  STATUS_FILE="$STATE_DIR/refactors/$CUR_REFACTOR_ID/status.json"
243
+ # Read status from refactor-list.json (single source of truth)
244
+ CURRENT_STATUS=$(python3 -c "
245
+ import json, sys
246
+ with open('$REFACTOR_LIST') as f:
247
+ data = json.load(f)
248
+ for r in data.get('refactors', []):
249
+ if isinstance(r, dict) and r.get('id') == '$CUR_REFACTOR_ID':
250
+ print(r.get('status', '?'))
251
+ sys.exit(0)
252
+ print('?')
253
+ " 2>/dev/null || echo "?")
245
254
  if [[ -f "$STATUS_FILE" ]]; then
246
- CURRENT_STATUS=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('status','?'))")
247
255
  CURRENT_RETRY=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(d.get('retry_count',0))")
248
256
  SESSION_COUNT=$(python3 -c "import json; d=json.load(open('$STATUS_FILE')); print(len(d.get('sessions',[])))")
249
257
  log_info "Current status: $CURRENT_STATUS (retry $CURRENT_RETRY, $SESSION_COUNT sessions)"
250
258
  else
251
- log_info "No status file found (never executed)"
259
+ log_info "Current status: $CURRENT_STATUS (no runtime state file)"
252
260
  fi
253
261
 
254
262
  SPECS_DIR="$PROJECT_ROOT/.prizmkit/specs/$REFACTOR_SLUG"
@@ -21,6 +21,7 @@ set -euo pipefail
21
21
  # PRIZMKIT_PLATFORM Force platform: 'codebuddy' or 'claude' (auto-detected)
22
22
  # VERBOSE Set to 1 to enable --verbose on AI CLI
23
23
  # HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)
24
+ # STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)
24
25
  # HEARTBEAT_STALE_THRESHOLD Heartbeat stale threshold in seconds (default: 600)
25
26
  # LOG_CLEANUP_ENABLED Run periodic log cleanup (default: 1)
26
27
  # LOG_RETENTION_DAYS Delete logs older than N days (default: 14)
@@ -39,6 +40,7 @@ MAX_RETRIES=${MAX_RETRIES:-3}
39
40
  SESSION_TIMEOUT=${SESSION_TIMEOUT:-0}
40
41
  HEARTBEAT_STALE_THRESHOLD=${HEARTBEAT_STALE_THRESHOLD:-600}
41
42
  HEARTBEAT_INTERVAL=${HEARTBEAT_INTERVAL:-30}
43
+ STALE_KILL_THRESHOLD=${STALE_KILL_THRESHOLD:-900}
42
44
  LOG_CLEANUP_ENABLED=${LOG_CLEANUP_ENABLED:-1}
43
45
  LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-14}
44
46
  LOG_MAX_TOTAL_MB=${LOG_MAX_TOTAL_MB:-1024}
@@ -146,8 +148,8 @@ spawn_and_wait_session() {
146
148
  watcher_pid=$!
147
149
  fi
148
150
 
149
- # Heartbeat monitor
150
- start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL"
151
+ # Heartbeat monitor (with stale-kill protection)
152
+ start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL" "$STALE_KILL_THRESHOLD"
151
153
  local heartbeat_pid="${_HEARTBEAT_PID:-}"
152
154
 
153
155
  # Wait for AI CLI to finish
@@ -166,6 +168,14 @@ spawn_and_wait_session() {
166
168
 
167
169
  [[ $exit_code -eq 143 ]] && exit_code=124
168
170
 
171
+ # Check for stale-kill marker (heartbeat killed the process due to no progress)
172
+ local stale_kill_marker="$session_dir/logs/stale-kill.json"
173
+ local was_stale_killed=false
174
+ if [[ -f "$stale_kill_marker" ]]; then
175
+ was_stale_killed=true
176
+ log_warn "Session was stale-killed by heartbeat monitor (no progress for too long)"
177
+ fi
178
+
169
179
  # Session summary
170
180
  if [[ -f "$session_log" ]]; then
171
181
  local final_size=$(wc -c < "$session_log" 2>/dev/null | tr -d ' ')
@@ -183,6 +193,33 @@ spawn_and_wait_session() {
183
193
  if [[ $exit_code -eq 124 ]]; then
184
194
  log_warn "Session timed out after ${SESSION_TIMEOUT}s"
185
195
  session_status="timed_out"
196
+ elif [[ "$was_stale_killed" == true ]]; then
197
+ log_warn "Session stale-killed (no progress for ${STALE_KILL_THRESHOLD}s)"
198
+ local has_commits=""
199
+ if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
200
+ has_commits=$(git -C "$project_root" log "${default_branch}..HEAD" --oneline 2>/dev/null | head -1)
201
+ fi
202
+ if [[ -n "$has_commits" ]]; then
203
+ log_info "Stale-killed session has commits — treating as success"
204
+ session_status="success"
205
+ else
206
+ local uncommitted=""
207
+ uncommitted=$(git -C "$project_root" status --porcelain 2>/dev/null | head -1 || true)
208
+ if [[ -n "$uncommitted" ]]; then
209
+ log_warn "Stale-killed session has uncommitted changes — auto-committing..."
210
+ git -C "$project_root" add -A 2>/dev/null || true
211
+ if git -C "$project_root" commit --no-verify -m "chore($bug_id): auto-commit session work (stale-killed)" 2>/dev/null; then
212
+ log_info "Auto-commit succeeded"
213
+ session_status="success"
214
+ else
215
+ log_warn "Auto-commit failed — no changes to commit"
216
+ session_status="crashed"
217
+ fi
218
+ else
219
+ log_warn "Stale-killed session produced no commits and no changes"
220
+ session_status="crashed"
221
+ fi
222
+ fi
186
223
  elif [[ $exit_code -ne 0 ]]; then
187
224
  log_warn "Session exited with code $exit_code"
188
225
  session_status="crashed"
@@ -1091,6 +1128,7 @@ show_help() {
1091
1128
  echo " AI_CLI AI CLI command name (auto-detected: cbc or claude)"
1092
1129
  echo " VERBOSE Set to 1 for verbose AI CLI output"
1093
1130
  echo " HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)"
1131
+ echo " STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)"
1094
1132
  echo " LOG_CLEANUP_ENABLED Run log cleanup before execution (default: 1)"
1095
1133
  echo " LOG_RETENTION_DAYS Delete logs older than N days (default: 14)"
1096
1134
  echo " LOG_MAX_TOTAL_MB Keep total logs under N MB (default: 1024)"
@@ -23,6 +23,7 @@ set -euo pipefail
23
23
  # MODEL AI model to use (e.g. claude-opus-4.6, claude-sonnet-4.6, claude-haiku-4.5)
24
24
  # VERBOSE Set to 1 to enable --verbose on AI CLI (shows subagent output)
25
25
  # HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)
26
+ # STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)
26
27
  # HEARTBEAT_STALE_THRESHOLD Heartbeat stale threshold in seconds (default: 600)
27
28
  # LOG_CLEANUP_ENABLED Run periodic log cleanup (default: 1)
28
29
  # LOG_RETENTION_DAYS Delete logs older than N days (default: 14)
@@ -42,6 +43,7 @@ MAX_RETRIES=${MAX_RETRIES:-3}
42
43
  SESSION_TIMEOUT=${SESSION_TIMEOUT:-0}
43
44
  HEARTBEAT_STALE_THRESHOLD=${HEARTBEAT_STALE_THRESHOLD:-600}
44
45
  HEARTBEAT_INTERVAL=${HEARTBEAT_INTERVAL:-30}
46
+ STALE_KILL_THRESHOLD=${STALE_KILL_THRESHOLD:-900}
45
47
  LOG_CLEANUP_ENABLED=${LOG_CLEANUP_ENABLED:-1}
46
48
  LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-14}
47
49
  LOG_MAX_TOTAL_MB=${LOG_MAX_TOTAL_MB:-1024}
@@ -157,7 +159,8 @@ spawn_and_wait_session() {
157
159
  fi
158
160
 
159
161
  # Heartbeat monitor (reads progress.json when available, falls back to tail)
160
- start_heartbeat "$cbc_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL"
162
+ # Also monitors for stale sessions and auto-kills if no progress for STALE_KILL_THRESHOLD seconds
163
+ start_heartbeat "$cbc_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL" "$STALE_KILL_THRESHOLD"
161
164
  local heartbeat_pid="${_HEARTBEAT_PID:-}"
162
165
 
163
166
  # Wait for AI CLI to finish
@@ -179,6 +182,14 @@ spawn_and_wait_session() {
179
182
  exit_code=124
180
183
  fi
181
184
 
185
+ # Check for stale-kill marker (heartbeat killed the process due to no progress)
186
+ local stale_kill_marker="$session_dir/logs/stale-kill.json"
187
+ local was_stale_killed=false
188
+ if [[ -f "$stale_kill_marker" ]]; then
189
+ was_stale_killed=true
190
+ log_warn "Session was stale-killed by heartbeat monitor (no progress for too long)"
191
+ fi
192
+
182
193
  # Show final session summary
183
194
  if [[ -f "$session_log" ]]; then
184
195
  local final_size=$(wc -c < "$session_log" 2>/dev/null | tr -d ' ')
@@ -198,6 +209,34 @@ spawn_and_wait_session() {
198
209
  if [[ $exit_code -eq 124 ]]; then
199
210
  log_warn "Session timed out after ${SESSION_TIMEOUT}s"
200
211
  session_status="timed_out"
212
+ elif [[ "$was_stale_killed" == true ]]; then
213
+ log_warn "Session stale-killed (no progress for ${STALE_KILL_THRESHOLD}s)"
214
+ # Treat stale-killed as potentially successful — check for commits
215
+ local has_commits=""
216
+ if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
217
+ has_commits=$(git -C "$project_root" log "${default_branch}..HEAD" --oneline 2>/dev/null | head -1)
218
+ fi
219
+ if [[ -n "$has_commits" ]]; then
220
+ log_info "Stale-killed session has commits — treating as success"
221
+ session_status="success"
222
+ else
223
+ local uncommitted=""
224
+ uncommitted=$(git -C "$project_root" status --porcelain 2>/dev/null | head -1 || true)
225
+ if [[ -n "$uncommitted" ]]; then
226
+ log_warn "Stale-killed session has uncommitted changes — auto-committing..."
227
+ git -C "$project_root" add -A 2>/dev/null || true
228
+ if git -C "$project_root" commit --no-verify -m "chore($feature_id): auto-commit session work (stale-killed)" 2>/dev/null; then
229
+ log_info "Auto-commit succeeded"
230
+ session_status="success"
231
+ else
232
+ log_warn "Auto-commit failed — no changes to commit"
233
+ session_status="crashed"
234
+ fi
235
+ else
236
+ log_warn "Stale-killed session produced no commits and no changes"
237
+ session_status="crashed"
238
+ fi
239
+ fi
201
240
  elif [[ $exit_code -ne 0 ]]; then
202
241
  log_warn "Session exited with code $exit_code"
203
242
  session_status="crashed"
@@ -1318,6 +1357,7 @@ show_help() {
1318
1357
  echo " AI_CLI AI CLI command name (auto-detected: cbc or claude)"
1319
1358
  echo " MODEL AI model ID (e.g. claude-opus-4.6, claude-sonnet-4.6, claude-haiku-4.5)"
1320
1359
  echo " HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)"
1360
+ echo " STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)"
1321
1361
  echo " HEARTBEAT_STALE_THRESHOLD Heartbeat stale threshold in seconds (default: 600)"
1322
1362
  echo " LOG_CLEANUP_ENABLED Run log cleanup before execution (default: 1)"
1323
1363
  echo " LOG_RETENTION_DAYS Delete logs older than N days (default: 14)"
@@ -21,6 +21,7 @@ set -euo pipefail
21
21
  # PRIZMKIT_PLATFORM Force platform: 'codebuddy' or 'claude' (auto-detected)
22
22
  # VERBOSE Set to 1 to enable --verbose on AI CLI
23
23
  # HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)
24
+ # STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)
24
25
  # HEARTBEAT_STALE_THRESHOLD Heartbeat stale threshold in seconds (default: 600)
25
26
  # LOG_CLEANUP_ENABLED Run periodic log cleanup (default: 1)
26
27
  # LOG_RETENTION_DAYS Delete logs older than N days (default: 14)
@@ -40,6 +41,7 @@ MAX_RETRIES=${MAX_RETRIES:-3}
40
41
  SESSION_TIMEOUT=${SESSION_TIMEOUT:-0}
41
42
  HEARTBEAT_STALE_THRESHOLD=${HEARTBEAT_STALE_THRESHOLD:-600}
42
43
  HEARTBEAT_INTERVAL=${HEARTBEAT_INTERVAL:-30}
44
+ STALE_KILL_THRESHOLD=${STALE_KILL_THRESHOLD:-900}
43
45
  LOG_CLEANUP_ENABLED=${LOG_CLEANUP_ENABLED:-1}
44
46
  LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-14}
45
47
  LOG_MAX_TOTAL_MB=${LOG_MAX_TOTAL_MB:-1024}
@@ -148,8 +150,8 @@ spawn_and_wait_session() {
148
150
  watcher_pid=$!
149
151
  fi
150
152
 
151
- # Heartbeat monitor
152
- start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL"
153
+ # Heartbeat monitor (with stale-kill protection)
154
+ start_heartbeat "$cli_pid" "$session_log" "$progress_json" "$HEARTBEAT_INTERVAL" "$STALE_KILL_THRESHOLD"
153
155
  local heartbeat_pid="${_HEARTBEAT_PID:-}"
154
156
 
155
157
  # Wait for AI CLI to finish
@@ -168,6 +170,14 @@ spawn_and_wait_session() {
168
170
 
169
171
  [[ $exit_code -eq 143 ]] && exit_code=124
170
172
 
173
+ # Check for stale-kill marker (heartbeat killed the process due to no progress)
174
+ local stale_kill_marker="$session_dir/logs/stale-kill.json"
175
+ local was_stale_killed=false
176
+ if [[ -f "$stale_kill_marker" ]]; then
177
+ was_stale_killed=true
178
+ log_warn "Session was stale-killed by heartbeat monitor (no progress for too long)"
179
+ fi
180
+
171
181
  # Session summary
172
182
  if [[ -f "$session_log" ]]; then
173
183
  local final_size=$(wc -c < "$session_log" 2>/dev/null | tr -d ' ')
@@ -185,6 +195,33 @@ spawn_and_wait_session() {
185
195
  if [[ $exit_code -eq 124 ]]; then
186
196
  log_warn "Session timed out after ${SESSION_TIMEOUT}s"
187
197
  session_status="timed_out"
198
+ elif [[ "$was_stale_killed" == true ]]; then
199
+ log_warn "Session stale-killed (no progress for ${STALE_KILL_THRESHOLD}s)"
200
+ local has_commits=""
201
+ if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
202
+ has_commits=$(git -C "$project_root" log "${default_branch}..HEAD" --oneline 2>/dev/null | head -1)
203
+ fi
204
+ if [[ -n "$has_commits" ]]; then
205
+ log_info "Stale-killed session has commits — treating as success"
206
+ session_status="success"
207
+ else
208
+ local uncommitted=""
209
+ uncommitted=$(git -C "$project_root" status --porcelain 2>/dev/null | head -1 || true)
210
+ if [[ -n "$uncommitted" ]]; then
211
+ log_warn "Stale-killed session has uncommitted changes — auto-committing..."
212
+ git -C "$project_root" add -A 2>/dev/null || true
213
+ if git -C "$project_root" commit --no-verify -m "chore($refactor_id): auto-commit session work (stale-killed)" 2>/dev/null; then
214
+ log_info "Auto-commit succeeded"
215
+ session_status="success"
216
+ else
217
+ log_warn "Auto-commit failed — no changes to commit"
218
+ session_status="crashed"
219
+ fi
220
+ else
221
+ log_warn "Stale-killed session produced no commits and no changes"
222
+ session_status="crashed"
223
+ fi
224
+ fi
188
225
  elif [[ $exit_code -ne 0 ]]; then
189
226
  log_warn "Session exited with code $exit_code"
190
227
  session_status="crashed"
@@ -1129,6 +1166,7 @@ show_help() {
1129
1166
  echo " VERBOSE Set to 1 for verbose AI CLI output"
1130
1167
  echo " STRICT_BEHAVIOR_CHECK Force full test suite after each refactor (default: 1)"
1131
1168
  echo " HEARTBEAT_INTERVAL Heartbeat log interval in seconds (default: 30)"
1169
+ echo " STALE_KILL_THRESHOLD Auto-kill session after N seconds of no progress (default: 900)"
1132
1170
  echo " LOG_CLEANUP_ENABLED Run log cleanup before execution (default: 1)"
1133
1171
  echo " LOG_RETENTION_DAYS Delete logs older than N days (default: 14)"
1134
1172
  echo " LOG_MAX_TOTAL_MB Keep total logs under N MB (default: 1024)"
@@ -226,15 +226,16 @@ def check_stuck_checkpoint(item_dir):
226
226
  return None
227
227
 
228
228
 
229
- def check_stale_heartbeat(item_id, item_status, state_dir, items_subdir, stale_threshold):
229
+ def check_stale_heartbeat(item_id, item_status, state_dir, items_subdir, stale_threshold, task_list_status=None):
230
230
  """Check 3: Is the heartbeat stale or missing for an in_progress item?
231
231
 
232
232
  Only applies to items whose status indicates active work.
233
- Uses last_session_id from the item's own status to find the active session.
233
+ Status is read from task_list_status (task list JSON, single source of truth).
234
+ Uses last_session_id from the item's own status.json to find the active session.
234
235
 
235
236
  Returns a stuck-report dict or None.
236
237
  """
237
- status = item_status.get("status")
238
+ status = task_list_status
238
239
  # All pipelines now use "in_progress" as the active status
239
240
  in_progress_statuses = {"in_progress"}
240
241
  if status not in in_progress_statuses:
@@ -287,6 +288,8 @@ def check_stale_heartbeat(item_id, item_status, state_dir, items_subdir, stale_t
287
288
  def check_dependency_deadlock(item_id, task_list_data, state_dir, items_subdir, items_key):
288
289
  """Check 4: Does this item depend on a failed item?
289
290
 
291
+ Reads dependency status from task list JSON (single source of truth).
292
+
290
293
  Returns a stuck-report dict or None.
291
294
  """
292
295
  if task_list_data is None:
@@ -296,6 +299,12 @@ def check_dependency_deadlock(item_id, task_list_data, state_dir, items_subdir,
296
299
  if not isinstance(items, list):
297
300
  return None
298
301
 
302
+ # Build status map from task list
303
+ status_map = {}
304
+ for item in items:
305
+ if isinstance(item, dict) and item.get("id"):
306
+ status_map[item["id"]] = item.get("status", "pending")
307
+
299
308
  # Find this item in the list to get its dependencies
300
309
  deps = None
301
310
  for item in items:
@@ -308,15 +317,9 @@ def check_dependency_deadlock(item_id, task_list_data, state_dir, items_subdir,
308
317
  if not deps or not isinstance(deps, list):
309
318
  return None
310
319
 
311
- # Check each dependency's status in state
320
+ # Check each dependency's status from the task list
312
321
  for dep_id in deps:
313
- dep_status_path = os.path.join(
314
- state_dir, items_subdir, dep_id, "status.json"
315
- )
316
- dep_status = load_json(dep_status_path)
317
- if dep_status is None:
318
- continue
319
- dep_state = dep_status.get("status")
322
+ dep_state = status_map.get(dep_id)
320
323
  if dep_state == "failed":
321
324
  return {
322
325
  "reason": "dependency_failed",
@@ -376,8 +379,16 @@ def check_item(item_id, state_dir, items_subdir, items_key, task_list_data, max_
376
379
  item_status = load_json(status_path)
377
380
 
378
381
  if item_status is None:
379
- # Cannot read status skip silently
380
- return []
382
+ # Create a minimal runtime dict so checks can proceed
383
+ item_status = {}
384
+
385
+ # Look up item status from task list (single source of truth)
386
+ task_list_status = None
387
+ if task_list_data:
388
+ for item in task_list_data.get(items_key, []):
389
+ if isinstance(item, dict) and item.get("id") == item_id:
390
+ task_list_status = item.get("status", "pending")
391
+ break
381
392
 
382
393
  reports = []
383
394
 
@@ -392,7 +403,7 @@ def check_item(item_id, state_dir, items_subdir, items_key, task_list_data, max_
392
403
  reports.append(result)
393
404
 
394
405
  # Check 3: Stale heartbeat
395
- result = check_stale_heartbeat(item_id, item_status, state_dir, items_subdir, stale_threshold)
406
+ result = check_stale_heartbeat(item_id, item_status, state_dir, items_subdir, stale_threshold, task_list_status)
396
407
  if result is not None:
397
408
  reports.append(result)
398
409
 
@@ -249,13 +249,8 @@ def create_state_directory(state_dir, bug_list_path, bugs):
249
249
  sessions_dir = os.path.join(bug_dir, "sessions")
250
250
  os.makedirs(sessions_dir, exist_ok=True)
251
251
 
252
- # Respect existing terminal status from bug-fix-list.json
253
- bl_status = bug.get("status", "pending")
254
- init_status = bl_status if bl_status in TERMINAL_STATUSES else "pending"
255
-
256
252
  bug_status = {
257
253
  "bug_id": bid,
258
- "status": init_status,
259
254
  "retry_count": 0,
260
255
  "max_retries": 3,
261
256
  "sessions": [],