prizmkit 1.1.66 → 1.1.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/VERSION.json +3 -3
- package/bundled/adapters/codex/settings-adapter.js +1 -1
- package/bundled/dev-pipeline/.env.example +3 -0
- package/bundled/dev-pipeline/SCHEMA_ANALYSIS.md +3 -1
- package/bundled/dev-pipeline/lib/common.sh +21 -18
- package/bundled/dev-pipeline/lib/heartbeat.sh +104 -11
- package/bundled/dev-pipeline/scripts/parse-stream-progress.py +144 -12
- package/bundled/dev-pipeline-windows/.env.example +3 -2
- package/bundled/dev-pipeline-windows/SCHEMA_ANALYSIS.md +3 -2
- package/bundled/dev-pipeline-windows/lib/common.ps1 +78 -5
- package/bundled/dev-pipeline-windows/lib/pipeline.ps1 +12 -4
- package/bundled/dev-pipeline-windows/run-recovery.ps1 +8 -1
- package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +144 -12
- package/bundled/skills/_metadata.json +1 -1
- package/package.json +1 -1
- package/src/scaffold.js +1 -1
package/bundled/VERSION.json
CHANGED
|
@@ -41,6 +41,9 @@
|
|
|
41
41
|
# ─── Logging & Heartbeat ─────────────────────────────────────────────
|
|
42
42
|
# HEARTBEAT_INTERVAL=30 # Heartbeat log interval in seconds
|
|
43
43
|
# HEARTBEAT_STALE_THRESHOLD=600 # Max seconds without heartbeat before marking stale
|
|
44
|
+
# STALE_KILL_THRESHOLD=900 # Auto-kill after N seconds without parent log progress (0 = disabled)
|
|
45
|
+
# CODEX_WAIT_STALE_KILL_THRESHOLD=3600 # Longer no-log window while Codex waits on subagents
|
|
46
|
+
# CODEX_SUBAGENT_TIMEOUT_SECONDS=3300 # Codex subagent max runtime; defaults to wait threshold - 300
|
|
44
47
|
# LOG_CLEANUP_ENABLED=1 # Periodic log cleanup (1=on, 0=off)
|
|
45
48
|
# LOG_RETENTION_DAYS=14 # Delete logs older than N days
|
|
46
49
|
# LOG_MAX_TOTAL_MB=1024 # Keep total logs under N MB via oldest-first cleanup
|
|
@@ -353,6 +353,9 @@ pending, in_progress, completed, failed, skipped
|
|
|
353
353
|
| `DEV_BRANCH` | string | auto-generated | Custom branch name |
|
|
354
354
|
| `HEARTBEAT_INTERVAL` | integer | 30 | Heartbeat log interval (s) |
|
|
355
355
|
| `HEARTBEAT_STALE_THRESHOLD` | integer | 600 | Max seconds without heartbeat |
|
|
356
|
+
| `STALE_KILL_THRESHOLD` | integer | 900 | Auto-kill after N seconds without parent log progress |
|
|
357
|
+
| `CODEX_WAIT_STALE_KILL_THRESHOLD` | integer | 3600 | Longer no-log stale window while Codex waits on subagents |
|
|
358
|
+
| `CODEX_SUBAGENT_TIMEOUT_SECONDS` | integer | 3300 | Codex subagent max runtime |
|
|
356
359
|
| `LOG_CLEANUP_ENABLED` | integer | 1 | Periodic cleanup |
|
|
357
360
|
| `LOG_RETENTION_DAYS` | integer | 14 | Delete logs older than N days |
|
|
358
361
|
| `LOG_MAX_TOTAL_MB` | integer | 1024 | Max total logs (MB) |
|
|
@@ -532,4 +535,3 @@ Located in `/dev-pipeline/templates/`:
|
|
|
532
535
|
- Agent: 6 files
|
|
533
536
|
- Base/Shared: 7 files
|
|
534
537
|
- Singleton: 3 files
|
|
535
|
-
|
|
@@ -344,6 +344,23 @@ prizm_detect_cli_and_platform() {
|
|
|
344
344
|
# command substitution; the background process must remain a child of the
|
|
345
345
|
# runner shell so wait/heartbeat/trap handling works correctly.
|
|
346
346
|
PRIZM_AI_PID=""
|
|
347
|
+
|
|
348
|
+
_prizm_codex_subagent_timeout_seconds() {
|
|
349
|
+
local configured="${CODEX_SUBAGENT_TIMEOUT_SECONDS:-}"
|
|
350
|
+
if [[ "$configured" =~ ^[0-9]+$ && "$configured" -gt 0 ]]; then
|
|
351
|
+
printf '%s\n' "$configured"
|
|
352
|
+
return 0
|
|
353
|
+
fi
|
|
354
|
+
|
|
355
|
+
local wait_threshold="${CODEX_WAIT_STALE_KILL_THRESHOLD:-3600}"
|
|
356
|
+
if [[ "$wait_threshold" =~ ^[0-9]+$ && "$wait_threshold" -gt 600 ]]; then
|
|
357
|
+
printf '%s\n' "$((wait_threshold - 300))"
|
|
358
|
+
return 0
|
|
359
|
+
fi
|
|
360
|
+
|
|
361
|
+
printf '%s\n' 3300
|
|
362
|
+
}
|
|
363
|
+
|
|
347
364
|
prizm_start_ai_session() {
|
|
348
365
|
local prompt_path="$1"
|
|
349
366
|
local log_path="$2"
|
|
@@ -370,15 +387,8 @@ prizm_start_ai_session() {
|
|
|
370
387
|
;;
|
|
371
388
|
codex)
|
|
372
389
|
local codex_args=(--ask-for-approval never --sandbox danger-full-access)
|
|
373
|
-
local codex_subagent_timeout
|
|
374
|
-
|
|
375
|
-
local outer_stale_threshold="${STALE_KILL_THRESHOLD:-900}"
|
|
376
|
-
if [[ "$outer_stale_threshold" =~ ^[0-9]+$ && "$outer_stale_threshold" -gt 120 ]]; then
|
|
377
|
-
codex_subagent_timeout=$((outer_stale_threshold - 60))
|
|
378
|
-
else
|
|
379
|
-
codex_subagent_timeout=840
|
|
380
|
-
fi
|
|
381
|
-
fi
|
|
390
|
+
local codex_subagent_timeout
|
|
391
|
+
codex_subagent_timeout="$(_prizm_codex_subagent_timeout_seconds)"
|
|
382
392
|
if [[ "$codex_subagent_timeout" =~ ^[0-9]+$ && "$codex_subagent_timeout" -gt 0 ]]; then
|
|
383
393
|
codex_args+=(--config "agents.job_max_runtime_seconds=$codex_subagent_timeout")
|
|
384
394
|
fi
|
|
@@ -430,15 +440,8 @@ prizm_run_ai_session() {
|
|
|
430
440
|
;;
|
|
431
441
|
codex)
|
|
432
442
|
local codex_args=(--ask-for-approval never --sandbox danger-full-access)
|
|
433
|
-
local codex_subagent_timeout
|
|
434
|
-
|
|
435
|
-
local outer_stale_threshold="${STALE_KILL_THRESHOLD:-900}"
|
|
436
|
-
if [[ "$outer_stale_threshold" =~ ^[0-9]+$ && "$outer_stale_threshold" -gt 120 ]]; then
|
|
437
|
-
codex_subagent_timeout=$((outer_stale_threshold - 60))
|
|
438
|
-
else
|
|
439
|
-
codex_subagent_timeout=840
|
|
440
|
-
fi
|
|
441
|
-
fi
|
|
443
|
+
local codex_subagent_timeout
|
|
444
|
+
codex_subagent_timeout="$(_prizm_codex_subagent_timeout_seconds)"
|
|
442
445
|
if [[ "$codex_subagent_timeout" =~ ^[0-9]+$ && "$codex_subagent_timeout" -gt 0 ]]; then
|
|
443
446
|
codex_args+=(--config "agents.job_max_runtime_seconds=$codex_subagent_timeout")
|
|
444
447
|
fi
|
|
@@ -41,6 +41,7 @@ start_heartbeat() {
|
|
|
41
41
|
(
|
|
42
42
|
local elapsed=0
|
|
43
43
|
local prev_size=0
|
|
44
|
+
local prev_child_activity_signature=""
|
|
44
45
|
local stale_seconds=0
|
|
45
46
|
while kill -0 "$cli_pid" 2>/dev/null; do
|
|
46
47
|
sleep "$heartbeat_interval"
|
|
@@ -57,8 +58,41 @@ start_heartbeat() {
|
|
|
57
58
|
local growth=$((cur_size - prev_size))
|
|
58
59
|
prev_size=$cur_size
|
|
59
60
|
|
|
60
|
-
|
|
61
|
-
|
|
61
|
+
local child_activity_signature=""
|
|
62
|
+
local child_total_bytes=0
|
|
63
|
+
local child_session_count=0
|
|
64
|
+
if [[ -f "$progress_json" ]]; then
|
|
65
|
+
local child_activity_data
|
|
66
|
+
child_activity_data=$(python3 - "$progress_json" <<'PY' 2>/dev/null || true
|
|
67
|
+
import json
|
|
68
|
+
import sys
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
with open(sys.argv[1], "r", encoding="utf-8") as fh:
|
|
72
|
+
progress = json.load(fh)
|
|
73
|
+
except Exception:
|
|
74
|
+
sys.exit(0)
|
|
75
|
+
|
|
76
|
+
signature = str(progress.get("child_activity_signature") or "")
|
|
77
|
+
total_bytes = int(progress.get("child_total_bytes") or 0)
|
|
78
|
+
session_count = len(progress.get("child_session_files") or [])
|
|
79
|
+
print(f"{signature}\t{total_bytes}\t{session_count}")
|
|
80
|
+
PY
|
|
81
|
+
)
|
|
82
|
+
if [[ -n "$child_activity_data" ]]; then
|
|
83
|
+
IFS=$'\t' read -r child_activity_signature child_total_bytes child_session_count <<< "$child_activity_data"
|
|
84
|
+
fi
|
|
85
|
+
fi
|
|
86
|
+
|
|
87
|
+
local child_growth=0
|
|
88
|
+
if [[ -n "$child_activity_signature" && "$child_activity_signature" != "$prev_child_activity_signature" ]]; then
|
|
89
|
+
child_growth=1
|
|
90
|
+
fi
|
|
91
|
+
prev_child_activity_signature="$child_activity_signature"
|
|
92
|
+
|
|
93
|
+
# Track progress staleness. A Codex parent can sit in `wait`
|
|
94
|
+
# while child transcripts keep growing, so child activity counts.
|
|
95
|
+
if [[ $growth -eq 0 && $child_growth -eq 0 ]]; then
|
|
62
96
|
stale_seconds=$((stale_seconds + heartbeat_interval))
|
|
63
97
|
else
|
|
64
98
|
stale_seconds=0
|
|
@@ -72,28 +106,87 @@ start_heartbeat() {
|
|
|
72
106
|
else
|
|
73
107
|
size_display="${cur_size}B"
|
|
74
108
|
fi
|
|
109
|
+
local child_display=""
|
|
110
|
+
if [[ ${child_total_bytes:-0} -gt 0 ]]; then
|
|
111
|
+
local child_size_display
|
|
112
|
+
if [[ $child_total_bytes -gt 1048576 ]]; then
|
|
113
|
+
child_size_display="$((child_total_bytes / 1048576))MB"
|
|
114
|
+
elif [[ $child_total_bytes -gt 1024 ]]; then
|
|
115
|
+
child_size_display="$((child_total_bytes / 1024))KB"
|
|
116
|
+
else
|
|
117
|
+
child_size_display="${child_total_bytes}B"
|
|
118
|
+
fi
|
|
119
|
+
child_display=" | child: ${child_size_display}"
|
|
120
|
+
if [[ ${child_session_count:-0} -gt 1 ]]; then
|
|
121
|
+
child_display="${child_display}/${child_session_count}"
|
|
122
|
+
fi
|
|
123
|
+
fi
|
|
75
124
|
|
|
76
125
|
local mins=$((elapsed / 60))
|
|
77
126
|
local secs=$((elapsed % 60))
|
|
78
127
|
|
|
79
128
|
local status_icon
|
|
80
|
-
if [[ $growth -gt 0 ]]; then
|
|
129
|
+
if [[ $growth -gt 0 || $child_growth -gt 0 ]]; then
|
|
81
130
|
status_icon="${GREEN}▶${NC}"
|
|
82
131
|
else
|
|
83
132
|
status_icon="${YELLOW}⏸${NC}"
|
|
84
133
|
fi
|
|
85
134
|
|
|
86
|
-
|
|
87
|
-
if [[ $stale_kill_threshold -gt 0 &&
|
|
135
|
+
local effective_stale_kill_threshold="$stale_kill_threshold"
|
|
136
|
+
if [[ $stale_kill_threshold -gt 0 && -f "$progress_json" ]]; then
|
|
137
|
+
local codex_wait_threshold
|
|
138
|
+
codex_wait_threshold=$(python3 - "$progress_json" "$stale_kill_threshold" <<'PY' 2>/dev/null || true
|
|
139
|
+
import json
|
|
140
|
+
import os
|
|
141
|
+
import sys
|
|
142
|
+
|
|
143
|
+
progress_path = sys.argv[1]
|
|
144
|
+
base_threshold = int(sys.argv[2])
|
|
145
|
+
|
|
146
|
+
with open(progress_path, "r", encoding="utf-8") as fh:
|
|
147
|
+
progress = json.load(fh)
|
|
148
|
+
|
|
149
|
+
spawn_count = 0
|
|
150
|
+
for tool in progress.get("tool_calls", []):
|
|
151
|
+
if isinstance(tool, dict) and tool.get("name") == "spawn_agent":
|
|
152
|
+
try:
|
|
153
|
+
spawn_count += int(tool.get("count", 0))
|
|
154
|
+
except (TypeError, ValueError):
|
|
155
|
+
pass
|
|
156
|
+
|
|
157
|
+
if (
|
|
158
|
+
progress.get("event_format") == "codex-json"
|
|
159
|
+
and progress.get("current_tool") == "wait"
|
|
160
|
+
and spawn_count > 0
|
|
161
|
+
):
|
|
162
|
+
configured = os.environ.get("CODEX_WAIT_STALE_KILL_THRESHOLD", "")
|
|
163
|
+
try:
|
|
164
|
+
wait_threshold = int(configured)
|
|
165
|
+
except ValueError:
|
|
166
|
+
wait_threshold = max(base_threshold * 4, 3600)
|
|
167
|
+
if wait_threshold > base_threshold:
|
|
168
|
+
print(wait_threshold)
|
|
169
|
+
PY
|
|
170
|
+
)
|
|
171
|
+
if [[ "$codex_wait_threshold" =~ ^[0-9]+$ && "$codex_wait_threshold" -gt "$stale_kill_threshold" ]]; then
|
|
172
|
+
effective_stale_kill_threshold="$codex_wait_threshold"
|
|
173
|
+
fi
|
|
174
|
+
fi
|
|
175
|
+
|
|
176
|
+
# Stale-kill: auto-terminate process if no progress for too long.
|
|
177
|
+
# Codex parent sessions can sit on the `wait` tool while a spawned
|
|
178
|
+
# subagent is still doing useful work. Give that valid wait a longer
|
|
179
|
+
# stale window; normal single-agent stalls still use the base limit.
|
|
180
|
+
if [[ $effective_stale_kill_threshold -gt 0 && $stale_seconds -ge $effective_stale_kill_threshold ]]; then
|
|
88
181
|
local stale_mins=$((stale_seconds / 60))
|
|
89
|
-
echo -e " ${RED}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${RED}STALE-KILL: no progress for ${stale_mins}m (threshold: ${
|
|
182
|
+
echo -e " ${RED}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${RED}STALE-KILL: no progress for ${stale_mins}m (threshold: ${effective_stale_kill_threshold}s)${NC}"
|
|
90
183
|
echo -e " ${RED}[HEARTBEAT]${NC} Killing AI CLI process $cli_pid (stale session)..."
|
|
91
184
|
# Write the marker before killing. Some CLIs exit quickly, and the
|
|
92
185
|
# parent runner may stop this heartbeat process immediately after
|
|
93
186
|
# wait(1) returns.
|
|
94
187
|
local _marker_dir
|
|
95
188
|
_marker_dir="$(dirname "$session_log")"
|
|
96
|
-
echo "{\"killed_at\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\", \"reason\": \"stale_session\", \"stale_seconds\": $stale_seconds, \"threshold\": $
|
|
189
|
+
echo "{\"killed_at\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\", \"reason\": \"stale_session\", \"stale_seconds\": $stale_seconds, \"threshold\": $effective_stale_kill_threshold}" > "$_marker_dir/stale-kill.json" 2>/dev/null || true
|
|
97
190
|
kill -TERM "$cli_pid" 2>/dev/null || true
|
|
98
191
|
# Give process 10s to exit gracefully, then force kill
|
|
99
192
|
local stale_kill_grace_seconds="${STALE_KILL_GRACE_SECONDS:-10}"
|
|
@@ -109,9 +202,9 @@ start_heartbeat() {
|
|
|
109
202
|
|
|
110
203
|
# Build staleness hint for display
|
|
111
204
|
local stale_hint=""
|
|
112
|
-
if [[ $
|
|
205
|
+
if [[ $effective_stale_kill_threshold -gt 0 && $stale_seconds -gt 0 ]]; then
|
|
113
206
|
local stale_mins=$((stale_seconds / 60))
|
|
114
|
-
local threshold_mins=$((
|
|
207
|
+
local threshold_mins=$((effective_stale_kill_threshold / 60))
|
|
115
208
|
stale_hint=" | stale: ${stale_mins}m/${threshold_mins}m"
|
|
116
209
|
fi
|
|
117
210
|
|
|
@@ -134,7 +227,7 @@ try:
|
|
|
134
227
|
except Exception:
|
|
135
228
|
sys.exit(1)
|
|
136
229
|
" "$progress_json" 2>/dev/null) && {
|
|
137
|
-
echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display} | ${phase}${stale_hint}"
|
|
230
|
+
echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s | log: ${size_display}${child_display} | ${phase}${stale_hint}"
|
|
138
231
|
continue
|
|
139
232
|
}
|
|
140
233
|
fi
|
|
@@ -145,7 +238,7 @@ except Exception:
|
|
|
145
238
|
last_activity=$(tail -20 "$session_log" 2>/dev/null | grep -v '^$' | tail -1 | cut -c1-80 || echo "")
|
|
146
239
|
fi
|
|
147
240
|
|
|
148
|
-
echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s elapsed | log: ${size_display} (+${growth}B) | ${last_activity}${stale_hint}"
|
|
241
|
+
echo -e " ${status_icon} ${BLUE}[HEARTBEAT]${NC} ${mins}m${secs}s elapsed | log: ${size_display}${child_display} (+${growth}B) | ${last_activity}${stale_hint}"
|
|
149
242
|
done
|
|
150
243
|
) &
|
|
151
244
|
_HEARTBEAT_PID=$!
|
|
@@ -23,6 +23,7 @@ import tempfile
|
|
|
23
23
|
import time
|
|
24
24
|
from collections import Counter
|
|
25
25
|
from datetime import datetime, timezone
|
|
26
|
+
from pathlib import Path
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
# Ordered pipeline phases — index defines forward-only progression.
|
|
@@ -76,6 +77,13 @@ class ProgressTracker:
|
|
|
76
77
|
self.event_format = ""
|
|
77
78
|
self.active_subagent_count = 0
|
|
78
79
|
self.subagent_status_counts = Counter()
|
|
80
|
+
self.codex_child_thread_ids = set()
|
|
81
|
+
self.child_session_files = []
|
|
82
|
+
self.child_total_bytes = 0
|
|
83
|
+
self.child_activity_signature = ""
|
|
84
|
+
self.last_child_activity_at = ""
|
|
85
|
+
self._codex_child_session_paths = {}
|
|
86
|
+
self._last_child_scan_at = 0.0
|
|
79
87
|
self._text_buffer = ""
|
|
80
88
|
self._in_tool_use = False
|
|
81
89
|
self._current_tool_input_parts = []
|
|
@@ -113,6 +121,9 @@ class ProgressTracker:
|
|
|
113
121
|
|
|
114
122
|
elif item_type == "collab_tool_call":
|
|
115
123
|
tool_name = item.get("tool", "collab")
|
|
124
|
+
self._record_codex_child_thread_ids(
|
|
125
|
+
item.get("receiver_thread_ids")
|
|
126
|
+
)
|
|
116
127
|
if event_type == "item.started":
|
|
117
128
|
self.current_tool = tool_name
|
|
118
129
|
self.tool_call_counts[tool_name] += 1
|
|
@@ -345,8 +356,117 @@ class ProgressTracker:
|
|
|
345
356
|
self.subagent_status_counts = counts
|
|
346
357
|
self.active_subagent_count = active
|
|
347
358
|
|
|
359
|
+
def _record_codex_child_thread_ids(self, thread_ids):
|
|
360
|
+
"""Remember Codex child thread IDs reported by collab tool calls."""
|
|
361
|
+
if not isinstance(thread_ids, list):
|
|
362
|
+
return
|
|
363
|
+
for thread_id in thread_ids:
|
|
364
|
+
if isinstance(thread_id, str) and thread_id.strip():
|
|
365
|
+
self.codex_child_thread_ids.add(thread_id.strip())
|
|
366
|
+
|
|
367
|
+
def _codex_sessions_dir(self):
|
|
368
|
+
"""Return the Codex sessions directory for the current environment."""
|
|
369
|
+
codex_home = os.environ.get("CODEX_HOME")
|
|
370
|
+
if codex_home:
|
|
371
|
+
return Path(codex_home).expanduser() / "sessions"
|
|
372
|
+
return Path.home() / ".codex" / "sessions"
|
|
373
|
+
|
|
374
|
+
def _find_codex_child_session_file(self, thread_id):
|
|
375
|
+
"""Find a Codex transcript file for a child thread ID."""
|
|
376
|
+
sessions_dir = self._codex_sessions_dir()
|
|
377
|
+
if not sessions_dir.exists():
|
|
378
|
+
return None
|
|
379
|
+
|
|
380
|
+
try:
|
|
381
|
+
matches = list(sessions_dir.rglob(f"*{thread_id}.jsonl"))
|
|
382
|
+
except OSError:
|
|
383
|
+
return None
|
|
384
|
+
|
|
385
|
+
if not matches:
|
|
386
|
+
return None
|
|
387
|
+
|
|
388
|
+
try:
|
|
389
|
+
matches.sort(key=lambda path: path.stat().st_mtime, reverse=True)
|
|
390
|
+
except OSError:
|
|
391
|
+
pass
|
|
392
|
+
return str(matches[0])
|
|
393
|
+
|
|
394
|
+
def refresh_child_session_activity(self, force=False):
|
|
395
|
+
"""Refresh Codex child transcript file stats.
|
|
396
|
+
|
|
397
|
+
The heartbeat monitor uses this activity signature to treat subagent
|
|
398
|
+
transcript growth as real progress while the parent Codex session is
|
|
399
|
+
blocked in `wait`.
|
|
400
|
+
"""
|
|
401
|
+
previous_signature = self.child_activity_signature
|
|
402
|
+
|
|
403
|
+
if not self.codex_child_thread_ids:
|
|
404
|
+
self.child_session_files = []
|
|
405
|
+
self.child_total_bytes = 0
|
|
406
|
+
self.child_activity_signature = ""
|
|
407
|
+
self.last_child_activity_at = ""
|
|
408
|
+
return previous_signature != self.child_activity_signature
|
|
409
|
+
|
|
410
|
+
now = time.monotonic()
|
|
411
|
+
should_scan = (
|
|
412
|
+
force
|
|
413
|
+
or self._last_child_scan_at == 0.0
|
|
414
|
+
or (now - self._last_child_scan_at >= 2.0)
|
|
415
|
+
)
|
|
416
|
+
if should_scan:
|
|
417
|
+
for thread_id in sorted(self.codex_child_thread_ids):
|
|
418
|
+
path = self._codex_child_session_paths.get(thread_id)
|
|
419
|
+
if not path or not os.path.exists(path):
|
|
420
|
+
found = self._find_codex_child_session_file(thread_id)
|
|
421
|
+
if found:
|
|
422
|
+
self._codex_child_session_paths[thread_id] = found
|
|
423
|
+
self._last_child_scan_at = now
|
|
424
|
+
|
|
425
|
+
files = []
|
|
426
|
+
signature_parts = []
|
|
427
|
+
total_bytes = 0
|
|
428
|
+
max_mtime = 0.0
|
|
429
|
+
|
|
430
|
+
for thread_id in sorted(self.codex_child_thread_ids):
|
|
431
|
+
path = self._codex_child_session_paths.get(thread_id)
|
|
432
|
+
if not path:
|
|
433
|
+
continue
|
|
434
|
+
try:
|
|
435
|
+
stat = os.stat(path)
|
|
436
|
+
except OSError:
|
|
437
|
+
continue
|
|
438
|
+
|
|
439
|
+
total_bytes += stat.st_size
|
|
440
|
+
max_mtime = max(max_mtime, stat.st_mtime)
|
|
441
|
+
signature_parts.append(
|
|
442
|
+
f"{thread_id}:{stat.st_size}:{getattr(stat, 'st_mtime_ns', int(stat.st_mtime * 1_000_000_000))}"
|
|
443
|
+
)
|
|
444
|
+
files.append(
|
|
445
|
+
{
|
|
446
|
+
"thread_id": thread_id,
|
|
447
|
+
"path": path,
|
|
448
|
+
"size": stat.st_size,
|
|
449
|
+
"mtime": datetime.fromtimestamp(
|
|
450
|
+
stat.st_mtime, timezone.utc
|
|
451
|
+
).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
452
|
+
}
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
self.child_session_files = files
|
|
456
|
+
self.child_total_bytes = total_bytes
|
|
457
|
+
self.child_activity_signature = "|".join(signature_parts)
|
|
458
|
+
self.last_child_activity_at = (
|
|
459
|
+
datetime.fromtimestamp(max_mtime, timezone.utc).strftime(
|
|
460
|
+
"%Y-%m-%dT%H:%M:%SZ"
|
|
461
|
+
)
|
|
462
|
+
if max_mtime
|
|
463
|
+
else ""
|
|
464
|
+
)
|
|
465
|
+
return previous_signature != self.child_activity_signature
|
|
466
|
+
|
|
348
467
|
def to_dict(self):
|
|
349
468
|
"""Export current state as a dictionary for JSON serialization."""
|
|
469
|
+
self.refresh_child_session_activity()
|
|
350
470
|
tool_calls = [
|
|
351
471
|
{"name": name, "count": count}
|
|
352
472
|
for name, count in self.tool_call_counts.most_common()
|
|
@@ -367,6 +487,11 @@ class ProgressTracker:
|
|
|
367
487
|
"total_tool_calls": self.total_tool_calls,
|
|
368
488
|
"active_subagent_count": self.active_subagent_count,
|
|
369
489
|
"subagent_states": subagent_states,
|
|
490
|
+
"child_thread_ids": sorted(self.codex_child_thread_ids),
|
|
491
|
+
"child_session_files": self.child_session_files,
|
|
492
|
+
"child_total_bytes": self.child_total_bytes,
|
|
493
|
+
"child_activity_signature": self.child_activity_signature,
|
|
494
|
+
"last_child_activity_at": self.last_child_activity_at,
|
|
370
495
|
"last_text_snippet": self.last_text_snippet,
|
|
371
496
|
"is_active": self.is_active,
|
|
372
497
|
"errors": self.errors[-10:], # Keep last 10 errors
|
|
@@ -397,6 +522,15 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
397
522
|
tracker = ProgressTracker()
|
|
398
523
|
last_write_state = None
|
|
399
524
|
|
|
525
|
+
def state_key(state):
|
|
526
|
+
return (
|
|
527
|
+
state["message_count"],
|
|
528
|
+
state["current_tool"],
|
|
529
|
+
state["current_phase"],
|
|
530
|
+
state["total_tool_calls"],
|
|
531
|
+
state.get("child_activity_signature", ""),
|
|
532
|
+
)
|
|
533
|
+
|
|
400
534
|
# Wait for log file to appear
|
|
401
535
|
wait_count = 0
|
|
402
536
|
while not os.path.exists(session_log):
|
|
@@ -428,22 +562,20 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
428
562
|
|
|
429
563
|
# Write progress if state changed
|
|
430
564
|
current_state = tracker.to_dict()
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
current_state["current_tool"],
|
|
434
|
-
current_state["current_phase"],
|
|
435
|
-
current_state["total_tool_calls"],
|
|
436
|
-
)
|
|
437
|
-
if state_key != last_write_state:
|
|
565
|
+
current_state_key = state_key(current_state)
|
|
566
|
+
if current_state_key != last_write_state:
|
|
438
567
|
atomic_write_json(current_state, progress_file)
|
|
439
|
-
last_write_state =
|
|
568
|
+
last_write_state = current_state_key
|
|
440
569
|
else:
|
|
441
570
|
idle_count += 1
|
|
442
|
-
#
|
|
443
|
-
#
|
|
444
|
-
if idle_count ==
|
|
571
|
+
# Every 2 seconds of no parent log data, refresh child Codex
|
|
572
|
+
# transcript stats and write if child activity advanced.
|
|
573
|
+
if idle_count % 4 == 0:
|
|
445
574
|
current_state = tracker.to_dict()
|
|
446
|
-
|
|
575
|
+
current_state_key = state_key(current_state)
|
|
576
|
+
if current_state_key != last_write_state or idle_count == 4:
|
|
577
|
+
atomic_write_json(current_state, progress_file)
|
|
578
|
+
last_write_state = current_state_key
|
|
447
579
|
|
|
448
580
|
# After 3600 idle cycles (30 min), mark inactive and exit
|
|
449
581
|
if idle_count > 3600:
|
|
@@ -22,9 +22,10 @@
|
|
|
22
22
|
# SESSION_TIMEOUT=0 # Session timeout in seconds (0 = no limit)
|
|
23
23
|
# VERBOSE=1 # Verbose logging (1=on, 0=off)
|
|
24
24
|
# HEARTBEAT_INTERVAL=30 # Poll interval for session progress/stale checks
|
|
25
|
-
# STALE_KILL_THRESHOLD=900 # Auto-kill session after N seconds without log progress (0 = disabled)
|
|
25
|
+
# STALE_KILL_THRESHOLD=900 # Auto-kill session after N seconds without parent log progress (0 = disabled)
|
|
26
26
|
# STALE_KILL_GRACE_SECONDS=10 # Grace period after stale-kill before force-stopping the job
|
|
27
|
-
#
|
|
27
|
+
# CODEX_WAIT_STALE_KILL_THRESHOLD=3600 # Longer no-log window while Codex waits on subagents
|
|
28
|
+
# CODEX_SUBAGENT_TIMEOUT_SECONDS=3300 # Codex subagent max runtime; defaults to wait threshold - 300
|
|
28
29
|
# LOG_CLEANUP_ENABLED=1 # Run periodic session log cleanup
|
|
29
30
|
# LOG_RETENTION_DAYS=14 # Delete session logs older than N days
|
|
30
31
|
# LOG_MAX_TOTAL_MB=1024 # Keep total logs under N MB via oldest-first cleanup
|
|
@@ -346,9 +346,10 @@ pending, in_progress, completed, failed, skipped
|
|
|
346
346
|
| `SESSION_TIMEOUT` | integer | 0 | 0 = no limit |
|
|
347
347
|
| `VERBOSE` | integer | (not specified) | 1=on, 0=off |
|
|
348
348
|
| `HEARTBEAT_INTERVAL` | integer | 30 | Poll interval for session progress/stale checks |
|
|
349
|
-
| `STALE_KILL_THRESHOLD` | integer | 900 | Auto-kill after N seconds without log progress; 0 disables |
|
|
349
|
+
| `STALE_KILL_THRESHOLD` | integer | 900 | Auto-kill after N seconds without parent log progress; 0 disables |
|
|
350
350
|
| `STALE_KILL_GRACE_SECONDS` | integer | 10 | Grace period after stale-kill before force-stopping |
|
|
351
|
-
| `
|
|
351
|
+
| `CODEX_WAIT_STALE_KILL_THRESHOLD` | integer | 3600 | Longer no-log stale window while Codex waits on subagents |
|
|
352
|
+
| `CODEX_SUBAGENT_TIMEOUT_SECONDS` | integer | 3300 | Codex subagent max runtime |
|
|
352
353
|
| `LOG_CLEANUP_ENABLED` | boolean | 1 | Periodic session log cleanup |
|
|
353
354
|
| `LOG_RETENTION_DAYS` | integer | 14 | Delete session logs older than N days |
|
|
354
355
|
| `LOG_MAX_TOTAL_MB` | integer | 1024 | Keep total logs under N MB |
|
|
@@ -345,13 +345,86 @@ function Get-PrizmCodexSubagentTimeoutSeconds {
|
|
|
345
345
|
return $configuredTimeout
|
|
346
346
|
}
|
|
347
347
|
|
|
348
|
-
$
|
|
349
|
-
$
|
|
350
|
-
if ([int]::TryParse($
|
|
351
|
-
return ($
|
|
348
|
+
$waitThreshold = 0
|
|
349
|
+
$waitThresholdText = if ($env:CODEX_WAIT_STALE_KILL_THRESHOLD) { $env:CODEX_WAIT_STALE_KILL_THRESHOLD } else { '3600' }
|
|
350
|
+
if ([int]::TryParse($waitThresholdText, [ref]$waitThreshold) -and $waitThreshold -gt 600) {
|
|
351
|
+
return ($waitThreshold - 300)
|
|
352
352
|
}
|
|
353
353
|
|
|
354
|
-
return
|
|
354
|
+
return 3300
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
function Get-PrizmEffectiveStaleKillThreshold {
|
|
358
|
+
param(
|
|
359
|
+
[string]$ProgressFile,
|
|
360
|
+
[int]$BaseThreshold
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
if ($BaseThreshold -le 0) { return $BaseThreshold }
|
|
364
|
+
if (-not (Test-Path $ProgressFile)) { return $BaseThreshold }
|
|
365
|
+
|
|
366
|
+
try {
|
|
367
|
+
$progress = Get-Content $ProgressFile -Raw -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
|
|
368
|
+
} catch {
|
|
369
|
+
return $BaseThreshold
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
$spawnCount = 0
|
|
373
|
+
if ($progress.event_format -eq 'codex-json' -and $progress.current_tool -eq 'wait' -and $progress.tool_calls) {
|
|
374
|
+
foreach ($tool in @($progress.tool_calls)) {
|
|
375
|
+
if ($tool.name -eq 'spawn_agent') {
|
|
376
|
+
$count = 0
|
|
377
|
+
if ([int]::TryParse([string]$tool.count, [ref]$count)) { $spawnCount += $count }
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
if ($spawnCount -le 0) { return $BaseThreshold }
|
|
383
|
+
|
|
384
|
+
$waitThreshold = 0
|
|
385
|
+
if ([int]::TryParse($env:CODEX_WAIT_STALE_KILL_THRESHOLD, [ref]$waitThreshold) -and $waitThreshold -gt $BaseThreshold) {
|
|
386
|
+
return $waitThreshold
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
return [Math]::Max($BaseThreshold * 4, 3600)
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
function Get-PrizmProgressChildActivity {
|
|
393
|
+
param([string]$ProgressFile)
|
|
394
|
+
|
|
395
|
+
$empty = [pscustomobject]@{
|
|
396
|
+
Signature = ''
|
|
397
|
+
TotalBytes = 0
|
|
398
|
+
SessionCount = 0
|
|
399
|
+
}
|
|
400
|
+
if (-not (Test-Path $ProgressFile)) { return $empty }
|
|
401
|
+
|
|
402
|
+
try {
|
|
403
|
+
$progress = Get-Content $ProgressFile -Raw -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
|
|
404
|
+
} catch {
|
|
405
|
+
return $empty
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
$signature = ''
|
|
409
|
+
if ($progress.PSObject.Properties['child_activity_signature'] -and $progress.child_activity_signature) {
|
|
410
|
+
$signature = [string]$progress.child_activity_signature
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
$totalBytes = [int64]0
|
|
414
|
+
if ($progress.PSObject.Properties['child_total_bytes']) {
|
|
415
|
+
[int64]::TryParse([string]$progress.child_total_bytes, [ref]$totalBytes) | Out-Null
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
$sessionCount = 0
|
|
419
|
+
if ($progress.PSObject.Properties['child_session_files'] -and $progress.child_session_files) {
|
|
420
|
+
$sessionCount = @($progress.child_session_files).Count
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
return [pscustomobject]@{
|
|
424
|
+
Signature = $signature
|
|
425
|
+
TotalBytes = $totalBytes
|
|
426
|
+
SessionCount = $sessionCount
|
|
427
|
+
}
|
|
355
428
|
}
|
|
356
429
|
|
|
357
430
|
function Test-PrizmCodexJsonSupport {
|
|
@@ -552,6 +552,7 @@ function Invoke-PrizmPipeline {
|
|
|
552
552
|
$elapsedSeconds = 0
|
|
553
553
|
$staleSeconds = 0
|
|
554
554
|
$previousLogSize = 0
|
|
555
|
+
$previousChildActivitySignature = ''
|
|
555
556
|
$wasTimedOut = $false
|
|
556
557
|
$staleKillMarker = Join-Path $logsDir 'stale-kill.json'
|
|
557
558
|
$wasStaleKilled = $false
|
|
@@ -568,7 +569,13 @@ function Invoke-PrizmPipeline {
|
|
|
568
569
|
}
|
|
569
570
|
$growth = $currentLogSize - $previousLogSize
|
|
570
571
|
$previousLogSize = $currentLogSize
|
|
571
|
-
|
|
572
|
+
|
|
573
|
+
$childActivity = Get-PrizmProgressChildActivity -ProgressFile $progressJson
|
|
574
|
+
$childSignature = [string]$childActivity.Signature
|
|
575
|
+
$childAdvanced = ($childSignature -and $childSignature -ne $previousChildActivitySignature)
|
|
576
|
+
$previousChildActivitySignature = $childSignature
|
|
577
|
+
|
|
578
|
+
if ($growth -gt 0 -or $childAdvanced) {
|
|
572
579
|
$staleSeconds = 0
|
|
573
580
|
} else {
|
|
574
581
|
$staleSeconds += $waitSeconds
|
|
@@ -580,10 +587,11 @@ function Invoke-PrizmPipeline {
|
|
|
580
587
|
break
|
|
581
588
|
}
|
|
582
589
|
|
|
583
|
-
|
|
590
|
+
$effectiveStaleKillThreshold = Get-PrizmEffectiveStaleKillThreshold -ProgressFile $progressJson -BaseThreshold $staleKillThreshold
|
|
591
|
+
if ($effectiveStaleKillThreshold -gt 0 -and $staleSeconds -ge $effectiveStaleKillThreshold) {
|
|
584
592
|
$wasStaleKilled = $true
|
|
585
|
-
Write-PrizmWarn "Session stale-killed (no progress for ${
|
|
586
|
-
Write-PrizmStaleKillMarker $staleKillMarker $staleSeconds $
|
|
593
|
+
Write-PrizmWarn "Session stale-killed (no progress for ${effectiveStaleKillThreshold}s)"
|
|
594
|
+
Write-PrizmStaleKillMarker $staleKillMarker $staleSeconds $effectiveStaleKillThreshold
|
|
587
595
|
Stop-PrizmSessionProcess $pidPath
|
|
588
596
|
if ($staleKillGraceSeconds -gt 0) { Start-Sleep -Seconds $staleKillGraceSeconds }
|
|
589
597
|
break
|
|
@@ -110,6 +110,7 @@ $job = Start-Job -ScriptBlock {
|
|
|
110
110
|
$elapsedSeconds = 0
|
|
111
111
|
$staleSeconds = 0
|
|
112
112
|
$previousLogSize = 0
|
|
113
|
+
$previousChildActivitySignature = ''
|
|
113
114
|
$wasTimedOut = $false
|
|
114
115
|
$wasStaleKilled = $false
|
|
115
116
|
while ($true) {
|
|
@@ -123,7 +124,13 @@ while ($true) {
|
|
|
123
124
|
if (Test-Path $logPath) { $currentLogSize = [int64](Get-Item $logPath).Length }
|
|
124
125
|
$growth = $currentLogSize - $previousLogSize
|
|
125
126
|
$previousLogSize = $currentLogSize
|
|
126
|
-
|
|
127
|
+
|
|
128
|
+
$childActivity = Get-PrizmProgressChildActivity -ProgressFile $progressPath
|
|
129
|
+
$childSignature = [string]$childActivity.Signature
|
|
130
|
+
$childAdvanced = ($childSignature -and $childSignature -ne $previousChildActivitySignature)
|
|
131
|
+
$previousChildActivitySignature = $childSignature
|
|
132
|
+
|
|
133
|
+
if ($growth -gt 0 -or $childAdvanced) { $staleSeconds = 0 } else { $staleSeconds += $waitSeconds }
|
|
127
134
|
|
|
128
135
|
if ($timeoutSeconds -gt 0 -and $elapsedSeconds -ge $timeoutSeconds) {
|
|
129
136
|
$wasTimedOut = $true
|
|
@@ -23,6 +23,7 @@ import tempfile
|
|
|
23
23
|
import time
|
|
24
24
|
from collections import Counter
|
|
25
25
|
from datetime import datetime, timezone
|
|
26
|
+
from pathlib import Path
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
# Ordered pipeline phases — index defines forward-only progression.
|
|
@@ -76,6 +77,13 @@ class ProgressTracker:
|
|
|
76
77
|
self.event_format = ""
|
|
77
78
|
self.active_subagent_count = 0
|
|
78
79
|
self.subagent_status_counts = Counter()
|
|
80
|
+
self.codex_child_thread_ids = set()
|
|
81
|
+
self.child_session_files = []
|
|
82
|
+
self.child_total_bytes = 0
|
|
83
|
+
self.child_activity_signature = ""
|
|
84
|
+
self.last_child_activity_at = ""
|
|
85
|
+
self._codex_child_session_paths = {}
|
|
86
|
+
self._last_child_scan_at = 0.0
|
|
79
87
|
self._text_buffer = ""
|
|
80
88
|
self._in_tool_use = False
|
|
81
89
|
self._current_tool_input_parts = []
|
|
@@ -113,6 +121,9 @@ class ProgressTracker:
|
|
|
113
121
|
|
|
114
122
|
elif item_type == "collab_tool_call":
|
|
115
123
|
tool_name = item.get("tool", "collab")
|
|
124
|
+
self._record_codex_child_thread_ids(
|
|
125
|
+
item.get("receiver_thread_ids")
|
|
126
|
+
)
|
|
116
127
|
if event_type == "item.started":
|
|
117
128
|
self.current_tool = tool_name
|
|
118
129
|
self.tool_call_counts[tool_name] += 1
|
|
@@ -345,8 +356,117 @@ class ProgressTracker:
|
|
|
345
356
|
self.subagent_status_counts = counts
|
|
346
357
|
self.active_subagent_count = active
|
|
347
358
|
|
|
359
|
+
def _record_codex_child_thread_ids(self, thread_ids):
|
|
360
|
+
"""Remember Codex child thread IDs reported by collab tool calls."""
|
|
361
|
+
if not isinstance(thread_ids, list):
|
|
362
|
+
return
|
|
363
|
+
for thread_id in thread_ids:
|
|
364
|
+
if isinstance(thread_id, str) and thread_id.strip():
|
|
365
|
+
self.codex_child_thread_ids.add(thread_id.strip())
|
|
366
|
+
|
|
367
|
+
def _codex_sessions_dir(self):
|
|
368
|
+
"""Return the Codex sessions directory for the current environment."""
|
|
369
|
+
codex_home = os.environ.get("CODEX_HOME")
|
|
370
|
+
if codex_home:
|
|
371
|
+
return Path(codex_home).expanduser() / "sessions"
|
|
372
|
+
return Path.home() / ".codex" / "sessions"
|
|
373
|
+
|
|
374
|
+
def _find_codex_child_session_file(self, thread_id):
|
|
375
|
+
"""Find a Codex transcript file for a child thread ID."""
|
|
376
|
+
sessions_dir = self._codex_sessions_dir()
|
|
377
|
+
if not sessions_dir.exists():
|
|
378
|
+
return None
|
|
379
|
+
|
|
380
|
+
try:
|
|
381
|
+
matches = list(sessions_dir.rglob(f"*{thread_id}.jsonl"))
|
|
382
|
+
except OSError:
|
|
383
|
+
return None
|
|
384
|
+
|
|
385
|
+
if not matches:
|
|
386
|
+
return None
|
|
387
|
+
|
|
388
|
+
try:
|
|
389
|
+
matches.sort(key=lambda path: path.stat().st_mtime, reverse=True)
|
|
390
|
+
except OSError:
|
|
391
|
+
pass
|
|
392
|
+
return str(matches[0])
|
|
393
|
+
|
|
394
|
+
def refresh_child_session_activity(self, force=False):
|
|
395
|
+
"""Refresh Codex child transcript file stats.
|
|
396
|
+
|
|
397
|
+
The heartbeat monitor uses this activity signature to treat subagent
|
|
398
|
+
transcript growth as real progress while the parent Codex session is
|
|
399
|
+
blocked in `wait`.
|
|
400
|
+
"""
|
|
401
|
+
previous_signature = self.child_activity_signature
|
|
402
|
+
|
|
403
|
+
if not self.codex_child_thread_ids:
|
|
404
|
+
self.child_session_files = []
|
|
405
|
+
self.child_total_bytes = 0
|
|
406
|
+
self.child_activity_signature = ""
|
|
407
|
+
self.last_child_activity_at = ""
|
|
408
|
+
return previous_signature != self.child_activity_signature
|
|
409
|
+
|
|
410
|
+
now = time.monotonic()
|
|
411
|
+
should_scan = (
|
|
412
|
+
force
|
|
413
|
+
or self._last_child_scan_at == 0.0
|
|
414
|
+
or (now - self._last_child_scan_at >= 2.0)
|
|
415
|
+
)
|
|
416
|
+
if should_scan:
|
|
417
|
+
for thread_id in sorted(self.codex_child_thread_ids):
|
|
418
|
+
path = self._codex_child_session_paths.get(thread_id)
|
|
419
|
+
if not path or not os.path.exists(path):
|
|
420
|
+
found = self._find_codex_child_session_file(thread_id)
|
|
421
|
+
if found:
|
|
422
|
+
self._codex_child_session_paths[thread_id] = found
|
|
423
|
+
self._last_child_scan_at = now
|
|
424
|
+
|
|
425
|
+
files = []
|
|
426
|
+
signature_parts = []
|
|
427
|
+
total_bytes = 0
|
|
428
|
+
max_mtime = 0.0
|
|
429
|
+
|
|
430
|
+
for thread_id in sorted(self.codex_child_thread_ids):
|
|
431
|
+
path = self._codex_child_session_paths.get(thread_id)
|
|
432
|
+
if not path:
|
|
433
|
+
continue
|
|
434
|
+
try:
|
|
435
|
+
stat = os.stat(path)
|
|
436
|
+
except OSError:
|
|
437
|
+
continue
|
|
438
|
+
|
|
439
|
+
total_bytes += stat.st_size
|
|
440
|
+
max_mtime = max(max_mtime, stat.st_mtime)
|
|
441
|
+
signature_parts.append(
|
|
442
|
+
f"{thread_id}:{stat.st_size}:{getattr(stat, 'st_mtime_ns', int(stat.st_mtime * 1_000_000_000))}"
|
|
443
|
+
)
|
|
444
|
+
files.append(
|
|
445
|
+
{
|
|
446
|
+
"thread_id": thread_id,
|
|
447
|
+
"path": path,
|
|
448
|
+
"size": stat.st_size,
|
|
449
|
+
"mtime": datetime.fromtimestamp(
|
|
450
|
+
stat.st_mtime, timezone.utc
|
|
451
|
+
).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
452
|
+
}
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
self.child_session_files = files
|
|
456
|
+
self.child_total_bytes = total_bytes
|
|
457
|
+
self.child_activity_signature = "|".join(signature_parts)
|
|
458
|
+
self.last_child_activity_at = (
|
|
459
|
+
datetime.fromtimestamp(max_mtime, timezone.utc).strftime(
|
|
460
|
+
"%Y-%m-%dT%H:%M:%SZ"
|
|
461
|
+
)
|
|
462
|
+
if max_mtime
|
|
463
|
+
else ""
|
|
464
|
+
)
|
|
465
|
+
return previous_signature != self.child_activity_signature
|
|
466
|
+
|
|
348
467
|
def to_dict(self):
|
|
349
468
|
"""Export current state as a dictionary for JSON serialization."""
|
|
469
|
+
self.refresh_child_session_activity()
|
|
350
470
|
tool_calls = [
|
|
351
471
|
{"name": name, "count": count}
|
|
352
472
|
for name, count in self.tool_call_counts.most_common()
|
|
@@ -367,6 +487,11 @@ class ProgressTracker:
|
|
|
367
487
|
"total_tool_calls": self.total_tool_calls,
|
|
368
488
|
"active_subagent_count": self.active_subagent_count,
|
|
369
489
|
"subagent_states": subagent_states,
|
|
490
|
+
"child_thread_ids": sorted(self.codex_child_thread_ids),
|
|
491
|
+
"child_session_files": self.child_session_files,
|
|
492
|
+
"child_total_bytes": self.child_total_bytes,
|
|
493
|
+
"child_activity_signature": self.child_activity_signature,
|
|
494
|
+
"last_child_activity_at": self.last_child_activity_at,
|
|
370
495
|
"last_text_snippet": self.last_text_snippet,
|
|
371
496
|
"is_active": self.is_active,
|
|
372
497
|
"errors": self.errors[-10:], # Keep last 10 errors
|
|
@@ -397,6 +522,15 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
397
522
|
tracker = ProgressTracker()
|
|
398
523
|
last_write_state = None
|
|
399
524
|
|
|
525
|
+
def state_key(state):
|
|
526
|
+
return (
|
|
527
|
+
state["message_count"],
|
|
528
|
+
state["current_tool"],
|
|
529
|
+
state["current_phase"],
|
|
530
|
+
state["total_tool_calls"],
|
|
531
|
+
state.get("child_activity_signature", ""),
|
|
532
|
+
)
|
|
533
|
+
|
|
400
534
|
# Wait for log file to appear
|
|
401
535
|
wait_count = 0
|
|
402
536
|
while not os.path.exists(session_log):
|
|
@@ -428,22 +562,20 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
428
562
|
|
|
429
563
|
# Write progress if state changed
|
|
430
564
|
current_state = tracker.to_dict()
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
current_state["current_tool"],
|
|
434
|
-
current_state["current_phase"],
|
|
435
|
-
current_state["total_tool_calls"],
|
|
436
|
-
)
|
|
437
|
-
if state_key != last_write_state:
|
|
565
|
+
current_state_key = state_key(current_state)
|
|
566
|
+
if current_state_key != last_write_state:
|
|
438
567
|
atomic_write_json(current_state, progress_file)
|
|
439
|
-
last_write_state = state_key
|
|
568
|
+
last_write_state = current_state_key
|
|
440
569
|
else:
|
|
441
570
|
idle_count += 1
|
|
442
|
-
#
|
|
443
|
-
#
|
|
444
|
-
if idle_count == 4:
|
|
571
|
+
# Every 2 seconds of no parent log data, refresh child Codex
|
|
572
|
+
# transcript stats and write if child activity advanced.
|
|
573
|
+
if idle_count % 4 == 0:
|
|
445
574
|
current_state = tracker.to_dict()
|
|
446
|
-
|
|
575
|
+
current_state_key = state_key(current_state)
|
|
576
|
+
if current_state_key != last_write_state or idle_count == 4:
|
|
577
|
+
atomic_write_json(current_state, progress_file)
|
|
578
|
+
last_write_state = current_state_key
|
|
447
579
|
|
|
448
580
|
# After 3600 idle cycles (30 min), mark inactive and exit
|
|
449
581
|
if idle_count > 3600:
|
package/package.json
CHANGED
package/src/scaffold.js
CHANGED
|
@@ -576,7 +576,7 @@ project_doc_fallback_filenames = ["CLAUDE.md", "CODEBUDDY.md"]
|
|
|
576
576
|
|
|
577
577
|
[agents]
|
|
578
578
|
max_depth = 1
|
|
579
|
-
job_max_runtime_seconds =
|
|
579
|
+
job_max_runtime_seconds = 3300
|
|
580
580
|
`;
|
|
581
581
|
await fs.writeFile(configPath, configToml);
|
|
582
582
|
await fs.remove(legacySettingsPath);
|