prizmkit 1.1.68 → 1.1.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/VERSION.json +3 -3
- package/bundled/dev-pipeline/lib/heartbeat.sh +5 -5
- package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +11 -12
- package/bundled/dev-pipeline/scripts/parse-stream-progress.py +217 -18
- package/bundled/dev-pipeline/templates/agent-prompts/dev-implement.md +36 -22
- package/bundled/dev-pipeline/templates/agent-prompts/reviewer-review.md +1 -1
- package/bundled/dev-pipeline/templates/bootstrap-tier2.md +19 -1
- package/bundled/dev-pipeline/templates/bootstrap-tier3.md +19 -1
- package/bundled/dev-pipeline/templates/bugfix-bootstrap-prompt.md +24 -21
- package/bundled/dev-pipeline/templates/refactor-bootstrap-prompt.md +32 -24
- package/bundled/dev-pipeline/templates/sections/ac-verification-checklist.md +4 -10
- package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +1 -0
- package/bundled/dev-pipeline/templates/sections/feature-context.md +16 -11
- package/bundled/dev-pipeline/templates/sections/phase-browser-verification-auto.md +17 -26
- package/bundled/dev-pipeline/templates/sections/phase-browser-verification-opencli.md +1 -1
- package/bundled/dev-pipeline/templates/sections/phase-browser-verification.md +1 -1
- package/bundled/dev-pipeline/templates/sections/phase-context-snapshot-base.md +1 -1
- package/bundled/dev-pipeline/templates/sections/phase-critic-plan-full.md +10 -0
- package/bundled/dev-pipeline/templates/sections/phase-critic-plan.md +10 -0
- package/bundled/dev-pipeline/templates/sections/phase-implement-agent.md +14 -9
- package/bundled/dev-pipeline/templates/sections/phase-implement-full.md +14 -9
- package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +8 -17
- package/bundled/dev-pipeline/templates/sections/phase-plan-lite.md +1 -1
- package/bundled/dev-pipeline/templates/sections/phase-review-agent.md +5 -1
- package/bundled/dev-pipeline/templates/sections/phase-review-full.md +6 -2
- package/bundled/dev-pipeline/templates/sections/phase-specify-plan-full.md +1 -1
- package/bundled/dev-pipeline/templates/sections/task-contract.md +34 -0
- package/bundled/dev-pipeline/templates/sections/test-failure-recovery-agent.md +27 -46
- package/bundled/dev-pipeline/templates/sections/test-failure-recovery-lite.md +27 -37
- package/bundled/dev-pipeline/tests/test_generate_bootstrap_prompt.py +13 -0
- package/bundled/dev-pipeline-windows/scripts/generate-bootstrap-prompt.py +11 -12
- package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +217 -18
- package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-implement.md +36 -22
- package/bundled/dev-pipeline-windows/templates/agent-prompts/reviewer-review.md +1 -1
- package/bundled/dev-pipeline-windows/templates/bugfix-bootstrap-prompt.md +24 -21
- package/bundled/dev-pipeline-windows/templates/refactor-bootstrap-prompt.md +32 -24
- package/bundled/dev-pipeline-windows/templates/sections/ac-verification-checklist.md +4 -10
- package/bundled/dev-pipeline-windows/templates/sections/context-budget-rules.md +1 -0
- package/bundled/dev-pipeline-windows/templates/sections/feature-context.md +16 -11
- package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification-auto.md +22 -10
- package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-base.md +1 -1
- package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan-full.md +10 -0
- package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan.md +10 -0
- package/bundled/dev-pipeline-windows/templates/sections/phase-implement-agent.md +14 -9
- package/bundled/dev-pipeline-windows/templates/sections/phase-implement-full.md +14 -9
- package/bundled/dev-pipeline-windows/templates/sections/phase-implement-lite.md +8 -19
- package/bundled/dev-pipeline-windows/templates/sections/phase-plan-lite.md +1 -1
- package/bundled/dev-pipeline-windows/templates/sections/phase-review-agent.md +5 -1
- package/bundled/dev-pipeline-windows/templates/sections/phase-review-full.md +6 -2
- package/bundled/dev-pipeline-windows/templates/sections/phase-specify-plan-full.md +1 -1
- package/bundled/dev-pipeline-windows/templates/sections/task-contract.md +34 -0
- package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-agent.md +27 -46
- package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-lite.md +27 -37
- package/bundled/skills/_metadata.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
## Task Contract
|
|
2
|
+
|
|
3
|
+
This section defines the only work that belongs to this session.
|
|
4
|
+
|
|
5
|
+
### Objective
|
|
6
|
+
|
|
7
|
+
Implement {{FEATURE_ID}}: "{{FEATURE_TITLE}}".
|
|
8
|
+
|
|
9
|
+
{{FEATURE_DESCRIPTION}}
|
|
10
|
+
|
|
11
|
+
### Scope Rule
|
|
12
|
+
|
|
13
|
+
Current scope is limited to the intersection of:
|
|
14
|
+
|
|
15
|
+
1. The Objective above
|
|
16
|
+
2. The Verification Gates below
|
|
17
|
+
3. Dependencies required to complete those gates
|
|
18
|
+
|
|
19
|
+
Raw user context, project brief, and completed dependency notes are authoritative context, but they do not expand scope by themselves.
|
|
20
|
+
|
|
21
|
+
### Non-Scope Rule
|
|
22
|
+
|
|
23
|
+
Do NOT implement unrelated backlog items, already completed features, or adjacent modules unless they are required by the Objective or a Verification Gate.
|
|
24
|
+
|
|
25
|
+
### Verification Gates
|
|
26
|
+
|
|
27
|
+
These gates are generated from `feature.acceptance_criteria` and are the only acceptance requirements for this session.
|
|
28
|
+
|
|
29
|
+
{{AC_CHECKLIST}}
|
|
30
|
+
|
|
31
|
+
Gate rule:
|
|
32
|
+
- `[x]` means verified with implementation or test evidence.
|
|
33
|
+
- Any remaining `[ ]` means the feature is incomplete.
|
|
34
|
+
- If a gate is blocked, document the reason in `failure-log.md`; blocked is not success.
|
|
@@ -1,67 +1,48 @@
|
|
|
1
1
|
## Test Failure Recovery Protocol
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Use this protocol whenever implementation or review tests fail. Its purpose is to distinguish tolerated pre-existing failures from blockers introduced by this session.
|
|
4
4
|
|
|
5
|
-
###
|
|
6
|
-
|
|
7
|
-
1. **Run tests and record results**:
|
|
8
|
-
- Count total failures and note which tests failed
|
|
9
|
-
- Compare against baseline (BASELINE_FAILURES) — exclude pre-existing failures
|
|
10
|
-
|
|
11
|
-
2. **Check termination conditions** (evaluate BEFORE each fix attempt):
|
|
12
|
-
- **All tests pass** → Done. Exit recovery loop.
|
|
13
|
-
- **Plateau detected** — same failure count AND same failing tests for 3 consecutive rounds → AI cannot resolve these failures. Document and exit.
|
|
14
|
-
- **Still making progress** — failure count decreased compared to previous round → Continue fixing.
|
|
15
|
-
- **First round** — no history yet → Proceed to fix.
|
|
16
|
-
|
|
17
|
-
3. **Fix and iterate**:
|
|
18
|
-
- Analyze remaining failures: root cause (code bug vs. test brittleness vs. environment issue)
|
|
19
|
-
- Categorize:
|
|
20
|
-
- **Pre-existing baseline failure**: Expected, do NOT fix
|
|
21
|
-
- **New regression**: Fix the code
|
|
22
|
-
- **Brittle test**: Fix the test or environment setup
|
|
23
|
-
- Apply fix, re-run `($TEST_CMD)`, go back to step 1
|
|
5
|
+
### Failure Classes
|
|
24
6
|
|
|
25
|
-
|
|
7
|
+
| Class | Meaning | Required Action | May Continue? |
|
|
8
|
+
|-------|---------|-----------------|---------------|
|
|
9
|
+
| Baseline failure | Existed before this session | Document in Implementation Log or review notes | Yes |
|
|
10
|
+
| New regression | Introduced by this session | Fix before reporting success | No |
|
|
11
|
+
| Brittle test | Test expectation/setup is wrong for the intended behavior | Fix the test or environment setup, then rerun | Only after fixed or documented as blocked |
|
|
12
|
+
| Environment/tooling failure | External tool, network, install, or local environment prevents verification | Document in `failure-log.md` with impact on gates | Only if no Verification Gate is blocked |
|
|
26
13
|
|
|
27
|
-
|
|
14
|
+
### Recovery Loop
|
|
28
15
|
|
|
29
|
-
|
|
16
|
+
1. Run tests and record: failing test names, failure count, and class for each failure.
|
|
17
|
+
2. Compare with `BASELINE_FAILURES`; never blame baseline failures on this feature.
|
|
18
|
+
3. Fix new regressions and brittle tests while progress is being made.
|
|
19
|
+
4. Stop after a plateau: same failure count and same failing tests for 3 consecutive rounds.
|
|
20
|
+
5. If failures decrease, reset the plateau counter.
|
|
30
21
|
|
|
31
|
-
###
|
|
22
|
+
### Success Rule
|
|
32
23
|
|
|
33
|
-
|
|
34
|
-
- Dev appends failure details to Implementation Log
|
|
35
|
-
- Reviewer agent runs full test suite in Phase 5
|
|
36
|
-
- If Reviewer confirms NEW regressions (not in baseline): mark verdict as `NEEDS_FIXES`
|
|
37
|
-
- If Reviewer confirms only baseline failures remain: proceed with `PASS_WITH_WARNINGS`
|
|
24
|
+
Proceed to review only when:
|
|
38
25
|
|
|
39
|
-
|
|
26
|
+
1. all new regressions are fixed;
|
|
27
|
+
2. baseline failures are documented;
|
|
28
|
+
3. every Verification Gate is verified.
|
|
40
29
|
|
|
41
|
-
|
|
42
|
-
- If Implementation Log section in context-snapshot.md already confirms "all tests passing"
|
|
43
|
-
- → Skip Phase 5 test suite re-run (Reviewer will verify baseline log instead)
|
|
44
|
-
- This avoids rebuilding/re-running tests when already verified
|
|
30
|
+
Blocked gates are not success. If any gate cannot be verified, follow the Blocked Rule.
|
|
45
31
|
|
|
46
|
-
|
|
47
|
-
- If Implementation Log is missing or incomplete
|
|
48
|
-
- If any new code was added after the last test run
|
|
49
|
-
- If Reviewer suspects brittleness or environment drift
|
|
32
|
+
### Blocked Rule
|
|
50
33
|
|
|
51
|
-
|
|
34
|
+
If a remaining failure prevents any Verification Gate from being verified, the feature is incomplete. Write `failure-log.md` and do not report success.
|
|
52
35
|
|
|
53
|
-
|
|
36
|
+
### Failure Capture Format
|
|
54
37
|
|
|
55
|
-
```
|
|
38
|
+
```markdown
|
|
56
39
|
## Test Failures Encountered
|
|
57
40
|
|
|
58
41
|
- **Test**: [test name/path]
|
|
59
42
|
- Root Cause: [explanation]
|
|
60
|
-
- Category: [
|
|
43
|
+
- Category: [baseline failure | new regression | brittle test | environment/tooling]
|
|
61
44
|
- Rounds Attempted: [N rounds, plateau at round M]
|
|
62
|
-
- Status: [
|
|
45
|
+
- Status: [fixed | still failing | blocked]
|
|
63
46
|
|
|
64
|
-
- **Impact on
|
|
47
|
+
- **Impact on Verification Gates**: [verified | not affected | blocked + reason]
|
|
65
48
|
```
|
|
66
|
-
|
|
67
|
-
**Rule**: If any AC cannot be verified due to test failure, the feature is incomplete. Document in failure-log.md for next session.
|
|
@@ -1,58 +1,48 @@
|
|
|
1
1
|
## Test Failure Recovery Protocol
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Use this protocol whenever implementation tests fail. Its purpose is to distinguish tolerated pre-existing failures from blockers introduced by this session.
|
|
4
4
|
|
|
5
|
-
###
|
|
6
|
-
|
|
7
|
-
1. **Run tests and record results**:
|
|
8
|
-
- Count total failures and note which tests failed
|
|
9
|
-
- Compare against baseline (BASELINE_FAILURES) — exclude pre-existing failures
|
|
10
|
-
|
|
11
|
-
2. **Check termination conditions** (evaluate BEFORE each fix attempt):
|
|
12
|
-
- **All tests pass** → Done. Exit recovery loop.
|
|
13
|
-
- **Plateau detected** — same failure count AND same failing tests for 3 consecutive rounds → AI cannot resolve these failures. Document and exit.
|
|
14
|
-
- **Still making progress** — failure count decreased compared to previous round → Continue fixing.
|
|
15
|
-
- **First round** — no history yet → Proceed to fix.
|
|
5
|
+
### Failure Classes
|
|
16
6
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
- Apply fix, re-run `($TEST_CMD)`, go back to step 1
|
|
7
|
+
| Class | Meaning | Required Action | May Continue? |
|
|
8
|
+
|-------|---------|-----------------|---------------|
|
|
9
|
+
| Baseline failure | Existed before this session | Document in Implementation Log | Yes |
|
|
10
|
+
| New regression | Introduced by this session | Fix before reporting success | No |
|
|
11
|
+
| Brittle test | Test expectation/setup is wrong for the intended behavior | Fix the test or environment setup, then rerun | Only after fixed or documented as blocked |
|
|
12
|
+
| Environment/tooling failure | External tool, network, install, or local environment prevents verification | Document in `failure-log.md` with impact on gates | Only if no Verification Gate is blocked |
|
|
24
13
|
|
|
25
|
-
###
|
|
14
|
+
### Recovery Loop
|
|
26
15
|
|
|
27
|
-
|
|
16
|
+
1. Run tests and record: failing test names, failure count, and class for each failure.
|
|
17
|
+
2. Compare with `BASELINE_FAILURES`; never blame baseline failures on this feature.
|
|
18
|
+
3. Fix new regressions and brittle tests while progress is being made.
|
|
19
|
+
4. Stop after a plateau: same failure count and same failing tests for 3 consecutive rounds.
|
|
20
|
+
5. If failures decrease, reset the plateau counter.
|
|
28
21
|
|
|
29
|
-
|
|
22
|
+
### Success Rule
|
|
30
23
|
|
|
31
|
-
|
|
24
|
+
Proceed only when:
|
|
32
25
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
- **Do NOT block commit** — unresolved test failures are deferred to next session
|
|
26
|
+
1. all new regressions are fixed;
|
|
27
|
+
2. baseline failures are documented;
|
|
28
|
+
3. every Verification Gate is verified.
|
|
37
29
|
|
|
38
|
-
|
|
30
|
+
Blocked gates are not success. If any gate cannot be verified, follow the Blocked Rule.
|
|
39
31
|
|
|
40
|
-
|
|
32
|
+
### Blocked Rule
|
|
41
33
|
|
|
42
|
-
|
|
34
|
+
If a remaining failure prevents any Verification Gate from being verified, the feature is incomplete. Write `failure-log.md` and do not report success.
|
|
43
35
|
|
|
44
|
-
|
|
36
|
+
### Failure Capture Format
|
|
45
37
|
|
|
46
|
-
```
|
|
38
|
+
```markdown
|
|
47
39
|
## Test Failures Encountered
|
|
48
40
|
|
|
49
41
|
- **Test**: [test name/path]
|
|
50
42
|
- Root Cause: [explanation]
|
|
51
|
-
- Category: [
|
|
43
|
+
- Category: [baseline failure | new regression | brittle test | environment/tooling]
|
|
52
44
|
- Rounds Attempted: [N rounds, plateau at round M]
|
|
53
|
-
- Status: [
|
|
45
|
+
- Status: [fixed | still failing | blocked]
|
|
54
46
|
|
|
55
|
-
- **Impact on
|
|
47
|
+
- **Impact on Verification Gates**: [verified | not affected | blocked + reason]
|
|
56
48
|
```
|
|
57
|
-
|
|
58
|
-
**Rule**: If any AC cannot be verified due to test failure, the feature is incomplete. Document in failure-log.md for next session.
|
|
@@ -6,6 +6,7 @@ from generate_bootstrap_prompt import (
|
|
|
6
6
|
compute_feature_slug,
|
|
7
7
|
find_feature,
|
|
8
8
|
format_acceptance_criteria,
|
|
9
|
+
format_ac_checklist,
|
|
9
10
|
format_global_context,
|
|
10
11
|
get_completed_dependencies,
|
|
11
12
|
determine_pipeline_mode,
|
|
@@ -73,6 +74,18 @@ class TestFormatAcceptanceCriteria:
|
|
|
73
74
|
assert "- Password reset works" in result
|
|
74
75
|
|
|
75
76
|
|
|
77
|
+
class TestFormatAcChecklist:
    """Checks how format_ac_checklist renders Verification Gates."""

    def test_empty_does_not_create_unchecked_gate(self):
        # With no criteria the output must be the placeholder bullet and
        # must not contain an open checkbox.
        rendered = format_ac_checklist([])
        assert rendered == "- (no Verification Gates specified)"
        assert "[ ]" not in rendered

    def test_items_are_unchecked_verification_gates(self):
        # Every criterion is expected to show up as an unchecked checkbox.
        criteria = ["Users can log in", "Password reset works"]
        rendered = format_ac_checklist(criteria)
        assert "- [ ] Users can log in" in rendered
        assert "- [ ] Password reset works" in rendered
|
|
87
|
+
|
|
88
|
+
|
|
76
89
|
# ---------------------------------------------------------------------------
|
|
77
90
|
# format_global_context
|
|
78
91
|
# ---------------------------------------------------------------------------
|
|
@@ -286,7 +286,7 @@ def extract_baseline_failures(test_commands, project_root):
|
|
|
286
286
|
def format_ac_checklist(acceptance_criteria):
|
|
287
287
|
"""Format acceptance criteria as a markdown checkbox list."""
|
|
288
288
|
if not acceptance_criteria:
|
|
289
|
-
return "-
|
|
289
|
+
return "- (no Verification Gates specified)"
|
|
290
290
|
lines = []
|
|
291
291
|
for item in acceptance_criteria:
|
|
292
292
|
lines.append("- [ ] {}".format(item))
|
|
@@ -323,10 +323,10 @@ def format_user_context(user_context):
|
|
|
323
323
|
if not items:
|
|
324
324
|
return ""
|
|
325
325
|
lines = [
|
|
326
|
-
"
|
|
327
|
-
""
|
|
328
|
-
"
|
|
329
|
-
"
|
|
326
|
+
"> These materials were provided by the user and are authoritative "
|
|
327
|
+
"when they clarify or constrain this feature. They do not expand "
|
|
328
|
+
"the current scope by themselves; use the Task Contract to decide "
|
|
329
|
+
"what belongs to this session.",
|
|
330
330
|
"",
|
|
331
331
|
]
|
|
332
332
|
for item in items:
|
|
@@ -970,6 +970,10 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
|
|
|
970
970
|
mission += "\n\n" + tier_desc
|
|
971
971
|
sections.append(("mission", mission))
|
|
972
972
|
|
|
973
|
+
# --- Task Contract: single source of current scope and gates ---
|
|
974
|
+
sections.append(("task-contract",
|
|
975
|
+
load_section(sections_dir, "task-contract.md")))
|
|
976
|
+
|
|
973
977
|
# --- Feature Context (XML-wrapped, optimization 3) ---
|
|
974
978
|
sections.append(("feature-context",
|
|
975
979
|
load_section(sections_dir, "feature-context.md")))
|
|
@@ -1079,13 +1083,8 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
|
|
|
1079
1083
|
load_section(sections_dir,
|
|
1080
1084
|
"test-failure-recovery-agent.md")))
|
|
1081
1085
|
|
|
1082
|
-
#
|
|
1083
|
-
|
|
1084
|
-
if os.path.isfile(ac_checklist_path):
|
|
1085
|
-
sections.append(("ac-verification-checklist",
|
|
1086
|
-
load_section(sections_dir,
|
|
1087
|
-
"ac-verification-checklist.md")))
|
|
1088
|
-
|
|
1086
|
+
# Verification Gates are included in Task Contract. Keep AC in one place so
|
|
1087
|
+
# background context and implementation prompts cannot redefine scope.
|
|
1089
1088
|
# --- Review (only for agent tiers) ---
|
|
1090
1089
|
if pipeline_mode == "full":
|
|
1091
1090
|
sections.append(("phase-review",
|
|
@@ -63,7 +63,8 @@ PHASE_KEYWORDS = {
|
|
|
63
63
|
class ProgressTracker:
|
|
64
64
|
"""Tracks progress state from stream-json events."""
|
|
65
65
|
|
|
66
|
-
def __init__(self):
|
|
66
|
+
def __init__(self, session_log=None):
|
|
67
|
+
self.session_log_path = Path(session_log).expanduser() if session_log else None
|
|
67
68
|
self.message_count = 0
|
|
68
69
|
self.current_tool = None
|
|
69
70
|
self.current_tool_input_summary = ""
|
|
@@ -78,12 +79,19 @@ class ProgressTracker:
|
|
|
78
79
|
self.active_subagent_count = 0
|
|
79
80
|
self.subagent_status_counts = Counter()
|
|
80
81
|
self.codex_child_thread_ids = set()
|
|
82
|
+
self.claude_session_id = ""
|
|
83
|
+
self.claude_cwd = ""
|
|
84
|
+
self.claude_task_states = {}
|
|
81
85
|
self.child_session_files = []
|
|
82
86
|
self.child_total_bytes = 0
|
|
83
87
|
self.child_activity_signature = ""
|
|
84
88
|
self.last_child_activity_at = ""
|
|
85
89
|
self._codex_child_session_paths = {}
|
|
90
|
+
self._claude_child_session_files = []
|
|
86
91
|
self._last_child_scan_at = 0.0
|
|
92
|
+
self._last_claude_fallback_scan_at = 0.0
|
|
93
|
+
self._last_claude_fallback_scan_key = ""
|
|
94
|
+
self._claude_fallback_scan_interval_seconds = 10.0
|
|
87
95
|
self._text_buffer = ""
|
|
88
96
|
self._in_tool_use = False
|
|
89
97
|
self._current_tool_input_parts = []
|
|
@@ -195,11 +203,76 @@ class ProgressTracker:
|
|
|
195
203
|
self.is_active = True
|
|
196
204
|
|
|
197
205
|
elif event_type == "system":
|
|
198
|
-
# System events (hooks, init, etc.) — track but don't count as messages
|
|
206
|
+
# System events (hooks, init, task notifications, etc.) — track but don't count as messages.
|
|
199
207
|
self.event_format = self.event_format or "stream-json"
|
|
200
208
|
subtype = event.get("subtype", "")
|
|
201
209
|
if subtype == "init":
|
|
202
210
|
self.is_active = True
|
|
211
|
+
session_id = event.get("session_id")
|
|
212
|
+
if isinstance(session_id, str) and session_id.strip():
|
|
213
|
+
self.claude_session_id = session_id.strip()
|
|
214
|
+
cwd = event.get("cwd")
|
|
215
|
+
if isinstance(cwd, str) and cwd.strip():
|
|
216
|
+
self.claude_cwd = cwd.strip()
|
|
217
|
+
elif subtype == "task_started":
|
|
218
|
+
task_id = event.get("task_id")
|
|
219
|
+
if isinstance(task_id, str) and task_id.strip():
|
|
220
|
+
self.claude_task_states[task_id.strip()] = {
|
|
221
|
+
"status": "running",
|
|
222
|
+
"summary": str(event.get("description") or "")[:120],
|
|
223
|
+
"tool_use_id": str(event.get("tool_use_id") or ""),
|
|
224
|
+
"task_type": str(event.get("task_type") or ""),
|
|
225
|
+
"subagent_type": str(event.get("subagent_type") or ""),
|
|
226
|
+
}
|
|
227
|
+
self._update_claude_subagent_status_counts()
|
|
228
|
+
elif subtype in ("task_updated", "task_progress"):
|
|
229
|
+
task_id = event.get("task_id")
|
|
230
|
+
if isinstance(task_id, str) and task_id.strip():
|
|
231
|
+
state = self.claude_task_states.setdefault(task_id.strip(), {})
|
|
232
|
+
patch = event.get("patch") if isinstance(event.get("patch"), dict) else {}
|
|
233
|
+
status = patch.get("status") or event.get("status")
|
|
234
|
+
if status:
|
|
235
|
+
state["status"] = str(status)
|
|
236
|
+
summary = patch.get("summary") or patch.get("description") or event.get("summary") or event.get("description")
|
|
237
|
+
if summary:
|
|
238
|
+
state["summary"] = str(summary)[:120]
|
|
239
|
+
else:
|
|
240
|
+
state.setdefault("summary", "")
|
|
241
|
+
tool_use_id = patch.get("tool_use_id") or event.get("tool_use_id")
|
|
242
|
+
if tool_use_id:
|
|
243
|
+
state["tool_use_id"] = str(tool_use_id)
|
|
244
|
+
else:
|
|
245
|
+
state.setdefault("tool_use_id", "")
|
|
246
|
+
task_type = patch.get("task_type") or event.get("task_type")
|
|
247
|
+
if task_type:
|
|
248
|
+
state["task_type"] = str(task_type)
|
|
249
|
+
else:
|
|
250
|
+
state.setdefault("task_type", "")
|
|
251
|
+
subagent_type = patch.get("subagent_type") or event.get("subagent_type")
|
|
252
|
+
if subagent_type:
|
|
253
|
+
state["subagent_type"] = str(subagent_type)
|
|
254
|
+
else:
|
|
255
|
+
state.setdefault("subagent_type", "")
|
|
256
|
+
self._update_claude_subagent_status_counts()
|
|
257
|
+
elif subtype == "task_notification":
|
|
258
|
+
task_id = event.get("task_id")
|
|
259
|
+
if isinstance(task_id, str) and task_id.strip():
|
|
260
|
+
state = self.claude_task_states.setdefault(task_id.strip(), {})
|
|
261
|
+
status = event.get("status") or "completed"
|
|
262
|
+
state["status"] = str(status)
|
|
263
|
+
state["summary"] = str(event.get("summary") or state.get("summary") or "")[:120]
|
|
264
|
+
state.setdefault("tool_use_id", str(event.get("tool_use_id") or ""))
|
|
265
|
+
task_type = event.get("task_type")
|
|
266
|
+
if task_type:
|
|
267
|
+
state["task_type"] = str(task_type)
|
|
268
|
+
else:
|
|
269
|
+
state.setdefault("task_type", "")
|
|
270
|
+
subagent_type = event.get("subagent_type")
|
|
271
|
+
if subagent_type:
|
|
272
|
+
state["subagent_type"] = str(subagent_type)
|
|
273
|
+
else:
|
|
274
|
+
state.setdefault("subagent_type", "")
|
|
275
|
+
self._update_claude_subagent_status_counts()
|
|
203
276
|
|
|
204
277
|
# ── Claude API raw stream format ────────────────────────────
|
|
205
278
|
elif event_type == "message_start":
|
|
@@ -391,16 +464,135 @@ class ProgressTracker:
|
|
|
391
464
|
pass
|
|
392
465
|
return str(matches[0])
|
|
393
466
|
|
|
467
|
+
def _is_tracked_claude_subagent_state(self, state):
|
|
468
|
+
"""Return true for Claude Code task events representing in-process agents."""
|
|
469
|
+
if not isinstance(state, dict):
|
|
470
|
+
return False
|
|
471
|
+
task_type = str(state.get("task_type") or "")
|
|
472
|
+
task_type_lower = task_type.lower()
|
|
473
|
+
subagent_type = str(state.get("subagent_type") or "")
|
|
474
|
+
if task_type_lower == "local_bash":
|
|
475
|
+
return False
|
|
476
|
+
tracked_types = {"in_process_teammate", "subagent", "agent", "teammate"}
|
|
477
|
+
if task_type_lower in tracked_types:
|
|
478
|
+
return True
|
|
479
|
+
if task_type_lower == "local_agent" and subagent_type:
|
|
480
|
+
return True
|
|
481
|
+
summary = str(state.get("summary") or "")
|
|
482
|
+
return bool(
|
|
483
|
+
not task_type
|
|
484
|
+
and summary.lower().startswith(("dev:", "critic:", "reviewer:", "agent:"))
|
|
485
|
+
)
|
|
486
|
+
|
|
487
|
+
def _has_tracked_claude_subagent_task(self):
|
|
488
|
+
"""Return true once a Claude Code local-agent/subagent task has been observed."""
|
|
489
|
+
return any(
|
|
490
|
+
self._is_tracked_claude_subagent_state(state)
|
|
491
|
+
for state in self.claude_task_states.values()
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
def _update_claude_subagent_status_counts(self):
|
|
495
|
+
"""Track Claude Code in-process teammate task state counts."""
|
|
496
|
+
counts = Counter()
|
|
497
|
+
active = 0
|
|
498
|
+
inactive_statuses = {
|
|
499
|
+
"completed",
|
|
500
|
+
"failed",
|
|
501
|
+
"cancelled",
|
|
502
|
+
"canceled",
|
|
503
|
+
"killed",
|
|
504
|
+
"stopped",
|
|
505
|
+
"success",
|
|
506
|
+
"error",
|
|
507
|
+
}
|
|
508
|
+
for state in self.claude_task_states.values():
|
|
509
|
+
if not self._is_tracked_claude_subagent_state(state):
|
|
510
|
+
continue
|
|
511
|
+
status = str(state.get("status") or "unknown")
|
|
512
|
+
counts[status] += 1
|
|
513
|
+
if status.lower() not in inactive_statuses:
|
|
514
|
+
active += 1
|
|
515
|
+
summary = state.get("summary") or state.get("subagent_type")
|
|
516
|
+
if summary:
|
|
517
|
+
self.last_text_snippet = str(summary).strip()[:120]
|
|
518
|
+
self._detect_phase(str(summary))
|
|
519
|
+
self.subagent_status_counts = counts
|
|
520
|
+
self.active_subagent_count = active
|
|
521
|
+
|
|
522
|
+
def _claude_projects_dir(self):
|
|
523
|
+
"""Return the Claude Code projects directory for transcript lookup."""
|
|
524
|
+
projects_dir = os.environ.get("CLAUDE_PROJECTS_DIR")
|
|
525
|
+
if projects_dir:
|
|
526
|
+
return Path(projects_dir).expanduser()
|
|
527
|
+
claude_config_dir = os.environ.get("CLAUDE_CONFIG_DIR")
|
|
528
|
+
if claude_config_dir:
|
|
529
|
+
return Path(claude_config_dir).expanduser() / "projects"
|
|
530
|
+
claude_home = os.environ.get("CLAUDE_HOME")
|
|
531
|
+
if claude_home:
|
|
532
|
+
return Path(claude_home).expanduser() / "projects"
|
|
533
|
+
return Path.home() / ".claude" / "projects"
|
|
534
|
+
|
|
535
|
+
def _claude_project_key(self):
|
|
536
|
+
"""Encode cwd the same way Claude Code stores project transcript dirs."""
|
|
537
|
+
cwd = self.claude_cwd
|
|
538
|
+
if not cwd:
|
|
539
|
+
return ""
|
|
540
|
+
return cwd.replace("\\", "-").replace("/", "-").replace(":", "")
|
|
541
|
+
|
|
542
|
+
def _find_claude_child_session_files(self):
|
|
543
|
+
"""Find Claude Code subagent transcripts for this parent session."""
|
|
544
|
+
if not self.claude_session_id:
|
|
545
|
+
return []
|
|
546
|
+
|
|
547
|
+
projects_dir = self._claude_projects_dir()
|
|
548
|
+
if not projects_dir.exists():
|
|
549
|
+
return []
|
|
550
|
+
|
|
551
|
+
candidates = []
|
|
552
|
+
project_key = self._claude_project_key()
|
|
553
|
+
if project_key:
|
|
554
|
+
candidates.append(
|
|
555
|
+
projects_dir / project_key / self.claude_session_id / "subagents"
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
for candidate in candidates:
|
|
559
|
+
if candidate.exists():
|
|
560
|
+
try:
|
|
561
|
+
return sorted(candidate.glob("*.jsonl"))
|
|
562
|
+
except OSError:
|
|
563
|
+
return []
|
|
564
|
+
|
|
565
|
+
# Fallback for non-standard cwd encoding or custom Claude homes. Avoid
|
|
566
|
+
# repeatedly walking every stored transcript before any Agent task exists.
|
|
567
|
+
if not self._has_tracked_claude_subagent_task():
|
|
568
|
+
return []
|
|
569
|
+
|
|
570
|
+
fallback_scan_key = f"{projects_dir}:{self.claude_session_id}"
|
|
571
|
+
now = time.monotonic()
|
|
572
|
+
if (
|
|
573
|
+
self._last_claude_fallback_scan_key == fallback_scan_key
|
|
574
|
+
and now - self._last_claude_fallback_scan_at < self._claude_fallback_scan_interval_seconds
|
|
575
|
+
):
|
|
576
|
+
return self._claude_child_session_files
|
|
577
|
+
self._last_claude_fallback_scan_key = fallback_scan_key
|
|
578
|
+
self._last_claude_fallback_scan_at = now
|
|
579
|
+
try:
|
|
580
|
+
matches = sorted(projects_dir.rglob(f"{self.claude_session_id}/subagents/*.jsonl"))
|
|
581
|
+
except OSError:
|
|
582
|
+
return []
|
|
583
|
+
return matches
|
|
584
|
+
|
|
394
585
|
def refresh_child_session_activity(self, force=False):
|
|
395
|
-
"""Refresh
|
|
586
|
+
"""Refresh child transcript file stats.
|
|
396
587
|
|
|
397
588
|
The heartbeat monitor uses this activity signature to treat subagent
|
|
398
|
-
transcript growth as real progress while the parent
|
|
399
|
-
|
|
589
|
+
transcript growth as real progress while the parent session is blocked
|
|
590
|
+
waiting for a child agent/tool result. Supports Codex child threads and
|
|
591
|
+
Claude Code in-process teammate transcripts.
|
|
400
592
|
"""
|
|
401
593
|
previous_signature = self.child_activity_signature
|
|
402
594
|
|
|
403
|
-
if not self.codex_child_thread_ids:
|
|
595
|
+
if not self.codex_child_thread_ids and not self.claude_session_id:
|
|
404
596
|
self.child_session_files = []
|
|
405
597
|
self.child_total_bytes = 0
|
|
406
598
|
self.child_activity_signature = ""
|
|
@@ -420,6 +612,7 @@ class ProgressTracker:
|
|
|
420
612
|
found = self._find_codex_child_session_file(thread_id)
|
|
421
613
|
if found:
|
|
422
614
|
self._codex_child_session_paths[thread_id] = found
|
|
615
|
+
self._claude_child_session_files = self._find_claude_child_session_files()
|
|
423
616
|
self._last_child_scan_at = now
|
|
424
617
|
|
|
425
618
|
files = []
|
|
@@ -427,24 +620,22 @@ class ProgressTracker:
|
|
|
427
620
|
total_bytes = 0
|
|
428
621
|
max_mtime = 0.0
|
|
429
622
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
if not path:
|
|
433
|
-
continue
|
|
623
|
+
def add_file(kind, identifier, path):
|
|
624
|
+
nonlocal total_bytes, max_mtime
|
|
434
625
|
try:
|
|
435
626
|
stat = os.stat(path)
|
|
436
627
|
except OSError:
|
|
437
|
-
|
|
438
|
-
|
|
628
|
+
return
|
|
629
|
+
path_str = str(path)
|
|
439
630
|
total_bytes += stat.st_size
|
|
440
631
|
max_mtime = max(max_mtime, stat.st_mtime)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
)
|
|
632
|
+
mtime_ns = getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000))
|
|
633
|
+
signature_parts.append(f"{kind}:{identifier}:{stat.st_size}:{mtime_ns}")
|
|
444
634
|
files.append(
|
|
445
635
|
{
|
|
446
|
-
"
|
|
447
|
-
"
|
|
636
|
+
"kind": kind,
|
|
637
|
+
"thread_id": identifier,
|
|
638
|
+
"path": path_str,
|
|
448
639
|
"size": stat.st_size,
|
|
449
640
|
"mtime": datetime.fromtimestamp(
|
|
450
641
|
stat.st_mtime, timezone.utc
|
|
@@ -452,6 +643,14 @@ class ProgressTracker:
|
|
|
452
643
|
}
|
|
453
644
|
)
|
|
454
645
|
|
|
646
|
+
for thread_id in sorted(self.codex_child_thread_ids):
|
|
647
|
+
path = self._codex_child_session_paths.get(thread_id)
|
|
648
|
+
if path:
|
|
649
|
+
add_file("codex", thread_id, path)
|
|
650
|
+
|
|
651
|
+
for path in self._claude_child_session_files:
|
|
652
|
+
add_file("claude", path.stem, path)
|
|
653
|
+
|
|
455
654
|
self.child_session_files = files
|
|
456
655
|
self.child_total_bytes = total_bytes
|
|
457
656
|
self.child_activity_signature = "|".join(signature_parts)
|
|
@@ -519,7 +718,7 @@ def atomic_write_json(data, filepath):
|
|
|
519
718
|
|
|
520
719
|
def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
521
720
|
"""Tail session log and parse stream-json events."""
|
|
522
|
-
tracker = ProgressTracker()
|
|
721
|
+
tracker = ProgressTracker(session_log)
|
|
523
722
|
last_write_state = None
|
|
524
723
|
|
|
525
724
|
def state_key(state):
|