prizmkit 1.1.68 → 1.1.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/bundled/VERSION.json +3 -3
  2. package/bundled/dev-pipeline/lib/heartbeat.sh +5 -5
  3. package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +11 -12
  4. package/bundled/dev-pipeline/scripts/parse-stream-progress.py +217 -18
  5. package/bundled/dev-pipeline/templates/agent-prompts/dev-implement.md +36 -22
  6. package/bundled/dev-pipeline/templates/agent-prompts/reviewer-review.md +1 -1
  7. package/bundled/dev-pipeline/templates/bootstrap-tier2.md +19 -1
  8. package/bundled/dev-pipeline/templates/bootstrap-tier3.md +19 -1
  9. package/bundled/dev-pipeline/templates/bugfix-bootstrap-prompt.md +24 -21
  10. package/bundled/dev-pipeline/templates/refactor-bootstrap-prompt.md +32 -24
  11. package/bundled/dev-pipeline/templates/sections/ac-verification-checklist.md +4 -10
  12. package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +1 -0
  13. package/bundled/dev-pipeline/templates/sections/feature-context.md +16 -11
  14. package/bundled/dev-pipeline/templates/sections/phase-browser-verification-auto.md +17 -26
  15. package/bundled/dev-pipeline/templates/sections/phase-browser-verification-opencli.md +1 -1
  16. package/bundled/dev-pipeline/templates/sections/phase-browser-verification.md +1 -1
  17. package/bundled/dev-pipeline/templates/sections/phase-context-snapshot-base.md +1 -1
  18. package/bundled/dev-pipeline/templates/sections/phase-critic-plan-full.md +10 -0
  19. package/bundled/dev-pipeline/templates/sections/phase-critic-plan.md +10 -0
  20. package/bundled/dev-pipeline/templates/sections/phase-implement-agent.md +14 -9
  21. package/bundled/dev-pipeline/templates/sections/phase-implement-full.md +14 -9
  22. package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +8 -17
  23. package/bundled/dev-pipeline/templates/sections/phase-plan-lite.md +1 -1
  24. package/bundled/dev-pipeline/templates/sections/phase-review-agent.md +5 -1
  25. package/bundled/dev-pipeline/templates/sections/phase-review-full.md +6 -2
  26. package/bundled/dev-pipeline/templates/sections/phase-specify-plan-full.md +1 -1
  27. package/bundled/dev-pipeline/templates/sections/task-contract.md +34 -0
  28. package/bundled/dev-pipeline/templates/sections/test-failure-recovery-agent.md +27 -46
  29. package/bundled/dev-pipeline/templates/sections/test-failure-recovery-lite.md +27 -37
  30. package/bundled/dev-pipeline/tests/test_generate_bootstrap_prompt.py +13 -0
  31. package/bundled/dev-pipeline-windows/scripts/generate-bootstrap-prompt.py +11 -12
  32. package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +217 -18
  33. package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-implement.md +36 -22
  34. package/bundled/dev-pipeline-windows/templates/agent-prompts/reviewer-review.md +1 -1
  35. package/bundled/dev-pipeline-windows/templates/bugfix-bootstrap-prompt.md +24 -21
  36. package/bundled/dev-pipeline-windows/templates/refactor-bootstrap-prompt.md +32 -24
  37. package/bundled/dev-pipeline-windows/templates/sections/ac-verification-checklist.md +4 -10
  38. package/bundled/dev-pipeline-windows/templates/sections/context-budget-rules.md +1 -0
  39. package/bundled/dev-pipeline-windows/templates/sections/feature-context.md +16 -11
  40. package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification-auto.md +22 -10
  41. package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-base.md +1 -1
  42. package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan-full.md +10 -0
  43. package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan.md +10 -0
  44. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-agent.md +14 -9
  45. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-full.md +14 -9
  46. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-lite.md +8 -19
  47. package/bundled/dev-pipeline-windows/templates/sections/phase-plan-lite.md +1 -1
  48. package/bundled/dev-pipeline-windows/templates/sections/phase-review-agent.md +5 -1
  49. package/bundled/dev-pipeline-windows/templates/sections/phase-review-full.md +6 -2
  50. package/bundled/dev-pipeline-windows/templates/sections/phase-specify-plan-full.md +1 -1
  51. package/bundled/dev-pipeline-windows/templates/sections/task-contract.md +34 -0
  52. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-agent.md +27 -46
  53. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-lite.md +27 -37
  54. package/bundled/skills/_metadata.json +1 -1
  55. package/package.json +1 -1
@@ -0,0 +1,34 @@
1
+ ## Task Contract
2
+
3
+ This section defines the only work that belongs to this session.
4
+
5
+ ### Objective
6
+
7
+ Implement {{FEATURE_ID}}: "{{FEATURE_TITLE}}".
8
+
9
+ {{FEATURE_DESCRIPTION}}
10
+
11
+ ### Scope Rule
12
+
13
+ Current scope is limited to the intersection of:
14
+
15
+ 1. The Objective above
16
+ 2. The Verification Gates below
17
+ 3. Dependencies required to complete those gates
18
+
19
+ Raw user context, project brief, and completed dependency notes are authoritative context, but they do not expand scope by themselves.
20
+
21
+ ### Non-Scope Rule
22
+
23
+ Do NOT implement unrelated backlog items, already completed features, or adjacent modules unless they are required by the Objective or a Verification Gate.
24
+
25
+ ### Verification Gates
26
+
27
+ These gates are generated from `feature.acceptance_criteria` and are the only acceptance requirements for this session.
28
+
29
+ {{AC_CHECKLIST}}
30
+
31
+ Gate rule:
32
+ - `[x]` means verified with implementation or test evidence.
33
+ - Any remaining `[ ]` means the feature is incomplete.
34
+ - If a gate is blocked, document the reason in `failure-log.md`; blocked is not success.
@@ -1,67 +1,48 @@
1
1
  ## Test Failure Recovery Protocol
2
2
 
3
- When tests fail during implementation (Phase 3 / Phase 4), use **convergence-based recovery** keep fixing as long as progress is being made.
3
+ Use this protocol whenever implementation or review tests fail. Its purpose is to distinguish tolerated pre-existing failures from blockers introduced by this session.
4
4
 
5
- ### Recovery Loop
6
-
7
- 1. **Run tests and record results**:
8
- - Count total failures and note which tests failed
9
- - Compare against baseline (BASELINE_FAILURES) — exclude pre-existing failures
10
-
11
- 2. **Check termination conditions** (evaluate BEFORE each fix attempt):
12
- - **All tests pass** → Done. Exit recovery loop.
13
- - **Plateau detected** — same failure count AND same failing tests for 3 consecutive rounds → AI cannot resolve these failures. Document and exit.
14
- - **Still making progress** — failure count decreased compared to previous round → Continue fixing.
15
- - **First round** — no history yet → Proceed to fix.
16
-
17
- 3. **Fix and iterate**:
18
- - Analyze remaining failures: root cause (code bug vs. test brittleness vs. environment issue)
19
- - Categorize:
20
- - **Pre-existing baseline failure**: Expected, do NOT fix
21
- - **New regression**: Fix the code
22
- - **Brittle test**: Fix the test or environment setup
23
- - Apply fix, re-run `($TEST_CMD)`, go back to step 1
5
+ ### Failure Classes
24
6
 
25
- ### Convergence Tracking
7
+ | Class | Meaning | Required Action | May Continue? |
8
+ |-------|---------|-----------------|---------------|
9
+ | Baseline failure | Existed before this session | Document in Implementation Log or review notes | Yes |
10
+ | New regression | Introduced by this session | Fix before reporting success | No |
11
+ | Brittle test | Test expectation/setup is wrong for the intended behavior | Fix the test or environment setup, then rerun | Only after fixed or documented as blocked |
12
+ | Environment/tooling failure | External tool, network, install, or local environment prevents verification | Document in `failure-log.md` with impact on gates | Only if no Verification Gate is blocked |
26
13
 
27
- Track failures each round. Example: 5→3→3→3→3 = plateau at round 3, stop at round 5 (3/3).
14
+ ### Recovery Loop
28
15
 
29
- **Key rule**: If failures decrease (even by 1), the plateau counter resets to 0.
16
+ 1. Run tests and record: failing test names, failure count, and class for each failure.
17
+ 2. Compare with `BASELINE_FAILURES`; never blame baseline failures on this feature.
18
+ 3. Fix new regressions and brittle tests while progress is being made.
19
+ 4. Stop after a plateau: same failure count and same failing tests for 3 consecutive rounds.
20
+ 5. If failures decrease, reset the plateau counter.
30
21
 
31
- ### Escalation — Dev + Reviewer Workflow
22
+ ### Success Rule
32
23
 
33
- When the recovery loop exits with remaining failures:
34
- - Dev appends failure details to Implementation Log
35
- - Reviewer agent runs full test suite in Phase 5
36
- - If Reviewer confirms NEW regressions (not in baseline): mark verdict as `NEEDS_FIXES`
37
- - If Reviewer confirms only baseline failures remain: proceed with `PASS_WITH_WARNINGS`
24
+ Proceed to review only when:
38
25
 
39
- ### Context-Aware Test Re-run (Performance Optimization)
26
+ 1. all new regressions are fixed;
27
+ 2. baseline failures are documented;
28
+ 3. every Verification Gate is verified.
40
29
 
41
- **Skip redundant re-runs**:
42
- - If Implementation Log section in context-snapshot.md already confirms "all tests passing"
43
- - → Skip Phase 5 test suite re-run (Reviewer will verify baseline log instead)
44
- - This avoids rebuilding/re-running tests when already verified
30
+ Blocked gates are not success. If any gate cannot be verified, follow the Blocked Rule.
45
31
 
46
- **When to re-run**:
47
- - If Implementation Log is missing or incomplete
48
- - If any new code was added after the last test run
49
- - If Reviewer suspects brittleness or environment drift
32
+ ### Blocked Rule
50
33
 
51
- ### Failure Capture Rules
34
+ If a remaining failure prevents any Verification Gate from being verified, the feature is incomplete. Write `failure-log.md` and do not report success.
52
35
 
53
- If tests remain broken after recovery:
36
+ ### Failure Capture Format
54
37
 
55
- ```
38
+ ```markdown
56
39
  ## Test Failures Encountered
57
40
 
58
41
  - **Test**: [test name/path]
59
42
  - Root Cause: [explanation]
60
- - Category: [pre-existing baseline | new regression | brittle test | environment]
43
+ - Category: [baseline failure | new regression | brittle test | environment/tooling]
61
44
  - Rounds Attempted: [N rounds, plateau at round M]
62
- - Status: [still failing | requires next session | known limitation]
45
+ - Status: [fixed | still failing | blocked]
63
46
 
64
- - **Impact on Feature**: [can AC be verified despite failure | blocks AC verification]
47
+ - **Impact on Verification Gates**: [verified | not affected | blocked + reason]
65
48
  ```
66
-
67
- **Rule**: If any AC cannot be verified due to test failure, the feature is incomplete. Document in failure-log.md for next session.
@@ -1,58 +1,48 @@
1
1
  ## Test Failure Recovery Protocol
2
2
 
3
- When tests fail during implementation, use **convergence-based recovery** keep fixing as long as progress is being made.
3
+ Use this protocol whenever implementation tests fail. Its purpose is to distinguish tolerated pre-existing failures from blockers introduced by this session.
4
4
 
5
- ### Recovery Loop
6
-
7
- 1. **Run tests and record results**:
8
- - Count total failures and note which tests failed
9
- - Compare against baseline (BASELINE_FAILURES) — exclude pre-existing failures
10
-
11
- 2. **Check termination conditions** (evaluate BEFORE each fix attempt):
12
- - **All tests pass** → Done. Exit recovery loop.
13
- - **Plateau detected** — same failure count AND same failing tests for 3 consecutive rounds → AI cannot resolve these failures. Document and exit.
14
- - **Still making progress** — failure count decreased compared to previous round → Continue fixing.
15
- - **First round** — no history yet → Proceed to fix.
5
+ ### Failure Classes
16
6
 
17
- 3. **Fix and iterate**:
18
- - Analyze remaining failures: root cause (code bug vs. test brittleness vs. environment issue)
19
- - Categorize:
20
- - **Pre-existing baseline failure**: Expected, do NOT fix
21
- - **New regression**: Fix the code
22
- - **Brittle test**: Fix the test or environment setup
23
- - Apply fix, re-run `($TEST_CMD)`, go back to step 1
7
+ | Class | Meaning | Required Action | May Continue? |
8
+ |-------|---------|-----------------|---------------|
9
+ | Baseline failure | Existed before this session | Document in Implementation Log | Yes |
10
+ | New regression | Introduced by this session | Fix before reporting success | No |
11
+ | Brittle test | Test expectation/setup is wrong for the intended behavior | Fix the test or environment setup, then rerun | Only after fixed or documented as blocked |
12
+ | Environment/tooling failure | External tool, network, install, or local environment prevents verification | Document in `failure-log.md` with impact on gates | Only if no Verification Gate is blocked |
24
13
 
25
- ### Convergence Tracking
14
+ ### Recovery Loop
26
15
 
27
- Track failures each round. Example: 5→3→3→3→3 = plateau at round 3, stop at round 5 (3/3).
16
+ 1. Run tests and record: failing test names, failure count, and class for each failure.
17
+ 2. Compare with `BASELINE_FAILURES`; never blame baseline failures on this feature.
18
+ 3. Fix new regressions and brittle tests while progress is being made.
19
+ 4. Stop after a plateau: same failure count and same failing tests for 3 consecutive rounds.
20
+ 5. If failures decrease, reset the plateau counter.
28
21
 
29
- **Key rule**: If failures decrease (even by 1), the plateau counter resets to 0.
22
+ ### Success Rule
30
23
 
31
- ### Escalation — Single Agent
24
+ Proceed only when:
32
25
 
33
- When the recovery loop exits with remaining failures:
34
- - Document all remaining failures in Implementation Log with root cause analysis
35
- - Record PARTIAL status with known failure list
36
- - **Do NOT block commit** — unresolved test failures are deferred to next session
26
+ 1. all new regressions are fixed;
27
+ 2. baseline failures are documented;
28
+ 3. every Verification Gate is verified.
37
29
 
38
- ### Context-Aware Optimization
30
+ Blocked gates are not success. If any gate cannot be verified, follow the Blocked Rule.
39
31
 
40
- **Skip redundant re-runs**: If Implementation Log already confirms "all tests passing", skip full suite re-run.
32
+ ### Blocked Rule
41
33
 
42
- ### Failure Capture Rules
34
+ If a remaining failure prevents any Verification Gate from being verified, the feature is incomplete. Write `failure-log.md` and do not report success.
43
35
 
44
- If tests remain broken after recovery:
36
+ ### Failure Capture Format
45
37
 
46
- ```
38
+ ```markdown
47
39
  ## Test Failures Encountered
48
40
 
49
41
  - **Test**: [test name/path]
50
42
  - Root Cause: [explanation]
51
- - Category: [pre-existing baseline | new regression | brittle test | environment]
43
+ - Category: [baseline failure | new regression | brittle test | environment/tooling]
52
44
  - Rounds Attempted: [N rounds, plateau at round M]
53
- - Status: [still failing | requires next session | known limitation]
45
+ - Status: [fixed | still failing | blocked]
54
46
 
55
- - **Impact on Feature**: [can AC be verified despite failure | blocks AC verification]
47
+ - **Impact on Verification Gates**: [verified | not affected | blocked + reason]
56
48
  ```
57
-
58
- **Rule**: If any AC cannot be verified due to test failure, the feature is incomplete. Document in failure-log.md for next session.
@@ -6,6 +6,7 @@ from generate_bootstrap_prompt import (
6
6
  compute_feature_slug,
7
7
  find_feature,
8
8
  format_acceptance_criteria,
9
+ format_ac_checklist,
9
10
  format_global_context,
10
11
  get_completed_dependencies,
11
12
  determine_pipeline_mode,
@@ -73,6 +74,18 @@ class TestFormatAcceptanceCriteria:
73
74
  assert "- Password reset works" in result
74
75
 
75
76
 
77
+ class TestFormatAcChecklist:
78
+ def test_empty_does_not_create_unchecked_gate(self):
79
+ result = format_ac_checklist([])
80
+ assert result == "- (no Verification Gates specified)"
81
+ assert "[ ]" not in result
82
+
83
+ def test_items_are_unchecked_verification_gates(self):
84
+ result = format_ac_checklist(["Users can log in", "Password reset works"])
85
+ assert "- [ ] Users can log in" in result
86
+ assert "- [ ] Password reset works" in result
87
+
88
+
76
89
  # ---------------------------------------------------------------------------
77
90
  # format_global_context
78
91
  # ---------------------------------------------------------------------------
@@ -286,7 +286,7 @@ def extract_baseline_failures(test_commands, project_root):
286
286
  def format_ac_checklist(acceptance_criteria):
287
287
  """Format acceptance criteria as a markdown checkbox list."""
288
288
  if not acceptance_criteria:
289
- return "- [ ] (no acceptance criteria specified)"
289
+ return "- (no Verification Gates specified)"
290
290
  lines = []
291
291
  for item in acceptance_criteria:
292
292
  lines.append("- [ ] {}".format(item))
@@ -323,10 +323,10 @@ def format_user_context(user_context):
323
323
  if not items:
324
324
  return ""
325
325
  lines = [
326
- "### User-Provided Context (HIGHEST PRIORITY)",
327
- "",
328
- "> The following materials were provided by the user. "
329
- "They take precedence over AI inference.",
326
+ "> These materials were provided by the user and are authoritative "
327
+ "when they clarify or constrain this feature. They do not expand "
328
+ "the current scope by themselves; use the Task Contract to decide "
329
+ "what belongs to this session.",
330
330
  "",
331
331
  ]
332
332
  for item in items:
@@ -970,6 +970,10 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
970
970
  mission += "\n\n" + tier_desc
971
971
  sections.append(("mission", mission))
972
972
 
973
+ # --- Task Contract: single source of current scope and gates ---
974
+ sections.append(("task-contract",
975
+ load_section(sections_dir, "task-contract.md")))
976
+
973
977
  # --- Feature Context (XML-wrapped, optimization 3) ---
974
978
  sections.append(("feature-context",
975
979
  load_section(sections_dir, "feature-context.md")))
@@ -1079,13 +1083,8 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
1079
1083
  load_section(sections_dir,
1080
1084
  "test-failure-recovery-agent.md")))
1081
1085
 
1082
- # --- AC Verification Checklist (all tiers) ---
1083
- ac_checklist_path = os.path.join(sections_dir, "ac-verification-checklist.md")
1084
- if os.path.isfile(ac_checklist_path):
1085
- sections.append(("ac-verification-checklist",
1086
- load_section(sections_dir,
1087
- "ac-verification-checklist.md")))
1088
-
1086
+ # Verification Gates are included in Task Contract. Keep AC in one place so
1087
+ # background context and implementation prompts cannot redefine scope.
1089
1088
  # --- Review (only for agent tiers) ---
1090
1089
  if pipeline_mode == "full":
1091
1090
  sections.append(("phase-review",
@@ -63,7 +63,8 @@ PHASE_KEYWORDS = {
63
63
  class ProgressTracker:
64
64
  """Tracks progress state from stream-json events."""
65
65
 
66
- def __init__(self):
66
+ def __init__(self, session_log=None):
67
+ self.session_log_path = Path(session_log).expanduser() if session_log else None
67
68
  self.message_count = 0
68
69
  self.current_tool = None
69
70
  self.current_tool_input_summary = ""
@@ -78,12 +79,19 @@ class ProgressTracker:
78
79
  self.active_subagent_count = 0
79
80
  self.subagent_status_counts = Counter()
80
81
  self.codex_child_thread_ids = set()
82
+ self.claude_session_id = ""
83
+ self.claude_cwd = ""
84
+ self.claude_task_states = {}
81
85
  self.child_session_files = []
82
86
  self.child_total_bytes = 0
83
87
  self.child_activity_signature = ""
84
88
  self.last_child_activity_at = ""
85
89
  self._codex_child_session_paths = {}
90
+ self._claude_child_session_files = []
86
91
  self._last_child_scan_at = 0.0
92
+ self._last_claude_fallback_scan_at = 0.0
93
+ self._last_claude_fallback_scan_key = ""
94
+ self._claude_fallback_scan_interval_seconds = 10.0
87
95
  self._text_buffer = ""
88
96
  self._in_tool_use = False
89
97
  self._current_tool_input_parts = []
@@ -195,11 +203,76 @@ class ProgressTracker:
195
203
  self.is_active = True
196
204
 
197
205
  elif event_type == "system":
198
- # System events (hooks, init, etc.) — track but don't count as messages
206
+ # System events (hooks, init, task notifications, etc.) — track but don't count as messages.
199
207
  self.event_format = self.event_format or "stream-json"
200
208
  subtype = event.get("subtype", "")
201
209
  if subtype == "init":
202
210
  self.is_active = True
211
+ session_id = event.get("session_id")
212
+ if isinstance(session_id, str) and session_id.strip():
213
+ self.claude_session_id = session_id.strip()
214
+ cwd = event.get("cwd")
215
+ if isinstance(cwd, str) and cwd.strip():
216
+ self.claude_cwd = cwd.strip()
217
+ elif subtype == "task_started":
218
+ task_id = event.get("task_id")
219
+ if isinstance(task_id, str) and task_id.strip():
220
+ self.claude_task_states[task_id.strip()] = {
221
+ "status": "running",
222
+ "summary": str(event.get("description") or "")[:120],
223
+ "tool_use_id": str(event.get("tool_use_id") or ""),
224
+ "task_type": str(event.get("task_type") or ""),
225
+ "subagent_type": str(event.get("subagent_type") or ""),
226
+ }
227
+ self._update_claude_subagent_status_counts()
228
+ elif subtype in ("task_updated", "task_progress"):
229
+ task_id = event.get("task_id")
230
+ if isinstance(task_id, str) and task_id.strip():
231
+ state = self.claude_task_states.setdefault(task_id.strip(), {})
232
+ patch = event.get("patch") if isinstance(event.get("patch"), dict) else {}
233
+ status = patch.get("status") or event.get("status")
234
+ if status:
235
+ state["status"] = str(status)
236
+ summary = patch.get("summary") or patch.get("description") or event.get("summary") or event.get("description")
237
+ if summary:
238
+ state["summary"] = str(summary)[:120]
239
+ else:
240
+ state.setdefault("summary", "")
241
+ tool_use_id = patch.get("tool_use_id") or event.get("tool_use_id")
242
+ if tool_use_id:
243
+ state["tool_use_id"] = str(tool_use_id)
244
+ else:
245
+ state.setdefault("tool_use_id", "")
246
+ task_type = patch.get("task_type") or event.get("task_type")
247
+ if task_type:
248
+ state["task_type"] = str(task_type)
249
+ else:
250
+ state.setdefault("task_type", "")
251
+ subagent_type = patch.get("subagent_type") or event.get("subagent_type")
252
+ if subagent_type:
253
+ state["subagent_type"] = str(subagent_type)
254
+ else:
255
+ state.setdefault("subagent_type", "")
256
+ self._update_claude_subagent_status_counts()
257
+ elif subtype == "task_notification":
258
+ task_id = event.get("task_id")
259
+ if isinstance(task_id, str) and task_id.strip():
260
+ state = self.claude_task_states.setdefault(task_id.strip(), {})
261
+ status = event.get("status") or "completed"
262
+ state["status"] = str(status)
263
+ state["summary"] = str(event.get("summary") or state.get("summary") or "")[:120]
264
+ state.setdefault("tool_use_id", str(event.get("tool_use_id") or ""))
265
+ task_type = event.get("task_type")
266
+ if task_type:
267
+ state["task_type"] = str(task_type)
268
+ else:
269
+ state.setdefault("task_type", "")
270
+ subagent_type = event.get("subagent_type")
271
+ if subagent_type:
272
+ state["subagent_type"] = str(subagent_type)
273
+ else:
274
+ state.setdefault("subagent_type", "")
275
+ self._update_claude_subagent_status_counts()
203
276
 
204
277
  # ── Claude API raw stream format ────────────────────────────
205
278
  elif event_type == "message_start":
@@ -391,16 +464,135 @@ class ProgressTracker:
391
464
  pass
392
465
  return str(matches[0])
393
466
 
467
+ def _is_tracked_claude_subagent_state(self, state):
468
+ """Return true for Claude Code task events representing in-process agents."""
469
+ if not isinstance(state, dict):
470
+ return False
471
+ task_type = str(state.get("task_type") or "")
472
+ task_type_lower = task_type.lower()
473
+ subagent_type = str(state.get("subagent_type") or "")
474
+ if task_type_lower == "local_bash":
475
+ return False
476
+ tracked_types = {"in_process_teammate", "subagent", "agent", "teammate"}
477
+ if task_type_lower in tracked_types:
478
+ return True
479
+ if task_type_lower == "local_agent" and subagent_type:
480
+ return True
481
+ summary = str(state.get("summary") or "")
482
+ return bool(
483
+ not task_type
484
+ and summary.lower().startswith(("dev:", "critic:", "reviewer:", "agent:"))
485
+ )
486
+
487
+ def _has_tracked_claude_subagent_task(self):
488
+ """Return true once a Claude Code local-agent/subagent task has been observed."""
489
+ return any(
490
+ self._is_tracked_claude_subagent_state(state)
491
+ for state in self.claude_task_states.values()
492
+ )
493
+
494
+ def _update_claude_subagent_status_counts(self):
495
+ """Track Claude Code in-process teammate task state counts."""
496
+ counts = Counter()
497
+ active = 0
498
+ inactive_statuses = {
499
+ "completed",
500
+ "failed",
501
+ "cancelled",
502
+ "canceled",
503
+ "killed",
504
+ "stopped",
505
+ "success",
506
+ "error",
507
+ }
508
+ for state in self.claude_task_states.values():
509
+ if not self._is_tracked_claude_subagent_state(state):
510
+ continue
511
+ status = str(state.get("status") or "unknown")
512
+ counts[status] += 1
513
+ if status.lower() not in inactive_statuses:
514
+ active += 1
515
+ summary = state.get("summary") or state.get("subagent_type")
516
+ if summary:
517
+ self.last_text_snippet = str(summary).strip()[:120]
518
+ self._detect_phase(str(summary))
519
+ self.subagent_status_counts = counts
520
+ self.active_subagent_count = active
521
+
522
+ def _claude_projects_dir(self):
523
+ """Return the Claude Code projects directory for transcript lookup."""
524
+ projects_dir = os.environ.get("CLAUDE_PROJECTS_DIR")
525
+ if projects_dir:
526
+ return Path(projects_dir).expanduser()
527
+ claude_config_dir = os.environ.get("CLAUDE_CONFIG_DIR")
528
+ if claude_config_dir:
529
+ return Path(claude_config_dir).expanduser() / "projects"
530
+ claude_home = os.environ.get("CLAUDE_HOME")
531
+ if claude_home:
532
+ return Path(claude_home).expanduser() / "projects"
533
+ return Path.home() / ".claude" / "projects"
534
+
535
+ def _claude_project_key(self):
536
+ """Encode cwd the same way Claude Code stores project transcript dirs."""
537
+ cwd = self.claude_cwd
538
+ if not cwd:
539
+ return ""
540
+ return cwd.replace("\\", "-").replace("/", "-").replace(":", "")
541
+
542
+ def _find_claude_child_session_files(self):
543
+ """Find Claude Code subagent transcripts for this parent session."""
544
+ if not self.claude_session_id:
545
+ return []
546
+
547
+ projects_dir = self._claude_projects_dir()
548
+ if not projects_dir.exists():
549
+ return []
550
+
551
+ candidates = []
552
+ project_key = self._claude_project_key()
553
+ if project_key:
554
+ candidates.append(
555
+ projects_dir / project_key / self.claude_session_id / "subagents"
556
+ )
557
+
558
+ for candidate in candidates:
559
+ if candidate.exists():
560
+ try:
561
+ return sorted(candidate.glob("*.jsonl"))
562
+ except OSError:
563
+ return []
564
+
565
+ # Fallback for non-standard cwd encoding or custom Claude homes. Avoid
566
+ # repeatedly walking every stored transcript before any Agent task exists.
567
+ if not self._has_tracked_claude_subagent_task():
568
+ return []
569
+
570
+ fallback_scan_key = f"{projects_dir}:{self.claude_session_id}"
571
+ now = time.monotonic()
572
+ if (
573
+ self._last_claude_fallback_scan_key == fallback_scan_key
574
+ and now - self._last_claude_fallback_scan_at < self._claude_fallback_scan_interval_seconds
575
+ ):
576
+ return self._claude_child_session_files
577
+ self._last_claude_fallback_scan_key = fallback_scan_key
578
+ self._last_claude_fallback_scan_at = now
579
+ try:
580
+ matches = sorted(projects_dir.rglob(f"{self.claude_session_id}/subagents/*.jsonl"))
581
+ except OSError:
582
+ return []
583
+ return matches
584
+
394
585
  def refresh_child_session_activity(self, force=False):
395
- """Refresh Codex child transcript file stats.
586
+ """Refresh child transcript file stats.
396
587
 
397
588
  The heartbeat monitor uses this activity signature to treat subagent
398
- transcript growth as real progress while the parent Codex session is
399
- blocked in `wait`.
589
+ transcript growth as real progress while the parent session is blocked
590
+ waiting for a child agent/tool result. Supports Codex child threads and
591
+ Claude Code in-process teammate transcripts.
400
592
  """
401
593
  previous_signature = self.child_activity_signature
402
594
 
403
- if not self.codex_child_thread_ids:
595
+ if not self.codex_child_thread_ids and not self.claude_session_id:
404
596
  self.child_session_files = []
405
597
  self.child_total_bytes = 0
406
598
  self.child_activity_signature = ""
@@ -420,6 +612,7 @@ class ProgressTracker:
420
612
  found = self._find_codex_child_session_file(thread_id)
421
613
  if found:
422
614
  self._codex_child_session_paths[thread_id] = found
615
+ self._claude_child_session_files = self._find_claude_child_session_files()
423
616
  self._last_child_scan_at = now
424
617
 
425
618
  files = []
@@ -427,24 +620,22 @@ class ProgressTracker:
427
620
  total_bytes = 0
428
621
  max_mtime = 0.0
429
622
 
430
- for thread_id in sorted(self.codex_child_thread_ids):
431
- path = self._codex_child_session_paths.get(thread_id)
432
- if not path:
433
- continue
623
+ def add_file(kind, identifier, path):
624
+ nonlocal total_bytes, max_mtime
434
625
  try:
435
626
  stat = os.stat(path)
436
627
  except OSError:
437
- continue
438
-
628
+ return
629
+ path_str = str(path)
439
630
  total_bytes += stat.st_size
440
631
  max_mtime = max(max_mtime, stat.st_mtime)
441
- signature_parts.append(
442
- f"{thread_id}:{stat.st_size}:{getattr(stat, 'st_mtime_ns', int(stat.st_mtime * 1_000_000_000))}"
443
- )
632
+ mtime_ns = getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000))
633
+ signature_parts.append(f"{kind}:{identifier}:{stat.st_size}:{mtime_ns}")
444
634
  files.append(
445
635
  {
446
- "thread_id": thread_id,
447
- "path": path,
636
+ "kind": kind,
637
+ "thread_id": identifier,
638
+ "path": path_str,
448
639
  "size": stat.st_size,
449
640
  "mtime": datetime.fromtimestamp(
450
641
  stat.st_mtime, timezone.utc
@@ -452,6 +643,14 @@ class ProgressTracker:
452
643
  }
453
644
  )
454
645
 
646
+ for thread_id in sorted(self.codex_child_thread_ids):
647
+ path = self._codex_child_session_paths.get(thread_id)
648
+ if path:
649
+ add_file("codex", thread_id, path)
650
+
651
+ for path in self._claude_child_session_files:
652
+ add_file("claude", path.stem, path)
653
+
455
654
  self.child_session_files = files
456
655
  self.child_total_bytes = total_bytes
457
656
  self.child_activity_signature = "|".join(signature_parts)
@@ -519,7 +718,7 @@ def atomic_write_json(data, filepath):
519
718
 
520
719
  def tail_and_parse(session_log, progress_file, poll_interval=0.5):
521
720
  """Tail session log and parse stream-json events."""
522
- tracker = ProgressTracker()
721
+ tracker = ProgressTracker(session_log)
523
722
  last_write_state = None
524
723
 
525
724
  def state_key(state):