cclaw-cli 0.51.23 → 0.51.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +135 -414
  2. package/dist/artifact-linter.js +10 -6
  3. package/dist/config.d.ts +1 -1
  4. package/dist/config.js +28 -3
  5. package/dist/content/core-agents.d.ts +128 -2
  6. package/dist/content/core-agents.js +291 -13
  7. package/dist/content/examples.js +21 -10
  8. package/dist/content/next-command.js +10 -6
  9. package/dist/content/reference-patterns.d.ts +18 -0
  10. package/dist/content/reference-patterns.js +391 -0
  11. package/dist/content/seed-shelf.js +73 -8
  12. package/dist/content/skills.js +39 -34
  13. package/dist/content/stage-common-guidance.js +19 -3
  14. package/dist/content/stage-schema.d.ts +12 -0
  15. package/dist/content/stage-schema.js +224 -24
  16. package/dist/content/stages/_lint-metadata/index.js +3 -2
  17. package/dist/content/stages/brainstorm.js +27 -18
  18. package/dist/content/stages/design.js +27 -18
  19. package/dist/content/stages/review.js +20 -9
  20. package/dist/content/stages/schema-types.d.ts +9 -2
  21. package/dist/content/stages/scope.js +21 -10
  22. package/dist/content/stages/ship.js +3 -2
  23. package/dist/content/stages/tdd.js +18 -13
  24. package/dist/content/start-command.js +3 -2
  25. package/dist/content/status-command.js +9 -4
  26. package/dist/content/subagents.js +336 -38
  27. package/dist/content/templates.js +182 -25
  28. package/dist/delegation.d.ts +2 -0
  29. package/dist/delegation.js +27 -6
  30. package/dist/doctor.js +167 -25
  31. package/dist/flow-state.d.ts +1 -0
  32. package/dist/flow-state.js +1 -0
  33. package/dist/gate-evidence.js +25 -2
  34. package/dist/install.js +72 -8
  35. package/dist/internal/advance-stage.js +179 -26
  36. package/dist/knowledge-store.js +30 -6
  37. package/dist/run-archive.js +11 -0
  38. package/dist/run-persistence.js +35 -10
  39. package/dist/tdd-verification-evidence.d.ts +17 -0
  40. package/dist/tdd-verification-evidence.js +43 -0
  41. package/dist/types.d.ts +10 -0
  42. package/package.json +1 -1
@@ -29,10 +29,33 @@ export const ARTIFACT_TEMPLATES = {
29
29
  ### Discovered context
30
30
  - (paths, prior artifacts, seeds, prompt fragments — referenced by downstream stages, or \`- None.\`)
31
31
 
32
- ## Problem
33
- - **What we're solving:**
34
- - **Success criteria:**
35
- - **Constraints:**
32
+ ## Reference Pattern Candidates
33
+ | Pattern / source | Reusable invariant | Disposition (accept/reject/defer) | Why |
34
+ |---|---|---|---|
35
+ | | | | |
36
+
37
+ ## Problem Decision Record
38
+ - **Depth:** lite | standard | deep
39
+ - **Frame type:** product | technical-maintenance
40
+
41
+ ### Product framing (use when applicable)
42
+ - **Persona / user:**
43
+ - **Job to be done:**
44
+ - **Pain / trigger:**
45
+ - **Value hypothesis:**
46
+ - **Evidence / signal:**
47
+ - **Success metric:**
48
+ - **Why now:**
49
+ - **Do-nothing consequence:**
50
+ - **Non-goals:**
51
+
52
+ ### Technical-maintenance framing (use when product framing is not applicable)
53
+ - **Affected operator/developer:**
54
+ - **Current failure mode:**
55
+ - **Expected operational improvement:**
56
+ - **Verification signal:**
57
+ - **Do-nothing cost:**
58
+ - **Non-goals:**
36
59
 
37
60
  ## Premise Check
38
61
  - **Right problem?** (yes/no + one-line justification — take a position)
@@ -42,12 +65,17 @@ export const ARTIFACT_TEMPLATES = {
42
65
  ## How Might We
43
66
  - *How might we …?* — one line naming the user, the desired outcome, and the binding constraint.
44
67
 
68
+ ## Clarity Gate
69
+ - Ambiguity score (0.00-1.00):
70
+ - Decision boundaries (what this stage will decide):
71
+ - Reaffirmed non-goals:
72
+ - Residual-risk handoff to scope:
73
+
45
74
  ## Sharpening Questions
75
+ > Ask one decision-changing question at a time. For concrete early exits, record \`None - early exit\` with rationale.
46
76
  | # | Question | Answer / Assumption | Decision impact |
47
77
  |---|---|---|---|
48
78
  | 1 | | | |
49
- | 2 | | | |
50
- | 3 | | | |
51
79
 
52
80
  ## Clarifying Questions
53
81
  | # | Question | Answer | Decision impact |
@@ -55,7 +83,7 @@ export const ARTIFACT_TEMPLATES = {
55
83
  | 1 | | | |
56
84
 
57
85
  ## Approach Tier
58
- - Tier: Lightweight | Standard | Deep
86
+ - Tier: lite | standard | deep
59
87
  - Why this tier:
60
88
 
61
89
  ## Short-Circuit Decision
@@ -64,7 +92,7 @@ export const ARTIFACT_TEMPLATES = {
64
92
  - Scope handoff:
65
93
 
66
94
  ## Approaches
67
- | Approach | Role | Upside | Architecture | Trade-offs | Reuses | Recommendation |
95
+ | Approach | Role | Upside | Architecture | Trade-offs | Reuses / reference pattern | Recommendation |
68
96
  |---|---|---|---|---|---|---|
69
97
  | A | baseline | modest | | | | |
70
98
  | B | challenger | high | | | | |
@@ -80,7 +108,7 @@ export const ARTIFACT_TEMPLATES = {
80
108
  - **Approach:**
81
109
  - **Rationale:** Trace this to the prior Approach Reaction.
82
110
  - **Approval:** pending
83
- - **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\` with explicit requirements/constraints.
111
+ - **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\`. Include upstream decisions used, drift, confidence, unresolved questions, risk hints, and non-goals.
84
112
 
85
113
  ## Not Doing
86
114
  - (3-5 things this brainstorm is *not* committing to — distinct from \`Deferred\`. These will not appear in scope unless the user explicitly opts in.)
@@ -165,8 +193,35 @@ ${SEED_SHELF_SECTION}
165
193
  | HOUR 4-5 (integration) | | | |
166
194
  | HOUR 6+ (polish/tests) | | | |
167
195
 
196
+ ## Scope Contract
197
+ - **Selected mode:** HOLD SCOPE | SELECTIVE EXPANSION | SCOPE EXPANSION | SCOPE REDUCTION
198
+ - **In scope:**
199
+ - **Out of scope:**
200
+ - **Requirements:**
201
+ - **Locked decisions:**
202
+ - **Discretion areas:**
203
+ - **Deferred ideas:**
204
+ - **Accepted reference ideas:**
205
+ - **Rejected reference ideas:**
206
+ - **Success definition:**
207
+ - **Design handoff:**
208
+
209
+ ## Decision Drivers
210
+ | Driver | Weight (1-5) | Option A | Option B | Option C | Notes |
211
+ |---|---|---|---|---|---|
212
+ | Value impact | | | | | |
213
+ | Risk reduction | | | | | |
214
+ | Reversibility | | | | | |
215
+ | Delivery effort | | | | | |
216
+ | Timeline fit | | | | | |
217
+
218
+ ## Scope Completeness Score
219
+ - Score (0.00-1.00):
220
+ - What is still uncertain:
221
+ - Blockers requiring escalation:
222
+
168
223
  ## Scope Mode
169
- - [ ] SCOPE EXPANSION — dream bigger; user explicitly opts into the larger product slice.
224
+ - [ ] SCOPE EXPANSION — explore ambitious alternatives; user explicitly opts into the larger product slice.
170
225
  - [ ] SELECTIVE EXPANSION — hold baseline scope and cherry-pick one high-leverage addition.
171
226
  - [ ] HOLD SCOPE — preserve the approved brainstorm direction with maximum rigor.
172
227
  - [ ] SCOPE REDUCTION — strip to the smallest useful wedge when risk/blast radius is too high.
@@ -174,9 +229,29 @@ ${SEED_SHELF_SECTION}
174
229
  ## Mode-Specific Analysis
175
230
  | Selected mode | Rationale | Depth |
176
231
  |---|---|---|
177
- | | | default / deep |
232
+ | | | lite / standard / deep |
233
+
234
+ > Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand with optional evidence headings below.
235
+
236
+ ## Landscape Check
237
+ - Optional for EXPAND/SELECTIVE/deep; omit for compact HOLD SCOPE.
238
+
239
+ ## Taste Calibration
240
+ - Optional quality-bar references from in-repo modules/files.
241
+
242
+ ## Reference Pattern Registry
243
+ | Pattern / source | Invariant to preserve | Disposition (accepted/rejected/deferred) | Scope boundary impact |
244
+ |---|---|---|---|
245
+ | | | | |
178
246
 
179
- > Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand below with mode-specific analysis.
247
+ ## Reference Pull
248
+ - Optional evidence from \`/Users/zuevrs/Downloads/references\`; list accepted/rejected ideas or \`Not needed - compact scope\`.
249
+
250
+ ## Ambitious Alternatives
251
+ - Optional for SCOPE EXPANSION/SELECTIVE; list larger alternatives and disposition.
252
+
253
+ ## Ruthless Minimum Slice
254
+ - Optional for SCOPE REDUCTION/high-risk scope; define the smallest useful wedge.
180
255
 
181
256
  ## Requirements (stable IDs)
182
257
  | ID | Requirement (observable outcome) | Priority | Source (origin doc / prompt line) |
@@ -241,6 +316,9 @@ ${SEED_SHELF_SECTION}
241
316
 
242
317
  ## Scope Summary
243
318
  - Selected mode: (one of \`SCOPE EXPANSION\` | \`SELECTIVE EXPANSION\` | \`HOLD SCOPE\` | \`SCOPE REDUCTION\`)
319
+ - Confidence: high | medium | low
320
+ - Drift from brainstorm: None / <specific drift>
321
+ - Unresolved questions: None / <questions>
244
322
  - Strongest challenges resolved:
245
323
  - Recommended path:
246
324
  - Accepted scope:
@@ -291,7 +369,7 @@ ${SEED_SHELF_SECTION}
291
369
 
292
370
  ## Compact-First Scaffold
293
371
  - Default to the compact design spine unless risk requires Standard/Deep add-ons.
294
- - Compact required spine: Codebase Investigation, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, and Completion Dashboard.
372
+ - Compact required spine: Upstream Handoff, Codebase Investigation, Engineering Lock, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, Spec Handoff, and Completion Dashboard.
295
373
  - Mark optional Standard/Deep sections as \`Omitted - compact design\` when they do not apply; do not expand the scaffold just to fill empty tables.
296
374
 
297
375
  ## Upstream Handoff
@@ -302,9 +380,19 @@ ${SEED_SHELF_SECTION}
302
380
  - Drift from upstream (or \`None\`):
303
381
 
304
382
  ## Codebase Investigation
305
- | File | Current responsibility | Patterns discovered |
306
- |---|---|---|
307
- | | | |
383
+ | File | Current responsibility | Patterns discovered | Existing fit / reuse candidate |
384
+ |---|---|---|---|
385
+ | | | | |
386
+
387
+ ## Engineering Lock
388
+ | Decision area | Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence | Confidence |
389
+ |---|---|---|---|---|---|---|
390
+ | | | | | | | |
391
+
392
+ ## Architecture Decision Record (ADR)
393
+ | ADR ID | Context | Decision | Alternatives considered | Consequences | Reversal trigger |
394
+ |---|---|---|---|---|---|
395
+ | ADR-1 | | | | | |
308
396
 
309
397
  ## Search Before Building
310
398
  | Layer | Label | What to reuse first |
@@ -336,9 +424,9 @@ ${MARKDOWN_CODE_FENCE}
336
424
  ## Data-Flow Shadow Paths
337
425
  - Standard/Deep add-on; omit when compact design does not need a shadow path.
338
426
  <!-- diagram: data-flow-shadow-paths -->
339
- | Path | Trigger | Fallback/Degrade behavior |
340
- |---|---|---|
341
- | | | |
427
+ | Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence |
428
+ |---|---|---|---|---|
429
+ | | | | | |
342
430
 
343
431
  ## Error Flow Diagram
344
432
  - Standard/Deep add-on; omit when the Failure Mode Table is sufficient.
@@ -387,6 +475,8 @@ ${MARKDOWN_CODE_FENCE}
387
475
  | | | | |
388
476
 
389
477
  ## Data Flow
478
+ - Data/state flow:
479
+ - Critical path:
390
480
  - Happy path:
391
481
  - Nil/empty input path:
392
482
  - Upstream error path:
@@ -411,11 +501,21 @@ ${MARKDOWN_CODE_FENCE}
411
501
  |---|---|---|---|
412
502
  | | | | |
413
503
 
504
+ ## Pre-mortem
505
+ | Scenario | Earliest warning signal | Mitigation owner | Containment action |
506
+ |---|---|---|---|
507
+ | | | | |
508
+
414
509
  ## Test Strategy
415
510
  - Unit:
416
511
  - Integration:
417
512
  - E2E:
418
513
 
514
+ ## Test-Diagram Mapping
515
+ | Critical flow | Test coverage (ID/command) | Diagram anchor | Gap status |
516
+ |---|---|---|---|
517
+ | | | | covered/gap |
518
+
419
519
  ## Performance Budget
420
520
  | Critical path | Metric | Target | Measurement method |
421
521
  |---|---|---|---|
@@ -431,6 +531,23 @@ ${MARKDOWN_CODE_FENCE}
431
531
  |---|---|---|
432
532
  | | | |
433
533
 
534
+ ## Rejected Alternatives
535
+ | Alternative | Why rejected | Revival signal |
536
+ |---|---|---|
537
+ | | | |
538
+
539
+ ## Design Decisions
540
+ | Decision Ref | Requirement / LD refs | Decision | Spec impact |
541
+ |---|---|---|---|
542
+ | DD-1 | | | |
543
+
544
+ ## Spec Handoff
545
+ - Requirements to carry forward:
546
+ - Design decisions to encode:
547
+ - Risks and rescue paths:
548
+ - Test/performance expectations:
549
+ - Unresolved questions (or \`None\`):
550
+
434
551
  ## Outside Voice Findings
435
552
  | ID | Dimension | Finding | Disposition | Rationale |
436
553
  |---|---|---|---|---|
@@ -458,6 +575,11 @@ ${MARKDOWN_CODE_FENCE}
458
575
  |---|---|---|
459
576
  | | | |
460
577
 
578
+ ## Reference-Grade Contracts
579
+ | Pattern / source | Reusable invariant | Local adaptation | Rejection boundary | Verification signal |
580
+ |---|---|---|---|---|
581
+ | | | | | |
582
+
461
583
  ## Interface Contracts
462
584
  - Standard/Deep add-on when module boundaries or APIs change; omit for compact local changes.
463
585
  | Module | Produces | Consumes |
@@ -486,6 +608,9 @@ ${SEED_SHELF_SECTION}
486
608
 
487
609
  **Decisions made:** 0 | **Unresolved:** 0
488
610
 
611
+ ## Learning Capture Hint
612
+ For meaningful design work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"lesson","trigger":"when design chooses a risky fallback path","action":"record the switch trigger and rollback signal in Spec Handoff","confidence":"medium","domain":"architecture","stage":"design"}\`
613
+
489
614
  ## Learnings
490
615
  - None this stage.
491
616
  `,
@@ -663,7 +788,7 @@ Execution rule: complete and verify each batch before starting the next batch.
663
788
 
664
789
  ## Execution Posture
665
790
  - Posture: sequential | dependency-batched | blocked
666
- - RED/GREEN/REFACTOR checkpoint plan:
791
+ - Vertical-slice RED/GREEN/REFACTOR checkpoint plan:
667
792
  - Incremental commits: yes/no/deferred because
668
793
 
669
794
  ## RED Evidence
@@ -672,7 +797,7 @@ Execution rule: complete and verify each batch before starting the next batch.
672
797
  | S-1 | | | |
673
798
 
674
799
  ## Acceptance Mapping
675
- | Slice | Source item ID | Spec criterion ID |
800
+ | Vertical slice | Source item ID | Spec criterion ID |
676
801
  |---|---|---|
677
802
  | S-1 | SRC-1 | AC-1 |
678
803
 
@@ -721,6 +846,9 @@ Execution rule: complete and verify each batch before starting the next batch.
721
846
  |---|---|---|---|---|
722
847
  | S-1 | | | | |
723
848
 
849
+ ## Learning Capture Hint
850
+ For meaningful TDD work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"pattern","trigger":"when a regression only fails after state rewind","action":"keep the RED fixture and add a cycle-log assertion before GREEN","confidence":"medium","domain":"testing","stage":"tdd"}\`
851
+
724
852
  ## Learnings
725
853
  - None this stage.
726
854
  `,
@@ -735,16 +863,40 @@ Execution rule: complete and verify each batch before starting the next batch.
735
863
  - Open questions:
736
864
  - Drift from upstream (or \`None\`):
737
865
 
866
+ ## Review Evidence Scope
867
+ - Base/head:
868
+ - Files inspected:
869
+ - Changed-file coverage summary:
870
+ - Diagnostics run:
871
+ - Omitted files with explicit reason:
872
+ - Reviewer delegation evidence:
873
+ - Security-reviewer delegation evidence:
874
+
875
+ ## Changed-File Coverage
876
+ | File | Coverage status | Evidence / no-impact reason |
877
+ |---|---|---|
878
+ | | inspected / broader-module / omitted-no-impact | |
879
+
738
880
  ## Layer 1 Verdict
739
881
  | Criterion | Verdict | Evidence |
740
882
  |---|---|---|
741
883
  | AC-1 | PASS/FAIL | |
742
884
 
743
885
  ## Layer 2 Findings
744
- | ID | Severity | Category | Description | Status |
745
- |---|---|---|---|---|
746
- | R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety | | open/resolved |
747
- - NO_CHANGE_ATTESTATION: <required when Category=security has no entries; explain why no security-relevant changes were detected>
886
+ | ID | Severity | Category | File:line / no-line reason | Description | Status |
887
+ |---|---|---|---|---|---|
888
+ | R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety | path:line | | open/resolved |
889
+ - NO_FINDINGS_ATTESTATION: <required when no findings are reported; cite inspected coverage>
890
+
891
+ ## Security Sweep Attestation
892
+ - Result: findings | NO_CHANGE_ATTESTATION | NO_SECURITY_IMPACT
893
+ - Inspected surfaces:
894
+ - Rationale:
895
+
896
+ ## Dependency & Version Audit
897
+ - Relevant: yes/no
898
+ - Manifests/lockfiles/generated clients/CI/runtime config/external APIs inspected:
899
+ - Result / no-impact rationale:
748
900
 
749
901
  ## Incoming Feedback Queue
750
902
  | ID | Source | Severity | File:line | Request | Status | Evidence |
@@ -757,6 +909,7 @@ Execution rule: complete and verify each batch before starting the next batch.
757
909
 
758
910
  ## Review Readiness Snapshot
759
911
 
912
+ - Victory Detector: pass | fail (Layer 1, Layer 2, security sweep, structured findings, trace evidence, unresolved-critical status)
760
913
  - Completed checks: Layer 1, Layer 2 tags, security sweep, schema validation
761
914
  - Delegation log: \`.cclaw/state/delegation-log.json\` required/completed/waived/pending
762
915
  - Staleness signal: commit at last review pass vs current commit
@@ -797,6 +950,9 @@ Execution rule: complete and verify each batch before starting the next batch.
797
950
  ## Final Verdict
798
951
  - APPROVED | APPROVED_WITH_CONCERNS | BLOCKED
799
952
 
953
+ ## Learning Capture Hint
954
+ For meaningful review work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"lesson","trigger":"when security sweep finds no issues but touches trust boundaries","action":"record NO_SECURITY_IMPACT with inspected surfaces and rationale","confidence":"medium","domain":"security","stage":"review"}\`
955
+
800
956
  ## Learnings
801
957
  - None this stage.
802
958
  `,
@@ -865,6 +1021,7 @@ ${SHIP_FINALIZATION_ENUM_LINES}
865
1021
  - NO_VCS handoff target + artifact path (if FINALIZE_NO_VCS):
866
1022
 
867
1023
  ## Completion Status
1024
+ - Victory Detector: pass | fail (review verdict valid, preflight fresh, rollback ready, one finalization enum selected, execution result present)
868
1025
  - SHIPPED | SHIPPED_WITH_EXCEPTIONS | BLOCKED
869
1026
  - Exceptions (if any):
870
1027
 
@@ -96,6 +96,8 @@ export declare function checkMandatoryDelegations(projectRoot: string, stage: Fl
96
96
  staleIgnored: string[];
97
97
  /** Delegation rows missing required evidence under a role-switch fallback. */
98
98
  missingEvidence: string[];
99
+ /** Current-run scheduled rows with no terminal row sharing the same spanId. */
100
+ staleWorkers: string[];
99
101
  /** Expected fulfillment mode for the active harness set. */
100
102
  expectedMode: DelegationFulfillmentMode;
101
103
  }>;
@@ -9,6 +9,7 @@ import { HARNESS_ADAPTERS } from "./harness-adapters.js";
9
9
  import { readFlowState } from "./runs.js";
10
10
  import { stageSchema } from "./content/stage-schema.js";
11
11
  const execFileAsync = promisify(execFile);
12
+ const TERMINAL_DELEGATION_STATUSES = new Set(["completed", "failed", "waived"]);
12
13
  function delegationLogPath(projectRoot) {
13
14
  return path.join(projectRoot, RUNTIME_ROOT, "state", "delegation-log.json");
14
15
  }
@@ -135,6 +136,11 @@ function isDelegationEntry(value) {
135
136
  o.status === "waived";
136
137
  const timestampOk = typeof o.ts === "string" ||
137
138
  typeof o.startTs === "string";
139
+ const terminalStatus = o.status === "completed" || o.status === "failed" || o.status === "waived";
140
+ const lifecycleOk = o.status !== "scheduled" || o.endTs === undefined;
141
+ const terminalLifecycleOk = !terminalStatus ||
142
+ o.endTs === undefined ||
143
+ typeof o.endTs === "string";
138
144
  const retryOk = o.retryCount === undefined ||
139
145
  (typeof o.retryCount === "number" &&
140
146
  Number.isFinite(o.retryCount) &&
@@ -146,6 +152,8 @@ function isDelegationEntry(value) {
146
152
  modeOk &&
147
153
  statusOk &&
148
154
  timestampOk &&
155
+ lifecycleOk &&
156
+ terminalLifecycleOk &&
149
157
  (o.spanId === undefined || typeof o.spanId === "string") &&
150
158
  (o.parentSpanId === undefined || typeof o.parentSpanId === "string") &&
151
159
  (o.startTs === undefined || typeof o.startTs === "string") &&
@@ -185,6 +193,7 @@ function parseLedger(raw, runId) {
185
193
  ...item,
186
194
  spanId: item.spanId ?? createSpanId(),
187
195
  startTs: ts,
196
+ endTs: TERMINAL_DELEGATION_STATUSES.has(item.status) ? (item.endTs ?? ts) : undefined,
188
197
  ts,
189
198
  retryCount: typeof item.retryCount === "number" && Number.isInteger(item.retryCount) && item.retryCount >= 0
190
199
  ? item.retryCount
@@ -226,6 +235,12 @@ export async function appendDelegation(projectRoot, entry) {
226
235
  stamped.spanId = entry.spanId ?? createSpanId();
227
236
  stamped.startTs = startTs;
228
237
  stamped.ts = startTs;
238
+ if (TERMINAL_DELEGATION_STATUSES.has(stamped.status) && !stamped.endTs) {
239
+ stamped.endTs = new Date().toISOString();
240
+ }
241
+ if (stamped.status === "scheduled") {
242
+ delete stamped.endTs;
243
+ }
229
244
  stamped.schemaVersion = 1;
230
245
  if (stamped.retryCount === undefined ||
231
246
  !Number.isInteger(stamped.retryCount) ||
@@ -247,11 +262,10 @@ export async function appendDelegation(projectRoot, entry) {
247
262
  stamped.fulfillmentMode = expectedFulfillmentMode(fallbacks);
248
263
  }
249
264
  }
250
- // Idempotency: if a caller (or a retried hook) tries to append a row
251
- // with a spanId that already exists in the ledger, treat it as a no-op
252
- // instead of growing the log with duplicate entries that subsequent
253
- // delegation checks would mis-count.
254
- if (prior.entries.some((existing) => existing.spanId === stamped.spanId)) {
265
+ // Idempotency: a retried hook may replay the same lifecycle row. Allow a
266
+ // terminal row to close an existing scheduled span, but drop exact same
267
+ // span/status duplicates so checks do not mis-count repeated writes.
268
+ if (prior.entries.some((existing) => existing.spanId === stamped.spanId && existing.status === stamped.status)) {
255
269
  return;
256
270
  }
257
271
  const ledger = {
@@ -293,6 +307,12 @@ export async function checkMandatoryDelegations(projectRoot, stage, options = {}
293
307
  const missing = [];
294
308
  const waived = [];
295
309
  const missingEvidence = [];
310
+ const terminalSpanIds = new Set(forRun
311
+ .filter((entry) => TERMINAL_DELEGATION_STATUSES.has(entry.status) && entry.spanId)
312
+ .map((entry) => entry.spanId));
313
+ const staleWorkers = forRun
314
+ .filter((entry) => entry.status === "scheduled" && entry.spanId && !terminalSpanIds.has(entry.spanId))
315
+ .map((entry) => `${entry.agent}(spanId=${entry.spanId})`);
296
316
  const config = await readConfig(projectRoot).catch(() => null);
297
317
  const harnesses = config?.harnesses ?? [];
298
318
  const configuredFallbacks = harnesses.map((h) => HARNESS_ADAPTERS[h].capabilities.subagentFallback);
@@ -324,11 +344,12 @@ export async function checkMandatoryDelegations(projectRoot, stage, options = {}
324
344
  }
325
345
  }
326
346
  return {
327
- satisfied: missing.length === 0 && missingEvidence.length === 0,
347
+ satisfied: missing.length === 0 && missingEvidence.length === 0 && staleWorkers.length === 0,
328
348
  missing,
329
349
  waived,
330
350
  staleIgnored,
331
351
  missingEvidence,
352
+ staleWorkers,
332
353
  expectedMode
333
354
  };
334
355
  }