okstra 0.63.0 → 0.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/docs/kr/architecture.md +1 -1
  2. package/docs/superpowers/plans/2026-06-09-implementation-run-artifact-stage-isolation.md +320 -0
  3. package/docs/superpowers/plans/2026-06-10-lead-worker-completion-polling-PROBE.md +42 -0
  4. package/docs/superpowers/plans/2026-06-10-lead-worker-completion-polling.md +337 -0
  5. package/docs/superpowers/specs/2026-06-09-executor-model-custom-id-cascade-design.md +66 -0
  6. package/docs/superpowers/specs/2026-06-09-implementation-run-artifact-stage-isolation-design.md +87 -0
  7. package/docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md +113 -0
  8. package/package.json +1 -1
  9. package/runtime/BUILD.json +2 -2
  10. package/runtime/agents/SKILL.md +5 -2
  11. package/runtime/agents/TODO.md +9 -2
  12. package/runtime/agents/workers/claude-worker.md +1 -1
  13. package/runtime/bin/lib/okstra-ctl/cmd-rerun.sh +23 -4
  14. package/runtime/prompts/profiles/implementation-planning.md +1 -1
  15. package/runtime/prompts/wizard/prompts.ko.json +17 -1
  16. package/runtime/python/okstra_ctl/backfill.py +23 -4
  17. package/runtime/python/okstra_ctl/consumers.py +118 -1
  18. package/runtime/python/okstra_ctl/paths.py +11 -0
  19. package/runtime/python/okstra_ctl/run.py +147 -67
  20. package/runtime/python/okstra_ctl/run_context.py +2 -0
  21. package/runtime/python/okstra_ctl/wizard.py +127 -29
  22. package/runtime/skills/okstra-convergence/SKILL.md +3 -1
  23. package/runtime/skills/okstra-report-writer/SKILL.md +2 -0
  24. package/runtime/skills/okstra-run/SKILL.md +1 -1
  25. package/runtime/skills/okstra-team-contract/SKILL.md +37 -0
  26. package/runtime/templates/reports/final-report.template.md +1 -1
  27. package/runtime/validators/validate-run.py +20 -3
  28. package/src/install.mjs +21 -0
  29. package/src/uninstall.mjs +17 -17
@@ -91,6 +91,7 @@ Required checkpoints:
91
91
  - `PROGRESS: phase-2-prompts preparing <N> worker prompts` — at the start of Phase 2, before any `Write` to the assigned prompt paths.
92
92
  - `PROGRESS: phase-3-team-create attempting TeamCreate` — immediately before the `TeamCreate` call.
93
93
  - `PROGRESS: phase-4-dispatch worker=<role> model=<model>` — once per worker, immediately before the `Agent` / wrapper call.
94
+ - `PROGRESS: phase-5-poll pending=<n> done=<m>` — emitted on each wakeup while the pending set is non-empty.
94
95
  - `PROGRESS: phase-5-collect worker=<role> status=<terminal-status>` — once per worker, immediately after the result file is verified.
95
96
  - `PROGRESS: phase-5.5-convergence round=<N> queue=<count>` — at the start of each convergence round (Phase 5.5).
96
97
  - `PROGRESS: phase-5.6-critic provider=<provider> gaps=<n>` — when the coverage critic pass runs (Phase 5.6, opt-in). Omitted when `convergence.critic.enabled == false`.
@@ -226,6 +227,8 @@ Spawn **analysis workers only** in the same turn (Phase 4 in Teams mode; Phase 5
226
227
 
227
228
  The no-`team_name` fallback (Phase 5) is only legal when team-state's `teamCreate.status` is `"error"` for this run. If `teamCreate` is missing or `attempted: false`, the correct action when an Agent dispatch is rejected for a missing team is to GO BACK to Phase 3 and call `TeamCreate` — never to strip `team_name` and continue.
228
229
 
230
+ **Completion detection after dispatch (BLOCKING).** The `Agent(... team_name ...)` call returns `Spawned successfully` immediately; that ack is NOT completion. After dispatching the analysis workers (async), Lead MUST detect their completion via the self-scheduled polling protocol in [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Worker-completion detection (self-scheduled polling)" — do NOT restate the algorithm here. Lead MUST NOT end its turn with a prose "waiting for workers" statement; that path stalls the run until the user manually nudges it.
231
+
229
232
  ### Errors log path wiring (BLOCKING)
230
233
 
231
234
  The launch prompt's `## Run Logs (error-log wiring)` section gives Lead the resolved absolute paths for the run-level errors log and every per-worker sidecar. When Lead constructs each worker's dispatch prompt body, Lead MUST inject the matching two header lines verbatim:
@@ -314,7 +317,7 @@ Distinct from Phase 5.5 finding convergence:
314
317
 
315
318
  Lead's responsibilities in this sub-step (in order):
316
319
 
317
- 1. Extract `P-*` plan items from the draft report's `## 5.5 Implementation Plan Deliverables` per the prefix → source-section mapping in the convergence skill.
320
+ 1. Extract `P-*` plan items from the draft report's `## 5.4 Implementation Plan Deliverables` per the prefix → source-section mapping in the convergence skill.
318
321
  2. Dispatch a single plan-body reverify round to every analyser worker in the roster (`claude`, `codex`, and `gemini` when opted in). `Report writer worker` is NOT a participant in this round.
319
322
  3. Aggregate verdicts and resolve the gate result to one of `passed` / `passed-with-dissent` / `blocked-by-disagreement` / `aborted-non-result`.
320
323
  4. Write `runs/<task-type>/state/plan-body-verification.json` (schema in the convergence skill).
@@ -378,7 +381,7 @@ After persistence, reply briefly in the resolved Report Language with: completio
378
381
  | Letting `convergence.maxRounds` default to 2 for `requirements-discovery` | Resolve effective default to `1` for discovery and record in convergence state artifact |
379
382
  | Issuing serial Read calls in Phase 1 | The intake files are independent — issue all Read calls in a single message (parallel) |
380
383
  | Flagging the claude-worker dispatch prompt as "incomplete" because it lacks `[Required reading]` / `[Error reporting]` blocks | Intentional asymmetry — see [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Asymmetry between claude-worker and codex/gemini-worker prompts" |
381
- | Waiting silently while the dispatched `claude-worker` Agent call returns nothing for many minutes (the dev-9495 pattern: two 28+25-minute hangs before lead manually `tmux kill-pane`d) | The claude-worker MUST append a `- PROGRESS: <stage> <ISO-UTC>` line to its audit sidecar (`runs/<task-type>/worker-results/claude-worker-audit-<task-type>-<seq>.md`) at least every 5 minutes (see `agents/workers/claude-worker.md` "Heartbeat" rule). If the sidecar is absent or its mtime is >5 minutes stale, treat the dispatch as `timeout` and redispatch once with a byte-identical prompt; after a second silent hang, record terminal status `timeout` with the missing-sidecar reason in team-state. Lead cannot poll mid-Agent-call but MUST inspect the audit sidecar immediately when the Agent call finally returns; a missing sidecar after `completed` is itself a contract violation per the heartbeat rule |
384
+ | Waiting silently while the dispatched `claude-worker` Agent call returns nothing for many minutes (the dev-9495 pattern: two 28+25-minute hangs before lead manually `tmux kill-pane`d) | The claude-worker MUST append a `- PROGRESS: <stage> <ISO-UTC>` line to its audit sidecar (`runs/<task-type>/worker-results/claude-worker-audit-<task-type>-<seq>.md`) at least every 5 minutes (see `agents/workers/claude-worker.md` "Heartbeat" rule). If the sidecar is absent or its mtime is >5 minutes stale, treat the dispatch as `timeout` and redispatch once with a byte-identical prompt; after a second silent hang, record terminal status `timeout` with the missing-sidecar reason in team-state. The authoritative completion signal is the **result file's appearance**, detected via self-scheduled polling (see [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Worker-completion detection (self-scheduled polling)") — NOT the Agent-call return, which under `team_name` dispatch is just an immediate `Spawned successfully` ack. The heartbeat sidecar is an auxiliary liveness signal layered on top: a missing sidecar after the result file appears is itself a contract violation per the heartbeat rule |
382
385
  | Re-sending confirmed findings (`full-consensus`/`partial-consensus`/`worker-unique`) to a worker in Round 2 | Queue pruning rule — see [okstra-convergence](./skills/okstra-convergence/SKILL.md) "Round 1-N: Re-verification Loop (queue-pruned)" |
383
386
  | Aggregating a `timeout`/`error` reverify dispatch as `DISAGREE` | Worker failure handling — record as `verification-error` and add to `skippedWorkers[]`. See [okstra-convergence](./skills/okstra-convergence/SKILL.md) "Worker failure handling in reverify" |
384
387
  | Skipping `--substitute-data` in the Phase 7 collector run | Always pass the flag — see [okstra-report-writer](./skills/okstra-report-writer/SKILL.md) "Phase 7 token-usage collector" |
@@ -96,7 +96,11 @@ Lead가 implementation.md 프로필을 따라 Phase 4 dispatch를 시도할 때:
96
96
 
97
97
  ---
98
98
 
99
- ## 수정 B — Leader-side 워커 soft timeout 도입
99
+ ## 수정 B — Leader-side 워커 soft timeout 도입 [해결됨, 2026-06-10]
100
+
101
+ ### 해결 메모 (2026-06-10)
102
+
103
+ 본 항목이 요구한 per-worker soft-timeout 안전장치는 self-scheduled polling 작업으로 구현됐다. lead 가 백그라운드 폴링 루프를 띄울 때 거는 `deadline=$((SECONDS + <per-worker-deadline-seconds>))` 가드([skills/okstra-team-contract/SKILL.md:134](../skills/okstra-team-contract/SKILL.md:134))가 워커별 마감을 강제하고, 초과 시 폴링이 `POLL_TIMEOUT` 으로 종료된다([skills/okstra-team-contract/SKILL.md:136](../skills/okstra-team-contract/SKILL.md:136)). 규약은 "Worker-completion detection (self-scheduled polling)" 섹션([skills/okstra-team-contract/SKILL.md:124](../skills/okstra-team-contract/SKILL.md:124))에 정본으로 명세돼 있다. 설계 근거는 [docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md](../docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md) 참고. 아래 배경/초안 분석은 당시 기록으로 보존한다.
100
104
 
101
105
  ### 배경
102
106
 
@@ -166,10 +170,12 @@ fabricate a `timeout` status against a missing artifact.
166
170
 
167
171
  - 수정 A (Claude worker Stop Condition) 가 먼저 머지되어 일정 기간 사용 데이터를 수집해야, 위 표의 expected duration 값을 실측 기반으로 조정할 수 있다. 초안 값은 보수적으로 잡았으므로 1~2주 사용 후 조정 권장.
168
172
 
169
- ### 참고 — 적용 범위 밖 (수정 C 후보)
173
+ ### 참고 — 적용 범위 밖 (수정 C 후보) [해결됨, 2026-06-10]
170
174
 
171
175
  다음은 수정 B와 별개의 더 큰 설계 변경이며, 별도 브랜치에서 다룬다.
172
176
 
177
+ > 해결 메모 (2026-06-10): 아래 첫 두 항목(background 전환 + worker-results 파일 폴링 루프)은 동일한 self-scheduled polling 프로토콜로 구현됐다 — "Worker-completion detection (self-scheduled polling)" 섹션([skills/okstra-team-contract/SKILL.md:124](../skills/okstra-team-contract/SKILL.md:124)), 설계는 [docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md](../docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md). 진짜 cancellation 통합은 여전히 미해결.
178
+
173
179
  - Phase 4를 foreground multi-Agent에서 `run_in_background: true` + leader polling 방식으로 전환
174
180
  - Lead 가 worker-results 파일 존재 + 헤더 검증으로 완료를 판단하는 폴링 루프 추가
175
181
  - 진짜 cancellation (Agent 강제 종료) 가 가능해지면 그 시점에 통합
@@ -179,3 +185,4 @@ fabricate a `timeout` status against a missing artifact.
179
185
  ## 변경 이력
180
186
 
181
187
  - 2026-05-03 — 작성. 수정 A(Claude worker Stop Condition) 동시 머지에 따른 후속 항목 기록.
188
+ - 2026-06-10 — 수정 B(Leader-side 워커 soft timeout) 및 수정 C 후보(Phase 4 background polling / worker-results 파일 폴링 루프)를 self-scheduled polling 작업으로 해소 표기. 정본: okstra-team-contract "Worker-completion detection (self-scheduled polling)" 섹션, 설계: docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md.
@@ -86,7 +86,7 @@ This contract mirrors the `okstra-team-contract` skill's Worker Output Contract
86
86
 
87
87
  ## Stop Condition (BLOCKING)
88
88
 
89
- You are an in-process Claude subagent: Lead's `Agent()` call blocks until you return your final assistant message. Lingering after your worker-results file is on disk extends Phase 4 wall-clock time for the entire run and delays convergence. Be deliberate about stopping.
89
+ When Lead dispatches you with `team_name` (Teams mode), its `Agent()` call returns `Spawned successfully` **immediately** and does NOT block on your completion — Lead detects your completion by self-scheduled polling of your worker-results file (see `okstra-team-contract` "Worker-completion detection (self-scheduled polling)"). Therefore you MUST write your worker-results file at the canonical Result Path before returning: that file's appearance is the ONLY completion signal Lead uses. Lingering after your worker-results file is on disk extends Phase 4 wall-clock time for the entire run and delays convergence. Be deliberate about stopping.
90
90
 
91
91
  After your `Write` to the assigned worker-results file (path provided by Lead as `**Result Path:**` — the canonical anchor header defined in `okstra-team-contract` "Worker Prompt Composition" — or derived under `runs/<task-type>/worker-results/claude-worker-<task-type>-<seq>.md`) succeeds:
92
92
 
@@ -143,13 +143,32 @@ def _brief_path_from_argv(argv, cwd, project_root):
143
143
 
144
144
  batch_id = make_batch_id()
145
145
  items = []
146
- spawned = skipped = 0
146
+ spawned = skipped = rejected = 0
147
147
  # 같은 (project, group, task_id, task_type) 에 속한 다중 rerun 이 같은 batch 내에서
148
148
  # 같은 seq 를 받지 않도록 메모리상 reservation 추적. tmux spawn 후 detached okstra 가
149
149
  # 디스크에 manifest/report 를 쓰기 전에 락이 풀리므로 filesystem-only 예측은 충돌한다.
150
150
  batch_reserved = {}
151
151
  for original in targets:
152
152
  row = find_row_by_run_id(home, original)
153
+ # implementation 은 stage 격리 task-type 이다. 한 run = 런타임에 live
154
+ # registry/consumers 상태로 auto-resolve 되는 단일 stage(_resolve_effective_stages)
155
+ # 이므로, cmd-rerun 은 spawn 전에 어느 stage-<N> 가 선택될지 알 수 없어
156
+ # runs/implementation/stage-<N> 경로도 per-stage run_seq 도 예측할 수 없다.
157
+ # OKSTRA_RUN_SEQ_OVERRIDE(run 전체에 단일 seq) 는 per-stage seq 카운터와
158
+ # 구조적으로 양립 불가하고, 원본 invocation 재생은 진짜 rerun 이 아니다
159
+ # (forced --stage N 은 이미 done 이라 거부되고, --stage auto 는 다른 ready
160
+ # stage 로 조용히 전진한다). 따라서 선예약/spawn 이전에 거부하고 신규
161
+ # implementation run 으로 안내한다.
162
+ if row and slugify_task_segment(row.get("taskType", "")) == "implementation":
163
+ items.append({"originalRunId": original, "newRunId": None,
164
+ "newRunSeq": None, "sessionName": None,
165
+ "status": "rejected", "spawnedAt": None,
166
+ "skipReason": "implementation 은 stage 격리되어 okstra-ctl rerun 으로 "
167
+ "재실행할 수 없습니다. 다음 ready stage 를 실행하려면 "
168
+ "새 implementation run 을 시작하십시오 "
169
+ "(okstra.sh --task-type implementation --stage auto)."})
170
+ rejected += 1
171
+ continue
153
172
  inv = (load_invocation(home, row["projectId"], row["taskGroup"],
154
173
  row["taskId"], row["taskType"], row["runSeq"])
155
174
  if row else None)
@@ -296,14 +315,14 @@ write_batch_meta(home, batch_id, {
296
315
  "selectorRaw": selector_raw, "maxSpawn": max_spawn,
297
316
  "items": items,
298
317
  "summary": {"total": len(targets), "spawned": spawned,
299
- "skipped": skipped, "rejected": 0},
318
+ "skipped": skipped, "rejected": rejected},
300
319
  })
301
320
 
302
321
  dry_run_count = sum(1 for it in items if it["status"] == "dry-run")
303
322
  if dry_run_count:
304
- print(f"batch {batch_id} dry-run: {dry_run_count} skipped: {skipped} rejected: 0")
323
+ print(f"batch {batch_id} dry-run: {dry_run_count} skipped: {skipped} rejected: {rejected}")
305
324
  else:
306
- print(f"batch {batch_id} spawned: {spawned} skipped: {skipped} rejected: 0")
325
+ print(f"batch {batch_id} spawned: {spawned} skipped: {skipped} rejected: {rejected}")
307
326
  print()
308
327
  header = ("RUN-ID", "SESSION-NAME", "ATTACH")
309
328
  rows = []
@@ -55,7 +55,7 @@
55
55
  - Section heading contract (BLOCKING — validator scans for these literal English substrings):
56
56
  - The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stage Map`, `Stage Exit Contract`, `Stage Validation`, `Dependency`, `Validation Checklist`, `Rollback`, `Requirement Coverage`. (Approval is no longer a body section — it is the YAML frontmatter `approved` field.)
57
57
  - Korean translations are allowed in parentheses (e.g. `### Recommended Option (권장 옵션)`), but the English keyword must be present verbatim in the heading line.
58
- - The shape and ordering follow `final-report-template.md` section 5.5 (`Implementation Plan Deliverables` + `Stage Map`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and missing English strings are a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
58
+ - The shape and ordering follow `final-report-template.md` sections 5.4 (`Implementation Plan Deliverables`) + 5.5 (`Stage Map`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and missing English strings are a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
59
59
  - Beyond substring matching, when the Plan Body Verification gate result is `passed` / `passed-with-dissent`, `validators/validate-run.py` runs the **structural** Stage Map validator (`validators/validate-implementation-plan-stages.py`) at the planning boundary — the exact `## 5.5 Stage Map` heading, each `## 5.5.<i> Stage <i>:` section with its four required subsections, the per-stage effective step count (≤6), the `depends-on` DAG, and the per-stage vertical-slice contract (S10) are all enforced here, not deferred to the `implementation` entry gate. S10 scans for the literal in-section strings `Slice value:`, `Acceptance:`, and the Stepwise `action`-cell prefixes `RED:` / `GREEN:` (or a `TDD exemption:` line) — keep these tokens verbatim for the same reason as the heading keywords above.
60
60
  - Required deliverable shape (final report, in addition to the standard sections):
61
61
  - at least two implementation options. **Each option must include**:
@@ -157,6 +157,22 @@
157
157
  "label": "approved final-report.md 의 경로를 알려주세요 (APPROVED 마커 필수)",
158
158
  "echo_template": "approved-plan: {value}"
159
159
  },
160
+ "approve_plan_confirm": {
161
+ "label": "이 플랜으로 implementation 을 진행할까요?\n {path}\n· 예 — 진행합니다. 플랜이 아직 승인 전이면 지금 data.json(정본) + 리포트를 함께 approved 로 처리한 뒤 진행합니다. (markdown 만 손으로 고치면 일관성 검증에서 거부되므로 이 경로로 승인하세요.)\n· 아니오 — 진행하지 않습니다.",
162
+ "echo_template": "approve-plan: {value}",
163
+ "options": {
164
+ "yes": "예 — 승인하고 진행",
165
+ "no": "아니오 — 진행하지 않음"
166
+ },
167
+ "echo_variants": {
168
+ "selected": "plan 선택: {path} — 다음 단계에서 승인·진행 여부를 확인합니다",
169
+ "approved": "approved-plan: {path} (승인·진행 확인됨)"
170
+ },
171
+ "errors": {
172
+ "declined": "진행을 선택하지 않으면 implementation 을 시작할 수 없습니다. 진행(예)하거나 위저드를 종료하세요.",
173
+ "still_unapproved": "approve-plan: 승인 처리 후에도 승인 상태가 아닙니다 (data.json/markdown 불일치): {path}"
174
+ }
175
+ },
160
176
  "stage_pick": {
161
177
  "label": "stage 범위를 선택하세요. auto 는 전체 task(모든 stage)를, 특정 번호는 해당 stage 만 대상으로 합니다.",
162
178
  "echo_template": "stage: {value}",
@@ -260,7 +276,7 @@
260
276
  }
261
277
  },
262
278
  "defaults_or_custom": {
263
- "label": "역할별로 어떤 모델을 쓸지 정하는 단계입니다 (참여 워커 구성을 바꾸는 게 아닙니다).\n· 기본값으로 진행 — lead·실행자/워커·report-writer 모두 추천 모델로 두고 바로 진행합니다.\n· 커스터마이즈 — 역할별 모델을 직접 고르고, 추가 directive·관련 task 도 지정합니다.",
279
+ "label": "역할별 모델·실행 옵션 단계입니다 (참여 워커 구성을 바꾸는 게 아닙니다).\n이번 run 의 워커: {workers}\n· 기본값으로 진행 — 모든 역할을 추천 모델로 둡니다. 추천 기본값: lead·report-writer=opus, claude=opus, codex=gpt-5.5, gemini=auto (실제 값은 runtime 기본값으로 해소). directive·관련 task 없이 바로 진행.\n· 커스터마이즈 — 역할별 모델을 직접 고르고, 추가 directive·관련 task 도 지정합니다.",
264
280
  "echo_template": "customize: {value}",
265
281
  "options": {
266
282
  "defaults": "기본값으로 진행 (역할별 추천 모델 그대로)",
@@ -15,6 +15,28 @@ from .jsonl import append_jsonl, read_jsonl, rotate_recent_if_needed
15
15
  from .project_meta import _project_meta_path
16
16
  from .reconcile import _now_iso, normalize_central_status
17
17
 
18
+ _STAGE_DIR_RE = _re.compile(r"^stage-\d+$")
19
+
20
+
21
+ def _iter_manifest_dirs(runs: Path):
22
+ """`runs/` 아래에서 실제 run-manifest 가 사는 manifests 디렉터리를 모두 yield.
23
+
24
+ 대부분 task-type 은 `runs/<task_type>/manifests/`. implementation 은 stage
25
+ 격리로 산출물이 `runs/implementation/stage-<N>/manifests/` 에 사므로, 직접
26
+ 하위 `manifests` 뿐 아니라 `stage-<N>/manifests` 까지 내려가 스캔해야
27
+ backfill/reindex 가 implementation run 을 누락하지 않는다.
28
+ """
29
+ for type_dir in sorted(p for p in runs.iterdir() if p.is_dir()):
30
+ direct = type_dir / "manifests"
31
+ if direct.is_dir():
32
+ yield direct
33
+ for stage_dir in sorted(p for p in type_dir.iterdir() if p.is_dir()):
34
+ if not _STAGE_DIR_RE.match(stage_dir.name):
35
+ continue
36
+ stage_manifests = stage_dir / "manifests"
37
+ if stage_manifests.is_dir():
38
+ yield stage_manifests
39
+
18
40
 
19
41
  def discover_project_roots(home: Path) -> List[tuple]:
20
42
  """`~/.okstra/projects/<projectId>/meta.json` 을 권위 소스로 (project_id,
@@ -110,10 +132,7 @@ def backfill_project(home: Path, project_id: str, project_root: Path) -> int:
110
132
  runs = task_dir / "runs"
111
133
  if not runs.is_dir():
112
134
  continue
113
- for type_dir in sorted(p for p in runs.iterdir() if p.is_dir()):
114
- manifests = type_dir / "manifests"
115
- if not manifests.is_dir():
116
- continue
135
+ for manifests in _iter_manifest_dirs(runs):
117
136
  for mf in sorted(manifests.iterdir()):
118
137
  m = manifest_re.match(mf.name)
119
138
  if not m:
@@ -7,8 +7,9 @@ so the same (started / done) record is never duplicated."""
7
7
  from __future__ import annotations
8
8
 
9
9
  import json
10
+ import re
10
11
  from pathlib import Path
11
- from typing import Any, Dict, List
12
+ from typing import Any, Dict, List, Optional
12
13
 
13
14
  from .run_context import consumers_mutex
14
15
 
@@ -51,3 +52,119 @@ def append_consumer(plan_run_root: Path, *, impl_task_key: str, stage: int,
51
52
  }
52
53
  with _path(plan_run_root).open("a", encoding="utf-8") as f:
53
54
  f.write(json.dumps(record, ensure_ascii=False) + "\n")
55
+
56
+
57
+ # --- carry-as-SSOT done recovery ---------------------------------------------
58
+ #
59
+ # A stage's completion evidence is the verifier-authored sidecar at
60
+ # `runs/implementation/carry/stage-<N>.json`. The `done` row in consumers.jsonl
61
+ # is a derived index that the lead appends by hand (per the implementation
62
+ # profile) — so it can be missing even when the stage actually finished. The
63
+ # dependency gate (`_resolve_stage_base_commit`) reads `done.head_commit`, so a
64
+ # missing `done` row wrongly blocks downstream stages. We treat the carry file
65
+ # as the source of truth and backfill the missing `done` rows from it before
66
+ # the gate runs. A stage with no carry, or an unfinished carry, is left blocked
67
+ # on purpose.
68
+
69
+
70
+ def _carry_stage_number(carry: Dict[str, Any], filename: str) -> Optional[int]:
71
+ for key in ("stage", "stageNumber"):
72
+ v = carry.get(key)
73
+ if isinstance(v, int):
74
+ return v
75
+ m = re.search(r"stage-(\d+)", filename)
76
+ return int(m.group(1)) if m else None
77
+
78
+
79
+ _FAILED_CARRY_STATUSES = ("fail", "failed", "blocked", "error", "aborted")
80
+
81
+
82
+ def _carry_is_complete(carry: Dict[str, Any]) -> bool:
83
+ # A carry sidecar is written only after the stage's steps + Stage Validation
84
+ # post commands all pass (spec §3.2), so its mere presence marks completion.
85
+ # Treat it as complete unless it explicitly records a failure status. The
86
+ # real backfill guard is whether a head commit can be extracted.
87
+ status = carry.get("status")
88
+ if status is not None and str(status).lower() in _FAILED_CARRY_STATUSES:
89
+ return False
90
+ return True
91
+
92
+
93
+ def _carry_head_commit(carry: Dict[str, Any]) -> str:
94
+ rng = carry.get("stageCommitRange")
95
+ if isinstance(rng, dict) and rng.get("head"):
96
+ return str(rng["head"])
97
+ for key in ("head_sha", "head_commit", "head"):
98
+ v = carry.get(key)
99
+ if v:
100
+ return str(v)
101
+ commits = carry.get("commits")
102
+ if isinstance(commits, list) and commits:
103
+ last = commits[-1]
104
+ if isinstance(last, dict) and last.get("sha"):
105
+ return str(last["sha"])
106
+ return ""
107
+
108
+
109
+ def _carry_dir(plan_run_root: Path) -> Path:
110
+ # consumers.jsonl lives at runs/implementation-planning/; the carry sidecars
111
+ # live at the sibling runs/implementation/carry/.
112
+ return plan_run_root.parent / "implementation" / "carry"
113
+
114
+
115
+ def backfill_done_from_carry(plan_run_root: Path) -> int:
116
+ """Recover missing `done` rows from carry sidecars (carry is SSOT).
117
+
118
+ For every `runs/implementation/carry/stage-<N>.json` that is complete and
119
+ not already recorded as `done` in consumers.jsonl, append a `done` row with
120
+ the head commit read from the carry. Returns the number of rows recovered.
121
+ Stages with no carry or an unfinished carry are skipped, so the dependency
122
+ gate still legitimately blocks genuinely-unstarted stages."""
123
+ carry_dir = _carry_dir(plan_run_root)
124
+ if not carry_dir.is_dir():
125
+ return 0
126
+ existing = read_consumers(plan_run_root)
127
+ done_stages = {r.get("stage") for r in existing if r.get("status") == "done"}
128
+ key_by_stage: Dict[Any, str] = {}
129
+ fallback_key = ""
130
+ for r in existing:
131
+ k = r.get("impl_task_key")
132
+ if k:
133
+ fallback_key = k
134
+ key_by_stage.setdefault(r.get("stage"), k)
135
+ task_root = plan_run_root.parents[1]
136
+ recovered = 0
137
+ for cf in sorted(carry_dir.glob("stage-*.json")):
138
+ try:
139
+ carry = json.loads(cf.read_text(encoding="utf-8"))
140
+ except (json.JSONDecodeError, OSError):
141
+ continue
142
+ if not isinstance(carry, dict):
143
+ continue
144
+ stage = _carry_stage_number(carry, cf.name)
145
+ if stage is None or stage in done_stages:
146
+ continue
147
+ if not _carry_is_complete(carry):
148
+ continue
149
+ head = _carry_head_commit(carry)
150
+ if not head:
151
+ continue
152
+ impl_key = key_by_stage.get(stage) or carry.get("impl_task_key") or fallback_key
153
+ if not impl_key:
154
+ continue
155
+ try:
156
+ carry_path = str(cf.relative_to(task_root))
157
+ except ValueError:
158
+ carry_path = str(cf)
159
+ append_consumer(
160
+ plan_run_root,
161
+ impl_task_key=impl_key,
162
+ stage=stage,
163
+ status="done",
164
+ head_commit=head,
165
+ carry_path=carry_path,
166
+ source="carry-backfill",
167
+ )
168
+ done_stages.add(stage)
169
+ recovered += 1
170
+ return recovered
@@ -87,6 +87,7 @@ def compute_run_paths(
87
87
  task_id: str,
88
88
  task_type: str,
89
89
  run_seq_override: Optional[int] = None,
90
+ stage: Optional[int] = None,
90
91
  ) -> dict:
91
92
  """주어진 identity 와 task-type 에 대해 모든 path/segment 값을 계산해
92
93
  dict 로 돌려준다. 부수효과 없음.
@@ -123,6 +124,15 @@ def compute_run_paths(
123
124
  timeline_file = history_dir / "timeline.json"
124
125
 
125
126
  run_dir = runs_dir / task_type_segment
127
+ # implementation stage isolation: each stage's run artifacts live in a
128
+ # dedicated `stage-<N>` subtree (mirrors the per-stage worktree) so two
129
+ # concurrent `implementation` runs never share reports/state/worker-results.
130
+ # consumers.jsonl + the worktree registry stay at the task-type level (the
131
+ # shared stage ledger / occupancy SSOT); they are computed OUTSIDE this
132
+ # function and are intentionally NOT stage-scoped. Other task-types have no
133
+ # stage concept, so their run_dir is unchanged.
134
+ if task_type_segment == "implementation" and stage is not None:
135
+ run_dir = run_dir / f"stage-{int(stage)}"
126
136
  run_manifests = run_dir / "manifests"
127
137
  run_state = run_dir / "state"
128
138
  run_prompts = run_dir / "prompts"
@@ -208,6 +218,7 @@ def compute_run_paths(
208
218
  "HISTORY_DIR": str(history_dir),
209
219
  "TIMELINE_PATH": str(timeline_file),
210
220
  "RUN_DIR": str(run_dir),
221
+ "RUN_STAGE": "" if stage is None else str(int(stage)),
211
222
  "RUN_MANIFESTS_DIR": str(run_manifests),
212
223
  "RUN_STATE_DIR": str(run_state),
213
224
  "RUN_PROMPTS_DIR": str(run_prompts),