okstra 0.63.0 → 0.64.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/docs/kr/architecture.md +1 -1
  2. package/docs/superpowers/plans/2026-06-09-implementation-run-artifact-stage-isolation.md +320 -0
  3. package/docs/superpowers/plans/2026-06-10-lead-worker-completion-polling-PROBE.md +42 -0
  4. package/docs/superpowers/plans/2026-06-10-lead-worker-completion-polling.md +337 -0
  5. package/docs/superpowers/specs/2026-06-09-executor-model-custom-id-cascade-design.md +66 -0
  6. package/docs/superpowers/specs/2026-06-09-implementation-run-artifact-stage-isolation-design.md +87 -0
  7. package/docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md +113 -0
  8. package/package.json +1 -1
  9. package/runtime/BUILD.json +2 -2
  10. package/runtime/agents/SKILL.md +5 -2
  11. package/runtime/agents/TODO.md +9 -2
  12. package/runtime/agents/workers/claude-worker.md +1 -1
  13. package/runtime/bin/lib/okstra-ctl/cmd-rerun.sh +23 -4
  14. package/runtime/prompts/profiles/_implementation-executor.md +1 -0
  15. package/runtime/prompts/profiles/_implementation-verifier.md +1 -0
  16. package/runtime/prompts/profiles/implementation-planning.md +1 -1
  17. package/runtime/prompts/wizard/prompts.ko.json +17 -1
  18. package/runtime/python/okstra_ctl/backfill.py +23 -4
  19. package/runtime/python/okstra_ctl/consumers.py +118 -1
  20. package/runtime/python/okstra_ctl/paths.py +11 -0
  21. package/runtime/python/okstra_ctl/run.py +147 -67
  22. package/runtime/python/okstra_ctl/run_context.py +2 -0
  23. package/runtime/python/okstra_ctl/wizard.py +127 -29
  24. package/runtime/skills/okstra-convergence/SKILL.md +3 -1
  25. package/runtime/skills/okstra-report-writer/SKILL.md +2 -0
  26. package/runtime/skills/okstra-run/SKILL.md +1 -1
  27. package/runtime/skills/okstra-team-contract/SKILL.md +37 -0
  28. package/runtime/templates/reports/final-report.template.md +1 -1
  29. package/runtime/validators/validate-run.py +20 -3
  30. package/src/install.mjs +21 -0
  31. package/src/uninstall.mjs +17 -17
@@ -91,6 +91,7 @@ Required checkpoints:
91
91
  - `PROGRESS: phase-2-prompts preparing <N> worker prompts` — at the start of Phase 2, before any `Write` to the assigned prompt paths.
92
92
  - `PROGRESS: phase-3-team-create attempting TeamCreate` — immediately before the `TeamCreate` call.
93
93
  - `PROGRESS: phase-4-dispatch worker=<role> model=<model>` — once per worker, immediately before the `Agent` / wrapper call.
94
+ - `PROGRESS: phase-5-poll pending=<n> done=<m>` — emitted on each wakeup while the pending set is non-empty.
94
95
  - `PROGRESS: phase-5-collect worker=<role> status=<terminal-status>` — once per worker, immediately after the result file is verified.
95
96
  - `PROGRESS: phase-5.5-convergence round=<N> queue=<count>` — at the start of each convergence round (Phase 5.5).
96
97
  - `PROGRESS: phase-5.6-critic provider=<provider> gaps=<n>` — when the coverage critic pass runs (Phase 5.6, opt-in). Omitted when `convergence.critic.enabled == false`.
@@ -226,6 +227,8 @@ Spawn **analysis workers only** in the same turn (Phase 4 in Teams mode; Phase 5
226
227
 
227
228
  The no-`team_name` fallback (Phase 5) is only legal when team-state's `teamCreate.status` is `"error"` for this run. If `teamCreate` is missing or `attempted: false`, the correct action when an Agent dispatch is rejected for a missing team is to GO BACK to Phase 3 and call `TeamCreate` — never to strip `team_name` and continue.
228
229
 
230
+ **Completion detection after dispatch (BLOCKING).** The `Agent(... team_name ...)` call returns `Spawned successfully` immediately; that ack is NOT completion. After dispatching the analysis workers (async), Lead MUST detect their completion via the self-scheduled polling protocol in [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Worker-completion detection (self-scheduled polling)" — do NOT restate the algorithm here. Lead MUST NOT end its turn with a prose "waiting for workers" statement; that path stalls the run until the user manually nudges it.
231
+
229
232
  ### Errors log path wiring (BLOCKING)
230
233
 
231
234
  The launch prompt's `## Run Logs (error-log wiring)` section gives Lead the resolved absolute paths for the run-level errors log and every per-worker sidecar. When Lead constructs each worker's dispatch prompt body, Lead MUST inject the matching two header lines verbatim:
@@ -314,7 +317,7 @@ Distinct from Phase 5.5 finding convergence:
314
317
 
315
318
  Lead's responsibilities in this sub-step (in order):
316
319
 
317
- 1. Extract `P-*` plan items from the draft report's `## 5.5 Implementation Plan Deliverables` per the prefix → source-section mapping in the convergence skill.
320
+ 1. Extract `P-*` plan items from the draft report's `## 5.4 Implementation Plan Deliverables` per the prefix → source-section mapping in the convergence skill.
318
321
  2. Dispatch a single plan-body reverify round to every analyser worker in the roster (`claude`, `codex`, and `gemini` when opted in). `Report writer worker` is NOT a participant in this round.
319
322
  3. Aggregate verdicts and resolve the gate result to one of `passed` / `passed-with-dissent` / `blocked-by-disagreement` / `aborted-non-result`.
320
323
  4. Write `runs/<task-type>/state/plan-body-verification.json` (schema in the convergence skill).
@@ -378,7 +381,7 @@ After persistence, reply briefly in the resolved Report Language with: completio
378
381
  | Letting `convergence.maxRounds` default to 2 for `requirements-discovery` | Resolve effective default to `1` for discovery and record in convergence state artifact |
379
382
  | Issuing serial Read calls in Phase 1 | The intake files are independent — issue all Read calls in a single message (parallel) |
380
383
  | Flagging the claude-worker dispatch prompt as "incomplete" because it lacks `[Required reading]` / `[Error reporting]` blocks | Intentional asymmetry — see [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Asymmetry between claude-worker and codex/gemini-worker prompts" |
381
- | Waiting silently while the dispatched `claude-worker` Agent call returns nothing for many minutes (the dev-9495 pattern: two 28+25-minute hangs before lead manually `tmux kill-pane`d) | The claude-worker MUST append a `- PROGRESS: <stage> <ISO-UTC>` line to its audit sidecar (`runs/<task-type>/worker-results/claude-worker-audit-<task-type>-<seq>.md`) at least every 5 minutes (see `agents/workers/claude-worker.md` "Heartbeat" rule). If the sidecar is absent or its mtime is >5 minutes stale, treat the dispatch as `timeout` and redispatch once with a byte-identical prompt; after a second silent hang, record terminal status `timeout` with the missing-sidecar reason in team-state. Lead cannot poll mid-Agent-call but MUST inspect the audit sidecar immediately when the Agent call finally returns a missing sidecar after `completed` is itself a contract violation per the heartbeat rule |
384
+ | Waiting silently while the dispatched `claude-worker` Agent call returns nothing for many minutes (the dev-9495 pattern: two 28+25-minute hangs before lead manually `tmux kill-pane`d) | The claude-worker MUST append a `- PROGRESS: <stage> <ISO-UTC>` line to its audit sidecar (`runs/<task-type>/worker-results/claude-worker-audit-<task-type>-<seq>.md`) at least every 5 minutes (see `agents/workers/claude-worker.md` "Heartbeat" rule). If the sidecar is absent or its mtime is >5 minutes stale, treat the dispatch as `timeout` and redispatch once with a byte-identical prompt; after a second silent hang, record terminal status `timeout` with the missing-sidecar reason in team-state. The authoritative completion signal is the **result file's appearance**, detected via self-scheduled polling (see [okstra-team-contract](./skills/okstra-team-contract/SKILL.md) "Worker-completion detection (self-scheduled polling)") — NOT the Agent-call return, which under `team_name` dispatch is just an immediate `Spawned successfully` ack. The heartbeat sidecar is an auxiliary liveness signal layered on top: a missing sidecar after the result file appears is itself a contract violation per the heartbeat rule |
382
385
  | Re-sending confirmed findings (`full-consensus`/`partial-consensus`/`worker-unique`) to a worker in Round 2 | Queue pruning rule — see [okstra-convergence](./skills/okstra-convergence/SKILL.md) "Round 1-N: Re-verification Loop (queue-pruned)" |
383
386
  | Aggregating a `timeout`/`error` reverify dispatch as `DISAGREE` | Worker failure handling — record as `verification-error` and add to `skippedWorkers[]`. See [okstra-convergence](./skills/okstra-convergence/SKILL.md) "Worker failure handling in reverify" |
384
387
  | Skipping `--substitute-data` in the Phase 7 collector run | Always pass the flag — see [okstra-report-writer](./skills/okstra-report-writer/SKILL.md) "Phase 7 token-usage collector" |
@@ -96,7 +96,11 @@ Lead가 implementation.md 프로필을 따라 Phase 4 dispatch를 시도할 때:
96
96
 
97
97
  ---
98
98
 
99
- ## 수정 B — Leader-side 워커 soft timeout 도입
99
+ ## 수정 B — Leader-side 워커 soft timeout 도입 [해결됨, 2026-06-10]
100
+
101
+ ### 해결 메모 (2026-06-10)
102
+
103
+ 본 항목이 요구한 per-worker soft-timeout 안전장치는 self-scheduled polling 작업으로 구현됐다. lead 가 백그라운드 폴링 루프를 띄울 때 거는 `deadline=$((SECONDS + <per-worker-deadline-seconds>))` 가드([skills/okstra-team-contract/SKILL.md:134](../skills/okstra-team-contract/SKILL.md:134))가 워커별 마감을 강제하고, 초과 시 폴링이 `POLL_TIMEOUT` 으로 종료된다([skills/okstra-team-contract/SKILL.md:136](../skills/okstra-team-contract/SKILL.md:136)). 규약은 "Worker-completion detection (self-scheduled polling)" 섹션([skills/okstra-team-contract/SKILL.md:124](../skills/okstra-team-contract/SKILL.md:124))에 정본으로 명세돼 있다. 설계 근거는 [docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md](../docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md) 참고. 아래 배경/초안 분석은 당시 기록으로 보존한다.
100
104
 
101
105
  ### 배경
102
106
 
@@ -166,10 +170,12 @@ fabricate a `timeout` status against a missing artifact.
166
170
 
167
171
  - 수정 A (Claude worker Stop Condition) 가 먼저 머지되어 일정 기간 사용 데이터를 수집해야, 위 표의 expected duration 값을 실측 기반으로 조정할 수 있다. 초안 값은 보수적으로 잡았으므로 1~2주 사용 후 조정 권장.
168
172
 
169
- ### 참고 — 적용 범위 밖 (수정 C 후보)
173
+ ### 참고 — 적용 범위 밖 (수정 C 후보) [해결됨, 2026-06-10]
170
174
 
171
175
  다음은 수정 B와 별개의 더 큰 설계 변경이며, 별도 브랜치에서 다룬다.
172
176
 
177
+ > 해결 메모 (2026-06-10): 아래 첫 두 항목(background 전환 + worker-results 파일 폴링 루프)은 동일한 self-scheduled polling 프로토콜로 구현됐다 — "Worker-completion detection (self-scheduled polling)" 섹션([skills/okstra-team-contract/SKILL.md:124](../skills/okstra-team-contract/SKILL.md:124)), 설계는 [docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md](../docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md). 진짜 cancellation 통합은 여전히 미해결.
178
+
173
179
  - Phase 4를 foreground multi-Agent에서 `run_in_background: true` + leader polling 방식으로 전환
174
180
  - Lead 가 worker-results 파일 존재 + 헤더 검증으로 완료를 판단하는 폴링 루프 추가
175
181
  - 진짜 cancellation (Agent 강제 종료) 가 가능해지면 그 시점에 통합
@@ -179,3 +185,4 @@ fabricate a `timeout` status against a missing artifact.
179
185
  ## 변경 이력
180
186
 
181
187
  - 2026-05-03 — 작성. 수정 A(Claude worker Stop Condition) 동시 머지에 따른 후속 항목 기록.
188
+ - 2026-06-10 — 수정 B(Leader-side 워커 soft timeout) 및 수정 C 후보(Phase 4 background polling / worker-results 파일 폴링 루프)를 self-scheduled polling 작업으로 해소 표기. 정본: okstra-team-contract "Worker-completion detection (self-scheduled polling)" 섹션, 설계: docs/superpowers/specs/2026-06-10-lead-worker-completion-polling-design.md.
@@ -86,7 +86,7 @@ This contract mirrors the `okstra-team-contract` skill's Worker Output Contract
86
86
 
87
87
  ## Stop Condition (BLOCKING)
88
88
 
89
- You are an in-process Claude subagent Lead's `Agent()` call blocks until you return your final assistant message. Lingering after your worker-results file is on disk extends Phase 4 wall-clock time for the entire run and delays convergence. Be deliberate about stopping.
89
+ When Lead dispatches you with `team_name` (Teams mode), its `Agent()` call returns `Spawned successfully` **immediately** and does NOT block on your completion — Lead detects your completion by self-scheduled polling of your worker-results file (see `okstra-team-contract` "Worker-completion detection (self-scheduled polling)"). Therefore you MUST write your worker-results file at the canonical Result Path before returning: that file's appearance is the ONLY completion signal Lead uses. Lingering after your worker-results file is on disk extends Phase 4 wall-clock time for the entire run and delays convergence. Be deliberate about stopping.
90
90
 
91
91
  After your `Write` to the assigned worker-results file (path provided by Lead as `**Result Path:**` — the canonical anchor header defined in `okstra-team-contract` "Worker Prompt Composition" — or derived under `runs/<task-type>/worker-results/claude-worker-<task-type>-<seq>.md`) succeeds:
92
92
 
@@ -143,13 +143,32 @@ def _brief_path_from_argv(argv, cwd, project_root):
143
143
 
144
144
  batch_id = make_batch_id()
145
145
  items = []
146
- spawned = skipped = 0
146
+ spawned = skipped = rejected = 0
147
147
  # 같은 (project, group, task_id, task_type) 에 속한 다중 rerun 이 같은 batch 내에서
148
148
  # 같은 seq 를 받지 않도록 메모리상 reservation 추적. tmux spawn 후 detached okstra 가
149
149
  # 디스크에 manifest/report 를 쓰기 전에 락이 풀리므로 filesystem-only 예측은 충돌한다.
150
150
  batch_reserved = {}
151
151
  for original in targets:
152
152
  row = find_row_by_run_id(home, original)
153
+ # implementation 은 stage 격리 task-type 이다. 한 run = 런타임에 live
154
+ # registry/consumers 상태로 auto-resolve 되는 단일 stage(_resolve_effective_stages)
155
+ # 이므로, cmd-rerun 은 spawn 전에 어느 stage-<N> 가 선택될지 알 수 없어
156
+ # runs/implementation/stage-<N> 경로도 per-stage run_seq 도 예측할 수 없다.
157
+ # OKSTRA_RUN_SEQ_OVERRIDE(run 전체에 단일 seq) 는 per-stage seq 카운터와
158
+ # 구조적으로 양립 불가하고, 원본 invocation 재생은 진짜 rerun 이 아니다
159
+ # (forced --stage N 은 이미 done 이라 거부되고, --stage auto 는 다른 ready
160
+ # stage 로 조용히 전진한다). 따라서 선예약/spawn 이전에 거부하고 신규
161
+ # implementation run 으로 안내한다.
162
+ if row and slugify_task_segment(row.get("taskType", "")) == "implementation":
163
+ items.append({"originalRunId": original, "newRunId": None,
164
+ "newRunSeq": None, "sessionName": None,
165
+ "status": "rejected", "spawnedAt": None,
166
+ "skipReason": "implementation 은 stage 격리되어 okstra-ctl rerun 으로 "
167
+ "재실행할 수 없습니다. 다음 ready stage 를 실행하려면 "
168
+ "새 implementation run 을 시작하십시오 "
169
+ "(okstra.sh --task-type implementation --stage auto)."})
170
+ rejected += 1
171
+ continue
153
172
  inv = (load_invocation(home, row["projectId"], row["taskGroup"],
154
173
  row["taskId"], row["taskType"], row["runSeq"])
155
174
  if row else None)
@@ -296,14 +315,14 @@ write_batch_meta(home, batch_id, {
296
315
  "selectorRaw": selector_raw, "maxSpawn": max_spawn,
297
316
  "items": items,
298
317
  "summary": {"total": len(targets), "spawned": spawned,
299
- "skipped": skipped, "rejected": 0},
318
+ "skipped": skipped, "rejected": rejected},
300
319
  })
301
320
 
302
321
  dry_run_count = sum(1 for it in items if it["status"] == "dry-run")
303
322
  if dry_run_count:
304
- print(f"batch {batch_id} dry-run: {dry_run_count} skipped: {skipped} rejected: 0")
323
+ print(f"batch {batch_id} dry-run: {dry_run_count} skipped: {skipped} rejected: {rejected}")
305
324
  else:
306
- print(f"batch {batch_id} spawned: {spawned} skipped: {skipped} rejected: 0")
325
+ print(f"batch {batch_id} spawned: {spawned} skipped: {skipped} rejected: {rejected}")
307
326
  print()
308
327
  header = ("RUN-ID", "SESSION-NAME", "ATTACH")
309
328
  rows = []
@@ -28,6 +28,7 @@ until Phase 5 ends, then drop from active context for Phase 6/7.
28
28
  - Doc-only / config-only / pure-rename steps that have no observable runtime behaviour are exempt from the failing-test requirement, but the executor MUST cite the exemption per step in the final report (`TDD exemption: <reason>`).
29
29
  - When the touched area has no existing test harness, the executor MUST stand up the minimum harness needed to host one regression test for this run rather than skipping TDD entirely. Record the harness-bootstrap step as an `Out-of-plan edit` if it is not in the plan.
30
30
  - **DB / IO / SQL changes require real execution — mock-only is NOT validation evidence:** when this run's diff touches DB/IO/SQL (ORM / query-builder code — sequelize / typeorm / prisma / knex / raw SQL — `*.repository.*`, model/entity files, `migrations/**`, `*.sql`, or any changed query string), a mocked unit test cannot observe the SQL the query builder actually emits — a mocked suite once passed while `count({ col: 'FontFamily.fontFamily' })` threw `Unknown column` on the real DB. The executor MUST run the change against a real (or faithful-replica) datastore — the `db-test` validation step (plan `validation` db step, else `project.json.qaCommands.db-test`), targeting a **local / replica** DB — and cite its exact command + exit code in the final report's `Validation evidence`. If no real DB / `db-test` command is reachable, do NOT claim the change verified: label the DB portion `정적 분석상 …, 미검증(실행 안 함)` in the report, surface it in the routing recommendation, and never downplay the real run as "too heavy". `git push` stays forbidden (universal list); the unverified DB state is carried forward so `final-verification` cannot accept it and `release-handoff` cannot push.
31
+ - **Real-IO test isolation (BLOCKING).** A test that exercises a **real** datastore, HTTP endpoint, external service, message queue, or filesystem — a live DB connection / DSN, a real `fetch` / `axios` / `http` request, an actual S3 / queue client, anything the project's normal CI test suite cannot run because that backend is absent — MUST be written under the task's qa directory `<task_root>/qa/` (the `TASK_QA_PATH` token; same directory that holds the Tier 3 conformance manifest). It MUST NOT be written into the project source test tree — `src/**`, `test/**`, `tests/**`, `**/__test__/**`, `**/__tests__/**`, `*.spec.*`, `*.test.*`, or anywhere the project's lint/test globs collect. Two reasons: (a) the project's CI / normal suite has no real DB or network, so a real-IO test placed in source silently breaks the pipeline; (b) it is an okstra verification artifact, and the artifact-home rule confines okstra outputs to `.okstra/`. **The dividing line is the IO, not the intent:** a unit test that stubs/spies only *injected collaborators* (mock — no real socket, no real DB handle) is a TDD red-green artifact and stays in source; the moment a test opens a real connection or makes a real network call it belongs in qa. A stage's real-IO requirement check is a Tier 3 conformance script under `<task_root>/qa/` (declared via the implementation-planning conformance entry) — never smuggle real IO into a `*.spec.*` in source to make it run "as a unit test". The `db-test` real-execution gate above is satisfied by the conformance/db-test path against the replica, NOT by adding a live-DB `*.spec.*` to the project suite.
31
32
  - re-read the approved plan end-to-end and parse the `## 5.5 Stage Map`. Read the **Stage** injected in the launch prompt (`Stage for this implementation run`): the single stage number this run owns. The runtime already selected and reserved this stage (one run = one stage) — do NOT recompute the start stage from `consumers.jsonl`.
32
33
  - load every `runs/<plan-key>/carry/stage-<i>.json` for `i ∈ depends-on(this stage)` and inject them into the executor's working context as "runtime carry-in". For a `depends-on (none)` stage, no sidecar load — task-brief only.
33
34
  - this stage's `depends-on` are all already `status:done`. Its file list, step order, Stage Validation commands, Stage Exit Contract, and rollback path are the authoritative scope.
@@ -96,6 +96,7 @@ Re-running commands proves the diff *builds and passes*; it does NOT prove the d
96
96
  - **Tautological delegation assertion:** a test asserts the SUT result equals a direct call to the same pure helper/collaborator that the SUT delegates to, instead of asserting an independent literal value or observable state.
97
97
  - **Untruthful name:** a read-named function (`get*` / `find*` / `load*`) that writes/inserts/mutates; an adapter or repository name encoding the caller's use-case (`*ForInit`) or hiding a domain rule (`findValid*` / `findActive*`).
98
98
  - **Hexagonal (only when the overlay is loaded):** business logic inside a port body; an adapter method that is not pure I/O (post-fetch JS filtering on domain state, domain-rule evaluation); a domain object declared outside the `domain/` boundary.
99
+ - **Real-IO test in source tree:** a changed/added test under the project source test tree — `src/**`, `test/**`, `tests/**`, `**/__test__/**`, `**/__tests__/**`, `*.spec.*`, `*.test.*` — that opens a **real** DB connection / DSN, makes a real `fetch` / `axios` / `http` request, or otherwise hits real external IO without mocking the injected collaborator (a live handle, not a stub/spy). Real-IO tests MUST live under `<task_root>/qa/` per the executor's *Real-IO test isolation* rule — a live-IO test in source silently breaks the project's CI suite and violates the artifact-home rule. Cite the test file + the real-IO line; recommend moving it to `<task_root>/qa/` (or declaring it as a Tier 3 conformance script). Mock-only unit tests in source are NOT a hit.
99
100
  - **Advisory findings (recorded as recommendations; verdict MAY still PASS):** function >50 effective lines, a single body mixing read+write stages, weak readability, a missing-but-non-critical outcome assertion, newly orphaned private/public code that is safe to remove but not on a critical path, or weak-but-not-misleading names. These land in the verifier result as `should-fix` / `nit` recommendations, not as a `FAIL`.
100
101
  - **Output.** Every finding — blocking or advisory — is a structured item in the verifier's worker result (`path:line`, rule, severity, suggested fix) so it carries into Phase 5.5 convergence and the final report. A blocking hit sets the verifier verdict to `FAIL` with the rule cited, using the same verdict machinery as the Discrepancy rule above. `Claude lead` MUST NOT silently downgrade a cited blocking finding to advisory during synthesis; an override requires a concrete cited reason, exactly as for the Discrepancy rule.
101
102
 
@@ -55,7 +55,7 @@
55
55
  - Section heading contract (BLOCKING — validator scans for these literal English substrings):
56
56
  - The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stage Map`, `Stage Exit Contract`, `Stage Validation`, `Dependency`, `Validation Checklist`, `Rollback`, `Requirement Coverage`. (Approval is no longer a body section — it is the YAML frontmatter `approved` field.)
57
57
  - Korean translations are allowed in parentheses (e.g. `### Recommended Option (권장 옵션)`), but the English keyword must be present verbatim in the heading line.
58
- - The shape and ordering follow `final-report-template.md` section 5.5 (`Implementation Plan Deliverables` + `Stage Map`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and missing English strings are a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
58
+ - The shape and ordering follow `final-report-template.md` sections 5.4 (`Implementation Plan Deliverables`) + 5.5 (`Stage Map`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and missing English strings are a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
59
59
  - Beyond substring matching, when the Plan Body Verification gate result is `passed` / `passed-with-dissent`, `validators/validate-run.py` runs the **structural** Stage Map validator (`validators/validate-implementation-plan-stages.py`) at the planning boundary — the exact `## 5.5 Stage Map` heading, each `## 5.5.<i> Stage <i>:` section with its four required subsections, the per-stage effective step count (≤6), the `depends-on` DAG, and the per-stage vertical-slice contract (S10) are all enforced here, not deferred to the `implementation` entry gate. S10 scans for the literal in-section strings `Slice value:`, `Acceptance:`, and the Stepwise `action`-cell prefixes `RED:` / `GREEN:` (or a `TDD exemption:` line) — keep these tokens verbatim for the same reason as the heading keywords above.
60
60
  - Required deliverable shape (final report, in addition to the standard sections):
61
61
  - at least two implementation options. **Each option must include**:
@@ -157,6 +157,22 @@
157
157
  "label": "approved final-report.md 의 경로를 알려주세요 (APPROVED 마커 필수)",
158
158
  "echo_template": "approved-plan: {value}"
159
159
  },
160
+ "approve_plan_confirm": {
161
+ "label": "이 플랜으로 implementation 을 진행할까요?\n {path}\n· 예 — 진행합니다. 플랜이 아직 승인 전이면 지금 data.json(정본) + 리포트를 함께 approved 로 처리한 뒤 진행합니다. (markdown 만 손으로 고치면 일관성 검증에서 거부되므로 이 경로로 승인하세요.)\n· 아니오 — 진행하지 않습니다.",
162
+ "echo_template": "approve-plan: {value}",
163
+ "options": {
164
+ "yes": "예 — 승인하고 진행",
165
+ "no": "아니오 — 진행하지 않음"
166
+ },
167
+ "echo_variants": {
168
+ "selected": "plan 선택: {path} — 다음 단계에서 승인·진행 여부를 확인합니다",
169
+ "approved": "approved-plan: {path} (승인·진행 확인됨)"
170
+ },
171
+ "errors": {
172
+ "declined": "진행을 선택하지 않으면 implementation 을 시작할 수 없습니다. 진행(예)하거나 위저드를 종료하세요.",
173
+ "still_unapproved": "approve-plan: 승인 처리 후에도 승인 상태가 아닙니다 (data.json/markdown 불일치): {path}"
174
+ }
175
+ },
160
176
  "stage_pick": {
161
177
  "label": "stage 범위를 선택하세요. auto 는 전체 task(모든 stage)를, 특정 번호는 해당 stage 만 대상으로 합니다.",
162
178
  "echo_template": "stage: {value}",
@@ -260,7 +276,7 @@
260
276
  }
261
277
  },
262
278
  "defaults_or_custom": {
263
- "label": "역할별로 어떤 모델을 쓸지 정하는 단계입니다 (참여 워커 구성을 바꾸는 게 아닙니다).\n· 기본값으로 진행 — lead·실행자/워커·report-writer 모두 추천 모델로 두고 바로 진행합니다.\n· 커스터마이즈 — 역할별 모델을 직접 고르고, 추가 directive·관련 task 도 지정합니다.",
279
+ "label": "역할별 모델·실행 옵션 단계입니다 (참여 워커 구성을 바꾸는 게 아닙니다).\n이번 run 의 워커: {workers}\n· 기본값으로 진행 — 모든 역할을 추천 모델로 둡니다. 추천 기본값: lead·report-writer=opus, claude=opus, codex=gpt-5.5, gemini=auto (실제 값은 runtime 기본값으로 해소). directive·관련 task 없이 바로 진행.\n· 커스터마이즈 — 역할별 모델을 직접 고르고, 추가 directive·관련 task 도 지정합니다.",
264
280
  "echo_template": "customize: {value}",
265
281
  "options": {
266
282
  "defaults": "기본값으로 진행 (역할별 추천 모델 그대로)",
@@ -15,6 +15,28 @@ from .jsonl import append_jsonl, read_jsonl, rotate_recent_if_needed
15
15
  from .project_meta import _project_meta_path
16
16
  from .reconcile import _now_iso, normalize_central_status
17
17
 
18
_STAGE_DIR_RE = _re.compile(r"^stage-\d+$")


def _iter_manifest_dirs(runs: Path):
    """Yield every ``manifests`` directory under ``runs/`` that holds run manifests.

    Most task types keep their manifests at ``runs/<task_type>/manifests/``.
    ``implementation`` is stage-isolated, so its artifacts live under
    ``runs/implementation/stage-<N>/manifests/``. Both the direct ``manifests``
    child and every ``stage-<N>/manifests`` child are scanned so that
    backfill/reindex does not miss implementation runs.

    Task-type directories are visited in name order. Stage directories are
    visited in numeric stage order (``stage-2`` before ``stage-10``); a plain
    lexicographic sort would put ``stage-10`` first.
    """
    for type_dir in sorted(p for p in runs.iterdir() if p.is_dir()):
        direct = type_dir / "manifests"
        if direct.is_dir():
            yield direct
        stage_dirs = [
            p for p in type_dir.iterdir()
            if p.is_dir() and _STAGE_DIR_RE.match(p.name)
        ]
        # The regex guarantees the text after the last "-" is all digits.
        # The name is a tie-breaker so e.g. stage-1 / stage-01 stay deterministic.
        stage_dirs.sort(key=lambda p: (int(p.name.rpartition("-")[2]), p.name))
        for stage_dir in stage_dirs:
            stage_manifests = stage_dir / "manifests"
            if stage_manifests.is_dir():
                yield stage_manifests
39
+
18
40
 
19
41
  def discover_project_roots(home: Path) -> List[tuple]:
20
42
  """`~/.okstra/projects/<projectId>/meta.json` 을 권위 소스로 (project_id,
@@ -110,10 +132,7 @@ def backfill_project(home: Path, project_id: str, project_root: Path) -> int:
110
132
  runs = task_dir / "runs"
111
133
  if not runs.is_dir():
112
134
  continue
113
- for type_dir in sorted(p for p in runs.iterdir() if p.is_dir()):
114
- manifests = type_dir / "manifests"
115
- if not manifests.is_dir():
116
- continue
135
+ for manifests in _iter_manifest_dirs(runs):
117
136
  for mf in sorted(manifests.iterdir()):
118
137
  m = manifest_re.match(mf.name)
119
138
  if not m:
@@ -7,8 +7,9 @@ so the same (started / done) record is never duplicated."""
7
7
  from __future__ import annotations
8
8
 
9
9
  import json
10
+ import re
10
11
  from pathlib import Path
11
- from typing import Any, Dict, List
12
+ from typing import Any, Dict, List, Optional
12
13
 
13
14
  from .run_context import consumers_mutex
14
15
 
@@ -51,3 +52,119 @@ def append_consumer(plan_run_root: Path, *, impl_task_key: str, stage: int,
51
52
  }
52
53
  with _path(plan_run_root).open("a", encoding="utf-8") as f:
53
54
  f.write(json.dumps(record, ensure_ascii=False) + "\n")
55
+
56
+
57
+ # --- carry-as-SSOT done recovery ---------------------------------------------
58
+ #
59
+ # A stage's completion evidence is the verifier-authored sidecar at
60
+ # `runs/implementation/carry/stage-<N>.json`. The `done` row in consumers.jsonl
61
+ # is a derived index that the lead appends by hand (per the implementation
62
+ # profile) — so it can be missing even when the stage actually finished. The
63
+ # dependency gate (`_resolve_stage_base_commit`) reads `done.head_commit`, so a
64
+ # missing `done` row wrongly blocks downstream stages. We treat the carry file
65
+ # as the source of truth and backfill the missing `done` rows from it before
66
+ # the gate runs. A stage with no carry, or an unfinished carry, is left blocked
67
+ # on purpose.
68
+
69
+
70
def _carry_stage_number(carry: Dict[str, Any], filename: str) -> Optional[int]:
    """Resolve which stage a carry sidecar belongs to.

    Args:
        carry: Parsed JSON object of the carry sidecar.
        filename: File name of the sidecar, used as a fallback source.

    Returns:
        The integer found under ``stage`` or ``stageNumber`` when present,
        otherwise the number parsed from a ``stage-<N>`` fragment in
        ``filename``, otherwise ``None``.
    """
    for key in ("stage", "stageNumber"):
        v = carry.get(key)
        # bool is a subclass of int: a JSON true/false must not be mistaken
        # for stage 1/0, so fall through to the filename in that case.
        if isinstance(v, int) and not isinstance(v, bool):
            return v
    m = re.search(r"stage-(\d+)", filename)
    return int(m.group(1)) if m else None
77
+
78
+
79
_FAILED_CARRY_STATUSES = ("fail", "failed", "blocked", "error", "aborted")


def _carry_is_complete(carry: Dict[str, Any]) -> bool:
    """Return True unless the carry sidecar explicitly records a failure.

    A carry sidecar is written only after the stage's steps and Stage
    Validation post commands all pass (spec §3.2), so its mere presence marks
    completion. It is treated as complete unless ``status`` names one of
    ``_FAILED_CARRY_STATUSES``. The real backfill guard is whether a head
    commit can be extracted.

    The status is compared case-insensitively and with surrounding whitespace
    removed, so a padded value such as ``"failed\\n"`` still counts as failure.
    """
    status = carry.get("status")
    if status is None:
        return True
    return str(status).strip().lower() not in _FAILED_CARRY_STATUSES
91
+
92
+
93
def _carry_head_commit(carry: Dict[str, Any]) -> str:
    """Extract the stage's head commit from a carry sidecar.

    Sources are probed in priority order and the first truthy value wins:

    1. ``stageCommitRange.head`` (when ``stageCommitRange`` is an object),
    2. the flat keys ``head_sha``, ``head_commit``, ``head``,
    3. the ``sha`` of the last entry in ``commits`` (when that entry is an
       object).

    Returns the value as a string, or ``""`` when none of the sources yields
    a commit.
    """
    commit_range = carry.get("stageCommitRange")
    if isinstance(commit_range, dict):
        range_head = commit_range.get("head")
        if range_head:
            return str(range_head)

    flat_values = (carry.get(field) for field in ("head_sha", "head_commit", "head"))
    flat_head = next((value for value in flat_values if value), None)
    if flat_head:
        return str(flat_head)

    history = carry.get("commits")
    if not isinstance(history, list) or not history:
        return ""
    tip = history[-1]
    if isinstance(tip, dict):
        tip_sha = tip.get("sha")
        if tip_sha:
            return str(tip_sha)
    return ""
107
+
108
+
109
def _carry_dir(plan_run_root: Path) -> Path:
    """Return the carry-sidecar directory that pairs with ``plan_run_root``.

    ``consumers.jsonl`` lives at ``runs/implementation-planning/``; the carry
    sidecars live next to it, at the sibling ``runs/implementation/carry/``.
    """
    runs_dir = plan_run_root.parent
    return runs_dir.joinpath("implementation", "carry")
113
+
114
+
115
def backfill_done_from_carry(plan_run_root: Path) -> int:
    """Recover missing `done` rows from carry sidecars (carry is SSOT).

    For every `runs/implementation/carry/stage-<N>.json` that is complete and
    not already recorded as `done` in consumers.jsonl, append a `done` row with
    the head commit read from the carry. Stages with no carry or an unfinished
    carry are skipped, so the dependency gate still legitimately blocks
    genuinely-unstarted stages.

    A sidecar is also skipped (never fatal) when it cannot be read, is not
    valid UTF-8, is not valid JSON, is not a JSON object, yields no head
    commit, or when no `impl_task_key` can be determined for it.

    Args:
        plan_run_root: Directory that holds consumers.jsonl
            (`runs/implementation-planning/`).

    Returns:
        The number of `done` rows appended.
    """
    carry_dir = _carry_dir(plan_run_root)
    if not carry_dir.is_dir():
        return 0
    existing = read_consumers(plan_run_root)
    done_stages = {r.get("stage") for r in existing if r.get("status") == "done"}
    # Per stage, remember the first impl_task_key recorded for it; also keep
    # the last key seen anywhere as a fallback for stages with no row at all.
    key_by_stage: Dict[Any, str] = {}
    fallback_key = ""
    for r in existing:
        k = r.get("impl_task_key")
        if k:
            fallback_key = k
            key_by_stage.setdefault(r.get("stage"), k)
    # Two levels above plan_run_root (the directory containing `runs/`).
    # `.parent.parent` is used instead of `.parents[1]` so a shallow path
    # cannot raise IndexError.
    task_root = plan_run_root.parent.parent
    recovered = 0
    for cf in sorted(carry_dir.glob("stage-*.json")):
        try:
            carry = json.loads(cf.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError,
            # so one corrupt sidecar cannot abort recovery of the others.
            continue
        if not isinstance(carry, dict):
            continue
        stage = _carry_stage_number(carry, cf.name)
        if stage is None or stage in done_stages:
            continue
        if not _carry_is_complete(carry):
            continue
        head = _carry_head_commit(carry)
        if not head:
            continue
        impl_key = key_by_stage.get(stage) or carry.get("impl_task_key") or fallback_key
        if not impl_key:
            continue
        try:
            carry_path = str(cf.relative_to(task_root))
        except ValueError:
            # The sidecar is not under task_root; record its path as-is.
            carry_path = str(cf)
        append_consumer(
            plan_run_root,
            impl_task_key=impl_key,
            stage=stage,
            status="done",
            head_commit=head,
            carry_path=carry_path,
            source="carry-backfill",
        )
        # Guard against a second sidecar resolving to the same stage number.
        done_stages.add(stage)
        recovered += 1
    return recovered
@@ -87,6 +87,7 @@ def compute_run_paths(
87
87
  task_id: str,
88
88
  task_type: str,
89
89
  run_seq_override: Optional[int] = None,
90
+ stage: Optional[int] = None,
90
91
  ) -> dict:
91
92
  """주어진 identity 와 task-type 에 대해 모든 path/segment 값을 계산해
92
93
  dict 로 돌려준다. 부수효과 없음.
@@ -123,6 +124,15 @@ def compute_run_paths(
123
124
  timeline_file = history_dir / "timeline.json"
124
125
 
125
126
  run_dir = runs_dir / task_type_segment
127
+ # implementation stage isolation: each stage's run artifacts live in a
128
+ # dedicated `stage-<N>` subtree (mirrors the per-stage worktree) so two
129
+ # concurrent `implementation` runs never share reports/state/worker-results.
130
+ # consumers.jsonl + the worktree registry stay at the task-type level (the
131
+ # shared stage ledger / occupancy SSOT); they are computed OUTSIDE this
132
+ # function and are intentionally NOT stage-scoped. Other task-types have no
133
+ # stage concept, so their run_dir is unchanged.
134
+ if task_type_segment == "implementation" and stage is not None:
135
+ run_dir = run_dir / f"stage-{int(stage)}"
126
136
  run_manifests = run_dir / "manifests"
127
137
  run_state = run_dir / "state"
128
138
  run_prompts = run_dir / "prompts"
@@ -208,6 +218,7 @@ def compute_run_paths(
208
218
  "HISTORY_DIR": str(history_dir),
209
219
  "TIMELINE_PATH": str(timeline_file),
210
220
  "RUN_DIR": str(run_dir),
221
+ "RUN_STAGE": "" if stage is None else str(int(stage)),
211
222
  "RUN_MANIFESTS_DIR": str(run_manifests),
212
223
  "RUN_STATE_DIR": str(run_state),
213
224
  "RUN_PROMPTS_DIR": str(run_prompts),