okstra 0.67.0 → 0.69.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/bin/okstra +25 -0
  2. package/docs/kr/architecture.md +17 -1
  3. package/docs/superpowers/plans/2026-06-10-concurrent-run-team-guard.md +456 -0
  4. package/docs/superpowers/plans/2026-06-10-git-reconcile-stale-sha-recovery.md +1408 -0
  5. package/docs/superpowers/plans/2026-06-10-stage-group-handoff.md +1572 -0
  6. package/docs/superpowers/specs/2026-06-06-stage-worktree-isolation-design.md +1 -1
  7. package/docs/superpowers/specs/2026-06-10-concurrent-run-team-guard-design.md +107 -0
  8. package/docs/superpowers/specs/2026-06-10-git-reconcile-stale-sha-recovery-design.md +105 -0
  9. package/docs/superpowers/specs/2026-06-10-stage-group-handoff-design.md +156 -0
  10. package/package.json +1 -1
  11. package/runtime/BUILD.json +2 -2
  12. package/runtime/agents/SKILL.md +8 -7
  13. package/runtime/agents/workers/claude-worker.md +1 -1
  14. package/runtime/agents/workers/codex-worker.md +3 -3
  15. package/runtime/agents/workers/gemini-worker.md +3 -3
  16. package/runtime/agents/workers/report-writer-worker.md +2 -2
  17. package/runtime/prompts/launch.template.md +2 -2
  18. package/runtime/prompts/profiles/_common-contract.md +6 -6
  19. package/runtime/prompts/profiles/_implementation-deliverable.md +1 -0
  20. package/runtime/prompts/profiles/_implementation-executor.md +3 -1
  21. package/runtime/prompts/profiles/_implementation-verifier.md +1 -0
  22. package/runtime/prompts/profiles/final-verification.md +3 -2
  23. package/runtime/prompts/profiles/improvement-discovery.md +1 -1
  24. package/runtime/prompts/profiles/release-handoff.md +12 -5
  25. package/runtime/prompts/wizard/prompts.ko.json +5 -5
  26. package/runtime/python/okstra_ctl/conformance.py +17 -0
  27. package/runtime/python/okstra_ctl/consumers.py +72 -5
  28. package/runtime/python/okstra_ctl/git_reconcile.py +322 -0
  29. package/runtime/python/okstra_ctl/handoff.py +348 -0
  30. package/runtime/python/okstra_ctl/render.py +44 -2
  31. package/runtime/python/okstra_ctl/run.py +175 -44
  32. package/runtime/python/okstra_ctl/wizard.py +89 -22
  33. package/runtime/python/okstra_ctl/worktree.py +28 -0
  34. package/runtime/python/okstra_ctl/worktree_registry.py +40 -9
  35. package/runtime/python/okstra_token_usage/collect.py +27 -0
  36. package/runtime/skills/okstra-context-loader/SKILL.md +1 -1
  37. package/runtime/skills/okstra-convergence/SKILL.md +3 -3
  38. package/runtime/skills/okstra-report-writer/SKILL.md +8 -8
  39. package/runtime/skills/okstra-run/SKILL.md +43 -3
  40. package/runtime/skills/okstra-team-contract/SKILL.md +7 -7
  41. package/runtime/validators/validate-run.py +51 -11
  42. package/src/_python-helper.mjs +52 -0
  43. package/src/error-log.mjs +19 -0
  44. package/src/git-reconcile.mjs +31 -0
  45. package/src/handoff.mjs +30 -0
  46. package/src/inject-report-index.mjs +22 -0
  47. package/src/render-final-report.mjs +22 -0
  48. package/src/render-views.mjs +9 -48
  49. package/src/spawn-followups.mjs +23 -0
  50. package/src/token-usage.mjs +3 -34
@@ -45,8 +45,9 @@ The wizard tells you *which UI to use* via `kind` (and the optional `multi` flag
45
45
  - `kind: "pick_group"` → render a SINGLE `AskUserQuestion` whose questions array maps 1:1 to the wizard's `questions[]`. For each entry use `questions[].label`, `questions[].options[].label`, and `multiSelect: questions[].multi`. Collect the user's chosen `options[].value` per tab, build a JSON object keyed by each `questions[].step`, and submit it as a single literal `--answer '{"lead_model":"opus","claude_model":"default",...}'`. A tab the user leaves at its default still gets its `"default"`/`""` value in the JSON. Never split a `pick_group` into multiple `AskUserQuestion` calls — the wizard already capped it at 4 tabs and emits any remainder as the next prompt.
46
46
  - `kind: "text"` → write `label` as a plain text message and consume the user's NEXT message as the answer.
47
47
  - `kind: "done"` → input collection finished; move to Step 5.
48
+ - `kind: "aborted"` → the user picked 중단; the wizard is terminally cancelled. Tell the user on one short line that the run setup was aborted, delete the state file (`rm` with the literal path), and stop this skill — do NOT call `render-args` or `render-bundle` (the wizard rejects `render-args` on an aborted state).
48
49
 
49
- The `branch_confirm` step (shown just before `confirm`) is a normal `pick` step and is rendered the same way — no special handling needed.
50
+ The `branch_confirm` step (shown just before `confirm`) is a normal `pick` step and is rendered the same way — no special handling needed. Its options always include `중단` (abort); `base-ref 다시 고르기` (edit) appears only when a new worktree would be created.
50
51
 
51
52
  Never invent additional questions. Never reorder. **Never drop, hide, or merge a `pick` / `pick_group` option** — render every `options[]` entry as its own selectable `AskUserQuestion` choice, including entries that carry a `(default)` / `(recommended)` suffix. Do NOT collapse a multi-option pick into a "recommended + 직접 입력 / Other" shortlist: the wizard's `options[]` array IS the complete, authoritative choice set. Example: the `executor` step always emits `claude` / `codex` / `gemini` — show all three, never just `claude`. The run-prompt recommendation rule (1–2 추천 + 직접 입력) applies ONLY to prompts this skill authors itself (e.g. the conformance-waiver picker), never to wizard-provided `options[]`. Never use `AskUserQuestion` for `text` prompts — the wizard explicitly chose `text` to avoid the picker-Other re-render lag.
52
53
 
@@ -92,7 +93,7 @@ Output: the same `{ok, next}` JSON described above. The first `next` is always `
92
93
 
93
94
  ## Step 3: Run the prompt loop
94
95
 
95
- Repeat until `next.kind == "done"`:
96
+ Repeat until `next.kind == "done"` (or `"aborted"` — terminal cancel, see "How the wizard talks to you"):
96
97
 
97
98
  1. **Render** the prompt according to `kind` (and `multi` for pick):
98
99
  - `pick` + `multi: false` → `AskUserQuestion` with `multiSelect: false`, `label`, and `options`. The user's chosen option's `value` is the answer string.
@@ -178,7 +179,7 @@ okstra render-bundle \
178
179
  --pr-template-path "<args.pr-template-path>"
179
180
  ```
180
181
 
181
- `render-bundle` auto-supplies `--workspace-root` and forces `--render-only`. Stdout prints `okstra task root:`, `okstra instruction-set:`, and the full rendered lead prompt. Parse the labelled lines for `TASK_ROOT` and `INSTRUCTION_SET_PATH`.
182
+ `render-bundle` auto-supplies `--workspace-root` and forces `--render-only`. Stdout prints `okstra task root:`, `okstra instruction-set:`, and the full rendered lead prompt. Parse the labelled lines for `TASK_ROOT` and `INSTRUCTION_SET_PATH`. Also watch for an optional `okstra concurrent-run stages:` label line — present only when a concurrent run is detected (see "동시-run 감지 분기" below).
182
183
 
183
184
  The python function underneath is mutex-protected (`~/.okstra/.locks/<task-key>.lock`), writes `run-context-*.json` + `run-inputs-*.json` + all manifests + discovery files, and registers the run in `~/.okstra/recent.jsonl` with status `prepared`.
184
185
 
@@ -196,6 +197,45 @@ This is **never** a lead/worker self-exemption — only the user may waive. Offe
196
197
 
197
198
  When the user picks a waiver, append `--qa-waiver "<stageKey>:<reason>"` to the `render-bundle` invocation above. Omit the flag entirely otherwise (do **not** pass `--qa-waiver ""`). A malformed value or unknown `<stageKey>` aborts `render-bundle` with a `PrepareError`.
198
199
 
200
+ ### 동시-run 감지 분기 (concurrent-run)
201
+
202
+ `render-bundle` stdout 에 `okstra concurrent-run stages: <stages>` 라벨 라인이
203
+ 있으면(같은 task-key 의 다른 implementation run 이 `<stages>` 를 점유 중), launch
204
+ 프롬프트는 이미 "Concurrent-run: no-team background" 게이트로 렌더돼 있다. 이 라인이
205
+ 없으면 동시-run 이 아니므로 이 분기를 건너뛴다. 라인이 있으면 dispatch 전에
206
+ 사용자에게 3-옵션 recommendation picker 를 제시한다 (run-prompt recommendation 규칙:
207
+ 1–2 추천 + 직접 입력; 이 picker 는 스킬이 author 하는 것이라 wizard `options[]`
208
+ 제약과 무관):
209
+
210
+ 1. (추천) 이대로 no-team background 로 진행 — 이미 렌더된 bundle 을 그대로 사용한다.
211
+ team 을 만들지 않아 `~/.claude/teams/` race 를 회피한다(Teams split-pane 관찰성만 포기).
212
+ 2. 대기 — 지금 dispatch 를 보류한다. stage worktree·run-context 는 보존되므로,
213
+ 점유 중인 다른 run 종료 후 같은 stage 를 resume 으로 재개하면 그때는 정상 team
214
+ 경로다. resume 명령(`okstra-inspect` history → resume)을 사용자에게 출력한다.
215
+ 3. 직접 입력.
216
+
217
+ ### Stale git SHA recovery (git-reconcile gate)
218
+
219
+ `render-bundle` 이 `Recorded stage SHAs no longer match the git history` 를 포함한
220
+ `PrepareError` 로 실패하면, okstra 밖에서 git 히스토리가 바뀐 것이다(rebase /
221
+ squash / 리뷰 반영 amend / branch 삭제). 절대 registry/consumers 를 손으로
222
+ 고치지 말고 다음 순서로 회복한다:
223
+
224
+ 1. 에러 메시지에 인쇄된 `okstra git-reconcile … --check --json` 명령을 그대로 실행해
225
+ stale 리포트를 얻는다. (patch-id 로 내용 동일성이 증명되는 항목은 prepare
226
+ 가 이미 자동 화해했으므로, 여기 남는 것은 confirm 항목뿐이다.)
227
+ 2. confirm 항목별로 사용자에게 3-옵션 picker 를 제시한다:
228
+ - **`stage-<N>` branch 의 현재 tip 으로 재기록 (추천)** — 리뷰 반영 등
229
+ 의도된 수정이 그 branch 에 있을 때.
230
+ - **다른 ref 직접 입력** — 사용자가 commit/branch/tag 를 직접 지정.
231
+ - **중단** — 회복하지 않고 run 을 멈춘다.
232
+ 3. 선택된 ref 로 `okstra git-reconcile … --apply --stage <N> --use-ref <ref>`
233
+ 를 실행한 뒤, 실패했던 `render-bundle` 을 동일 인자로 재시도한다.
234
+
235
+ anchor(`implementation_base_commit`)가 unresolvable 로 보고되면 같은 명령의
236
+ `--reset-anchor <ref>` 를 사용자 확인 후 실행한다. picker 없이 confirm 항목을
237
+ 보정하는 것은 금지 — 런타임도 `--use-ref` 없는 confirm 보정을 거부한다.
238
+
199
239
  ## Step 6: Take over as Claude lead
200
240
 
201
241
  Read `<INSTRUCTION_SET_PATH>/claude-execution-prompt.md` verbatim and enter `Claude lead` mode. The lead prompt now points to compact intake artifacts first (`active-run-context`, `analysis-profile.md`, and `analysis-packet.md`); full source files such as `analysis-material.md`, `reference-expectations.md`, and `final-report-template.md` are lazy/fallback inputs. Follow the rendered prompt order, do not preempt it.
@@ -54,7 +54,7 @@ Only workers selected from `recommendedWorkers` in `task-manifest.json` and `res
54
54
 
55
55
  ## Operating Rules
56
56
 
57
- 0. **TeamCreate ordering (BLOCKING).** Before issuing any `Agent` dispatch that includes `team_name`, Lead MUST have called `TeamCreate(team_name: "okstra-<task-key>", ...)` in this run and recorded the outcome in team-state as `teamCreate: { attempted: true, status: "ok"|"error", error?: <message> }`. If the Agent tool rejects a dispatch with `"team must be created first or call without team_name"` / `"team을 먼저 생성하거나 team_name 없이 호출해야 합니다"`, the correct response is to go back to Phase 3 and call `TeamCreate` — NOT to strip `team_name` and retry. The no-`team_name` Phase 5 fallback is only legal when `teamCreate.status == "error"` is already recorded; otherwise stripping `team_name` silently degrades the run to in-process background dispatch and loses the Teams split-pane behavior. See [okstra agent SKILL.md Phase 3](../../agents/SKILL.md) for the full team-creation sequence.
57
+ 0. **TeamCreate ordering (BLOCKING).** Before issuing any `Agent` dispatch that includes `team_name`, Lead MUST have called `TeamCreate` with the exact team name from the launch prompt's Team Creation Gate block (`okstra-<task-key>`; implementation stage runs append `-s<N>`) in this run and recorded the outcome in team-state as `teamCreate: { attempted: true, status: "ok"|"error", error?: <message> }` plus the name as `teamName`. On a "team already exists" failure, follow the stale-team recovery in [okstra agent SKILL.md Phase 3 step 2-1](../../agents/SKILL.md) — never shell-delete `~/.claude/teams/...` on your own initiative. If the Agent tool rejects a dispatch with `"team must be created first or call without team_name"` / `"team을 먼저 생성하거나 team_name 없이 호출해야 합니다"`, the correct response is to go back to Phase 3 and call `TeamCreate` — NOT to strip `team_name` and retry. The no-`team_name` Phase 5 fallback is legal when `teamCreate.status == "error"` is already recorded, OR when the launch prompt's concurrent-run gate recorded `status: "skipped", reason: "concurrent-run"`; otherwise stripping `team_name` silently degrades the run to in-process background dispatch and loses the Teams split-pane behavior. See [okstra agent SKILL.md Phase 3](../../agents/SKILL.md) for the full team-creation sequence.
58
58
  1. `Claude lead` is responsible for orchestration, convergence supervision, and final-report review/approval. It never overrides worker analysis results, and it never authors the final-report file when `Report writer worker` is in the roster.
59
59
  2. `Report writer worker` is NOT an analysis worker. It is excluded from Phase 4/5 (initial analysis) and Phase 5.5 (convergence re-verification). It is spawned only in Phase 6 and is the **author** of the final-report file at `runs/<task-type>/reports/final-report-<task-type>-<seq>.md`.
60
60
  3. When `Report writer worker` is in the roster, Lead MUST dispatch it in Phase 6. The only legal lead-authored fallback is when a dispatch was attempted and recorded a terminal status of `error` / `timeout` / `not-run` with a concrete logged reason. Speculative reasons such as "session resume constraint" or "team is no longer alive" are NOT valid — Lead can always dispatch a fresh subagent (omit `team_name` if the team is gone).
@@ -303,7 +303,7 @@ Schema:
303
303
  Workers MUST omit `source` / `recordedAt` / `agent` / `agentRole` / `model` /
304
304
  `taskKey`. Claude lead fills those in when dumping the sidecar to the
305
305
  run-level errors log (`runs/<task-type>/logs/errors-<task-type>-<seq>.jsonl`)
306
- via `scripts/okstra-error-log.py append-from-worker`.
306
+ via `okstra error-log append-from-worker`.
307
307
 
308
308
  Workers MUST use only `errorType: "tool-failure"` in the **sidecar file**.
309
309
 
@@ -312,7 +312,7 @@ run-level errors log path or their sidecar path from the
312
312
  `runs/<task-type>/...` template syntax. Both absolute paths are delivered
313
313
  by Lead via two dispatch-prompt header lines:
314
314
 
315
- - `**Errors log path:** <absolute path>` — run-level JSONL (`okstra-error-log.py append-observed --out ...`)
315
+ - `**Errors log path:** <absolute path>` — run-level JSONL (`okstra error-log append-observed --out ...`)
316
316
  - `**Errors sidecar path:** <absolute path>` — per-worker JSON (`{ "schemaVersion": 1, "errors": [...] }`)
317
317
 
318
318
  Lead obtains both paths from the launch prompt's `## Run Logs (error-log
@@ -322,7 +322,7 @@ without proceeding — this is the contractual replacement for the previous
322
322
  "derive from template placeholders" behavior, which silently produced
323
323
  empty run-level error logs in production.
324
324
 
325
- - `cli-failure` events are recorded by the wrapper subagent itself (Codex / Gemini), but **directly to the run-level error log** via `okstra-error-log.py append-observed --error-type cli-failure ...` — NOT via the sidecar. The sidecar is an in-process tool-failure channel only.
325
+ - `cli-failure` events are recorded by the wrapper subagent itself (Codex / Gemini), but **directly to the run-level error log** via `okstra error-log append-observed --error-type cli-failure ...` — NOT via the sidecar. The sidecar is an in-process tool-failure channel only.
326
326
  - **Wrapper invocation arity.** Both `okstra-codex-exec.sh` and `okstra-gemini-exec.sh` accept four positional arguments plus an optional fifth `<role>`: `<project-root> <model> <prompt-path> <worktree-path> [<role>]`. The first three are always required; the fourth (worktree) argument is **mandatory for the implementation phase** and optional otherwise. For codex it becomes `--add-dir <worktree>` (sandbox write access); for gemini it is appended to `--include-directories`. Omitting it during implementation causes the codex sandbox to reject every Edit/Write targeting the worktree with EPERM. Workers extract the path from the `**Worktree:**` / `EXECUTOR_WORKTREE_PATH` / `cwd for every mutating command:` line in the lead prompt. The optional fifth `<role>` is folded into both the caller (worker) pane title `<cli>-<role>-<pid>` and the sibling trace-pane title `<cli>-<role>-<pid>-trace[from=<caller-pane-id>]` (e.g. `codex-worker-93421` ↔ `codex-worker-93421-trace[from=%5]`). `<pid>` is the wrapper's own PID and disambiguates concurrent dispatches of the same role; the embedded caller pane id keeps the trace ↔ worker correlation visible even when the worker pane's title is overwritten by the parent process (Claude Code's TUI emits OSC 2 title escape sequences on its own pane). Always pass the literal string `worker` so the dispatch is self-describing (the wrapper defaults to `worker` if omitted).
327
327
  - **Background dispatch + polling contract (Codex / Gemini wrappers).** Both wrapper subagents MUST dispatch `okstra-codex-exec.sh` / `okstra-gemini-exec.sh` via `Bash(run_in_background: true)` and poll with `BashOutput(bash_id)` until the shell reports `status == "completed"`, capped at 30 minutes (1800s) of wall-clock elapsed time. `BashOutput` itself is the wait primitive — call it back-to-back; do NOT insert a standalone `sleep` between polls. The Claude Code harness blocks `sleep` calls of 5 seconds or longer as a circumvention vector and explicitly forbids chaining shorter sleeps inside until-loops to work around the block. Workers that hit the contract bug must NOT self-recover with `until ...; do sleep 2; done` wrappers — that path violates the harness anti-circumvention rule, even though it superficially "works". The legacy "single foreground `Bash` with 120000ms timeout" rule, and the subsequent "60-second cadence with `sleep 60` between polls" rule, are both retired. The current rule applies in **every phase** (analysis runs typically complete in 1–2 `BashOutput` calls, so there is no regression for short jobs). Recording responsibilities:
328
328
  - Successful completion: return the wrapper's accumulated stdout from the final `BashOutput`. No log entry.
@@ -330,8 +330,8 @@ empty run-level error logs in production.
330
330
  - Polling cap reached: before `KillShell`, perform a one-shot **mtime-grace check** on the wrapper's live log (`<prompt>.log`). If the log was written within the last 90 seconds AND grace has not yet been applied this loop, extend the cap from 1800s → 2100s (one-shot +5min) and continue polling. Otherwise (log stale, OR grace already applied), call `KillShell(shell_id)`, record `cli-failure` with `--exit-code 124 --duration-ms <observed_ms> --message "<wrapper> exceeded polling cap (grace=<applied|not-applied>, last_mtime_age=<n>s)"`, then return the language-specific `*_CLI_TIMEOUT` sentinel. The grace exists to absorb token-budget spikes where the CLI is genuinely still producing output past the 30-minute mark; it is a one-shot soft extension, NOT a loop.
331
331
  - Token-usage matching is unaffected: the wrapper subagent stays alive throughout polling, so the wrapper's jsonl timestamp window continues to cover the underlying CLI rollout's full duration (see §"Token-usage accounting" below).
332
332
  - **No external timeout on wrapper subagents.** The codex/gemini wrapper subagent's polling loop (with optional mtime grace) is the SINGLE timeout authority for its dispatch. Lead MUST NOT impose a separate `Agent()` call timeout, an outer `Bash` wall-clock deadline, or any other mechanism that terminates the subagent before its own polling cap is reached. Doing so reproduces the historical failure mode that motivated this rule: Lead aborts the subagent at e.g. 18 minutes, the subagent returns nothing, and Lead classifies the role as "no response" while the underlying CLI was actively working. The wrapper's polling cap (30min + optional 5min grace) is calibrated so that, combined with Lead's redispatch policy (see "Lead Redispatch Policy on Result-Missing"), a recoverable single-run failure costs at most ~70 minutes of wall-clock — predictable enough to plan around. If a specific run requires a tighter cap, lower it in the wrapper subagent's polling contract (single source of truth), NOT by layering Lead-side timeouts.
333
- - `contract-violation` events (C) are recorded by Lead via `okstra-error-log.py append-observed --error-type contract-violation ...` after inspecting worker outputs.
334
- - Lead's responsibility regarding the sidecar is to dump it to the run-level error log via `okstra-error-log.py append-from-worker` after each worker terminates; Lead does not write into the sidecar.
333
+ - `contract-violation` events (C) are recorded by Lead via `okstra error-log append-observed --error-type contract-violation ...` after inspecting worker outputs.
334
+ - Lead's responsibility regarding the sidecar is to dump it to the run-level error log via `okstra error-log append-from-worker` after each worker terminates; Lead does not write into the sidecar.
335
335
 
336
336
  ## Convergence Phase Rules
337
337
 
@@ -406,7 +406,7 @@ okstra token-usage /abs/path/to/run/state/team-state-<task-type>-<seq>.json --wr
406
406
  `okstra token-usage` is a thin Node-side wrapper around the python helper installed at `~/.okstra/bin/okstra-token-usage.py`. Calling the python script directly with `python3 "$HOME/..."` is forbidden — the `$HOME` expansion breaks the literal-token permission match and forces a confirmation prompt every call.
407
407
 
408
408
  The script reads:
409
- - `~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl` for the lead and every Claude-side worker (Claude worker, Report writer worker, plus the Claude wrappers around Codex/Gemini workers). Sessions are discovered by `teamName: okstra-<task-id>`, lead is identified by `lead.sessionId`, and other workers are identified by `agentName` (e.g. `claude-worker`, `codex-worker`, `gemini-worker`, `report-writer`). **For this `agentName` match to work, Lead MUST set the Agent `name` arg to `<workerId>-worker` on every dispatch** (see [agents SKILL.md Phase 4 — "Agent `name` on dispatch"](../../agents/SKILL.md)); a worker dispatched without `name` carries no `agentName`, so the collector cannot attribute its session and records it `unavailable` (now surfaced as a `usageSummary.unattributedTeamSessions` entry rather than dropped silently).
409
+ - `~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl` for the lead and every Claude-side worker (Claude worker, Report writer worker, plus the Claude wrappers around Codex/Gemini workers). Sessions are discovered by the recorded team-state `teamName`, lead is identified by `lead.sessionId`, and other workers are identified by `agentName` (e.g. `claude-worker`, `codex-worker`, `gemini-worker`, `report-writer`). **For this `agentName` match to work, Lead MUST set the Agent `name` arg to `<workerId>-worker` on every dispatch** (see [agents SKILL.md Phase 4 — "Agent `name` on dispatch"](../../agents/SKILL.md)); a worker dispatched without `name` carries no `agentName`, so the collector cannot attribute its session and records it `unavailable` (now surfaced as a `usageSummary.unattributedTeamSessions` entry rather than dropped silently).
410
410
  - `~/.codex/sessions/Y/M/D/rollout-*.jsonl` for the underlying Codex CLI session (matched by `cwd` and timestamp window of the wrapper subagent). Last `event_msg.token_count.total_token_usage.total_tokens` is the session total.
411
411
  - `~/.gemini/tmp/<project>/chats/session-*.json` for the underlying Gemini CLI session. Sum of per-message `tokens.total`.
412
412
 
@@ -621,7 +621,7 @@ def _scan_token_usage_summary(content: str, failures: list[str]) -> None:
621
621
  failures.append(
622
622
  f"Token Usage Summary row `{label_cell or '<unlabeled>'}` has "
623
623
  f"a zero value `{stripped}` — no okstra run consumes zero "
624
- "tokens. Re-run `python3 scripts/okstra-token-usage.py "
624
+ "tokens. Re-run `okstra token-usage "
625
625
  "<team-state> --write --summary --substitute-data "
626
626
  "<report-path>` to repopulate from session jsonls. The "
627
627
  "Codex/Gemini CLI row is the only place `$0.00` is "
@@ -748,6 +748,36 @@ def _parse_diff_summary_files(content: str) -> list[str]:
748
748
  return _DIFF_ROW_PATH_RE.findall(section.group(0))
749
749
 
750
750
 
751
+ _STAGE_RUN_DIR_RE = re.compile(r"^stage-\d+$")
752
+
753
+
754
+ def _implementation_stage_name(run_dir: Path) -> str | None:
755
+ """implementation stage 격리 run(`runs/implementation/stage-<N>`)이면
756
+ `stage-<N>` 을 반환. 그 외(final-verification 등 task-type 레벨 run)는
757
+ None — whole-task 스코프."""
758
+ if run_dir.parent.name == "implementation" and _STAGE_RUN_DIR_RE.match(run_dir.name):
759
+ return run_dir.name
760
+ return None
761
+
762
+
763
+ def _scope_manifest_entries(manifest: dict, stage_name: str | None) -> dict:
764
+ """게이트 평가 대상 entry 를 run 스코프로 좁힌 manifest 를 반환.
765
+
766
+ implementation 은 한 run = 한 stage 이므로 자기 stageKey
767
+ (`<task-id>-stage-<N>`) entry 만 게이트한다 — 다른 stage 는 각자의
768
+ implementation run / final-verification(whole-task) 이 검증한다.
769
+ suffix 매칭인 이유: stageKey 의 `<task-id>` 는 planning 이 쓴 원문이라
770
+ task 디렉터리 segment 와 표기가 다를 수 있다.
771
+ """
772
+ if stage_name is None:
773
+ return manifest
774
+ entries = [
775
+ e for e in manifest.get("entries", [])
776
+ if isinstance(e, dict) and str(e.get("stageKey", "")).endswith(f"-{stage_name}")
777
+ ]
778
+ return {"entries": entries}
779
+
780
+
751
781
  def _task_root_from_run_dir(run_dir: Path) -> Path:
752
782
  """run_dir 에서 `runs` 디렉터리를 앵커로 task_root 를 복원한다.
753
783
 
@@ -770,6 +800,12 @@ def _validate_conformance(report_path: Path, failures: list[str],
770
800
  가 없다는 뜻 — 선언을 강제하는 것은 planning 계약(Phase 4)의 몫). 매니페스트가
771
801
  있으면 결과 사이드카와 함께 evaluate_conformance 로 판정하고 BLOCKING verdict
772
802
  를 run 검증 실패로 승격한다. WAIVED(conditional)/EXEMPT 는 통과시킨다.
803
+
804
+ 게이트 스코프: implementation stage 격리 run 은 자기 stage entry 만(결과
805
+ 게이트·diff-surface 교차검증 모두 — 미래 stage 의 미실행이 현재 run 을
806
+ 막으면 안 된다), final-verification 등 task-type 레벨 run 은 전 entry
807
+ (whole-task). prompts/profiles/_implementation-verifier.md §Tier 3 /
808
+ final-verification.md 의 스코프 계약과 동형.
773
809
  """
774
810
  # conformance 산출물은 task-level(<task_root>/qa)에 있어 planning/
775
811
  # implementation/final-verification 가 공유한다. report_path 는
@@ -792,8 +828,9 @@ def _validate_conformance(report_path: Path, failures: list[str],
792
828
  if schema_errors:
793
829
  failures.extend(f"conformance manifest: {e}" for e in schema_errors)
794
830
  return
795
- results = _load_conformance_results(qa_dir, manifest)
796
- for verdict in evaluate_conformance(manifest, results):
831
+ scoped = _scope_manifest_entries(manifest, _implementation_stage_name(run_dir))
832
+ results = _load_conformance_results(qa_dir, scoped)
833
+ for verdict in evaluate_conformance(scoped, results):
797
834
  if not verdict.ok:
798
835
  failures.append(
799
836
  f"conformance gate BLOCKING for stage {verdict.stage_key}: "
@@ -803,13 +840,14 @@ def _validate_conformance(report_path: Path, failures: list[str],
803
840
  )
804
841
  changed_files = _parse_diff_summary_files(report_path.read_text(encoding="utf-8"))
805
842
  if changed_files:
806
- uncovered = detect_surfaces(changed_files, surface_patterns) - manifest_required_surfaces(manifest)
843
+ uncovered = detect_surfaces(changed_files, surface_patterns) - manifest_required_surfaces(scoped)
807
844
  if uncovered:
808
845
  failures.append(
809
846
  "conformance gate BLOCKING: implementation diff touches undeclared "
810
- f"surface(s) {sorted(uncovered)} — no stage declares `requires` for "
811
- "them. Declare a conformance entry (requires=[...]) for the touching "
812
- "stage, or an explicit exemption. (silent mock-green 방지 — DEV-9184)"
847
+ f"surface(s) {sorted(uncovered)} — no in-scope stage declares "
848
+ "`requires` for them. Declare a conformance entry (requires=[...]) "
849
+ "for the touching stage, or an explicit exemption. "
850
+ "(silent mock-green 방지 — DEV-9184)"
813
851
  )
814
852
 
815
853
 
@@ -986,7 +1024,7 @@ def validate_team_state_usage(team_state: dict, failures: list[str]) -> None:
986
1024
  if not summary or not summary.get("collectedAt"):
987
1025
  failures.append(
988
1026
  "team-state.usageSummary is empty — Phase 7 token-usage collection was skipped. "
989
- "Run `python3 scripts/okstra-token-usage.py <team-state> --write --summary "
1027
+ "Run `okstra token-usage <team-state> --write --summary "
990
1028
  "--substitute-data <final-report>`."
991
1029
  )
992
1030
  return
@@ -1399,11 +1437,13 @@ def _validate_final_verification_consistency(data: dict, failures: list[str]) ->
1399
1437
  f"final-verification: verificationScope must be `whole-task` or "
1400
1438
  f"`single-stage`, got {scope!r}."
1401
1439
  )
1402
- if scope == "single-stage" and "release-handoff" in routing:
1440
+ if (scope == "single-stage" and "release-handoff" in routing
1441
+ and "release-handoff(stage-group)" not in routing):
1403
1442
  failures.append(
1404
1443
  "final-verification: verificationScope `single-stage` cannot recommend "
1405
- "release-handoff routing — single-stage is a partial verification and "
1406
- "release-handoff requires whole-task verification."
1444
+ "plain release-handoff routing — a single-stage accepted verdict may "
1445
+ "only route to `release-handoff(stage-group)` (partial-PR mode); "
1446
+ "whole-task release-handoff requires whole-task verification."
1407
1447
  )
1408
1448
 
1409
1449
 
@@ -1,6 +1,58 @@
1
1
  import { spawn } from "node:child_process";
2
+ import { existsSync } from "node:fs";
3
+ import { join, resolve as resolvePath } from "node:path";
4
+ import { fileURLToPath } from "node:url";
2
5
  import { buildPythonpath, resolvePaths } from "./paths.mjs";
3
6
 
7
+ function resolveInstalledScript(paths, scriptName) {
8
+ // Prefer the installed copy under ~/.okstra/bin (what production users run);
9
+ // fall back to the in-repo source when invoked from a checkout that has not
10
+ // been installed (dev / CI).
11
+ const installed = join(paths.bin, scriptName);
12
+ if (existsSync(installed)) return installed;
13
+ const repoRoot = fileURLToPath(new URL("..", import.meta.url));
14
+ const dev = resolvePath(repoRoot, "scripts", scriptName);
15
+ return existsSync(dev) ? dev : null;
16
+ }
17
+
18
+ // Thin spawn shim shared by every `okstra <cmd>` subcommand that fronts a
19
+ // `scripts/okstra-*.py` entry point. Centralizing it keeps PYTHONPATH wiring
20
+ // and installed/dev resolution in one place so skills call `okstra <cmd>`
21
+ // instead of emitting `python3 "$HOME/..."` (which breaks `Bash(okstra:*)`
22
+ // permission matching and prompts on every call).
23
+ export async function runInstalledScript({ scriptName, args, usage, emptyArgsCode = 2 }) {
24
+ if (args.length === 0) {
25
+ process.stdout.write(usage);
26
+ return emptyArgsCode;
27
+ }
28
+ // Only a bare `--help` / `-h` prints the wrapper usage. A `--help` that
29
+ // follows a subcommand (e.g. `error-log append-observed --help`) must reach
30
+ // the python helper so its own per-subcommand help shows through.
31
+ if (args.length === 1 && (args[0] === "--help" || args[0] === "-h")) {
32
+ process.stdout.write(usage);
33
+ return 0;
34
+ }
35
+ const paths = await resolvePaths();
36
+ const entry = resolveInstalledScript(paths, scriptName);
37
+ if (!entry) {
38
+ process.stderr.write(
39
+ `error: ${scriptName} not found — run 'okstra install' (or 'okstra ensure-installed') first\n`,
40
+ );
41
+ return 1;
42
+ }
43
+ return await new Promise((resolve) => {
44
+ const child = spawn("python3", [entry, ...args], {
45
+ stdio: "inherit",
46
+ env: { ...process.env, PYTHONPATH: buildPythonpath(paths) },
47
+ });
48
+ child.on("error", (err) => {
49
+ process.stderr.write(`error: failed to spawn python3: ${err.message}\n`);
50
+ resolve(1);
51
+ });
52
+ child.on("close", (code) => resolve(typeof code === "number" ? code : 1));
53
+ });
54
+ }
55
+
4
56
  export async function runPythonSnippet({ script, args = [], extraEnv = {} }) {
5
57
  const paths = await resolvePaths();
6
58
  return new Promise((resolve) => {
@@ -0,0 +1,19 @@
1
+ import { runInstalledScript } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra error-log — append okstra run error events to the run error log
4
+
5
+ Wraps the python helper (\`okstra-error-log.py\`) installed under
6
+ \`~/.okstra/bin/\` so skills and worker wrappers call \`okstra error-log\`
7
+ instead of emitting a \`python3 "$HOME/..."\` invocation (which breaks
8
+ \`Bash(okstra:*)\` permission matching and prompts on every call).
9
+
10
+ Usage:
11
+ okstra error-log <subcommand> [...] # e.g. append-observed / append-from-worker
12
+
13
+ All arguments are forwarded verbatim to the python helper. See
14
+ \`okstra error-log append-observed --help\` for the full option list.
15
+ `;
16
+
17
+ export async function run(args) {
18
+ return runInstalledScript({ scriptName: "okstra-error-log.py", args, usage: USAGE });
19
+ }
@@ -0,0 +1,31 @@
1
+ import { runPythonModule } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra git-reconcile — okstra 밖 git 히스토리 변경 후 기록 화해
4
+
5
+ A thin shim over \`python3 -m okstra_ctl.git_reconcile\`. 기본은 검사:
6
+ stale 항목을 JSON 으로 출력하고, confirm 항목이 남으면 exit 2.
7
+ patch-id 로 내용 동일성이 증명되는 항목(auto)은 --apply 로 일괄 보정되고,
8
+ 내용이 바뀐 항목(confirm)은 --stage/--use-ref 로만 보정된다.
9
+
10
+ Usage:
11
+ okstra git-reconcile --plan-run-root <dir> --project-id <id> \\
12
+ --task-group <g> --task-id <t> --work-category <c> \\
13
+ [--apply] [--stage <N> --use-ref <ref>] [--reset-anchor <ref>] [--json]
14
+
15
+ Exit codes:
16
+ 0 stale 없음 또는 보정 완료
17
+ 2 confirm 항목 잔존 (check: 확인 필요 / apply: 일부 미보정)
18
+ 1 error (resolve 실패 등)
19
+ `;
20
+
21
+ export async function run(args) {
22
+ if (args.includes("--help") || args.includes("-h")) {
23
+ process.stdout.write(USAGE);
24
+ return 0;
25
+ }
26
+ const { code } = await runPythonModule({
27
+ module: "okstra_ctl.git_reconcile",
28
+ args,
29
+ });
30
+ return code ?? 1;
31
+ }
@@ -0,0 +1,30 @@
1
+ import { runPythonModule } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra handoff — release-handoff stage-group 보조 (자격/수집/기록)
4
+
5
+ A thin shim over \`python3 -m okstra_ctl.handoff\`. JSON 출력.
6
+
7
+ Usage:
8
+ okstra handoff eligible --plan-run-root <dir> --approved-plan <md>
9
+ okstra handoff assemble --plan-run-root <dir> --approved-plan <md> \\
10
+ --project-root <dir> --project-id <id> --task-group <g> --task-id <t> \\
11
+ --work-category <c> --stages 2,3 --base <branch>
12
+ okstra handoff record-verified --plan-run-root <dir> --stage <N> \\
13
+ --report-path <md> --data-json <json>
14
+ okstra handoff record-pr --plan-run-root <dir> --stages 2,3 \\
15
+ --branch <b> --url <u>
16
+
17
+ Exit codes: 0 ok / 1 자격·전제 위반 / 2 stage 간 merge 충돌(conflicts 동봉)
18
+ `;
19
+
20
+ export async function run(args) {
21
+ if (args.includes("--help") || args.includes("-h")) {
22
+ process.stdout.write(USAGE);
23
+ return 0;
24
+ }
25
+ const { code } = await runPythonModule({
26
+ module: "okstra_ctl.handoff",
27
+ args,
28
+ });
29
+ return code ?? 1;
30
+ }
@@ -0,0 +1,22 @@
1
+ import { runInstalledScript } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra inject-report-index — add the top-of-report Index + scroll anchors to a report
4
+
5
+ Wraps the python helper (\`okstra-inject-report-index.py\`) installed under
6
+ \`~/.okstra/bin/\` so skills call \`okstra inject-report-index\` instead of
7
+ emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
8
+ permission matching and prompts on every call).
9
+
10
+ Usage:
11
+ okstra inject-report-index <markdown-path> [--report-language <en|ko>]
12
+
13
+ All arguments are forwarded verbatim to the python helper.
14
+ `;
15
+
16
+ export async function run(args) {
17
+ return runInstalledScript({
18
+ scriptName: "okstra-inject-report-index.py",
19
+ args,
20
+ usage: USAGE,
21
+ });
22
+ }
@@ -0,0 +1,22 @@
1
+ import { runInstalledScript } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra render-final-report — render the markdown sibling of a final-report data.json
4
+
5
+ Wraps the python helper (\`okstra-render-final-report.py\`) installed under
6
+ \`~/.okstra/bin/\` so skills call \`okstra render-final-report\` instead of
7
+ emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
8
+ permission matching and prompts on every call).
9
+
10
+ Usage:
11
+ okstra render-final-report <path-to-final-report.data.json>
12
+
13
+ The argument is forwarded verbatim to the python helper.
14
+ `;
15
+
16
+ export async function run(args) {
17
+ return runInstalledScript({
18
+ scriptName: "okstra-render-final-report.py",
19
+ args,
20
+ usage: USAGE,
21
+ });
22
+ }
@@ -1,18 +1,14 @@
1
- import { spawn } from "node:child_process";
2
- import { existsSync } from "node:fs";
3
- import { resolve as resolvePath } from "node:path";
4
- import { fileURLToPath } from "node:url";
5
- import { resolvePaths } from "./paths.mjs";
1
+ import { runInstalledScript } from "./_python-helper.mjs";
6
2
 
7
- const USAGE = `okstra render-views — render slim AI + self-contained HTML views of a final-report
3
+ const USAGE = `okstra render-views — render the self-contained HTML view of a final-report
8
4
 
9
5
  A thin spawn shim over \`scripts/okstra-render-report-views.py\` (installed
10
6
  at \`$HOME/.okstra/bin/okstra-render-report-views.py\`). Reads the final-
11
- report MD and writes two siblings:
7
+ report MD and writes a single sibling:
12
8
 
13
- <stem>.slim.md — token-saving AI consumption copy
14
9
  <stem>.html — single-file self-contained human view with form
15
- controls on §5 clarification rows
10
+ controls on §5 clarification rows (skipped when the
11
+ report has no §5 C-* clarification rows)
16
12
 
17
13
  Usage:
18
14
  okstra render-views <path-to-final-report.md>
@@ -23,45 +19,10 @@ When the optional flags are omitted the script infers from the report
23
19
  path and its '- Task Type:' / '- Task Key:' lines.
24
20
  `;
25
21
 
26
- function resolveEntrypoint(paths) {
27
- // Prefer the installed copy under ~/.okstra/bin (what production users
28
- // see); fall back to the in-repo dev source when running from a
29
- // checkout that hasn't been installed.
30
- const installed = resolvePath(paths.home, "bin", "okstra-render-report-views.py");
31
- if (existsSync(installed)) return installed;
32
- const here = fileURLToPath(new URL("..", import.meta.url));
33
- const dev = resolvePath(here, "scripts", "okstra-render-report-views.py");
34
- if (existsSync(dev)) return dev;
35
- return null;
36
- }
37
-
38
22
  export async function run(args) {
39
- if (args.includes("--help") || args.includes("-h")) {
40
- process.stdout.write(USAGE);
41
- return 0;
42
- }
43
- if (args.length === 0) {
44
- process.stderr.write("error: missing <path-to-final-report.md>\n");
45
- process.stderr.write(USAGE);
46
- return 2;
47
- }
48
- const paths = await resolvePaths();
49
- const entry = resolveEntrypoint(paths);
50
- if (!entry) {
51
- process.stderr.write(
52
- "error: okstra-render-report-views.py not found. " +
53
- "Run `okstra install` to install the runtime.\n",
54
- );
55
- return 1;
56
- }
57
- return await new Promise((res) => {
58
- const child = spawn("python3", [entry, ...args], {
59
- stdio: ["ignore", "inherit", "inherit"],
60
- });
61
- child.on("error", (err) => {
62
- process.stderr.write(`error: ${err.message}\n`);
63
- res(1);
64
- });
65
- child.on("close", (code) => res(code ?? 0));
23
+ return runInstalledScript({
24
+ scriptName: "okstra-render-report-views.py",
25
+ args,
26
+ usage: USAGE,
66
27
  });
67
28
  }
@@ -0,0 +1,23 @@
1
+ import { runInstalledScript } from "./_python-helper.mjs";
2
+
3
+ const USAGE = `okstra spawn-followups — create follow-up task bundles from a final report
4
+
5
+ Wraps the python helper (\`okstra-spawn-followups.py\`) installed under
6
+ \`~/.okstra/bin/\` so skills call \`okstra spawn-followups\` instead of
7
+ emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
8
+ permission matching and prompts on every call).
9
+
10
+ Usage:
11
+ okstra spawn-followups <args...>
12
+
13
+ All arguments are forwarded verbatim to the python helper. See
14
+ \`okstra spawn-followups --help\` for the full option list.
15
+ `;
16
+
17
+ export async function run(args) {
18
+ return runInstalledScript({
19
+ scriptName: "okstra-spawn-followups.py",
20
+ args,
21
+ usage: USAGE,
22
+ });
23
+ }
@@ -1,7 +1,4 @@
1
- import { spawn } from "node:child_process";
2
- import { join } from "node:path";
3
- import { promises as fs } from "node:fs";
4
- import { resolvePaths } from "./paths.mjs";
1
+ import { runInstalledScript } from "./_python-helper.mjs";
5
2
 
6
3
  const USAGE = `okstra token-usage — collect token usage for a run
7
4
 
@@ -15,37 +12,9 @@ Usage:
15
12
  okstra token-usage <state-file> [--write] [--summary] [...]
16
13
 
17
14
  Arguments and flags after the state-file path are forwarded verbatim to
18
- the python helper. See \`python3 ~/.okstra/bin/okstra-token-usage.py --help\`
19
- for the full option list.
15
+ the python helper. See \`okstra token-usage --help\` for the full option list.
20
16
  `;
21
17
 
22
18
  export async function run(args) {
23
- if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
24
- process.stdout.write(USAGE);
25
- return args.length === 0 ? 2 : 0;
26
- }
27
-
28
- const paths = await resolvePaths();
29
- const script = join(paths.bin, "okstra-token-usage.py");
30
-
31
- try {
32
- await fs.access(script);
33
- } catch {
34
- process.stderr.write(
35
- `error: ${script} not found — run 'okstra install' (or 'okstra ensure-installed') first\n`,
36
- );
37
- return 1;
38
- }
39
-
40
- return await new Promise((resolve) => {
41
- const child = spawn("python3", [script, ...args], {
42
- stdio: "inherit",
43
- env: { ...process.env, PYTHONPATH: paths.pythonpath },
44
- });
45
- child.on("error", (err) => {
46
- process.stderr.write(`error: failed to spawn python3: ${err.message}\n`);
47
- resolve(1);
48
- });
49
- child.on("close", (code) => resolve(typeof code === "number" ? code : 1));
50
- });
19
+ return runInstalledScript({ scriptName: "okstra-token-usage.py", args, usage: USAGE });
51
20
  }