npm - okstra - Versions diffs - 0.67.0 → 0.69.0 - Mend

okstra 0.67.0 → 0.69.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/bin/okstra +25 -0
package/docs/kr/architecture.md +17 -1
package/docs/superpowers/plans/2026-06-10-concurrent-run-team-guard.md +456 -0
package/docs/superpowers/plans/2026-06-10-git-reconcile-stale-sha-recovery.md +1408 -0
package/docs/superpowers/plans/2026-06-10-stage-group-handoff.md +1572 -0
package/docs/superpowers/specs/2026-06-06-stage-worktree-isolation-design.md +1 -1
package/docs/superpowers/specs/2026-06-10-concurrent-run-team-guard-design.md +107 -0
package/docs/superpowers/specs/2026-06-10-git-reconcile-stale-sha-recovery-design.md +105 -0
package/docs/superpowers/specs/2026-06-10-stage-group-handoff-design.md +156 -0
package/package.json +1 -1
package/runtime/BUILD.json +2 -2
package/runtime/agents/SKILL.md +8 -7
package/runtime/agents/workers/claude-worker.md +1 -1
package/runtime/agents/workers/codex-worker.md +3 -3
package/runtime/agents/workers/gemini-worker.md +3 -3
package/runtime/agents/workers/report-writer-worker.md +2 -2
package/runtime/prompts/launch.template.md +2 -2
package/runtime/prompts/profiles/_common-contract.md +6 -6
package/runtime/prompts/profiles/_implementation-deliverable.md +1 -0
package/runtime/prompts/profiles/_implementation-executor.md +3 -1
package/runtime/prompts/profiles/_implementation-verifier.md +1 -0
package/runtime/prompts/profiles/final-verification.md +3 -2
package/runtime/prompts/profiles/improvement-discovery.md +1 -1
package/runtime/prompts/profiles/release-handoff.md +12 -5
package/runtime/prompts/wizard/prompts.ko.json +5 -5
package/runtime/python/okstra_ctl/conformance.py +17 -0
package/runtime/python/okstra_ctl/consumers.py +72 -5
package/runtime/python/okstra_ctl/git_reconcile.py +322 -0
package/runtime/python/okstra_ctl/handoff.py +348 -0
package/runtime/python/okstra_ctl/render.py +44 -2
package/runtime/python/okstra_ctl/run.py +175 -44
package/runtime/python/okstra_ctl/wizard.py +89 -22
package/runtime/python/okstra_ctl/worktree.py +28 -0
package/runtime/python/okstra_ctl/worktree_registry.py +40 -9
package/runtime/python/okstra_token_usage/collect.py +27 -0
package/runtime/skills/okstra-context-loader/SKILL.md +1 -1
package/runtime/skills/okstra-convergence/SKILL.md +3 -3
package/runtime/skills/okstra-report-writer/SKILL.md +8 -8
package/runtime/skills/okstra-run/SKILL.md +43 -3
package/runtime/skills/okstra-team-contract/SKILL.md +7 -7
package/runtime/validators/validate-run.py +51 -11
package/src/_python-helper.mjs +52 -0
package/src/error-log.mjs +19 -0
package/src/git-reconcile.mjs +31 -0
package/src/handoff.mjs +30 -0
package/src/inject-report-index.mjs +22 -0
package/src/render-final-report.mjs +22 -0
package/src/render-views.mjs +9 -48
package/src/spawn-followups.mjs +23 -0
package/src/token-usage.mjs +3 -34

package/runtime/skills/okstra-run/SKILL.md CHANGED

@@ -45,8 +45,9 @@ The wizard tells you *which UI to use* via `kind` (and the optional `multi` flag
 - `kind: "pick_group"` → render a SINGLE `AskUserQuestion` whose questions array maps 1:1 to the wizard's `questions[]`. For each entry use `questions[].label`, `questions[].options[].label`, and `multiSelect: questions[].multi`. Collect the user's chosen `options[].value` per tab, build a JSON object keyed by each `questions[].step`, and submit it as a single literal `--answer '{"lead_model":"opus","claude_model":"default",...}'`. A tab the user leaves at its default still gets its `"default"`/`""` value in the JSON. Never split a `pick_group` into multiple `AskUserQuestion` calls — the wizard already capped it at 4 tabs and emits any remainder as the next prompt.
 - `kind: "text"` → write `label` as a plain text message and consume the user's NEXT message as the answer.
 - `kind: "done"` → input collection finished; move to Step 5.
+- `kind: "aborted"` → the user picked 중단; the wizard is terminally cancelled. Tell the user on one short line that the run setup was aborted, delete the state file (`rm` with the literal path), and stop this skill — do NOT call `render-args` or `render-bundle` (the wizard rejects `render-args` on an aborted state).
-The `branch_confirm` step (shown just before `confirm`) is a normal `pick` step and is rendered the same way — no special handling needed.
+The `branch_confirm` step (shown just before `confirm`) is a normal `pick` step and is rendered the same way — no special handling needed. Its options always include `중단` (abort); `base-ref 다시 고르기` (edit) appears only when a new worktree would be created.
 Never invent additional questions. Never reorder. **Never drop, hide, or merge a `pick` / `pick_group` option** — render every `options[]` entry as its own selectable `AskUserQuestion` choice, including entries that carry a `(default)` / `(recommended)` suffix. Do NOT collapse a multi-option pick into a "recommended + 직접 입력 / Other" shortlist: the wizard's `options[]` array IS the complete, authoritative choice set. Example: the `executor` step always emits `claude` / `codex` / `gemini` — show all three, never just `claude`. The run-prompt recommendation rule (1–2 추천 + 직접 입력) applies ONLY to prompts this skill authors itself (e.g. the conformance-waiver picker), never to wizard-provided `options[]`. Never use `AskUserQuestion` for `text` prompts — the wizard explicitly chose `text` to avoid the picker-Other re-render lag.
@@ -92,7 +93,7 @@ Output: the same `{ok, next}` JSON described above. The first `next` is always `
 ## Step 3: Run the prompt loop
-Repeat until `next.kind == "done"`:
+Repeat until `next.kind == "done"` (or `"aborted"` — terminal cancel, see "How the wizard talks to you"):
 1. **Render** the prompt according to `kind` (and `multi` for pick):
    - `pick` + `multi: false` → `AskUserQuestion` with `multiSelect: false`, `label`, and `options`. The user's chosen option's `value` is the answer string.
@@ -178,7 +179,7 @@ okstra render-bundle \
   --pr-template-path "<args.pr-template-path>"
 ```
-`render-bundle` auto-supplies `--workspace-root` and forces `--render-only`. Stdout prints `okstra task root:`, `okstra instruction-set:`, and the full rendered lead prompt. Parse the labelled lines for `TASK_ROOT` and `INSTRUCTION_SET_PATH`.
+`render-bundle` auto-supplies `--workspace-root` and forces `--render-only`. Stdout prints `okstra task root:`, `okstra instruction-set:`, and the full rendered lead prompt. Parse the labelled lines for `TASK_ROOT` and `INSTRUCTION_SET_PATH`. Also watch for an optional `okstra concurrent-run stages:` label line — present only when a concurrent run is detected (see "동시-run 감지 분기" below).
 The python function underneath is mutex-protected (`~/.okstra/.locks/<task-key>.lock`), writes `run-context-*.json` + `run-inputs-*.json` + all manifests + discovery files, and registers the run in `~/.okstra/recent.jsonl` with status `prepared`.
@@ -196,6 +197,45 @@ This is **never** a lead/worker self-exemption — only the user may waive. Offe
 When the user picks a waiver, append `--qa-waiver "<stageKey>:<reason>"` to the `render-bundle` invocation above. Omit the flag entirely otherwise (do **not** pass `--qa-waiver ""`). A malformed value or unknown `<stageKey>` aborts `render-bundle` with a `PrepareError`.
+### 동시-run 감지 분기 (concurrent-run)
+`render-bundle` stdout 에 `okstra concurrent-run stages: <stages>` 라벨 라인이
+있으면(같은 task-key 의 다른 implementation run 이 `<stages>` 를 점유 중), launch
+프롬프트는 이미 "Concurrent-run: no-team background" 게이트로 렌더돼 있다. 이 라인이
+없으면 동시-run 이 아니므로 이 분기를 건너뛴다. 라인이 있으면 dispatch 전에
+사용자에게 3-옵션 recommendation picker 를 제시한다 (run-prompt recommendation 규칙:
+1–2 추천 + 직접 입력; 이 picker 는 스킬이 author 하는 것이라 wizard `options[]`
+제약과 무관):
+1. (추천) 이대로 no-team background 로 진행 — 이미 렌더된 bundle 을 그대로 사용한다.
+   team 을 만들지 않아 `~/.claude/teams/` race 를 회피한다(Teams split-pane 관찰성만 포기).
+2. 대기 — 지금 dispatch 를 보류한다. stage worktree·run-context 는 보존되므로,
+   점유 중인 다른 run 종료 후 같은 stage 를 resume 으로 재개하면 그때는 정상 team
+   경로다. resume 명령(`okstra-inspect` history → resume)을 사용자에게 출력한다.
+3. 직접 입력.
+### Stale git SHA recovery (git-reconcile gate)
+`render-bundle` 이 `Recorded stage SHAs no longer match the git history` 를 포함한
+`PrepareError` 로 실패하면, okstra 밖에서 git 히스토리가 바뀐 것이다(rebase /
+squash / 리뷰 반영 amend / branch 삭제). 절대 registry/consumers 를 손으로
+고치지 말고 다음 순서로 회복한다:
+1. 에러 메시지에 인쇄된 `okstra git-reconcile … --check --json` 명령을 그대로 실행해
+   stale 리포트를 얻는다. (patch-id 로 내용 동일성이 증명되는 항목은 prepare
+   가 이미 자동 화해했으므로, 여기 남는 것은 confirm 항목뿐이다.)
+2. confirm 항목별로 사용자에게 3-옵션 picker 를 제시한다:
+   - **`stage-<N>` branch 의 현재 tip 으로 재기록 (추천)** — 리뷰 반영 등
+     의도된 수정이 그 branch 에 있을 때.
+   - **다른 ref 직접 입력** — 사용자가 commit/branch/tag 를 직접 지정.
+   - **중단** — 회복하지 않고 run 을 멈춘다.
+3. 선택된 ref 로 `okstra git-reconcile … --apply --stage <N> --use-ref <ref>`
+   를 실행한 뒤, 실패했던 `render-bundle` 을 동일 인자로 재시도한다.
+anchor(`implementation_base_commit`)가 unresolvable 로 보고되면 같은 명령의
+`--reset-anchor <ref>` 를 사용자 확인 후 실행한다. picker 없이 confirm 항목을
+보정하는 것은 금지 — 런타임도 `--use-ref` 없는 confirm 보정을 거부한다.
 ## Step 6: Take over as Claude lead
 Read `<INSTRUCTION_SET_PATH>/claude-execution-prompt.md` verbatim and enter `Claude lead` mode. The lead prompt now points to compact intake artifacts first (`active-run-context`, `analysis-profile.md`, and `analysis-packet.md`); full source files such as `analysis-material.md`, `reference-expectations.md`, and `final-report-template.md` are lazy/fallback inputs. Follow the rendered prompt order, do not preempt it.

package/runtime/skills/okstra-team-contract/SKILL.md CHANGED

@@ -54,7 +54,7 @@ Only workers selected from `recommendedWorkers` in `task-manifest.json` and `res
 ## Operating Rules
-0. **TeamCreate ordering (BLOCKING).** Before issuing any `Agent` dispatch that includes `team_name`, Lead MUST have called `TeamCreate(team_name: "okstra-<task-key>", ...)` in this run and recorded the outcome in team-state as `teamCreate: { attempted: true, status: "ok"|"error", error?: <message> }`. If the Agent tool rejects a dispatch with `"team must be created first or call without team_name"` / `"team을 먼저 생성하거나 team_name 없이 호출해야 합니다"`, the correct response is to go back to Phase 3 and call `TeamCreate` — NOT to strip `team_name` and retry. The no-`team_name` Phase 5 fallback is only legal when `teamCreate.status == "error"` is already recorded; otherwise stripping `team_name` silently degrades the run to in-process background dispatch and loses the Teams split-pane behavior. See [okstra agent SKILL.md Phase 3](../../agents/SKILL.md) for the full team-creation sequence.
+0. **TeamCreate ordering (BLOCKING).** Before issuing any `Agent` dispatch that includes `team_name`, Lead MUST have called `TeamCreate` with the exact team name from the launch prompt's Team Creation Gate block (`okstra-<task-key>`; implementation stage runs append `-s<N>`) in this run and recorded the outcome in team-state as `teamCreate: { attempted: true, status: "ok"|"error", error?: <message> }` plus the name as `teamName`. On a "team already exists" failure, follow the stale-team recovery in [okstra agent SKILL.md Phase 3 step 2-1](../../agents/SKILL.md) — never shell-delete `~/.claude/teams/...` on your own initiative. If the Agent tool rejects a dispatch with `"team must be created first or call without team_name"` / `"team을 먼저 생성하거나 team_name 없이 호출해야 합니다"`, the correct response is to go back to Phase 3 and call `TeamCreate` — NOT to strip `team_name` and retry. The no-`team_name` Phase 5 fallback is legal when `teamCreate.status == "error"` is already recorded, OR when the launch prompt's concurrent-run gate recorded `status: "skipped", reason: "concurrent-run"`; otherwise stripping `team_name` silently degrades the run to in-process background dispatch and loses the Teams split-pane behavior. See [okstra agent SKILL.md Phase 3](../../agents/SKILL.md) for the full team-creation sequence.
 1. `Claude lead` is responsible for orchestration, convergence supervision, and final-report review/approval. It never overrides worker analysis results, and it never authors the final-report file when `Report writer worker` is in the roster.
 2. `Report writer worker` is NOT an analysis worker. It is excluded from Phase 4/5 (initial analysis) and Phase 5.5 (convergence re-verification). It is spawned only in Phase 6 and is the **author** of the final-report file at `runs/<task-type>/reports/final-report-<task-type>-<seq>.md`.
 3. When `Report writer worker` is in the roster, Lead MUST dispatch it in Phase 6. The only legal lead-authored fallback is when a dispatch was attempted and recorded a terminal status of `error` / `timeout` / `not-run` with a concrete logged reason. Speculative reasons such as "session resume constraint" or "team is no longer alive" are NOT valid — Lead can always dispatch a fresh subagent (omit `team_name` if the team is gone).
@@ -303,7 +303,7 @@ Schema:
 Workers MUST omit `source` / `recordedAt` / `agent` / `agentRole` / `model` /
 `taskKey`. Claude lead fills those in when dumping the sidecar to the
 run-level errors log (`runs/<task-type>/logs/errors-<task-type>-<seq>.jsonl`)
-via `scripts/okstra-error-log.py append-from-worker`.
+via `okstra error-log append-from-worker`.
 Workers MUST use only `errorType: "tool-failure"` in the **sidecar file**.
@@ -312,7 +312,7 @@ run-level errors log path or their sidecar path from the
 `runs/<task-type>/...` template syntax. Both absolute paths are delivered
 by Lead via two dispatch-prompt header lines:
-- `**Errors log path:** <absolute path>` — run-level JSONL (`okstra-error-log.py append-observed --out ...`)
+- `**Errors log path:** <absolute path>` — run-level JSONL (`okstra error-log append-observed --out ...`)
 - `**Errors sidecar path:** <absolute path>` — per-worker JSON (`{ "schemaVersion": 1, "errors": [...] }`)
 Lead obtains both paths from the launch prompt's `## Run Logs (error-log
@@ -322,7 +322,7 @@ without proceeding — this is the contractual replacement for the previous
 "derive from template placeholders" behavior, which silently produced
 empty run-level error logs in production.
-- `cli-failure` events are recorded by the wrapper subagent itself (Codex / Gemini), but **directly to the run-level error log** via `okstra-error-log.py append-observed --error-type cli-failure ...` — NOT via the sidecar. The sidecar is an in-process tool-failure channel only.
+- `cli-failure` events are recorded by the wrapper subagent itself (Codex / Gemini), but **directly to the run-level error log** via `okstra error-log append-observed --error-type cli-failure ...` — NOT via the sidecar. The sidecar is an in-process tool-failure channel only.
 - **Wrapper invocation arity.** Both `okstra-codex-exec.sh` and `okstra-gemini-exec.sh` accept four required positional arguments plus an optional fifth `<role>`: `<project-root> <model> <prompt-path> <worktree-path> [<role>]`. The fourth (worktree) argument is **mandatory for implementation phase** and optional otherwise. For codex it becomes `--add-dir <worktree>` (sandbox write access); for gemini it is appended to `--include-directories`. Omitting it during implementation causes the codex sandbox to reject every Edit/Write targeting the worktree with EPERM. Workers extract the path from the `**Worktree:**` / `EXECUTOR_WORKTREE_PATH` / `cwd for every mutating command:` line in the lead prompt. The optional fifth `<role>` is folded into both the caller (worker) pane title `<cli>-<role>-<pid>` and the sibling trace-pane title `<cli>-<role>-<pid>-trace[from=<caller-pane-id>]` (e.g. `codex-worker-93421` ↔ `codex-worker-93421-trace[from=%5]`). `<pid>` is the wrapper's own PID and disambiguates concurrent dispatches of the same role; the embedded caller pane id keeps the trace ↔ worker correlation visible even when the worker pane's title is overwritten by the parent process (Claude Code's TUI emits OSC 2 title escape sequences on its own pane). Always pass the literal string `worker` so the dispatch is self-describing (the wrapper defaults to `worker` if omitted).
 - **Background dispatch + polling contract (Codex / Gemini wrappers).** Both wrapper subagents MUST dispatch `okstra-codex-exec.sh` / `okstra-gemini-exec.sh` via `Bash(run_in_background: true)` and poll with `BashOutput(bash_id)` until the shell reports `status == "completed"`, capped at 30 minutes (1800s) of wall-clock elapsed time. `BashOutput` itself is the wait primitive — call it back-to-back; do NOT insert a standalone `sleep` between polls. The Claude Code harness blocks `sleep` calls of 5 seconds or longer as a circumvention vector and explicitly forbids chaining shorter sleeps inside until-loops to work around the block. Workers that hit the contract bug must NOT self-recover with `until ...; do sleep 2; done` wrappers — that path violates the harness anti-circumvention rule, even though it superficially "works". The legacy "single foreground `Bash` with 120000ms timeout" rule, and the subsequent "60-second cadence with `sleep 60` between polls" rule, are both retired. The current rule applies in **every phase** (analysis runs typically complete in 1–2 `BashOutput` calls, so there is no regression for short jobs). Recording responsibilities:
   - Successful completion: return the wrapper's accumulated stdout from the final `BashOutput`. No log entry.
@@ -330,8 +330,8 @@ empty run-level error logs in production.
   - Polling cap reached: before `KillShell`, perform a one-shot **mtime-grace check** on the wrapper's live log (`<prompt>.log`). If the log was written within the last 90 seconds AND grace has not yet been applied this loop, extend the cap from 1800s → 2100s (one-shot +5min) and continue polling. Otherwise (log stale, OR grace already applied), call `KillShell(shell_id)`, record `cli-failure` with `--exit-code 124 --duration-ms <observed_ms> --message "<wrapper> exceeded polling cap (grace=<applied|not-applied>, last_mtime_age=<n>s)"`, then return the language-specific `*_CLI_TIMEOUT` sentinel. The grace exists to absorb token-budget spikes where the CLI is genuinely still producing output past the 30-minute mark; it is a one-shot soft extension, NOT a loop.
   - Token-usage matching is unaffected: the wrapper subagent stays alive throughout polling, so the wrapper's jsonl timestamp window continues to cover the underlying CLI rollout's full duration (see §"Token-usage accounting" below).
 - **No external timeout on wrapper subagents.** The codex/gemini wrapper subagent's polling loop (with optional mtime grace) is the SINGLE timeout authority for its dispatch. Lead MUST NOT impose a separate `Agent()` call timeout, an outer `Bash` wall-clock deadline, or any other mechanism that terminates the subagent before its own polling cap is reached. Doing so reproduces the historical failure mode that motivated this rule: Lead aborts the subagent at e.g. 18 minutes, the subagent returns nothing, and Lead classifies the role as "no response" while the underlying CLI was actively working. The wrapper's polling cap (30min + optional 5min grace) is calibrated so that, combined with Lead's redispatch policy (see "Lead Redispatch Policy on Result-Missing"), a recoverable single-run failure costs at most ~70 minutes of wall-clock — predictable enough to plan around. If a specific run requires a tighter cap, lower it in the wrapper subagent's polling contract (single source of truth), NOT by layering Lead-side timeouts.
-- `contract-violation` events (C) are recorded by Lead via `okstra-error-log.py append-observed --error-type contract-violation ...` after inspecting worker outputs.
-- Lead's responsibility regarding the sidecar is to dump it to the run-level error log via `okstra-error-log.py append-from-worker` after each worker terminates; Lead does not write into the sidecar.
+- `contract-violation` events (C) are recorded by Lead via `okstra error-log append-observed --error-type contract-violation ...` after inspecting worker outputs.
+- Lead's responsibility regarding the sidecar is to dump it to the run-level error log via `okstra error-log append-from-worker` after each worker terminates; Lead does not write into the sidecar.
 ## Convergence Phase Rules
@@ -406,7 +406,7 @@ okstra token-usage /abs/path/to/run/state/team-state-<task-type>-<seq>.json --wr
 `okstra token-usage` is a thin Node-side wrapper around the python helper installed at `~/.okstra/bin/okstra-token-usage.py`. Calling the python script directly with `python3 "$HOME/..."` is forbidden — the `$HOME` expansion breaks the literal-token permission match and forces a confirmation prompt every call.
 The script reads:
-- `~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl` for the lead and every Claude-side worker (Claude worker, Report writer worker, plus the Claude wrappers around Codex/Gemini workers). Sessions are discovered by `teamName: okstra-<task-id>`, lead is identified by `lead.sessionId`, and other workers are identified by `agentName` (e.g. `claude-worker`, `codex-worker`, `gemini-worker`, `report-writer`). **For this `agentName` match to work, Lead MUST set the Agent `name` arg to `<workerId>-worker` on every dispatch** (see [agents SKILL.md Phase 4 — "Agent `name` on dispatch"](../../agents/SKILL.md)); a worker dispatched without `name` carries no `agentName`, so the collector cannot attribute its session and records it `unavailable` (now surfaced as a `usageSummary.unattributedTeamSessions` entry rather than dropped silently).
+- `~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl` for the lead and every Claude-side worker (Claude worker, Report writer worker, plus the Claude wrappers around Codex/Gemini workers). Sessions are discovered by the recorded team-state `teamName`, lead is identified by `lead.sessionId`, and other workers are identified by `agentName` (e.g. `claude-worker`, `codex-worker`, `gemini-worker`, `report-writer`). **For this `agentName` match to work, Lead MUST set the Agent `name` arg to `<workerId>-worker` on every dispatch** (see [agents SKILL.md Phase 4 — "Agent `name` on dispatch"](../../agents/SKILL.md)); a worker dispatched without `name` carries no `agentName`, so the collector cannot attribute its session and records it `unavailable` (now surfaced as a `usageSummary.unattributedTeamSessions` entry rather than dropped silently).
 - `~/.codex/sessions/Y/M/D/rollout-*.jsonl` for the underlying Codex CLI session (matched by `cwd` and timestamp window of the wrapper subagent). Last `event_msg.token_count.total_token_usage.total_tokens` is the session total.
 - `~/.gemini/tmp/<project>/chats/session-*.json` for the underlying Gemini CLI session. Sum of per-message `tokens.total`.

package/runtime/validators/validate-run.py CHANGED

@@ -621,7 +621,7 @@ def _scan_token_usage_summary(content: str, failures: list[str]) -> None:
                     failures.append(
                         f"Token Usage Summary row `{label_cell or '<unlabeled>'}` has "
                         f"a zero value `{stripped}` — no okstra run consumes zero "
-                        "tokens. Re-run `python3 scripts/okstra-token-usage.py "
+                        "tokens. Re-run `okstra token-usage "
                         "<team-state> --write --summary --substitute-data "
                         "<report-path>` to repopulate from session jsonls. The "
                         "Codex/Gemini CLI row is the only place `$0.00` is "
@@ -748,6 +748,36 @@ def _parse_diff_summary_files(content: str) -> list[str]:
     return _DIFF_ROW_PATH_RE.findall(section.group(0))
+_STAGE_RUN_DIR_RE = re.compile(r"^stage-\d+$")
+def _implementation_stage_name(run_dir: Path) -> str | None:
+    """implementation stage 격리 run(`runs/implementation/stage-<N>`)이면
+    `stage-<N>` 을 반환. 그 외(final-verification 등 task-type 레벨 run)는
+    None — whole-task 스코프."""
+    if run_dir.parent.name == "implementation" and _STAGE_RUN_DIR_RE.match(run_dir.name):
+        return run_dir.name
+    return None
+def _scope_manifest_entries(manifest: dict, stage_name: str | None) -> dict:
+    """게이트 평가 대상 entry 를 run 스코프로 좁힌 manifest 를 반환.
+    implementation 은 한 run = 한 stage 이므로 자기 stageKey
+    (`<task-id>-stage-<N>`) entry 만 게이트한다 — 다른 stage 는 각자의
+    implementation run / final-verification(whole-task) 이 검증한다.
+    suffix 매칭인 이유: stageKey 의 `<task-id>` 는 planning 이 쓴 원문이라
+    task 디렉터리 segment 와 표기가 다를 수 있다.
+    """
+    if stage_name is None:
+        return manifest
+    entries = [
+        e for e in manifest.get("entries", [])
+        if isinstance(e, dict) and str(e.get("stageKey", "")).endswith(f"-{stage_name}")
+    ]
+    return {"entries": entries}
 def _task_root_from_run_dir(run_dir: Path) -> Path:
     """run_dir 에서 `runs` 디렉터리를 앵커로 task_root 를 복원한다.
@@ -770,6 +800,12 @@ def _validate_conformance(report_path: Path, failures: list[str],
     가 없다는 뜻 — 선언을 강제하는 것은 planning 계약(Phase 4)의 몫). 매니페스트가
     있으면 결과 사이드카와 함께 evaluate_conformance 로 판정하고 BLOCKING verdict
     를 run 검증 실패로 승격한다. WAIVED(conditional)/EXEMPT 는 통과시킨다.
+    게이트 스코프: implementation stage 격리 run 은 자기 stage entry 만(결과
+    게이트·diff-surface 교차검증 모두 — 미래 stage 의 미실행이 현재 run 을
+    막으면 안 된다), final-verification 등 task-type 레벨 run 은 전 entry
+    (whole-task). prompts/profiles/_implementation-verifier.md §Tier 3 /
+    final-verification.md 의 스코프 계약과 동형.
     """
     # conformance 산출물은 task-level(<task_root>/qa)에 있어 planning/
     # implementation/final-verification 가 공유한다. report_path 는
@@ -792,8 +828,9 @@ def _validate_conformance(report_path: Path, failures: list[str],
     if schema_errors:
         failures.extend(f"conformance manifest: {e}" for e in schema_errors)
         return
-    results = _load_conformance_results(qa_dir, manifest)
-    for verdict in evaluate_conformance(manifest, results):
+    scoped = _scope_manifest_entries(manifest, _implementation_stage_name(run_dir))
+    results = _load_conformance_results(qa_dir, scoped)
+    for verdict in evaluate_conformance(scoped, results):
         if not verdict.ok:
             failures.append(
                 f"conformance gate BLOCKING for stage {verdict.stage_key}: "
@@ -803,13 +840,14 @@ def _validate_conformance(report_path: Path, failures: list[str],
             )
     changed_files = _parse_diff_summary_files(report_path.read_text(encoding="utf-8"))
     if changed_files:
-        uncovered = detect_surfaces(changed_files, surface_patterns) - manifest_required_surfaces(manifest)
+        uncovered = detect_surfaces(changed_files, surface_patterns) - manifest_required_surfaces(scoped)
         if uncovered:
             failures.append(
                 "conformance gate BLOCKING: implementation diff touches undeclared "
-                f"surface(s) {sorted(uncovered)} — no stage declares `requires` for "
-                "them. Declare a conformance entry (requires=[...]) for the touching "
-                "stage, or an explicit exemption. (silent mock-green 방지 — DEV-9184)"
+                f"surface(s) {sorted(uncovered)} — no in-scope stage declares "
+                "`requires` for them. Declare a conformance entry (requires=[...]) "
+                "for the touching stage, or an explicit exemption. "
+                "(silent mock-green 방지 — DEV-9184)"
             )
@@ -986,7 +1024,7 @@ def validate_team_state_usage(team_state: dict, failures: list[str]) -> None:
     if not summary or not summary.get("collectedAt"):
         failures.append(
             "team-state.usageSummary is empty — Phase 7 token-usage collection was skipped. "
-            "Run `python3 scripts/okstra-token-usage.py <team-state> --write --summary "
+            "Run `okstra token-usage <team-state> --write --summary "
             "--substitute-data <final-report>`."
         )
         return
@@ -1399,11 +1437,13 @@ def _validate_final_verification_consistency(data: dict, failures: list[str]) ->
             f"final-verification: verificationScope must be `whole-task` or "
             f"`single-stage`, got {scope!r}."
         )
-    if scope == "single-stage" and "release-handoff" in routing:
+    if (scope == "single-stage" and "release-handoff" in routing
+            and "release-handoff(stage-group)" not in routing):
         failures.append(
             "final-verification: verificationScope `single-stage` cannot recommend "
-            "release-handoff routing — single-stage is a partial verification and "
-            "release-handoff requires whole-task verification."
+            "plain release-handoff routing — a single-stage accepted verdict may "
+            "only route to `release-handoff(stage-group)` (partial-PR mode); "
+            "whole-task release-handoff requires whole-task verification."
         )

package/src/_python-helper.mjs CHANGED

@@ -1,6 +1,58 @@
 import { spawn } from "node:child_process";
+import { existsSync } from "node:fs";
+import { join, resolve as resolvePath } from "node:path";
+import { fileURLToPath } from "node:url";
 import { buildPythonpath, resolvePaths } from "./paths.mjs";
+function resolveInstalledScript(paths, scriptName) {
+  // Prefer the installed copy under ~/.okstra/bin (what production users run);
+  // fall back to the in-repo source when invoked from a checkout that has not
+  // been installed (dev / CI).
+  const installed = join(paths.bin, scriptName);
+  if (existsSync(installed)) return installed;
+  const repoRoot = fileURLToPath(new URL("..", import.meta.url));
+  const dev = resolvePath(repoRoot, "scripts", scriptName);
+  return existsSync(dev) ? dev : null;
+}
+// Thin spawn shim shared by every `okstra <cmd>` subcommand that fronts a
+// `scripts/okstra-*.py` entry point. Centralizing it keeps PYTHONPATH wiring
+// and installed/dev resolution in one place so skills call `okstra <cmd>`
+// instead of emitting `python3 "$HOME/..."` (which breaks `Bash(okstra:*)`
+// permission matching and prompts on every call).
+export async function runInstalledScript({ scriptName, args, usage, emptyArgsCode = 2 }) {
+  if (args.length === 0) {
+    process.stdout.write(usage);
+    return emptyArgsCode;
+  }
+  // Only a bare `--help` / `-h` prints the wrapper usage. A `--help` that
+  // follows a subcommand (e.g. `error-log append-observed --help`) must reach
+  // the python helper so its own per-subcommand help shows through.
+  if (args.length === 1 && (args[0] === "--help" || args[0] === "-h")) {
+    process.stdout.write(usage);
+    return 0;
+  }
+  const paths = await resolvePaths();
+  const entry = resolveInstalledScript(paths, scriptName);
+  if (!entry) {
+    process.stderr.write(
+      `error: ${scriptName} not found — run 'okstra install' (or 'okstra ensure-installed') first\n`,
+    );
+    return 1;
+  }
+  return await new Promise((resolve) => {
+    const child = spawn("python3", [entry, ...args], {
+      stdio: "inherit",
+      env: { ...process.env, PYTHONPATH: buildPythonpath(paths) },
+    });
+    child.on("error", (err) => {
+      process.stderr.write(`error: failed to spawn python3: ${err.message}\n`);
+      resolve(1);
+    });
+    child.on("close", (code) => resolve(typeof code === "number" ? code : 1));
+  });
+}
 export async function runPythonSnippet({ script, args = [], extraEnv = {} }) {
   const paths = await resolvePaths();
   return new Promise((resolve) => {

package/src/error-log.mjs ADDED

@@ -0,0 +1,19 @@
+import { runInstalledScript } from "./_python-helper.mjs";
+// Wrapper-level help text; printed by runInstalledScript for a bare
+// `--help` / `-h` or when no arguments are given.
+const USAGE = `okstra error-log — append okstra run error events to the run error log
+Wraps the python helper (\`okstra-error-log.py\`) installed under
+\`~/.okstra/bin/\` so skills and worker wrappers call \`okstra error-log\`
+instead of emitting a \`python3 "$HOME/..."\` invocation (which breaks
+\`Bash(okstra:*)\` permission matching and prompts on every call).
+Usage:
+  okstra error-log <subcommand> [...]      # e.g. append-observed / append-from-worker
+All arguments are forwarded verbatim to the python helper. See
+\`okstra error-log append-observed --help\` for the full option list.
+`;
+/**
+ * Entry point for `okstra error-log`.
+ *
+ * @param {string[]} args - CLI arguments following the subcommand name.
+ * @returns {Promise<number>} Exit code produced by runInstalledScript.
+ */
+export async function run(args) {
+  const exitCode = await runInstalledScript({
+    scriptName: "okstra-error-log.py",
+    usage: USAGE,
+    args,
+  });
+  return exitCode;
+}

package/src/git-reconcile.mjs ADDED Viewed

@@ -0,0 +1,31 @@
+import { runPythonModule } from "./_python-helper.mjs";
+const USAGE = `okstra git-reconcile — okstra 밖 git 히스토리 변경 후 기록 화해
+A thin shim over \`python3 -m okstra_ctl.git_reconcile\`. 기본은 검사:
+stale 항목을 JSON 으로 출력하고, confirm 항목이 남으면 exit 2.
+patch-id 로 내용 동일성이 증명되는 항목(auto)은 --apply 로 일괄 보정되고,
+내용이 바뀐 항목(confirm)은 --stage/--use-ref 로만 보정된다.
+Usage:
+  okstra git-reconcile --plan-run-root <dir> --project-id <id> \\
+    --task-group <g> --task-id <t> --work-category <c> \\
+    [--apply] [--stage <N> --use-ref <ref>] [--reset-anchor <ref>] [--json]
+Exit codes:
+  0  stale 없음 또는 보정 완료
+  2  confirm 항목 잔존 (check: 확인 필요 / apply: 일부 미보정)
+  1  error (resolve 실패 등)
+`;
+export async function run(args) {
+  if (args.includes("--help") || args.includes("-h")) {
+    process.stdout.write(USAGE);
+    return 0;
+  }
+  const { code } = await runPythonModule({
+    module: "okstra_ctl.git_reconcile",
+    args,
+  });
+  return code ?? 1;
+}

package/src/handoff.mjs ADDED Viewed

@@ -0,0 +1,30 @@
+import { runPythonModule } from "./_python-helper.mjs";
+const USAGE = `okstra handoff — release-handoff stage-group 보조 (자격/수집/기록)
+A thin shim over \`python3 -m okstra_ctl.handoff\`. JSON 출력.
+Usage:
+  okstra handoff eligible --plan-run-root <dir> --approved-plan <md>
+  okstra handoff assemble --plan-run-root <dir> --approved-plan <md> \\
+    --project-root <dir> --project-id <id> --task-group <g> --task-id <t> \\
+    --work-category <c> --stages 2,3 --base <branch>
+  okstra handoff record-verified --plan-run-root <dir> --stage <N> \\
+    --report-path <md> --data-json <json>
+  okstra handoff record-pr --plan-run-root <dir> --stages 2,3 \\
+    --branch <b> --url <u>
+Exit codes: 0 ok / 1 자격·전제 위반 / 2 stage 간 merge 충돌(conflicts 동봉)
+`;
+export async function run(args) {
+  if (args.includes("--help") || args.includes("-h")) {
+    process.stdout.write(USAGE);
+    return 0;
+  }
+  const { code } = await runPythonModule({
+    module: "okstra_ctl.handoff",
+    args,
+  });
+  return code ?? 1;
+}

package/src/inject-report-index.mjs ADDED Viewed

@@ -0,0 +1,22 @@
+import { runInstalledScript } from "./_python-helper.mjs";
+const USAGE = `okstra inject-report-index — add the top-of-report Index + scroll anchors to a report
+Wraps the python helper (\`okstra-inject-report-index.py\`) installed under
+\`~/.okstra/bin/\` so skills call \`okstra inject-report-index\` instead of
+emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
+permission matching and prompts on every call).
+Usage:
+  okstra inject-report-index <markdown-path> [--report-language <en|ko>]
+All arguments are forwarded verbatim to the python helper.
+`;
+export async function run(args) {
+  return runInstalledScript({
+    scriptName: "okstra-inject-report-index.py",
+    args,
+    usage: USAGE,
+  });
+}

package/src/render-final-report.mjs ADDED Viewed

@@ -0,0 +1,22 @@
+import { runInstalledScript } from "./_python-helper.mjs";
+const USAGE = `okstra render-final-report — render the markdown sibling of a final-report data.json
+Wraps the python helper (\`okstra-render-final-report.py\`) installed under
+\`~/.okstra/bin/\` so skills call \`okstra render-final-report\` instead of
+emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
+permission matching and prompts on every call).
+Usage:
+  okstra render-final-report <path-to-final-report.data.json>
+The argument is forwarded verbatim to the python helper.
+`;
+export async function run(args) {
+  return runInstalledScript({
+    scriptName: "okstra-render-final-report.py",
+    args,
+    usage: USAGE,
+  });
+}

package/src/render-views.mjs CHANGED Viewed

@@ -1,18 +1,14 @@
-import { spawn } from "node:child_process";
-import { existsSync } from "node:fs";
-import { resolve as resolvePath } from "node:path";
-import { fileURLToPath } from "node:url";
-import { resolvePaths } from "./paths.mjs";
+import { runInstalledScript } from "./_python-helper.mjs";
-const USAGE = `okstra render-views — render slim AI + self-contained HTML views of a final-report
+const USAGE = `okstra render-views — render the self-contained HTML view of a final-report
 A thin spawn shim over \`scripts/okstra-render-report-views.py\` (installed
 at \`$HOME/.okstra/bin/okstra-render-report-views.py\`). Reads the final-
-report MD and writes two siblings:
+report MD and writes a single sibling:
-  <stem>.slim.md   — token-saving AI consumption copy
   <stem>.html       — single-file self-contained human view with form
-                       controls on §5 clarification rows
+                       controls on §5 clarification rows (skipped when the
+                       report has no §5 C-* clarification rows)
 Usage:
   okstra render-views <path-to-final-report.md>
@@ -23,45 +19,10 @@ When the optional flags are omitted the script infers from the report
 path and its '- Task Type:' / '- Task Key:' lines.
 `;
-function resolveEntrypoint(paths) {
-  // Prefer the installed copy under ~/.okstra/bin (what production users
-  // see); fall back to the in-repo dev source when running from a
-  // checkout that hasn't been installed.
-  const installed = resolvePath(paths.home, "bin", "okstra-render-report-views.py");
-  if (existsSync(installed)) return installed;
-  const here = fileURLToPath(new URL("..", import.meta.url));
-  const dev = resolvePath(here, "scripts", "okstra-render-report-views.py");
-  if (existsSync(dev)) return dev;
-  return null;
-}
 export async function run(args) {
-  if (args.includes("--help") || args.includes("-h")) {
-    process.stdout.write(USAGE);
-    return 0;
-  }
-  if (args.length === 0) {
-    process.stderr.write("error: missing <path-to-final-report.md>\n");
-    process.stderr.write(USAGE);
-    return 2;
-  }
-  const paths = await resolvePaths();
-  const entry = resolveEntrypoint(paths);
-  if (!entry) {
-    process.stderr.write(
-      "error: okstra-render-report-views.py not found. " +
-      "Run `okstra install` to install the runtime.\n",
-    );
-    return 1;
-  }
-  return await new Promise((res) => {
-    const child = spawn("python3", [entry, ...args], {
-      stdio: ["ignore", "inherit", "inherit"],
-    });
-    child.on("error", (err) => {
-      process.stderr.write(`error: ${err.message}\n`);
-      res(1);
-    });
-    child.on("close", (code) => res(code ?? 0));
+  return runInstalledScript({
+    scriptName: "okstra-render-report-views.py",
+    args,
+    usage: USAGE,
   });
 }

package/src/spawn-followups.mjs ADDED Viewed

@@ -0,0 +1,23 @@
+import { runInstalledScript } from "./_python-helper.mjs";
+const USAGE = `okstra spawn-followups — create follow-up task bundles from a final report
+Wraps the python helper (\`okstra-spawn-followups.py\`) installed under
+\`~/.okstra/bin/\` so skills call \`okstra spawn-followups\` instead of
+emitting a \`python3 "$HOME/..."\` invocation (which breaks \`Bash(okstra:*)\`
+permission matching and prompts on every call).
+Usage:
+  okstra spawn-followups <args...>
+All arguments are forwarded verbatim to the python helper. See
+\`okstra spawn-followups --help\` for the full option list.
+`;
+export async function run(args) {
+  return runInstalledScript({
+    scriptName: "okstra-spawn-followups.py",
+    args,
+    usage: USAGE,
+  });
+}

package/src/token-usage.mjs CHANGED Viewed

@@ -1,7 +1,4 @@
-import { spawn } from "node:child_process";
-import { join } from "node:path";
-import { promises as fs } from "node:fs";
-import { resolvePaths } from "./paths.mjs";
+import { runInstalledScript } from "./_python-helper.mjs";
 const USAGE = `okstra token-usage — collect token usage for a run
@@ -15,37 +12,9 @@ Usage:
   okstra token-usage <state-file> [--write] [--summary] [...]
 Arguments and flags after the state-file path are forwarded verbatim to
-the python helper. See \`python3 ~/.okstra/bin/okstra-token-usage.py --help\`
-for the full option list.
+the python helper. See \`okstra token-usage --help\` for the full option list.
 `;
 export async function run(args) {
-  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
-    process.stdout.write(USAGE);
-    return args.length === 0 ? 2 : 0;
-  }
-  const paths = await resolvePaths();
-  const script = join(paths.bin, "okstra-token-usage.py");
-  try {
-    await fs.access(script);
-  } catch {
-    process.stderr.write(
-      `error: ${script} not found — run 'okstra install' (or 'okstra ensure-installed') first\n`,
-    );
-    return 1;
-  }
-  return await new Promise((resolve) => {
-    const child = spawn("python3", [script, ...args], {
-      stdio: "inherit",
-      env: { ...process.env, PYTHONPATH: paths.pythonpath },
-    });
-    child.on("error", (err) => {
-      process.stderr.write(`error: failed to spawn python3: ${err.message}\n`);
-      resolve(1);
-    });
-    child.on("close", (code) => resolve(typeof code === "number" ? code : 1));
-  });
+  return runInstalledScript({ scriptName: "okstra-token-usage.py", args, usage: USAGE });
 }