npm - okstra - Versions diffs - 0.34.1 → 0.36.1 - Mend

okstra 0.34.1 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

package/README.kr.md +27 -19
package/README.md +27 -19
package/docs/kr/architecture.md +59 -45
package/docs/kr/cli.md +61 -18
package/docs/pr-template-usage.md +65 -0
package/docs/project-structure-overview.md +353 -354
package/docs/superpowers/plans/2026-05-12-ticket-id-in-reports.md +1 -1
package/docs/superpowers/plans/2026-05-14-convergence-queue-pruning.md +1 -1
package/docs/superpowers/plans/2026-05-17-dual-format-final-report.md +1 -1
package/docs/superpowers/plans/2026-05-20-final-report-language.md +1501 -0
package/docs/superpowers/plans/2026-05-20-implementation-planning-multi-stage.md +1267 -0
package/docs/superpowers/plans/2026-05-20-okstra-run-prompt-sot-b1.md +1007 -0
package/docs/superpowers/plans/2026-05-20-wizard-messages-json-sot.md +720 -0
package/docs/superpowers/plans/2026-05-20-wizard-prompt-json-sot-a1.md +681 -0
package/docs/superpowers/plans/2026-05-21-improvement-discovery-task-type.md +1691 -0
package/docs/superpowers/plans/2026-05-24-implementation-lead-context-slimming.md +1700 -0
package/docs/superpowers/specs/2026-05-20-final-report-language-design.md +383 -0
package/docs/superpowers/specs/2026-05-20-implementation-planning-multi-stage-design.md +320 -0
package/docs/superpowers/specs/2026-05-20-okstra-run-prompt-sot-design.md +299 -0
package/docs/superpowers/specs/2026-05-21-improvement-discovery-task-type-design.md +335 -0
package/docs/task-process/README.md +74 -0
package/docs/task-process/common-flow.md +166 -0
package/docs/task-process/error-analysis.md +101 -0
package/docs/task-process/final-verification.md +167 -0
package/docs/task-process/implementation-planning.md +128 -0
package/docs/task-process/implementation.md +149 -0
package/docs/task-process/release-handoff.md +206 -0
package/docs/task-process/requirements-discovery.md +115 -0
package/package.json +1 -1
package/runtime/BUILD.json +2 -2
package/runtime/agents/SKILL.md +30 -7
package/runtime/agents/workers/claude-worker.md +31 -6
package/runtime/agents/workers/codex-worker.md +37 -10
package/runtime/agents/workers/gemini-worker.md +34 -7
package/runtime/agents/workers/report-writer-worker.md +19 -10
package/runtime/bin/okstra-central.sh +6 -6
package/runtime/bin/okstra-codex-exec.sh +49 -28
package/runtime/bin/okstra-gemini-exec.sh +39 -21
package/runtime/bin/okstra-render-final-report.py +13 -2
package/runtime/bin/okstra-wrapper-status.py +155 -0
package/runtime/bin/okstra.sh +2 -2
package/runtime/prompts/launch.template.md +1 -0
package/runtime/prompts/profiles/_common-contract.md +11 -6
package/runtime/prompts/profiles/_implementation-deliverable.md +53 -0
package/runtime/prompts/profiles/_implementation-executor.md +60 -0
package/runtime/prompts/profiles/_implementation-verifier.md +76 -0
package/runtime/prompts/profiles/error-analysis.md +3 -7
package/runtime/prompts/profiles/implementation-planning.md +22 -21
package/runtime/prompts/profiles/implementation.md +28 -118
package/runtime/prompts/profiles/improvement-discovery.md +42 -0
package/runtime/prompts/profiles/release-handoff.md +1 -1
package/runtime/prompts/profiles/requirements-discovery.md +8 -12
package/runtime/prompts/wizard/prompts.ko.json +230 -0
package/runtime/python/lib/okstra/cli.sh +2 -49
package/runtime/python/lib/okstra/globals.sh +21 -21
package/runtime/python/lib/okstra/interactive.sh +7 -7
package/runtime/python/okstra_ctl/clarification_items.py +3 -9
package/runtime/python/okstra_ctl/consumers.py +53 -0
package/runtime/python/okstra_ctl/final_report_schema.py +0 -7
package/runtime/python/okstra_ctl/i18n.py +73 -0
package/runtime/python/okstra_ctl/improvement_lenses.py +44 -0
package/runtime/python/okstra_ctl/index.py +1 -1
package/runtime/python/okstra_ctl/paths.py +26 -20
package/runtime/python/okstra_ctl/render.py +166 -207
package/runtime/python/okstra_ctl/render_final_report.py +53 -10
package/runtime/python/okstra_ctl/run.py +299 -108
package/runtime/python/okstra_ctl/run_context.py +22 -0
package/runtime/python/okstra_ctl/seeding.py +186 -0
package/runtime/python/okstra_ctl/session.py +65 -7
package/runtime/python/okstra_ctl/wizard.py +348 -127
package/runtime/python/okstra_ctl/workflow.py +21 -2
package/runtime/python/okstra_ctl/worktree.py +54 -1
package/runtime/python/okstra_project/resolver.py +4 -3
package/runtime/python/okstra_token_usage/report.py +2 -2
package/runtime/schemas/final-report-v1.0.schema.json +22 -16
package/runtime/skills/okstra-brief/SKILL.md +102 -218
package/runtime/skills/okstra-convergence/SKILL.md +2 -3
package/runtime/skills/okstra-inspect/SKILL.md +581 -0
package/runtime/skills/okstra-report-writer/SKILL.md +35 -15
package/runtime/skills/okstra-run/SKILL.md +8 -7
package/runtime/skills/okstra-schedule/SKILL.md +14 -157
package/runtime/skills/okstra-setup/SKILL.md +28 -1
package/runtime/skills/okstra-team-contract/SKILL.md +16 -107
package/runtime/templates/okstra.CLAUDE.md +104 -0
package/runtime/templates/reports/brief.template.md +204 -0
package/runtime/templates/reports/final-report.template.md +93 -98
package/runtime/templates/reports/i18n/en.json +135 -0
package/runtime/templates/reports/i18n/ko.json +135 -0
package/runtime/templates/reports/implementation-planning-input.template.md +18 -0
package/runtime/templates/reports/improvement-discovery-input.template.md +78 -0
package/runtime/templates/reports/schedule.template.md +12 -3
package/runtime/templates/reports/task-brief.template.md +2 -2
package/runtime/templates/worker-prompt-preamble.md +108 -0
package/runtime/validators/lib/fixtures.sh +30 -0
package/runtime/validators/lib/runners.sh +1 -1
package/runtime/validators/validate-implementation-plan-stages.py +211 -0
package/runtime/validators/validate-run.py +121 -26
package/runtime/validators/validate-workflow.sh +2 -2
package/runtime/validators/validate_improvement_report.py +275 -0
package/src/config.mjs +18 -0
package/src/install.mjs +41 -14
package/src/setup.mjs +133 -1
package/src/uninstall.mjs +27 -3
package/runtime/skills/okstra-history/SKILL.md +0 -165
package/runtime/skills/okstra-logs/SKILL.md +0 -173
package/runtime/skills/okstra-report-finder/SKILL.md +0 -111
package/runtime/skills/okstra-status/SKILL.md +0 -246
package/runtime/skills/okstra-time-summary/SKILL.md +0 -172

package/runtime/prompts/profiles/implementation.md CHANGED

@@ -14,131 +14,41 @@
   - Executor model: `{{EXECUTOR_MODEL_DISPLAY}}` (launch value: `{{EXECUTOR_MODEL_EXECUTION_VALUE}}`)
   - Wherever this profile mentions the `Executor`, it refers to the role bound above. The other two providers in the roster (`claude` / `codex` / `gemini` minus the executor) are dispatched as **verifiers only** for this run and remain strictly read-only.
 {{INCLUDE:_common-contract.md}}
-- Team contract (phase-specific overrides — `Claude worker` is replaced by `Executor` + verifier set in this phase):
-  - **Executor role:** the `Executor` (bound above) is the **only worker permitted to use Edit / Write / state-mutating Bash commands** on project files. All other workers run read-only. When the executor provider is `codex` or `gemini`, the actual file mutation happens inside the executor CLI's own auto-edit mode (e.g. `codex exec --sandbox workspace-write`, gemini's equivalent) — not through Claude-side Edit/Write tools — but the safety rules in this profile still apply identically.
-  - **Verifier roles:** the verifier slots are `Claude verifier` and `Codex verifier`, plus `Gemini verifier` **only when `gemini` is in the resolved `--workers` roster**. Every verifier in the resolved roster is dispatched regardless of which provider holds the executor role; the executor's own provider is run *separately* as a verifier (a fresh CLI session with no shared context) so that no verdict is produced from the same session that wrote the diff. Verifiers MUST NOT call Edit, Write, or any Bash command that mutates files outside the run's artifact directories. If a verifier wants a fix, it records the recommendation in its worker result; it does not apply the fix itself.
-  - **Verifier QA duties (independent re-run mandate):** every verifier acts as a QA gate, not just a diff reviewer. Trusting the executor's reported evidence is forbidden — verifiers MUST reproduce it themselves from the same worktree path the executor used.
-    - **Two-tier command lookup (NO auto-detection):** verifier obtains the QA command set from exactly two declared sources, in order — there is **no fallback to guessing tools from manifest files**.
-      1. **Tier 1 — plan validation set (task-specific):** every command listed under the approved plan's `validation` block (pre / mid / post).
-      2. **Tier 2 — project baseline (`project.json.qaCommands`):** the project's standing QA baseline declared in `<PROJECT_ROOT>/.project-docs/okstra/project.json` under the `qaCommands` key. Schema (each category is an array of `{ "label", "cmd", "language"? }` objects):
-         ```json
-         {
-           "qaCommands": {
-             "lint":      [{ "label": "cargo clippy", "cmd": "cargo clippy --all-targets -- -D warnings", "language": "rust" }],
-             "format":    [{ "label": "cargo fmt",    "cmd": "cargo fmt --check",                          "language": "rust" }],
-             "typecheck": [{ "label": "tsc",          "cmd": "pnpm exec tsc --noEmit",                     "language": "ts"   }],
-             "test":      [{ "label": "cargo test",   "cmd": "cargo test --workspace --locked",            "language": "rust" }]
-           }
-         }
-         ```
-         `language` is optional; when present, verifier MAY skip categories whose `language` is not represented in this run's diff (recorded as `qa-command skipped: <label> (language=<x> not in diff)`). Absent `language` means "always run".
-    - **Execution rule:** Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Forbidden actions).
-    - **Missing-tier handling:** if a tier is empty or absent, verifier records the single line `qa-command not configured: <category>` per missing category (`lint` / `format` / `typecheck` / `test`) in the worker result and proceeds — silent omission is a contract violation. Verifier MUST NOT auto-detect or invent a command in this case; the user/operator must declare it in `project.json.qaCommands` or in the plan.
-    - **`cmd` field deny-list (Tier 2 validation):** the runtime AND the verifier MUST reject any `cmd` containing tokens that imply mutation: `--fix`, `--write`, ` -w` (gofmt write), ` -u` (jest snapshot update), `--update-snapshots`, `--snapshot-update`, `--update-goldens`, `INSTA_UPDATE=` (with any value other than `no`), `cargo insta accept`, `npm install` (without `ci`), `cargo update`, `pip install -U`, `pnpm add`, `bun add`. Encountering a denied token aborts the verifier run with `contract-violated` and the operator is asked to re-declare the command in check-only form.
-    - **Discrepancy rule:** if the verifier's re-run result differs from what the executor reported (a passing test fails on re-run, a clean lint surfaces warnings, an exit code mismatches), the verifier MUST issue verdict `FAIL` with the divergence cited. `Claude lead` MUST NOT silently prefer the executor's evidence over a verifier's reproduced result during synthesis; if it overrides, it MUST cite a concrete reproduction-time reason (flaky-test commit-cited, environment delta documented) — handwaving is not allowed.
-    - **Read-only command log (per verifier):** the worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order. No mutating command may appear in this block. This log is copied into the final report's verifier result section verbatim.
-    - **Verifier evidence is independent of executor evidence:** the final report keeps both — executor's `Validation evidence` AND each verifier's `Read-only command log` — so reviewers can compare them line-by-line.
-  - Session isolation — not model-variant divergence — is the primary self-review safeguard: each verifier is a separate CLI invocation with its own context window, so reusing the same model variant for executor and same-provider verifier is acceptable. Different model variants (e.g. executor=opus / Claude verifier=sonnet) remain recommended when available.
-  - Phase-specific model defaults override the shared defaults: `Claude verifier`=`sonnet`, `Codex verifier`=`gpt-5.5`, `Gemini verifier`=`auto` (only when present in the roster). The `Executor`'s model is taken from the provider-specific worker model corresponding to `--executor`: claude→`--claude-model` (default `sonnet`, override to `opus` recommended when this run's executor is claude), codex→`--codex-model` (default `gpt-5.5`), gemini→`--gemini-model` (default `auto`).
-  - **All-verifier-failure policy**: if every verifier present in the resolved roster (`Claude verifier`, `Codex verifier`, and `Gemini verifier` when opted in) ends with a non-result terminal status (`timeout`, `error`, `not-run`) — i.e. zero independent verdicts were produced — the run MUST end with status `blocked` and route to a follow-up `error-analysis` run. `Claude lead` MUST NOT substitute its own verdict in place of the missing verifier outputs; synthesis requires at least one independent verifier's verdict. If one or more verifiers fail but at least one returns a verdict, the run proceeds with the surviving verdict(s) and the final report MUST explicitly notate which verifiers were unavailable, with the captured error / timeout evidence per failed verifier.
 - Pre-implementation gate (mandatory — refuse to start if any item fails):
   - the run brief MUST cite `--approved-plan <path>` pointing to a `final-report.md` produced by a prior `implementation-planning` run located under `runs/implementation-planning/.../reports/final-report.md`
-  - that file MUST contain a `User Approval Request` block (canonically placed at the **top of the report**, immediately under the metadata header) AND a recorded user approval marker. The canonical, recommended form is the single markdown checkbox line `- [x] Approved`. The runtime regex in `okstra_ctl.run._validate_approved_plan` also accepts (case-insensitive, line-anchored, optional leading `-`/`*`/`+` bullet): `APPROVED` (alone, followed by `:`, or end-of-line), `[x] Approved`, or `User Approval: APPROVED|granted|yes`. Free-form approvals such as "lgtm", "go ahead", or paraphrased confirmations are intentionally NOT accepted; if the user's approval is informal, re-edit the plan file to flip the top checkbox to `- [x] Approved` before invoking the implementation run.
-  - Two equally-valid approval paths exist (both end up satisfying the same regex gate):
-    - **Manual edit** — the user opens the report, flips `- [ ] Approved` to `- [x] Approved`, saves, then runs `okstra ... --task-type implementation --approved-plan <path>`.
-    - **CLI ack** — the user runs `okstra ... --task-type implementation --approved-plan <path> --approve`. The CLI invocation itself is modelled as the user's act of approval; the runtime (`okstra_ctl.run._apply_cli_approval`) flips the checkbox in the report file and appends an audit line `- 승인 일시 (CLI ack): <ISO8601> — recorded by \`okstra --approve\`` before the standard regex validation runs. Use this when running unattended or when you want a single command to both approve and launch the next phase.
-  - The `--approve` flag is **only meaningful with `--task-type implementation` and `--approved-plan <path>`**. Passing it with any other task-type causes `PrepareError` (the runtime refuses to silently ignore approval signals). It is also a no-op if the file already carries a valid approval marker (idempotent — only an audit line is appended, the marker is not re-toggled).
-  - the file's `Recommended option` and its bite-sized step list become the authoritative scope for this run; any deviation must be justified in the final report and routed back to a new `implementation-planning` run instead of being silently expanded.
+  - that file's YAML frontmatter MUST carry `approved: true`. report-writer emits `approved: false` by default; the user flips it to `true` to authorise this run. Free-form approvals such as "lgtm" / "go ahead" / paraphrased confirmations are NOT accepted; re-edit the plan file's frontmatter to `approved: true` before invoking implementation, or pass `--approve` so the CLI flips it on the user's behalf (`okstra_ctl.run._apply_cli_approval`).
+  - The `--approve` flag is meaningful ONLY with `--task-type implementation` and `--approved-plan <path>`; any other use raises `PrepareError`. Idempotent — re-running with `approved: true` already set appends an audit line but does NOT re-toggle.
+  - the file's `Recommended option` and its bite-sized step list become the authoritative scope for this run; deviations must be justified in the final report and routed back to a new `implementation-planning` run rather than silently expanded.
 - Task worktree (provisioned by `okstra-ctl` at the first phase's run-prep time, reused for every subsequent phase of this task-key):
   - Status: `{{EXECUTOR_WORKTREE_STATUS}}` (one of: `created` | `reused` | `skipped-in-worktree` | `skipped-not-git`)
-  - Working tree path: `{{EXECUTOR_WORKTREE_PATH}}` — when status is `created` or `reused`, this is the task's `git worktree` rooted at `~/.okstra/worktrees/<project>/<task-group>/<task-id>/` (segments sanitised — `/` `:` → `-`). When skipped, this is the caller's `project_root`.
-  - Branch: `{{EXECUTOR_WORKTREE_BRANCH}}` — empty when status is `skipped-*`. The branch name encodes `<work-category-prefix>-<task-id-segment>` and is globally unique across task-keys via `~/.okstra/worktrees/registry.json`.
-  - Base ref: `{{EXECUTOR_WORKTREE_BASE_REF}}` — commit SHA the worktree was branched from at the first phase; canonical `<base>` for every `git diff` / `git log` in this run.
+  - Working tree path: `{{EXECUTOR_WORKTREE_PATH}}` — when status is `created` or `reused`, this is the task's `git worktree` rooted at `~/.okstra/worktrees/<project>/<task-group>/<task-id>/`. When skipped, this is the caller's `project_root`.
+  - Branch: `{{EXECUTOR_WORKTREE_BRANCH}}` — empty when status is `skipped-*`. Branch name = `<work-category-prefix>-<task-id-segment>`, globally unique via `~/.okstra/worktrees/registry.json`.
+  - Base ref: `{{EXECUTOR_WORKTREE_BASE_REF}}` — canonical `<base>` for every `git diff` / `git log` in this run.
   - Provisioning note: `{{EXECUTOR_WORKTREE_NOTE}}`
-  - **Executor behaviour**: when status is `created` or `reused`, the Executor MUST run every Edit / Write / build / test / commit command with the working tree path above as cwd. Treat it as `project_root` for the duration of this run. Do NOT mutate the caller's original checkout. Do NOT `cd` out of the worktree to reach files; if a file outside the worktree is needed, the dependency is a planning gap — record it in `Out-of-plan edits` and continue.
-    - **How to set cwd per Bash call**: the Claude Bash tool inherits its cwd from the lead session, which is NOT the worktree. To put cwd-sensitive toolchains (`cargo`, `npm`, `pnpm`, `bun`, `pytest`, `make`, `go`) into the worktree, prefix the command with `cd {{EXECUTOR_WORKTREE_PATH}} && ` inside the same Bash invocation — e.g. `cd {{EXECUTOR_WORKTREE_PATH}} && cargo test -p foo`. **Never wrap in `bash -lc "..."` or `bash -c "..."`** — the wrapper hides the leading `cd` token from Claude Code's permission auto-allow layer (causing prompts on every call) without any safety benefit. For tools that accept an explicit working-directory flag (`git -C <path>`, `cargo --manifest-path`, `pytest --rootdir`), prefer that form over the `cd && ` chain. Edit / Write / Read tool calls already use absolute paths and need no cwd handling. The codex / gemini executor CLI wrappers (`okstra-codex-exec.sh -C`, `okstra-gemini-exec.sh --include-directories`) already inject worktree cwd at the CLI layer, so this rule applies primarily to the Claude executor.
-  - **Verifier behaviour**: all verifier roles in the resolved roster read from the SAME working tree path so they observe the exact diff the Executor produced. Verifiers remain strictly read-only there.
-  - **Lifecycle**: the worktree is kept after the run completes (no automatic cleanup) and is reused by every subsequent phase of the same task-key. Cleanup, when the task is fully done, is manual: `git -C <main-worktree> worktree remove <path>` followed by `git -C <main-worktree> branch -D <branch>`, plus removing the task-key entry from `~/.okstra/worktrees/registry.json`.
-  - **Skipped paths**: when status is `skipped-in-worktree` or `skipped-not-git`, the executor operates in `project_root` as before. Cite the status in the final report's metadata header so reviewers know which path was taken.
-  - **Synced state directories (symlinks into the MAIN worktree)**: at provision time `okstra-ctl` symlinks `.project-docs/`, `.scratch/`, and `graphify-out/` from the repo's **main worktree** into the task worktree (override via `OKSTRA_WORKTREE_SYNC_DIRS`; empty string disables). These are NOT independent copies — writes through them land in the main worktree. Inside this run the executor MUST confine writes under these paths to its own task scope (i.e. only `.project-docs/okstra/tasks/<this-task-id>/...`). Reading from elsewhere under the symlinks (other tasks, `graphify-out/GRAPH_REPORT.md`, `.scratch/` issues) is allowed and expected for context.
-- Pre-implementation context exploration (executor before first edit):
-  - **Mandatory skill invocation — `tdd`**: BEFORE the first `Edit` or `Write` call, the executor MUST invoke the `tdd` skill via the `Skill` tool and follow its red-green-refactor loop for every code change in this run. This is a hard requirement, not a recommendation; skipping it is a `contract-violated` outcome. The skill governs HOW each step is executed (failing test first → minimal implementation → refactor); it does not override the approved plan's WHAT/file scope.
-    - Order of operations per plan step: (1) write/extend the test that captures the step's acceptance criterion and confirm it fails for the right reason, (2) commit the failing test (`test(<scope>): ...`), (3) implement the minimum change to make it pass, (4) commit the implementation (`feat|fix(<scope>): ...`), (5) refactor without changing behaviour and commit separately if any cleanup is made (`refactor(<scope>): ...`). The failing-then-passing transition between steps (2) and (4) is the `TDD evidence` required by the final report.
-    - Doc-only / config-only / pure-rename steps that have no observable runtime behaviour are exempt from the failing-test requirement, but the executor MUST cite the exemption per step in the final report (`TDD exemption: <reason>`).
-    - When the touched area has no existing test harness, the executor MUST stand up the minimum harness needed to host one regression test for this run rather than skipping TDD entirely. Record the harness-bootstrap step as an `Out-of-plan edit` if it is not in the plan.
-  - re-read the approved plan end-to-end and extract: file list, step order, validation commands, rollback path
-  - inspect the current state of every file the plan names; if any file has changed materially since the plan was written, stop and route to a new `implementation-planning` run instead of editing speculatively
-  - "materially changed" means: the function, class, section, or behaviour the plan targets has been edited, renamed, moved, removed, or otherwise altered in a way that invalidates the plan's reasoning. Cosmetic edits (whitespace, comment-only changes, unrelated function modifications elsewhere in the same file) do NOT trigger a re-plan; cite the diff (`git log --oneline <plan-created-at>..HEAD -- <file>`) in the final report and proceed.
-  - distinguish the two file-scope rules (they are not in conflict):
-    - **drift rule** (this section): if a file *named in the plan* has materially drifted, refuse to edit and route back to planning. This protects trust in the approved scope.
-    - **out-of-plan rule** (Allowed actions section below): if a step *requires touching a file NOT in the plan list*, that is permitted with `Out-of-plan edits` justification. This handles honest scope discovery during execution.
-  - confirm the test/build commands referenced in the plan still exist and run from a clean state
-- Allowed actions during the run:
-  - **Edit / Write on any project file** (no path whitelist — scope is bounded by the approved plan's file list, not by directory). Editing files outside the plan's list is permitted only when strictly needed to satisfy a step, and MUST be recorded in the final report's `Out-of-plan edits` block with rationale.
-  - read-only inspection commands: `git status`, `git diff`, `git log`, `grep`, `rg`, `find`, `cat`, `ls`, file Read tools
-  - build, lint, type-check, and test commands (`npm test`, `pytest`, `go build`, `cargo test`, `bash -n`, etc.)
-  - **local git operations only**: `git add`, `git commit`. Prefer small commits keyed to plan steps.
-  - **Commit message format (mandatory)**: every commit message MUST follow Conventional Commits — `<type>(<scope>): <subject>` for the first line, optional body separated by a blank line, optional footer. Constraints:
-    - `<type>` MUST be one of: `feat` / `fix` / `perf` / `revert` / `deps` / `docs` / `refactor` / `build` / `ci` / `chore` / `test`. When the repo is `release-please`-managed, this aligns the commit with a configured changelog section.
-    - `<scope>` SHOULD be the plan step identifier or the primary module touched (e.g. `feat(report-writer): ...`). Omit the parentheses only when no meaningful scope applies.
-    - `<subject>` MUST be ≤72 characters, imperative mood (`add`, `fix`, `remove` — not `added` / `adding`), no trailing period, no emoji, no AI attribution lines (no `Co-Authored-By: Claude ...`, no `Generated with Claude Code`).
-    - Body (when present) explains *why*, not *what*; wrap at ~100 chars.
-    - Do NOT append okstra artefact paths to the commit message — no `Plan: .project-docs/okstra/...`, no `Report: ...`, no `Run: ...`, no `Task: ...` footers, and no other reference to files under `.project-docs/okstra/`. Those paths belong in the final report's `Plan link & approval evidence` section, not in git history; they rot quickly and leak internal layout into the upstream changelog.
-    - Allowed footers are limited to standard Conventional Commits trailers (`BREAKING CHANGE: ...`, `Refs: <issue/ticket-id>`, `Closes #<n>`). When citing a ticket, use the ticket id only (e.g. `Refs: DEV-9423`) — never a filesystem path.
-    - One commit MUST correspond to one plan step (or one cohesive sub-step). Do NOT bundle unrelated steps into a single commit, and do NOT split a single step across commits unless the plan explicitly sequenced it that way.
-    - The exact message used for each commit MUST be reproduced verbatim in the final report's `Commit list` so reviewers can audit it without re-running `git log`.
+  - Treat the working-tree path as `project_root` for the duration of this run. Do NOT mutate the caller's original checkout. cwd-sensitive Bash commands MUST be prefixed `cd {{EXECUTOR_WORKTREE_PATH}} && ` in the same Bash invocation (never `bash -lc "..."` wrappers — see executor sidecar for full rules).
+  - Lifecycle: kept after the run completes; reused by every subsequent phase of the same task-key. Manual cleanup: `git worktree remove <path>` → `git branch -D <branch>` → drop registry entry.
 - Approval gate (phase-specific addendum to shared authority rule):
-  - the pre-implementation gate's recorded user approval marker is the only authorised approval gate at this phase — proceed once it is satisfied without further external coordination
-- Forbidden actions (any occurrence → terminal status `contract-violated`):
+  - the pre-implementation gate's recorded user approval marker is the only authorised approval gate at this phase — proceed once it is satisfied without further external coordination.
+- Forbidden actions — universal (any occurrence → terminal status `contract-violated`):
   - **`git push` of any kind**, including `--dry-run` against a real remote that produces side-effects
   - publishing or release commands: `npm publish`, `cargo publish`, `pip publish`, `gh release`, `docker push`
-  - real database migrations, schema changes that touch shared environments, or any command that writes to a non-local datastore
-  - production credentials, deploy commands, infra mutation (`terraform apply`, `kubectl apply` against non-local cluster, etc.)
-  - external API calls that *write* (POST/PUT/PATCH/DELETE) to third-party services other than localhost test fixtures
-  - source edits or Bash mutations performed by any verifier role
-  - any Edit/Write before the pre-implementation gate has passed
+  - real database migrations or schema changes that touch shared environments
+  - production credentials, deploy commands, infra mutation against non-local clusters
+  - external API WRITE calls (POST/PUT/PATCH/DELETE) to third-party services other than localhost test fixtures
   - dispatching parallel sub-agents beyond the required worker roster
-  - silent scope expansion — adding files, dependencies, or features that the approved plan did not list, without recording an `Out-of-plan edits` justification
-  - leaving placeholders such as TBD / TODO / "implement later" / "handle edge cases" in committed code
-  - **(verifier-specific)** running lint / formatter auto-fix modes during a verifier's re-run — `eslint --fix`, `prettier --write`, `ruff check --fix`, `rustfmt` (writes by default; verifiers MUST use `cargo fmt --check` or `rustfmt --check`), `gofmt -w`, `black .` (use `black --check`), `isort .` (use `isort --check-only`), or any equivalent rewrite mode
-  - **(verifier-specific)** updating snapshots / golden fixtures during verification — `jest -u` / `--updateSnapshot`, `pytest --snapshot-update`, `INSTA_UPDATE=*` (any value other than `no`), `cargo insta accept`, `--update-goldens`, or any equivalent "make the test agree with current output" flag
-  - **(verifier-specific)** masking test failure with selection or shell tricks during re-run — `-k <expr>` / `--ignore` / `--deselect` to skip subsets, trailing `|| true`, `set +e` followed by a manually softened comparison, redirecting non-zero exit to success. The plan's listed test command MUST run in full
-  - **(verifier-specific)** substituting the plan's validation commands — verifier MUST run the plan's pre/mid/post validation commands verbatim; replacing them with paraphrased or "equivalent" commands is forbidden. Adding supplementary check-only lint/type-check is allowed and is logged separately in the verifier's Read-only command log
-  - **(verifier-specific)** mutating lockfiles or dependency manifests — `npm install <pkg>`, `npm install` (without lockfile freeze; use `npm ci`), `pnpm add`, `bun add`, `cargo add`, `cargo update`, `pip install -U`, or any dependency install that is not lockfile-frozen (`--locked` / `--frozen-lockfile` / `npm ci` / `pip install --require-hashes`)
-  - **(verifier-specific)** git state mutations — `git add`, `git commit`, `git stash`, `git checkout -- <file>`, `git restore`, `git reset`, `git rebase`, `git merge`, branch creation/deletion, tag creation. Only read-only git queries (`git status`, `git diff`, `git log`, `git show`, `git rev-parse`, `git blame`) are permitted for verifiers
-  - **(verifier-specific)** running integration / end-to-end tests that produce non-local side effects (DB writes against a non-local datastore, external API writes, docker compose against a non-isolated environment) unless that exact command is listed in the approved plan's validation set
-  - **(verifier-specific)** redirecting tool caches or output to paths outside the worktree — e.g. setting `CARGO_TARGET_DIR`, `PYTEST_CACHE_DIR`, `NODE_OPTIONS=--require=<external>`, or any env var that causes the verifier's command to write outside the worktree's normal build artifact paths
-- Required deliverable shape (final report, in addition to the standard sections):
-  - **Plan link & approval evidence**: path to the approved `final-report.md` and the exact quoted approval marker
-  - **Commit list**: each commit's SHA (or short SHA), message, and the plan step it satisfies
-  - **Diff summary**: `git diff --stat <base>..HEAD` output, plus a per-file one-line summary of changes
-  - **Out-of-plan edits block**: every file edited that was not in the approved plan's file list, with rationale (empty block is acceptable and preferred)
-  - **Validation evidence**: actual command output (stdout/stderr) for every `pre / mid / post` validation command from the plan. Truncated output is acceptable but the command line and exit code MUST be exact. No paraphrasing of test results.
-  - **TDD evidence (when applicable)**: for steps that should be TDD-ordered, show the failing-test output BEFORE the implementation commit and the passing-test output AFTER, with commit SHAs framing the transition.
-  - **Verifier results**: a section per verifier present in the resolved roster (`Claude verifier`, `Codex verifier`, and `Gemini verifier` when opted in) containing:
-    - their independent verdict (PASS / CONCERNS / FAIL),
-    - cited diff snippets supporting the verdict,
-    - the verifier's `Read-only command log` (every command they ran with exact invocation and exit code, in execution order — copied verbatim from the worker result),
-    - **independent validation re-run results** — per plan-validation command: command line, exit code, and tail of output captured by the verifier (not the executor); any divergence from the executor's reported result MUST be called out as a `Discrepancy` line citing both sides,
-    - **style / lint / type-check results** — each check-only tool the verifier ran, its exit code, and the count of new findings attributable to lines this run introduced. When no tool is configured for a touched language, record the single line `no lint/style tool configured for <language>`,
-    - any fix recommendations the verifier declined to apply.
-    `Claude lead` synthesises a unified verdict but MUST preserve dissent — do not collapse opinions into one paragraph. If any verifier issued `FAIL` on a `Discrepancy` line, the synthesised verdict MUST be `FAIL` unless lead cites a concrete reproduction-time reason (committed flaky-test record, documented environment delta) for overriding.
-  - **Rollback verification**: confirmation that the plan's rollback path is still valid after the changes. Strength of verification depends on the change category:
-    - **Pure code changes** (no persisted state, no infra mutation): a reachable revert SHA is sufficient. Record the exact `git revert <SHA>` command that would undo the change, and confirm `git rev-parse <SHA>` resolves.
-    - **Feature-flag-gated changes**: confirm the off-switch path was exercised in this run's validation evidence (i.e. one of the validation commands ran with the flag off and succeeded). A plan that ships a flag without exercising the off-path does NOT satisfy this requirement.
-    - **Schema migrations, config-format changes, or any change with persisted state**: a **dry-run of the rollback step is mandatory**, not preferred. Record the exact rollback command and its captured exit code / stdout. If the migration tool offers no dry-run mode (`--dry-run`, `--plan`, equivalent), the executor MUST refuse to claim rollback verification and instead end the run with a routing recommendation back to `implementation-planning` for a safer rollback strategy. Skipping this step on a stateful change is treated as a `contract-violated` outcome by `final-verification`.
-  - **Routing recommendation for `final-verification`**: brief note on whether the changes are ready for final-verification phase or need a new error-analysis / planning loop first.
-  - **Follow-up tasks (Section 7 of the final report)**: every item discovered during this run that was *not* delivered MUST appear in the final report's `## 7. Follow-up Tasks (후속 작업)` table with a concrete `Origin`, `New Task ID`, `Suggested task-type`, `Scope`, and `Reason / Why deferred`. Sources include: out-of-scope discoveries that the executor consciously chose not to fold into this run, verifier concerns the executor declined to fix in-place, scope-boundary items from the approved plan that turned out to need their own ticket, and any unresolved `## 5. Clarification Items` row carried over from the approved plan (`Status` ∈ `{open, answered}` at approval time). An empty section is acceptable but only when expressed as the single line `- 후속 작업 없음.` — silence is treated as a contract violation. Rows with `Auto-spawn? = yes` will be materialised by `scripts/okstra-spawn-followups.py` in Phase 7; rows with `Auto-spawn? = no` MUST also appear in `Section 6. Recommended Next Steps` so the user knows to act manually.
-- Self-review pass before finalising the report (`Claude lead` runs this; do not delegate to a generic subagent):
-  1. **Plan coverage** — every step in the approved plan's recommended option must point to a commit (or an explicit `Skipped: <reason>` entry). List gaps.
-  2. **Evidence completeness** — every `Validation evidence` and `TDD evidence` claim has the actual command line and exit code? No paraphrased "tests pass" without output?
-  3. **Out-of-plan honesty** — files in the diff that are NOT in the plan list must appear in the `Out-of-plan edits` block. Cross-check with `git diff --name-only`.
-  4. **Verifier dissent preserved** — if the verifiers in the resolved roster disagree, the disagreement is visible in the report? Synthesis hides nothing?
-  5. **Forbidden action audit** — `git push`, publish, deploy, migration, third-party write commands: scan the run's session transcripts for any occurrence and confirm none happened.
-  6. **Placeholder scan** — restrict the scan to lines this run actually introduced; pre-existing placeholders in unchanged regions of touched files are out of scope. Required command (substitute `<base>` with the parent of the first commit in this run's commit list):
-     ```
-     git diff <base>..HEAD | grep -E '^\+[^+].*\b(TBD|TODO|FIXME|XXX|implement later|handle edge cases|similar to|placeholder)\b' || echo 'clean'
-     ```
-     Only newly-added lines (those starting with `+` and not part of the `+++` header) are inspected. If output is anything other than `clean`, the run MUST either remove the placeholders before finalising or record an explicit justification per occurrence in the final report.
+  - any Edit/Write before the pre-implementation gate has passed
+  - source edits or Bash mutations performed by any verifier role (the verifier-specific deny-list lives in the verifier sidecar, `prompts/profiles/_implementation-verifier.md`)
 - In-phase debugging:
-  - follows the spirit of `systematic-debugging` (root cause before fix), but the executor MUST NOT route to a separate `error-analysis` phase mid-run; if a defect blocks plan progress, the executor records findings and routes to a new run after this one ends.
+  - isolate root cause before changing the fix direction, but the executor MUST NOT route to a separate `error-analysis` phase mid-run; if a defect blocks plan progress, the executor records findings and routes to a new run after this one ends.
+## Lazy section pointers (BLOCKING for lead — load at the listed phase, not at Phase 1)
+The bulk of this profile's body is split into three sidecars so the lead's Phase 1 baseline stays under ~50 effective lines. Read each sidecar ONCE, at the phase noted, into the lead's active context — do NOT pre-load them at Phase 1.
+| Sidecar | Read at | Purpose |
+|---------|---------|---------|
+| `prompts/profiles/_implementation-executor.md` | Start of Phase 5 (after Stage Map parse, before Executor's first Edit / Write) | Executor role binding, Pre-implementation context exploration, TDD loop, Stage execution contract, allowed actions, commit-message format |
+| `prompts/profiles/_implementation-verifier.md` | Phase 5, between Executor stage completion and the first verifier dispatch | Verifier roles, Two-tier command lookup, deny-list, discrepancy rule, Read-only command log, verifier-specific forbidden actions |
+| `prompts/profiles/_implementation-deliverable.md` | Start of Phase 6 (after Phase 5.5 convergence completes, before report-writer dispatch prompt construction) | Required deliverable shape, Validation / TDD evidence rules, Verifier results structure, Self-review pass, Lead post-stage persistence |
+**Phase 5 / 6 진입 시 해당 sidecar 가 lead context 에 없으면 BLOCKING — phase 진입 거부.** Lead 는 sidecar 를 read 한 후 1 회 turn 안에 phase 의 후속 action 으로 이어가야 한다 (즉 sidecar 의 룰은 read 한 그 turn 부터 효력 발생).

package/runtime/prompts/profiles/improvement-discovery.md ADDED Viewed

@@ -0,0 +1,42 @@
+# Improvement Discovery Profile
+- Purpose: scan a codebase scope through a fixed lens whitelist and surface ranked improvement candidates with multi-worker consensus classification
+- Required workers:
+  - claude
+  - codex
+  - gemini
+  - report-writer
+- Optional workers (opt-in via `--workers`):
+  - none — every required worker stays required because lens diversity is the load-bearing value of this phase
+{{INCLUDE:_common-contract.md}}
+- Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
+  - this phase REQUIRES a codebase-scan brief whose frontmatter contains `scope: codebase`. A brief without that marker is rejected before worker dispatch.
+  - the brief's `priority-lenses` MUST be a non-empty subset (size 1..4) of the lens whitelist defined in `scripts/okstra_ctl/improvement_lenses.py`. Lenses outside the whitelist are rejected.
+  - the brief's `scan-scope` defines the only paths workers may read for candidate evidence. `out-of-scope` paths MUST be ignored even when the codebase is otherwise reachable.
+  - the brief's `candidate-cap` (default 8 if absent, absolute cap 12) bounds the number of rows in `## 4.9 Improvement Candidates`.
+  - Apply the shared reporter-confirmation precondition as written. For this phase any unresolved `intent-check:` / `conversion-block:` row uses `Blocks=next-phase`.
+- Primary focus areas:
+  - candidate discovery within the lens whitelist
+  - per-candidate evidence (path:line) and scope mapping
+  - per-candidate severity / effort / recommended-next-phase
+  - convergence classification (full / partial / contested / worker-unique) across workers
+- Phase 1.5 — Lead reflect-back grilling (runs after Phase 1 context loading and before Phase 4 worker dispatch):
+  - Lead inspects scan-scope paths via `ls` / `Grep` / `Read` to map modules, entry points, dependencies, approximate LOC, and recent commit patterns.
+  - Lead emits a single reflect-back message covering: (a) understood scope per path (one-line summary), (b) understood meaning of each priority lens in this scope, (c) understood out-of-scope rationale, (d) ordered list of N open questions.
+  - For each open question Lead asks ONE `AskUserQuestion` with a `(Recommended)` answer drawn from a codebase-first inspection. Budget: at most 12 questions in this phase.
+  - Stop conditions (OR): all questions resolved / budget exhausted / the user signals to proceed.
+  - Lead persists the round at `<RUN_DIR>/state/phase-1.5-grilling.md` with one section per question (question / recommended / user answer) and a closing `Resolved scope` / `Resolved lenses` block. Worker prompts use this resolved block as the authoritative scope and lens definition.
+- Decision-tree walk (bounded):
+  - When candidates branch on a structural question (e.g. "is module X meant to own this responsibility?"), resolve via `Read` / `Grep` first. Only escalate to the user inside the Phase 1.5 budget.
+- Expected output emphasis:
+  - the `## 4.9 Improvement Candidates` table populated with rows that obey the 10-column schema from `validators/validate-improvement-report.py` (Cand ID `I-NNN`, Lens from whitelist, Title, Scope ⊆ scan-scope, Severity, Effort, Consensus, Source workers `<worker>:<id>` from {claude, codex, gemini}, Recommended next-phase ∈ {requirements-discovery, implementation-planning, error-analysis}, Evidence as path:line list)
+  - `## 2. Final Verdict` Verdict Token ∈ {`candidates-ready`, `no-candidates`, `blocked`}; Direction `routing`; Next Step "사용자에게 후보 K개 선택 의뢰 (## 4.9 표 참조)"
+  - `## 6. Recommended Next Steps` first entry summarises per-candidate routing and proposes new task-key names of the form `<task-group>/imp-<Cand-ID>`
+- Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
+  - if scan-scope or priority-lenses cannot be made concrete during Phase 1.5, end the run with Verdict Token `blocked`, populate `## 5. Clarification Items` with `Blocks=next-phase` rows, and do not run worker dispatch
+  - every clarification row carries a recommended answer + one-line rationale inside the `Expected form` cell
+- Non-goals:
+  - concrete implementation plans, cost estimates, or code edits for any candidate
+  - inventing lenses outside the whitelist
+  - acting as a final-verification quality gate — this phase is discovery, not acceptance
+  - silently merging out-of-scope findings into in-scope candidates

package/runtime/prompts/profiles/release-handoff.md CHANGED Viewed

@@ -43,7 +43,7 @@
      - `cancel` — end the run without executing push or PR commands; record the cancellation in the final report.
 - Inline drafting rules (Claude lead):
   - read the run brief, the cited final-verification report, `git log --oneline <base>..HEAD`, and `git diff <base>..HEAD --stat` to ground the drafted text in actual committed changes.
-  - **PR body template** — the run context exposes `PR_TEMPLATE_PATH` (resolved by the prepare step in priority order: per-run override → `<project_root>/.project-docs/okstra/project.json` `prTemplatePath` → `~/.okstra/config.json` `prTemplatePath` → bundled default at `~/.claude/skills/okstra-run/templates/pr-body.template.md`) along with `PR_TEMPLATE_SOURCE` indicating which scope was used. The lead MUST `Read` this file verbatim, strip HTML comments, then fill in the placeholders. Do NOT hard-code a section list — the template is the source of truth for the structure. If the resolved file is missing at draft time, abort the run with a clear error rather than inventing a structure.
+  - **PR body template** — the run context exposes `PR_TEMPLATE_PATH` and `PR_TEMPLATE_SOURCE`. The path MUST be an okstra-owned project artifact under `<PROJECT_ROOT>/.project-docs/okstra/**` or a file already materialised into this run's artifact directory by the prepare step. The lead MUST `Read` this file verbatim, strip HTML comments, then fill in the placeholders. Do NOT hard-code a section list — the template is the source of truth for the structure. If the resolved file is missing or outside the okstra resource boundary at draft time, abort the run with a clear error rather than inventing a structure.
   - produce **two artifacts** before showing them to the user:
     1. **PR title** — by default the subject of the most recent implementation commit, or a concise Conventional Commits-style summary of the committed range.
     2. **PR body** — markdown filled from `PR_TEMPLATE_PATH`. The user-confirmation step's diff (Q3 `edit then proceed`) is computed against the filled template, not against the raw template file.

package/runtime/prompts/profiles/requirements-discovery.md CHANGED Viewed

@@ -9,11 +9,7 @@
   - gemini — when added to the roster it joins the analyser set; omitted by default
 {{INCLUDE:_common-contract.md}}
 - Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
-  - **Precondition check (BLOCKING — runs before any analysis)**: read the brief's frontmatter `reporter-confirmations:` field and inspect every `Open Questions` row prefixed `intent-check:` / `conversion-block:` for the `[CONFIRMED …]` marker.
-    - `reporter-confirmations: complete` → proceed normally (no unresolved reporter-only rows).
-    - `reporter-confirmations: partial` → proceed; treat the still-unmarked `intent-check:` / `conversion-block:` rows per the `skipped` branch below.
-    - `reporter-confirmations: skipped` (or `partial` with remainder) → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 5. Clarification Items` as `Kind=decision, Blocks=next-phase`, with the recommended answer drawn from the brief's matching `intent-inference` / `conversion-block:` text and clearly labelled `보고자 직접 확인 권장`. Then proceed with the rest of the classification work.
-    - `reporter-confirmations: pending` (or field missing) → ABORT analysis. Write only `## 0. Reporter Confirmation Required` summarising which rows are pending and stop. The operator must rerun `okstra-brief` Step 6.5 to collect answers, then restart this phase. The final report carries `Blocks=next-phase`.
+  - Apply the shared reporter-confirmation precondition exactly as written. In this phase, unresolved `intent-check:` / `conversion-block:` rows use `Blocks=next-phase`.
   - before classifying (after the precondition passes), scan the brief for every `Open Questions` row prefixed `intent-check:` / `terminology:` / `conversion-block:` and every `Augmentation` entry labelled `intent-inference` / `terminology-mapping`. Each one is a translation signal that this phase must resolve OR carry forward.
   - `intent-inference` augmentations whose paired `intent-check:` row carries `[CONFIRMED …]` are treated as **confirmed**; trust the confirmation text in `## Reporter Confirmations` over the original inference if they differ. Unconfirmed `intent-inference` rows under `reporter-confirmations: skipped` follow the `skipped` branch of the shared reporter-confirmation precondition in `_common-contract.md`.
   - `conversion-block:` rows are explicit "translation failed" signals — never attempt to resolve them by inference here; the precondition above already handled them.
@@ -23,11 +19,11 @@
   - identify missing materials that block reliable routing
   - define task continuity expectations for long-running work under the same task key
   - capture approval or confirmation points before the next phase starts
-  - **domain alignment check**: read in priority order — (authoritative) `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` and `<PROJECT_ROOT>/.project-docs/okstra/decisions/` titles if present; (supplementary) `<PROJECT_ROOT>/CONTEXT.md` (or `CONTEXT-MAP.md` → per-context `CONTEXT.md`) and `<PROJECT_ROOT>/docs/adr/` titles if present. Absent external files are normal — do not error. Validate that every `terminology:*` entry under the brief's `Open Questions` has a canonical resolution before routing. Fuzzy or overloaded terms in the brief MUST be resolved to a single canonical term in this phase.
-- Decision-tree walk (grill-me adopted, bounded):
+  - **domain alignment check**: read `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` and `<PROJECT_ROOT>/.project-docs/okstra/decisions/` titles if present. Absent okstra memory files are normal — do not error. Validate that every `terminology:*` entry under the brief's `Open Questions` has a canonical resolution before routing. Fuzzy or overloaded terms in the brief MUST be resolved to a single canonical term in this phase.
+- Decision-tree walk (bounded):
   - When the brief's `Desired Outcome`, classification, or routing target depends on a chain of decisions, walk that chain one branch at a time. Each branch is one `Clarification Items` row, not a free-form interview.
-  - For every clarification row, write the row's `Recommended` cell with the single best answer plus a one-line rationale. Other options are listed in `Alternatives` with one-sentence consequences.
-  - **Codebase-first rule**: if a branch can be resolved by `Read` / `Grep` / file inspection, resolve it that way and record the evidence in the same row's `Evidence` cell. Do NOT escalate to the user.
+  - For every clarification row, put the single best answer and one-line rationale in `Expected form` as `Recommended: ...`. Put other options and one-sentence consequences in the same cell as `Alternatives: ...`.
+  - **Codebase-first rule**: if a branch can be resolved by `Read` / `Grep` / file inspection, resolve it that way and record `Evidence checked: <path:line>` in the `Statement` cell. Do NOT escalate to the user.
   - Budget: the unified `## 5. Clarification Items` table caps at the smaller of (a) one row per unresolved decision branch, (b) 8 rows total. Beyond the cap, fold remaining ambiguity into the routing recommendation's risk notes.
 - Expected output emphasis:
   - evidence-backed routing decision
@@ -37,10 +33,10 @@
 - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
   - if any blocking input is missing at the time of writing the final report, populate `## 5. Clarification Items` in `final-report-template.md` (a single unified table; `Blocks=next-phase` for items the next run cannot start without)
   - prefer concrete questions whose answers map directly to a routing decision (`bugfix` vs `feature`, `error-analysis` vs `implementation-planning`, etc.). State each option in plain language with one sentence describing what choosing it would mean for the next phase.
-  - every clarification row carries a `Recommended` answer + one-line rationale; rows that lack a recommendation are rejected as half-formed.
+  - every clarification row carries a recommended answer + one-line rationale inside the `Expected form` cell; rows that lack a recommendation are rejected as half-formed.
   - **Codebase-first ambiguity resolution (defect rule)**: any ambiguity that can be answered by `Read` / `Grep` / file inspection MUST be resolved that way and recorded with file:line evidence. Writing a clarification row for something the codebase already answers is a defect of this phase.
-  - **`evidence-checked:` cell required**: every clarification row carries an `evidence-checked: <path:line> | none` cell. `evidence-checked: <path:line>` means the codebase was inspected and the row records what was found (or that the code did not contain the answer). `evidence-checked: none` is allowed ONLY when the row's nature is "only a human can answer this" (reporter intent, business priority, external authority); the row body must state which one in one line. A row with `evidence-checked: none` that *could* have been answered by the codebase is a defect.
+  - **Evidence note required inside `Statement`**: every clarification row includes `Evidence checked: <path:line>` or `Evidence checked: none — <human-only reason>` in the `Statement` cell. `none` is allowed ONLY when the row's nature is "only a human can answer this" (reporter intent, business priority, external authority). A row with `none` that *could* have been answered by the codebase is a defect.
 - Non-goals:
   - full implementation design unless it is required to decide the next phase
   - **source code edits, plan authoring, builds, or deployments** — this run only classifies the work and routes it; deeper analysis and planning belong to subsequent phases
-  - **edits to any path outside `<PROJECT_ROOT>/.project-docs/okstra/`** — okstra never writes to external paths. Glossary additions land in `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` (via `okstra-brief` Step 4.5); decision drafts land in `<PROJECT_ROOT>/.project-docs/okstra/decisions/` (via `implementation-planning`). External `<PROJECT_ROOT>/CONTEXT.md` / `CONTEXT-MAP.md` / `docs/adr/` are read-only references.
+  - **writes outside `<PROJECT_ROOT>/.project-docs/okstra/`** — this phase only uses okstra's artifact root. Glossary additions land in `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` (via `okstra-brief` Step 4.5); decision drafts land in `<PROJECT_ROOT>/.project-docs/okstra/decisions/` (via `implementation-planning`).

package/runtime/prompts/wizard/prompts.ko.json ADDED Viewed

@@ -0,0 +1,230 @@
+{
+  "schema_version": 1,
+  "locale": "ko",
+  "steps": {
+    "task_pick": {
+      "label": "어느 task?",
+      "echo_template": "task: {value}",
+      "options": {
+        "__new__": "Start a brand-new task",
+        "_LATEST_SUFFIX": "  (latest)"
+      }
+    },
+    "task_group": {
+      "label": "Task group 을 알려주세요 (예: backend-api, INV-1234, refactor)",
+      "echo_template": "task-group: {value}",
+      "echo_variants": {
+        "free_input": "task-group: (직접 입력)"
+      }
+    },
+    "task_group_with_suggestion": {
+      "label": "Task group? (brief 추천: {suggestion})",
+      "echo_template": "task-group: {value}",
+      "options": {
+        "__use_suggested__": "brief 값 사용: {suggestion}",
+        "__free_input__": "다른 값 입력"
+      }
+    },
+    "task_group_text": {
+      "label": "Task group 을 입력해주세요 (예: backend-api, INV-1234, refactor)",
+      "echo_template": "task-group: {value}"
+    },
+    "task_id": {
+      "label": "Task id 를 알려주세요 (예: login-error-analysis, dev-9043)",
+      "echo_template": "task-id: {value}",
+      "echo_variants": {
+        "free_input": "task-id: (직접 입력)"
+      }
+    },
+    "task_id_with_suggestion": {
+      "label": "Task id? (brief 추천: {suggestion})",
+      "echo_template": "task-id: {value}",
+      "options": {
+        "__use_suggested__": "brief 값 사용: {suggestion}",
+        "__free_input__": "다른 값 입력"
+      }
+    },
+    "task_id_text": {
+      "label": "Task id 를 입력해주세요 (예: login-error-analysis, dev-9043)",
+      "echo_template": "task-id: {value}"
+    },
+    "task_type": {
+      "label": "Task type?",
+      "echo_template": "task-type: {value}",
+      "options": {
+        "_RECOMMENDED_SUFFIX": " (recommended)"
+      }
+    },
+    "brief_keep": {
+      "label": "기존 brief 경로 [{existing_brief_path}] 를 유지할까요?",
+      "echo_template": "brief: {value}",
+      "options": {
+        "keep": "유지",
+        "change": "변경"
+      },
+      "echo_variants": {
+        "kept": "brief: {brief_path} (유지)"
+      }
+    },
+    "brief_path": {
+      "label": "task brief markdown 의 경로를 알려주세요 (project root 기준 상대경로 또는 절대경로)",
+      "echo_template": "brief: {value}"
+    },
+    "base_ref_pick": {
+      "label": "이 task worktree 의 base branch?",
+      "echo_template": "base-ref: {value}",
+      "options": {
+        "_RECOMMENDED_SUFFIX": " (recommended)",
+        "__free_input__": "직접 입력"
+      }
+    },
+    "base_ref_text": {
+      "label": "base ref 를 입력해주세요 (branch, tag, 또는 short/full SHA)",
+      "echo_template": "base-ref: {value}"
+    },
+    "approved_plan_pick": {
+      "label": "approved final-report 경로 (기본: {default})",
+      "echo_template": "approved-plan(pick): {value}",
+      "options": {
+        "__use_default__": "기본 경로 사용: {default}",
+        "__other__": "다른 경로 입력"
+      },
+      "errors": {
+        "default_not_found": "기본 approved-plan 경로를 찾을 수 없습니다. '다른 경로 입력'을 선택하세요."
+      }
+    },
+    "approved_plan": {
+      "label": "approved final-report.md 의 경로를 알려주세요 (APPROVED 마커 필수)",
+      "echo_template": "approved-plan: {value}"
+    },
+    "stage_pick": {
+      "label": "실행할 stage 를 선택하세요. auto 는 의존성이 만족된 가장 빠른 미완료 stage 를 자동으로 잡습니다.",
+      "echo_template": "stage: {value}",
+      "options": {
+        "auto": "auto (다음 미완료 stage)"
+      }
+    },
+    "directive_pick": {
+      "label": "추가 directive 가 있나요?",
+      "echo_template": "directive(pick): {value}",
+      "options": {
+        "__skip__": "없음 (건너뛰기)",
+        "__enter__": "있음 (입력)"
+      }
+    },
+    "related_tasks_pick": {
+      "label": "관련 task id 목록이 있나요?",
+      "echo_template": "related-tasks(pick): {value}",
+      "options": {
+        "__skip__": "없음 (건너뛰기)",
+        "__enter__": "있음 (입력)"
+      }
+    },
+    "clarification_pick": {
+      "label": "clarification-response 파일 경로가 있나요? (follow-up 시에만)",
+      "echo_template": "clarification(pick): {value}",
+      "options": {
+        "__skip__": "없음 (건너뛰기)",
+        "__enter__": "있음 (입력)"
+      }
+    },
+    "pr_template_pick": {
+      "label": "PR 본문 템플릿 경로를 직접 지정할까요?",
+      "echo_template": "pr-template(pick): {value}",
+      "options": {
+        "__skip__": "자동 해석 (project.json → config → 기본)",
+        "__enter__": "직접 경로 입력 (1회성 override)"
+      }
+    },
+    "executor": {
+      "label": "실행자 (executor)?",
+      "echo_template": "executor: {value}",
+      "options": {
+        "_DEFAULT_SUFFIX": " (default)"
+      }
+    },
+    "defaults_or_custom": {
+      "label": "기본 워커/모델로 진행할까요, 아니면 커스터마이즈할까요?",
+      "echo_template": "customize: {value}",
+      "options": {
+        "defaults": "Use defaults",
+        "customize": "Customize"
+      }
+    },
+    "workers_override": {
+      "label": "참여시킬 분석 워커를 선택해주세요 (최소 1개). report-writer 는 항상 포함됩니다.",
+      "echo_template": "workers: {value}",
+      "options": {
+        "_OPTIONAL_SUFFIX": " (옵션)"
+      },
+      "errors": {
+        "min_one_required": "워커를 최소 1개 선택해주세요"
+      }
+    },
+    "lead_model": {
+      "label": "리더(Claude lead) 모델?",
+      "echo_template": "lead-model: {value}"
+    },
+    "executor_model": {
+      "label": "실행자({executor}) 모델?",
+      "echo_template": "{executor}-model: {value}"
+    },
+    "claude_model": {
+      "label": "claude 워커 모델?",
+      "echo_template": "claude-model: {value}"
+    },
+    "codex_model": {
+      "label": "codex 워커 모델?",
+      "echo_template": "codex-model: {value}"
+    },
+    "gemini_model": {
+      "label": "gemini 워커 모델?",
+      "echo_template": "gemini-model: {value}"
+    },
+    "report_writer_model": {
+      "label": "리포트 작성자(report-writer) 모델?",
+      "echo_template": "report-writer-model: {value}"
+    },
+    "directive": {
+      "label": "추가 directive 가 있으면 적어주세요 (없으면 빈 줄)",
+      "echo_template": "directive: {value}"
+    },
+    "related_tasks": {
+      "label": "관련 task id 목록을 쉼표로 구분해서 적어주세요 (없으면 빈 줄)",
+      "echo_template": "related-tasks: {value}"
+    },
+    "clarification": {
+      "label": "clarification-response 파일 경로 (follow-up 시에만, 없으면 빈 줄)",
+      "echo_template": "clarification: {value}"
+    },
+    "pr_template": {
+      "label": "PR 본문 템플릿 경로 1회성 override (빈 줄이면 project.json → ~/.okstra/config.json → 스킬 디폴트 순으로 자동 해석)",
+      "echo_template": "pr-template: {value}"
+    },
+    "pr_template_scope": {
+      "label": "방금 입력한 경로를 영구 저장할까요?",
+      "echo_template": "pr-template-scope: {value}",
+      "options": {
+        "once": "이번 run 만 (1회성)",
+        "project": "프로젝트에 저장 (project scope)",
+        "global": "전역에 저장 (global scope)"
+      }
+    },
+    "confirm": {
+      "label": "이대로 진행할까요?",
+      "echo_template": "confirm: {value}",
+      "options": {
+        "proceed": "Proceed",
+        "edit": "Edit"
+      }
+    },
+    "edit_target": {
+      "label": "어느 step 으로 돌아갈까요?",
+      "echo_template": "edit-target: {value}"
+    }
+  },
+  "confirmation": {
+    "header": "선택 확인:",
+    "workers_implementation_default": "  workers       : (프로필 기본 — executor + verifier 2 + report-writer)"
+  }
+}

package/runtime/python/lib/okstra/cli.sh CHANGED Viewed

@@ -23,57 +23,10 @@ collect_required_arguments() {
   prompt_for_required_argument PROJECT_ID "Project ID"
   prompt_for_required_argument TASK_GROUP "Task Group"
   prompt_for_required_argument TASK_ID "Task ID"
-  prompt_for_required_argument ANALYSIS_TYPE "Task Type"
+  prompt_for_required_argument TASK_TYPE "Task Type"
   prompt_for_required_argument BRIEF_PATH "Task Brief Path"
 }
-confirm_execution_plan() {
-  local confirmation=""
-  local normalized_confirmation=""
-  if [[ "$ASSUME_YES" == "true" ]]; then
-    return 0
-  fi
-  if ! is_interactive_session; then
-    return 0
-  fi
-  cat >&2 <<CONFIRM_EOF
-okstra execution summary:
-  render only: ${RENDER_ONLY}
-  task type: ${ANALYSIS_TYPE}
-  project id: ${PROJECT_ID}
-  task group: ${TASK_GROUP}
-  task id: ${TASK_ID}
-  task brief path: ${BRIEF_FILE_PATH}
-  clarification response: ${CLARIFICATION_RESPONSE_FILE:-None}
-  related tasks: ${RELATED_TASKS_INLINE}
-  recommended workers: ${SELECTED_REVIEWERS}
-  lead model: ${LEAD_MODEL_DISPLAY}
-  worker models: claude=${CLAUDE_WORKER_MODEL_DISPLAY}, codex=${CODEX_WORKER_MODEL_DISPLAY}, gemini=${GEMINI_WORKER_MODEL_DISPLAY}, report-writer=${REPORT_WRITER_MODEL_DISPLAY}
-  executor (implementation only): ${EXECUTOR_OVERRIDE:-default(claude)}
-  task key input: ${TASK_KEY_INPUT:-None}
-  task key: ${TASK_KEY}
-  task root: ${TASK_ROOT}
-  run dir: ${RUN_DIR}
-  final report path: ${FINAL_REPORT_FILE}
-  final report template: ${FINAL_REPORT_TEMPLATE_FILE}
-CONFIRM_EOF
-  printf 'Continue? [y/yes]: ' >&2
-  if ! IFS= read -r confirmation; then
-    printf 'confirmation cancelled\n' >&2
-    exit 1
-  fi
-  normalized_confirmation="$(trim_whitespace "$confirmation")"
-  normalized_confirmation="$(printf '%s' "$normalized_confirmation" | tr '[:upper:]' '[:lower:]')"
-  if [[ "$normalized_confirmation" != "y" && "$normalized_confirmation" != "yes" ]]; then
-    printf 'okstra cancelled before execution\n' >&2
-    exit 1
-  fi
-}
 require_option_value() {
   local option_name="$1"
   local option_value="${2-}"
@@ -143,7 +96,7 @@ while [[ $# -gt 0 ]]; do
       shift 2
       ;;
     --task-type)
-      ANALYSIS_TYPE="$(require_option_value --task-type "${2-}")"
+      TASK_TYPE="$(require_option_value --task-type "${2-}")"
       shift 2
       ;;
     --project-id)