okstra 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/okstra CHANGED
@@ -15,6 +15,7 @@ const COMMANDS = new Map([
15
15
  ["worktree-lookup", () => import("../src/worktree-lookup.mjs").then((m) => m.run)],
16
16
  ["plan-validate", () => import("../src/plan-validate.mjs").then((m) => m.run)],
17
17
  ["render-bundle", () => import("../src/render-bundle.mjs").then((m) => m.run)],
18
+ ["wizard", () => import("../src/wizard.mjs").then((m) => m.run)],
18
19
  ]);
19
20
 
20
21
  const USAGE = `okstra — multi-agent cross-verification orchestrator for Claude Code
@@ -51,6 +52,8 @@ Introspection commands (JSON output, used by skills to avoid python heredocs):
51
52
  plan-validate Check an approved-plan file for the approval marker
52
53
  render-bundle Preview prepare_task_bundle() output (forwards to
53
54
  python3 -m okstra_ctl.run --render-only)
55
+ wizard Drive the okstra-run interactive input state machine
56
+ (init / step / render-args / confirmation)
54
57
 
55
58
  Global options:
56
59
  --version Print okstra version and exit
@@ -95,7 +95,7 @@ okstra 의 prepare 책임은 단일 python 진입점 [`okstra_ctl.run.prepare_ta
95
95
  `prepare_task_bundle` 의 두 caller:
96
96
 
97
97
  1. **`scripts/okstra.sh`**: CLI 인자를 파싱·확인하고 → `prepare_task_bundle` 호출 → `--render-only` 가 아니면 `claude --model ... --session-id ... "$PROMPT"` 를 `exec` 으로 띄움. ~160 줄의 thin wrapper.
98
- 2. **`okstra-run` skill**: 같은 claude 세션 안에서 `AskUserQuestion` 으로 인자를 모은 뒤 → `prepare_task_bundle(render_only=True)` 직접 호출 → 렌더된 lead prompt 를 현재 세션이 그대로 읽어 lead 역할 수행. 새 claude 프로세스를 띄우지 않음.
98
+ 2. **`okstra-run` skill**: 같은 claude 세션 안에서 [`okstra_ctl.wizard`](../../scripts/okstra_ctl/wizard.py) 상태머신(`okstra wizard init|step|...` CLI)을 돌려 사용자 입력을 모은 뒤 → `okstra render-bundle` (즉 `prepare_task_bundle(render_only=True)`) 호출 → 렌더된 lead prompt 를 현재 세션이 그대로 읽어 lead 역할 수행. 새 claude 프로세스를 띄우지 않음. 분기/검증/순서는 모두 wizard 가 결정하므로 skill 본문은 `Prompt.kind` 에 맞춰 `AskUserQuestion`(`pick`) 또는 평문 메시지(`text`)를 띄우는 ~30 줄짜리 루프이다.
99
99
 
100
100
  판단 정책과 worker orchestration 은 lead claude 가 담당하고, okstra 의 prepare 단계는 그 lead 가 정확한 입력 묶음과 출력 골격을 받아 일을 시작할 수 있게 정형화된 자산을 준비할 뿐입니다.
101
101
 
@@ -167,7 +167,7 @@ okstra 의 prepare 단계는 디스크 권위 + 단일 python 진입점 모델
167
167
  ├────────────────────────────────────────────────────────────────┤
168
168
  │ │
169
169
  │ scripts/okstra.sh skills/okstra-run/SKILL.md │
170
- │ (CLI: bash 인자 파싱) (current claude 세션 안 AskUserQuestion)
170
+ │ (CLI: bash 인자 파싱) (okstra_ctl.wizard 상태머신 루프)
171
171
  │ │ │ │
172
172
  │ └─────────────┬────────────────┘ │
173
173
  │ ▼ │
package/docs/kr/cli.md CHANGED
@@ -492,6 +492,7 @@ chmod +x ~/.local/bin/okstra-ctl
492
492
  | `okstra worktree-lookup <task-key>` | `worktree_registry.lookup` 결과 (예약된 path / branch / base ref / 현재 상태) |
493
493
  | `okstra plan-validate <plan-path>` | `_validate_approved_plan` — approval marker 인식 결과와 sanitization 후 diff |
494
494
  | `okstra render-bundle <args…>` | `prepare_task_bundle(render_only=True)` 의 thin shim — `python3 -m okstra_ctl.run --render-only` 와 동일 시그니처 |
495
+ | `okstra wizard <init\|step\|render-args\|confirmation> --state-file <path>` | okstra-run 인터랙티브 입력 상태머신 (`okstra_ctl.wizard`). `init` 으로 state file 을 시드한 뒤 skill 이 `step --answer <val>` 을 반복 호출하면 다음 `Prompt` JSON 을 받음. `render-args` 는 최종 `render-bundle` 인자 맵, `confirmation` 은 사용자 echo 블록을 반환 |
495
496
 
496
497
  > 모든 subcommand 는 `bin/okstra` 가 spawn 하는 python 헬퍼 (`src/_python-helper.mjs`) 가 `PYTHONPATH` 와 `~/.okstra/lib/python` 을 wire 합니다. 직접 `python3 -m okstra_ctl.*` 으로 호출하면 `PYTHONPATH` 를 사용자가 직접 셋업해야 합니다.
497
498
 
@@ -101,6 +101,7 @@ okstra/
101
101
  | `worktree-lookup.mjs` | 작업별 git worktree 경로 조회 |
102
102
  | `plan-validate.mjs` | Approved plan 파일의 approval marker 검증 |
103
103
  | `render-bundle.mjs` | `prepare_task_bundle()` 산출 미리보기 |
104
+ | `wizard.mjs` | okstra-run 인터랙티브 입력 상태머신 구동 (`init`/`step`/`render-args`/`confirmation`) |
104
105
  | `uninstall.mjs` | `~/.okstra` 및 스킬 제거 |
105
106
 
106
107
  **핵심 설계**: 모든 CLI 명령은 `okstra paths --shell` 결과를 환경변수로 로드한 뒤, Python의 단일 진입점 `okstra_ctl.run.prepare_task_bundle`을 호출한다. CLI(`okstra.sh`)와 skill(`okstra-run`)이 같은 Python 함수를 공유하는 **Rule of Two**를 강제한다.
@@ -126,6 +127,7 @@ okstra/
126
127
  | `workers.py` | Worker 모델 할당 (Claude/Codex/Gemini) |
127
128
  | `backfill.py` | 레거시 run 자동 마이그레이션 |
128
129
  | `models.py` | `ModelAssignment`, `PrepareInputs`, `PrepareOutputs` 데이터클래스 |
130
+ | `wizard.py` | `WizardState`, `Prompt`, `STEPS` — okstra-run 입력 수집 상태머신 (단일 권위) |
129
131
 
130
132
  #### 3.2.2 `scripts/okstra_project/` — 프로젝트 메타 리졸버
131
133
 
@@ -242,7 +244,7 @@ okstra/
242
244
 
243
245
  ### 3.8 `bin/okstra` — Node.js CLI
244
246
 
245
- 12개 명령(`paths`, `install`, `ensure-installed`, `uninstall`, `doctor`, `setup`, `check-project`, `task-list`, `task-show`, `worktree-lookup`, `plan-validate`, `render-bundle`) 라우터. 명령 파싱 → `src/<cmd>.mjs` 동적 로드 → `run(args)` 호출 → exit code 반환.
247
+ 13개 명령(`paths`, `install`, `ensure-installed`, `uninstall`, `doctor`, `setup`, `check-project`, `task-list`, `task-show`, `worktree-lookup`, `plan-validate`, `render-bundle`, `wizard`) 라우터. 명령 파싱 → `src/<cmd>.mjs` 동적 로드 → `run(args)` 호출 → exit code 반환.
246
248
 
247
249
  ---
248
250
 
@@ -306,6 +308,7 @@ okstra/
306
308
  | 프로젝트 | `setup.mjs`, `check-project.mjs` |
307
309
  | Task 조회 | `task-list.mjs`, `task-show.mjs` |
308
310
  | 실행 보조 | `worktree-lookup.mjs`, `plan-validate.mjs`, `render-bundle.mjs` |
311
+ | 스킬 구동 | `wizard.mjs` |
309
312
 
310
313
  ---
311
314
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "okstra",
3
- "version": "0.23.0",
3
+ "version": "0.25.0",
4
4
  "description": "Multi-agent cross-verification orchestrator runtime + Claude Code skills.",
5
5
  "license": "MIT",
6
6
  "author": "devonshin",
@@ -1,5 +1,5 @@
1
1
  {
2
- "package": "0.23.0",
3
- "builtAt": "2026-05-15T01:56:19.363Z",
2
+ "package": "0.25.0",
3
+ "builtAt": "2026-05-15T07:26:25.848Z",
4
4
  "repoRoot": "/home/runner/work/okstra/okstra"
5
5
  }
@@ -57,7 +57,7 @@ Unlike the Codex / Gemini workers, you are an in-process Claude subagent — you
57
57
  Before producing any output, you MUST read every input file enumerated in the `[Required reading]` block of the lead's prompt from the very first character to the very last character. This includes the task brief, analysis profile, analysis material (if present), reference expectations, the carry-in clarification response (if present), and the final report template.
58
58
 
59
59
  - Use a single `Read` call per file with no `offset` and no `limit`. If a file is genuinely too large for one read, page through it with explicit `offset` / `limit` calls that together cover the entire file, and record the page boundaries in your Findings.
60
- - For the carry-in clarification response, walk every row of sub-section 5.1 (`A1`, `A2`, ... material requests) and sub-section 5.2 (`Q1`, `Q2`, ... user questions) in full, even when the answer column is blank. Skimming these rows is the most common failure mode here; do not repeat it just because the file you will eventually contribute to has a structurally similar Section 5.
60
+ - For the carry-in clarification response, walk every row of `## 5. Clarification Items` (`C-001`, `C-002`, ...) in full, including rows whose `User input` cell is blank — a blank `User input` with `Status=open` is itself a signal you must surface, not skip. Skimming these rows is the most common failure mode here; the fact that the file you will eventually contribute to has a structurally similar section 5 is NOT a license to skim. If the prior report uses the deprecated `4.5.9 Open Questions` / `5.1` / `5.2` layout with `OQ-*` / `A*` / `Q*` IDs, walk all three blocks the same way (legacy carry-in transitional rule).
61
61
  - Before listing any Findings, state one sentence per input file confirming you read it end-to-end (e.g. "Read task-brief.md end-to-end (147 lines)."). If you cannot truthfully say this for a file, record a `tool-failure` in the errors sidecar instead of fabricating Findings.
62
62
  - Do not skip a file because its name suggests its content is already familiar from a prior run. Each file is canonical for the current run only.
63
63
 
@@ -125,7 +125,7 @@ This wrapper does NOT invoke MCP tools directly. MCP availability inside the Cod
125
125
  Before producing any output, you MUST ensure the underlying Codex CLI run reads every input file enumerated in the `[Required reading]` block of the lead's prompt from the very first character to the very last character. This includes the task brief, analysis profile, analysis material (if present), reference expectations, the carry-in clarification response (if present), and the final report template.
126
126
 
127
127
  - The lead's prompt body, which you persist verbatim and feed into Codex via stdin, already contains the explicit list of files and the end-to-end reading rule. Do not strip or summarize that block before passing it to the CLI.
128
- - For the carry-in clarification response, the CLI must walk every row of sub-section 5.1 (`A1`, `A2`, ... material requests) and sub-section 5.2 (`Q1`, `Q2`, ... user questions) in full, even when the answer column is blank. The fact that the prior run's final report and the upcoming output share Section 5 structure is NOT a license to skim.
128
+ - For the carry-in clarification response, the CLI must walk every row of `## 5. Clarification Items` (`C-001`, `C-002`, ...) in full, including rows whose `User input` cell is blank — a blank `User input` with `Status=open` is itself a signal you must surface. The fact that the prior run's final report and the upcoming output share section 5 structure is NOT a license to skim. If the prior report uses the deprecated `4.5.9 Open Questions` / `5.1` / `5.2` layout with `OQ-*` / `A*` / `Q*` IDs, walk all three blocks the same way (legacy carry-in transitional rule).
129
129
  - The Codex output you return MUST begin with one sentence per input file confirming end-to-end reading (e.g. "Read task-brief.md end-to-end (147 lines)."). If any file was skipped, record a `tool-failure` in the errors sidecar instead of fabricating Findings.
130
130
 
131
131
  ## Worker Output Structure
@@ -125,7 +125,7 @@ This wrapper does NOT invoke MCP tools directly. MCP availability inside the Gem
125
125
  Before producing any output, you MUST ensure the underlying Gemini CLI run reads every input file enumerated in the `[Required reading]` block of the lead's prompt from the very first character to the very last character. This includes the task brief, analysis profile, analysis material (if present), reference expectations, the carry-in clarification response (if present), and the final report template.
126
126
 
127
127
  - The lead's prompt body, which you persist verbatim and feed into Gemini via stdin, already contains the explicit list of files and the end-to-end reading rule. Do not strip or summarize that block before passing it to the CLI.
128
- - For the carry-in clarification response, the CLI must walk every row of sub-section 5.1 (`A1`, `A2`, ... material requests) and sub-section 5.2 (`Q1`, `Q2`, ... user questions) in full, even when the answer column is blank. The structural similarity between the prior final report and the upcoming output is the most common reason this step gets skipped — do not repeat that.
128
+ - For the carry-in clarification response, the CLI must walk every row of `## 5. Clarification Items` (`C-001`, `C-002`, ...) in full, including rows whose `User input` cell is blank — a blank `User input` with `Status=open` is itself a signal you must surface. The structural similarity between the prior final report and the upcoming output is the most common reason this step gets skipped — do not repeat that. If the prior report uses the deprecated `4.5.9 Open Questions` / `5.1` / `5.2` layout with `OQ-*` / `A*` / `Q*` IDs, walk all three blocks the same way (legacy carry-in transitional rule).
129
129
  - The Gemini output you return MUST begin with one sentence per input file confirming end-to-end reading (e.g. "Read task-brief.md end-to-end (147 lines)."). If any file was skipped, record a `tool-failure` in the errors sidecar instead of fabricating Findings.
130
130
 
131
131
  ## Worker Output Structure
@@ -46,7 +46,7 @@ If you find yourself thinking "I'll just return the report inline and let lead s
46
46
  Before writing the final report, you MUST read every input file enumerated in the `[Required reading]` block of the lead's prompt from the very first character to the very last character. This always includes `final-report-template.md` and every analysis worker's result file under `worker-results/`, plus the convergence output under `state/convergence-<task-type>-<seq>.json` (if present).
47
47
 
48
48
  - Use a single `Read` call per file with no `offset` and no `limit`. If a file is too large for one read, page through it with explicit `offset` / `limit` calls covering the full file.
49
- - For the carry-in `clarification-response.md` (if present), walk every row of sub-section 5.1 (`A1`, `A2`, ...) and 5.2 (`Q1`, `Q2`, ...) including blank-answer rows. The fact that the file you write has a structurally similar Section 5/6 is NOT an excuse to skim.
49
+ - For the carry-in `clarification-response.md` (if present), walk every row of `## 5. Clarification Items` (`C-001`, `C-002`, ...) including rows whose `User input` cell is blank — a blank cell with `Status=open` is itself a signal you must surface in section 0. The fact that the file you write has a structurally similar section 5 is NOT an excuse to skim. If the prior report uses the deprecated `4.5.9 Open Questions` / `5.1` / `5.2` layout with `OQ-*` / `A*` / `Q*` IDs, walk all three blocks the same way (legacy carry-in transitional rule).
50
50
  - Open every analysis-worker result file under `worker-results/` end-to-end. Do not summarize them from convergence output alone — convergence captures classifications, not full evidence.
51
51
  - Before writing, state one sentence per input file confirming end-to-end reading. If you cannot truthfully say this for a file, record a `tool-failure` in the errors sidecar instead of fabricating the report.
52
52
  - When the convergence-state file is present, read it fully and reproduce the `roundHistory[]` array, `round2SkippedReason`, and `finalClassificationCounts` in the final report's Section 1 Round History sub-table. Do not derive these values from worker results alone — they live in `state/convergence-<task-type>-<seq>.json`.
@@ -61,7 +61,7 @@ Hard rules:
61
61
  - Include all four convergence categories (Full Consensus, Partial Consensus, Contested, Worker-Unique). Do not omit Contested or Worker-Unique findings.
62
62
  - Include a Round History sub-table in Section 1 (one row per executed round) and a `round2SkippedReason` line below it. When convergence is disabled, omit both. The values are quoted verbatim from `state/convergence-<task-type>-<seq>.json` — do not recompute.
63
63
  - Treat `verification-error` votes as their own verdict. They are listed in vote summaries as `verification-error`, not folded into AGREE/DISAGREE counts.
64
- - Include the per-agent execution status table and the token-usage summary section. All numbers come from `team-state-<task-type>-<seq>.json` (populated by `okstra-token-usage.py` at the start of Phase 7). Do not estimate or invent.
64
+ - Include the per-agent execution status table and the token-usage summary section. **Leave the 10 token-related `{{...}}` placeholders verbatim** (`{{LEAD_TOTAL_TOKENS}}`, `{{LEAD_BILLABLE_TOKENS}}`, `{{LEAD_COST_USD}}`, `{{WORKER_TOTAL_TOKENS}}`, `{{WORKER_BILLABLE_TOKENS}}`, `{{WORKER_COST_USD}}`, `{{GRAND_TOTAL_TOKENS}}`, `{{GRAND_BILLABLE_TOKENS}}`, `{{GRAND_COST_USD}}`, `{{CLI_COST_USD}}`). You run in Phase 6, but `team-state-<task-type>-<seq>.json` is populated by `okstra-token-usage.py` at the start of Phase 7 and the same Phase 7 invocation substitutes the placeholders via `--substitute-final-report`. Never replace these cells with `not-collected`, `N/A`, `--`, `0`, or any other sentinel — doing so deletes the substitution target and the report ships with no token numbers. Likewise do NOT append a note like "Phase 7 has not run yet"; that statement is unfalsifiable at write-time and is wrong by the time the report is shipped.
65
65
  - If only one analysis worker produced a usable result, perform a reduced-confidence write-up and say so explicitly.
66
66
  - If evidence is missing, write `I don't know` rather than fabricating confidence.
67
67
  - Cite file paths and line numbers for every code-evidence claim.
@@ -85,4 +85,4 @@ Invoke the `okstra` skill now. Read the manifests below for all task metadata, p
85
85
 
86
86
  - Source path: `{{CLARIFICATION_RESPONSE_RELATIVE_PATH}}`
87
87
  - If the source path above is empty, no prior clarification response was attached to this run.
88
- - If the source path is set, a copy is staged at `{{INSTRUCTION_SET_RELATIVE_PATH}}/clarification-response.md`. Read it before running workers; reconcile each `Q*` row in section 5 of the prior report against new evidence and record the outcome in section 0 of this run's final report.
88
+ - If the source path is set, a copy is staged at `{{INSTRUCTION_SET_RELATIVE_PATH}}/clarification-response.md`. Read it before running workers; reconcile each `C-*` row in section 5 (`## 5. Clarification Items`) of the prior report against new evidence and record the outcome in section 0 of this run's final report. If the prior report uses the deprecated `4.5.9 Open Questions` / `5.1` / `5.2` layout with `OQ-*` / `A*` / `Q*` IDs, follow the legacy-carry-in mapping rule in `final-report-template.md` section 0.
@@ -15,7 +15,7 @@ profile document.
15
15
  - the read-only MCP servers declared in the task brief's `## Available MCP Servers` section may be queried as a read-only cross-check; that section is the canonical source of which servers and tools exist for this run, and any MCP-derived finding MUST cite server, table, and the SELECT used. MCP MUST NEVER be used as a write path — schema/data mutations go through repository migration files reviewed by humans.
16
16
  - Authority & permissions assumption (HARD RULE — applies to every okstra task-type):
17
17
  - **Assume the user (and their team) holds full authority and every permission required for the anticipated, in-flight, or follow-up work.** Treat external approvals, third-party access grants, role/IAM permissions, organisational sign-off, legal/compliance review, vendor coordination, and "verify access exists" steps as already satisfied unless the task brief explicitly states otherwise.
18
- - Do NOT add such items to routing decisions, missing-materials lists, clarification questions, option trade-offs, dependency/migration risk, validation checklists, rollout plans, acceptance blockers, residual risks, release recommendations, `Open Questions`, or any day/effort estimate. They are not legitimate sources of schedule extension.
18
+ - Do NOT add such items to routing decisions, missing-materials lists, clarification questions, option trade-offs, dependency/migration risk, validation checklists, rollout plans, acceptance blockers, residual risks, release recommendations, the `## 5. Clarification Items` table, or any day/effort estimate. They are not legitimate sources of schedule extension.
19
19
  - Internal okstra phase handoffs (e.g. the `User Approval Request` block in `implementation-planning`) are unaffected — those are the user themselves approving and proceed without external coordination.
20
20
  - This rule does NOT relax any phase-specific Forbidden actions list; safety rules in the per-profile document remain in force regardless of the user's authority.
21
21
  - Anti-escalation rule (shared):
@@ -32,8 +32,11 @@ profile document.
32
32
  - On `아니오` / `n` / `keep` → leave the panes intact; remind the user that they will be cleaned up automatically when Claude `/exit` fires the `SessionEnd` hook.
33
33
  - The question MUST be a clean yes/no — do NOT offer "close some / keep some" partial answers, do NOT propose alternatives like "close only codex panes". The whole-set decision keeps the wrap-up predictable.
34
34
  - This step is mandatory for every phase (`requirements-discovery`, `error-analysis`, `implementation-planning`, `implementation`, `final-verification`, `release-handoff`). It is silent-skipped when `$TMUX_PANE` is unset (lead running outside tmux); the lead MUST NOT fabricate a synthetic pane list in that case.
35
- - Clarification request policy (shared — applies whenever a profile uses `## 5. Clarification Requests for the Next Run`):
36
- - section 5 MUST be split into two distinct sub-sections per `final-report-template.md` `5.1 추가 자료 요청 (Additional Materials Requested)` for files/logs/screenshots/links the user must attach, and `5.2 사용자 확인 질문 (Questions for the User)` for decisions or facts only the user can confirm. Never mix material requests and decision questions in the same row or list.
37
- - write every entry in full, descriptive sentences that a non-developer can act on without further context. Avoid abbreviations and internal jargon. For each material request, state *why* it is needed, *where* the user can find it, and *where* to place it. For each question, state *why* the answer changes the next step, *what* is being asked in a complete sentence, and *what shape of answer* is expected (예/아니오, 보기 하나, 숫자/날짜, 짧은 서술 등); supply concrete option choices when applicable.
35
+ - Clarification request policy (shared — applies whenever a profile uses `## 5. Clarification Items`):
36
+ - section 5 is a **single unified table** per `final-report-template.md`. Every clarification item — whether the user must attach a file, choose between options, or supply a single number/path — is one row of that table. Do not split it into sub-sections, do not create a parallel table elsewhere in the report, and do not duplicate the same item into `## 4.5.8 User Approval Request` or any other section.
37
+ - each row's `Kind` column picks one of `{material, decision, data-point}`: `material` for files / snapshots / logs / screenshots the user must attach (the `User input` cell will hold a path or URL); `decision` for choices and yes/no confirmations only the user can make; `data-point` for a single number, ID, date, or short string the user can answer inline. Items that mix "yes/no + file path if yes" are one row of `Kind=material` with the combined expectation written into `Expected form`.
38
+ - each row's `Blocks` column picks one of `{approval, next-phase, none}`. `approval` is reserved for items that gate the `implementation-planning` User Approval Request — never use `approval` outside that task-type. `next-phase` blocks the next run from starting cleanly. `none` is informational/audit-only.
39
+ - write every entry in full, descriptive sentences that a non-developer can act on without further context. Avoid abbreviations and internal jargon. The `Statement` cell must state *what* is needed, *why* the answer / attachment changes the next step, and (for `material`) *where* the user can find it and *where* to place it. The `Expected form` cell must state the shape of the answer (예/아니오, 보기 중 하나, 숫자/날짜, 파일 경로, 짧은 서술 등); supply concrete option choices when applicable.
38
40
  - the same `final-report.md` file is the canonical artifact carried into the next run; the user appends answers inline before rerunning. The preferred turn-around is `scripts/okstra.sh --resume-clarification --task-key <project-id>:<task-group>:<task-id>` (opens the latest report in `$EDITOR`, then auto-reruns the same phase with `--clarification-response` carry-in). The lower-level form `--clarification-response <path>` remains available for scripted runs.
39
- - if a clarification response was carried in for this run, reconcile each prior `A*` (material) and `Q*` (question) row in section 0 and update its `Status` (`resolved`, `obsolete`) before issuing the next decision/verdict.
41
+ - if a clarification response was carried in for this run, walk every `C-*` row of the prior report's `## 5. Clarification Items` table in section 0 of this report, reconcile each one against new evidence, and update its `Status` to `resolved` or `obsolete` before issuing the next decision/verdict.
42
+ - transitional rule for legacy carry-in (one release cycle): if the prior report uses the deprecated `4.5.9 Open Questions` / `5.1 Additional Materials` / `5.2 Questions for the User` layout, follow the mapping described in section 0 of `final-report-template.md` — collapse `OQ-*` / `A*` / `Q*` into `C-*` rows in this run's new section 5 (legacy ID preserved in the Statement for traceability).
@@ -20,7 +20,7 @@
20
20
  - uncertainty boundaries
21
21
  - practical next diagnostic steps
22
22
  - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
23
- - if any blocking uncertainty remains at the time of writing the final report, populate `## 5. Clarification Requests for the Next Run` in `final-report-template.md`
23
+ - if any blocking uncertainty remains at the time of writing the final report, populate `## 5. Clarification Items` in `final-report-template.md` (a single unified table; `Blocks=next-phase` for items the next run cannot start without)
24
24
  - prefer plain Korean over abbreviations (e.g. write "초당 평균 요청 수" instead of "QPS", "재현 절차" instead of "repro")
25
25
  - Non-goals:
26
26
  - implementation details unless they are necessary to validate the cause
@@ -34,7 +34,7 @@
34
34
  - **Two-tier command lookup (shared with `implementation`):** when this phase performs its own independent re-validation, the command source is exactly the same two tiers `implementation` verifiers use — Tier 1 is the originating task brief / approved plan's `validation` set, Tier 2 is `<PROJECT_ROOT>/.project-docs/okstra/project.json` under `qaCommands`. Auto-detecting tools from manifest files is forbidden; missing tiers are recorded as `qa-command not configured: <category>` and do NOT trigger a guess. The `cmd` deny-list (`--fix`, `--write`, ` -w`, ` -u`, `--snapshot-update`, `INSTA_UPDATE=<not-no>`, `cargo update`, `npm install` without `ci`, etc.) is enforced identically. NOTE: runtime fail-fast validation (`okstra_ctl.qa_commands.validate_qa_commands`) only fires at `--task-type implementation` run-prep, so this phase MUST self-check each `qaCommands` entry against the deny-list before executing it — if a denied token is present, skip the command and record it as a `Read-only command log` line `qa-command rejected (denied token: <token>): <label>`.
35
35
  - **Routing recommendation**: brief note on the next safe phase (`done`, `error-analysis`, `implementation-planning`) tied to the verdict and blocker list.
36
36
  - Clarification request policy (phase-specific addendum — shared policy is in `_common-contract.md`):
37
- - populate section 5 only when a blocker hinges on information only the user can supply (deployment intent, intended target environment, business-rule interpretation)
37
+ - populate `## 5. Clarification Items` only when a blocker hinges on information only the user can supply (deployment intent, intended target environment, business-rule interpretation); use `Blocks=next-phase` for items that gate continuing to release-handoff
38
38
  - Self-review pass before finalising the report (`Claude lead` runs this; do not delegate to a generic subagent):
39
39
  1. **Verdict precision** — section 2 includes `Verdict Token` with one of the three allowed verdict tokens; `conditional-accept` lists every condition as an actionable item.
40
40
  2. **Blocker traceability** — every blocker cites a concrete artifact (file:line, log excerpt, test exit code, MCP SELECT). Blockers without evidence are demoted to residual risk or removed.
@@ -12,7 +12,7 @@
12
12
  - read the task brief, related-task briefs, and any cited spec / design doc end-to-end
13
13
  - inspect the current state of every file the task names (or the closest matching files if names are stale) — record current responsibilities, public interfaces, and known coupling points
14
14
  - skim recent commits touching those files (`git log -- <path>`) to surface in-flight work or contested areas
15
- - flag any requirement that is ambiguous, contradictory, or missing success criteria — list these in the report's `Open Questions` block instead of guessing
15
+ - flag any requirement that is ambiguous, contradictory, or missing success criteria — register each one as a row in the report's `## 5. Clarification Items` table with `Blocks=approval` instead of guessing
16
16
  - Primary focus areas:
17
17
  - requirement gaps
18
18
  - affected components and boundaries
@@ -57,7 +57,7 @@
57
57
  - validation checklist (pre / mid / post) — each item is an exact command or observable outcome
58
58
  - rollback strategy — exact revert path (commits, flags, migrations) and the signal that triggers rollback
59
59
  - explicit `User Approval Request (사용자 승인 게이트)` block placed at the **top of the report** with a single canonical checkbox marker `- [ ] Approved` (user toggles to `- [x] Approved` to authorise the next `implementation` run). Section `4.5.8` is retained only as a back-pointer to this top block for validator/key-substring compatibility — it must NOT carry an independent marker.
60
- - `Open Questions` block listing every ambiguity flagged during pre-planning that the user must resolve before approval
60
+ - every ambiguity flagged during pre-planning that the user must resolve before approval, each registered as a `Blocks=approval` row in the `## 5. Clarification Items` table (do NOT create a separate `Open Questions` block under `4.5.x` — the unified table is the single home)
61
61
  - No-placeholder rule (plan failures — reject any option or step that contains these):
62
62
  - "TBD", "TODO", "implement later", "fill in details", "add appropriate error handling", "handle edge cases", "write tests for the above" without actual test code
63
63
  - "similar to Option/Task N" without repeating the concrete content (readers may consume sections out of order)
@@ -67,5 +67,5 @@
67
67
  1. **Spec coverage** — for every requirement in the task brief, point to the option(s) and step(s) that satisfy it. List gaps explicitly.
68
68
  2. **Placeholder scan** — search the report for the patterns in the No-placeholder rule above and fix inline.
69
69
  3. **Internal consistency** — option file lists, trade-off matrix, and recommended step list must agree on file paths, names, and signatures. A symbol called `clearLayers()` in the matrix and `clearFullLayers()` in the steps is a bug.
70
- 4. **Ambiguity check** — any requirement that could be read two ways must be made explicit or moved to `Open Questions`.
70
+ 4. **Ambiguity check** — any requirement that could be read two ways must be made explicit or moved to the `## 5. Clarification Items` table as a `Blocks=approval` row.
71
71
  5. **Scope check** — if the recommended plan now spans multiple independent subsystems, recommend splitting into separate planning runs rather than shipping an oversized plan.
@@ -128,7 +128,7 @@
128
128
  - **Feature-flag-gated changes**: confirm the off-switch path was exercised in this run's validation evidence (i.e. one of the validation commands ran with the flag off and succeeded). A plan that ships a flag without exercising the off-path does NOT satisfy this requirement.
129
129
  - **Schema migrations, config-format changes, or any change with persisted state**: a **dry-run of the rollback step is mandatory**, not preferred. Record the exact rollback command and its captured exit code / stdout. If the migration tool offers no dry-run mode (`--dry-run`, `--plan`, equivalent), the executor MUST refuse to claim rollback verification and instead end the run with a routing recommendation back to `implementation-planning` for a safer rollback strategy. Skipping this step on a stateful change is treated as a `contract-violated` outcome by `final-verification`.
130
130
  - **Routing recommendation for `final-verification`**: brief note on whether the changes are ready for final-verification phase or need a new error-analysis / planning loop first.
131
- - **Follow-up tasks (Section 7 of the final report)**: every item discovered during this run that was *not* delivered MUST appear in the final report's `## 7. Follow-up Tasks (후속 작업)` table with a concrete `Origin`, `New Task ID`, `Suggested task-type`, `Scope`, and `Reason / Why deferred`. Sources include: out-of-scope discoveries that the executor consciously chose not to fold into this run, verifier concerns the executor declined to fix in-place, scope-boundary items from the approved plan that turned out to need their own ticket, and any open question carried over from `4.5.9`. An empty section is acceptable but only when expressed as the single line `- 후속 작업 없음.` — silence is treated as a contract violation. Rows with `Auto-spawn? = yes` will be materialised by `scripts/okstra-spawn-followups.py` in Phase 7; rows with `Auto-spawn? = no` MUST also appear in `Section 6. Recommended Next Steps` so the user knows to act manually.
131
+ - **Follow-up tasks (Section 7 of the final report)**: every item discovered during this run that was *not* delivered MUST appear in the final report's `## 7. Follow-up Tasks (후속 작업)` table with a concrete `Origin`, `New Task ID`, `Suggested task-type`, `Scope`, and `Reason / Why deferred`. Sources include: out-of-scope discoveries that the executor consciously chose not to fold into this run, verifier concerns the executor declined to fix in-place, scope-boundary items from the approved plan that turned out to need their own ticket, and any unresolved `## 5. Clarification Items` row carried over from the approved plan (`Status` ∈ `{open, answered}` at approval time). An empty section is acceptable but only when expressed as the single line `- 후속 작업 없음.` — silence is treated as a contract violation. Rows with `Auto-spawn? = yes` will be materialised by `scripts/okstra-spawn-followups.py` in Phase 7; rows with `Auto-spawn? = no` MUST also appear in `Section 6. Recommended Next Steps` so the user knows to act manually.
132
132
  - Self-review pass before finalising the report (`Claude lead` runs this; do not delegate to a generic subagent):
133
133
  1. **Plan coverage** — every step in the approved plan's recommended option must point to a commit (or an explicit `Skipped: <reason>` entry). List gaps.
134
134
  2. **Evidence completeness** — every `Validation evidence` and `TDD evidence` claim has the actual command line and exit code? No paraphrased "tests pass" without output?
@@ -19,7 +19,7 @@
19
19
  - uncertainty boundaries and missing inputs
20
20
  - next recommended phase and safe resume guidance
21
21
  - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
22
- - if any blocking input is missing at the time of writing the final report, populate `## 5. Clarification Requests for the Next Run` in `final-report-template.md`
22
+ - if any blocking input is missing at the time of writing the final report, populate `## 5. Clarification Items` in `final-report-template.md` (a single unified table; `Blocks=next-phase` for items the next run cannot start without)
23
23
  - prefer concrete questions whose answers map directly to a routing decision (`bugfix` vs `feature`, `error-analysis` vs `implementation-planning`, etc.). State each option in plain language with one sentence describing what choosing it would mean for the next phase.
24
24
  - Non-goals:
25
25
  - full implementation design unless it is required to decide the next phase
@@ -0,0 +1,190 @@
1
+ """Parse the ``## 5. Clarification Items`` table from a final-report markdown.
2
+
3
+ The unified §5 table (introduced when §4.5.9 / §5.1 / §5.2 collapsed into a
4
+ single section) is the canonical home for every clarification an
5
+ implementation-planning run owes the user — decisions, file attachments,
6
+ single data points. Each row carries a ``Blocks`` column whose value picks
7
+ one of ``{approval, next-phase, none}``. Rows with ``Blocks=approval`` are
8
+ the approval gate: they MUST resolve before the user marks the report
9
+ ``Approved`` and starts the next ``implementation`` run.
10
+
11
+ This module exposes one read function for that gate so both
12
+ ``_validate_approved_plan`` (pre-implementation run-prep) and any later
13
+ validator can share the same parsing logic.
14
+
15
+ Legacy compatibility: reports written before the §5 unification used
16
+ ``4.5.9 Open Questions`` + ``5.1 Additional Materials`` + ``5.2 Questions
17
+ for the User`` and lacked a ``Blocks`` column. Those reports cannot be
18
+ gate-checked by Blocks; the parser returns ``None`` to signal "schema
19
+ absent, skip check" rather than fabricating a verdict.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import re
24
+ from dataclasses import dataclass
25
+ from pathlib import Path
26
+ from typing import Optional
27
+
28
+
29
# Matches the level-2 heading line ``## 5. Clarification Items``; only optional
# whitespace may follow the title on that line.
SECTION_HEADING_PATTERN = re.compile(
    r"^##\s+5\.\s+Clarification Items\s*$",
    re.M,
)

# Matches the start of a level-2 heading whose text does not begin with ``5.``.
# Deeper headings (``###`` and below) never match because ``##`` must be
# followed directly by whitespace.
NEXT_TOP_LEVEL_HEADING_PATTERN = re.compile(
    r"^##\s+(?!5\.)",
    re.M,
)
31
+
32
+
33
@dataclass(frozen=True)
class ClarificationItem:
    """Immutable record describing a single data row of the §5 table.

    The parser fills ``kind``, ``blocks`` and ``status`` with lowercased cell
    text so predicates can compare against canonical values, while
    ``raw_blocks`` / ``raw_status`` keep the cell text as written (minus any
    surrounding backticks) for use in diagnostics.
    """

    # Text of the row's ``ID`` column (or of the first column when the table
    # has no ``ID`` header).
    row_id: str
    # Lowercased ``Kind`` cell: "material" | "decision" | "data-point" | other;
    # empty string when the table has no ``Kind`` column.
    kind: str
    # Lowercased ``Blocks`` cell: "approval" | "next-phase" | "none" | other.
    blocks: str
    # Lowercased ``Status`` cell: "open" | "answered" | "resolved" | "obsolete" | other.
    status: str
    # ``Blocks`` cell text with its original casing.
    raw_blocks: str
    # ``Status`` cell text with its original casing.
    raw_status: str
47
+
48
+
49
+ def _strip_backticks(cell: str) -> str:
50
+ s = cell.strip()
51
+ if s.startswith("`") and s.endswith("`") and len(s) >= 2:
52
+ s = s[1:-1].strip()
53
+ return s
54
+
55
+
56
def _split_pipe_row(line: str) -> list[str]:
    """Split a markdown pipe-table row into its cells.

    The outer pipes are dropped, the row is split on every *unescaped* ``|``,
    and each cell is whitespace-trimmed and stripped of one layer of
    surrounding backticks via ``_strip_backticks``.

    A pipe written as ``\\|`` is part of the cell text in GitHub-flavoured
    markdown, not a column boundary. Splitting on it would shift every later
    column, so the ``Blocks`` / ``Status`` values would be read from the wrong
    cells. Escaped pipes are therefore kept inside their cell and returned
    unescaped (``\\|`` becomes ``|``).

    Args:
        line: One raw line of the table, with or without outer pipes.

    Returns:
        The cell texts in column order. An empty row yields ``[""]``.
    """
    stripped = line.strip()
    if stripped.startswith("|"):
        stripped = stripped[1:]
    # A trailing ``\|`` is cell content, not the closing pipe of the row.
    if stripped.endswith("|") and not stripped.endswith("\\|"):
        stripped = stripped[:-1]
    return [
        _strip_backticks(cell.replace("\\|", "|"))
        for cell in re.split(r"(?<!\\)\|", stripped)
    ]
64
+
65
+
66
+ def _is_separator_row(line: str) -> bool:
67
+ """Detect ``|---|---|---|`` divider lines that separate header from body."""
68
+ stripped = line.strip()
69
+ if not stripped.startswith("|"):
70
+ return False
71
+ inner = stripped.strip("|").strip()
72
+ # A separator cell is dashes optionally framed by colons; anything else
73
+ # (real text) means this is a data row, not a separator.
74
+ for cell in inner.split("|"):
75
+ if not re.fullmatch(r"\s*:?-{2,}:?\s*", cell):
76
+ return False
77
+ return True
78
+
79
+
80
def _section_5_slice(report_text: str) -> Optional[str]:
    """Extract the body of the §5 section from a report.

    The returned text starts right after the ``SECTION_HEADING_PATTERN`` match
    and stops just before the first later line matched by
    ``NEXT_TOP_LEVEL_HEADING_PATTERN`` (or runs to the end of the report when
    no such line exists). ``None`` is returned when the §5 heading is absent.
    """
    heading = SECTION_HEADING_PATTERN.search(report_text)
    if heading is None:
        return None
    tail = report_text[heading.end():]
    following = NEXT_TOP_LEVEL_HEADING_PATTERN.search(tail)
    if following is None:
        return tail
    return tail[: following.start()]
89
+
90
+
91
def parse_clarification_items(report_text: str) -> Optional[list[ClarificationItem]]:
    """Parse the data rows of the unified §5 table.

    Returns:
        ``None`` when the §5 section is missing, or when the section contains
        no pipe-table header row carrying both a ``Blocks`` and a ``Status``
        column (legacy layout). Callers must NOT read that as "table is
        empty".

        A list (possibly empty) when such a header exists. ``[]`` means no
        data rows followed the header's separator row, which is a confident
        "no approval-blocking items".
    """
    section = _section_5_slice(report_text)
    if section is None:
        return None

    rows = section.splitlines()

    # The header is the first pipe row whose cells (case-insensitive) include
    # both ``blocks`` and ``status``; other tables in the section are ignored.
    header_at: Optional[int] = None
    columns: list[str] = []
    for pos, text in enumerate(rows):
        if text.lstrip().startswith("|"):
            candidate = [name.lower() for name in _split_pipe_row(text)]
            if "blocks" in candidate and "status" in candidate:
                header_at, columns = pos, candidate
                break
    if header_at is None:
        # §5 heading present but no Blocks/Status header: legacy layout or
        # schema not yet adopted.
        return None

    blocks_at = columns.index("blocks")
    status_at = columns.index("status")
    id_at = columns.index("id") if "id" in columns else 0
    kind_at = columns.index("kind") if "kind" in columns else None
    # A row needs at least this many cells for the mandatory columns to exist.
    min_cells = max(blocks_at, status_at, id_at) + 1

    parsed: list[ClarificationItem] = []
    in_body = False
    for text in rows[header_at + 1:]:
        if not text.lstrip().startswith("|"):
            # The first non-table line after the body began ends the table;
            # before that, stray lines are simply skipped.
            if in_body:
                break
            continue
        if _is_separator_row(text):
            in_body = True
            continue
        if not in_body:
            # Pipe row ahead of the separator (second header or malformed
            # line): ignore it.
            continue

        cells = _split_pipe_row(text)
        if len(cells) < min_cells:
            # Malformed row: skip rather than crash on a missing column.
            continue

        has_kind = kind_at is not None and kind_at < len(cells)
        kind_text = cells[kind_at] if has_kind else ""
        blocks_text = cells[blocks_at]
        status_text = cells[status_at]
        # Template-placeholder rows (e.g. a literal ``C-001`` with sample
        # text) are returned like any other row; callers can filter on
        # ``row_id`` if they need to exclude them.
        parsed.append(
            ClarificationItem(
                row_id=cells[id_at],
                kind=kind_text.lower(),
                blocks=blocks_text.lower(),
                status=status_text.lower(),
                raw_blocks=blocks_text,
                raw_status=status_text,
            )
        )
    return parsed
165
+
166
+
167
# ``Status`` values (canonical lowercase) that count as not yet resolved.
UNRESOLVED_STATUSES = {"open", "answered"}


def unresolved_approval_blockers(report_text: str) -> Optional[list[ClarificationItem]]:
    """Select the §5 rows that still gate the User Approval Request.

    A row qualifies when its canonical ``blocks`` value is ``"approval"`` and
    its canonical ``status`` is one of ``UNRESOLVED_STATUSES``.

    Returns:
        ``None`` when ``parse_clarification_items`` reports that the unified
        §5 schema is absent, so the caller can choose to soft-pass legacy
        reports. Otherwise the qualifying rows in table order (possibly
        empty).
    """
    rows = parse_clarification_items(report_text)
    if rows is None:
        return None

    blockers: list[ClarificationItem] = []
    for row in rows:
        if row.blocks != "approval":
            continue
        if row.status in UNRESOLVED_STATUSES:
            blockers.append(row)
    return blockers
185
+
186
+
187
def unresolved_approval_blockers_in_file(path: Path) -> Optional[list[ClarificationItem]]:
    """Read the report at ``path`` and return its unresolved approval blockers.

    The file is decoded as UTF-8 with undecodable bytes replaced, then handed
    to ``unresolved_approval_blockers``; the return value is passed through
    unchanged.
    """
    report_text = Path(path).read_text(encoding="utf-8", errors="replace")
    return unresolved_approval_blockers(report_text)
@@ -24,6 +24,7 @@ from datetime import datetime, timezone
24
24
  from pathlib import Path
25
25
 
26
26
  from okstra_project import upsert_project_json
27
+ from .clarification_items import unresolved_approval_blockers
27
28
  from .qa_commands import format_errors as _format_qa_errors, validate_qa_commands
28
29
  from .material import (
29
30
  build_analysis_material,
@@ -131,7 +132,8 @@ def _validate_approved_plan(path: str) -> None:
131
132
  p = Path(path)
132
133
  if not p.is_file():
133
134
  raise PrepareError(f"approved plan file not found: {path}")
134
- if not APPROVED_PLAN_PATTERN.search(p.read_text(encoding="utf-8", errors="replace")):
135
+ body = p.read_text(encoding="utf-8", errors="replace")
136
+ if not APPROVED_PLAN_PATTERN.search(body):
135
137
  raise PrepareError(
136
138
  f"approved plan has no recognised user-approval marker: {path}\n"
137
139
  ' canonical form (single line, top-of-report block): "- [x] Approved"\n'
@@ -140,6 +142,21 @@ def _validate_approved_plan(path: str) -> None:
140
142
  " shortcut: re-run okstra with --approve to have the CLI itself "
141
143
  "record the approval marker on this file."
142
144
  )
145
+ # The approval marker is set. Cross-check the §5 Clarification Items
146
+ # table: any row with Blocks=approval that is still open/answered
147
+ # invalidates the approval. Legacy reports (no unified §5 with a
148
+ # Blocks column) return None — soft-pass to preserve compatibility
149
+ # during the transitional release.
150
+ blockers = unresolved_approval_blockers(body)
151
+ if blockers:
152
+ lines = [
153
+ f"approved plan marker is set but §5 has {len(blockers)} unresolved "
154
+ f"`Blocks=approval` row(s); resolve them or mark them obsolete before approving:",
155
+ ]
156
+ for b in blockers:
157
+ lines.append(f" - {b.row_id} (Status={b.raw_status})")
158
+ lines.append(f" file: {path}")
159
+ raise PrepareError("\n".join(lines))
143
160
 
144
161
 
145
162
  # `- [ ] Approved` 라인을 정확히 한 번만 매치한다. 좌측 leading whitespace 와