okstra 0.25.1 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.kr.md +16 -0
  2. package/README.md +16 -0
  3. package/docs/kr/architecture.md +3 -7
  4. package/docs/kr/cli.md +47 -4
  5. package/docs/kr/performance-improvement-plan-v2.md +23 -0
  6. package/docs/kr/performance-improvement-plan.md +22 -0
  7. package/docs/superpowers/specs/2026-05-15-implementation-plan-verification-design.md +254 -0
  8. package/package.json +1 -1
  9. package/runtime/BUILD.json +2 -2
  10. package/runtime/agents/SKILL.md +30 -2
  11. package/runtime/bin/okstra.sh +1 -1
  12. package/runtime/prompts/profiles/_common-contract.md +30 -1
  13. package/runtime/prompts/profiles/error-analysis.md +12 -0
  14. package/runtime/prompts/profiles/implementation-planning.md +23 -0
  15. package/runtime/prompts/profiles/requirements-discovery.md +20 -0
  16. package/runtime/python/lib/okstra/cli.sh +8 -7
  17. package/runtime/python/lib/okstra/globals.sh +3 -1
  18. package/runtime/python/lib/okstra/usage.sh +8 -4
  19. package/runtime/python/okstra_ctl/render.py +35 -0
  20. package/runtime/python/okstra_ctl/run.py +27 -6
  21. package/runtime/python/okstra_ctl/run_context.py +1 -1
  22. package/runtime/python/okstra_ctl/wizard.py +259 -10
  23. package/runtime/python/okstra_token_usage/blocks.py +5 -1
  24. package/runtime/python/okstra_token_usage/claude.py +16 -1
  25. package/runtime/python/okstra_token_usage/collect.py +17 -3
  26. package/runtime/python/okstra_token_usage/pricing.py +159 -24
  27. package/runtime/skills/okstra-brief/SKILL.md +532 -65
  28. package/runtime/skills/okstra-context-loader/SKILL.md +25 -11
  29. package/runtime/skills/okstra-convergence/SKILL.md +235 -8
  30. package/runtime/skills/okstra-history/SKILL.md +68 -37
  31. package/runtime/skills/okstra-logs/SKILL.md +26 -4
  32. package/runtime/skills/okstra-report-finder/SKILL.md +49 -22
  33. package/runtime/skills/okstra-report-writer/SKILL.md +59 -64
  34. package/runtime/skills/okstra-run/SKILL.md +53 -39
  35. package/runtime/skills/okstra-schedule/SKILL.md +51 -20
  36. package/runtime/skills/okstra-setup/SKILL.md +31 -12
  37. package/runtime/skills/okstra-status/SKILL.md +20 -8
  38. package/runtime/skills/okstra-team-contract/SKILL.md +27 -15
  39. package/runtime/skills/okstra-time-summary/SKILL.md +53 -16
  40. package/runtime/templates/reports/final-report.template.md +34 -0
  41. package/runtime/templates/reports/settings.template.json +7 -4
  42. package/runtime/validators/lib/fixtures.sh +10 -2
  43. package/runtime/validators/lib/validate-assets.sh +50 -24
  44. package/runtime/validators/validate-brief.py +385 -0
  45. package/runtime/validators/validate-brief.sh +35 -0
  46. package/runtime/validators/validate-run.py +71 -0
  47. package/runtime/validators/validate-workflow.sh +7 -33
  48. package/src/wizard.mjs +21 -5
@@ -11,6 +11,11 @@ profile document.
11
11
  - default model assignments are resolved from centralised defaults; the fallback values are `Claude lead`/`Report writer worker`=`opus`, `Claude worker`=`sonnet`, `Codex worker`=`gpt-5.5`, `Gemini worker`=`auto`. Phase-specific overrides (e.g. `implementation`'s executor binding) live in the per-profile document.
12
12
  - every required worker listed in the per-profile `Required workers:` block must be attempted; the final verdict waits until each has either a result or an explicit terminal status (`timeout`, `error`, `not-run`).
13
13
  - unnamed generic parallel workers must not replace the required role roster, and no additional sub-agent dispatch is allowed beyond this roster.
14
+ - Worker interaction model (shared — read before inferring behaviour from the roster):
15
+ - the per-profile `Required workers:` block is a **roster**, not a behaviour contract. Each role's interaction mode changes across operating phases of the same run.
16
+ - **Phase 4 / 5 (independent analysis)**: analyser workers (`claude`, `codex`, `gemini` when opted in) produce findings independently and have no access to one another's outputs. `report-writer` does not analyse.
17
+ - **Phase 5.5 (convergence — peer review by workers)**: the lead replays each analyser's findings to the *other* analysers and collects `AGREE` / `DISAGREE` / `SUPPLEMENT` verdicts across up to `effectiveMaxRounds` rounds. Workers act as peer reviewers of each other's findings in this phase; the lead mediates but does not vote. See `skills/okstra-convergence/SKILL.md` for the round protocol, queue invariants, and final classification (`full-consensus` / `partial-consensus` / `contested` / `worker-unique`).
18
+ - Do NOT conclude "no peer review happens" from the roster alone — every profile that lists ≥2 analyser workers runs convergence by default (`convergence.enabled=true` in `task-manifest.json`).
14
19
  - Tooling — read-only MCP availability (shared):
15
20
  - the read-only MCP servers declared in the task brief's `## Available MCP Servers` section may be queried as a read-only cross-check; that section is the canonical source of which servers and tools exist for this run, and any MCP-derived finding MUST cite server, table, and the SELECT used. MCP MUST NEVER be used as a write path — schema/data mutations go through repository migration files reviewed by humans.
16
21
  - Authority & permissions assumption (HARD RULE — applies to every okstra task-type):
@@ -32,10 +37,34 @@ profile document.
32
37
  - On `아니오` / `n` / `keep` → leave the panes intact; remind the user that they will be cleaned up automatically when Claude `/exit` fires the `SessionEnd` hook.
33
38
  - The question MUST be a clean yes/no — do NOT offer "close some / keep some" partial answers, do NOT propose alternatives like "close only codex panes". The whole-set decision keeps the wrap-up predictable.
34
39
  - This step is mandatory for every phase (`requirements-discovery`, `error-analysis`, `implementation-planning`, `implementation`, `final-verification`, `release-handoff`). It is silent-skipped when `$TMUX_PANE` is unset (lead running outside tmux); the lead MUST NOT fabricate a synthetic pane list in that case.
40
+ - Brief handoff contract (shared — applies whenever the run consumes a task brief produced by `okstra-brief`):
41
+ - the brief is a **pre-discovery artifact**: it converts a domain-reporter's words (non-expert *or* developer) into expert-consumable form so this and later phases can run with zero fill-in questions to the operator. The brief is **not** authoritative on solution decisions; it is authoritative on the reporter's intent.
42
+ - **Reporter confirmation precondition (BLOCKING)**: the brief's frontmatter carries `reporter-confirmations: <complete | partial | pending | skipped>` set by `okstra-brief` Step 6.5. Every phase that consumes the brief MUST read this field before doing analysis. The handling matrix is:
43
+ - `complete` → proceed normally.
44
+ - `partial` → proceed; treat still-unmarked `intent-check:` / `conversion-block:` rows as the `skipped` branch.
45
+ - `skipped` → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 5. Clarification Items` as `Kind=decision`. Use `Blocks=approval` in `implementation-planning`, where the row gates the User Approval Request; otherwise use `Blocks=next-phase`. The recommended answer is drawn from the brief's matching content and clearly labelled `보고자 직접 확인 권장`.
46
+ - `pending` (or field missing) → ABORT analysis; write only `## 0. Reporter Confirmation Required` summarising which rows are pending. The final report carries `Blocks=approval` in `implementation-planning`, otherwise `Blocks=next-phase`. The operator must rerun `okstra-brief` Step 6.5.
47
+ `[CONFIRMED <YYYY-MM-DD> → RC-N]` markers on `Open Questions` rows are the per-row signal that the reporter has answered; their answers live verbatim under `## Reporter Confirmations` in the brief.
48
+ - `Source Material` is reporter-verbatim. Do NOT paraphrase, summarize, reorder, or restructure it. Quote it directly when needed.
49
+ - `Augmentation` entries carry one of four labels — `evidence-link`, `format-conversion`, `terminology-mapping`, `intent-inference`. Treat them as follows:
50
+ - `evidence-link` / `format-conversion` → trust without re-verification.
51
+ - `terminology-mapping` → verify against `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` (authoritative); raise a `Clarification Items` row if the mapping is missing or contradicts the glossary.
52
+ - `intent-inference` → treat as an **unverified hypothesis**. Every `intent-inference` augmentation MUST be paired in the brief with an `Open Questions` row prefixed `intent-check:`. Promote that row into the run's `## 5. Clarification Items` table as `Kind=decision, Blocks=next-phase` (or `Blocks=approval` for `implementation-planning`) with the recommended answer set to "보고자에게 직접 확인 후 응답" unless the codebase can be inspected to confirm or refute the inference.
53
+ - `Open Questions` row prefixes are signals — do not strip them when promoting:
54
+ - `intent-check:` → `Kind=decision`, recommended answer = reporter confirmation. NEVER silently resolve an `intent-check:` by inference at this layer.
55
+ - `terminology:` → `Kind=decision`, recommended answer = canonical term from `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` (or "extend okstra glossary via brief Step 4.5").
56
+ - `conversion-block:` → `Kind=decision`, recommended answer = "보고자에게 직접 확인". The brief is explicitly signalling that translation failed; further inference is forbidden until the reporter clarifies.
57
+ - `adr-candidate:` → handled by `implementation-planning`; carry forward without modification. Approved decision files land at `<PROJECT_ROOT>/.project-docs/okstra/decisions/<NNNN>-<slug>.md` (okstra-internal), never at external `<PROJECT_ROOT>/docs/adr/`.
58
+ - `general:` → free-form; classify per the standard `Clarification Items` rules.
59
+ - Any decision in this run that contradicts the brief's `Source Material` must be raised back to the reporter via a `Clarification Items` row; it must NOT be silently overridden. Disagreement with the reporter is allowed only after the row is resolved.
60
+ - This contract is the single authority on brief consumption. Phase-specific addenda may *tighten* these rules but may not relax them.
35
61
  - Clarification request policy (shared — applies whenever a profile uses `## 5. Clarification Items`):
62
+ - **Canonical column schema (SSOT — must match `templates/reports/final-report.template.md` §5.1 exactly):** every `## 5. Clarification Items` table has exactly these 8 columns, in this order:
63
+ `| ID | Ticket ID | Kind | Statement | Expected form | Blocks | Status | User input |`.
64
+ Profile-specific addenda may tighten cell content but MUST NOT add, remove, rename, or reorder columns. The `ID` cell uses `C-NNN` (3-digit zero-padded), the `Status` cell ∈ `{open, answered, resolved, obsolete}`, and the `Kind` / `Blocks` legal values are listed below.
36
65
  - section 5 is a **single unified table** per `templates/reports/final-report.template.md`. Every clarification item — whether the user must attach a file, choose between options, or supply a single number/path — is one row of that table. Do not split it into sub-sections, do not create a parallel table elsewhere in the report, and do not duplicate the same item into `## 4.5.8 User Approval Request` or any other section.
37
66
  - each row's `Kind` column picks one of `{material, decision, data-point}`: `material` for files / snapshots / logs / screenshots the user must attach (the `User input` cell will hold a path or URL); `decision` for choices and yes/no confirmations only the user can make; `data-point` for a single number, ID, date, or short string the user can answer inline. Items that mix "yes/no + file path if yes" are one row of `Kind=material` with the combined expectation written into `Expected form`.
38
- - each row's `Blocks` column picks one of `{approval, next-phase, none}`. `approval` is reserved for items that gate the `implementation-planning` User Approval Request never use `approval` outside that task-type. `next-phase` blocks the next run from starting cleanly. `none` is informational/audit-only.
67
+ - each row's `Blocks` column picks one of `{approval, next-phase, none}`. `approval` is reserved for items that gate an approval action, especially the `implementation-planning` User Approval Request; outside `implementation-planning`, unresolved brief reporter-confirmation rows use `next-phase` instead. `next-phase` blocks the next run from starting cleanly. `none` is informational/audit-only.
39
68
  - write every entry in full, descriptive sentences that a non-developer can act on without further context. Avoid abbreviations and internal jargon. The `Statement` cell must state *what* is needed, *why* the answer / attachment changes the next step, and (for `material`) *where* the user can find it and *where* to place it. The `Expected form` cell must state the shape of the answer (예/아니오, 보기 중 하나, 숫자/날짜, 파일 경로, 짧은 서술 등); supply concrete option choices when applicable.
40
69
  - the same `final-report.md` file is the canonical artifact carried into the next run; the user appends answers inline before rerunning. The preferred turn-around is `scripts/okstra.sh --resume-clarification --task-key <project-id>:<task-group>:<task-id>` (opens the latest report in `$EDITOR`, then auto-reruns the same phase with `--clarification-response` carry-in). The lower-level form `--clarification-response <path>` remains available for scripted runs.
41
70
  - if a clarification response was carried in for this run, walk every `C-*` row of the prior report's `## 5. Clarification Items` table in section 0 of this report, reconcile each one against new evidence, and update its `Status` to `resolved` or `obsolete` before issuing the next decision/verdict.
@@ -8,6 +8,15 @@
8
8
  - Optional workers (opt-in via `--workers`):
9
9
  - gemini — when added to the roster it joins the analyser set; omitted by default
10
10
  {{INCLUDE:_common-contract.md}}
11
+ - Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
12
+ - **Precondition check (BLOCKING — runs before any analysis)**: read the brief's frontmatter `reporter-confirmations:` field and inspect every `Open Questions` row prefixed `intent-check:` / `conversion-block:` for the `[CONFIRMED …]` marker.
13
+ - `reporter-confirmations: complete` → proceed normally.
14
+ - `reporter-confirmations: partial` → proceed; treat still-unmarked `intent-check:` / `conversion-block:` rows per the `skipped` branch below.
15
+ - `reporter-confirmations: skipped` (or `partial` with remainder) → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 5. Clarification Items` as `Kind=decision, Blocks=next-phase`, with the recommended answer drawn from the brief's matching `intent-inference` / `conversion-block:` text and clearly labelled `보고자 직접 확인 권장`. Then proceed with the root-cause analysis using the inference as a *hypothesis* only.
16
+ - `reporter-confirmations: pending` (or field missing) → ABORT analysis. Write only `## 0. Reporter Confirmation Required` summarising which rows are pending and stop. The final report carries `Blocks=next-phase`.
17
+ - the reporter's symptom description in `Source Material` is the ground truth for what to reproduce. Do not paraphrase it when stating the symptom in the report; quote it.
18
+ - any `intent-inference` augmentation that re-characterises the symptom (e.g. classifying "가끔 안 됨" as "intermittent failure on a specific code path") is a **hypothesis**, not a confirmed symptom. If `[CONFIRMED …]` appears on the matching `intent-check:` row, treat the confirmation as the symptom; otherwise, follow the precondition's `skipped` branch above and keep the inference labelled as hypothesis in the root-cause analysis.
19
+ - `conversion-block:` rows mean the brief could not map a reporter statement to project vocabulary; never attempt to invent the missing mapping in this phase — the precondition above already handled them.
11
20
  - Primary focus areas:
12
21
  - symptom and trigger clarification
13
22
  - root-cause candidates
@@ -22,6 +31,9 @@
22
31
  - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
23
32
  - if any blocking uncertainty remains at the time of writing the final report, populate `## 5. Clarification Items` in `templates/reports/final-report.template.md` (a single unified table; `Blocks=next-phase` for items the next run cannot start without)
24
33
  - prefer plain Korean over abbreviations (e.g. write "초당 평균 요청 수" instead of "QPS", "재현 절차" instead of "repro")
34
+ - every clarification row carries a `Recommended` answer + one-line rationale; rows that lack a recommendation are rejected as half-formed.
35
+ - **Codebase-first ambiguity resolution (defect rule)**: any ambiguity about repro, file behavior, or symbol semantics that can be answered by `Read` / `Grep` / log inspection MUST be resolved that way and recorded with file:line (or log-line) evidence. Writing a clarification row for something the codebase or shipped logs already answer is a defect of this phase.
36
+ - **`evidence-checked:` cell required**: every clarification row carries an `evidence-checked: <path:line> | none` cell. `evidence-checked: <path:line>` means the codebase / log / reproducer was inspected and the row records what was found. `evidence-checked: none` is allowed ONLY when the row's nature is "only the reporter can answer this" (reporter-side data, business priority, environment they observed); the row body must state which one in one line. A row with `evidence-checked: none` that *could* have been answered by code or logs is a defect.
25
37
  - Non-goals:
26
38
  - implementation details unless they are necessary to validate the cause
27
39
  - **source code edits, builds, migrations, or deployments** — this run produces evidence and cause analysis only; the fix belongs to a later `implementation-planning` run followed by an `implementation` run
@@ -8,11 +8,21 @@
8
8
  - Optional workers (opt-in via `--workers`):
9
9
  - gemini — when added to the roster it joins the analyser set; omitted by default
10
10
  {{INCLUDE:_common-contract.md}}
11
+ - Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
12
+ - **Precondition check (BLOCKING — runs before option drafting)**: read the brief's frontmatter `reporter-confirmations:` field and inspect every `Open Questions` row prefixed `intent-check:` / `conversion-block:` for the `[CONFIRMED …]` marker.
13
+ - `reporter-confirmations: complete` → proceed normally.
14
+ - `reporter-confirmations: partial` → proceed; treat still-unmarked `intent-check:` / `conversion-block:` rows per the `skipped` branch below.
15
+ - `reporter-confirmations: skipped` (or `partial` with remainder) → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 5. Clarification Items` as `Kind=decision, Blocks=approval`, with the recommended answer drawn from the brief's matching `intent-inference` / `conversion-block:` text and clearly labelled `보고자 직접 확인 권장`. Then proceed; the operator cannot toggle `User Approval Request` until those rows are resolved.
16
+ - `reporter-confirmations: pending` (or field missing) → ABORT planning. Write only `## 0. Reporter Confirmation Required` summarising which rows are pending and stop. The final report carries `Blocks=approval`.
17
+ - never plan around an unconfirmed `intent-inference` augmentation as if it were a settled requirement. After the precondition runs, a `[CONFIRMED …]` marker on the matching `intent-check:` row is the signal that the inference can be treated as settled; otherwise it remains a `Blocks=approval` clarification item per the precondition's `skipped` branch.
18
+ - `conversion-block:` rows are handled by the precondition; planning around an untranslated reporter phrase is forbidden until it is resolved.
11
19
  - Pre-planning context exploration (mandatory before option drafting):
12
20
  - read the task brief, related-task briefs, and any cited spec / design doc end-to-end
13
21
  - inspect the current state of every file the task names (or the closest matching files if names are stale) — record current responsibilities, public interfaces, and known coupling points
14
22
  - skim recent commits touching those files (`git log -- <path>`) to surface in-flight work or contested areas
23
+ - **codebase-first ambiguity resolution**: any ambiguity that can be answered by `Read` / `Grep` MUST be resolved that way and recorded with file:line evidence. Only ambiguities that genuinely require a human decision are escalated as `Clarification Items` rows. Writing a clarification row for something the code already answers is a defect of this phase.
15
24
  - flag any requirement that is ambiguous, contradictory, or missing success criteria — register each one as a row in the report's `## 5. Clarification Items` table with `Blocks=approval` instead of guessing
25
+ - read in priority order — (authoritative) `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` and `<PROJECT_ROOT>/.project-docs/okstra/decisions/` titles if present; (supplementary) `<PROJECT_ROOT>/CONTEXT.md` (or `CONTEXT-MAP.md` → per-context `CONTEXT.md`) and `<PROJECT_ROOT>/docs/adr/` titles if present. Absent external files are the normal state — do not error. Treat the brief's `terminology:*` resolutions from `requirements-discovery` (if any) as authoritative; if missing, resolve any remaining fuzzy term as a `Blocks=approval` clarification row.
16
26
  - Primary focus areas:
17
27
  - requirement gaps
18
28
  - affected components and boundaries
@@ -38,6 +48,9 @@
38
48
  - this run stays in `implementation-planning` regardless of user phrasing — the shared anti-escalation rule applies
39
49
  - dispatching parallel sub-agents beyond the required worker roster — okstra owns worker fan-out
40
50
  - writing artifacts to `docs/superpowers/specs/` or `docs/superpowers/plans/` — the run's `reports/` directory is the canonical location
51
+ - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
52
+ - every clarification row carries a `Recommended` answer + one-line rationale; rows that lack a recommendation are rejected as half-formed.
53
+ - **`evidence-checked:` cell required**: every clarification row carries an `evidence-checked: <path:line> | none` cell. `evidence-checked: <path:line>` means the codebase was inspected and the row records what was found. `evidence-checked: none` is allowed ONLY when the row's nature is "only a human can answer this" (reporter intent, business priority, organisational decision); the row body must state which one in one line. A row with `evidence-checked: none` that *could* have been answered by the codebase is a defect of this phase, restated from the pre-planning rule above.
41
54
  - Section heading contract (BLOCKING — validator scans for these literal English substrings):
42
55
  - The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stepwise Execution Order`, `Dependency`, `Validation Checklist`, `Rollback`, `User Approval Request`.
43
56
  - Korean translations are allowed in parentheses (e.g. `### Recommended Option (권장 옵션)`), but the English keyword must be present verbatim in the heading line.
@@ -57,7 +70,16 @@
57
70
  - validation checklist (pre / mid / post) — each item is an exact command or observable outcome
58
71
  - rollback strategy — exact revert path (commits, flags, migrations) and the signal that triggers rollback
59
72
  - explicit `User Approval Request (사용자 승인 게이트)` block placed at the **top of the report** with a single canonical checkbox marker `- [ ] Approved` (user toggles to `- [x] Approved` to authorise the next `implementation` run). Section `4.5.8` is retained only as a back-pointer to this top block for validator/key-substring compatibility — it must NOT carry an independent marker.
73
+ - **the marker line is rendered only when the plan-body verification gate (§4.5.9) returns `passed` or `passed-with-dissent`.** When the gate returns `blocked-by-disagreement` or `aborted-non-result`, the top-of-report Approval block is rendered **without** the canonical `- [ ] Approved` bullet (the rest of the block — title, summary, audit lines — stays). The `validators/validate-run.py` `validate_phase_boundary` function enforces this exact correspondence between gate result and marker line presence.
60
74
  - every ambiguity flagged during pre-planning that the user must resolve before approval, registered as a `Blocks=approval` row in the `## 5. Clarification Items` table (do NOT create a separate `Open Questions` block under `4.5.x` — the unified table is the single home)
75
+ - **§4.5.9 Plan Body Verification (BLOCKING).** After report-writer finishes the draft, the lead MUST run a worker peer-review round on the consolidated plan body (sections 4.5.1 – 4.5.7) and populate `### 4.5.9 Plan Body Verification` in the final report. The round protocol, plan-item ID scheme (`P-Opt-*` / `P-Step-*` / `P-Dep-*` / `P-Val-*` / `P-Rb-*`), verdict semantics, gate-result classification, and dissent log format are defined in `skills/okstra-convergence/SKILL.md` "Plan-body verification mode". The four gate-result values are `passed`, `passed-with-dissent`, `blocked-by-disagreement`, `aborted-non-result`. When the gate would have been `blocked-by-disagreement` or `aborted-non-result`, the lead MUST NOT silently flip it to one of the passing values to "unblock" the run — that is a contract violation.
76
+ - **ADR evaluation (grill-with-docs adopted, sole owner)**: this phase is the **single owner** of ADR evaluation in the okstra lifecycle. The brief never evaluates or drafts ADRs — it only forwards `adr-candidate:*` signals. Every `adr-candidate:*` entry inherited from the brief's `Open Questions` is a mandatory evaluation target. In addition, evaluate every decision the recommended option introduces against the three ADR criteria:
77
+ 1. **Hard to reverse** — would changing the decision later cost meaningfully more than deciding now?
78
+ 2. **Surprising without context** — would a future reader, seeing only the code, wonder "why was it built this way?"?
79
+ 3. **Real trade-off** — were there named alternatives, and was one picked for specific reasons?
80
+ If **all three** hold, attach a decision draft as a report appendix section titled `Decision Drafts` (one decision per subsection). Each draft uses the `## Context / ## Decision / ## Consequences / ## Alternatives Considered` shape, names the alternatives that were rejected and why, and starts with `## Status: Proposed`. The next decision number is `(max existing in <PROJECT_ROOT>/.project-docs/okstra/decisions/ + 1)` zero-padded to 4 digits. If any of the three criteria is missing, do NOT raise a decision draft — instead record `skipped adr-candidate: <topic> — reason: <criterion that failed>` on one line under `Decision Drafts` so the next reader knows the candidate was evaluated and intentionally dropped.
81
+ The decision files themselves are NOT written to disk by this phase — the drafts live only in the report's `Decision Drafts` appendix. The approved plan's stepwise execution order MUST include the step `Create <PROJECT_ROOT>/.project-docs/okstra/decisions/<NNNN>-<slug>.md from the decision draft in section X` so the `implementation` run commits the file. External `<PROJECT_ROOT>/docs/adr/` is never touched.
82
+ - **Domain-doc proposals**: if `CONTEXT.md` / `CONTEXT-MAP.md` needs a new term or edited definition, add the step `Update CONTEXT.md: <term> = <definition>` to the stepwise execution order. Do NOT edit the file in this phase.
61
83
  - No-placeholder rule (plan failures — reject any option or step that contains these):
62
84
  - "TBD", "TODO", "implement later", "fill in details", "add appropriate error handling", "handle edge cases", "write tests for the above" without actual test code
63
85
  - "similar to Option/Task N" without repeating the concrete content (readers may consume sections out of order)
@@ -69,3 +91,4 @@
69
91
  3. **Internal consistency** — option file lists, trade-off matrix, and recommended step list must agree on file paths, names, and signatures. A symbol called `clearLayers()` in the matrix and `clearFullLayers()` in the steps is a bug.
70
92
  4. **Ambiguity check** — any requirement that could be read two ways must be made explicit or moved to the `## 5. Clarification Items` table as a `Blocks=approval` row.
71
93
  5. **Scope check** — if the recommended plan now spans multiple independent subsystems, recommend splitting into separate planning runs rather than shipping an oversized plan.
94
+ 6. **Plan-body verification reconciliation (BLOCKING for implementation-planning).** Inspect the `### 4.5.9 Plan Body Verification` verdict table. For every plan-item row classified as `majority-disagree → C-<N>`, the corresponding `C-<N>` row MUST exist in `## 5. Clarification Items` with `Kind` chosen per the standard policy and `Blocks=approval`. Do NOT create a parallel `### 4.5.x Open Questions` block — the unified table is the single home. Conversely, the `Classification` column's `C-<N>` reference and the `## 5. Clarification Items` `ID` column MUST match 1:1; an orphan on either side is a contract violation. For `partial-consensus` and `worker-unique` plan-items, the dissenting opinion lives in §4.5.9 `Dissent log` and is NOT promoted to §5.
@@ -8,19 +8,39 @@
8
8
  - Optional workers (opt-in via `--workers`):
9
9
  - gemini — when added to the roster it joins the analyser set; omitted by default
10
10
  {{INCLUDE:_common-contract.md}}
11
+ - Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
12
+ - **Precondition check (BLOCKING — runs before any analysis)**: read the brief's frontmatter `reporter-confirmations:` field and inspect every `Open Questions` row prefixed `intent-check:` / `conversion-block:` for the `[CONFIRMED …]` marker.
13
+ - `reporter-confirmations: complete` → proceed normally (no unresolved reporter-only rows).
14
+ - `reporter-confirmations: partial` → proceed; treat the still-unmarked `intent-check:` / `conversion-block:` rows per the `skipped` branch below.
15
+ - `reporter-confirmations: skipped` (or `partial` with some rows still unmarked) → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 5. Clarification Items` as `Kind=decision, Blocks=next-phase`, with the recommended answer drawn from the brief's matching `intent-inference` / `conversion-block:` text and clearly labelled `보고자 직접 확인 권장`. Then proceed with the rest of the classification work.
16
+ - `reporter-confirmations: pending` (or field missing) → ABORT analysis. Write only `## 0. Reporter Confirmation Required` summarising which rows are pending and stop. The operator must rerun `okstra-brief` Step 6.5 to collect answers, then restart this phase. The final report carries `Blocks=next-phase`.
17
+ - before classifying (after the precondition passes), scan the brief for every `Open Questions` row prefixed `intent-check:` / `terminology:` / `conversion-block:` and every `Augmentation` entry labelled `intent-inference` / `terminology-mapping`. Each one is a translation signal that this phase must resolve OR carry forward.
18
+ - `intent-inference` augmentations whose paired `intent-check:` row carries `[CONFIRMED …]` are treated as **confirmed**; trust the confirmation text in `## Reporter Confirmations` over the original inference if they differ. Unconfirmed `intent-inference` rows under `reporter-confirmations: skipped` follow the precondition's `skipped` branch above.
19
+ - `conversion-block:` rows are explicit "translation failed" signals — never attempt to resolve them by inference here; the precondition above already handled them.
11
20
  - Primary focus areas:
12
21
  - classify the work as bugfix, feature, improvement, refactor, or ops-change
13
22
  - determine whether `error-analysis` or `implementation-planning` is the next safe step; direct `implementation` handoff is not a valid routing target because implementation requires an approved `implementation-planning` report
14
23
  - identify missing materials that block reliable routing
15
24
  - define task continuity expectations for long-running work under the same task key
16
25
  - capture approval or confirmation points before the next phase starts
26
+ - **domain alignment check**: read in priority order — (authoritative) `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` and `<PROJECT_ROOT>/.project-docs/okstra/decisions/` titles if present; (supplementary) `<PROJECT_ROOT>/CONTEXT.md` (or `CONTEXT-MAP.md` → per-context `CONTEXT.md`) and `<PROJECT_ROOT>/docs/adr/` titles if present. Absent external files are normal — do not error. Validate that every `terminology:*` entry under the brief's `Open Questions` has a canonical resolution before routing. Fuzzy or overloaded terms in the brief MUST be resolved to a single canonical term in this phase.
27
+ - Decision-tree walk (grill-me adopted, bounded):
28
+ - When the brief's `Desired Outcome`, classification, or routing target depends on a chain of decisions, walk that chain one branch at a time. Each branch is one `Clarification Items` row, not a free-form interview.
29
+ - For every clarification row, write the row's `Recommended` cell with the single best answer plus a one-line rationale. Other options are listed in `Alternatives` with one-sentence consequences.
30
+ - **Codebase-first rule**: if a branch can be resolved by `Read` / `Grep` / file inspection, resolve it that way and record the evidence in the same row's `Evidence` cell. Do NOT escalate to the user.
31
+ - Budget: the unified `## 5. Clarification Items` table holds at most one row per unresolved decision branch and never more than 8 rows in total. Beyond the cap, fold remaining ambiguity into the routing recommendation's risk notes.
17
32
  - Expected output emphasis:
18
33
  - evidence-backed routing decision
19
34
  - uncertainty boundaries and missing inputs
20
35
  - next recommended phase and safe resume guidance
36
+ - canonical-term resolution for every `terminology:*` brief item, written as a one-line `<term> = <definition>` line in a new `Domain Alignment` subsection of the final report; alongside each, propose whether `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` should be updated (proposal only — actual writes happen via `okstra-brief` Step 4.5 on a subsequent run)
21
37
  - Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
22
38
  - if any blocking input is missing at the time of writing the final report, populate `## 5. Clarification Items` in `final-report-template.md` (a single unified table; `Blocks=next-phase` for items the next run cannot start without)
23
39
  - prefer concrete questions whose answers map directly to a routing decision (`bugfix` vs `feature`, `error-analysis` vs `implementation-planning`, etc.). State each option in plain language with one sentence describing what choosing it would mean for the next phase.
40
+ - every clarification row carries a `Recommended` answer + one-line rationale; rows that lack a recommendation are rejected as half-formed.
41
+ - **Codebase-first ambiguity resolution (defect rule)**: any ambiguity that can be answered by `Read` / `Grep` / file inspection MUST be resolved that way and recorded with file:line evidence. Writing a clarification row for something the codebase already answers is a defect of this phase.
42
+ - **`evidence-checked:` cell required**: every clarification row carries an `evidence-checked: <path:line> | none` cell. `evidence-checked: <path:line>` means the codebase was inspected and the row records what was found (or that the code did not contain the answer). `evidence-checked: none` is allowed ONLY when the row's nature is "only a human can answer this" (reporter intent, business priority, external authority); the row body must state which one in one line. A row with `evidence-checked: none` that *could* have been answered by the codebase is a defect.
24
43
  - Non-goals:
25
44
  - full implementation design unless it is required to decide the next phase
26
45
  - **source code edits, plan authoring, builds, or deployments** — this run only classifies the work and routes it; deeper analysis and planning belong to subsequent phases
46
+ - **edits to any path outside `<PROJECT_ROOT>/.project-docs/okstra/`** — okstra never writes to external paths. Glossary additions land in `<PROJECT_ROOT>/.project-docs/okstra/glossary.md` (via `okstra-brief` Step 4.5); decision drafts land in `<PROJECT_ROOT>/.project-docs/okstra/decisions/` (via `implementation-planning`). External `<PROJECT_ROOT>/CONTEXT.md` / `CONTEXT-MAP.md` / `docs/adr/` are read-only references.
@@ -102,12 +102,6 @@ while [[ $# -gt 0 ]]; do
102
102
  ASSUME_YES="true"
103
103
  shift
104
104
  ;;
105
- --refresh-assets)
106
- printf 'warning: --refresh-assets is deprecated. okstra now installs into ~/.claude and ~/.okstra via okstra-install.sh.\n' >&2
107
- printf ' re-run "%s/scripts/okstra-install.sh --refresh" to refresh installed assets.\n' "$WORKSPACE_ROOT" >&2
108
- REFRESH_OKSTRA_ASSETS="true"
109
- shift
110
- ;;
111
105
  --workers)
112
106
  WORKERS_OVERRIDE="$(require_option_value --workers "${2-}")"
113
107
  shift 2
@@ -195,6 +189,13 @@ while [[ $# -gt 0 ]]; do
195
189
  APPROVE_PLAN_ACK="true"
196
190
  shift
197
191
  ;;
192
+ --no-plan-verification)
193
+ # implementation-planning 의 Phase 6 plan-body verification 라운드를
194
+ # 끈다. 기본값은 활성화. 비활성 시 final-report 상단의 User Approval
195
+ # 체크박스는 무조건 렌더된다 (legacy 동작). 빠른 반복용 opt-out.
196
+ PLAN_VERIFICATION_ENABLED="false"
197
+ shift
198
+ ;;
198
199
  -h|--help)
199
200
  usage
200
201
  exit 0
@@ -224,7 +225,7 @@ while [[ $# -gt 0 ]]; do
224
225
  printf ' hint: did you mean --task-id?\n' >&2
225
226
  ;;
226
227
  esac
227
- printf ' valid options: --render-only --resume-clarification --yes --refresh-assets --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve -h|--help\n' >&2
228
+ printf ' valid options: --render-only --resume-clarification --yes --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve --no-plan-verification -h|--help\n' >&2
228
229
  usage
229
230
  exit 1
230
231
  ;;
@@ -17,7 +17,6 @@ OKSTRA_TASK_CATALOG_RELATIVE_PATH=""
17
17
  RENDER_ONLY="false"
18
18
  ASSUME_YES="false"
19
19
  RESUME_CLARIFICATION_MODE="false"
20
- REFRESH_OKSTRA_ASSETS="false"
21
20
  WORKERS_OVERRIDE=""
22
21
  LEAD_MODEL_OVERRIDE=""
23
22
  CLAUDE_MODEL_OVERRIDE=""
@@ -40,6 +39,9 @@ DIRECTIVE=""
40
39
  CLARIFICATION_RESPONSE_PATH=""
41
40
  APPROVED_PLAN_PATH=""
42
41
  APPROVE_PLAN_ACK="false"
42
+ # Phase 6 plan-body verification toggle. Default "true" (round runs).
43
+ # Flipped to "false" by --no-plan-verification on the CLI.
44
+ PLAN_VERIFICATION_ENABLED="true"
43
45
  CLARIFICATION_RESPONSE_FILE=""
44
46
  CLARIFICATION_RESPONSE_RELATIVE_PATH=""
45
47
  PROJECT_ROOT=""
@@ -3,7 +3,7 @@
3
3
  usage() {
4
4
  cat >&2 <<USAGE_EOF
5
5
  usage:
6
- $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--refresh-assets] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
6
+ $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--no-plan-verification] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
7
7
 
8
8
  summary:
9
9
  $DISPLAY_TOOL_NAME prepares a task-keyed instruction bundle for Claude Code and launches an interactive Claude session by default.
@@ -45,6 +45,13 @@ optional arguments:
45
45
  \`- [ ] Approved\` to \`- [x] Approved\` and appends an approval audit line
46
46
  (timestamp + "CLI --approve"). Use this for scripted/CI flows or when you want a
47
47
  single command to both approve and launch the next phase.
48
+ --no-plan-verification
49
+ Disable the Phase 6 plan-body verification round that runs after the report-writer
50
+ authors the implementation-planning draft. Default: enabled. Only meaningful with
51
+ --task-type=implementation-planning; ignored for other task types. When disabled the
52
+ top-of-report \`User Approval Request\` checkbox renders unconditionally (legacy
53
+ behaviour). Use this for fast iteration; the default is recommended for handoff-ready
54
+ plans.
48
55
  --task-key <project-id:task-group:task-id>
49
56
  Shorthand for --project-id/--task-group/--task-id. When the matching task-manifest.json
50
57
  exists, brief-path and task-type are auto-filled from it (taskBriefPath and
@@ -62,9 +69,6 @@ options:
62
69
  (--project-id/--task-group/--task-id or --task-key). Mutually
63
70
  exclusive with --clarification-response and --approved-plan.
64
71
  --yes Skip interactive prompting and confirmation. Requires all required arguments.
65
- --refresh-assets Deprecated. okstra now installs skills/agents into ~/.claude and the codex
66
- wrapper into ~/.okstra/bin via scripts/okstra-install.sh. Re-run that
67
- installer with --refresh to update installed assets.
68
72
  --workers Comma-separated worker list for this run. Default: claude,codex,report-writer
69
73
  (Gemini worker is optional; add `gemini` explicitly, e.g. --workers claude,codex,gemini,report-writer)
70
74
  --lead-model Model for Claude lead. Default: OKSTRA_DEFAULT_LEAD_MODEL or opus
@@ -338,6 +338,9 @@ def render_task_catalog_discovery(output_path: str, ctx: dict) -> None:
338
338
  "taskType": s(manifest, "taskType"),
339
339
  "workCategory": s(manifest, "workCategory"),
340
340
  "currentStatus": s(manifest, "currentStatus"),
341
+ "workStatus": s(manifest, "workStatus"),
342
+ "workStatusUpdatedAt": s(manifest, "workStatusUpdatedAt"),
343
+ "workStatusNote": s(manifest, "workStatusNote"),
341
344
  "updatedAt": s(manifest, "updatedAt"),
342
345
  "currentPhase": (workflow or {}).get("currentPhase", "") if isinstance(workflow, dict) else "",
343
346
  "currentPhaseState": (workflow or {}).get("currentPhaseState", "") if isinstance(workflow, dict) else "",
@@ -586,6 +589,7 @@ def render_task_manifest(manifest_path: str, ctx: dict) -> None:
586
589
  latest_report_relative = current_report_relative or existing.get("latestReportPath", "")
587
590
  latest_team_state_relative = ctx.get("TEAM_STATE_RELATIVE_PATH", "")
588
591
  latest_resume_command_relative = ctx.get("CLAUDE_RESUME_COMMAND_RELATIVE_PATH", "") or existing.get("latestResumeCommandPath", "")
592
+ convergence_block = _build_convergence_block(ctx)
589
593
  payload = {
590
594
  "schemaVersion": "1.0",
591
595
  "projectId": ctx.get("PROJECT_ID", ""),
@@ -695,12 +699,43 @@ def render_task_manifest(manifest_path: str, ctx: dict) -> None:
695
699
  "sessionId": ctx.get("CLAUDE_SESSION_ID", ""),
696
700
  "resumeCommandPath": ctx.get("CLAUDE_RESUME_COMMAND_RELATIVE_PATH", ""),
697
701
  },
702
+ "convergence": convergence_block,
698
703
  "createdAt": existing.get("createdAt") or ctx.get("RUN_TIMESTAMP_ISO", ""),
699
704
  "updatedAt": ctx.get("RUN_TIMESTAMP_ISO", ""),
700
705
  }
701
706
  _write_json(path, payload)
702
707
 
703
708
 
709
+ def _build_convergence_block(ctx: dict) -> dict:
710
+ """Resolve the `convergence` sub-tree written into task-manifest.json.
711
+
712
+ Defaults follow `skills/okstra-convergence/SKILL.md`:
713
+ - `enabled` default True
714
+ - `maxRounds` default 1 for `requirements-discovery`, 2 otherwise
715
+ - `verificationMode` default "lightweight"
716
+ - `planBodyVerification` is implementation-planning specific; the key is
717
+ always emitted (dead-letter on other phases) so the schema stays stable.
718
+
719
+ ctx knobs honoured:
720
+ - `OKSTRA_PLAN_VERIFICATION`: "true" | "false" | "" (empty → default True).
721
+ Wired from CLI `--no-plan-verification` (sets "false").
722
+ """
723
+ task_type = ctx.get("ANALYSIS_TYPE", "")
724
+ default_max_rounds = 1 if task_type == "requirements-discovery" else 2
725
+ raw_plan_verify = (ctx.get("OKSTRA_PLAN_VERIFICATION", "") or "").strip().lower()
726
+ plan_verify_enabled = raw_plan_verify != "false"
727
+ return {
728
+ "enabled": True,
729
+ "maxRounds": default_max_rounds,
730
+ "verificationMode": "lightweight",
731
+ "planBodyVerification": {
732
+ "enabled": plan_verify_enabled,
733
+ "maxRounds": 1,
734
+ "gating": True,
735
+ },
736
+ }
737
+
738
+
704
739
  def render_run_manifest(run_manifest_path: str, ctx: dict) -> None:
705
740
  task_manifest_path = Path(ctx.get("TASK_MANIFEST_FILE", ""))
706
741
  task_manifest = {}
@@ -113,8 +113,12 @@ class PrepareInputs:
113
113
  # project.json → global config → 스킬 디폴트 순으로 해석된다.
114
114
  pr_template_path: str = ""
115
115
  render_only: bool = False
116
- refresh_assets: bool = False
117
116
  approve_plan_ack: bool = False
117
+ # Phase 6 plan-body verification opt-out. Default True (round runs after
118
+ # report-writer draft). Flipped to False by CLI `--no-plan-verification`.
119
+ # Only meaningful for `--task-type implementation-planning`; the manifest
120
+ # records the value for other phases too to keep the schema stable.
121
+ plan_verification_enabled: bool = True
118
122
 
119
123
 
120
124
  @dataclass
@@ -380,8 +384,8 @@ def _canonical_argv(inp: PrepareInputs, ctx: dict) -> list[str]:
380
384
  argv.extend([flag, val])
381
385
  if inp.render_only:
382
386
  argv.append("--render-only")
383
- if inp.refresh_assets:
384
- argv.append("--refresh-assets")
387
+ if not inp.plan_verification_enabled:
388
+ argv.append("--no-plan-verification")
385
389
  argv.append("--yes")
386
390
  return argv
387
391
 
@@ -632,6 +636,13 @@ def prepare_task_bundle(inp: PrepareInputs) -> PrepareOutputs:
632
636
  "EXECUTOR_WORKTREE_BASE_REF": worktree.base_ref,
633
637
  "EXECUTOR_WORKTREE_STATUS": worktree.status,
634
638
  "EXECUTOR_WORKTREE_NOTE": worktree.note,
639
+ # Phase 6 plan-body verification toggle, read by
640
+ # `render._build_convergence_block` when emitting the manifest's
641
+ # `convergence.planBodyVerification.enabled` field. Default ("")
642
+ # is treated as enabled.
643
+ "OKSTRA_PLAN_VERIFICATION": (
644
+ "false" if not inp.plan_verification_enabled else ""
645
+ ),
635
646
  })
636
647
 
637
648
  if inp.render_only:
@@ -792,7 +803,6 @@ def prepare_task_bundle(inp: PrepareInputs) -> PrepareOutputs:
792
803
  "approvedPlanPath": inp.approved_plan_path,
793
804
  "clarificationResponsePath": inp.clarification_response_path,
794
805
  "renderOnly": inp.render_only,
795
- "refreshAssets": inp.refresh_assets,
796
806
  },
797
807
  )
798
808
 
@@ -909,7 +919,18 @@ def main(argv: list[str]) -> int:
909
919
  ),
910
920
  )
911
921
  p.add_argument("--render-only", action="store_true", dest="render_only")
912
- p.add_argument("--refresh-assets", action="store_true", dest="refresh_assets")
922
+ p.add_argument(
923
+ "--no-plan-verification",
924
+ action="store_false",
925
+ dest="plan_verification_enabled",
926
+ default=True,
927
+ help=(
928
+ "Disable the Phase 6 plan-body verification round for "
929
+ "`--task-type implementation-planning`. Default: enabled. "
930
+ "When disabled, the top-of-report `User Approval Request` "
931
+ "marker line is rendered unconditionally (legacy behaviour)."
932
+ ),
933
+ )
913
934
  p.add_argument(
914
935
  "--work-category",
915
936
  default="",
@@ -974,8 +995,8 @@ def main(argv: list[str]) -> int:
974
995
  clarification_response_path=clarification_abs,
975
996
  pr_template_path=args.pr_template_path,
976
997
  render_only=args.render_only,
977
- refresh_assets=args.refresh_assets,
978
998
  approve_plan_ack=args.approve_plan_ack,
999
+ plan_verification_enabled=args.plan_verification_enabled,
979
1000
  )
980
1001
  try:
981
1002
  out = prepare_task_bundle(inputs)
@@ -140,7 +140,7 @@ def write_run_inputs(
140
140
  inputs schema (모든 키 optional):
141
141
  taskBriefPath, directive, workers, leadModel, claudeModel, codexModel,
142
142
  geminiModel, reportWriterModel, relatedTasks, approvedPlanPath,
143
- clarificationResponsePath, renderOnly, refreshAssets
143
+ clarificationResponsePath, renderOnly
144
144
  """
145
145
  run_manifests_dir = Path(run_manifests_dir)
146
146
  path = run_manifests_dir / _run_inputs_filename(task_type_segment, seq)