okstra 0.51.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.kr.md +1 -1
  2. package/README.md +1 -1
  3. package/docs/kr/architecture.md +1 -0
  4. package/docs/kr/cli.md +2 -1
  5. package/docs/superpowers/plans/2026-06-06-final-verification-whole-task-gate.md +993 -0
  6. package/docs/superpowers/plans/2026-06-06-stage-parallel-and-pending-fixes.md +93 -0
  7. package/docs/superpowers/plans/2026-06-06-stage-worktree-isolation-p1.md +447 -0
  8. package/docs/superpowers/plans/2026-06-06-stage-worktree-isolation-p2.md +289 -0
  9. package/docs/superpowers/plans/2026-06-06-stage-worktree-isolation-p3.md +774 -0
  10. package/docs/superpowers/plans/2026-06-06-stage-worktree-isolation-p4.md +303 -0
  11. package/docs/superpowers/plans/2026-06-06-stage-worktree-isolation-p5-multidep-base.md +387 -0
  12. package/docs/superpowers/specs/2026-06-06-final-verification-whole-task-gate-design.md +126 -0
  13. package/docs/superpowers/specs/2026-06-06-stage-worktree-isolation-design.md +180 -0
  14. package/docs/superpowers/specs/2026-06-06-vertical-slice-tdd-planning-design.md +179 -0
  15. package/package.json +1 -1
  16. package/runtime/BUILD.json +2 -2
  17. package/runtime/agents/workers/report-writer-worker.md +1 -0
  18. package/runtime/bin/lib/okstra/cli.sh +5 -1
  19. package/runtime/bin/okstra.sh +1 -0
  20. package/runtime/prompts/launch.template.md +1 -0
  21. package/runtime/prompts/profiles/_implementation-deliverable.md +1 -1
  22. package/runtime/prompts/profiles/_implementation-executor.md +16 -9
  23. package/runtime/prompts/profiles/_implementation-verifier.md +4 -1
  24. package/runtime/prompts/profiles/final-verification.md +7 -7
  25. package/runtime/prompts/profiles/implementation-planning.md +14 -7
  26. package/runtime/prompts/wizard/prompts.ko.json +3 -2
  27. package/runtime/python/okstra_ctl/analysis_packet.py +14 -2
  28. package/runtime/python/okstra_ctl/render.py +3 -0
  29. package/runtime/python/okstra_ctl/run.py +541 -41
  30. package/runtime/python/okstra_ctl/wizard.py +25 -7
  31. package/runtime/python/okstra_ctl/worktree.py +126 -9
  32. package/runtime/python/okstra_ctl/worktree_registry.py +88 -17
  33. package/runtime/schemas/final-report-v1.0.schema.json +36 -0
  34. package/runtime/skills/okstra-convergence/SKILL.md +14 -3
  35. package/runtime/skills/okstra-memory/SKILL.md +28 -5
  36. package/runtime/skills/okstra-run/SKILL.md +1 -1
  37. package/runtime/templates/reports/final-report.template.md +12 -0
  38. package/runtime/templates/reports/final-verification-input.template.md +8 -5
  39. package/runtime/templates/reports/i18n/en.json +3 -1
  40. package/runtime/templates/reports/i18n/ko.json +3 -1
  41. package/runtime/validators/validate-implementation-plan-stages.py +57 -11
  42. package/runtime/validators/validate-run.py +143 -1
  43. package/runtime/validators/validate-workflow.sh +6 -1
  44. package/src/memory.mjs +50 -11
@@ -31,6 +31,7 @@
31
31
  - **Files that change together live together**: split by responsibility, not by technical layer. Penalize options that scatter one logical change across unrelated layers.
32
32
  - **Follow established patterns**: in existing codebases, conform to current conventions. Targeted cleanup of a file you are already modifying is acceptable; unrelated refactors are not.
33
33
  - **YAGNI ruthlessly**: drop features, abstractions, and configuration knobs that do not serve the stated requirement.
34
+ - **Project review-rule preflight**: before choosing the recommended option, look for project-local review rule packs such as `<PROJECT_ROOT>/skills/*review*`, `<PROJECT_ROOT>/.claude/skills/*review*`, and up to two parent directories' `skills/*review*/SKILL.md`. If present, read the relevant `SKILL.md` plus referenced `references/*.md` files and treat their rules as planning constraints. Do not run the PR-review workflow here; extract only the rules. For Fonts Ninja-style TS/NestJS review packs, this means planning away known review findings before code exists: shared transforms instead of duplicate helper stacks, behavioral tests instead of collaborator-tautology assertions, domain rules in domain modules rather than repositories/adapters, domain objects under `domain/`, plain-English functions, truthful/specific names, and no dead APIs introduced by the plan.
34
35
  - Expected output emphasis:
35
36
  - feasible plan options
36
37
  - dependency and risk visibility
@@ -52,10 +53,10 @@
52
53
  - every clarification row carries a recommended answer + one-line rationale inside the `Expected form` cell; rows that lack a recommendation are rejected as half-formed.
53
54
  - **Evidence note required inside `Statement`**: every clarification row includes `Evidence checked: <path:line>` or `Evidence checked: none — <human-only reason>` in the `Statement` cell. `none` is allowed ONLY when the row's nature is "only a human can answer this" (reporter intent, business priority, organisational decision). A row with `none` that *could* have been answered by the codebase is a defect of this phase, restated from the pre-planning rule above.
54
55
  - Section heading contract (BLOCKING — validator scans for these literal English substrings):
55
- - The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stage Map`, `Stage Exit Contract`, `Stage Validation`, `Dependency`, `Validation Checklist`, `Rollback`. (Approval is no longer a body section — it is the YAML frontmatter `approved` field.)
56
+ - The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stage Map`, `Stage Exit Contract`, `Stage Validation`, `Dependency`, `Validation Checklist`, `Rollback`, `Requirement Coverage`. (Approval is no longer a body section — it is the YAML frontmatter `approved` field.)
56
57
  - Korean translations are allowed in parentheses (e.g. `### Recommended Option (권장 옵션)`), but the English keyword must be present verbatim in the heading line.
57
58
  - The shape and ordering follow `final-report-template.md` section 4.5 (`Implementation Plan Deliverables`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and 7-of-8 missing strings is a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
58
- - Beyond substring matching, when the Plan Body Verification gate result is `passed` / `passed-with-dissent`, `validators/validate-run.py` runs the **structural** Stage Map validator (`validators/validate-implementation-plan-stages.py`) at the planning boundary — the exact `## 5.5 Stage Map` heading, each `## 5.5.<i> Stage <i>:` section with its four required subsections, the per-stage effective step count (≤6), and the `depends-on` DAG are all enforced here, not deferred to the `implementation` entry gate.
59
+ - Beyond substring matching, when the Plan Body Verification gate result is `passed` / `passed-with-dissent`, `validators/validate-run.py` runs the **structural** Stage Map validator (`validators/validate-implementation-plan-stages.py`) at the planning boundary — the exact `## 5.5 Stage Map` heading, each `## 5.5.<i> Stage <i>:` section with its four required subsections, the per-stage effective step count (≤6), the `depends-on` DAG, and the per-stage vertical-slice contract (S10) are all enforced here, not deferred to the `implementation` entry gate. S10 scans for the literal in-section strings `Slice value:`, `Acceptance:`, and the Stepwise `action`-cell prefixes `RED:` / `GREEN:` (or a `TDD exemption:` line) — keep these tokens verbatim for the same reason as the heading keywords above.
59
60
  - Required deliverable shape (final report, in addition to the standard sections):
60
61
  - at least two implementation options. **Each option must include**:
61
62
  - **File Structure**: an explicit list of files to create / modify / delete with each file's responsibility (one-line each). Use the form `Create: path — responsibility` / `Modify: path:line-range — change summary` / `Delete: path — reason`.
@@ -64,17 +65,22 @@
64
65
  - trade-off matrix across options (rows = options, columns at minimum: complexity, risk, reversibility, test coverage cost, rollout cost)
65
66
  - recommended option with rationale tied to the design principles above
66
67
  - **Stage Map (mandatory — always emitted, even when N=1):** a table of all stages with `stage | title | depends-on | step-count | exit-contract-summary`. `depends-on` is `(none)` or a comma-separated stage number list. Stages with `depends-on (none)` can be implemented in parallel by two simultaneous `implementation` runs.
68
+ - **Per-stage slice declaration (mandatory two lines, directly under the `## 5.5.<i> Stage <i>:` heading, before `### Carry-In`):**
69
+ - `Slice value: <the one user-observable increment this stage delivers, end-to-end>` — describe WHAT starts working from the consumer's view (e.g. "X 를 조회하면 Y 가 반환된다"), NOT a layer name ("repository 추가"). Validator S10a rejects a missing/empty value.
70
+ - `Acceptance: <the observable pass condition or the exact command>` — the signal that proves the slice is done; normally the same test command that the `RED:` step below flips to PASS. Validator S10b rejects a missing/empty value.
67
71
  - **Per-stage subsections** (`## 5.5.<i> Stage <i>: <title>` for each `i`), each containing the four required subsections:
68
72
  - `### Carry-In` — for `depends-on (none)`: task-brief only. Otherwise: each depended-on stage's static exit contract + runtime sidecar path `runs/<impl-key>/carry/stage-<i>.json` placeholder.
69
- - `### Stepwise Execution Order` — bite-sized table with `step | action | files | command | expected`. **Effective row count ≤ 6** (excluding header / divider / blank). Each step is one action completable in 2–5 minutes; for code steps include actual code or diff sketch; prefer TDD ordering (failing test → implementation → green → commit).
73
+ - `### Stepwise Execution Order` — bite-sized table with `step | action | files | command | expected`. **Effective row count ≤ 6** (excluding header / divider / blank). Each step is one action completable in 2–5 minutes; for code steps include actual code or diff sketch. **TDD ordering is MUST, not a preference:** the **first** effective step's `action` cell MUST start with the literal `RED:` and describe the failing test that captures this stage's `Acceptance` (`expected` = FAIL); at least one later `action` cell MUST start with the literal `GREEN:` and describe the minimal implementation that makes it pass (`expected` = PASS); an optional refactor step starts with `REFACTOR:`. **Exemption:** doc-only / config-only / pure-rename stages with no observable runtime behaviour may omit RED/GREEN by declaring one line `TDD exemption: <reason>` in the stage section (mirrors the executor's per-step exemption in `_implementation-executor.md`). Validator S10c enforces RED-first + GREEN, or the exemption line.
70
74
  - `### Stage Exit Contract` — predicted added/modified files, newly exposed identifiers/types/endpoints, downstream-usable resources.
71
75
  - `### Stage Validation` — pre / mid / post exact commands or observable outcomes for this stage only.
72
- - **Cohesion-first partition rule (1st-class):** the grouping anchor is **shared file/module proximity** — steps touching the same file/directory/module go in the same stage so the diff, PR, and rollback unit are semantically cohesive. A stage is split ONLY when (a) a real `depends-on` data/contract dependency exists, (b) effective steps would exceed 6, or (c) the file sets are disjoint (unrelated work touching no shared file is not crammed together). Maximising the number of parallel stages is NOT a reason to split — parallelism is an emergent property of independent stages, never a partitioning goal.
76
+ - **Vertical-slice-first partition rule (1st-class):** the grouping anchor is a **thin end-to-end vertical slice** — one stage delivers a single user-observable increment, crossing whatever layers are needed (data → service → API → UI) to make that one increment work. File/module proximity is demoted to the **intra-slice grouping rule**: within a slice, keep steps touching the same file/directory/module together so the diff, PR, and rollback unit stay cohesive. **Horizontal layer-splitting is forbidden** — never carve "the DB layer" into one stage and "the service layer" into the next; that produces stages that ship no standalone user value. A stage is split ONLY when (a) a real `depends-on` data/contract dependency exists, (b) effective steps would exceed 6, or (c) it is a distinct vertical slice (a different user-value increment). Maximising the number of parallel stages is NOT a reason to split — parallelism is an emergent property of independent stages, never a partitioning goal.
73
77
  - **Parallel-safety invariant (BLOCKING):** any two stages that are both `depends-on (none)` MUST predict disjoint file sets in their `Stage Exit Contract`. Two parallel `implementation` runs would otherwise edit the same file concurrently. Work touching a shared file must either go in one stage or be ordered with `depends-on`. Enforced by `validators/validate-implementation-plan-stages.py` check S9.
74
78
  - **Stage exit contract is the carry surface:** keep it as narrow as possible. Wider surface = more downstream coupling.
75
79
  - dependency / migration risk assessment (ordering constraints, data backfills, feature-flag prerequisites, repo-internal sequencing)
76
80
  - validation checklist (pre / mid / post) — each item is an exact command or observable outcome
77
81
  - rollback strategy — exact revert path (commits, flags, migrations) and the signal that triggers rollback
82
+ - **Requirement Coverage (mandatory, §5.5.8):** one row per concrete requirement from the task brief / packet. Assign stable IDs `R-001`, `R-002`, ... in source order. Columns: `ID | Source | Requirement | Covered by option / stage / step | Status`. `Source` cites the brief heading or file/line where the requirement came from. `Covered by` must name the specific Option Candidate and Stage/Step that satisfies it, not just "recommended option". `Status` is one of `covered`, `gap`, or `blocked C-NNN`. If any row is `gap` or `blocked C-NNN`, the Plan Body Verification gate MUST NOT be `passed` / `passed-with-dissent`; add a matching `Blocks=approval` row for the blocker and keep `approved: false`.
83
+ - **Review-rule compliance plan:** when a project-local review rule pack is found, each Option Candidate MUST include the design implication of those rules in its File Structure / interfaces / blast-radius notes. For any helper or data transform used by more than one changed service, the plan must either place it in a shared module or explicitly justify why duplication is intentional. For any test step, the plan must state the observable behavior being asserted, not the internal collaborator call being pinned. For any exported/public method added or renamed, the step must carry the intended noun/side-effect semantics so implementation names can be reviewed before code is written.
78
84
  - the YAML frontmatter MUST include the line `approved: false` (report-writer always emits the unflipped value). The user authorises the next `implementation` run by flipping it to `approved: true` (manual edit or `--approve` CLI). Do NOT recreate any `User Approval Request` body block — the validator fails reports that contain one (see `validators/validate-run.py` deprecated patterns).
79
85
  - the YAML frontmatter MUST include the line `implementation-option:` directly under `approved:` (report-writer always emits it with an **empty value**). The user selects which Option Candidate the next `implementation` run executes by filling this line with that option's name (manual edit or `--implementation-option <name>` CLI). When left empty, the `implementation` run falls back to the `Recommended Option`.
80
86
  - **the frontmatter `approved: false` line is rendered unconditionally; if the plan-body verification gate (§5.5.9) returns `blocked-by-disagreement` or `aborted-non-result`, the writer MUST keep `approved: false` and the validator refuses any report that ships with `approved: true` under such a gate result.**
@@ -93,10 +99,11 @@
93
99
  - references to types, functions, flags, or files that no other step or option defines
94
100
  - steps that describe *what* to do without showing *how* (commands, code, or exact diffs are required for any code-touching step)
95
101
  - Self-review pass before finalising the report (`Claude lead` runs this; do not delegate to a generic subagent):
96
- 1. **Spec coverage** — for every requirement in the task brief, point to the option(s) and step(s) that satisfy it. List gaps explicitly.
102
+ 1. **Spec coverage** — for every requirement in the task brief, point to the option(s) and step(s) that satisfy it in `### 5.5.8 Requirement Coverage`. Every row must name the Option Candidate and Stage/Step. List gaps explicitly as `gap` or `blocked C-NNN`; a publishable gate with a non-`covered` row is a validator failure.
97
103
  2. **Placeholder scan** — search the report for the patterns in the No-placeholder rule above and fix inline.
98
104
  3. **Internal consistency** — option file lists, trade-off matrix, and recommended step list must agree on file paths, names, and signatures. A symbol called `clearLayers()` in the matrix and `clearFullLayers()` in the steps is a bug.
99
105
  4. **Ambiguity check** — any requirement that could be read two ways must be made explicit or moved to the `## 1. Clarification Items` table as a `Blocks=approval` row.
100
106
  5. **Scope check** — if the recommended plan now spans multiple independent subsystems, recommend splitting into separate planning runs rather than shipping an oversized plan.
101
- 6. **Plan-body verification reconciliation (BLOCKING for implementation-planning).** Inspect the `### 5.5.9 Plan Body Verification` verdict table. For every plan-item row classified as `majority-disagree C-<N>`, the corresponding `C-<N>` row MUST exist in `## 1. Clarification Items` with `Kind` chosen per the standard policy and `Blocks=approval`. Do NOT create a parallel `### 5.5.x Open Questions` block — the unified table is the single home. Conversely, the `Classification` column's `C-<N>` reference and the `## 1. Clarification Items` `ID` column MUST match 1:1; an orphan on either side is a contract violation. For `partial-consensus` and `worker-unique` plan-items, the dissenting opinion lives in §5.5.9 `Dissent log` and is NOT promoted to §5.
102
- 7. **Stage Map self-check** — for every stage, count the effective rows of its `Stepwise Execution Order` table by hand; reject the draft if any stage exceeds 6. Walk the `depends-on` graph and confirm it is a DAG (no cycle, no self-reference). For each `depends-on` link, confirm it encodes a real data/contract dependency — do NOT add links to serialise unrelated work, and do NOT split a stage merely to create more parallel stages. **Parallel-safety:** for every pair of `depends-on (none)` stages, confirm their `Stage Exit Contract` predicted file sets are disjoint; if they share a file, merge them or add a `depends-on` link (validator S9 rejects overlap).
107
+ 6. **Review-rule preflight check** — if a project review rule pack exists, map each relevant rule to the recommended option. Reject the draft if it knowingly creates a violation that the later PR reviewer would flag, unless the plan records a specific rationale and follow-up. In particular, scan for repeated helper stacks across planned files, tests that assert delegation to the same calculator/helper they exercise, public names that hide side effects, domain rules placed in repositories/adapters, and APIs made dead by this change.
108
+ 7. **Plan-body verification reconciliation (BLOCKING for implementation-planning).** Inspect the `### 5.5.9 Plan Body Verification` verdict table. For every plan-item row classified as `majority-disagree C-<N>`, the corresponding `C-<N>` row MUST exist in `## 1. Clarification Items` with `Kind` chosen per the standard policy and `Blocks=approval`. Do NOT create a parallel `### 5.5.x Open Questions` block — the unified table is the single home. Conversely, the `Classification` column's `C-<N>` reference and the `## 1. Clarification Items` `ID` column MUST match 1:1; an orphan on either side is a contract violation. For `partial-consensus` and `worker-unique` plan-items, the dissenting opinion lives in §5.5.9 `Dissent log` and is NOT promoted to §5.
109
+ 8. **Stage Map self-check** — for every stage, count the effective rows of its `Stepwise Execution Order` table by hand; reject the draft if any stage exceeds 6. Confirm each stage declares a non-empty `Slice value:` and `Acceptance:` line, and that its first step `action` starts with `RED:` with a later `GREEN:` (or carries a `TDD exemption:` line) — this is what validator S10 enforces. Walk the `depends-on` graph and confirm it is a DAG (no cycle, no self-reference). For each `depends-on` link, confirm it encodes a real data/contract dependency — do NOT add links to serialise unrelated work, and do NOT split a stage merely to create more parallel stages. **Parallel-safety:** for every pair of `depends-on (none)` stages, confirm their `Stage Exit Contract` predicted file sets are disjoint; if they share a file, merge them or add a `depends-on` link (validator S9 rejects overlap).
@@ -155,10 +155,11 @@
155
155
  "echo_template": "approved-plan: {value}"
156
156
  },
157
157
  "stage_pick": {
158
- "label": "실행할 stage 를 선택하세요. auto 는 의존성이 만족된 가장 빠른 미완료 stage 를 자동으로 잡습니다.",
158
+ "label": "stage 범위를 선택하세요. auto 는 전체 task(모든 stage)를, 특정 번호는 해당 stage 를 대상으로 합니다.",
159
159
  "echo_template": "stage: {value}",
160
160
  "options": {
161
- "auto": "auto (다음 미완료 stage)"
161
+ "auto": "auto (다음 미완료 stage)",
162
+ "auto_final_verification": "auto (전체 task — 모든 stage 머지 후 한 번)"
162
163
  }
163
164
  },
164
165
  "directive_pick": {
@@ -26,6 +26,14 @@ PROFILE_SECTIONS = (
26
26
  "Non-goals",
27
27
  "Clarification request policy",
28
28
  )
29
+ PROFILE_SECTIONS_BY_TASK_TYPE = {
30
+ "implementation-planning": (
31
+ "Section heading contract",
32
+ "Required deliverable shape",
33
+ "No-placeholder rule",
34
+ "Self-review pass before finalising the report",
35
+ ),
36
+ }
29
37
  CLARIFICATION_SECTIONS = (
30
38
  "Clarification Items",
31
39
  "Clarification Response Carried In From Previous Run",
@@ -123,8 +131,12 @@ def _brief_block(brief_text: str) -> list[str]:
123
131
 
124
132
 
125
133
  def _profile_block(task_type: str, profile_text: str) -> list[str]:
126
- profile_sections = _extract_sections(profile_text, PROFILE_SECTIONS)
127
- profile_bullets = _extract_bullet_sections(profile_text, PROFILE_SECTIONS)
134
+ section_names = (
135
+ PROFILE_SECTIONS
136
+ + PROFILE_SECTIONS_BY_TASK_TYPE.get(task_type, ())
137
+ )
138
+ profile_sections = _extract_sections(profile_text, section_names)
139
+ profile_bullets = _extract_bullet_sections(profile_text, section_names)
128
140
  profile_focus = "\n\n".join(
129
141
  part for part in (profile_sections, profile_bullets)
130
142
  if part and not part.startswith("- No matching")
@@ -1716,6 +1716,9 @@ def apply_lead_prompt_defaults(ctx: dict) -> None:
1716
1716
  # Empty for non-implementation runs; the implementation prepare path
1717
1717
  # overwrites it with the resolved stage-batch directive.
1718
1718
  ctx.setdefault("STAGE_BATCH_DIRECTIVE", "")
1719
+ # Empty for non-final-verification runs; the final-verification prepare
1720
+ # path overwrites it with the resolved verification target block.
1721
+ ctx.setdefault("VERIFICATION_TARGET", "")
1719
1722
  ctx.setdefault(
1720
1723
  "WORKER_PROMPT_PREAMBLE_PATH",
1721
1724
  str(Path.home() / ".okstra" / "templates" / "worker-prompt-preamble.md"),