okstra 0.55.0 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/bin/okstra +24 -7
  2. package/docs/project-structure-overview.md +0 -1
  3. package/docs/superpowers/plans/2026-05-25-okstra-project-root-rename.md +0 -1
  4. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase2.md +275 -0
  5. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase3.md +282 -0
  6. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4a.md +147 -0
  7. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4b.md +262 -0
  8. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4c.md +184 -0
  9. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4d.md +88 -0
  10. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4e.md +250 -0
  11. package/docs/superpowers/plans/2026-06-07-stage-conformance-qa.md +409 -0
  12. package/docs/superpowers/specs/2026-06-07-stage-conformance-qa-design.md +169 -0
  13. package/package.json +1 -1
  14. package/runtime/BUILD.json +2 -2
  15. package/runtime/bin/lib/okstra/cli.sh +5 -1
  16. package/runtime/bin/lib/okstra/usage.sh +5 -0
  17. package/runtime/bin/okstra.sh +1 -0
  18. package/runtime/prompts/profiles/_implementation-verifier.md +23 -2
  19. package/runtime/prompts/profiles/final-verification.md +1 -0
  20. package/runtime/prompts/profiles/implementation-planning.md +4 -0
  21. package/runtime/python/okstra_ctl/conformance.py +270 -0
  22. package/runtime/python/okstra_ctl/paths.py +2 -0
  23. package/runtime/python/okstra_ctl/run.py +29 -0
  24. package/runtime/skills/okstra-run/SKILL.md +12 -0
  25. package/runtime/skills/okstra-setup/SKILL.md +35 -0
  26. package/runtime/validators/validate-implementation-plan-stages.py +28 -3
  27. package/runtime/validators/validate-run.py +96 -0
  28. package/src/okstra-dirs.mjs +1 -1
  29. package/src/migrate.mjs +0 -146
@@ -39,7 +39,28 @@ Verifier obtains the QA command set from exactly two declared sources, in order
39
39
 
40
40
  ### Execution rule
41
41
 
42
- Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
42
+ Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Then the Tier 3 stage conformance script (below) runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
43
+
44
+ ### Tier 3 — stage conformance scripts (요구사항 부합 검증)
45
+
46
+ Tiers 1·2 prove the diff *builds and passes*; Tier 3 proves the stage actually *meets the upper-level requirement* it was scoped to, by running a declared conformance script against the running state. This is a real gate — its result sidecar is the input the `validate-run.py` Tier 3 gate reads, so a missing or non-PASS result BLOCKS acceptance.
47
+
48
+ - **Source.** The conformance manifest is `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token). This run's stage conformance entry is the manifest `entries[]` item whose `stageKey` equals this run's stageKey — `<task-id>-stage-<N>`, where `<N>` is the injected Stage number. Find that one entry; ignore the others (other stages are run by their own implementation runs or by final-verification).
49
+ - **Exemption / waiver → do NOT run.** If the entry carries an `exemption` (or a user `waiver`), the verifier does NOT execute the script. It records the fact and the reason (`exemption.reason` / `waiver.reason` + `waiver.acknowledgedBy`) in the Read-only command log AND writes the result sidecar reflecting the skip. An `exemption` passes the gate outright; a `waiver` passes but is conditional (conformance left unverified by explicit user acknowledgement). No script runs in either case.
50
+ - **Otherwise run `runCommand` in the worktree cwd.** Execute the entry's `runCommand` verbatim from the worktree cwd. Inject env from `<PROJECT_ROOT>/.okstra/project.json`'s `qaEnv` (replica DB DSN / app base URL / env file — declared in Phase 4e). This is a **replica / test environment only** path — never run it against shared / staging / prod, identical to the DB real-execution gate principle above.
51
+ - **Interpret the standard interface.** Parse the process exit code together with stdout: the `QA-RESULT: PASS|FAIL` marker line (if several appear, the last one wins) and the per-requirement `REQ <id>: PASS|FAIL: <reason>` lines. If no `QA-RESULT` marker is emitted, the overall result is `MISSING` — which the gate treats as BLOCKING (the script broke the contract).
52
+ - **Write the result sidecar (BLOCKING deliverable).** Write `<task_root>/qa/result-<stageKey>.json` as:
53
+ ```json
54
+ {
55
+ "stageKey": "<task-id>-stage-<N>",
56
+ "overall": "PASS",
57
+ "ranAt": "<UTC ISO8601>",
58
+ "requirements": { "<id>": { "status": "PASS", "reason": "<from REQ line>" } }
59
+ }
60
+ ```
61
+ `overall` is exactly one of `PASS` / `FAIL` / `MISSING`. This file is the input to the `validate-run.py` Tier 3 gate — if it is absent the gate reports the stage as "never ran" and BLOCKS, so writing it is mandatory whenever the script runs (and on the exemption/waiver skip path, recording the skip outcome).
62
+ - **Read-only command log.** Record the `runCommand` exact line + its exit code in the Read-only command log. Unlike Tiers 1·2, a conformance script MAY mutate the **replica datastore** (exercising integrated state is its whole purpose) — but only the `qaEnv` replica target, never a shared/staging/prod store. The `runCommand` itself is still subject to the same source/lockfile mutation deny-list as Tier 2 (`--fix`, `npm install` without `ci`, etc.); a denied token aborts with `contract-violated`.
63
+ - **No manifest / no entry for this stage.** If the manifest file is absent, or it has no entry whose `stageKey` matches this run's stageKey, the verifier records `conformance: no manifest entry for <stageKey>` and proceeds (forcing the *declaration* of conformance entries is the job of planning Step 11 + the `validate-run.py` diff-surface cross-check, not the verifier).
43
64
 
44
65
  ### Missing-tier handling
45
66
 
@@ -55,7 +76,7 @@ If the verifier's re-run result differs from what the executor reported (a passi
55
76
 
56
77
  ### Read-only command log (per verifier)
57
78
 
58
- The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order. No mutating command may appear in this block. This log is copied into the final report's verifier result section verbatim.
79
+ The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order — including the Tier 3 conformance `runCommand` (or the exemption/waiver skip note when no script ran). No source-mutating command may appear in this block; the only permitted mutation is a Tier 3 conformance script writing to its `qaEnv` replica datastore, which is logged like any other command. This log is copied into the final report's verifier result section verbatim.
59
80
 
60
81
  ### Verifier evidence is independent of executor evidence
61
82
 
@@ -36,6 +36,7 @@
36
36
  - **Validation Evidence**: for every requirement in the originating plan or task brief, cite the artifact (commit SHA, test output, log line, MCP SELECT result) that demonstrates coverage. Paraphrased "verified" claims without an artifact are rejected.
37
37
  - **Read-only command log**: any pre-existing test/validation command executed during this run MUST be listed with its exact command line and exit code. No mutating commands may appear here.
38
38
  - **Two-tier command lookup (shared with `implementation`):** when this phase performs its own independent re-validation, the command source is exactly the same two tiers `implementation` verifiers use — Tier 1 is the originating task brief / approved plan's `validation` set, Tier 2 is `<PROJECT_ROOT>/.okstra/project.json` under `qaCommands`. Auto-detecting tools from manifest files is forbidden; missing tiers are recorded as `qa-command not configured: <category>` and do NOT trigger a guess. The `cmd` deny-list (`--fix`, `--write`, ` -w`, ` -u`, `--snapshot-update`, `INSTA_UPDATE=<not-no>`, `cargo update`, `npm install` without `ci`, etc.) is enforced identically. NOTE: runtime fail-fast validation (`okstra_ctl.qa_commands.validate_qa_commands`) only fires at `--task-type implementation` run-prep, so this phase MUST self-check each `qaCommands` entry against the deny-list before executing it — if a denied token is present, skip the command and record it as a `Read-only command log` line `qa-command rejected (denied token: <token>): <label>`.
39
+ - **Tier 3 — stage conformance scripts (whole-task union):** because this phase verifies the **integrated, merged** state, it re-runs conformance against that state rather than per-stage. Read the task-level manifest `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token) and, in **whole-task scope**, run the `runCommand` of **every** `entries[]` item against the merged worktree, refreshing each `<task_root>/qa/result-<stageKey>.json` (`{ "stageKey", "overall": "PASS"|"FAIL"|"MISSING", "ranAt", "requirements" }`). In **single-stage scope**, run only the entry whose `stageKey` matches the verified stage. An entry carrying an `exemption` or user `waiver` is NOT executed — record the skip and reason; a `waiver` becomes a `conditional-accept` condition surfaced in the section 7 Verdict (conformance left unverified by user acknowledgement). Each `runCommand` runs in the worktree cwd with `qaEnv` env (replica DB DSN / app base URL / env file) — **replica / test environment only**, never shared / staging / prod, and the same source/lockfile mutation deny-list applies (a conformance script MAY mutate only its `qaEnv` replica datastore). Interpret each result from the exit code + stdout `QA-RESULT: PASS|FAIL` (last wins) and `REQ <id>: PASS|FAIL: <reason>` lines; no `QA-RESULT` marker → `MISSING`. Any entry whose result is not `PASS` (including `MISSING` or a never-run/missing sidecar) is an **Acceptance Blocker** (`major`+) — exactly like the DB real-execution gate above, since `accepted` requires zero blockers the verdict becomes `conditional-accept` / `blocked`. This is the same gate the `validate-run.py` Tier 3 check enforces on the result sidecars.
39
40
  - **Routing recommendation**: the next safe phase — one of `release-handoff`, `done`, `error-analysis`, `implementation-planning` — tied to the verdict and blocker list. `release-handoff` is allowed ONLY when the Verdict Token is `accepted`. `release-handoff` is additionally allowed ONLY when the verification scope (the `Verification scope:` line of the injected `VERIFICATION_TARGET` block, recorded as the report's `verificationScope` field) is `whole-task`; a `single-stage` run is partial and routes to `implementation` / `done` even on an `accepted` verdict.
40
41
  - Clarification request policy (phase-specific addendum — shared policy is in `_common-contract.md`):
41
42
  - populate `## 1. Clarification Items` only when a blocker hinges on information only the user can supply (deployment intent, intended target environment, business-rule interpretation); use `Blocks=next-phase` for items that gate continuing to release-handoff
@@ -71,6 +71,10 @@
71
71
  - **Per-stage subsections** (`## 5.5.<i> Stage <i>: <title>` for each `i`), each containing the four required subsections:
72
72
  - `### Carry-In` — for `depends-on (none)`: task-brief only. Otherwise: each depended-on stage's static exit contract + runtime sidecar path `runs/<impl-key>/carry/stage-<i>.json` placeholder.
73
73
  - `### Stepwise Execution Order` — bite-sized table with `step | action | files | command | expected`. **Effective row count ≤ 6** (excluding header / divider / blank). Each step is one action completable in 2–5 minutes; for code steps include actual code or diff sketch. **TDD ordering is MUST, not a preference:** the **first** effective step's `action` cell MUST start with the literal `RED:` and describe the failing test that captures this stage's `Acceptance` (`expected` = FAIL); at least one later `action` cell MUST start with the literal `GREEN:` and describe the minimal implementation that makes it pass (`expected` = PASS); an optional refactor step starts with `REFACTOR:`. **Exemption:** doc-only / config-only / pure-rename stages with no observable runtime behaviour may omit RED/GREEN by declaring one line `TDD exemption: <reason>` in the stage section (mirrors the executor's per-step exemption in `_implementation-executor.md`). Validator S10c enforces RED-first + GREEN, or the exemption line.
74
+ - **Per-stage conformance declaration (mandatory one line, in the stage section — same placement freedom as `TDD exemption:`):** the stage MUST carry exactly one of:
75
+ - `Conformance tests: stage-<N> — <task_root>/qa/stage-<N>.<ext> (requires=[db|io|http|external,...])` — a Tier 3 verification script that proves this stage's upstream requirements (brief / requirements-discovery / error-analysis / improvement-discovery → this stage's `Acceptance`) hold against **real** DB rows, real endpoints, or the real external API — NOT mocks. When you emit this line you MUST also (a) write the script to `<task_root>/qa/stage-<N>.<ext>` and (b) add a matching entry to `<task_root>/qa/conformance-manifest.json` with fields `stageKey` (= `<task-id>-stage-<N>`), `script`, `runCommand`, `requirementIds`, `requires` (subset of `{db, io, http, external}`), `passContract`, `exemption: null`, `waiver: null`. The script's standard interface: a `main` that exits `0`=PASS / non-zero=FAIL, and whose stdout ends with `QA-RESULT: PASS|FAIL` followed by one `REQ <id>: PASS|FAIL: <reason>` line per requirement.
76
+ - `Conformance exemption: <reason>` — only for stages that touch no db/io/http/external surface, or where unit tests fully cover the increment. (If the eventual `implementation` diff actually touches one of those surfaces, `validate-run.py`'s diff-surface cross-check is BLOCKING — an exemption cannot hide a real db/io/http/external change.)
77
+ The manifest lives at the **task level** (`<task_root>/qa/`, path token `TASK_QA_PATH`) and is shared across planning → implementation → final-verification. This declaration is enforced at three layers: `validators/validate-implementation-plan-stages.py` check **S11** forces every stage to carry one of the two lines; the manifest JSON structure is enforced by `validate_conformance_manifest` (run / validate-run); and the result gate (each script's `QA-RESULT`) is enforced by the verifier Tier 3 + validate-run.
74
78
  - `### Stage Exit Contract` — predicted added/modified files, newly exposed identifiers/types/endpoints, downstream-usable resources.
75
79
  - `### Stage Validation` — pre / mid / post exact commands or observable outcomes for this stage only.
76
80
  - **Vertical-slice-first partition rule (1st-class):** the grouping anchor is a **thin end-to-end vertical slice** — one stage delivers a single user-observable increment, crossing whatever layers are needed (data → service → API → UI) to make that one increment work. File/module proximity is demoted to the **intra-slice grouping rule**: within a slice, keep steps touching the same file/directory/module together so the diff, PR, and rollback unit stay cohesive. **Horizontal layer-splitting is forbidden** — never carve "the DB layer" into one stage and "the service layer" into the next; that produces stages that ship no standalone user value. A stage is split ONLY when (a) a real `depends-on` data/contract dependency exists, (b) effective steps would exceed 6, or (c) it is a distinct vertical slice (a different user-value increment). Maximising the number of parallel stages is NOT a reason to split — parallelism is an emergent property of independent stages, never a partitioning goal.
@@ -0,0 +1,270 @@
1
+ """Stage conformance(Tier 3) 매니페스트 검증 + `QA-RESULT` 파서.
2
+
3
+ implementation/final-verification 의 verifier 는 stage 별 conformance 스크립트를
4
+ 실행해 상위 요구사항 부합을 검증한다. 본 모듈은 그 검증/파싱의 결정론적 코어다.
5
+
6
+ 1. `conformance-manifest.json` 구조 검증 (`validate_conformance_manifest`).
7
+ 2. 스크립트 stdout 의 `QA-RESULT` 마커 파싱 (`parse_qa_result`).
8
+
9
+ 스크립트 실행/게이트 강제는 verifier prompt 와 validators/validate-run.py 가 담당한다.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import fnmatch
14
+ import re
15
+ from dataclasses import dataclass
16
+
17
+ # Whitelist of capability tags that are cross-checked against the surfaces touched by the diff.
18
+ CAPABILITY_WHITELIST: tuple[str, ...] = ("db", "io", "http", "external")
19
+
20
+
21
+ def _check_nonempty_str(value: object, path: str, errors: list[str]) -> bool:
22
+ if not isinstance(value, str) or not value.strip():
23
+ errors.append(f"{path} must be a non-empty string")
24
+ return False
25
+ return True
26
+
27
+
28
def _check_capabilities(value: object, path: str, errors: list[str]) -> None:
    """Check that *value* is a list containing only whitelisted capability tags.

    Args:
        value: The candidate ``requires`` / ``scope`` value.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator; receives one message if *value* is not a list, or
            one message per element that is not in ``CAPABILITY_WHITELIST``.
    """
    if not isinstance(value, list):
        errors.append(f"{path} must be an array")
        return
    allowed = ", ".join(CAPABILITY_WHITELIST)
    errors.extend(
        f"{path}: unknown capability {cap!r} (allowed: {allowed})"
        for cap in value
        if cap not in CAPABILITY_WHITELIST
    )
38
+
39
+
40
def _check_exemption(value: object, path: str, errors: list[str]) -> None:
    """Check the shape of an entry's ``exemption`` field.

    ``None`` means "no exemption" and is accepted silently. Any other value
    must be a dict whose ``reason`` and ``declaredAt`` are non-empty strings.

    Args:
        value: The ``exemption`` value from a manifest entry.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives one message per violation.
    """
    if value is None:
        return
    if isinstance(value, dict):
        for field in ("reason", "declaredAt"):
            _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
        return
    errors.append(f"{path} must be an object or null")
48
+
49
+
50
def _check_waiver(value: object, path: str, errors: list[str]) -> None:
    """Check the shape of an entry's ``waiver`` field.

    ``None`` means "no waiver" and is accepted silently. Any other value must
    be a dict with non-empty string ``acknowledgedBy``, ``reason`` and ``at``;
    its ``scope`` (treated as an empty list when the key is absent) must be a
    list of whitelisted capability tags.

    Args:
        value: The ``waiver`` value from a manifest entry.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives one message per violation.
    """
    if value is None:
        return
    if not isinstance(value, dict):
        errors.append(f"{path} must be an object or null")
        return
    for field in ("acknowledgedBy", "reason", "at"):
        _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
    _check_capabilities(value.get("scope", []), f"{path}.scope", errors)
60
+
61
+
62
def _check_entry(entry: object, idx: int, errors: list[str]) -> None:
    """Validate one item of the manifest's ``entries`` array.

    Args:
        entry: The raw entry value.
        idx: Position of the entry in ``entries``; used in error paths.
        errors: Accumulator that receives one message per violation.
    """
    prefix = f"entries[{idx}]"
    if not isinstance(entry, dict):
        errors.append(f"{prefix} must be an object")
        return

    # Required scalar fields, reported in this fixed order.
    for field in ("stageKey", "script", "runCommand", "passContract"):
        _check_nonempty_str(entry.get(field), f"{prefix}.{field}", errors)

    requirement_ids = entry.get("requirementIds")
    ids_ok = (
        isinstance(requirement_ids, list)
        and bool(requirement_ids)
        and all(isinstance(rid, str) and rid.strip() for rid in requirement_ids)
    )
    if not ids_ok:
        errors.append(f"{prefix}.requirementIds must be a non-empty array of strings")

    # An absent `requires` key is treated as an empty list.
    _check_capabilities(entry.get("requires", []), f"{prefix}.requires", errors)
    _check_exemption(entry.get("exemption"), f"{prefix}.exemption", errors)
    _check_waiver(entry.get("waiver"), f"{prefix}.waiver", errors)
81
+
82
+
83
def validate_conformance_manifest(manifest: object) -> list[str]:
    """Validate a whole conformance manifest and return the violation messages.

    An absent manifest (``None``) is legal and yields no errors: a task may
    have no conformance scripts, and per the original note the gate itself
    (diff-surface cross-check) is decided by validators/validate-run.py.

    Args:
        manifest: The parsed manifest JSON, or ``None`` when no manifest exists.

    Returns:
        A list of human-readable violation messages; empty when the manifest
        is structurally valid.
    """
    if manifest is None:
        return []
    if not isinstance(manifest, dict):
        return [f"conformance manifest must be an object, got {type(manifest).__name__}"]

    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return ["conformance manifest .entries must be an array"]

    problems: list[str] = []
    known_keys: set[str] = set()
    for position, item in enumerate(entries):
        _check_entry(item, position, problems)
        if not isinstance(item, dict):
            continue
        stage_key = item.get("stageKey")
        if not (isinstance(stage_key, str) and stage_key):
            continue
        # Each stageKey may appear at most once across the manifest.
        if stage_key in known_keys:
            problems.append(f"entries[{position}].stageKey duplicate: {stage_key!r}")
        else:
            known_keys.add(stage_key)
    return problems
106
+
107
+
108
# Separators are restricted to spaces/tabs (not `\s`) so a pattern can never
# run across a newline: with `\s*`, a `REQ <id>: PASS:` line that has an empty
# reason would swallow the following line as its reason, hiding that line's own
# result. A trailing `\r` is tolerated so CRLF output still matches.
_QA_RESULT_RE = re.compile(r"^QA-RESULT:[ \t]*(PASS|FAIL)[ \t]*\r?$", re.MULTILINE)
_REQ_LINE_RE = re.compile(r"^REQ[ \t]+(\S+):[ \t]*(PASS|FAIL):[ \t]*(.*)$", re.MULTILINE)


@dataclass
class QaResult:
    """Parsed outcome of one conformance script run."""

    # Overall marker: "PASS" | "FAIL" | "MISSING".
    overall: str
    # Requirement id -> {"status": "PASS" | "FAIL", "reason": str}.
    requirements: dict[str, dict[str, str]]


def parse_qa_result(stdout: str) -> QaResult:
    """Parse the ``QA-RESULT`` marker and ``REQ`` lines from script stdout.

    Each marker / requirement must sit on a single line of its own. When
    several ``QA-RESULT`` markers are present the last one wins. When no
    marker is present ``overall`` is ``"MISSING"`` (the script did not honour
    the output contract). If a requirement id appears on several ``REQ``
    lines, the last line wins.

    Args:
        stdout: Captured standard output of the script; ``None`` or an empty
            string is treated as empty output.

    Returns:
        A ``QaResult`` holding the overall marker and the per-requirement
        status / reason mapping.
    """
    text = stdout or ""
    markers = _QA_RESULT_RE.findall(text)
    overall = markers[-1] if markers else "MISSING"
    requirements: dict[str, dict[str, str]] = {
        rid: {"status": status, "reason": reason.strip()}
        for rid, status, reason in _REQ_LINE_RE.findall(text)
    }
    return QaResult(overall=overall, requirements=requirements)
131
+
132
+
133
@dataclass
class ConformanceVerdict:
    """Gate decision for a single stage's conformance entry."""

    stage_key: str
    # One of "PASS" | "BLOCKING" | "WAIVED" | "EXEMPT".
    status: str
    # Whether the run may proceed (True for PASS / WAIVED / EXEMPT).
    ok: bool
    # True only for WAIVED: conformance left unverified by user acknowledgement.
    conditional: bool
    message: str
140
+
141
+
142
def decide_conformance_gate(entry: dict, result: object) -> ConformanceVerdict:
    """Decide the gate for one stage entry given its run result.

    Precedence: exemption, then waiver, then the recorded result. A result
    that never ran (``None``), is ``MISSING``, or is anything other than
    ``PASS`` is BLOCKING. The shape of ``exemption`` / ``waiver`` is assumed
    to have been checked already by ``validate_conformance_manifest``.

    Args:
        entry: One manifest entry.
        result: An object exposing ``overall`` (e.g. a ``QaResult``), or
            ``None`` when no result was recorded for the stage.

    Returns:
        The ``ConformanceVerdict`` for this stage.
    """
    stage_key = entry.get("stageKey", "<unknown>")

    exemption = entry.get("exemption")
    if exemption:
        return ConformanceVerdict(
            stage_key=stage_key,
            status="EXEMPT",
            ok=True,
            conditional=False,
            message=f"conformance exempted: {exemption.get('reason', '')}",
        )

    waiver = entry.get("waiver")
    if waiver:
        acknowledged_by = waiver.get("acknowledgedBy", "?")
        waiver_reason = waiver.get("reason", "")
        return ConformanceVerdict(
            stage_key=stage_key,
            status="WAIVED",
            ok=True,
            conditional=True,
            message=f"conformance waived by {acknowledged_by}: {waiver_reason}",
        )

    # `overall` is None when `result` itself is None, i.e. the script never ran.
    overall = getattr(result, "overall", None)
    if overall == "PASS":
        return ConformanceVerdict(
            stage_key=stage_key,
            status="PASS",
            ok=True,
            conditional=False,
            message="conformance PASS",
        )

    if overall is None:
        blocking_message = "conformance script never ran (no result recorded)"
    elif overall == "MISSING":
        blocking_message = "conformance script ran but emitted no QA-RESULT marker"
    else:
        blocking_message = f"conformance {overall}"
    return ConformanceVerdict(
        stage_key=stage_key,
        status="BLOCKING",
        ok=False,
        conditional=False,
        message=blocking_message,
    )
176
+
177
+
178
def qa_result_from_dict(data: object) -> QaResult:
    """Rebuild a ``QaResult`` from a result-sidecar JSON object.

    Intended for validate-run when it loads the ``result-<stageKey>.json``
    sidecar written by the verifier. A malformed payload is downgraded to
    ``overall="MISSING"`` so that the gate treats it as blocking.

    Args:
        data: The parsed sidecar JSON (expected to be a dict).

    Returns:
        A ``QaResult``; ``overall`` falls back to ``"MISSING"`` when *data* is
        not a dict or its ``overall`` is not PASS / FAIL / MISSING, and
        ``requirements`` falls back to ``{}`` when it is not a dict.
    """
    if not isinstance(data, dict):
        return QaResult(overall="MISSING", requirements={})

    raw_overall = data.get("overall")
    overall = raw_overall if raw_overall in ("PASS", "FAIL", "MISSING") else "MISSING"

    raw_requirements = data.get("requirements")
    requirements = raw_requirements if isinstance(raw_requirements, dict) else {}
    return QaResult(overall=overall, requirements=requirements)
189
+
190
+
191
def evaluate_conformance(manifest: object, results_by_stage: object) -> list[ConformanceVerdict]:
    """Return the gate verdict for every entry in the manifest.

    Structural validation is expected to have been done beforehand with
    ``validate_conformance_manifest``; entries that are not dicts are skipped.

    Args:
        manifest: The parsed manifest; anything without a list ``entries``
            yields an empty result.
        results_by_stage: Mapping of stageKey -> ``QaResult``. A stageKey with
            no mapping is treated as "never ran" (``None``). A non-dict value
            is treated as an empty mapping.

    Returns:
        One ``ConformanceVerdict`` per dict entry, in manifest order.
    """
    if not isinstance(manifest, dict):
        return []
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return []

    results = results_by_stage if isinstance(results_by_stage, dict) else {}
    return [
        decide_conformance_gate(item, results.get(item.get("stageKey")))
        for item in entries
        if isinstance(item, dict)
    ]
208
+
209
+
210
# Default mapping from path globs to capability surfaces. Per-project overrides
# come from qaEnv.surfacePatterns (Phase 4e). 'external' has no default pattern
# because it is hard to detect from a path — it relies on explicit declaration.
_DEFAULT_SURFACE_PATTERNS: dict[str, tuple[str, ...]] = {
    "db": ("*.sql", "*migration*", "*repository*", "*.entity.*", "*entities*", "*schema.prisma*"),
    "http": ("*controller*", "*.routes.*", "*router*", "*endpoint*", "*.api.*"),
    "io": ("*filesystem*", "*storage*", "*.fs.*"),
}


def detect_surfaces(file_paths: object, patterns: object = None) -> set[str]:
    """Detect the capability surfaces touched by a set of changed file paths.

    Matching is case-insensitive: both the path and each glob are lowercased
    before ``fnmatch`` is applied, so an override pattern such as
    ``*Controller*`` matches ``src/UserController.ts``.

    Args:
        file_paths: Iterable of changed file paths. Non-string items are
            skipped; ``None`` / empty yields an empty set.
        patterns: Optional ``{surface: globs}`` override. ``globs`` may be a
            list/tuple/set of glob strings or a single glob string. When
            *patterns* is not a dict the default mapping is used.

    Returns:
        The set of surface names for which at least one path matched.
    """
    table = patterns if isinstance(patterns, dict) else _DEFAULT_SURFACE_PATTERNS
    found: set[str] = set()
    for raw in file_paths or []:
        if not isinstance(raw, str):
            continue
        path = raw.strip().lower()
        for surface, globs in table.items():
            if isinstance(globs, str):
                # A bare string is one pattern; iterating it would yield single
                # characters, and the "*" character would match every path.
                globs = (globs,)
            elif not isinstance(globs, (list, tuple, set, frozenset)):
                continue  # malformed override value: nothing to match against
            if any(
                isinstance(glob, str) and fnmatch.fnmatch(path, glob.lower())
                for glob in globs
            ):
                found.add(surface)
    return found
232
+
233
+
234
+ def parse_qa_waiver_arg(arg: object) -> tuple[str, str] | None:
235
+ """`--qa-waiver` 값 `<stageKey>:<reason>` 를 (stageKey, reason) 로 분해.
236
+ 형식이 아니거나 비면 None."""
237
+ if not isinstance(arg, str) or ":" not in arg:
238
+ return None
239
+ key, reason = arg.split(":", 1)
240
+ key, reason = key.strip(), reason.strip()
241
+ if not key or not reason:
242
+ return None
243
+ return key, reason
244
+
245
+
246
def apply_qa_waiver(manifest: object, stage_key: str, reason: str, *, at: str,
                    acknowledged_by: str = "user") -> bool:
    """Fill in the ``waiver`` of the manifest entry for *stage_key*, in place.

    This is the user-acknowledged bypass (spec §7.2); *reason* is meant to be
    the user's instruction verbatim. Only the first entry whose ``stageKey``
    equals *stage_key* is updated, and any existing waiver on it is replaced.

    Args:
        manifest: The parsed manifest; mutated when a matching entry exists.
        stage_key: The ``stageKey`` of the entry to waive.
        reason: The waiver reason to record.
        at: Timestamp string to record on the waiver.
        acknowledged_by: Who acknowledged the waiver.

    Returns:
        True when a matching entry was found and updated, otherwise False.
    """
    if not isinstance(manifest, dict):
        return False
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return False

    target = next(
        (
            item
            for item in entries
            if isinstance(item, dict) and item.get("stageKey") == stage_key
        ),
        None,
    )
    if target is None:
        return False
    target["waiver"] = {
        "acknowledgedBy": acknowledged_by,
        "reason": reason,
        "scope": [],
        "at": at,
    }
    return True
259
+
260
+
261
def manifest_required_surfaces(manifest: object) -> set[str]:
    """Return the union of ``requires`` over all manifest entries.

    Entries that are not dicts, ``requires`` values that are not lists, and
    non-string items inside ``requires`` are ignored.

    Args:
        manifest: The parsed manifest.

    Returns:
        The set of declared surface tags; empty when the manifest has no list
        ``entries``.
    """
    if not isinstance(manifest, dict):
        return set()
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return set()

    declared: set[str] = set()
    for item in entries:
        if not isinstance(item, dict):
            continue
        requires = item.get("requires")
        if isinstance(requires, list):
            declared |= {cap for cap in requires if isinstance(cap, str)}
    return declared
@@ -117,6 +117,7 @@ def compute_run_paths(
117
117
  task_index = task_root / "task-index.md"
118
118
  instruction_set = task_root / "instruction-set"
119
119
  analysis_packet = instruction_set / "analysis-packet.md"
120
+ task_qa = task_root / "qa"
120
121
  runs_dir = task_root / "runs"
121
122
  history_dir = task_root / "history"
122
123
  timeline_file = history_dir / "timeline.json"
@@ -202,6 +203,7 @@ def compute_run_paths(
202
203
  "TASK_INDEX_PATH": str(task_index),
203
204
  "INSTRUCTION_SET_PATH": str(instruction_set),
204
205
  "ANALYSIS_PACKET_PATH": str(analysis_packet),
206
+ "TASK_QA_PATH": str(task_qa),
205
207
  "RUNS_DIR": str(runs_dir),
206
208
  "HISTORY_DIR": str(history_dir),
207
209
  "TIMELINE_PATH": str(timeline_file),
@@ -276,6 +276,9 @@ class PrepareInputs:
276
276
  work_category: str = ""
277
277
  base_ref: str = ""
278
278
  approved_plan_path: str = ""
279
+ # implementation 전용: `--qa-waiver "<stageKey>:<reason>"` 사용자 확인형 우회.
280
+ # prepare-time 에 task-level conformance 매니페스트 entry.waiver 를 채운다.
281
+ qa_waiver: str = ""
279
282
  stage: str = "auto"
280
283
  clarification_response_path: str = "" # absolute or empty
281
284
  # release-handoff 전용: PR 본문 템플릿 1회성 override. 빈 문자열이면
@@ -1092,6 +1095,28 @@ def _validate_prepare_inputs(project_root: Path, inp: PrepareInputs) -> list:
1092
1095
  return ctx_stage_map
1093
1096
 
1094
1097
 
1098
def _apply_qa_waiver_if_requested(inp: "PrepareInputs", project_root: Path) -> None:
    """Record a ``--qa-waiver`` in the task-level conformance manifest.

    Does nothing when ``inp.qa_waiver`` is empty. Otherwise the value is parsed
    as ``<stageKey>:<reason>``, the matching manifest entry's ``waiver`` is
    filled in, and the manifest file is rewritten.

    Args:
        inp: Prepare inputs; ``qa_waiver``, ``task_group`` and ``task_id`` are read.
        project_root: Project root passed to ``task_dir`` to locate the task.

    Raises:
        PrepareError: If the flag value is malformed, the manifest file is
            missing or is not valid UTF-8 JSON, or the stageKey is not present
            in the manifest.
    """
    if not inp.qa_waiver:
        return
    from .conformance import apply_qa_waiver, parse_qa_waiver_arg
    from .paths import task_dir

    parsed = parse_qa_waiver_arg(inp.qa_waiver)
    if parsed is None:
        raise PrepareError(
            f'--qa-waiver must be "<stageKey>:<reason>", got {inp.qa_waiver!r}'
        )
    stage_key, reason = parsed

    manifest_path = (
        task_dir(project_root, inp.task_group, inp.task_id)
        / "qa"
        / "conformance-manifest.json"
    )
    if not manifest_path.is_file():
        raise PrepareError(f"--qa-waiver: conformance manifest not found at {manifest_path}")

    # Read and write as UTF-8 explicitly: the file is written with
    # ensure_ascii=False and the reason is free-form user text, so relying on
    # the locale's default encoding could fail or produce non-UTF-8 JSON.
    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    except ValueError as exc:  # covers JSONDecodeError and UnicodeDecodeError
        raise PrepareError(
            f"--qa-waiver: conformance manifest at {manifest_path} is not valid UTF-8 JSON: {exc}"
        ) from exc

    when = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    if not apply_qa_waiver(manifest, stage_key, reason, at=when):
        raise PrepareError(f"--qa-waiver: stageKey {stage_key!r} not in manifest {manifest_path}")
    manifest_path.write_text(
        json.dumps(manifest, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
1118
+
1119
+
1095
1120
  def _register_and_check_project(project_root: Path, inp: PrepareInputs) -> None:
1096
1121
  """project.json self-registration + (implementation 한정) qaCommands gate 검증."""
1097
1122
  from okstra_project import ResolverError
@@ -1120,6 +1145,7 @@ def _register_and_check_project(project_root: Path, inp: PrepareInputs) -> None:
1120
1145
  qa_errors = validate_qa_commands(project_meta.get("qaCommands"))
1121
1146
  if qa_errors:
1122
1147
  raise PrepareError(_format_qa_errors(qa_errors))
1148
+ _apply_qa_waiver_if_requested(inp, project_root)
1123
1149
 
1124
1150
 
1125
1151
  def _resolve_roster(inp: PrepareInputs, profile_file: Path) -> tuple[list[str], str]:
@@ -1860,6 +1886,8 @@ def main(argv: list[str]) -> int:
1860
1886
  p.add_argument("--critic", default="")
1861
1887
  p.add_argument("--related-tasks", default="", dest="related_tasks_raw")
1862
1888
  p.add_argument("--approved-plan", default="", dest="approved_plan_path")
1889
+ p.add_argument("--qa-waiver", default="", dest="qa_waiver",
1890
+ help='Stage conformance 우회: "<stageKey>:<reason>" (사용자 확인형, 매니페스트 entry.waiver 기록)')
1863
1891
  p.add_argument(
1864
1892
  "--stage", default="auto", dest="stage",
1865
1893
  help=(
@@ -1975,6 +2003,7 @@ def main(argv: list[str]) -> int:
1975
2003
  work_category=args.work_category,
1976
2004
  base_ref=args.base_ref,
1977
2005
  approved_plan_path=args.approved_plan_path,
2006
+ qa_waiver=args.qa_waiver,
1978
2007
  stage=args.stage,
1979
2008
  clarification_response_path=clarification_abs,
1980
2009
  pr_template_path=args.pr_template_path,
@@ -184,6 +184,18 @@ The python function underneath is mutex-protected (`~/.okstra/.locks/<task-key>.
184
184
 
185
185
  You can delete the state file after this point — its job is done. Invoke `rm` with the literal path (e.g. `rm /var/folders/.../okstra-wizard.AbCd.json`), not a shell variable.
186
186
 
187
+ ### Step 5.1 (implementation only): conformance waiver offer
188
+
189
+ `render-bundle` accepts an optional `--qa-waiver "<stageKey>:<reason>"` flag (implementation only). It records a **user-acknowledged** waiver into the task-level conformance manifest entry (`entry.waiver`), letting the run proceed when a stage's Tier 3 conformance script genuinely cannot run (e.g. the replica DB is unreachable). The waiver records the user's reason **verbatim**.
190
+
191
+ This is **never** a lead/worker self-exemption — only the user may waive. Offer it **only** when a conformance BLOCKING verdict is expected (the chosen stage declares a conformance entry whose script you cannot run in this environment). Present it as a 3-option recommendation picker (per the run-prompt recommendation rule):
192
+
193
+ 1. (recommended) Run the conformance script — no waiver.
194
+ 2. Waive this stage — ask the user for the exact `<stageKey>` and reason, then pass `--qa-waiver "<stageKey>:<reason>"` to `render-bundle` (reason = the user's words, unedited).
195
+ 3. 직접 입력 (direct input) — the user types the full `<stageKey>:<reason>` value.
196
+
197
+ When the user picks a waiver, append `--qa-waiver "<stageKey>:<reason>"` to the `render-bundle` invocation above. Omit the flag entirely otherwise (do **not** pass `--qa-waiver ""`). A malformed value or unknown `<stageKey>` aborts `render-bundle` with a `PrepareError`.
198
+
187
199
  ## Step 6: Take over as Claude lead
188
200
 
189
201
  Read `<INSTRUCTION_SET_PATH>/claude-execution-prompt.md` verbatim and enter `Claude lead` mode. The lead prompt now points to compact intake artifacts first (`active-run-context`, `analysis-profile.md`, and `analysis-packet.md`); full source files such as `analysis-material.md`, `reference-expectations.md`, and `final-report-template.md` are lazy/fallback inputs. Follow the rendered prompt order, do not preempt it.
@@ -181,6 +181,41 @@ The field is preserved across the runtime's auto-upserts of
181
181
  `updatedAt` are runtime-owned, so manual edits to `qaCommands`
182
182
  survive every subsequent `okstra setup` / `okstra run` invocation.
183
183
 
184
+ ### Step 4.6.1 (optional): `qaEnv` — Tier 3 conformance environment
185
+
186
+ `implementation` / `final-verification` verifiers run **stage
187
+ conformance scripts** (Tier 3) that may need to reach a database or an
188
+ HTTP endpoint to prove the diff satisfies upstream requirements. Declare
189
+ the environment those scripts are allowed to touch under `qaEnv`. Every
190
+ field is optional; declare only what your conformance scripts use.
191
+
192
+ ```json
193
+ "qaEnv": {
194
+ "replicaDbDsn": "<replica/test DB DSN — never shared/staging/prod>",
195
+ "appBaseUrl": "http://localhost:3000",
196
+ "envFile": ".okstra/qa.env",
197
+ "surfacePatterns": { "db": ["*.sql", "*repository*"], "http": ["*controller*"] }
198
+ }
199
+ ```
200
+
201
+ - `replicaDbDsn` — DSN the conformance script connects to. MUST be a
202
+ replica / disposable test DB, **never** a shared, staging, or
203
+ production database (conformance scripts may write).
204
+ - `appBaseUrl` — base URL for endpoint-level conformance checks
205
+ (local app only).
206
+ - `envFile` — path (under `.okstra/`) to an env file the verifier
207
+ sources before running conformance scripts.
208
+ - `surfacePatterns` — per-project **override** of the diff-surface
209
+ cross-check map (`capability → glob list`). The validator maps each
210
+ changed file to a capability surface (`db` / `http` / `io`) and fails
211
+ the run when the diff touches a surface no stage `requires`. The
212
+ built-in patterns (e.g. `*router*` for `http`, `*storage*` for `io`)
213
+ are broad and match many front-end files, so front-end-heavy repos
214
+ should override with narrower globs to avoid false BLOCKING verdicts
215
+ (Phase 4b review note). An over-broad pattern over-blocks; an
216
+ over-narrow one lets an undeclared surface through — tune to the
217
+ repo's real db/http/io file naming.
218
+
184
219
  ## Step 4.7 (automatic): project-local Claude settings symlink
185
220
 
186
221
  `okstra setup` (and `okstra run` on its first invocation per project)
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- """S1–S10 checks for the Stage Map structure of an approved
2
+ """S1–S11 checks for the Stage Map structure of an approved
3
3
  implementation-planning final-report.md. Run from prepare_task_bundle
4
4
  of `implementation` task or standalone."""
5
5
 
@@ -40,7 +40,7 @@ class StageMeta:
40
40
 
41
41
  @dataclass
42
42
  class ValidationError:
43
- code: str # S1..S10
43
+ code: str # S1..S11
44
44
  stage: int # 0 = global
45
45
  message: str
46
46
 
@@ -168,6 +168,8 @@ def _check_each_stage_section(text: str, stages: List[StageMeta]) -> List[Valida
168
168
  SLICE_VALUE = re.compile(r"^\s*Slice value\s*:\s*(.+?)\s*$", re.M)
169
169
  ACCEPTANCE = re.compile(r"^\s*Acceptance\s*:\s*(.+?)\s*$", re.M)
170
170
  TDD_EXEMPTION = re.compile(r"^\s*TDD exemption\s*:\s*\S", re.M)
171
+ CONFORMANCE_TESTS = re.compile(r"^\s*Conformance tests\s*:\s*\S", re.M)
172
+ CONFORMANCE_EXEMPTION = re.compile(r"^\s*Conformance exemption\s*:\s*\S", re.M)
171
173
 
172
174
 
173
175
  def _check_slice_tdd(text: str, stages: List[StageMeta]) -> List[ValidationError]:
@@ -204,6 +206,28 @@ def _check_slice_tdd(text: str, stages: List[StageMeta]) -> List[ValidationError
204
206
  return errs
205
207
 
206
208
 
209
def _check_conformance_declaration(
    text: str, stages: List[StageMeta]
) -> List[ValidationError]:
    """S11: every stage declares conformance verification or explicitly exempts it.

    Each stage section must contain either a `Conformance tests:` line
    (declaring the Tier 3 verification script) or a `Conformance exemption:`
    line (stating why no test is needed). This stops, at the planning
    boundary, the silent pass (DEV-9184) in which a diff touches a
    db/io/http surface without any declaration.

    Args:
        text: Full report text the stage sections are sliced from.
        stages: Parsed stage metadata; one check is run per entry.

    Returns:
        One `ValidationError` with code "S11" for each stage whose section
        has neither line, in the order of `stages`; empty when all comply.
    """

    def _has_declaration(section: str) -> bool:
        # The exemption pattern is only consulted when the tests pattern
        # does not match.
        if CONFORMANCE_TESTS.search(section):
            return True
        return bool(CONFORMANCE_EXEMPTION.search(section))

    message = (
        "S11: stage must declare 'Conformance tests:' (Tier3 검증 스크립트) "
        "or 'Conformance exemption:' (사유) — stage conformance QA design §12.2"
    )
    return [
        ValidationError("S11", stage.stage_number, message)
        for stage in stages
        if not _has_declaration(_slice_stage_section(text, stage.stage_number))
    ]
229
+
230
+
207
231
  def _check_depends_on(stages: List[StageMeta]) -> List[ValidationError]:
208
232
  errs: List[ValidationError] = []
209
233
  valid = {s.stage_number for s in stages}
@@ -274,7 +298,7 @@ def _check_parallel_safety(text: str, stages: List[StageMeta]) -> List[Validatio
274
298
 
275
299
 
276
300
  def collect_validation_errors(text: str) -> List[ValidationError]:
277
- """All S1–S10 checks against the report text; empty list means valid.
301
+ """All S1–S11 checks against the report text; empty list means valid.
278
302
 
279
303
  S1 (missing `## 5.5 Stage Map` heading) makes the rest unparseable, so it
280
304
  short-circuits. Shared by `main()` (CLI / implementation entry) and the
@@ -290,6 +314,7 @@ def collect_validation_errors(text: str) -> List[ValidationError]:
290
314
  if stages:
291
315
  errors.extend(_check_each_stage_section(text, stages))
292
316
  errors.extend(_check_slice_tdd(text, stages))
317
+ errors.extend(_check_conformance_declaration(text, stages))
293
318
  errors.extend(_check_depends_on(stages))
294
319
  errors.extend(_check_parallel_safety(text, stages))
295
320
  return errors