okstra 0.54.0 → 0.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/okstra +24 -7
- package/docs/project-structure-overview.md +0 -1
- package/docs/superpowers/plans/2026-05-25-okstra-project-root-rename.md +0 -1
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase2.md +275 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase3.md +282 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4a.md +147 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4b.md +262 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4c.md +184 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4d.md +88 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4e.md +250 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa.md +409 -0
- package/docs/superpowers/specs/2026-06-07-stage-conformance-qa-design.md +169 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/agents/workers/report-writer-worker.md +1 -1
- package/runtime/bin/lib/okstra/cli.sh +5 -1
- package/runtime/bin/lib/okstra/usage.sh +5 -0
- package/runtime/bin/okstra-inject-report-index.py +66 -0
- package/runtime/bin/okstra.sh +1 -0
- package/runtime/prompts/profiles/_implementation-verifier.md +23 -2
- package/runtime/prompts/profiles/final-verification.md +1 -0
- package/runtime/prompts/profiles/implementation-planning.md +4 -0
- package/runtime/prompts/profiles/improvement-discovery.md +1 -0
- package/runtime/python/okstra_ctl/clarification_items.py +10 -1
- package/runtime/python/okstra_ctl/conformance.py +270 -0
- package/runtime/python/okstra_ctl/paths.py +2 -0
- package/runtime/python/okstra_ctl/render_final_report.py +221 -2
- package/runtime/python/okstra_ctl/report_views.py +23 -4
- package/runtime/python/okstra_ctl/run.py +29 -0
- package/runtime/skills/okstra-run/SKILL.md +12 -0
- package/runtime/skills/okstra-setup/SKILL.md +35 -0
- package/runtime/templates/reports/i18n/en.json +6 -0
- package/runtime/templates/reports/i18n/ko.json +6 -0
- package/runtime/validators/lib/fixtures.sh +9 -0
- package/runtime/validators/validate-implementation-plan-stages.py +28 -3
- package/runtime/validators/validate-run.py +136 -1
- package/runtime/validators/validate_improvement_report.py +5 -1
- package/src/okstra-dirs.mjs +1 -1
- package/src/migrate.mjs +0 -146
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CLI entry for the top-of-report index / scroll-anchor injector.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python3 scripts/okstra-inject-report-index.py \\
|
|
6
|
+
<final-report.md> \\
|
|
7
|
+
[--report-language en|ko]
|
|
8
|
+
|
|
9
|
+
Adds the top-of-report Index (section list + ID index) and `<a id="...">`
|
|
10
|
+
scroll anchors to a markdown report that was authored *free-form* rather
|
|
11
|
+
than rendered from a data.json. The only such task-type today is
|
|
12
|
+
`improvement-discovery`: its `## 5.9 Improvement Candidates` table is
|
|
13
|
+
written directly by the report-writer worker, so the data.json renderer
|
|
14
|
+
(which injects the index for every other task-type) never sees it.
|
|
15
|
+
|
|
16
|
+
Idempotent — re-running on an already-indexed report is a no-op.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
_HERE = Path(__file__).resolve().parent
|
|
25
|
+
# Make ``okstra_ctl`` importable for in-repo invocation (the installed
|
|
26
|
+
# runtime adds ``~/.okstra/lib/python`` via the wrapper scripts).
|
|
27
|
+
sys.path.insert(0, str(_HERE))
|
|
28
|
+
|
|
29
|
+
from okstra_ctl.i18n import SUPPORTED_LANGS # noqa: E402
|
|
30
|
+
from okstra_ctl.render_final_report import ( # noqa: E402
|
|
31
|
+
FinalReportRenderError,
|
|
32
|
+
inject_index_into_file,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def main(argv: list[str]) -> int:
    """Parse CLI arguments and inject the index/anchors into one report file.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        ``0`` on success, ``1`` when ``inject_index_into_file`` raises
        ``FinalReportRenderError`` (the error text is printed to stderr).
    """
    cli = argparse.ArgumentParser(
        description="Inject the top-of-report index + scroll anchors into a free-form markdown report.",
    )
    cli.add_argument(
        "report",
        type=Path,
        help="Path to the final-report markdown to rewrite in place.",
    )
    cli.add_argument(
        "--report-language",
        choices=list(SUPPORTED_LANGS),
        default="en",
        help="Language for the index labels (Index/목차, …). Default: en.",
    )
    options = cli.parse_args(argv)
    target = options.report

    try:
        written = inject_index_into_file(
            target, report_language=options.report_language
        )
    except FinalReportRenderError as failure:
        # Render failures are reported as a one-line message, not a traceback.
        print(f"error: {failure}", file=sys.stderr)
        return 1

    print(f"injected index + anchors -> {target} ({written} bytes)")
    return 0
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__":
|
|
66
|
+
raise SystemExit(main(sys.argv[1:]))
|
package/runtime/bin/okstra.sh
CHANGED
|
@@ -123,6 +123,7 @@ PY_ARGS=(
|
|
|
123
123
|
[[ -n "${WORK_CATEGORY-}" ]] && PY_ARGS+=(--work-category "$WORK_CATEGORY")
|
|
124
124
|
[[ -n "${BASE_REF-}" ]] && PY_ARGS+=(--base-ref "$BASE_REF")
|
|
125
125
|
[[ -n "${STAGE-}" ]] && PY_ARGS+=(--stage "$STAGE")
|
|
126
|
+
[[ -n "${QA_WAIVER-}" ]] && PY_ARGS+=(--qa-waiver "$QA_WAIVER")
|
|
126
127
|
[[ "$RENDER_ONLY" == "true" ]] && PY_ARGS+=(--render-only)
|
|
127
128
|
[[ "$PLAN_VERIFICATION_ENABLED" == "false" ]] && PY_ARGS+=(--no-plan-verification)
|
|
128
129
|
|
|
@@ -39,7 +39,28 @@ Verifier obtains the QA command set from exactly two declared sources, in order
|
|
|
39
39
|
|
|
40
40
|
### Execution rule
|
|
41
41
|
|
|
42
|
-
Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
|
|
42
|
+
Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Then the Tier 3 stage conformance script (below) runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
|
|
43
|
+
|
|
44
|
+
### Tier 3 — stage conformance scripts (요구사항 부합 검증)
|
|
45
|
+
|
|
46
|
+
Tiers 1·2 prove the diff *builds and passes*; Tier 3 proves the stage actually *meets the upper-level requirement* it was scoped to, by running a declared conformance script against the running state. This is a real gate — its result sidecar is the input the `validate-run.py` Tier 3 gate reads, so a missing or non-PASS result BLOCKS acceptance.
|
|
47
|
+
|
|
48
|
+
- **Source.** The conformance manifest is `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token). This run's stage conformance entry is the manifest `entries[]` item whose `stageKey` equals this run's stageKey — `<task-id>-stage-<N>`, where `<N>` is the injected Stage number. Find that one entry; ignore the others (other stages are run by their own implementation runs or by final-verification).
|
|
49
|
+
- **Exemption / waiver → do NOT run.** If the entry carries an `exemption` (or a user `waiver`), the verifier does NOT execute the script. It records the fact and the reason (`exemption.reason` / `waiver.reason` + `waiver.acknowledgedBy`) in the Read-only command log AND writes the result sidecar reflecting the skip. An `exemption` passes the gate outright; a `waiver` passes but is conditional (conformance left unverified by explicit user acknowledgement). No script runs in either case.
|
|
50
|
+
- **Otherwise run `runCommand` in the worktree cwd.** Execute the entry's `runCommand` verbatim from the worktree cwd. Inject env from `<PROJECT_ROOT>/.okstra/project.json`'s `qaEnv` (replica DB DSN / app base URL / env file — declared in Phase 4e). This is a **replica / test environment only** path — never run it against shared / staging / prod, identical to the DB real-execution gate principle above.
|
|
51
|
+
- **Interpret the standard interface.** Parse the process exit code together with stdout: the `QA-RESULT: PASS|FAIL` marker line (if several appear, the last one wins) and the per-requirement `REQ <id>: PASS|FAIL: <reason>` lines. If no `QA-RESULT` marker is emitted, the overall result is `MISSING` — which the gate treats as BLOCKING (the script broke the contract).
|
|
52
|
+
- **Write the result sidecar (BLOCKING deliverable).** Write `<task_root>/qa/result-<stageKey>.json` as:
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"stageKey": "<task-id>-stage-<N>",
|
|
56
|
+
"overall": "PASS",
|
|
57
|
+
"ranAt": "<UTC ISO8601>",
|
|
58
|
+
"requirements": { "<id>": { "status": "PASS", "reason": "<from REQ line>" } }
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
`overall` is exactly one of `PASS` / `FAIL` / `MISSING`. This file is the input to the `validate-run.py` Tier 3 gate — if it is absent the gate reports the stage as "never ran" and BLOCKS, so writing it is mandatory whenever the script runs (and on the exemption/waiver skip path, recording the skip outcome).
|
|
62
|
+
- **Read-only command log.** Record the `runCommand` exact line + its exit code in the Read-only command log. Unlike Tiers 1·2, a conformance script MAY mutate the **replica datastore** (exercising integrated state is its whole purpose) — but only the `qaEnv` replica target, never a shared/staging/prod store. The `runCommand` itself is still subject to the same source/lockfile mutation deny-list as Tier 2 (`--fix`, `npm install` without `ci`, etc.); a denied token aborts with `contract-violated`.
|
|
63
|
+
- **No manifest / no entry for this stage.** If the manifest file is absent, or it has no entry whose `stageKey` matches this run's stageKey, the verifier records `conformance: no manifest entry for <stageKey>` and proceeds (forcing the *declaration* of conformance entries is the job of planning Step 11 + the `validate-run.py` diff-surface cross-check, not the verifier).
|
|
43
64
|
|
|
44
65
|
### Missing-tier handling
|
|
45
66
|
|
|
@@ -55,7 +76,7 @@ If the verifier's re-run result differs from what the executor reported (a passi
|
|
|
55
76
|
|
|
56
77
|
### Read-only command log (per verifier)
|
|
57
78
|
|
|
58
|
-
The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order. No mutating command may appear in this block. This log is copied into the final report's verifier result section verbatim.
|
|
79
|
+
The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order — including the Tier 3 conformance `runCommand` (or the exemption/waiver skip note when no script ran). No source-mutating command may appear in this block; the only permitted mutation is a Tier 3 conformance script writing to its `qaEnv` replica datastore, which is logged like any other command. This log is copied into the final report's verifier result section verbatim.
|
|
59
80
|
|
|
60
81
|
### Verifier evidence is independent of executor evidence
|
|
61
82
|
|
|
@@ -36,6 +36,7 @@
|
|
|
36
36
|
- **Validation Evidence**: for every requirement in the originating plan or task brief, cite the artifact (commit SHA, test output, log line, MCP SELECT result) that demonstrates coverage. Paraphrased "verified" claims without an artifact are rejected.
|
|
37
37
|
- **Read-only command log**: any pre-existing test/validation command executed during this run MUST be listed with its exact command line and exit code. No mutating commands may appear here.
|
|
38
38
|
- **Two-tier command lookup (shared with `implementation`):** when this phase performs its own independent re-validation, the command source is exactly the same two tiers `implementation` verifiers use — Tier 1 is the originating task brief / approved plan's `validation` set, Tier 2 is `<PROJECT_ROOT>/.okstra/project.json` under `qaCommands`. Auto-detecting tools from manifest files is forbidden; missing tiers are recorded as `qa-command not configured: <category>` and do NOT trigger a guess. The `cmd` deny-list (`--fix`, `--write`, ` -w`, ` -u`, `--snapshot-update`, `INSTA_UPDATE=<not-no>`, `cargo update`, `npm install` without `ci`, etc.) is enforced identically. NOTE: runtime fail-fast validation (`okstra_ctl.qa_commands.validate_qa_commands`) only fires at `--task-type implementation` run-prep, so this phase MUST self-check each `qaCommands` entry against the deny-list before executing it — if a denied token is present, skip the command and record it as a `Read-only command log` line `qa-command rejected (denied token: <token>): <label>`.
|
|
39
|
+
- **Tier 3 — stage conformance scripts (whole-task union):** because this phase verifies the **integrated, merged** state, it re-runs conformance against that state rather than per-stage. Read the task-level manifest `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token) and, in **whole-task scope**, run the `runCommand` of **every** `entries[]` item against the merged worktree, refreshing each `<task_root>/qa/result-<stageKey>.json` (`{ "stageKey", "overall": "PASS"|"FAIL"|"MISSING", "ranAt", "requirements" }`). In **single-stage scope**, run only the entry whose `stageKey` matches the verified stage. An entry carrying an `exemption` or user `waiver` is NOT executed — record the skip and reason; a `waiver` becomes a `conditional-accept` condition surfaced in the section 7 Verdict (conformance left unverified by user acknowledgement). Each `runCommand` runs in the worktree cwd with `qaEnv` env (replica DB DSN / app base URL / env file) — **replica / test environment only**, never shared / staging / prod, and the same source/lockfile mutation deny-list applies (a conformance script MAY mutate only its `qaEnv` replica datastore). Interpret each result from the exit code + stdout `QA-RESULT: PASS|FAIL` (last wins) and `REQ <id>: PASS|FAIL: <reason>` lines; no `QA-RESULT` marker → `MISSING`. Any entry whose result is not `PASS` (including `MISSING` or a never-run/missing sidecar) is an **Acceptance Blocker** (`major`+), exactly like the DB real-execution gate above: because `accepted` requires zero blockers, the verdict becomes `conditional-accept` / `blocked`. This is the same gate the `validate-run.py` Tier 3 check enforces on the result sidecars.
|
|
39
40
|
- **Routing recommendation**: the next safe phase — one of `release-handoff`, `done`, `error-analysis`, `implementation-planning` — tied to the verdict and blocker list. `release-handoff` is allowed ONLY when the Verdict Token is `accepted`. `release-handoff` is additionally allowed ONLY when the verification scope (the `Verification scope:` line of the injected `VERIFICATION_TARGET` block, recorded as the report's `verificationScope` field) is `whole-task`; a `single-stage` run is partial and routes to `implementation` / `done` even on an `accepted` verdict.
|
|
40
41
|
- Clarification request policy (phase-specific addendum — shared policy is in `_common-contract.md`):
|
|
41
42
|
- populate `## 1. Clarification Items` only when a blocker hinges on information only the user can supply (deployment intent, intended target environment, business-rule interpretation); use `Blocks=next-phase` for items that gate continuing to release-handoff
|
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
- **Per-stage subsections** (`## 5.5.<i> Stage <i>: <title>` for each `i`), each containing the four required subsections:
|
|
72
72
|
- `### Carry-In` — for `depends-on (none)`: task-brief only. Otherwise: each depended-on stage's static exit contract + runtime sidecar path `runs/<impl-key>/carry/stage-<i>.json` placeholder.
|
|
73
73
|
- `### Stepwise Execution Order` — bite-sized table with `step | action | files | command | expected`. **Effective row count ≤ 6** (excluding header / divider / blank). Each step is one action completable in 2–5 minutes; for code steps include actual code or diff sketch. **TDD ordering is MUST, not a preference:** the **first** effective step's `action` cell MUST start with the literal `RED:` and describe the failing test that captures this stage's `Acceptance` (`expected` = FAIL); at least one later `action` cell MUST start with the literal `GREEN:` and describe the minimal implementation that makes it pass (`expected` = PASS); an optional refactor step starts with `REFACTOR:`. **Exemption:** doc-only / config-only / pure-rename stages with no observable runtime behaviour may omit RED/GREEN by declaring one line `TDD exemption: <reason>` in the stage section (mirrors the executor's per-step exemption in `_implementation-executor.md`). Validator S10c enforces RED-first + GREEN, or the exemption line.
|
|
74
|
+
- **Per-stage conformance declaration (mandatory one line, in the stage section — same placement freedom as `TDD exemption:`):** the stage MUST carry exactly one of:
|
|
75
|
+
- `Conformance tests: stage-<N> — <task_root>/qa/stage-<N>.<ext> (requires=[db|io|http|external,...])` — a Tier 3 verification script that proves this stage's upstream requirements (brief / requirements-discovery / error-analysis / improvement-discovery → this stage's `Acceptance`) hold against **real** DB rows, real endpoints, or the real external API — NOT mocks. When you emit this line you MUST also (a) write the script to `<task_root>/qa/stage-<N>.<ext>` and (b) add a matching entry to `<task_root>/qa/conformance-manifest.json` with fields `stageKey` (= `<task-id>-stage-<N>`), `script`, `runCommand`, `requirementIds`, `requires` (subset of `{db, io, http, external}`), `passContract`, `exemption: null`, `waiver: null`. The script's standard interface: a `main` that exits `0`=PASS / non-zero=FAIL, and whose stdout ends with `QA-RESULT: PASS|FAIL` followed by one `REQ <id>: PASS|FAIL: <reason>` line per requirement.
|
|
76
|
+
- `Conformance exemption: <reason>` — only for stages that touch no db/io/http/external surface, or where unit tests fully cover the increment. (If the eventual `implementation` diff actually touches one of those surfaces, `validate-run.py`'s diff-surface cross-check is BLOCKING — an exemption cannot hide a real db/io/http/external change.)
|
|
77
|
+
The manifest lives at the **task level** (`<task_root>/qa/`, path token `TASK_QA_PATH`) and is shared across planning → implementation → final-verification. This declaration is enforced at three layers: `validators/validate-implementation-plan-stages.py` check **S11** forces every stage to carry one of the two lines; the manifest JSON structure is enforced by `validate_conformance_manifest` (run / validate-run); and the result gate (each script's `QA-RESULT`) is enforced by the verifier Tier 3 + validate-run.
|
|
74
78
|
- `### Stage Exit Contract` — predicted added/modified files, newly exposed identifiers/types/endpoints, downstream-usable resources.
|
|
75
79
|
- `### Stage Validation` — pre / mid / post exact commands or observable outcomes for this stage only.
|
|
76
80
|
- **Vertical-slice-first partition rule (1st-class):** the grouping anchor is a **thin end-to-end vertical slice** — one stage delivers a single user-observable increment, crossing whatever layers are needed (data → service → API → UI) to make that one increment work. File/module proximity is demoted to the **intra-slice grouping rule**: within a slice, keep steps touching the same file/directory/module together so the diff, PR, and rollback unit stay cohesive. **Horizontal layer-splitting is forbidden** — never carve "the DB layer" into one stage and "the service layer" into the next; that produces stages that ship no standalone user value. A stage is split ONLY when (a) a real `depends-on` data/contract dependency exists, (b) effective steps would exceed 6, or (c) it is a distinct vertical slice (a different user-value increment). Maximising the number of parallel stages is NOT a reason to split — parallelism is an emergent property of independent stages, never a partitioning goal.
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
- the `## 5.9 Improvement Candidates` table populated with rows that obey the 10-column schema from `validators/validate-improvement-report.py` (Cand ID `I-NNN`, Lens from whitelist, Title, Scope ⊆ scan-scope, Severity, Effort, Consensus, Source workers `<worker>:<id>` from {claude, codex, gemini}, Recommended next-phase ∈ {requirements-discovery, implementation-planning, error-analysis}, Evidence as path:line list)
|
|
33
33
|
- `## 7. Final Verdict` Verdict Token ∈ {`candidates-ready`, `no-candidates`, `blocked`}; Direction `routing`; Next Step "사용자에게 후보 K개 선택 의뢰 (## 5.9 표 참조)"
|
|
34
34
|
- `## 3. Recommended Next Steps` first entry summarises per-candidate routing and proposes new task-key names of the form `<task-group>/imp-<Cand-ID>`
|
|
35
|
+
- this report is authored free-form (improvement-discovery is not in the data.json schema enum); after the markdown is written, the report-writer runs `scripts/okstra-inject-report-index.py <report.md> --report-language <en|ko>` to add the top-of-report Index + `I-NNN`/`C-NNN` scroll anchors. The run validator fails the report when the Index anchor is missing.
|
|
35
36
|
- Clarification request policy (phase-specific addenda — shared policy is in `_common-contract.md`):
|
|
36
37
|
- if scan-scope or priority-lenses cannot be made concrete during Phase 1.5, end the run with Verdict Token `blocked`, populate `## 1. Clarification Items` with `Blocks=next-phase` rows, and do not run worker dispatch
|
|
37
38
|
- every clarification row carries a recommended answer + one-line rationale inside the `Expected form` cell
|
|
@@ -46,8 +46,17 @@ class ClarificationItem:
|
|
|
46
46
|
raw_status: str
|
|
47
47
|
|
|
48
48
|
|
|
49
|
+
# The final-report renderer puts an empty scroll anchor into ID-defining first
# cells, either leading (`<a id="c-001"></a>C-001`) or inside the bold marker
# (`**<a id="e-001"></a>E-001**`). Cell normalization removes every such anchor
# so the ID reads as a bare token for clarification parsing and for the HTML
# view's `C-\d+` form detection, and so the anchor never shows up in the HTML
# view as html-escaped literal text.
_CELL_ANCHOR_RE = re.compile(r'<a id="[^"]*"></a>')


def _strip_backticks(cell: str) -> str:
    """Normalize a table cell: drop empty anchors, then one pair of backticks.

    Surrounding whitespace is trimmed before and after anchor removal. When
    the remaining text is wrapped in a single pair of backticks, the pair is
    removed and the inner text is trimmed again.
    """
    text = _CELL_ANCHOR_RE.sub("", cell.strip()).strip()
    # A lone "`" is not a wrapped value, hence the length guard.
    is_wrapped = len(text) >= 2 and text[0] == "`" and text[-1] == "`"
    return text[1:-1].strip() if is_wrapped else text
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Stage conformance(Tier 3) 매니페스트 검증 + `QA-RESULT` 파서.
|
|
2
|
+
|
|
3
|
+
implementation/final-verification 의 verifier 는 stage 별 conformance 스크립트를
|
|
4
|
+
실행해 상위 요구사항 부합을 검증한다. 본 모듈은 그 검증/파싱의 결정론적 코어다.
|
|
5
|
+
|
|
6
|
+
1. `conformance-manifest.json` 구조 검증 (`validate_conformance_manifest`).
|
|
7
|
+
2. 스크립트 stdout 의 `QA-RESULT` 마커 파싱 (`parse_qa_result`).
|
|
8
|
+
|
|
9
|
+
스크립트 실행/게이트 강제는 verifier prompt 와 validators/validate-run.py 가 담당한다.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import fnmatch
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
# diff 가 건드린 표면과 대조할 capability 태그 화이트리스트.
|
|
18
|
+
CAPABILITY_WHITELIST: tuple[str, ...] = ("db", "io", "http", "external")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _check_nonempty_str(value: object, path: str, errors: list[str]) -> bool:
    """Check that ``value`` is a string with non-whitespace content.

    Args:
        value: The candidate value taken from the manifest.
        path: Dotted location used as the prefix of the error message.
        errors: Accumulator that receives one message when the check fails.

    Returns:
        ``True`` when the value is acceptable, ``False`` otherwise.
    """
    if isinstance(value, str) and value.strip():
        return True
    errors.append(f"{path} must be a non-empty string")
    return False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _check_capabilities(value: object, path: str, errors: list[str]) -> None:
    """Check that ``value`` is a list whose items are all whitelisted capabilities.

    A non-list yields a single "must be an array" error. Otherwise one error
    is appended per item that is not in ``CAPABILITY_WHITELIST``, in list order.
    """
    if not isinstance(value, list):
        errors.append(f"{path} must be an array")
        return
    allowed = ", ".join(CAPABILITY_WHITELIST)
    errors.extend(
        f"{path}: unknown capability {cap!r} (allowed: {allowed})"
        for cap in value
        if cap not in CAPABILITY_WHITELIST
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _check_exemption(value: object, path: str, errors: list[str]) -> None:
    """Validate an entry's ``exemption`` field.

    ``None`` means "no exemption" and is accepted. Any other value must be an
    object carrying non-empty ``reason`` and ``declaredAt`` strings.
    """
    if value is None:
        return
    if isinstance(value, dict):
        for field in ("reason", "declaredAt"):
            _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
    else:
        errors.append(f"{path} must be an object or null")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _check_waiver(value: object, path: str, errors: list[str]) -> None:
    """Validate an entry's ``waiver`` field.

    ``None`` means "no waiver" and is accepted. Any other value must be an
    object with non-empty ``acknowledgedBy``, ``reason`` and ``at`` strings.
    Its optional ``scope`` (treated as an empty list when absent) must be a
    list of whitelisted capabilities.
    """
    if value is None:
        return
    if not isinstance(value, dict):
        errors.append(f"{path} must be an object or null")
        return
    for field in ("acknowledgedBy", "reason", "at"):
        _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
    scope = value.get("scope", [])
    _check_capabilities(scope, f"{path}.scope", errors)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _check_entry(entry: object, idx: int, errors: list[str]) -> None:
    """Validate one ``entries[idx]`` item of the conformance manifest.

    Checks, in order: the required string fields, ``requirementIds`` (a
    non-empty list of non-blank strings), ``requires`` (capability list,
    empty when absent), then ``exemption`` and ``waiver``.
    """
    path = f"entries[{idx}]"
    if not isinstance(entry, dict):
        errors.append(f"{path} must be an object")
        return

    for field in ("stageKey", "script", "runCommand", "passContract"):
        _check_nonempty_str(entry.get(field), f"{path}.{field}", errors)

    req_ids = entry.get("requirementIds")
    ids_valid = (
        isinstance(req_ids, list)
        and bool(req_ids)
        and all(isinstance(item, str) and item.strip() for item in req_ids)
    )
    if not ids_valid:
        errors.append(f"{path}.requirementIds must be a non-empty array of strings")

    _check_capabilities(entry.get("requires", []), f"{path}.requires", errors)
    _check_exemption(entry.get("exemption"), f"{path}.exemption", errors)
    _check_waiver(entry.get("waiver"), f"{path}.waiver", errors)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def validate_conformance_manifest(manifest: object) -> list[str]:
    """Validate a whole conformance manifest and return the violation messages.

    An empty list means the manifest is acceptable. A missing manifest
    (``None``) is legal: a task may have no conformance scripts, and forcing
    their declaration (the diff-surface cross-check) is decided by
    validators/validate-run.py, not here.
    """
    if manifest is None:
        return []
    if not isinstance(manifest, dict):
        return [f"conformance manifest must be an object, got {type(manifest).__name__}"]

    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return ["conformance manifest .entries must be an array"]

    problems: list[str] = []
    known_keys: set[str] = set()
    for position, item in enumerate(entries):
        _check_entry(item, position, problems)
        if not isinstance(item, dict):
            continue
        stage_key = item.get("stageKey")
        if not isinstance(stage_key, str) or not stage_key:
            continue
        # Each stageKey may appear only once across the manifest.
        if stage_key in known_keys:
            problems.append(f"entries[{position}].stageKey duplicate: {stage_key!r}")
        known_keys.add(stage_key)
    return problems
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Both markers are strictly single-line. Separators are limited to spaces and
# tabs: with re.MULTILINE, ``\s`` would also match newlines, which lets a REQ
# line with an empty reason swallow the following line (dropping the next REQ
# entry) and lets "QA-RESULT:" + newline + "PASS" count as a marker.
# A trailing ``\r`` is tolerated so CRLF output parses the same as LF output.
_QA_RESULT_RE = re.compile(r"^QA-RESULT:[ \t]*(PASS|FAIL)[ \t]*\r?$", re.MULTILINE)
_REQ_LINE_RE = re.compile(
    r"^REQ[ \t]+(\S+):[ \t]*(PASS|FAIL):[ \t]*([^\r\n]*)\r?$", re.MULTILINE
)


@dataclass
class QaResult:
    """Parsed outcome of one conformance script run."""

    overall: str  # "PASS" | "FAIL" | "MISSING"
    requirements: dict[str, dict[str, str]]  # id -> {"status": "PASS"|"FAIL", "reason": str}


def parse_qa_result(stdout: str) -> QaResult:
    """Parse the ``QA-RESULT`` marker and ``REQ`` lines from script stdout.

    When no marker line is present the overall result is ``"MISSING"``: the
    script broke the contract, so the gate treats it as a failure. When
    several marker lines appear, the last one wins. A requirement id that
    appears on several ``REQ`` lines keeps its last occurrence.

    Args:
        stdout: Captured standard output of the script; a falsy value is
            treated as empty output.

    Returns:
        A ``QaResult`` with the overall status and the per-requirement
        status/reason mapping (reasons are whitespace-trimmed).
    """
    text = stdout or ""
    markers = _QA_RESULT_RE.findall(text)
    overall = markers[-1] if markers else "MISSING"
    requirements: dict[str, dict[str, str]] = {
        rid: {"status": status, "reason": reason.strip()}
        for rid, status, reason in _REQ_LINE_RE.findall(text)
    }
    return QaResult(overall=overall, requirements=requirements)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class ConformanceVerdict:
    """Gate decision for a single stage conformance entry."""

    stage_key: str
    status: str  # "PASS" | "BLOCKING" | "WAIVED" | "EXEMPT"
    ok: bool  # whether the run may proceed (True for PASS/WAIVED/EXEMPT)
    conditional: bool  # True only for WAIVED: conformance unverified, user-acknowledged
    message: str


def decide_conformance_gate(entry: dict, result: object) -> ConformanceVerdict:
    """Decide the gate for one stage entry and its execution result.

    Priority: exemption, then waiver, then the recorded result. A result that
    is absent (``None``), ``MISSING`` or ``FAIL`` is BLOCKING.

    Shape validation of ``exemption`` / ``waiver`` is the job of
    ``validate_conformance_manifest``. This function still only honours them
    when they are non-empty objects, so a malformed value (for example a JSON
    string or array) grants nothing and cannot crash the gate; the decision
    then falls through to the recorded result.

    Args:
        entry: One manifest ``entries[]`` item.
        result: An object exposing ``overall`` (such as ``QaResult``), or
            ``None`` when the script never ran.

    Returns:
        The verdict for this stage.
    """
    key = entry.get("stageKey", "<unknown>")

    exemption = entry.get("exemption")
    if isinstance(exemption, dict) and exemption:
        return ConformanceVerdict(
            key, "EXEMPT", True, False,
            f"conformance exempted: {exemption.get('reason', '')}",
        )

    waiver = entry.get("waiver")
    if isinstance(waiver, dict) and waiver:
        return ConformanceVerdict(
            key, "WAIVED", True, True,
            f"conformance waived by {waiver.get('acknowledgedBy', '?')}: "
            f"{waiver.get('reason', '')}",
        )

    # getattr yields None when result is None, i.e. the script never ran.
    overall = getattr(result, "overall", None)
    if overall == "PASS":
        return ConformanceVerdict(key, "PASS", True, False, "conformance PASS")
    if overall is None:
        return ConformanceVerdict(
            key, "BLOCKING", False, False,
            "conformance script never ran (no result recorded)",
        )
    if overall == "MISSING":
        return ConformanceVerdict(
            key, "BLOCKING", False, False,
            "conformance script ran but emitted no QA-RESULT marker",
        )
    return ConformanceVerdict(key, "BLOCKING", False, False, f"conformance {overall}")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def qa_result_from_dict(data: object) -> QaResult:
    """Rebuild a ``QaResult`` from a result-sidecar JSON object.

    Used when validate-run loads the ``result-stage-<N>.json`` written by the
    verifier. A malformed shape is safely downgraded: a non-object or an
    unrecognised ``overall`` becomes ``"MISSING"`` (treated as BLOCKING), and
    a non-object ``requirements`` becomes an empty mapping.
    """
    if not isinstance(data, dict):
        return QaResult(overall="MISSING", requirements={})

    raw_overall = data.get("overall")
    overall = raw_overall if raw_overall in ("PASS", "FAIL", "MISSING") else "MISSING"

    raw_reqs = data.get("requirements")
    requirements = raw_reqs if isinstance(raw_reqs, dict) else {}
    return QaResult(overall=overall, requirements=requirements)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def evaluate_conformance(manifest: object, results_by_stage: object) -> list[ConformanceVerdict]:
    """Return the gate verdict for every entry in the manifest.

    Args:
        manifest: Conformance manifest; expected to be a dict with an
            `entries` list. Any other shape yields an empty list.
        results_by_stage: Mapping of stageKey -> `QaResult`. A stage with no
            recorded result is passed to the gate as None (never ran). A
            non-dict value is treated as "no results at all".

    Returns:
        One verdict per dict entry, in manifest order. Non-dict entries are
        skipped.

    Structural validation of the manifest is expected to have been done
    beforehand with `validate_conformance_manifest`.
    """
    entries = manifest.get("entries") if isinstance(manifest, dict) else None
    if not isinstance(entries, list):
        return []
    results = results_by_stage if isinstance(results_by_stage, dict) else {}
    verdicts: list[ConformanceVerdict] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        try:
            result = results.get(entry.get("stageKey"))
        except TypeError:
            # An unhashable stageKey (e.g. a list from a malformed manifest)
            # cannot index the results mapping. Treat it as "no result
            # recorded" so the gate still decides, rather than aborting the
            # evaluation of every remaining entry.
            result = None
        verdicts.append(decide_conformance_gate(entry, result))
    return verdicts
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Default mapping from capability surface to the path globs that indicate it.
# Per-project overrides come from qaEnv.surfacePatterns (Phase 4e). 'external'
# is hard to detect from paths, so it has no default pattern and relies on an
# explicit declaration.
_DEFAULT_SURFACE_PATTERNS: dict[str, tuple[str, ...]] = {
    "db": (
        "*.sql",
        "*migration*",
        "*repository*",
        "*.entity.*",
        "*entities*",
        "*schema.prisma*",
    ),
    "http": (
        "*controller*",
        "*.routes.*",
        "*router*",
        "*endpoint*",
        "*.api.*",
    ),
    "io": (
        "*filesystem*",
        "*storage*",
        "*.fs.*",
    ),
}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def detect_surfaces(file_paths: object, patterns: object = None) -> set[str]:
    """Detect the capability surfaces touched by a set of changed file paths.

    Matching is case-insensitive: both the path and the glob are lowercased
    before being handed to `fnmatch`.

    Args:
        file_paths: Iterable of path strings. A bare string is treated as a
            single path; non-string items are ignored; a falsy or
            non-iterable value yields an empty set.
        patterns: Optional mapping of surface -> glob or iterable of globs.
            When it is not a dict, `_DEFAULT_SURFACE_PATTERNS` is used.

    Returns:
        The set of surface keys for which at least one path matched a glob.
    """
    table = patterns if isinstance(patterns, dict) else _DEFAULT_SURFACE_PATTERNS

    # A bare string is one path, not an iterable of one-character paths.
    if isinstance(file_paths, str):
        file_paths = [file_paths]
    try:
        candidates = iter(file_paths or ())
    except TypeError:
        return set()

    # Normalise the glob table once, outside the per-path loop.
    normalized: list[tuple[object, tuple[str, ...]]] = []
    for surface, globs in table.items():
        if isinstance(globs, str):
            # A single glob given as a string must not be iterated character
            # by character: its lone "*" would match every path.
            globs = (globs,)
        try:
            # Lowercase the globs as well, so a mixed-case override pattern
            # can still match the lowercased path on case-sensitive platforms.
            lowered = tuple(g.lower() for g in globs if isinstance(g, str))
        except TypeError:
            continue  # glob list is not iterable; nothing to match against
        if lowered:
            normalized.append((surface, lowered))

    found: set[str] = set()
    for raw in candidates:
        if not isinstance(raw, str):
            continue
        path = raw.strip().lower()
        for surface, lowered in normalized:
            if any(fnmatch.fnmatch(path, g) for g in lowered):
                found.add(surface)
    return found
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def parse_qa_waiver_arg(arg: object) -> tuple[str, str] | None:
|
|
235
|
+
"""`--qa-waiver` 값 `<stageKey>:<reason>` 를 (stageKey, reason) 로 분해.
|
|
236
|
+
형식이 아니거나 비면 None."""
|
|
237
|
+
if not isinstance(arg, str) or ":" not in arg:
|
|
238
|
+
return None
|
|
239
|
+
key, reason = arg.split(":", 1)
|
|
240
|
+
key, reason = key.strip(), reason.strip()
|
|
241
|
+
if not key or not reason:
|
|
242
|
+
return None
|
|
243
|
+
return key, reason
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def apply_qa_waiver(manifest: object, stage_key: str, reason: str, *, at: str,
                    acknowledged_by: str = "user") -> bool:
    """Fill in the `waiver` of the manifest entry for `stage_key`, in place.

    Only the first entry whose `stageKey` equals `stage_key` is updated; any
    waiver it already carried is replaced. Returns True when such an entry
    was found, False otherwise (including when the manifest is not a dict or
    has no `entries` list).

    This is the user-acknowledged bypass (spec §7.2); `reason` is meant to be
    the user's instruction verbatim.
    """
    if not isinstance(manifest, dict):
        return False
    candidates = manifest.get("entries")
    if not isinstance(candidates, list):
        return False

    target = next(
        (
            item
            for item in candidates
            if isinstance(item, dict) and item.get("stageKey") == stage_key
        ),
        None,
    )
    if target is None:
        return False

    target["waiver"] = {
        "acknowledgedBy": acknowledged_by,
        "reason": reason,
        "scope": [],
        "at": at,
    }
    return True
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def manifest_required_surfaces(manifest: object) -> set[str]:
    """Return the union of `requires` across all manifest entries.

    This is the set of surfaces the manifest declares. Entries that are not
    dicts, `requires` values that are not lists, and non-string items inside
    `requires` are ignored. A manifest that is not a dict or has no `entries`
    list yields an empty set.
    """
    if not isinstance(manifest, dict):
        return set()
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return set()
    return {
        capability
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("requires"), list)
        for capability in entry["requires"]
        if isinstance(capability, str)
    }
|
|
@@ -117,6 +117,7 @@ def compute_run_paths(
|
|
|
117
117
|
task_index = task_root / "task-index.md"
|
|
118
118
|
instruction_set = task_root / "instruction-set"
|
|
119
119
|
analysis_packet = instruction_set / "analysis-packet.md"
|
|
120
|
+
task_qa = task_root / "qa"
|
|
120
121
|
runs_dir = task_root / "runs"
|
|
121
122
|
history_dir = task_root / "history"
|
|
122
123
|
timeline_file = history_dir / "timeline.json"
|
|
@@ -202,6 +203,7 @@ def compute_run_paths(
|
|
|
202
203
|
"TASK_INDEX_PATH": str(task_index),
|
|
203
204
|
"INSTRUCTION_SET_PATH": str(instruction_set),
|
|
204
205
|
"ANALYSIS_PACKET_PATH": str(analysis_packet),
|
|
206
|
+
"TASK_QA_PATH": str(task_qa),
|
|
205
207
|
"RUNS_DIR": str(runs_dir),
|
|
206
208
|
"HISTORY_DIR": str(history_dir),
|
|
207
209
|
"TIMELINE_PATH": str(timeline_file),
|