okstra 0.55.0 → 0.56.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/okstra +24 -7
- package/docs/kr/architecture.md +2 -2
- package/docs/project-structure-overview.md +0 -1
- package/docs/superpowers/plans/2026-05-25-okstra-project-root-rename.md +0 -1
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase2.md +275 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase3.md +282 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4a.md +147 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4b.md +262 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4c.md +184 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4d.md +88 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa-phase4e.md +250 -0
- package/docs/superpowers/plans/2026-06-07-stage-conformance-qa.md +409 -0
- package/docs/superpowers/specs/2026-06-07-stage-conformance-qa-design.md +169 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/bin/lib/okstra/cli.sh +5 -1
- package/runtime/bin/lib/okstra/usage.sh +5 -0
- package/runtime/bin/okstra.sh +1 -0
- package/runtime/prompts/profiles/_common-contract.md +4 -4
- package/runtime/prompts/profiles/_implementation-deliverable.md +4 -4
- package/runtime/prompts/profiles/_implementation-executor.md +1 -4
- package/runtime/prompts/profiles/_implementation-verifier.md +23 -2
- package/runtime/prompts/profiles/final-verification.md +2 -1
- package/runtime/prompts/profiles/implementation-planning.md +9 -5
- package/runtime/prompts/profiles/implementation.md +6 -6
- package/runtime/prompts/profiles/improvement-discovery.md +1 -0
- package/runtime/prompts/profiles/release-handoff.md +4 -4
- package/runtime/python/okstra_ctl/conformance.py +270 -0
- package/runtime/python/okstra_ctl/paths.py +2 -0
- package/runtime/python/okstra_ctl/run.py +29 -0
- package/runtime/schemas/final-report-v1.0.schema.json +127 -10
- package/runtime/skills/okstra-coding-preflight/SKILL.md +8 -0
- package/runtime/skills/okstra-coding-preflight/clean-code.md +6 -0
- package/runtime/skills/okstra-run/SKILL.md +12 -0
- package/runtime/skills/okstra-run/templates/pr-body.template.md +12 -12
- package/runtime/skills/okstra-setup/SKILL.md +35 -0
- package/runtime/templates/reports/final-report.template.md +63 -19
- package/runtime/templates/reports/i18n/en.json +1 -1
- package/runtime/templates/reports/i18n/ko.json +1 -1
- package/runtime/templates/reports/implementation-input.template.md +1 -1
- package/runtime/templates/reports/implementation-planning-input.template.md +3 -3
- package/runtime/validators/validate-implementation-plan-stages.py +28 -3
- package/runtime/validators/validate-run.py +98 -0
- package/src/okstra-dirs.mjs +1 -1
- package/src/migrate.mjs +0 -146
|
@@ -25,7 +25,7 @@ profile document.
|
|
|
25
25
|
- Authority & permissions assumption (applies to every okstra task-type):
|
|
26
26
|
- **Assume the user (and their team) holds full authority and every permission required for the anticipated, in-flight, or follow-up work.** Treat external approvals, third-party access grants, role/IAM permissions, organisational sign-off, legal/compliance review, vendor coordination, and "verify access exists" steps as already satisfied unless the task brief explicitly states otherwise.
|
|
27
27
|
- Do NOT add such items to routing decisions, missing-materials lists, clarification questions, option trade-offs, dependency/migration risk, validation checklists, rollout plans, acceptance blockers, residual risks, release recommendations, the `## 1. Clarification Items` table, or any day/effort estimate. They are not legitimate sources of schedule extension.
|
|
28
|
-
- Internal okstra phase handoffs (e.g. the `
|
|
28
|
+
- Internal okstra phase handoffs (e.g. the `approved:` frontmatter gate in `implementation-planning`) are unaffected — in those the user themselves is the approver, so they proceed without external coordination.
|
|
29
29
|
- This rule does NOT relax any phase-specific Forbidden actions list; safety rules in the per-profile document remain in force regardless of the user's authority.
|
|
30
30
|
- Anti-escalation rule (shared):
|
|
31
31
|
- treating "다음 단계 진행해" or equivalent user phrases as authorisation to start a *different* lifecycle phase is forbidden. The next phase begins only in a separate okstra run launched with the new `--task-type`. Per-profile documents may further restrict this within their own scope.
|
|
@@ -61,7 +61,7 @@ profile document.
|
|
|
61
61
|
- **Reporter confirmation precondition (BLOCKING)**: the brief's frontmatter carries `reporter-confirmations: <complete | partial | pending | skipped>` set by `okstra-brief` Step 6.5. Every phase that consumes the brief MUST read this field before doing analysis. The handling matrix is:
|
|
62
62
|
- `complete` → proceed normally.
|
|
63
63
|
- `partial` → proceed; treat still-unmarked `intent-check:` / `conversion-block:` rows as the `skipped` branch.
|
|
64
|
-
- `skipped` → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 1. Clarification Items` as `Kind=decision`. Use `Blocks=approval` in `implementation-planning`, where the row gates the
|
|
64
|
+
- `skipped` → do NOT silently infer the missing answers. Promote each unmarked `intent-check:` / `conversion-block:` row into this run's `## 1. Clarification Items` as `Kind=decision`. Use `Blocks=approval` in `implementation-planning`, where the row gates the `approved:` frontmatter flip; otherwise use `Blocks=next-phase`. The recommended answer is drawn from the brief's matching content and clearly labelled `보고자 직접 확인 권장`.
|
|
65
65
|
- `pending` (or field missing) → ABORT analysis; render the Verdict Card with `Verdict Token = blocked` + `Direction = hold` and write a single `## Reporter Confirmation Required` block (no leading number) summarising which rows are pending. The `## 1. Clarification Items` table carries one row per pending item with `Blocks=approval` in `implementation-planning`, otherwise `Blocks=next-phase`. The operator must rerun `okstra-brief` Step 6.5. Do NOT emit `## 0.` for this case — Section 0 is reserved for clarification-response carry-in only.
|
|
66
66
|
`[CONFIRMED <YYYY-MM-DD> → RC-N]` markers on `Open Questions` rows are the per-row signal that the reporter has answered; their answers live verbatim under `## Reporter Confirmations` in the brief.
|
|
67
67
|
- `Source Material` is reporter-verbatim. Do NOT paraphrase, summarize, reorder, or restructure it. Quote it directly when needed.
|
|
@@ -81,9 +81,9 @@ profile document.
|
|
|
81
81
|
- **Canonical column schema (SSOT — must match `templates/reports/final-report.template.md` §1 exactly):** every `## 1. Clarification Items` table has exactly these 8 columns, in this order:
|
|
82
82
|
`| ID | Ticket ID | Kind | Statement | Expected form | Blocks | Status | User input |`.
|
|
83
83
|
Profile-specific addenda may tighten cell content but MUST NOT add, remove, rename, or reorder columns. The `ID` cell uses `C-NNN` (3-digit zero-padded), the `Status` cell ∈ `{open, answered, resolved, obsolete}`, and the `Kind` / `Blocks` legal values are listed below.
|
|
84
|
-
- section 1 is a **single unified table** per `final-report-template.md`. Every clarification item — whether the user must attach a file, choose between options, or supply a single number/path — is one row of that table. Do not split it into sub-sections (`1.1 추가 자료 요청` / `1.2 사용자 확인 질문` / `5.5.9 Open Questions` are removed and the validator fails reports that reintroduce them), do not create a parallel table elsewhere in the report, and do not duplicate the same item into
|
|
84
|
+
- section 1 is a **single unified table** per `final-report.template.md`. Every clarification item — whether the user must attach a file, choose between options, or supply a single number/path — is one row of that table. Do not split it into sub-sections (`1.1 추가 자료 요청` / `1.2 사용자 확인 질문` / `5.5.9 Open Questions` are removed and the validator fails reports that reintroduce them), do not create a parallel table elsewhere in the report, and do not duplicate the same item into an approval block or any other section.
|
|
85
85
|
- each row's `Kind` column picks one of `{material, decision, data-point}`: `material` for files / snapshots / logs / screenshots the user must attach (the `User input` cell will hold a path or URL); `decision` for choices and yes/no confirmations only the user can make; `data-point` for a single number, ID, date, or short string the user can answer inline. Items that mix "yes/no + file path if yes" are one row of `Kind=material` with the combined expectation written into `Expected form`.
|
|
86
|
-
- each row's `Blocks` column picks one of `{approval, next-phase, none}`. `approval` is reserved for items that gate an approval action, especially the `implementation-planning`
|
|
86
|
+
- each row's `Blocks` column picks one of `{approval, next-phase, none}`. `approval` is reserved for items that gate an approval action, especially the `implementation-planning` `approved:` frontmatter flip; outside `implementation-planning`, unresolved brief reporter-confirmation rows use `next-phase` instead. `next-phase` blocks the next run from starting cleanly. `none` is informational/audit-only.
|
|
87
87
|
- write every entry in full, descriptive sentences that a non-developer can act on without further context. Avoid abbreviations and internal jargon. The `Statement` cell must state *what* is needed, *why* the answer / attachment changes the next step, and (for `material`) *where* the user can find it and *where* to place it. The `Expected form` cell must state the answer shape (예/아니오, 보기 중 하나, 숫자/날짜, 파일 경로, 짧은 서술 등); supply concrete option choices when applicable.
|
|
88
88
|
- if a phase requires a recommended answer, alternatives, or an evidence-check note, encode it inside the existing 8-column schema: put evidence notes in `Statement` as `Evidence checked: <path:line>` or `Evidence checked: none — <human-only reason>`, and put recommendations/options in `Expected form` as `Recommended: <answer> — <rationale>; Alternatives: <options>`. Do not add `Recommended`, `Evidence`, `Alternatives`, or `evidence-checked` columns.
|
|
89
89
|
- the same `final-report.md` file is the canonical artifact carried into the next run; the user appends answers inline before rerunning. The preferred turn-around is `scripts/okstra.sh --resume-clarification --task-key <project-id>:<task-group>:<task-id>` (opens the latest report in `$EDITOR`, then auto-reruns the same phase with `--clarification-response` carry-in). The lower-level form `--clarification-response <path>` remains available for scripted runs.
|
|
@@ -48,7 +48,7 @@ are collected and convergence finished. Phase 1-5 do not need it.
|
|
|
48
48
|
## Lead post-stage persistence (BLOCKING — runs after the Executor emits `### Stage Carry Evidence`)
|
|
49
49
|
|
|
50
50
|
- Parse the executor's `### Stage Carry Evidence` JSON block. If absent or unparsable, end with status `contract-violated` and route to a follow-up `error-analysis`.
|
|
51
|
-
- For
|
|
52
|
-
- For
|
|
53
|
-
- The verifier round, Phase 5.5 convergence, and this Phase 6 report run **once per run** over
|
|
54
|
-
- Quote
|
|
51
|
+
- For this run's single stage: write its JSON verbatim to `runs/<impl-task-key>/carry/stage-<N>.json`. Refuse to overwrite an existing file (one stage = one sidecar; re-runs are out of scope for this version).
|
|
52
|
+
- For this run's single stage: append a `status:"done"` row to `runs/<plan-task-key>/consumers.jsonl` with `completed_at`, `carry_path`, `report_path` (this run's final-report path relative to the run root), and the SHA of HEAD. Use the okstra runtime's `consumers_mutex` helper (NOT a raw filesystem write) to honour the lock. `report_path` lets `final-verification` cite each stage's originating report when assembling its Source Implementation Report list.
|
|
53
|
+
- The verifier round, Phase 5.5 convergence, and this Phase 6 report run **once per run** over this stage's diff — NOT per step.
|
|
54
|
+
- Quote this stage's new contents (the sidecar JSON in full and the new consumers row by itself) in the final report's `Stage sidecar evidence` deliverable section.
|
|
@@ -48,10 +48,7 @@ until Phase 5 ends, then drop from active context for Phase 6/7.
|
|
|
48
48
|
|
|
49
49
|
- **Sidecar evidence writer (BLOCKING).** When this stage's Stage Validation `post` commands all succeed, the Executor MUST emit a JSON object matching the schema in `docs/superpowers/specs/2026-05-20-implementation-planning-multi-stage-design.md` §3.2 and the lead MUST persist it to `runs/<impl-task-key>/carry/stage-<N>.json`. The file MUST NOT exist before the run starts (overwrite is refused — see `--force-stage` non-goal).
|
|
50
50
|
- **Reverse link (BLOCKING).** The runtime already appended a `status:"started"` row for this stage before the run began. On completion, append a `status:"done"` row with `carry_path` populated for this stage number.
|
|
51
|
-
- **
|
|
52
|
-
- `## Stage <N>` — number, title (from Stage Map row), touched files, and validation result.
|
|
53
|
-
- `## Carry-In summary` — depends-on list + cited identifiers/SHAs from each loaded sidecar (omit when depends-on is empty).
|
|
54
|
-
- `## Previous run` / `## Next run` — links so a reviewer can navigate the run chain.
|
|
51
|
+
- **No PR / push in this phase.** This run produces local commits, carry sidecar evidence, verifier results, and the implementation final report only. Push and PR creation belong exclusively to the later `release-handoff` phase after `final-verification` returns `accepted`.
|
|
55
52
|
|
|
56
53
|
## Allowed actions during the run
|
|
57
54
|
|
|
@@ -39,7 +39,28 @@ Verifier obtains the QA command set from exactly two declared sources, in order
|
|
|
39
39
|
|
|
40
40
|
### Execution rule
|
|
41
41
|
|
|
42
|
-
Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
|
|
42
|
+
Tier 1 commands run verbatim first. Then every Tier 2 entry runs once. Then the Tier 3 stage conformance script (below) runs once. Each command runs in the worktree cwd, and is recorded in the worker result with its exact command line, exit code, and the tail of stdout/stderr. Substituting or paraphrasing a Tier 1 command is forbidden (see Verifier-specific forbidden actions below).
|
|
43
|
+
|
|
44
|
+
### Tier 3 — stage conformance scripts (요구사항 부합 검증)
|
|
45
|
+
|
|
46
|
+
Tiers 1·2 prove the diff *builds and passes*; Tier 3 proves the stage actually *meets the upper-level requirement* it was scoped to, by running a declared conformance script against the running state. This is a real gate — its result sidecar is the input the `validate-run.py` Tier 3 gate reads, so a missing or non-PASS result BLOCKS acceptance.
|
|
47
|
+
|
|
48
|
+
- **Source.** The conformance manifest is `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token). This run's stage conformance entry is the manifest `entries[]` item whose `stageKey` equals this run's stageKey — `<task-id>-stage-<N>`, where `<N>` is the injected Stage number. Find that one entry; ignore the others (other stages are run by their own implementation runs or by final-verification).
|
|
49
|
+
- **Exemption / waiver → do NOT run.** If the entry carries an `exemption` (or a user `waiver`), the verifier does NOT execute the script. It records the fact and the reason (`exemption.reason` / `waiver.reason` + `waiver.acknowledgedBy`) in the Read-only command log AND writes the result sidecar reflecting the skip. An `exemption` passes the gate outright; a `waiver` passes but is conditional (conformance left unverified by explicit user acknowledgement). No script runs in either case.
|
|
50
|
+
- **Otherwise run `runCommand` in the worktree cwd.** Execute the entry's `runCommand` verbatim from the worktree cwd. Inject env from `<PROJECT_ROOT>/.okstra/project.json`'s `qaEnv` (replica DB DSN / app base URL / env file — declared in Phase 4e). This is a **replica / test environment only** path — never run it against shared / staging / prod, identical to the DB real-execution gate principle above.
|
|
51
|
+
- **Interpret the standard interface.** Parse the process exit code together with stdout: the `QA-RESULT: PASS|FAIL` marker line (if several appear, the last one wins) and the per-requirement `REQ <id>: PASS|FAIL: <reason>` lines. If no `QA-RESULT` marker is emitted, the overall result is `MISSING` — which the gate treats as BLOCKING (the script broke the contract).
|
|
52
|
+
- **Write the result sidecar (BLOCKING deliverable).** Write `<task_root>/qa/result-<stageKey>.json` as:
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"stageKey": "<task-id>-stage-<N>",
|
|
56
|
+
"overall": "PASS",
|
|
57
|
+
"ranAt": "<UTC ISO8601>",
|
|
58
|
+
"requirements": { "<id>": { "status": "PASS", "reason": "<from REQ line>" } }
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
`overall` is exactly one of `PASS` / `FAIL` / `MISSING`. This file is the input to the `validate-run.py` Tier 3 gate — if it is absent the gate reports the stage as "never ran" and BLOCKS, so writing it is mandatory whenever the script runs, and also on the exemption/waiver skip path, where it records the skip outcome.
|
|
62
|
+
- **Read-only command log.** Record the `runCommand` exact line + its exit code in the Read-only command log. Unlike Tiers 1·2, a conformance script MAY mutate the **replica datastore** (exercising integrated state is its whole purpose) — but only the `qaEnv` replica target, never a shared/staging/prod store. The `runCommand` itself is still subject to the same source/lockfile mutation deny-list as Tier 2 (`--fix`, `npm install` without `ci`, etc.); a denied token aborts with `contract-violated`.
|
|
63
|
+
- **No manifest / no entry for this stage.** If the manifest file is absent, or it has no entry whose `stageKey` matches this run's stageKey, the verifier records `conformance: no manifest entry for <stageKey>` and proceeds (forcing the *declaration* of conformance entries is the job of planning Step 11 + the `validate-run.py` diff-surface cross-check, not the verifier).
|
|
43
64
|
|
|
44
65
|
### Missing-tier handling
|
|
45
66
|
|
|
@@ -55,7 +76,7 @@ If the verifier's re-run result differs from what the executor reported (a passi
|
|
|
55
76
|
|
|
56
77
|
### Read-only command log (per verifier)
|
|
57
78
|
|
|
58
|
-
The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order. No mutating command may appear in this block. This log is copied into the final report's verifier result section verbatim.
|
|
79
|
+
The worker result MUST contain a `Read-only command log` block listing every command executed during the verifier run with its exact invocation and exit code, in execution order — including the Tier 3 conformance `runCommand` (or the exemption/waiver skip note when no script ran). No source-mutating command may appear in this block; the only permitted mutation is a Tier 3 conformance script writing to its `qaEnv` replica datastore, which is logged like any other command. This log is copied into the final report's verifier result section verbatim.
|
|
59
80
|
|
|
60
81
|
### Verifier evidence is independent of executor evidence
|
|
61
82
|
|
|
@@ -36,11 +36,12 @@
|
|
|
36
36
|
- **Validation Evidence**: for every requirement in the originating plan or task brief, cite the artifact (commit SHA, test output, log line, MCP SELECT result) that demonstrates coverage. Paraphrased "verified" claims without an artifact are rejected.
|
|
37
37
|
- **Read-only command log**: any pre-existing test/validation command executed during this run MUST be listed with its exact command line and exit code. No mutating commands may appear here.
|
|
38
38
|
- **Two-tier command lookup (shared with `implementation`):** when this phase performs its own independent re-validation, the command source is exactly the same two tiers `implementation` verifiers use — Tier 1 is the originating task brief / approved plan's `validation` set, Tier 2 is `<PROJECT_ROOT>/.okstra/project.json` under `qaCommands`. Auto-detecting tools from manifest files is forbidden; missing tiers are recorded as `qa-command not configured: <category>` and do NOT trigger a guess. The `cmd` deny-list (`--fix`, `--write`, ` -w`, ` -u`, `--snapshot-update`, `INSTA_UPDATE=<not-no>`, `cargo update`, `npm install` without `ci`, etc.) is enforced identically. NOTE: runtime fail-fast validation (`okstra_ctl.qa_commands.validate_qa_commands`) only fires at `--task-type implementation` run-prep, so this phase MUST self-check each `qaCommands` entry against the deny-list before executing it — if a denied token is present, skip the command and record it as a `Read-only command log` line `qa-command rejected (denied token: <token>): <label>`.
|
|
39
|
+
- **Tier 3 — stage conformance scripts (whole-task union):** because this phase verifies the **integrated, merged** state, it re-runs conformance against that state rather than per-stage. Read the task-level manifest `<task_root>/qa/conformance-manifest.json` (the directory is the `TASK_QA_PATH` token) and, in **whole-task scope**, run the `runCommand` of **every** `entries[]` item against the merged worktree, refreshing each `<task_root>/qa/result-<stageKey>.json` (`{ "stageKey", "overall": "PASS"|"FAIL"|"MISSING", "ranAt", "requirements" }`). In **single-stage scope**, run only the entry whose `stageKey` matches the verified stage. An entry carrying an `exemption` or user `waiver` is NOT executed — record the skip and reason; a `waiver` becomes a `conditional-accept` condition surfaced in the section 7 Verdict (conformance left unverified by user acknowledgement). Each `runCommand` runs in the worktree cwd with `qaEnv` env (replica DB DSN / app base URL / env file) — **replica / test environment only**, never shared / staging / prod, and the same source/lockfile mutation deny-list applies (a conformance script MAY mutate only its `qaEnv` replica datastore). Interpret each result from the exit code + stdout `QA-RESULT: PASS|FAIL` (last wins) and `REQ <id>: PASS|FAIL: <reason>` lines; no `QA-RESULT` marker → `MISSING`. Any entry whose result is not `PASS` (including `MISSING` or a never-run/missing sidecar) is an **Acceptance Blocker** (`major`+), exactly like the DB real-execution gate above; since `accepted` requires zero blockers, the verdict becomes `conditional-accept` / `blocked`. This is the same gate the `validate-run.py` Tier 3 check enforces on the result sidecars.
|
|
39
40
|
- **Routing recommendation**: the next safe phase — one of `release-handoff`, `done`, `error-analysis`, `implementation-planning` — tied to the verdict and blocker list. `release-handoff` is allowed ONLY when the Verdict Token is `accepted`. `release-handoff` is additionally allowed ONLY when the verification scope (the `Verification scope:` line of the injected `VERIFICATION_TARGET` block, recorded as the report's `verificationScope` field) is `whole-task`; a `single-stage` run is partial and routes to `implementation` / `done` even on an `accepted` verdict.
|
|
40
41
|
- Clarification request policy (phase-specific addendum — shared policy is in `_common-contract.md`):
|
|
41
42
|
- populate `## 1. Clarification Items` only when a blocker hinges on information only the user can supply (deployment intent, intended target environment, business-rule interpretation); use `Blocks=next-phase` for items that gate continuing to release-handoff
|
|
42
43
|
- Self-review pass before finalising the report (`Claude lead` runs this; do not delegate to a generic subagent):
|
|
43
|
-
1. **Verdict precision** — section
|
|
44
|
+
1. **Verdict precision** — section 7 (`Final Verdict`) includes `Verdict Token` with one of the three allowed verdict tokens; `conditional-accept` lists every condition as an actionable item.
|
|
44
45
|
2. **Blocker traceability** — every blocker cites a concrete artifact (file:line, log excerpt, test exit code, MCP SELECT). Blockers without evidence are demoted to residual risk or removed.
|
|
45
46
|
3. **Coverage check** — every requirement in the originating plan/task brief is either marked covered (with artifact) or listed as a blocker. No silent omissions.
|
|
46
47
|
4. **Verifier dissent preserved** — if workers reach different verdicts, the disagreement is visible in section 1.2; synthesis hides nothing.
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
- gemini — when added to the roster it joins the analyser set; omitted by default
|
|
10
10
|
{{INCLUDE:_common-contract.md}}
|
|
11
11
|
- Brief consumption (phase-specific addendum — shared rules live in `_common-contract.md` under "Brief handoff contract"):
|
|
12
|
-
- Apply the shared reporter-confirmation precondition exactly as written. In this phase, unresolved `intent-check:` / `conversion-block:` rows use `Blocks=approval`; the operator cannot
|
|
12
|
+
- Apply the shared reporter-confirmation precondition exactly as written. In this phase, unresolved `intent-check:` / `conversion-block:` rows use `Blocks=approval`; the operator cannot flip the approval frontmatter to `approved: true` until those rows are resolved.
|
|
13
13
|
- never plan around an unconfirmed `intent-inference` augmentation as if it were a settled requirement. After the precondition runs, a `[CONFIRMED …]` marker on the matching `intent-check:` row is the signal that the inference can be treated as settled; otherwise it remains a `Blocks=approval` clarification item per the precondition's `skipped` branch.
|
|
14
14
|
- `conversion-block:` rows are handled by the precondition; planning around an untranslated reporter phrase is forbidden until it is resolved.
|
|
15
15
|
- Pre-planning context exploration (mandatory before option drafting):
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
- Section heading contract (BLOCKING — validator scans for these literal English substrings):
|
|
56
56
|
- The final report MUST include section headings containing each of the following exact strings: `Option Candidates`, `Trade-off`, `Recommended Option`, `Stage Map`, `Stage Exit Contract`, `Stage Validation`, `Dependency`, `Validation Checklist`, `Rollback`, `Requirement Coverage`. (Approval is no longer a body section — it is the YAML frontmatter `approved` field.)
|
|
57
57
|
- Korean translations are allowed in parentheses (e.g. `### Recommended Option (권장 옵션)`), but the English keyword must be present verbatim in the heading line.
|
|
58
|
-
- The shape and ordering follow `final-report-template.md` section
|
|
58
|
+
- The shape and ordering follow `final-report.template.md` section 5.5 (`Implementation Plan Deliverables` + `Stage Map`). Do NOT translate the heading keywords — `validators/validate-run.py` does substring matching on the raw report text and missing English strings are a real, repeatedly observed failure mode (root cause: writer translated the headings to Korean).
|
|
59
59
|
- Beyond substring matching, when the Plan Body Verification gate result is `passed` / `passed-with-dissent`, `validators/validate-run.py` runs the **structural** Stage Map validator (`validators/validate-implementation-plan-stages.py`) at the planning boundary — the exact `## 5.5 Stage Map` heading, each `## 5.5.<i> Stage <i>:` section with its four required subsections, the per-stage effective step count (≤6), the `depends-on` DAG, and the per-stage vertical-slice contract (S10) are all enforced here, not deferred to the `implementation` entry gate. S10 scans for the literal in-section strings `Slice value:`, `Acceptance:`, and the Stepwise `action`-cell prefixes `RED:` / `GREEN:` (or a `TDD exemption:` line) — keep these tokens verbatim for the same reason as the heading keywords above.
|
|
60
60
|
- Required deliverable shape (final report, in addition to the standard sections):
|
|
61
61
|
- at least two implementation options. **Each option must include**:
|
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
- **Per-stage subsections** (`## 5.5.<i> Stage <i>: <title>` for each `i`), each containing the four required subsections:
|
|
72
72
|
- `### Carry-In` — for `depends-on (none)`: task-brief only. Otherwise: each depended-on stage's static exit contract + runtime sidecar path `runs/<impl-key>/carry/stage-<i>.json` placeholder.
|
|
73
73
|
- `### Stepwise Execution Order` — bite-sized table with `step | action | files | command | expected`. **Effective row count ≤ 6** (excluding header / divider / blank). Each step is one action completable in 2–5 minutes; for code steps include actual code or diff sketch. **TDD ordering is MUST, not a preference:** the **first** effective step's `action` cell MUST start with the literal `RED:` and describe the failing test that captures this stage's `Acceptance` (`expected` = FAIL); at least one later `action` cell MUST start with the literal `GREEN:` and describe the minimal implementation that makes it pass (`expected` = PASS); an optional refactor step starts with `REFACTOR:`. **Exemption:** doc-only / config-only / pure-rename stages with no observable runtime behaviour may omit RED/GREEN by declaring one line `TDD exemption: <reason>` in the stage section (mirrors the executor's per-step exemption in `_implementation-executor.md`). Validator S10c enforces RED-first + GREEN, or the exemption line.
|
|
74
|
+
- **Per-stage conformance declaration (mandatory one line, in the stage section — same placement freedom as `TDD exemption:`):** the stage MUST carry exactly one of:
|
|
75
|
+
- `Conformance tests: stage-<N> — <task_root>/qa/stage-<N>.<ext> (requires=[db|io|http|external,...])` — a Tier 3 verification script that proves this stage's upstream requirements (brief / requirements-discovery / error-analysis / improvement-discovery → this stage's `Acceptance`) hold against **real** DB rows, real endpoints, or the real external API — NOT mocks. When you emit this line you MUST also (a) write the script to `<task_root>/qa/stage-<N>.<ext>` and (b) add a matching entry to `<task_root>/qa/conformance-manifest.json` with fields `stageKey` (= `<task-id>-stage-<N>`), `script`, `runCommand`, `requirementIds`, `requires` (subset of `{db, io, http, external}`), `passContract`, `exemption: null`, `waiver: null`. The script's standard interface: a `main` that exits `0`=PASS / non-zero=FAIL, and whose stdout ends with `QA-RESULT: PASS|FAIL` followed by one `REQ <id>: PASS|FAIL: <근거>` line per requirement.
|
|
76
|
+
- `Conformance exemption: <reason>` — only for stages that touch no db/io/http/external surface, or where unit tests fully cover the increment. (If the eventual `implementation` diff actually touches one of those surfaces, `validate-run.py`'s diff-surface cross-check is BLOCKING — an exemption cannot hide a real db/io/http/external change.)
|
|
77
|
+
The manifest lives at the **task level** (`<task_root>/qa/`, path token `TASK_QA_PATH`) and is shared across planning → implementation → final-verification. This declaration is enforced at three layers: `validators/validate-implementation-plan-stages.py` check **S11** forces every stage to carry one of the two lines; the manifest JSON structure is enforced by `validate_conformance_manifest` (run / validate-run); and the result gate (each script's `QA-RESULT`) is enforced by the verifier Tier 3 + validate-run.
|
|
74
78
|
- `### Stage Exit Contract` — predicted added/modified files, newly exposed identifiers/types/endpoints, downstream-usable resources.
|
|
75
79
|
- `### Stage Validation` — pre / mid / post exact commands or observable outcomes for this stage only.
|
|
76
80
|
- **Vertical-slice-first partition rule (1st-class):** the grouping anchor is a **thin end-to-end vertical slice** — one stage delivers a single user-observable increment, crossing whatever layers are needed (data → service → API → UI) to make that one increment work. File/module proximity is demoted to the **intra-slice grouping rule**: within a slice, keep steps touching the same file/directory/module together so the diff, PR, and rollback unit stay cohesive. **Horizontal layer-splitting is forbidden** — never carve "the DB layer" into one stage and "the service layer" into the next; that produces stages that ship no standalone user value. A stage is split ONLY when (a) a real `depends-on` data/contract dependency exists, (b) effective steps would exceed 6, or (c) it is a distinct vertical slice (a different user-value increment). Maximising the number of parallel stages is NOT a reason to split — parallelism is an emergent property of independent stages, never a partitioning goal.
|
|
@@ -84,8 +88,8 @@
|
|
|
84
88
|
- the YAML frontmatter MUST include the line `approved: false` (report-writer always emits the unflipped value). The user authorises the next `implementation` run by flipping it to `approved: true` (manual edit or `--approve` CLI). Do NOT recreate any `User Approval Request` body block — the validator fails reports that contain one (see `validators/validate-run.py` deprecated patterns).
|
|
85
89
|
- the YAML frontmatter MUST include the line `implementation-option:` directly under `approved:` (report-writer always emits it with an **empty value**). The user selects which Option Candidate the next `implementation` run executes by filling this line with that option's name (manual edit or `--implementation-option <name>` CLI). When left empty, the `implementation` run falls back to the `Recommended Option`.
|
|
86
90
|
- **the frontmatter `approved: false` line is rendered unconditionally; if the plan-body verification gate (§5.5.9) returns `blocked-by-disagreement` or `aborted-non-result`, the writer MUST keep `approved: false` and the validator refuses any report that ships with `approved: true` under such a gate result.**
|
|
87
|
-
- every ambiguity flagged during pre-planning that the user must resolve before approval registered as a `Blocks=approval` row in the `## 1. Clarification Items` table (do NOT create a separate `Open Questions` block under
|
|
88
|
-
- **§5.5.9 Plan Body Verification (BLOCKING).** After report-writer finishes the draft, the lead MUST run a worker peer-review round on the consolidated plan body (sections
|
|
91
|
+
- every ambiguity flagged during pre-planning that the user must resolve before approval registered as a `Blocks=approval` row in the `## 1. Clarification Items` table (do NOT create a separate `Open Questions` block under the implementation plan body — the unified table is the single home)
|
|
92
|
+
- **§5.5.9 Plan Body Verification (BLOCKING).** After report-writer finishes the draft, the lead MUST run a worker peer-review round on the consolidated plan body (Option Candidates / Trade-off Matrix / Recommended Option / Stage Map and per-stage sections / Dependency / Validation Checklist / Rollback / Requirement Coverage) and populate `### 5.5.9 Plan Body Verification` in the final report. The round protocol, plan-item ID scheme (`P-Opt-*` / `P-Step-*` / `P-Dep-*` / `P-Val-*` / `P-Rb-*`), verdict semantics, gate-result classification, and dissent log format are defined in `skills/okstra-convergence/SKILL.md` "Plan-body verification mode". The four gate-result values are `passed`, `passed-with-dissent`, `blocked-by-disagreement`, `aborted-non-result`. When the gate would have been `blocked-by-disagreement` or `aborted-non-result`, the lead MUST NOT silently flip it to one of the passing values to "unblock" the run — that is a contract violation. When `convergence.adversarial=true` (the default for this phase), this round uses the adversarial posture — verifiers confirm cited paths/commands and the burden of proof is on the plan — but the gate threshold stays `majority-disagree` (see that skill's §"Adversarial plan-body posture").
|
|
89
93
|
- **Decision-record evaluation (sole owner)**: this phase is the **single owner** of decision-record evaluation in the okstra lifecycle. The brief never evaluates or drafts decision records — it only forwards `adr-candidate:*` signals. Every `adr-candidate:*` entry inherited from the brief's `Open Questions` is a mandatory evaluation target. In addition, evaluate every decision the recommended option introduces against the three criteria:
|
|
90
94
|
1. **Hard to reverse** — would changing the decision later cost meaningfully more than deciding now?
|
|
91
95
|
2. **Surprising without context** — would a future reader, seeing only the code, wonder "why was it built this way?"?
|
|
@@ -105,5 +109,5 @@
|
|
|
105
109
|
4. **Ambiguity check** — any requirement that could be read two ways must be made explicit or moved to the `## 1. Clarification Items` table as a `Blocks=approval` row.
|
|
106
110
|
5. **Scope check** — if the recommended plan now spans multiple independent subsystems, recommend splitting into separate planning runs rather than shipping an oversized plan.
|
|
107
111
|
6. **Review-rule preflight check** — if a project review rule pack exists, map each relevant rule to the recommended option. Reject the draft if it knowingly creates a violation that the later PR reviewer would flag, unless the plan records a specific rationale and follow-up. In particular, scan for repeated helper stacks across planned files, tests that assert delegation to the same calculator/helper they exercise, public names that hide side effects, domain rules placed in repositories/adapters, and APIs made dead by this change.
|
|
108
|
-
7. **Plan-body verification reconciliation (BLOCKING for implementation-planning).** Inspect the `### 5.5.9 Plan Body Verification` verdict table. For every plan-item row classified as `majority-disagree → C-<N>`, the corresponding `C-<N>` row MUST exist in `## 1. Clarification Items` with `Kind` chosen per the standard policy and `Blocks=approval`. Do NOT create a parallel
|
|
112
|
+
7. **Plan-body verification reconciliation (BLOCKING for implementation-planning).** Inspect the `### 5.5.9 Plan Body Verification` verdict table. For every plan-item row classified as `majority-disagree → C-<N>`, the corresponding `C-<N>` row MUST exist in `## 1. Clarification Items` with `Kind` chosen per the standard policy and `Blocks=approval`. Do NOT create a parallel `Open Questions` block under the implementation plan body — the unified table is the single home. Conversely, the `Classification` column's `C-<N>` reference and the `## 1. Clarification Items` `ID` column MUST match 1:1; an orphan on either side is a contract violation. For `partial-consensus` and `worker-unique` plan-items, the dissenting opinion lives in §5.5.9 `Dissent log` and is NOT promoted to §5.
|
|
109
113
|
8. **Stage Map self-check** — for every stage, count the effective rows of its `Stepwise Execution Order` table by hand; reject the draft if any stage exceeds 6. Confirm each stage declares a non-empty `Slice value:` and `Acceptance:` line, and that its first step `action` starts with `RED:` with a later `GREEN:` (or carries a `TDD exemption:` line) — this is what validator S10 enforces. Walk the `depends-on` graph and confirm it is a DAG (no cycle, no self-reference). For each `depends-on` link, confirm it encodes a real data/contract dependency — do NOT add links to serialise unrelated work, and do NOT split a stage merely to create more parallel stages. **Parallel-safety:** for every pair of `depends-on (none)` stages, confirm their `Stage Exit Contract` predicted file sets are disjoint; if they share a file, merge them or add a `depends-on` link (validator S9 rejects overlap).
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Implementation Profile
|
|
2
2
|
|
|
3
3
|
- Purpose: realise the approved `implementation-planning` deliverable as actual source changes, with cross-model verification, while keeping the run reversible
|
|
4
|
-
- **Run-level fixed cost:** the verifier set, Phase 5.5 convergence, and the Phase 6 report-writer run exactly once per run, over
|
|
4
|
+
- **Run-level fixed cost:** the verifier set, Phase 5.5 convergence, and the Phase 6 report-writer run exactly once per implementation run, over this run's single stage diff — never once per step.
|
|
5
5
|
- Required workers:
|
|
6
6
|
- claude
|
|
7
7
|
- codex
|
|
@@ -20,14 +20,14 @@
|
|
|
20
20
|
- that file's YAML frontmatter MUST carry `approved: true`. report-writer emits `approved: false` by default; the user flips it to `true` to authorise this run. Free-form approvals such as "lgtm" / "go ahead" / paraphrased confirmations are NOT accepted; re-edit the plan file's frontmatter to `approved: true` before invoking implementation, or pass `--approve` so the CLI flips it on the user's behalf (`okstra_ctl.run._apply_cli_approval`).
|
|
21
21
|
- The `--approve` flag is meaningful ONLY with `--task-type implementation` and `--approved-plan <path>`; any other use raises `PrepareError`. Idempotent — re-running with `approved: true` already set appends an audit line but does NOT re-toggle.
|
|
22
22
|
- the authoritative scope for this run is the Option Candidate named by the YAML frontmatter `implementation-option:` field. **If `implementation-option:` is empty, fall back to the plan's `Recommended Option`** (this is a soft fallback, not a hard block). The chosen option's bite-sized step list becomes the authoritative scope; deviations must be justified in the final report and routed back to a new `implementation-planning` run rather than silently expanded. If the chosen option name does not match any heading under `Option Candidates`, record it as a deviation.
|
|
23
|
-
-
|
|
23
|
+
- Stage worktree (provisioned by `okstra-ctl` at this implementation run's prep time):
|
|
24
24
|
- Status: `{{EXECUTOR_WORKTREE_STATUS}}` (one of: `created` | `reused` | `skipped-in-worktree` | `skipped-not-git`)
|
|
25
|
-
- Working tree path: `{{EXECUTOR_WORKTREE_PATH}}` — when status is `created` or `reused`, this is
|
|
26
|
-
- Branch: `{{EXECUTOR_WORKTREE_BRANCH}}` — empty when status is `skipped-*`. Branch name = `<work-category-prefix>-<task-id-segment>`, globally unique via `~/.okstra/worktrees/registry.json`.
|
|
27
|
-
- Base ref: `{{EXECUTOR_WORKTREE_BASE_REF}}` — canonical `<base>` for every `git diff` / `git log` in this run.
|
|
25
|
+
- Working tree path: `{{EXECUTOR_WORKTREE_PATH}}` — when status is `created` or `reused`, this is this run's isolated stage worktree rooted at `~/.okstra/worktrees/<project>/<task-group>/<task-id>/stage-<N>/`. When skipped, this is the caller's `project_root`.
|
|
26
|
+
- Branch: `{{EXECUTOR_WORKTREE_BRANCH}}` — empty when status is `skipped-*`. Branch name = `<work-category-prefix>-<task-id-segment>-s<N>`, globally unique via `~/.okstra/worktrees/registry.json`.
|
|
27
|
+
- Base ref: `{{EXECUTOR_WORKTREE_BASE_REF}}` — canonical `<base>` for every `git diff` / `git log` in this run. Independent stages start from the task anchor; dependent stages start from the predecessor done commit or the verified merged task worktree head.
|
|
28
28
|
- Provisioning note: `{{EXECUTOR_WORKTREE_NOTE}}`
|
|
29
29
|
- Treat the working-tree path as `project_root` for the duration of this run. Do NOT mutate the caller's original checkout. cwd-sensitive Bash commands MUST be prefixed `cd {{EXECUTOR_WORKTREE_PATH}} && ` in the same Bash invocation (never `bash -lc "..."` wrappers — see executor sidecar for full rules).
|
|
30
|
-
- Lifecycle: kept after the run completes
|
|
30
|
+
- Lifecycle: kept after the run completes as this stage's evidence worktree. Later stages get their own stage worktrees. Manual cleanup: `git worktree remove <path>` → `git branch -D <branch>` → drop the stage-key registry entry (`<task-key>#stage-<N>`).
|
|
31
31
|
- Approval gate (phase-specific addendum to shared authority rule):
|
|
32
32
|
- the pre-implementation gate's recorded user approval marker is the only authorised approval gate at this phase — proceed once it is satisfied without further external coordination.
|
|
33
33
|
- Forbidden actions — universal (any occurrence → terminal status `contract-violated`):
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
- When candidates branch on a structural question (e.g. "is module X meant to own this responsibility?"), resolve via `Read` / `Grep` first. Only escalate to the user inside the Phase 1.5 budget.
|
|
31
31
|
- Expected output emphasis:
|
|
32
32
|
- the `## 5.9 Improvement Candidates` table populated with rows that obey the 10-column schema from `validators/validate-improvement-report.py` (Cand ID `I-NNN`, Lens from whitelist, Title, Scope ⊆ scan-scope, Severity, Effort, Consensus, Source workers `<worker>:<id>` from {claude, codex, gemini}, Recommended next-phase ∈ {requirements-discovery, implementation-planning, error-analysis}, Evidence as path:line list)
|
|
33
|
+
- `Consensus` cells in `## 5.9 Improvement Candidates` use the table enum exactly: `full`, `partial`, `contested`, `worker-unique`. Map convergence's `full-consensus` / `partial-consensus` labels to `full` / `partial` before writing the table.
|
|
33
34
|
- `## 7. Final Verdict` Verdict Token ∈ {`candidates-ready`, `no-candidates`, `blocked`}; Direction `routing`; Next Step "사용자에게 후보 K개 선택 의뢰 (## 5.9 표 참조)"
|
|
34
35
|
- `## 3. Recommended Next Steps` first entry summarises per-candidate routing and proposes new task-key names of the form `<task-group>/imp-<Cand-ID>`
|
|
35
36
|
- this report is authored free-form (improvement-discovery is not in the data.json schema enum); after the markdown is written, the report-writer runs `scripts/okstra-inject-report-index.py <report.md> --report-language <en|ko>` to add the top-of-report Index + `I-NNN`/`C-NNN` scroll anchors. The run validator fails the report when the Index anchor is missing.
|
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
- Purpose: take an `accepted` final-verification verdict for an already-committed implementation branch and turn it into a delivered push and/or pull request, with explicit user selection at every mutating step
|
|
4
4
|
- **Execution model: single-lead, no worker dispatch.** This phase is a thin orchestrator over `git` / `gh`; it does NOT run team-mode, does NOT call `TeamCreate`, does NOT dispatch analysis or drafter sub-agents, and does NOT run convergence. The Claude lead performs every step inline (drafting PR text, asking the user, running git / gh, writing the final report) — see "Lead-only contract" below.
|
|
5
|
-
-
|
|
5
|
+
- Worker roster: none — this profile intentionally has no `- Required workers:` block; the run is executed entirely by the Claude lead.
|
|
6
6
|
- Lead-only contract (replaces the shared team contract for this phase):
|
|
7
7
|
- The Claude lead is the sole agent for this run. No `Agent(...)` worker dispatch, no `TeamCreate`, no parallel sub-agents, no convergence loop.
|
|
8
8
|
- The lead drafts the PR title and PR body **inline** by reading the run brief, the cited final-verification report, `git log --oneline <base>..HEAD`, and `git diff <base>..HEAD --stat`. No drafter worker is dispatched.
|
|
9
|
-
- The lead authors the final-report file directly (no `Report writer worker` dispatch). The report still conforms to the standard `
|
|
9
|
+
- The lead authors the final-report file directly (no `Report writer worker` dispatch). The report still conforms to the standard `templates/reports/final-report.template.md` structure, including the `## 5.6 Release Handoff Deliverables` section.
|
|
10
10
|
- The shared anti-escalation rule from the common contract still applies: do not start any other lifecycle phase from inside this run.
|
|
11
11
|
- The shared "authority & permissions assumption" rule from the common contract still applies: assume the user holds every permission needed; do not block on hypothetical approvals.
|
|
12
12
|
- The shared "MCP read-only" rule still applies if the brief lists MCP servers, though most release-handoff runs do not use MCP.
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
- `push + PR` — push the feature branch, then open or reuse a pull request.
|
|
23
23
|
- `skip` — record the verified state and end the run without any git command.
|
|
24
24
|
If the user picks `skip`, route directly to the final-report self-review pass.
|
|
25
|
-
2. **PR base branch** (only when the user picked `push + PR`) — present
|
|
25
|
+
2. **PR base branch** (only when the user picked `push + PR`) — present four options and capture exactly one:
|
|
26
26
|
- `staging`
|
|
27
27
|
- `preprod`
|
|
28
28
|
- `main`
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
- **User Selections**: a block recording each prompt and the user's verbatim answer.
|
|
76
76
|
- Q1 action: `local only` | `push + PR` | `skip`.
|
|
77
77
|
- Q2 PR base (if applicable): the chosen branch and how it was selected (menu pick vs free-form input).
|
|
78
|
-
- Q2b merge-conflict probe (if applicable): `clean` (no conflict, no prompt shown) | `proceed anyway` | `change base branch` | `cancel`. When a conflict was detected, list the conflicting paths.
|
|
78
|
+
- Q2b merge-conflict probe (if applicable): `not-run` | `clean` (no conflict, no prompt shown) | `proceed anyway` | `change base branch` | `cancel`. Record it in both the `User Selections` row `H2b` and the `Merge Conflict Probe` deliverable. When a conflict was detected, list the conflicting paths.
|
|
79
79
|
- Q3 title/body: `use as-is` | `edit then proceed` (with a diff between the lead's draft and the final text) | `cancel`.
|
|
80
80
|
- **Executed Commands**: every git / gh command the lead actually ran, with its exit code and a one-line stdout/stderr summary. Read-only inspection commands MAY be summarised; mutating commands MUST be listed verbatim.
|
|
81
81
|
- **Commit List**: each existing implementation commit in `git log <base>..HEAD`, with short/full SHA, subject line, and touched files. Release-handoff MUST NOT create new commits.
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Stage conformance(Tier 3) 매니페스트 검증 + `QA-RESULT` 파서.
|
|
2
|
+
|
|
3
|
+
implementation/final-verification 의 verifier 는 stage 별 conformance 스크립트를
|
|
4
|
+
실행해 상위 요구사항 부합을 검증한다. 본 모듈은 그 검증/파싱의 결정론적 코어다.
|
|
5
|
+
|
|
6
|
+
1. `conformance-manifest.json` 구조 검증 (`validate_conformance_manifest`).
|
|
7
|
+
2. 스크립트 stdout 의 `QA-RESULT` 마커 파싱 (`parse_qa_result`).
|
|
8
|
+
|
|
9
|
+
스크립트 실행/게이트 강제는 verifier prompt 와 validators/validate-run.py 가 담당한다.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import fnmatch
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
# Whitelist of capability tags that are cross-checked against the surfaces touched by the diff.
|
|
18
|
+
CAPABILITY_WHITELIST: tuple[str, ...] = ("db", "io", "http", "external")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _check_nonempty_str(value: object, path: str, errors: list[str]) -> bool:
    """Check that *value* is a string containing non-whitespace characters.

    Args:
        value: The candidate value taken from the manifest.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives one message when the check fails.

    Returns:
        ``True`` when the value is acceptable, ``False`` after recording an
        error.
    """
    if isinstance(value, str) and value.strip():
        return True
    errors.append(f"{path} must be a non-empty string")
    return False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _check_capabilities(value: object, path: str, errors: list[str]) -> None:
    """Check that *value* is a list made up only of whitelisted capability tags.

    A non-list value yields a single "must be an array" error.  Otherwise one
    error is recorded for every element that is not a member of
    ``CAPABILITY_WHITELIST``; an empty list is accepted.

    Args:
        value: The candidate capability list from the manifest.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives the violation messages.
    """
    if isinstance(value, list):
        errors.extend(
            f"{path}: unknown capability {cap!r} "
            f"(allowed: {', '.join(CAPABILITY_WHITELIST)})"
            for cap in value
            if cap not in CAPABILITY_WHITELIST
        )
    else:
        errors.append(f"{path} must be an array")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _check_exemption(value: object, path: str, errors: list[str]) -> None:
    """Validate an entry's optional ``exemption`` object.

    ``None`` is accepted silently.  Any other non-dict value is reported.  A
    dict must carry non-empty string ``reason`` and ``declaredAt`` fields;
    each missing or blank field adds its own message to *errors*.

    Args:
        value: The ``exemption`` value from a manifest entry.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives the violation messages.
    """
    if value is None:
        return
    if isinstance(value, dict):
        for field in ("reason", "declaredAt"):
            _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
    else:
        errors.append(f"{path} must be an object or null")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _check_waiver(value: object, path: str, errors: list[str]) -> None:
    """Validate an entry's optional ``waiver`` object.

    ``None`` is accepted silently.  Any other non-dict value is reported.  A
    dict must carry non-empty string ``acknowledgedBy``, ``reason`` and ``at``
    fields, and its ``scope`` (treated as an empty list when absent) must be
    a list of whitelisted capability tags.

    Args:
        value: The ``waiver`` value from a manifest entry.
        path: Dotted location of the value, used as the error-message prefix.
        errors: Accumulator that receives the violation messages.
    """
    if value is None:
        return
    if not isinstance(value, dict):
        errors.append(f"{path} must be an object or null")
        return
    for field in ("acknowledgedBy", "reason", "at"):
        _check_nonempty_str(value.get(field), f"{path}.{field}", errors)
    scope = value.get("scope", [])
    _check_capabilities(scope, f"{path}.scope", errors)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _check_entry(entry: object, idx: int, errors: list[str]) -> None:
    """Validate one element of the manifest's ``entries`` array.

    Checks, in order: the four required string fields (``stageKey``,
    ``script``, ``runCommand``, ``passContract``), that ``requirementIds`` is
    a non-empty list of non-blank strings, that ``requires`` (empty list when
    absent) holds only whitelisted capabilities, and the optional
    ``exemption`` and ``waiver`` objects.

    Args:
        entry: The raw entry value.
        idx: Position of the entry inside ``entries``; used in error paths.
        errors: Accumulator that receives the violation messages.
    """
    path = f"entries[{idx}]"
    if not isinstance(entry, dict):
        errors.append(f"{path} must be an object")
        return

    for field in ("stageKey", "script", "runCommand", "passContract"):
        _check_nonempty_str(entry.get(field), f"{path}.{field}", errors)

    req_ids = entry.get("requirementIds")
    ids_ok = (
        isinstance(req_ids, list)
        and bool(req_ids)
        and all(isinstance(r, str) and r.strip() for r in req_ids)
    )
    if not ids_ok:
        errors.append(f"{path}.requirementIds must be a non-empty array of strings")

    _check_capabilities(entry.get("requires", []), f"{path}.requires", errors)
    _check_exemption(entry.get("exemption"), f"{path}.exemption", errors)
    _check_waiver(entry.get("waiver"), f"{path}.waiver", errors)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def validate_conformance_manifest(manifest: object) -> list[str]:
    """Validate a whole conformance manifest and return the violations found.

    A missing manifest (``None``) is legal: a task may have no conformance
    scripts, and gate enforcement (the diff-surface cross-check) is decided
    by ``validators/validate-run.py`` rather than here.

    Args:
        manifest: The decoded ``conformance-manifest.json`` value, or ``None``.

    Returns:
        A list of human-readable violation messages; empty when the manifest
        is structurally sound.
    """
    if manifest is None:
        return []
    if not isinstance(manifest, dict):
        return [f"conformance manifest must be an object, got {type(manifest).__name__}"]

    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return ["conformance manifest .entries must be an array"]

    problems: list[str] = []
    known_keys: set[str] = set()
    for position, item in enumerate(entries):
        _check_entry(item, position, problems)
        if not isinstance(item, dict):
            continue
        stage_key = item.get("stageKey")
        if not (isinstance(stage_key, str) and stage_key):
            continue
        # Each stageKey may appear only once across the manifest.
        if stage_key in known_keys:
            problems.append(f"entries[{position}].stageKey duplicate: {stage_key!r}")
        else:
            known_keys.add(stage_key)
    return problems
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Both patterns match one physical line each.  Only horizontal whitespace
# ([ \t]) is allowed between tokens: with re.MULTILINE a `\s*` run can cross a
# newline, which would let a bare "QA-RESULT:" pair up with a "PASS" on a later
# line and let a REQ line with an empty reason swallow the following line
# (hiding a subsequent REQ ... FAIL line from findall).
_QA_RESULT_RE = re.compile(r"^QA-RESULT:[ \t]*(PASS|FAIL)[ \t\r]*$", re.MULTILINE)
_REQ_LINE_RE = re.compile(
    r"^REQ[ \t]+(\S+):[ \t]*(PASS|FAIL):[ \t]*(.*)$", re.MULTILINE
)


@dataclass
class QaResult:
    """Parsed outcome of one conformance script run."""

    # Overall marker: "PASS" | "FAIL" | "MISSING" (no marker was emitted).
    overall: str
    # Requirement id -> {"status": "PASS"|"FAIL", "reason": str}.
    requirements: dict[str, dict[str, str]]


def parse_qa_result(stdout: str) -> QaResult:
    """Parse the ``QA-RESULT`` marker and ``REQ`` lines from script stdout.

    When no ``QA-RESULT: PASS|FAIL`` line is present, ``overall`` is
    ``"MISSING"`` (the script did not honour the contract, so the gate treats
    it as a failure).  When several markers are present the last one wins.
    When the same requirement id appears on several ``REQ`` lines the last
    line wins.

    Args:
        stdout: Captured standard output of the script; ``None`` or an empty
            string is treated as empty output.

    Returns:
        A ``QaResult`` with the overall status and per-requirement results.
    """
    text = stdout or ""
    markers = _QA_RESULT_RE.findall(text)
    overall = markers[-1] if markers else "MISSING"
    requirements: dict[str, dict[str, str]] = {}
    for rid, status, reason in _REQ_LINE_RE.findall(text):
        # strip() also removes a trailing "\r" left by CRLF line endings.
        requirements[rid] = {"status": status, "reason": reason.strip()}
    return QaResult(overall=overall, requirements=requirements)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class ConformanceVerdict:
    """Gate decision for a single conformance manifest entry."""

    stage_key: str
    # One of "PASS" | "BLOCKING" | "WAIVED" | "EXEMPT".
    status: str
    # Whether the run may proceed (True for PASS / WAIVED / EXEMPT).
    ok: bool
    # True only for WAIVED: conformance was not verified, the user acknowledged it.
    conditional: bool
    message: str


def decide_conformance_gate(entry: dict, result: object) -> ConformanceVerdict:
    """Decide the gate verdict for one stage entry and its execution result.

    Precedence is exemption, then waiver, then the recorded result.  A result
    that is absent (``None``), has ``overall == "MISSING"``, or has any
    non-PASS ``overall`` is BLOCKING.  The shape of the exemption and waiver
    objects is expected to have been checked by
    ``validate_conformance_manifest`` beforehand.

    Args:
        entry: One manifest entry.
        result: A ``QaResult``-like object exposing ``overall``, or ``None``
            when the script never ran.

    Returns:
        The ``ConformanceVerdict`` for this stage.
    """
    stage = entry.get("stageKey", "<unknown>")

    exemption = entry.get("exemption")
    if exemption:
        exempt_reason = exemption.get("reason", "")
        return ConformanceVerdict(
            stage_key=stage,
            status="EXEMPT",
            ok=True,
            conditional=False,
            message=f"conformance exempted: {exempt_reason}",
        )

    waiver = entry.get("waiver")
    if waiver:
        who = waiver.get("acknowledgedBy", "?")
        why = waiver.get("reason", "")
        return ConformanceVerdict(
            stage_key=stage,
            status="WAIVED",
            ok=True,
            conditional=True,
            message=f"conformance waived by {who}: {why}",
        )

    # A missing result object has no `overall` attribute, which maps to None.
    overall = getattr(result, "overall", None)
    if overall == "PASS":
        return ConformanceVerdict(stage, "PASS", True, False, "conformance PASS")

    if overall is None:
        detail = "conformance script never ran (no result recorded)"
    elif overall == "MISSING":
        detail = "conformance script ran but emitted no QA-RESULT marker"
    else:
        detail = f"conformance {overall}"
    return ConformanceVerdict(stage, "BLOCKING", False, False, detail)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def qa_result_from_dict(data: object) -> QaResult:
    """Rebuild a ``QaResult`` from a result sidecar (decoded JSON).

    Used when validate-run loads the ``result-stage-<N>.json`` written by the
    Phase 3 verifier.  Malformed input degrades safely: a non-dict payload or
    an unrecognised ``overall`` becomes ``"MISSING"`` (handled as BLOCKING),
    and a non-dict ``requirements`` becomes an empty dict.

    Args:
        data: The decoded sidecar content.

    Returns:
        The reconstructed ``QaResult``.
    """
    if isinstance(data, dict):
        overall = data.get("overall")
        requirements = data.get("requirements")
    else:
        overall, requirements = None, None

    if overall not in ("PASS", "FAIL", "MISSING"):
        overall = "MISSING"
    if not isinstance(requirements, dict):
        requirements = {}
    return QaResult(overall=overall, requirements=requirements)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def evaluate_conformance(manifest: object, results_by_stage: object) -> list[ConformanceVerdict]:
    """Return the gate verdict for every entry in the manifest.

    The manifest structure is assumed to have been validated with
    ``validate_conformance_manifest`` before this call.

    Args:
        manifest: The decoded manifest.  Anything without a list-valued
            ``entries`` yields an empty result.
        results_by_stage: Mapping of stageKey to ``QaResult``.  A stage with
            no key in the mapping is treated as never run (``None``); a
            non-dict value is treated as an empty mapping.

    Returns:
        One ``ConformanceVerdict`` per dict entry, in manifest order; non-dict
        entries are skipped.
    """
    if not isinstance(manifest, dict):
        return []
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return []

    results = results_by_stage if isinstance(results_by_stage, dict) else {}
    return [
        decide_conformance_gate(item, results.get(item.get("stageKey")))
        for item in entries
        if isinstance(item, dict)
    ]
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Default mapping from path → capability surface. Per-project overrides come from qaEnv.surfacePatterns
|
|
211
|
+
# (Phase 4e). 'external' is hard to detect from paths, so it has no default pattern — it relies on explicit declaration.
|
|
212
|
+
_DEFAULT_SURFACE_PATTERNS: dict[str, tuple[str, ...]] = {
|
|
213
|
+
"db": ("*.sql", "*migration*", "*repository*", "*.entity.*", "*entities*", "*schema.prisma*"),
|
|
214
|
+
"http": ("*controller*", "*.routes.*", "*router*", "*endpoint*", "*.api.*"),
|
|
215
|
+
"io": ("*filesystem*", "*storage*", "*.fs.*"),
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def detect_surfaces(file_paths: object, patterns: object = None) -> set[str]:
    """Detect the capability surfaces touched by a set of changed file paths.

    Matching is case-insensitive and platform-independent: both the path and
    the glob are lowercased and have backslashes normalised to forward
    slashes, then compared with ``fnmatch.fnmatchcase``.

    Args:
        file_paths: Iterable of changed file paths.  A single string is
            treated as one path; non-string items are ignored; ``None`` or an
            empty iterable yields an empty result.
        patterns: Optional mapping of surface name to glob(s).  A glob value
            may be an iterable of strings or a single string.  When this is
            not a dict, the module's default mapping is used.

    Returns:
        The set of surface names with at least one matching path.
    """
    table = patterns if isinstance(patterns, dict) else _DEFAULT_SURFACE_PATTERNS

    # A bare string would otherwise be iterated character by character.
    if isinstance(file_paths, str):
        file_paths = [file_paths]
    paths = [
        raw.strip().lower().replace("\\", "/")
        for raw in file_paths or []
        if isinstance(raw, str)
    ]

    found: set[str] = set()
    for surface, globs in table.items():
        # Same hazard for globs: iterating "*.sql" per character would yield
        # a lone "*" that matches every path.
        if isinstance(globs, str):
            globs = (globs,)
        # Paths are lowercased, so globs must be too or an override written
        # with uppercase letters could never match.
        wanted = [
            g.lower().replace("\\", "/") for g in globs or () if isinstance(g, str)
        ]
        if any(fnmatch.fnmatchcase(path, g) for path in paths for g in wanted):
            found.add(surface)
    return found
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def parse_qa_waiver_arg(arg: object) -> tuple[str, str] | None:
    """Split a ``--qa-waiver`` value of the form ``<stageKey>:<reason>``.

    The split happens at the first colon, so the reason may itself contain
    colons.  Both parts are stripped of surrounding whitespace.

    Args:
        arg: The raw option value.

    Returns:
        ``(stageKey, reason)``, or ``None`` when the value is not a string,
        has no colon, or either part is empty after stripping.
    """
    if not isinstance(arg, str):
        return None
    head, colon, tail = arg.partition(":")
    stage_key = head.strip()
    reason = tail.strip()
    if not colon or not stage_key or not reason:
        return None
    return stage_key, reason
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def apply_qa_waiver(manifest: object, stage_key: str, reason: str, *, at: str,
                    acknowledged_by: str = "user") -> bool:
    """Fill in the ``waiver`` of the entry whose ``stageKey`` matches, in place.

    This is the user-acknowledged bypass (spec §7.2); *reason* is the user's
    instruction verbatim.  Only the first matching entry is updated, and any
    waiver already present on it is replaced.

    Args:
        manifest: The decoded manifest; mutated when a match is found.
        stage_key: The ``stageKey`` of the entry to waive.
        reason: Text stored as the waiver's ``reason``.
        at: Value stored as the waiver's ``at`` field.
        acknowledged_by: Value stored as ``acknowledgedBy``.

    Returns:
        ``True`` when an entry was found and updated, ``False`` otherwise
        (including when the manifest has no list-valued ``entries``).
    """
    if not isinstance(manifest, dict):
        return False
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return False

    target = next(
        (
            item
            for item in entries
            if isinstance(item, dict) and item.get("stageKey") == stage_key
        ),
        None,
    )
    if target is None:
        return False

    target["waiver"] = {
        "acknowledgedBy": acknowledged_by,
        "reason": reason,
        "scope": [],
        "at": at,
    }
    return True
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def manifest_required_surfaces(manifest: object) -> set[str]:
    """Return the union of every entry's ``requires`` list.

    This is the set of surfaces the manifest declares.  Non-dict entries,
    entries whose ``requires`` is not a list, and non-string items inside a
    ``requires`` list are ignored.

    Args:
        manifest: The decoded manifest.

    Returns:
        The declared surface names; empty when the manifest has no
        list-valued ``entries``.
    """
    if not isinstance(manifest, dict):
        return set()
    entries = manifest.get("entries")
    if not isinstance(entries, list):
        return set()
    return {
        cap
        for item in entries
        if isinstance(item, dict) and isinstance(item.get("requires"), list)
        for cap in item["requires"]
        if isinstance(cap, str)
    }
|
|
@@ -117,6 +117,7 @@ def compute_run_paths(
|
|
|
117
117
|
task_index = task_root / "task-index.md"
|
|
118
118
|
instruction_set = task_root / "instruction-set"
|
|
119
119
|
analysis_packet = instruction_set / "analysis-packet.md"
|
|
120
|
+
task_qa = task_root / "qa"
|
|
120
121
|
runs_dir = task_root / "runs"
|
|
121
122
|
history_dir = task_root / "history"
|
|
122
123
|
timeline_file = history_dir / "timeline.json"
|
|
@@ -202,6 +203,7 @@ def compute_run_paths(
|
|
|
202
203
|
"TASK_INDEX_PATH": str(task_index),
|
|
203
204
|
"INSTRUCTION_SET_PATH": str(instruction_set),
|
|
204
205
|
"ANALYSIS_PACKET_PATH": str(analysis_packet),
|
|
206
|
+
"TASK_QA_PATH": str(task_qa),
|
|
205
207
|
"RUNS_DIR": str(runs_dir),
|
|
206
208
|
"HISTORY_DIR": str(history_dir),
|
|
207
209
|
"TIMELINE_PATH": str(timeline_file),
|