okstra 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/okstra +1 -0
- package/docs/superpowers/plans/2026-05-17-dual-format-final-report.md +167 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/agents/workers/claude-worker.md +2 -2
- package/runtime/agents/workers/codex-worker.md +1 -1
- package/runtime/agents/workers/gemini-worker.md +1 -1
- package/runtime/agents/workers/report-writer-worker.md +3 -1
- package/runtime/bin/okstra-render-report-views.py +129 -0
- package/runtime/prompts/profiles/implementation-planning.md +1 -1
- package/runtime/python/okstra_ctl/report_views.py +701 -0
- package/runtime/skills/okstra-report-writer/SKILL.md +19 -4
- package/runtime/skills/okstra-team-contract/SKILL.md +2 -5
- package/runtime/templates/reports/final-report.template.md +169 -2
- package/runtime/templates/reports/report.css +151 -0
- package/runtime/templates/reports/report.js +163 -0
- package/runtime/templates/reports/user-response.template.md +69 -0
- package/runtime/validators/lib/fixtures.sh +40 -3
- package/runtime/validators/validate-report-views.py +283 -0
- package/runtime/validators/validate-run.py +251 -3
- package/runtime/validators/validate-workflow.sh +4 -0
- package/src/install.mjs +1 -0
- package/src/render-views.mjs +67 -0
|
@@ -328,7 +328,7 @@ report_lines = [
|
|
|
328
328
|
"| 항목 | 값 |",
|
|
329
329
|
"|------|----|",
|
|
330
330
|
"| Final Conclusion | validation fixture |",
|
|
331
|
-
"| Verdict Token | `
|
|
331
|
+
"| Verdict Token | `accepted` |",
|
|
332
332
|
"| Direction | `continue-investigation` |",
|
|
333
333
|
"| Approval Required? | `no` |",
|
|
334
334
|
"| Next Step | fixture |",
|
|
@@ -350,13 +350,50 @@ report_lines.extend(
|
|
|
350
350
|
"| **전체 합계** | **`2`** | **`2`** | **`$0.02`** |",
|
|
351
351
|
"| Codex/Gemini CLI 추가 비용 | | | `$0.00` |",
|
|
352
352
|
"",
|
|
353
|
-
"## Final Verdict",
|
|
354
|
-
"
|
|
353
|
+
"## 2. Final Verdict",
|
|
354
|
+
"",
|
|
355
|
+
"| 항목 | 값 |",
|
|
356
|
+
"|------|----|",
|
|
357
|
+
"| Verdict Token | `accepted` |",
|
|
358
|
+
"",
|
|
359
|
+
"## 4.8 Final Verification Deliverables",
|
|
360
|
+
"",
|
|
361
|
+
"Source Implementation Report / Acceptance Blockers / Residual Risk / "
|
|
362
|
+
"Validation Evidence / Read-only Command Log / Routing Recommendation: "
|
|
363
|
+
"fixture stub.",
|
|
355
364
|
]
|
|
356
365
|
)
|
|
357
366
|
report_path.parent.mkdir(parents=True, exist_ok=True)
|
|
358
367
|
report_path.write_text("\n".join(report_lines) + "\n")
|
|
359
368
|
|
|
369
|
+
# Phase 7 step 1.5 (BLOCKING) — render the slim/html sibling artifacts
|
|
370
|
+
# next to the final-report so validate-run.py's new report-views hook
|
|
371
|
+
# passes. The workflow validator's fixture predates that step; we
|
|
372
|
+
# materialise both files in-place using the same single-reference-point
|
|
373
|
+
# helper the CLI uses.
|
|
374
|
+
import os
|
|
375
|
+
WORKSPACE_ROOT = os.environ.get("OKSTRA_WORKSPACE_ROOT_FOR_FIXTURE", "")
|
|
376
|
+
if WORKSPACE_ROOT:
|
|
377
|
+
import sys as _sys
|
|
378
|
+
_sys.path.insert(0, str(Path(WORKSPACE_ROOT) / "scripts"))
|
|
379
|
+
try:
|
|
380
|
+
from okstra_ctl.report_views import RunMeta, render_both_views
|
|
381
|
+
css = (Path(WORKSPACE_ROOT) / "templates" / "reports" / "report.css").read_text(encoding="utf-8")
|
|
382
|
+
js = (Path(WORKSPACE_ROOT) / "templates" / "reports" / "report.js").read_text(encoding="utf-8")
|
|
383
|
+
render_both_views(
|
|
384
|
+
report_path,
|
|
385
|
+
run_meta=RunMeta(
|
|
386
|
+
task_key=str(task_manifest.get("taskKey", "validation/fixture")),
|
|
387
|
+
task_type=str(task_manifest.get("taskType", "validation")),
|
|
388
|
+
seq="001",
|
|
389
|
+
source_report=report_path.name,
|
|
390
|
+
),
|
|
391
|
+
css=css,
|
|
392
|
+
js=js,
|
|
393
|
+
)
|
|
394
|
+
except Exception as exc: # pragma: no cover — fixture path only
|
|
395
|
+
raise SystemExit(f"failed to render report views in fixture: {exc}")
|
|
396
|
+
|
|
360
397
|
if final_status_path.exists():
|
|
361
398
|
final_status_path.unlink()
|
|
362
399
|
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Validate the two derived final-report views produced by Phase 7 step
|
|
3
|
+
1.5 (``scripts/okstra-render-report-views.py``).
|
|
4
|
+
|
|
5
|
+
Checks, for a given final-report MD path:
|
|
6
|
+
1. ``*.slim.md`` and ``*.html`` siblings both exist.
|
|
7
|
+
2. Slim preserves every phase-required substring from ``validate-run.py``
|
|
8
|
+
byte-identically (drift fence vs the report_views module).
|
|
9
|
+
3. HTML body (``<main>`` slice) contains the same plain-text
|
|
10
|
+
substrings (after html.unescape and the markdown ``#`` prefix is
|
|
11
|
+
stripped).
|
|
12
|
+
4. HTML's §4.6 / §4.7 / §4.8 deliverable regions contain no
|
|
13
|
+
``<textarea>`` / ``<input>`` / ``<select>`` (form-attach is
|
|
14
|
+
restricted to §5 clarification rows).
|
|
15
|
+
5. HTML has no external URLs in ``<script src=>`` / ``<link href=>`` /
|
|
16
|
+
``<img src=>`` — self-contained guarantee.
|
|
17
|
+
6. Every Response ID in HTML matches the §5 Clarification Items table
|
|
18
|
+
of the source MD (1:1).
|
|
19
|
+
|
|
20
|
+
Exit codes: 0 on success, 1 on any failure. Failures are printed one
|
|
21
|
+
per line to stderr.
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import html as html_lib
|
|
27
|
+
import importlib.util
|
|
28
|
+
import re
|
|
29
|
+
import sys
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Iterable
|
|
32
|
+
|
|
33
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
34
|
+
SCRIPTS_DIR = REPO_ROOT / "scripts"
|
|
35
|
+
if str(SCRIPTS_DIR) not in sys.path:
|
|
36
|
+
sys.path.insert(0, str(SCRIPTS_DIR))
|
|
37
|
+
|
|
38
|
+
from okstra_ctl.clarification_items import parse_clarification_items # noqa: E402
|
|
39
|
+
from okstra_ctl.report_views import ( # noqa: E402
|
|
40
|
+
_strip_leading_digest_comment,
|
|
41
|
+
extract_html_digest,
|
|
42
|
+
extract_slim_digest,
|
|
43
|
+
preserved_substrings,
|
|
44
|
+
slim_markdown,
|
|
45
|
+
source_digest,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _load_validate_run() -> object:
    """Load ``validators/validate-run.py`` and return it as a module object.

    The file is a stand-alone script with a hyphenated name, so it cannot be
    imported with a normal ``import`` statement; it is executed from its
    path through ``importlib`` instead. Callers read its substring tuples
    from the returned module so drift between the two files surfaces here.

    Returns:
        The executed module object.

    Raises:
        ImportError: if no import spec or loader can be built for the path.
    """
    path = REPO_ROOT / "validators" / "validate-run.py"
    spec = importlib.util.spec_from_file_location("_okstra_validate_run", path)
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O`` and the failure would otherwise degrade into an
    # AttributeError on ``None`` a few lines further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _detect_task_type(md: str) -> str | None:
|
|
63
|
+
m = re.search(r"^- Task Type:\s*(\S+)", md, re.MULTILINE)
|
|
64
|
+
return m.group(1).strip() if m else None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _main_body(html_text: str) -> str:
|
|
68
|
+
start = html_text.find("<main>")
|
|
69
|
+
end = html_text.find("</main>", start) if start >= 0 else -1
|
|
70
|
+
if start < 0 or end < 0:
|
|
71
|
+
return html_text
|
|
72
|
+
return html_text[start + len("<main>"): end]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _strip_md_prefix(sub: str) -> str:
|
|
76
|
+
s = sub
|
|
77
|
+
while s.startswith("#"):
|
|
78
|
+
s = s[1:]
|
|
79
|
+
return s.strip()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
_EXTERNAL_URL_RE = re.compile(
|
|
83
|
+
r'<(?:script|link|img|iframe|source|video|audio)\s[^>]*?(?:src|href)\s*=\s*["\']https?://',
|
|
84
|
+
re.IGNORECASE,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
_RESPONSE_ID_ATTR_RE = re.compile(r'data-response-id="(C-\d+)"')
|
|
88
|
+
|
|
89
|
+
_NO_FORM_HEADING_RE = re.compile(
|
|
90
|
+
r"<h[23][^>]*>\s*(?:4\.6|4\.7|4\.8)[\s\S]*?</h[23]>"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _no_form_sections(html_body: str) -> list[str]:
|
|
95
|
+
"""Return a list of strings, each being the rendered chunk of a
|
|
96
|
+
no-form section (4.6 / 4.7 / 4.8) up to the next h2/h3. Used to
|
|
97
|
+
check that no form controls live inside them.
|
|
98
|
+
"""
|
|
99
|
+
chunks: list[str] = []
|
|
100
|
+
headings = [
|
|
101
|
+
m for m in re.finditer(
|
|
102
|
+
r'<h([23])[^>]*>([^<]*?)</h[23]>', html_body
|
|
103
|
+
)
|
|
104
|
+
]
|
|
105
|
+
for i, m in enumerate(headings):
|
|
106
|
+
text = m.group(2).strip()
|
|
107
|
+
if not text.startswith(("4.6", "4.7", "4.8")):
|
|
108
|
+
continue
|
|
109
|
+
start = m.end()
|
|
110
|
+
end = headings[i + 1].start() if i + 1 < len(headings) else len(html_body)
|
|
111
|
+
chunks.append(html_body[start:end])
|
|
112
|
+
return chunks
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def validate(report_path: Path) -> list[str]:
    """Validate the ``*.slim.md`` and ``*.html`` siblings of a final-report.

    The checks run in order: sibling existence, embedded digest freshness,
    substring-list drift against ``validate-run.py``, substring preservation
    in the slim and HTML views, absence of form controls in the 4.6 / 4.7 /
    4.8 sections, absence of external asset URLs, and Response ID parity
    between the markdown and the HTML.

    Args:
        report_path: Path to the original final-report markdown.

    Returns:
        One human-readable string per failure; empty when every check passes.
        A missing report or missing sibling short-circuits the later checks.
    """
    if not report_path.is_file():
        return [f"final-report not found: {report_path}"]

    failures: list[str] = []
    md = report_path.read_text(encoding="utf-8")
    task_type = _detect_task_type(md)
    slim_path = report_path.with_name(report_path.stem + ".slim.md")
    html_path = report_path.with_name(report_path.stem + ".html")

    # (1) sibling artifacts exist
    if not slim_path.is_file():
        failures.append(f"missing slim artifact: {slim_path}")
    if not html_path.is_file():
        failures.append(f"missing html artifact: {html_path}")
    if failures:
        return failures

    slim = slim_path.read_text(encoding="utf-8")
    html_text = html_path.read_text(encoding="utf-8")
    html_body = _main_body(html_text)
    html_body_text = html_lib.unescape(html_body)

    # (2a) source-digest staleness: both derived views must carry the digest
    # of the artifact derived from the *current* MD. The HTML digest is
    # compared with the digest of the MD (minus any leading digest comment);
    # the slim digest is compared with the digest embedded in a freshly
    # rendered slim.
    expected_md_digest = source_digest(_strip_leading_digest_comment(md))
    expected_slim_digest = extract_slim_digest(
        slim_markdown(md, task_type=task_type)
    )
    slim_digest = extract_slim_digest(slim)
    html_digest = extract_html_digest(html_text)
    if slim_digest is None:
        failures.append(
            "slim missing source-sha256 header — re-render with okstra render-views"
        )
    elif slim_digest != expected_slim_digest:
        failures.append(
            f"stale slim: body sha256 {slim_digest[:12]}… does not match "
            f"current MD's rendered slim {str(expected_slim_digest)[:12]}…"
        )
    if html_digest is None:
        failures.append(
            "html missing source-sha256 in run-meta — re-render with okstra render-views"
        )
    elif html_digest != expected_md_digest:
        failures.append(
            f"stale html: source-sha256 {html_digest[:12]}… does not match "
            f"current MD {expected_md_digest[:12]}…"
        )

    # (2b) drift fence between report_views and validate-run.py tuples.
    vr = _load_validate_run()
    failures.extend(_diff_substring_lists(task_type, vr))

    # (3) slim preserves every required substring byte-identically.
    dropped_from_slim: set[str] = set()
    for sub in preserved_substrings(task_type):
        if sub in dropped_from_slim:
            continue
        if sub in md and sub not in slim:
            dropped_from_slim.add(sub)
            failures.append(f"slim dropped required substring: {sub!r}")
    # Forward-check against validate-run.py's tuples so a future addition
    # there fails this validator immediately. Substrings already reported
    # above are skipped so one dropped section does not produce two lines.
    for sub in _validate_run_substrings(task_type, vr):
        if sub in dropped_from_slim:
            continue
        if sub in md and sub not in slim:
            failures.append(f"slim dropped validate-run substring: {sub!r}")

    # (4) html preserves the same set (modulo md prefix + html.escape).
    for sub in preserved_substrings(task_type):
        if sub not in md:
            continue
        needle = _strip_md_prefix(sub)
        if needle not in html_body_text:
            failures.append(f"html body dropped substring: {needle!r}")

    # (5) deliverable sections contain no form controls. Tag names are
    # case-insensitive in HTML, so the scan is too.
    form_control_re = re.compile(r"<(?:textarea|input|select)", re.IGNORECASE)
    if any(form_control_re.search(chunk) for chunk in _no_form_sections(html_body)):
        failures.append(
            "html §4.6/§4.7/§4.8 deliverable section contains a form control"
        )

    # (6) no external URLs in <script src> / <link href> / etc.
    if _EXTERNAL_URL_RE.search(html_text):
        failures.append(
            "html contains external URL in script/link/img — must be self-contained"
        )

    # (7) Response ID parity, in both directions: catches IDs the HTML lost
    # as well as stale IDs the current MD no longer declares.
    md_ids = _md_response_ids(md)
    html_ids = sorted(set(_RESPONSE_ID_ATTR_RE.findall(html_text)))
    if md_ids != html_ids:
        failures.append(
            f"Response ID mismatch: MD §5 has {md_ids}, HTML has {html_ids}"
        )

    return failures
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _validate_run_substrings(task_type: str | None, vr) -> Iterable[str]:
|
|
225
|
+
if task_type == "implementation":
|
|
226
|
+
return getattr(vr, "IMPLEMENTATION_REQUIRED_SECTIONS", ())
|
|
227
|
+
if task_type == "final-verification":
|
|
228
|
+
return getattr(vr, "FINAL_VERIFICATION_REQUIRED_SECTIONS", ())
|
|
229
|
+
return ()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _diff_substring_lists(task_type: str | None, vr) -> list[str]:
    """Surface drift between report_views and validate-run substring tuples.

    Both tuple pairs (implementation and final-verification) are compared as
    sets regardless of ``task_type``; the parameter is accepted but not used.

    Args:
        task_type: Task type detected from the report (unused here).
        vr: The loaded ``validate-run.py`` module; a missing tuple attribute
            is treated as empty.

    Returns:
        One ``drift: …`` message per pair whose sets differ. The differing
        entries are listed in sorted order so the message is stable from run
        to run.
    """
    from okstra_ctl.report_views import (
        PRESERVED_SUBSTRINGS_FINAL_VERIFICATION,
        PRESERVED_SUBSTRINGS_IMPLEMENTATION,
    )

    pairs = (
        (
            "PRESERVED_SUBSTRINGS_IMPLEMENTATION",
            PRESERVED_SUBSTRINGS_IMPLEMENTATION,
            "IMPLEMENTATION_REQUIRED_SECTIONS",
        ),
        (
            "PRESERVED_SUBSTRINGS_FINAL_VERIFICATION",
            PRESERVED_SUBSTRINGS_FINAL_VERIFICATION,
            "FINAL_VERIFICATION_REQUIRED_SECTIONS",
        ),
    )
    failures: list[str] = []
    for views_name, views_values, vr_name in pairs:
        in_validate_run = set(getattr(vr, vr_name, ()))
        in_views = set(views_values)
        if in_validate_run == in_views:
            continue
        missing_in_views = sorted(in_validate_run - in_views)
        extra_in_views = sorted(in_views - in_validate_run)
        failures.append(
            f"drift: {views_name} vs validate-run "
            f"{vr_name} — missing in views {missing_in_views or '∅'}, "
            f"extra in views {extra_in_views or '∅'}"
        )
    return failures
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _md_response_ids(md: str) -> list[str]:
    """Return the sorted, de-duplicated ``C-<digits>`` row ids parsed from ``md``.

    Rows come from ``parse_clarification_items``; a falsy result is treated
    as no rows, and ids of any other shape are ignored.
    """
    id_shape = re.compile(r"C-\d+")
    unique_ids = set()
    for item in parse_clarification_items(md) or []:
        if id_shape.fullmatch(item.row_id):
            unique_ids.add(item.row_id)
    return sorted(unique_ids)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: validate one report and print its failures.

    Args:
        argv: Argument list; ``None`` lets ``argparse`` read ``sys.argv``.

    Returns:
        ``1`` when any failure was found, otherwise ``0``. Each failure is
        written on its own line to stderr.
    """
    cli = argparse.ArgumentParser(
        description="Validate slim/html derived views of an okstra final-report."
    )
    cli.add_argument(
        "report_path",
        type=Path,
        help="Path to the original final-report markdown.",
    )
    options = cli.parse_args(argv)
    problems = validate(options.report_path)
    for problem in problems:
        print(problem, file=sys.stderr)
    if problems:
        return 1
    return 0
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
if __name__ == "__main__":
|
|
283
|
+
sys.exit(main())
|
|
@@ -832,6 +832,70 @@ PLANNING_REQUIRED_SECTIONS = (
|
|
|
832
832
|
"Plan Body Verification",
|
|
833
833
|
)
|
|
834
834
|
|
|
835
|
+
# §4.7 implementation deliverables — substring scan against report body.
|
|
836
|
+
IMPLEMENTATION_REQUIRED_SECTIONS = (
|
|
837
|
+
"Approved Plan Reference",
|
|
838
|
+
"Commit List",
|
|
839
|
+
"Diff Summary",
|
|
840
|
+
"Out-of-plan Edits",
|
|
841
|
+
"Validation Evidence",
|
|
842
|
+
"Verifier Results",
|
|
843
|
+
"Rollback Verification",
|
|
844
|
+
"Routing Recommendation",
|
|
845
|
+
)
|
|
846
|
+
|
|
847
|
+
# §4.8 final-verification deliverables — substring scan against report body.
|
|
848
|
+
FINAL_VERIFICATION_REQUIRED_SECTIONS = (
|
|
849
|
+
"Source Implementation Report",
|
|
850
|
+
"Acceptance Blockers",
|
|
851
|
+
"Residual Risk",
|
|
852
|
+
"Validation Evidence",
|
|
853
|
+
"Read-only Command Log",
|
|
854
|
+
"Routing Recommendation",
|
|
855
|
+
)
|
|
856
|
+
|
|
857
|
+
# Allowed Verdict Token vocabulary, by task-type. `release-handoff` is
|
|
858
|
+
# author-tagged but reads its entry gate from final-verification's report
|
|
859
|
+
# and renders `not-applicable` itself.
|
|
860
|
+
FINAL_VERIFICATION_VERDICT_TOKENS = (
|
|
861
|
+
"accepted",
|
|
862
|
+
"conditional-accept",
|
|
863
|
+
"blocked",
|
|
864
|
+
)
|
|
865
|
+
|
|
866
|
+
# `## 2. Final Verdict` Verdict Token cell — captures the value between
|
|
867
|
+
# backticks on the `Verdict Token` row. Tolerant to extra column whitespace
|
|
868
|
+
# and to leading bold/italic markers in the label cell.
|
|
869
|
+
_FINAL_VERDICT_TOKEN_RE = re.compile(
|
|
870
|
+
r"^\|[ \t]*\*{0,2}Verdict Token\*{0,2}[ \t]*\|[ \t]*`(?P<value>[^`\n]*)`",
|
|
871
|
+
re.MULTILINE,
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
# Verdict Card Verdict Token row (top-of-report at-a-glance). Same shape
|
|
875
|
+
# as `_FINAL_VERDICT_TOKEN_RE` but matched against the first occurrence in
|
|
876
|
+
# the Verdict Card block, scoped to the body between `## Verdict Card`
|
|
877
|
+
# heading and the next `##` heading.
|
|
878
|
+
_VERDICT_CARD_BLOCK_RE = re.compile(
|
|
879
|
+
r"^##[ \t]+Verdict Card[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
880
|
+
re.DOTALL | re.MULTILINE,
|
|
881
|
+
)
|
|
882
|
+
|
|
883
|
+
# `## 2. Final Verdict` block scope — used to scope the Verdict Token
|
|
884
|
+
# regex so that we don't accidentally match a Verdict Token row that
|
|
885
|
+
# lives in the Verdict Card or anywhere else.
|
|
886
|
+
_FINAL_VERDICT_BLOCK_RE = re.compile(
|
|
887
|
+
r"^##[ \t]+2\.[ \t]+Final Verdict[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
888
|
+
re.DOTALL | re.MULTILINE,
|
|
889
|
+
)
|
|
890
|
+
|
|
891
|
+
# `## 4.6 Release Handoff Deliverables` and `## 4.6.6 Merge Conflict
|
|
892
|
+
# Probe` are required when task-type == release-handoff. The probe sub-
|
|
893
|
+
# section was retro-added to the template; old runs that predate it ship
|
|
894
|
+
# without it, but new runs must include it.
|
|
895
|
+
_MERGE_CONFLICT_PROBE_HEADING_RE = re.compile(
|
|
896
|
+
r"^###[ \t]+4\.6\.6[ \t]+Merge Conflict Probe\b", re.MULTILINE
|
|
897
|
+
)
|
|
898
|
+
|
|
835
899
|
PLAN_VERIFY_GATE_VALUES = (
|
|
836
900
|
"passed",
|
|
837
901
|
"passed-with-dissent",
|
|
@@ -861,6 +925,92 @@ _APPROVAL_CHECKBOX_RE = re.compile(
|
|
|
861
925
|
)
|
|
862
926
|
|
|
863
927
|
|
|
928
|
+
def _extract_final_verdict_token(content: str) -> str | None:
    """Return the backtick-quoted ``Verdict Token`` value of ``## 2. Final Verdict``.

    The row is searched only inside the §2 block, so a same-shaped row in the
    Verdict Card cannot be picked up instead. Returns ``None`` when either
    the block or the row is absent.
    """
    section = _FINAL_VERDICT_BLOCK_RE.search(content)
    if section is not None:
        row = _FINAL_VERDICT_TOKEN_RE.search(section.group("body"))
        if row is not None:
            return row.group("value")
    return None
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
def _extract_verdict_card_token(content: str) -> str | None:
    """Return the ``Verdict Token`` cell value found in the Verdict Card block.

    Returns ``None`` when the report has no Verdict Card block or the block
    has no ``Verdict Token`` row.
    """
    card = _VERDICT_CARD_BLOCK_RE.search(content)
    row = _FINAL_VERDICT_TOKEN_RE.search(card.group("body")) if card is not None else None
    return row.group("value") if row is not None else None
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def _validate_verdict_card_consistency(content: str, failures: list[str]) -> None:
    """Append a failure when the Verdict Card and §2 Verdict Tokens disagree.

    The two values are compared after trimming whitespace and lower-casing.
    Nothing is appended when either block lacks a ``Verdict Token`` row; this
    function only enforces agreement between rows that are both present.
    """
    card_value = _extract_verdict_card_token(content)
    final_value = _extract_final_verdict_token(content)
    if card_value is None or final_value is None:
        return

    normalised_card = card_value.strip().lower()
    normalised_final = final_value.strip().lower()
    if normalised_card == normalised_final:
        return

    message = (
        "Verdict Card `Verdict Token` value "
        f"`{card_value}` does not match `## 2. Final Verdict` value "
        f"`{final_value}` — the Card is a non-authoritative index and "
        "MUST byte-match §2. Either fix the Card or update §2; do not "
        "ship divergent values."
    )
    failures.append(message)
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
def validate_report_views(report_path: Path, failures: list[str]) -> None:
    """Run ``validate-report-views.py`` on ``report_path`` and collect failures.

    The sibling script is run as a subprocess with the current interpreter.
    When it exits non-zero, every non-blank stderr line is appended to
    ``failures`` as ``report-views: <line>``.

    A non-zero exit always produces at least one failure entry, even when the
    delegate wrote nothing to stderr, so a silent crash cannot pass as
    success. A delegate that cannot be started is reported the same way
    instead of raising.

    Args:
        report_path: Path to the final-report markdown.
        failures: List that failure strings are appended to (mutated in place).
    """
    import subprocess

    here = Path(__file__).resolve().parent
    delegate = here / "validate-report-views.py"
    if not delegate.is_file():
        # The delegate ships in the same install bundle, so its absence
        # means a broken installation rather than an optional feature.
        failures.append(
            f"validate-report-views.py missing under {here} — okstra install incomplete"
        )
        return
    try:
        proc = subprocess.run(
            [sys.executable, str(delegate), str(report_path)],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        failures.append("report-views validator timed out (30s)")
        return
    except OSError as exc:
        failures.append(f"report-views: validator could not be started: {exc}")
        return
    if proc.returncode == 0:
        return

    diagnostics = [line.strip() for line in proc.stderr.splitlines() if line.strip()]
    if not diagnostics:
        diagnostics = [
            f"validator exited with status {proc.returncode} without diagnostics"
        ]
    failures.extend(f"report-views: {line}" for line in diagnostics)
|
|
1012
|
+
|
|
1013
|
+
|
|
864
1014
|
def validate_phase_boundary(
|
|
865
1015
|
task_type: str,
|
|
866
1016
|
report_path: Path,
|
|
@@ -880,11 +1030,55 @@ def validate_phase_boundary(
|
|
|
880
1030
|
MUST be absent (lead converted findings into Clarification rows
|
|
881
1031
|
instead of opening the gate).
|
|
882
1032
|
"""
|
|
883
|
-
if task_type != "implementation-planning":
|
|
884
|
-
return
|
|
885
1033
|
if not report_path.exists():
|
|
886
1034
|
return
|
|
887
1035
|
content = report_path.read_text()
|
|
1036
|
+
|
|
1037
|
+
# Verdict Card vs §2. Final Verdict Verdict Token consistency. The Card
|
|
1038
|
+
# is a non-authoritative index; divergence is a contract violation.
|
|
1039
|
+
_validate_verdict_card_consistency(content, failures)
|
|
1040
|
+
|
|
1041
|
+
if task_type == "implementation":
|
|
1042
|
+
for needle in IMPLEMENTATION_REQUIRED_SECTIONS:
|
|
1043
|
+
if needle not in content:
|
|
1044
|
+
failures.append(
|
|
1045
|
+
"implementation report is missing required §4.7 "
|
|
1046
|
+
f"deliverable section: `{needle}`"
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
if task_type == "final-verification":
|
|
1050
|
+
for needle in FINAL_VERIFICATION_REQUIRED_SECTIONS:
|
|
1051
|
+
if needle not in content:
|
|
1052
|
+
failures.append(
|
|
1053
|
+
"final-verification report is missing required §4.8 "
|
|
1054
|
+
f"deliverable section: `{needle}`"
|
|
1055
|
+
)
|
|
1056
|
+
token_value = _extract_final_verdict_token(content)
|
|
1057
|
+
if token_value is None:
|
|
1058
|
+
failures.append(
|
|
1059
|
+
"final-verification report `## 2. Final Verdict` table is "
|
|
1060
|
+
"missing the `Verdict Token` row — required by the release-"
|
|
1061
|
+
"handoff entry gate."
|
|
1062
|
+
)
|
|
1063
|
+
elif token_value.strip().lower() not in FINAL_VERIFICATION_VERDICT_TOKENS:
|
|
1064
|
+
failures.append(
|
|
1065
|
+
"final-verification report `Verdict Token` value "
|
|
1066
|
+
f"`{token_value}` is not one of "
|
|
1067
|
+
f"{', '.join(FINAL_VERIFICATION_VERDICT_TOKENS)}."
|
|
1068
|
+
)
|
|
1069
|
+
|
|
1070
|
+
if task_type == "release-handoff":
|
|
1071
|
+
if _MERGE_CONFLICT_PROBE_HEADING_RE.search(content) is None:
|
|
1072
|
+
failures.append(
|
|
1073
|
+
"release-handoff report is missing `### 4.6.6 Merge Conflict "
|
|
1074
|
+
"Probe` sub-section — required by the release-handoff profile "
|
|
1075
|
+
"(self-review 6, merge-conflict probe audit). When the run is "
|
|
1076
|
+
"`local only` / `skip`, record the single line `- Not run "
|
|
1077
|
+
"(user picked local only or skip).` under the heading."
|
|
1078
|
+
)
|
|
1079
|
+
|
|
1080
|
+
if task_type != "implementation-planning":
|
|
1081
|
+
return
|
|
888
1082
|
for needle in PLANNING_REQUIRED_SECTIONS:
|
|
889
1083
|
if needle not in content:
|
|
890
1084
|
failures.append(
|
|
@@ -1123,6 +1317,14 @@ def attempt_token_usage_autofix(
|
|
|
1123
1317
|
return "accuracy-failed", [
|
|
1124
1318
|
f"Phase 7 token-usage substitution refused: {exc}"
|
|
1125
1319
|
]
|
|
1320
|
+
|
|
1321
|
+
# Phase 7 step 1.5 is BLOCKING and the autofix just mutated the
|
|
1322
|
+
# source MD — any pre-existing slim/html sibling is now stale by
|
|
1323
|
+
# construction. Re-render the derived views in lock-step so the
|
|
1324
|
+
# downstream report-views validator does not trip over the
|
|
1325
|
+
# autofix's own side effect.
|
|
1326
|
+
rerender_note = _rerender_report_views_after_autofix(report_path)
|
|
1327
|
+
|
|
1126
1328
|
detail = (
|
|
1127
1329
|
f"replaced {replaced} placeholder(s)"
|
|
1128
1330
|
if replaced > 0
|
|
@@ -1130,7 +1332,52 @@ def attempt_token_usage_autofix(
|
|
|
1130
1332
|
if replaced == 0
|
|
1131
1333
|
else "report file missing"
|
|
1132
1334
|
)
|
|
1133
|
-
|
|
1335
|
+
msg = f"usageSummary repopulated; {detail}"
|
|
1336
|
+
if rerender_note:
|
|
1337
|
+
msg += f"; {rerender_note}"
|
|
1338
|
+
return "recovered", [msg]
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
def _rerender_report_views_after_autofix(report_path: Path) -> str:
|
|
1342
|
+
"""Re-render ``*.slim.md`` and ``*.html`` siblings against the
|
|
1343
|
+
just-substituted MD. Returns a short status note for the autofix
|
|
1344
|
+
message (empty on no-op, descriptive on failure).
|
|
1345
|
+
"""
|
|
1346
|
+
if not report_path.is_file():
|
|
1347
|
+
return ""
|
|
1348
|
+
try:
|
|
1349
|
+
# Late import — keeps validate-run.py importable in environments
|
|
1350
|
+
# that don't ship report_views (older installs).
|
|
1351
|
+
scripts_dir = Path(__file__).resolve().parent.parent / "scripts"
|
|
1352
|
+
if str(scripts_dir) not in sys.path:
|
|
1353
|
+
sys.path.insert(0, str(scripts_dir))
|
|
1354
|
+
from okstra_ctl.report_views import RunMeta, render_both_views
|
|
1355
|
+
templates_dir = (
|
|
1356
|
+
Path(__file__).resolve().parent.parent / "templates" / "reports"
|
|
1357
|
+
)
|
|
1358
|
+
css = (templates_dir / "report.css").read_text(encoding="utf-8")
|
|
1359
|
+
js = (templates_dir / "report.js").read_text(encoding="utf-8")
|
|
1360
|
+
except Exception as exc: # noqa: BLE001 — best-effort
|
|
1361
|
+
return f"report-views re-render skipped ({exc})"
|
|
1362
|
+
|
|
1363
|
+
# Infer task-key / task-type / seq from path + body when possible;
|
|
1364
|
+
# fall back to placeholders so the digest comparison (the actual
|
|
1365
|
+
# contract) still works.
|
|
1366
|
+
text = report_path.read_text(encoding="utf-8")
|
|
1367
|
+
task_type_m = re.search(r"^- Task Type:\s*(\S+)", text, re.MULTILINE)
|
|
1368
|
+
task_key_m = re.search(r"^- Task Key:\s*(\S+)", text, re.MULTILINE)
|
|
1369
|
+
seq_m = re.search(r"-(\d+)\.md$", report_path.name)
|
|
1370
|
+
meta = RunMeta(
|
|
1371
|
+
task_key=task_key_m.group(1) if task_key_m else "unknown",
|
|
1372
|
+
task_type=task_type_m.group(1) if task_type_m else "unknown",
|
|
1373
|
+
seq=seq_m.group(1) if seq_m else "000",
|
|
1374
|
+
source_report=report_path.name,
|
|
1375
|
+
)
|
|
1376
|
+
try:
|
|
1377
|
+
render_both_views(report_path, run_meta=meta, css=css, js=js)
|
|
1378
|
+
except Exception as exc: # noqa: BLE001
|
|
1379
|
+
return f"report-views re-render failed: {exc}"
|
|
1380
|
+
return "report-views re-rendered"
|
|
1134
1381
|
|
|
1135
1382
|
|
|
1136
1383
|
def main() -> int:
|
|
@@ -1205,6 +1452,7 @@ def main() -> int:
|
|
|
1205
1452
|
validate_phase_boundary(task_type, report_path, failures)
|
|
1206
1453
|
if task_type:
|
|
1207
1454
|
validate_worker_results_audit(report_path, task_type, failures)
|
|
1455
|
+
validate_report_views(report_path, failures)
|
|
1208
1456
|
|
|
1209
1457
|
validation_status = "passed" if not failures else "failed"
|
|
1210
1458
|
update_validation_metadata(
|
|
@@ -109,6 +109,10 @@ fi
|
|
|
109
109
|
pass "Primary task discovery artifacts are valid"
|
|
110
110
|
|
|
111
111
|
step "Preparing validator fixture artifacts for the primary task"
|
|
112
|
+
# Fixture needs to render Phase 7 step 1.5 sibling artifacts; pass the
|
|
113
|
+
# repo root so the heredoc can import okstra_ctl.report_views and load
|
|
114
|
+
# inline assets from templates/reports/.
|
|
115
|
+
export OKSTRA_WORKSPACE_ROOT_FOR_FIXTURE="$WORKSPACE_ROOT"
|
|
112
116
|
if ! prepare_run_validator_fixture "$PRIMARY_TASK_GROUP" "$PRIMARY_TASK_ID" codex; then
|
|
113
117
|
fail "Failed to prepare validator fixture artifacts for the primary task"
|
|
114
118
|
fi
|