okstra 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/okstra +1 -0
- package/docs/superpowers/plans/2026-05-17-dual-format-final-report.md +167 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/agents/workers/claude-worker.md +6 -5
- package/runtime/agents/workers/codex-worker.md +5 -4
- package/runtime/agents/workers/gemini-worker.md +5 -4
- package/runtime/agents/workers/report-writer-worker.md +10 -3
- package/runtime/bin/okstra-render-report-views.py +129 -0
- package/runtime/prompts/launch.template.md +1 -1
- package/runtime/prompts/profiles/_common-contract.md +12 -4
- package/runtime/prompts/profiles/implementation-planning.md +1 -1
- package/runtime/python/okstra_ctl/report_views.py +701 -0
- package/runtime/python/okstra_token_usage/cli.py +9 -2
- package/runtime/python/okstra_token_usage/report.py +32 -3
- package/runtime/skills/okstra-convergence/SKILL.md +2 -2
- package/runtime/skills/okstra-report-writer/SKILL.md +25 -8
- package/runtime/skills/okstra-team-contract/SKILL.md +16 -15
- package/runtime/templates/reports/final-report.template.md +398 -211
- package/runtime/templates/reports/report.css +151 -0
- package/runtime/templates/reports/report.js +163 -0
- package/runtime/templates/reports/user-response.template.md +69 -0
- package/runtime/validators/lib/fixtures.sh +76 -2
- package/runtime/validators/validate-report-views.py +283 -0
- package/runtime/validators/validate-run.py +564 -4
- package/runtime/validators/validate-workflow.sh +4 -0
- package/src/install.mjs +1 -0
- package/src/render-views.mjs +67 -0
|
@@ -465,6 +465,187 @@ TOKEN_PLACEHOLDERS = (
|
|
|
465
465
|
)
|
|
466
466
|
|
|
467
467
|
|
|
468
|
+
# Token Usage Summary section between its `##` heading and the next `##`
|
|
469
|
+
# heading (or end-of-file). Matched non-greedily so the body of the next
|
|
470
|
+
# section never bleeds in.
|
|
471
|
+
_TOKEN_USAGE_SECTION_RE = re.compile(
|
|
472
|
+
r"^##[ \t]+Token Usage Summary[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
473
|
+
re.DOTALL | re.MULTILINE,
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
# Backtick-wrapped cell values inside a Token Usage Summary row. We use
|
|
477
|
+
# this to inspect actual cell contents rather than fighting markdown
|
|
478
|
+
# table parsing rules.
|
|
479
|
+
_TOKEN_USAGE_BACKTICK_CELL_RE = re.compile(r"`([^`\n]*)`")
|
|
480
|
+
|
|
481
|
+
# Sentinel words workers have been observed typing INSTEAD of leaving the
|
|
482
|
+
# `{{...}}` placeholders verbatim. These bypass the placeholder check
|
|
483
|
+
# because they are valid string values; we must reject them by name.
|
|
484
|
+
_TOKEN_USAGE_SENTINEL_VALUES = frozenset(
|
|
485
|
+
{
|
|
486
|
+
"pending",
|
|
487
|
+
"n/a",
|
|
488
|
+
"na",
|
|
489
|
+
"tbd",
|
|
490
|
+
"tba",
|
|
491
|
+
"not-collected",
|
|
492
|
+
"not collected",
|
|
493
|
+
"--",
|
|
494
|
+
"?",
|
|
495
|
+
"unknown",
|
|
496
|
+
"",
|
|
497
|
+
}
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
# Numeric "valid zero" patterns. These ARE allowed in the CLI row when no
|
|
501
|
+
# Codex/Gemini CLI work was billed; rejected everywhere else.
|
|
502
|
+
_TOKEN_USAGE_ZERO_VALUES = frozenset({"0", "$0.00", "$0", "0.00"})
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _scan_token_usage_summary(content: str, failures: list[str]) -> None:
|
|
506
|
+
"""Reject sentinel / zero values that workers typed into the Token
|
|
507
|
+
Usage Summary table instead of leaving the `{{...}}` placeholders
|
|
508
|
+
verbatim for Phase 7 substitution.
|
|
509
|
+
|
|
510
|
+
The placeholder check (`TOKEN_PLACEHOLDERS`) above catches the
|
|
511
|
+
"didn't substitute" case; this scanner catches the "substituted with
|
|
512
|
+
a sentinel string" case which is invisible to that check and was the
|
|
513
|
+
real source of `0` / `$0.00` / `pending` shipping in real reports.
|
|
514
|
+
|
|
515
|
+
Rules:
|
|
516
|
+
- The Codex/Gemini CLI 추가 비용 row may carry an empty cell or
|
|
517
|
+
`$0.00` (no CLI work was billed). Sentinel words are still
|
|
518
|
+
rejected.
|
|
519
|
+
- Every other row's backtick-wrapped cells must be either a
|
|
520
|
+
comma-grouped integer (e.g. `1,234,567`) or a USD value (`$5.43`).
|
|
521
|
+
Zero values (`0` / `$0.00`) are rejected because no okstra run
|
|
522
|
+
consumes zero tokens — a zero there means the writer fabricated a
|
|
523
|
+
stub.
|
|
524
|
+
"""
|
|
525
|
+
match = _TOKEN_USAGE_SECTION_RE.search(content)
|
|
526
|
+
if match is None:
|
|
527
|
+
# The Token Usage Summary section is required in every report
|
|
528
|
+
# (the template emits it unconditionally). A missing section is
|
|
529
|
+
# surfaced elsewhere by the placeholder check (which would also
|
|
530
|
+
# not fire — so we add a dedicated failure here).
|
|
531
|
+
failures.append(
|
|
532
|
+
"final report is missing the `## Token Usage Summary` section — "
|
|
533
|
+
"the template renders it unconditionally and Phase 7 substitution "
|
|
534
|
+
"depends on it being present."
|
|
535
|
+
)
|
|
536
|
+
return
|
|
537
|
+
|
|
538
|
+
body = match.group("body")
|
|
539
|
+
for raw_line in body.splitlines():
|
|
540
|
+
line = raw_line.strip()
|
|
541
|
+
if not line.startswith("|") or line.startswith("|--"):
|
|
542
|
+
# Skip non-table lines, the header separator (`|------|`), and
|
|
543
|
+
# blank lines. Header rows have no backticks so they self-skip.
|
|
544
|
+
continue
|
|
545
|
+
cells = [c.strip() for c in line.strip("|").split("|")]
|
|
546
|
+
if not cells:
|
|
547
|
+
continue
|
|
548
|
+
label_cell = cells[0].strip("* `")
|
|
549
|
+
# The CLI row's label always contains the word "CLI" — matching
|
|
550
|
+
# `Codex/Gemini CLI 추가 비용` regardless of formatting variations.
|
|
551
|
+
is_cli_row = "CLI" in label_cell
|
|
552
|
+
for raw_cell in cells[1:]:
|
|
553
|
+
for value in _TOKEN_USAGE_BACKTICK_CELL_RE.findall(raw_cell):
|
|
554
|
+
stripped = value.strip()
|
|
555
|
+
lowered = stripped.lower()
|
|
556
|
+
if lowered in _TOKEN_USAGE_SENTINEL_VALUES:
|
|
557
|
+
failures.append(
|
|
558
|
+
"Token Usage Summary cell contains sentinel value "
|
|
559
|
+
f"`{stripped}` on row labelled `{label_cell or '<unlabeled>'}` — "
|
|
560
|
+
"leave the `{{...}}` placeholder verbatim until "
|
|
561
|
+
"`okstra-token-usage.py --substitute-final-report` runs "
|
|
562
|
+
"in Phase 7."
|
|
563
|
+
)
|
|
564
|
+
continue
|
|
565
|
+
if stripped in _TOKEN_USAGE_ZERO_VALUES and not is_cli_row:
|
|
566
|
+
failures.append(
|
|
567
|
+
f"Token Usage Summary row `{label_cell or '<unlabeled>'}` has "
|
|
568
|
+
f"a zero value `{stripped}` — no okstra run consumes zero "
|
|
569
|
+
"tokens. Re-run `python3 scripts/okstra-token-usage.py "
|
|
570
|
+
"<team-state> --write --summary --substitute-final-report "
|
|
571
|
+
"<report-path>` to repopulate from session jsonls. The "
|
|
572
|
+
"Codex/Gemini CLI row is the only place `$0.00` is "
|
|
573
|
+
"allowed (when no CLI work was billed)."
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
# Verdict Card heading (mandatory top-of-report at-a-glance block introduced
# with the report-format readability pass). Matches `## Verdict Card` only as
# a section heading line (not as inline text inside a paragraph or table).
# The trailing `\b` still accepts extra text after the words on the heading
# line.
_VERDICT_CARD_HEADING_RE = re.compile(r"^##[ \t]+Verdict Card\b", re.MULTILINE)

# Reading Confirmation heading must NOT appear in the final-report — it
# belongs in the worker audit sidecar (`<worker>-audit-<task-type>-<seq>.md`).
_READING_CONFIRMATION_HEADING_RE = re.compile(
    r"^##[ \t]+0\.[ \t]+Reading Confirmation\b", re.MULTILINE
)

# Empty Section 0 (Clarification Response Carried In) stub. When no
# carry-in path is provided, the writer must OMIT the `## 0.` heading
# entirely — emitting the heading followed by the "No prior clarification
# response was provided" stub line is the recurring failure mode this
# regex catches. The stub phrase must appear within 400 characters of the
# heading text; the window is intended to cover the stub line plus any
# boilerplate.
# NOTE(review): nothing in the pattern stops the window at the next `##`
# heading, so a very short section 0 could let it reach into the following
# section — confirm this is acceptable.
_EMPTY_CARRY_IN_RE = re.compile(
    r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
    r"[\s\S]{0,400}?No prior clarification response was provided",
    re.MULTILINE,
)

# Section 0 heading with an empty `Source file: \`\`` line — the second
# failure shape (writer keeps the heading + Source file row but with an
# empty backtick value because no carry-in was provided). Same remedy:
# omit the entire `## 0.` block when carry-in is absent. Uses the same
# 400-character window as `_EMPTY_CARRY_IN_RE`.
_EMPTY_CARRY_IN_SOURCE_RE = re.compile(
    r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
    r"[\s\S]{0,400}?Source file:[ \t]*`\s*`",
    re.MULTILINE,
)

# Deprecated section headings removed by the report-format readability
# pass. Each entry is (regex, human-readable remedy). The regexes are
# line-anchored to avoid false positives from inline references in prose
# (e.g. this file itself, or skill documentation that mentions the
# deprecated names). The remedy text is appended verbatim after
# "final report contains " when a pattern matches, so each remedy starts
# with a lowercase noun phrase.
_DEPRECATED_FINAL_REPORT_PATTERNS: tuple[tuple[re.Pattern, str], ...] = (
    (
        # Old in-body approval stub, superseded by the top-of-report block.
        re.compile(r"^###[ \t]+4\.5\.8[ \t]+User Approval Request\b", re.MULTILINE),
        "deprecated `### 4.5.8 User Approval Request` stub — the top-of-report "
        "`## User Approval Request (사용자 승인 게이트)` block is the only one. "
        "Delete the §4.5.8 heading + body.",
    ),
    (
        # Old open-questions list, folded into the Clarification Items table.
        re.compile(r"^###[ \t]+4\.5\.9[ \t]+Open Questions\b", re.MULTILINE),
        "deprecated `### 4.5.9 Open Questions` block — promote each row into "
        "`## 5. Clarification Items` with `Kind=decision` (and `Blocks=approval` "
        "if it gated the User Approval Request).",
    ),
    (
        # Matches either the Korean or the English spelling of the heading.
        re.compile(
            r"^###[ \t]+5\.1[ \t]+(?:추가 자료 요청|Additional Materials)\b",
            re.MULTILINE,
        ),
        "deprecated `### 5.1 추가 자료 요청` / `Additional Materials` sub-section — "
        "every clarification item lives as one row of the unified `## 5. "
        "Clarification Items` 8-column table (`Kind=material`).",
    ),
    (
        # Matches either the Korean or the English spelling of the heading.
        re.compile(
            r"^###[ \t]+5\.2[ \t]+(?:사용자 확인 질문|Questions for the User)\b",
            re.MULTILINE,
        ),
        "deprecated `### 5.2 사용자 확인 질문` / `Questions for the User` "
        "sub-section — collapse into the unified `## 5. Clarification Items` "
        "8-column table (`Kind=decision`).",
    ),
)
|
|
647
|
+
|
|
648
|
+
|
|
468
649
|
def validate_report(
|
|
469
650
|
report_path: Path, required_agent_status_entries: list[str], failures: list[str]
|
|
470
651
|
) -> None:
|
|
@@ -486,6 +667,126 @@ def validate_report(
|
|
|
486
667
|
"run `okstra-token-usage.py ... --substitute-final-report <report-path>` during Phase 7"
|
|
487
668
|
)
|
|
488
669
|
|
|
670
|
+
# Catch the "workers typed `0` / `pending` instead of the placeholder"
|
|
671
|
+
# failure mode that bypasses the placeholder check above.
|
|
672
|
+
_scan_token_usage_summary(content, failures)
|
|
673
|
+
|
|
674
|
+
# Verdict Card is mandatory in every final-report (introduced with the
|
|
675
|
+
# report-format readability pass). Missing card means the reader has no
|
|
676
|
+
# at-a-glance index — first decision lives 100+ lines down.
|
|
677
|
+
if _VERDICT_CARD_HEADING_RE.search(content) is None:
|
|
678
|
+
failures.append(
|
|
679
|
+
"final report is missing the top-of-report `## Verdict Card` block — "
|
|
680
|
+
"render it between the report header and the (conditional) Approval "
|
|
681
|
+
"block. Its Verdict Token / Direction / Next Step cells must byte-match "
|
|
682
|
+
"the corresponding cells in `## 2. Final Verdict` and `## 6.` first item."
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
# Reading Confirmation belongs in the worker audit sidecar, not the
|
|
686
|
+
# user-facing final-report.
|
|
687
|
+
if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
|
|
688
|
+
failures.append(
|
|
689
|
+
"final report contains a `## 0. Reading Confirmation` heading — "
|
|
690
|
+
"Reading Confirmation lives in the worker audit sidecar "
|
|
691
|
+
"(`runs/<task-type>/worker-results/<worker>-audit-<task-type>-<seq>.md`), "
|
|
692
|
+
"never in the final-report."
|
|
693
|
+
)
|
|
694
|
+
|
|
695
|
+
# Empty Section 0 stub — when no carry-in path was provided, the
|
|
696
|
+
# writer must OMIT the `## 0.` heading entirely.
|
|
697
|
+
if _EMPTY_CARRY_IN_RE.search(content) is not None or _EMPTY_CARRY_IN_SOURCE_RE.search(
|
|
698
|
+
content
|
|
699
|
+
) is not None:
|
|
700
|
+
failures.append(
|
|
701
|
+
"final report has an empty `## 0. Clarification Response Carried In "
|
|
702
|
+
"From Previous Run` stub (either the `Source file:` cell is empty or "
|
|
703
|
+
"the body contains `No prior clarification response was provided`). "
|
|
704
|
+
"When no carry-in path was provided, OMIT the entire `## 0.` heading "
|
|
705
|
+
"and body — do NOT emit a placeholder stub."
|
|
706
|
+
)
|
|
707
|
+
|
|
708
|
+
# Deprecated section headings — pre-1.0 hard removal.
|
|
709
|
+
for pattern, remedy in _DEPRECATED_FINAL_REPORT_PATTERNS:
|
|
710
|
+
if pattern.search(content) is not None:
|
|
711
|
+
failures.append(f"final report contains {remedy}")
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
# Worker-results filename pattern: `<worker-role>-<task-type>-<seq>.md`.
# Every analysis-worker role name ends in `-worker` (`claude-worker`,
# `codex-worker`, `gemini-worker`, `report-writer-worker`), so anchor the
# split on that suffix — otherwise `gemini-worker-error-analysis-001.md`
# ambiguously parses as `worker=gemini, task=worker-error-analysis`.
# Audit sidecars (`*-audit-*`) and errors sidecars (`.json`) are not
# matched here.
_WORKER_RESULT_BASENAME_RE = re.compile(
    r"^(?P<worker>[a-z][a-z0-9-]*-worker)-(?P<task_type>[a-z][a-z-]*?)-(?P<seq>\d{3})\.md$"
)


def validate_worker_results_audit(
    report_path: Path, task_type: str, failures: list[str]
) -> None:
    """Enforce the worker audit sidecar contract.

    For every `worker-results/<worker>-<task-type>-<seq>.md` produced by a
    worker (skipping the audit sidecar itself), the validator checks:

    1. The main worker-results file does NOT contain a `## 0. Reading
       Confirmation` heading. That block moved to the audit sidecar with
       the report-format readability pass.
    2. The matching audit sidecar exists at
       `<worker>-audit-<task-type>-<seq>.md`. Missing sidecar means the
       worker silently skipped the reading-confirmation step.

    Args:
        report_path: Path of the final report; `worker-results/` is looked
            up as a sibling of the report's parent directory.
        task_type: Task type of the current run; files whose parsed task
            type differs are skipped.
        failures: Accumulator that receives one message per violation.
    """
    # `report_path` is `runs/<task-type>/reports/final-report-...md`; the
    # sibling `worker-results/` directory holds every worker artifact.
    worker_results_dir = report_path.parent.parent / "worker-results"
    if not worker_results_dir.is_dir():
        # No worker-results directory means no analysis workers ran (e.g.
        # `release-handoff` which is single-lead). Nothing to enforce.
        return

    for path in sorted(worker_results_dir.glob("*.md")):
        name = path.name
        if "-audit-" in name:
            continue
        match = _WORKER_RESULT_BASENAME_RE.match(name)
        if match is None:
            # Files that don't match the canonical pattern (e.g. ad-hoc
            # notes left by the operator) are out of contract scope.
            continue
        if match.group("task_type") != task_type:
            # Cross-phase artifacts shouldn't appear here; skip rather
            # than fail to keep the check focused on the current phase.
            continue

        worker_role = match.group("worker")
        seq = match.group("seq")
        try:
            # Decode explicitly as UTF-8 instead of the locale default, and
            # treat undecodable bytes like any other unreadable file: a
            # UnicodeDecodeError is not an OSError and would otherwise
            # abort the whole validator run.
            content = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            failures.append(f"worker-results file unreadable: {name} ({exc})")
            continue

        if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
            failures.append(
                f"worker-results file `{name}` contains a `## 0. Reading "
                "Confirmation` heading — that block moved to the audit "
                f"sidecar (`{worker_role}-audit-{task_type}-{seq}.md`). "
                "Remove the §0 heading + body from the main file and "
                "write a fresh sidecar."
            )

        audit_path = worker_results_dir / f"{worker_role}-audit-{task_type}-{seq}.md"
        if not audit_path.exists():
            failures.append(
                f"worker `{worker_role}` produced `{name}` but no audit sidecar "
                f"at `{audit_path.name}` — the sidecar must carry the Reading "
                "Confirmation block (one short line per input file). Workers "
                "write this in the same step as the main worker-results file."
            )
|
|
789
|
+
|
|
489
790
|
|
|
490
791
|
def validate_team_state_usage(team_state: dict, failures: list[str]) -> None:
|
|
491
792
|
summary = team_state.get("usageSummary") or {}
|
|
@@ -531,6 +832,70 @@ PLANNING_REQUIRED_SECTIONS = (
|
|
|
531
832
|
"Plan Body Verification",
|
|
532
833
|
)
|
|
533
834
|
|
|
835
|
+
# §4.7 implementation deliverables — substring scan against report body.
|
|
836
|
+
IMPLEMENTATION_REQUIRED_SECTIONS = (
|
|
837
|
+
"Approved Plan Reference",
|
|
838
|
+
"Commit List",
|
|
839
|
+
"Diff Summary",
|
|
840
|
+
"Out-of-plan Edits",
|
|
841
|
+
"Validation Evidence",
|
|
842
|
+
"Verifier Results",
|
|
843
|
+
"Rollback Verification",
|
|
844
|
+
"Routing Recommendation",
|
|
845
|
+
)
|
|
846
|
+
|
|
847
|
+
# §4.8 final-verification deliverables — substring scan against report body.
|
|
848
|
+
FINAL_VERIFICATION_REQUIRED_SECTIONS = (
|
|
849
|
+
"Source Implementation Report",
|
|
850
|
+
"Acceptance Blockers",
|
|
851
|
+
"Residual Risk",
|
|
852
|
+
"Validation Evidence",
|
|
853
|
+
"Read-only Command Log",
|
|
854
|
+
"Routing Recommendation",
|
|
855
|
+
)
|
|
856
|
+
|
|
857
|
+
# Allowed Verdict Token vocabulary, by task-type. `release-handoff` is
|
|
858
|
+
# author-tagged but reads its entry gate from final-verification's report
|
|
859
|
+
# and renders `not-applicable` itself.
|
|
860
|
+
FINAL_VERIFICATION_VERDICT_TOKENS = (
|
|
861
|
+
"accepted",
|
|
862
|
+
"conditional-accept",
|
|
863
|
+
"blocked",
|
|
864
|
+
)
|
|
865
|
+
|
|
866
|
+
# `## 2. Final Verdict` Verdict Token cell — captures the value between
|
|
867
|
+
# backticks on the `Verdict Token` row. Tolerant to extra column whitespace
|
|
868
|
+
# and to leading bold/italic markers in the label cell.
|
|
869
|
+
_FINAL_VERDICT_TOKEN_RE = re.compile(
|
|
870
|
+
r"^\|[ \t]*\*{0,2}Verdict Token\*{0,2}[ \t]*\|[ \t]*`(?P<value>[^`\n]*)`",
|
|
871
|
+
re.MULTILINE,
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
# Verdict Card Verdict Token row (top-of-report at-a-glance). Same shape
|
|
875
|
+
# as `_FINAL_VERDICT_TOKEN_RE` but matched against the first occurrence in
|
|
876
|
+
# the Verdict Card block, scoped to the body between `## Verdict Card`
|
|
877
|
+
# heading and the next `##` heading.
|
|
878
|
+
_VERDICT_CARD_BLOCK_RE = re.compile(
|
|
879
|
+
r"^##[ \t]+Verdict Card[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
880
|
+
re.DOTALL | re.MULTILINE,
|
|
881
|
+
)
|
|
882
|
+
|
|
883
|
+
# `## 2. Final Verdict` block scope — used to scope the Verdict Token
|
|
884
|
+
# regex so that we don't accidentally match a Verdict Token row that
|
|
885
|
+
# lives in the Verdict Card or anywhere else.
|
|
886
|
+
_FINAL_VERDICT_BLOCK_RE = re.compile(
|
|
887
|
+
r"^##[ \t]+2\.[ \t]+Final Verdict[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
888
|
+
re.DOTALL | re.MULTILINE,
|
|
889
|
+
)
|
|
890
|
+
|
|
891
|
+
# `## 4.6 Release Handoff Deliverables` and `## 4.6.6 Merge Conflict
|
|
892
|
+
# Probe` are required when task-type == release-handoff. The probe sub-
|
|
893
|
+
# section was retro-added to the template; old runs that predate it ship
|
|
894
|
+
# without it, but new runs must include it.
|
|
895
|
+
_MERGE_CONFLICT_PROBE_HEADING_RE = re.compile(
|
|
896
|
+
r"^###[ \t]+4\.6\.6[ \t]+Merge Conflict Probe\b", re.MULTILINE
|
|
897
|
+
)
|
|
898
|
+
|
|
534
899
|
PLAN_VERIFY_GATE_VALUES = (
|
|
535
900
|
"passed",
|
|
536
901
|
"passed-with-dissent",
|
|
@@ -560,6 +925,92 @@ _APPROVAL_CHECKBOX_RE = re.compile(
|
|
|
560
925
|
)
|
|
561
926
|
|
|
562
927
|
|
|
928
|
+
def _extract_final_verdict_token(content: str) -> str | None:
    """Look up the authoritative `Verdict Token` value in `## 2. Final Verdict`.

    The search is confined to the §2 block so that the identically shaped
    row in the Verdict Card cannot be picked up instead.

    Returns:
        The raw text between the backticks of the `Verdict Token` row, or
        None when either the §2 block or the row inside it is absent.
    """
    section = _FINAL_VERDICT_BLOCK_RE.search(content)
    # Match objects are always truthy, so plain truthiness distinguishes
    # "found" from "not found" here.
    row = _FINAL_VERDICT_TOKEN_RE.search(section.group("body")) if section else None
    return row.group("value") if row else None
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
def _extract_verdict_card_token(content: str) -> str | None:
    """Look up the `Verdict Token` value shown in the Verdict Card block.

    Returns:
        The raw text between the backticks of the Card's `Verdict Token`
        row, or None when the Card block or the row inside it is absent.
    """
    card = _VERDICT_CARD_BLOCK_RE.search(content)
    if not card:
        return None
    # The Card row has the same shape as the §2 row, so the same row
    # pattern is applied to the Card body.
    row = _FINAL_VERDICT_TOKEN_RE.search(card.group("body"))
    if not row:
        return None
    return row.group("value")
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def _validate_verdict_card_consistency(content: str, failures: list[str]) -> None:
    """Check that the Verdict Card agrees with `## 2. Final Verdict`.

    The Card is a non-authoritative index of §2. When both blocks carry a
    `Verdict Token` row, the two values must be equal after stripping
    surrounding whitespace and ignoring case; otherwise one failure message
    is appended to *failures*. When either value is unavailable the function
    does nothing — missing Card / missing §2 are reported by other checks.
    """
    card_value = _extract_verdict_card_token(content)
    final_value = _extract_final_verdict_token(content)
    both_present = card_value is not None and final_value is not None
    if not both_present:
        return

    def _normalised(token: str) -> str:
        return token.strip().lower()

    if _normalised(card_value) == _normalised(final_value):
        return
    failures.append(
        "Verdict Card `Verdict Token` value "
        f"`{card_value}` does not match `## 2. Final Verdict` value "
        f"`{final_value}` — the Card is a non-authoritative index and "
        "MUST byte-match §2. Either fix the Card or update §2; do not "
        "ship divergent values."
    )
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
def validate_report_views(report_path: Path, failures: list[str]) -> None:
    """Enforce Phase 7 step 1.5 (BLOCKING) via the report-views validator.

    The slim AI copy and the self-contained HTML view must exist next to
    the final-report MD and satisfy the contract checked by
    ``validators/validate-report-views.py``. That script is run as a
    subprocess so the contract surface stays in one place; its diagnostics
    are folded back as ``report-views: <line>`` failure strings.

    A non-zero exit always produces at least one failure: stderr lines are
    preferred, stdout lines are used when stderr is empty, and a generic
    exit-status message is used when the delegate printed nothing at all.

    Args:
        report_path: Path of the final-report markdown file, passed to the
            delegate as its only argument.
        failures: Accumulator that receives the failure strings.
    """
    import subprocess

    here = Path(__file__).resolve().parent
    delegate = here / "validate-report-views.py"
    if not delegate.is_file():
        # The delegate is part of the same install bundle; absence is
        # itself a broken installation rather than an optional feature.
        failures.append(
            f"validate-report-views.py missing under {here} — okstra install incomplete"
        )
        return
    try:
        proc = subprocess.run(
            [sys.executable, str(delegate), str(report_path)],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        failures.append("report-views validator timed out (30s)")
        return
    except OSError as exc:
        # The interpreter itself could not be started; surface that as a
        # failure instead of crashing the enclosing validator run.
        failures.append(f"report-views validator could not be launched: {exc}")
        return
    if proc.returncode == 0:
        return

    def _non_blank_lines(stream: str) -> list[str]:
        return [line.strip() for line in stream.splitlines() if line.strip()]

    # A failing delegate must never be swallowed: fall back from stderr to
    # stdout, and finally to the bare exit status.
    diagnostics = (
        _non_blank_lines(proc.stderr)
        or _non_blank_lines(proc.stdout)
        or [
            f"validator exited with status {proc.returncode} "
            "and produced no diagnostics"
        ]
    )
    failures.extend(f"report-views: {line}" for line in diagnostics)
|
|
1012
|
+
|
|
1013
|
+
|
|
563
1014
|
def validate_phase_boundary(
|
|
564
1015
|
task_type: str,
|
|
565
1016
|
report_path: Path,
|
|
@@ -579,11 +1030,55 @@ def validate_phase_boundary(
|
|
|
579
1030
|
MUST be absent (lead converted findings into Clarification rows
|
|
580
1031
|
instead of opening the gate).
|
|
581
1032
|
"""
|
|
582
|
-
if task_type != "implementation-planning":
|
|
583
|
-
return
|
|
584
1033
|
if not report_path.exists():
|
|
585
1034
|
return
|
|
586
1035
|
content = report_path.read_text()
|
|
1036
|
+
|
|
1037
|
+
# Verdict Card vs §2. Final Verdict Verdict Token consistency. The Card
|
|
1038
|
+
# is a non-authoritative index; divergence is a contract violation.
|
|
1039
|
+
_validate_verdict_card_consistency(content, failures)
|
|
1040
|
+
|
|
1041
|
+
if task_type == "implementation":
|
|
1042
|
+
for needle in IMPLEMENTATION_REQUIRED_SECTIONS:
|
|
1043
|
+
if needle not in content:
|
|
1044
|
+
failures.append(
|
|
1045
|
+
"implementation report is missing required §4.7 "
|
|
1046
|
+
f"deliverable section: `{needle}`"
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
if task_type == "final-verification":
|
|
1050
|
+
for needle in FINAL_VERIFICATION_REQUIRED_SECTIONS:
|
|
1051
|
+
if needle not in content:
|
|
1052
|
+
failures.append(
|
|
1053
|
+
"final-verification report is missing required §4.8 "
|
|
1054
|
+
f"deliverable section: `{needle}`"
|
|
1055
|
+
)
|
|
1056
|
+
token_value = _extract_final_verdict_token(content)
|
|
1057
|
+
if token_value is None:
|
|
1058
|
+
failures.append(
|
|
1059
|
+
"final-verification report `## 2. Final Verdict` table is "
|
|
1060
|
+
"missing the `Verdict Token` row — required by the release-"
|
|
1061
|
+
"handoff entry gate."
|
|
1062
|
+
)
|
|
1063
|
+
elif token_value.strip().lower() not in FINAL_VERIFICATION_VERDICT_TOKENS:
|
|
1064
|
+
failures.append(
|
|
1065
|
+
"final-verification report `Verdict Token` value "
|
|
1066
|
+
f"`{token_value}` is not one of "
|
|
1067
|
+
f"{', '.join(FINAL_VERIFICATION_VERDICT_TOKENS)}."
|
|
1068
|
+
)
|
|
1069
|
+
|
|
1070
|
+
if task_type == "release-handoff":
|
|
1071
|
+
if _MERGE_CONFLICT_PROBE_HEADING_RE.search(content) is None:
|
|
1072
|
+
failures.append(
|
|
1073
|
+
"release-handoff report is missing `### 4.6.6 Merge Conflict "
|
|
1074
|
+
"Probe` sub-section — required by the release-handoff profile "
|
|
1075
|
+
"(self-review 6, merge-conflict probe audit). When the run is "
|
|
1076
|
+
"`local only` / `skip`, record the single line `- Not run "
|
|
1077
|
+
"(user picked local only or skip).` under the heading."
|
|
1078
|
+
)
|
|
1079
|
+
|
|
1080
|
+
if task_type != "implementation-planning":
|
|
1081
|
+
return
|
|
587
1082
|
for needle in PLANNING_REQUIRED_SECTIONS:
|
|
588
1083
|
if needle not in content:
|
|
589
1084
|
failures.append(
|
|
@@ -812,7 +1307,24 @@ def attempt_token_usage_autofix(
|
|
|
812
1307
|
team_state_path.write_text(
|
|
813
1308
|
json.dumps(updated, indent=2, ensure_ascii=False) + "\n"
|
|
814
1309
|
)
|
|
815
|
-
|
|
1310
|
+
try:
|
|
1311
|
+
replaced = substitute_final_report(report_path, updated)
|
|
1312
|
+
except Exception as exc: # noqa: BLE001
|
|
1313
|
+
# `SubstituteRefusedError` (or any unexpected substitution
|
|
1314
|
+
# failure) — report it as an accuracy failure so the validator
|
|
1315
|
+
# surfaces a concrete remediation instead of silently shipping
|
|
1316
|
+
# a report with zeros / sentinels.
|
|
1317
|
+
return "accuracy-failed", [
|
|
1318
|
+
f"Phase 7 token-usage substitution refused: {exc}"
|
|
1319
|
+
]
|
|
1320
|
+
|
|
1321
|
+
# Phase 7 step 1.5 is BLOCKING and the autofix just mutated the
|
|
1322
|
+
# source MD — any pre-existing slim/html sibling is now stale by
|
|
1323
|
+
# construction. Re-render the derived views in lock-step so the
|
|
1324
|
+
# downstream report-views validator does not trip over the
|
|
1325
|
+
# autofix's own side effect.
|
|
1326
|
+
rerender_note = _rerender_report_views_after_autofix(report_path)
|
|
1327
|
+
|
|
816
1328
|
detail = (
|
|
817
1329
|
f"replaced {replaced} placeholder(s)"
|
|
818
1330
|
if replaced > 0
|
|
@@ -820,7 +1332,52 @@ def attempt_token_usage_autofix(
|
|
|
820
1332
|
if replaced == 0
|
|
821
1333
|
else "report file missing"
|
|
822
1334
|
)
|
|
823
|
-
|
|
1335
|
+
msg = f"usageSummary repopulated; {detail}"
|
|
1336
|
+
if rerender_note:
|
|
1337
|
+
msg += f"; {rerender_note}"
|
|
1338
|
+
return "recovered", [msg]
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
def _rerender_report_views_after_autofix(report_path: Path) -> str:
|
|
1342
|
+
"""Re-render ``*.slim.md`` and ``*.html`` siblings against the
|
|
1343
|
+
just-substituted MD. Returns a short status note for the autofix
|
|
1344
|
+
message (empty on no-op, descriptive on failure).
|
|
1345
|
+
"""
|
|
1346
|
+
if not report_path.is_file():
|
|
1347
|
+
return ""
|
|
1348
|
+
try:
|
|
1349
|
+
# Late import — keeps validate-run.py importable in environments
|
|
1350
|
+
# that don't ship report_views (older installs).
|
|
1351
|
+
scripts_dir = Path(__file__).resolve().parent.parent / "scripts"
|
|
1352
|
+
if str(scripts_dir) not in sys.path:
|
|
1353
|
+
sys.path.insert(0, str(scripts_dir))
|
|
1354
|
+
from okstra_ctl.report_views import RunMeta, render_both_views
|
|
1355
|
+
templates_dir = (
|
|
1356
|
+
Path(__file__).resolve().parent.parent / "templates" / "reports"
|
|
1357
|
+
)
|
|
1358
|
+
css = (templates_dir / "report.css").read_text(encoding="utf-8")
|
|
1359
|
+
js = (templates_dir / "report.js").read_text(encoding="utf-8")
|
|
1360
|
+
except Exception as exc: # noqa: BLE001 — best-effort
|
|
1361
|
+
return f"report-views re-render skipped ({exc})"
|
|
1362
|
+
|
|
1363
|
+
# Infer task-key / task-type / seq from path + body when possible;
|
|
1364
|
+
# fall back to placeholders so the digest comparison (the actual
|
|
1365
|
+
# contract) still works.
|
|
1366
|
+
text = report_path.read_text(encoding="utf-8")
|
|
1367
|
+
task_type_m = re.search(r"^- Task Type:\s*(\S+)", text, re.MULTILINE)
|
|
1368
|
+
task_key_m = re.search(r"^- Task Key:\s*(\S+)", text, re.MULTILINE)
|
|
1369
|
+
seq_m = re.search(r"-(\d+)\.md$", report_path.name)
|
|
1370
|
+
meta = RunMeta(
|
|
1371
|
+
task_key=task_key_m.group(1) if task_key_m else "unknown",
|
|
1372
|
+
task_type=task_type_m.group(1) if task_type_m else "unknown",
|
|
1373
|
+
seq=seq_m.group(1) if seq_m else "000",
|
|
1374
|
+
source_report=report_path.name,
|
|
1375
|
+
)
|
|
1376
|
+
try:
|
|
1377
|
+
render_both_views(report_path, run_meta=meta, css=css, js=js)
|
|
1378
|
+
except Exception as exc: # noqa: BLE001
|
|
1379
|
+
return f"report-views re-render failed: {exc}"
|
|
1380
|
+
return "report-views re-rendered"
|
|
824
1381
|
|
|
825
1382
|
|
|
826
1383
|
def main() -> int:
|
|
@@ -893,6 +1450,9 @@ def main() -> int:
|
|
|
893
1450
|
|
|
894
1451
|
task_type = str(task_manifest.get("taskType") or run_manifest.get("taskType") or "").strip()
|
|
895
1452
|
validate_phase_boundary(task_type, report_path, failures)
|
|
1453
|
+
if task_type:
|
|
1454
|
+
validate_worker_results_audit(report_path, task_type, failures)
|
|
1455
|
+
validate_report_views(report_path, failures)
|
|
896
1456
|
|
|
897
1457
|
validation_status = "passed" if not failures else "failed"
|
|
898
1458
|
update_validation_metadata(
|
|
@@ -109,6 +109,10 @@ fi
|
|
|
109
109
|
pass "Primary task discovery artifacts are valid"
|
|
110
110
|
|
|
111
111
|
step "Preparing validator fixture artifacts for the primary task"
|
|
112
|
+
# Fixture needs to render Phase 7 step 1.5 sibling artifacts; pass the
|
|
113
|
+
# repo root so the heredoc can import okstra_ctl.report_views and load
|
|
114
|
+
# inline assets from templates/reports/.
|
|
115
|
+
export OKSTRA_WORKSPACE_ROOT_FOR_FIXTURE="$WORKSPACE_ROOT"
|
|
112
116
|
if ! prepare_run_validator_fixture "$PRIMARY_TASK_GROUP" "$PRIMARY_TASK_ID" codex; then
|
|
113
117
|
fail "Failed to prepare validator fixture artifacts for the primary task"
|
|
114
118
|
fi
|