okstra 0.27.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/bin/okstra +1 -0
  2. package/docs/superpowers/plans/2026-05-17-dual-format-final-report.md +167 -0
  3. package/package.json +1 -1
  4. package/runtime/BUILD.json +2 -2
  5. package/runtime/agents/workers/claude-worker.md +6 -5
  6. package/runtime/agents/workers/codex-worker.md +5 -4
  7. package/runtime/agents/workers/gemini-worker.md +5 -4
  8. package/runtime/agents/workers/report-writer-worker.md +10 -3
  9. package/runtime/bin/okstra-render-report-views.py +129 -0
  10. package/runtime/prompts/launch.template.md +1 -1
  11. package/runtime/prompts/profiles/_common-contract.md +12 -4
  12. package/runtime/prompts/profiles/implementation-planning.md +1 -1
  13. package/runtime/python/okstra_ctl/report_views.py +701 -0
  14. package/runtime/python/okstra_token_usage/cli.py +9 -2
  15. package/runtime/python/okstra_token_usage/report.py +32 -3
  16. package/runtime/skills/okstra-convergence/SKILL.md +2 -2
  17. package/runtime/skills/okstra-report-writer/SKILL.md +25 -8
  18. package/runtime/skills/okstra-team-contract/SKILL.md +16 -15
  19. package/runtime/templates/reports/final-report.template.md +398 -211
  20. package/runtime/templates/reports/report.css +151 -0
  21. package/runtime/templates/reports/report.js +163 -0
  22. package/runtime/templates/reports/user-response.template.md +69 -0
  23. package/runtime/validators/lib/fixtures.sh +76 -2
  24. package/runtime/validators/validate-report-views.py +283 -0
  25. package/runtime/validators/validate-run.py +564 -4
  26. package/runtime/validators/validate-workflow.sh +4 -0
  27. package/src/install.mjs +1 -0
  28. package/src/render-views.mjs +67 -0
@@ -465,6 +465,187 @@ TOKEN_PLACEHOLDERS = (
465
465
  )
466
466
 
467
467
 
468
+ # Token Usage Summary section between its `##` heading and the next `##`
469
+ # heading (or end-of-file). Matched non-greedily so the body of the next
470
+ # section never bleeds in.
471
+ _TOKEN_USAGE_SECTION_RE = re.compile(
472
+ r"^##[ \t]+Token Usage Summary[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
473
+ re.DOTALL | re.MULTILINE,
474
+ )
475
+
476
+ # Backtick-wrapped cell values inside a Token Usage Summary row. We use
477
+ # this to inspect actual cell contents rather than fighting markdown
478
+ # table parsing rules.
479
+ _TOKEN_USAGE_BACKTICK_CELL_RE = re.compile(r"`([^`\n]*)`")
480
+
481
+ # Sentinel words workers have been observed typing INSTEAD of leaving the
482
+ # `{{...}}` placeholders verbatim. These bypass the placeholder check
483
+ # because they are valid string values; we must reject them by name.
484
+ _TOKEN_USAGE_SENTINEL_VALUES = frozenset(
485
+ {
486
+ "pending",
487
+ "n/a",
488
+ "na",
489
+ "tbd",
490
+ "tba",
491
+ "not-collected",
492
+ "not collected",
493
+ "--",
494
+ "?",
495
+ "unknown",
496
+ "",
497
+ }
498
+ )
499
+
500
+ # Numeric "valid zero" patterns. These ARE allowed in the CLI row when no
501
+ # Codex/Gemini CLI work was billed; rejected everywhere else.
502
+ _TOKEN_USAGE_ZERO_VALUES = frozenset({"0", "$0.00", "$0", "0.00"})
503
+
504
+
505
+ def _scan_token_usage_summary(content: str, failures: list[str]) -> None:
506
+ """Reject sentinel / zero values that workers typed into the Token
507
+ Usage Summary table instead of leaving the `{{...}}` placeholders
508
+ verbatim for Phase 7 substitution.
509
+
510
+ The placeholder check (`TOKEN_PLACEHOLDERS`) above catches the
511
+ "didn't substitute" case; this scanner catches the "substituted with
512
+ a sentinel string" case which is invisible to that check and was the
513
+ real source of `0` / `$0.00` / `pending` shipping in real reports.
514
+
515
+ Rules:
516
+ - The Codex/Gemini CLI 추가 비용 row may carry an empty cell or
517
+ `$0.00` (no CLI work was billed). Sentinel words are still
518
+ rejected.
519
+ - Every other row's backtick-wrapped cells must be either a
520
+ comma-grouped integer (e.g. `1,234,567`) or a USD value (`$5.43`).
521
+ Zero values (`0` / `$0.00`) are rejected because no okstra run
522
+ consumes zero tokens — a zero there means the writer fabricated a
523
+ stub.
524
+ """
525
+ match = _TOKEN_USAGE_SECTION_RE.search(content)
526
+ if match is None:
527
+ # The Token Usage Summary section is required in every report
528
+ # (the template emits it unconditionally). A missing section is
529
+ # surfaced elsewhere by the placeholder check (which would also
530
+ # not fire — so we add a dedicated failure here).
531
+ failures.append(
532
+ "final report is missing the `## Token Usage Summary` section — "
533
+ "the template renders it unconditionally and Phase 7 substitution "
534
+ "depends on it being present."
535
+ )
536
+ return
537
+
538
+ body = match.group("body")
539
+ for raw_line in body.splitlines():
540
+ line = raw_line.strip()
541
+ if not line.startswith("|") or line.startswith("|--"):
542
+ # Skip non-table lines, the header separator (`|------|`), and
543
+ # blank lines. Header rows have no backticks so they self-skip.
544
+ continue
545
+ cells = [c.strip() for c in line.strip("|").split("|")]
546
+ if not cells:
547
+ continue
548
+ label_cell = cells[0].strip("* `")
549
+ # The CLI row's label always contains the word "CLI" — matching
550
+ # `Codex/Gemini CLI 추가 비용` regardless of formatting variations.
551
+ is_cli_row = "CLI" in label_cell
552
+ for raw_cell in cells[1:]:
553
+ for value in _TOKEN_USAGE_BACKTICK_CELL_RE.findall(raw_cell):
554
+ stripped = value.strip()
555
+ lowered = stripped.lower()
556
+ if lowered in _TOKEN_USAGE_SENTINEL_VALUES:
557
+ failures.append(
558
+ "Token Usage Summary cell contains sentinel value "
559
+ f"`{stripped}` on row labelled `{label_cell or '<unlabeled>'}` — "
560
+ "leave the `{{...}}` placeholder verbatim until "
561
+ "`okstra-token-usage.py --substitute-final-report` runs "
562
+ "in Phase 7."
563
+ )
564
+ continue
565
+ if stripped in _TOKEN_USAGE_ZERO_VALUES and not is_cli_row:
566
+ failures.append(
567
+ f"Token Usage Summary row `{label_cell or '<unlabeled>'}` has "
568
+ f"a zero value `{stripped}` — no okstra run consumes zero "
569
+ "tokens. Re-run `python3 scripts/okstra-token-usage.py "
570
+ "<team-state> --write --summary --substitute-final-report "
571
+ "<report-path>` to repopulate from session jsonls. The "
572
+ "Codex/Gemini CLI row is the only place `$0.00` is "
573
+ "allowed (when no CLI work was billed)."
574
+ )
575
+
576
+
577
+ # Verdict Card heading (mandatory top-of-report at-a-glance block introduced
578
+ # with the report-format readability pass). Matches `## Verdict Card` only as
579
+ # a section heading line (not as inline text inside a paragraph or table).
580
+ _VERDICT_CARD_HEADING_RE = re.compile(r"^##[ \t]+Verdict Card\b", re.MULTILINE)
581
+
582
+ # Reading Confirmation heading must NOT appear in the final-report — it
583
+ # belongs in the worker audit sidecar (`<worker>-audit-<task-type>-<seq>.md`).
584
+ _READING_CONFIRMATION_HEADING_RE = re.compile(
585
+ r"^##[ \t]+0\.[ \t]+Reading Confirmation\b", re.MULTILINE
586
+ )
587
+
588
+ # Empty Section 0 (Clarification Response Carried In) stub. When no
589
+ # carry-in path is provided, the writer must OMIT the `## 0.` heading
590
+ # entirely — emitting the heading followed by the "No prior clarification
591
+ # response was provided" stub line is the recurring failure mode this
592
+ # regex catches. The 400-char window after the heading covers the stub
593
+ # line + any boilerplate without crossing into the next section.
594
+ _EMPTY_CARRY_IN_RE = re.compile(
595
+ r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
596
+ r"[\s\S]{0,400}?No prior clarification response was provided",
597
+ re.MULTILINE,
598
+ )
599
+
600
+ # Section 0 heading with an empty `Source file: \`\`` line — the second
601
+ # failure shape (writer keeps the heading + Source file row but with an
602
+ # empty backtick value because no carry-in was provided). Same remedy:
603
+ # omit the entire `## 0.` block when carry-in is absent.
604
+ _EMPTY_CARRY_IN_SOURCE_RE = re.compile(
605
+ r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
606
+ r"[\s\S]{0,400}?Source file:[ \t]*`\s*`",
607
+ re.MULTILINE,
608
+ )
609
+
610
+ # Deprecated section headings removed by the report-format readability
611
+ # pass. Each entry is (regex, human-readable remedy). The regexes are
612
+ # line-anchored to avoid false positives from inline references in prose
613
+ # (e.g. this file itself, or skill documentation that mentions the
614
+ # deprecated names).
615
+ _DEPRECATED_FINAL_REPORT_PATTERNS: tuple[tuple[re.Pattern, str], ...] = (
616
+ (
617
+ re.compile(r"^###[ \t]+4\.5\.8[ \t]+User Approval Request\b", re.MULTILINE),
618
+ "deprecated `### 4.5.8 User Approval Request` stub — the top-of-report "
619
+ "`## User Approval Request (사용자 승인 게이트)` block is the only one. "
620
+ "Delete the §4.5.8 heading + body.",
621
+ ),
622
+ (
623
+ re.compile(r"^###[ \t]+4\.5\.9[ \t]+Open Questions\b", re.MULTILINE),
624
+ "deprecated `### 4.5.9 Open Questions` block — promote each row into "
625
+ "`## 5. Clarification Items` with `Kind=decision` (and `Blocks=approval` "
626
+ "if it gated the User Approval Request).",
627
+ ),
628
+ (
629
+ re.compile(
630
+ r"^###[ \t]+5\.1[ \t]+(?:추가 자료 요청|Additional Materials)\b",
631
+ re.MULTILINE,
632
+ ),
633
+ "deprecated `### 5.1 추가 자료 요청` / `Additional Materials` sub-section — "
634
+ "every clarification item lives as one row of the unified `## 5. "
635
+ "Clarification Items` 8-column table (`Kind=material`).",
636
+ ),
637
+ (
638
+ re.compile(
639
+ r"^###[ \t]+5\.2[ \t]+(?:사용자 확인 질문|Questions for the User)\b",
640
+ re.MULTILINE,
641
+ ),
642
+ "deprecated `### 5.2 사용자 확인 질문` / `Questions for the User` "
643
+ "sub-section — collapse into the unified `## 5. Clarification Items` "
644
+ "8-column table (`Kind=decision`).",
645
+ ),
646
+ )
647
+
648
+
468
649
  def validate_report(
469
650
  report_path: Path, required_agent_status_entries: list[str], failures: list[str]
470
651
  ) -> None:
@@ -486,6 +667,126 @@ def validate_report(
486
667
  "run `okstra-token-usage.py ... --substitute-final-report <report-path>` during Phase 7"
487
668
  )
488
669
 
670
+ # Catch the "workers typed `0` / `pending` instead of the placeholder"
671
+ # failure mode that bypasses the placeholder check above.
672
+ _scan_token_usage_summary(content, failures)
673
+
674
+ # Verdict Card is mandatory in every final-report (introduced with the
675
+ # report-format readability pass). Missing card means the reader has no
676
+ # at-a-glance index — first decision lives 100+ lines down.
677
+ if _VERDICT_CARD_HEADING_RE.search(content) is None:
678
+ failures.append(
679
+ "final report is missing the top-of-report `## Verdict Card` block — "
680
+ "render it between the report header and the (conditional) Approval "
681
+ "block. Its Verdict Token / Direction / Next Step cells must byte-match "
682
+ "the corresponding cells in `## 2. Final Verdict` and `## 6.` first item."
683
+ )
684
+
685
+ # Reading Confirmation belongs in the worker audit sidecar, not the
686
+ # user-facing final-report.
687
+ if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
688
+ failures.append(
689
+ "final report contains a `## 0. Reading Confirmation` heading — "
690
+ "Reading Confirmation lives in the worker audit sidecar "
691
+ "(`runs/<task-type>/worker-results/<worker>-audit-<task-type>-<seq>.md`), "
692
+ "never in the final-report."
693
+ )
694
+
695
+ # Empty Section 0 stub — when no carry-in path was provided, the
696
+ # writer must OMIT the `## 0.` heading entirely.
697
+ if _EMPTY_CARRY_IN_RE.search(content) is not None or _EMPTY_CARRY_IN_SOURCE_RE.search(
698
+ content
699
+ ) is not None:
700
+ failures.append(
701
+ "final report has an empty `## 0. Clarification Response Carried In "
702
+ "From Previous Run` stub (either the `Source file:` cell is empty or "
703
+ "the body contains `No prior clarification response was provided`). "
704
+ "When no carry-in path was provided, OMIT the entire `## 0.` heading "
705
+ "and body — do NOT emit a placeholder stub."
706
+ )
707
+
708
+ # Deprecated section headings — pre-1.0 hard removal.
709
+ for pattern, remedy in _DEPRECATED_FINAL_REPORT_PATTERNS:
710
+ if pattern.search(content) is not None:
711
+ failures.append(f"final report contains {remedy}")
712
+
713
+
714
+ # Worker-results filename pattern: `<worker-role>-<task-type>-<seq>.md`.
715
+ # Every analysis-worker role name ends in `-worker` (`claude-worker`,
716
+ # `codex-worker`, `gemini-worker`, `report-writer-worker`), so anchor the
717
+ # split on that suffix — otherwise `gemini-worker-error-analysis-001.md`
718
+ # ambiguously parses as `worker=gemini, task=worker-error-analysis`.
719
+ # Audit sidecars (`*-audit-*`) and errors sidecars (`.json`) are not
720
+ # matched here.
721
+ _WORKER_RESULT_BASENAME_RE = re.compile(
722
+ r"^(?P<worker>[a-z][a-z0-9-]*-worker)-(?P<task_type>[a-z][a-z-]*?)-(?P<seq>\d{3})\.md$"
723
+ )
724
+
725
+
726
+ def validate_worker_results_audit(
727
+ report_path: Path, task_type: str, failures: list[str]
728
+ ) -> None:
729
+ """Enforce the worker audit sidecar contract.
730
+
731
+ For every `worker-results/<worker>-<task-type>-<seq>.md` produced by a
732
+ worker (skipping the audit sidecar itself), the validator checks:
733
+
734
+ 1. The main worker-results file does NOT contain a `## 0. Reading
735
+ Confirmation` heading. That block moved to the audit sidecar with
736
+ the report-format readability pass.
737
+ 2. The matching audit sidecar exists at
738
+ `<worker>-audit-<task-type>-<seq>.md`. Missing sidecar means the
739
+ worker silently skipped the reading-confirmation step.
740
+ """
741
+ # `report_path` is `runs/<task-type>/reports/final-report-...md`; the
742
+ # sibling `worker-results/` directory holds every worker artifact.
743
+ worker_results_dir = report_path.parent.parent / "worker-results"
744
+ if not worker_results_dir.is_dir():
745
+ # No worker-results directory means no analysis workers ran (e.g.
746
+ # `release-handoff` which is single-lead). Nothing to enforce.
747
+ return
748
+
749
+ for path in sorted(worker_results_dir.glob("*.md")):
750
+ name = path.name
751
+ if "-audit-" in name:
752
+ continue
753
+ match = _WORKER_RESULT_BASENAME_RE.match(name)
754
+ if match is None:
755
+ # Files that don't match the canonical pattern (e.g. ad-hoc
756
+ # notes left by the operator) are out of contract scope.
757
+ continue
758
+ if match.group("task_type") != task_type:
759
+ # Cross-phase artifacts shouldn't appear here; skip rather
760
+ # than fail to keep the check focused on the current phase.
761
+ continue
762
+
763
+ worker_role = match.group("worker")
764
+ seq = match.group("seq")
765
+ rel = path.name
766
+ try:
767
+ content = path.read_text()
768
+ except OSError as exc:
769
+ failures.append(f"worker-results file unreadable: {rel} ({exc})")
770
+ continue
771
+
772
+ if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
773
+ failures.append(
774
+ f"worker-results file `{rel}` contains a `## 0. Reading "
775
+ f"Confirmation` heading — that block moved to the audit "
776
+ f"sidecar (`{worker_role}-audit-{task_type}-{seq}.md`). "
777
+ f"Remove the §0 heading + body from the main file and "
778
+ f"write a fresh sidecar."
779
+ )
780
+
781
+ audit_path = worker_results_dir / f"{worker_role}-audit-{task_type}-{seq}.md"
782
+ if not audit_path.exists():
783
+ failures.append(
784
+ f"worker `{worker_role}` produced `{rel}` but no audit sidecar "
785
+ f"at `{audit_path.name}` — the sidecar must carry the Reading "
786
+ f"Confirmation block (one short line per input file). Workers "
787
+ f"write this in the same step as the main worker-results file."
788
+ )
789
+
489
790
 
490
791
  def validate_team_state_usage(team_state: dict, failures: list[str]) -> None:
491
792
  summary = team_state.get("usageSummary") or {}
@@ -531,6 +832,70 @@ PLANNING_REQUIRED_SECTIONS = (
531
832
  "Plan Body Verification",
532
833
  )
533
834
 
835
+ # §4.7 implementation deliverables — substring scan against report body.
836
+ IMPLEMENTATION_REQUIRED_SECTIONS = (
837
+ "Approved Plan Reference",
838
+ "Commit List",
839
+ "Diff Summary",
840
+ "Out-of-plan Edits",
841
+ "Validation Evidence",
842
+ "Verifier Results",
843
+ "Rollback Verification",
844
+ "Routing Recommendation",
845
+ )
846
+
847
+ # §4.8 final-verification deliverables — substring scan against report body.
848
+ FINAL_VERIFICATION_REQUIRED_SECTIONS = (
849
+ "Source Implementation Report",
850
+ "Acceptance Blockers",
851
+ "Residual Risk",
852
+ "Validation Evidence",
853
+ "Read-only Command Log",
854
+ "Routing Recommendation",
855
+ )
856
+
857
+ # Allowed Verdict Token vocabulary, by task-type. `release-handoff` is
858
+ # author-tagged but reads its entry gate from final-verification's report
859
+ # and renders `not-applicable` itself.
860
+ FINAL_VERIFICATION_VERDICT_TOKENS = (
861
+ "accepted",
862
+ "conditional-accept",
863
+ "blocked",
864
+ )
865
+
866
+ # `## 2. Final Verdict` Verdict Token cell — captures the value between
867
+ # backticks on the `Verdict Token` row. Tolerant to extra column whitespace
868
+ # and to leading bold/italic markers in the label cell.
869
+ _FINAL_VERDICT_TOKEN_RE = re.compile(
870
+ r"^\|[ \t]*\*{0,2}Verdict Token\*{0,2}[ \t]*\|[ \t]*`(?P<value>[^`\n]*)`",
871
+ re.MULTILINE,
872
+ )
873
+
874
+ # Verdict Card Verdict Token row (top-of-report at-a-glance). Same shape
875
+ # as `_FINAL_VERDICT_TOKEN_RE` but matched against the first occurrence in
876
+ # the Verdict Card block, scoped to the body between `## Verdict Card`
877
+ # heading and the next `##` heading.
878
+ _VERDICT_CARD_BLOCK_RE = re.compile(
879
+ r"^##[ \t]+Verdict Card[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
880
+ re.DOTALL | re.MULTILINE,
881
+ )
882
+
883
+ # `## 2. Final Verdict` block scope — used to scope the Verdict Token
884
+ # regex so that we don't accidentally match a Verdict Token row that
885
+ # lives in the Verdict Card or anywhere else.
886
+ _FINAL_VERDICT_BLOCK_RE = re.compile(
887
+ r"^##[ \t]+2\.[ \t]+Final Verdict[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
888
+ re.DOTALL | re.MULTILINE,
889
+ )
890
+
891
+ # `## 4.6 Release Handoff Deliverables` and `## 4.6.6 Merge Conflict
892
+ # Probe` are required when task-type == release-handoff. The probe sub-
893
+ # section was retro-added to the template; old runs that predate it ship
894
+ # without it, but new runs must include it.
895
+ _MERGE_CONFLICT_PROBE_HEADING_RE = re.compile(
896
+ r"^###[ \t]+4\.6\.6[ \t]+Merge Conflict Probe\b", re.MULTILINE
897
+ )
898
+
534
899
  PLAN_VERIFY_GATE_VALUES = (
535
900
  "passed",
536
901
  "passed-with-dissent",
@@ -560,6 +925,92 @@ _APPROVAL_CHECKBOX_RE = re.compile(
560
925
  )
561
926
 
562
927
 
928
def _extract_final_verdict_token(content: str) -> str | None:
    """Look up the `Verdict Token` value inside `## 2. Final Verdict`.

    The search is confined to the §2 block so that the identically shaped
    row in the Verdict Card cannot shadow the authoritative value.

    Returns:
        The raw text between the backticks of the `Verdict Token` row, or
        `None` when either the §2 block or the row is absent.
    """
    section = _FINAL_VERDICT_BLOCK_RE.search(content)
    # Match objects are always truthy, so `and` short-circuits only on None.
    row = section and _FINAL_VERDICT_TOKEN_RE.search(section.group("body"))
    return row.group("value") if row else None
941
+
942
+
943
def _extract_verdict_card_token(content: str) -> str | None:
    """Look up the `Verdict Token` value inside the Verdict Card block.

    Returns:
        The raw text between the backticks of the `Verdict Token` row, or
        `None` when either the Verdict Card block or the row is absent.
    """
    card = _VERDICT_CARD_BLOCK_RE.search(content)
    # Match objects are always truthy, so `and` short-circuits only on None.
    row = card and _FINAL_VERDICT_TOKEN_RE.search(card.group("body"))
    return row.group("value") if row else None
952
+
953
+
954
def _validate_verdict_card_consistency(content: str, failures: list[str]) -> None:
    """Check that the Verdict Card and §2 agree on the Verdict Token.

    The Card is a non-authoritative index of `## 2. Final Verdict`. When both
    blocks carry a `Verdict Token` row, the two values are compared after
    stripping surrounding whitespace and lower-casing; a mismatch appends one
    message to `failures`. When either value is missing nothing is reported
    here — absence of the Card or of §2 is left to other checks.
    """
    tokens = (
        _extract_verdict_card_token(content),
        _extract_final_verdict_token(content),
    )
    if None in tokens:
        return

    card_value, final_value = tokens
    if card_value.strip().lower() == final_value.strip().lower():
        return

    failures.append(
        "Verdict Card `Verdict Token` value "
        f"`{card_value}` does not match `## 2. Final Verdict` value "
        f"`{final_value}` — the Card is a non-authoritative index and "
        "MUST byte-match §2. Either fix the Card or update §2; do not "
        "ship divergent values."
    )
974
+
975
+
976
def validate_report_views(report_path: Path, failures: list[str]) -> None:
    """Enforce Phase 7 step 1.5 (BLOCKING) via the report-views validator.

    The check is delegated to ``validate-report-views.py`` (expected next to
    this file), run as a subprocess with the report path as its only
    argument and a 30 second timeout. Outcomes:

    - Delegate script missing: one failure noting the incomplete install.
    - Timeout: one failure.
    - Non-zero exit: every non-blank stderr line is folded back as a
      ``report-views: <line>`` failure. If stderr carries no diagnostics at
      all, a single generic failure with the exit status is recorded so a
      crashed or silent delegate can never be mistaken for a pass.
    - Zero exit: nothing is recorded.

    Args:
        report_path: Path of the final-report markdown handed to the delegate.
        failures: Accumulator that receives the failure messages; it is
            mutated in place.
    """
    import subprocess

    here = Path(__file__).resolve().parent
    delegate = here / "validate-report-views.py"
    if not delegate.is_file():
        # The delegate is part of the same install bundle; absence is
        # itself a broken installation rather than an optional feature.
        failures.append(
            f"validate-report-views.py missing under {here} — okstra install incomplete"
        )
        return
    try:
        proc = subprocess.run(
            [sys.executable, str(delegate), str(report_path)],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        failures.append("report-views validator timed out (30s)")
        return
    if proc.returncode == 0:
        return

    diagnostics = [line.strip() for line in proc.stderr.splitlines() if line.strip()]
    if not diagnostics:
        # A failing exit status with an empty stderr must still fail the run.
        diagnostics = [
            f"validator exited with status {proc.returncode} "
            "and wrote nothing to stderr"
        ]
    failures.extend(f"report-views: {line}" for line in diagnostics)
1012
+
1013
+
563
1014
  def validate_phase_boundary(
564
1015
  task_type: str,
565
1016
  report_path: Path,
@@ -579,11 +1030,55 @@ def validate_phase_boundary(
579
1030
  MUST be absent (lead converted findings into Clarification rows
580
1031
  instead of opening the gate).
581
1032
  """
582
- if task_type != "implementation-planning":
583
- return
584
1033
  if not report_path.exists():
585
1034
  return
586
1035
  content = report_path.read_text()
1036
+
1037
+ # Verdict Card vs §2. Final Verdict Verdict Token consistency. The Card
1038
+ # is a non-authoritative index; divergence is a contract violation.
1039
+ _validate_verdict_card_consistency(content, failures)
1040
+
1041
+ if task_type == "implementation":
1042
+ for needle in IMPLEMENTATION_REQUIRED_SECTIONS:
1043
+ if needle not in content:
1044
+ failures.append(
1045
+ "implementation report is missing required §4.7 "
1046
+ f"deliverable section: `{needle}`"
1047
+ )
1048
+
1049
+ if task_type == "final-verification":
1050
+ for needle in FINAL_VERIFICATION_REQUIRED_SECTIONS:
1051
+ if needle not in content:
1052
+ failures.append(
1053
+ "final-verification report is missing required §4.8 "
1054
+ f"deliverable section: `{needle}`"
1055
+ )
1056
+ token_value = _extract_final_verdict_token(content)
1057
+ if token_value is None:
1058
+ failures.append(
1059
+ "final-verification report `## 2. Final Verdict` table is "
1060
+ "missing the `Verdict Token` row — required by the release-"
1061
+ "handoff entry gate."
1062
+ )
1063
+ elif token_value.strip().lower() not in FINAL_VERIFICATION_VERDICT_TOKENS:
1064
+ failures.append(
1065
+ "final-verification report `Verdict Token` value "
1066
+ f"`{token_value}` is not one of "
1067
+ f"{', '.join(FINAL_VERIFICATION_VERDICT_TOKENS)}."
1068
+ )
1069
+
1070
+ if task_type == "release-handoff":
1071
+ if _MERGE_CONFLICT_PROBE_HEADING_RE.search(content) is None:
1072
+ failures.append(
1073
+ "release-handoff report is missing `### 4.6.6 Merge Conflict "
1074
+ "Probe` sub-section — required by the release-handoff profile "
1075
+ "(self-review 6, merge-conflict probe audit). When the run is "
1076
+ "`local only` / `skip`, record the single line `- Not run "
1077
+ "(user picked local only or skip).` under the heading."
1078
+ )
1079
+
1080
+ if task_type != "implementation-planning":
1081
+ return
587
1082
  for needle in PLANNING_REQUIRED_SECTIONS:
588
1083
  if needle not in content:
589
1084
  failures.append(
@@ -812,7 +1307,24 @@ def attempt_token_usage_autofix(
812
1307
  team_state_path.write_text(
813
1308
  json.dumps(updated, indent=2, ensure_ascii=False) + "\n"
814
1309
  )
815
- replaced = substitute_final_report(report_path, updated)
1310
+ try:
1311
+ replaced = substitute_final_report(report_path, updated)
1312
+ except Exception as exc: # noqa: BLE001
1313
+ # `SubstituteRefusedError` (or any unexpected substitution
1314
+ # failure) — report it as an accuracy failure so the validator
1315
+ # surfaces a concrete remediation instead of silently shipping
1316
+ # a report with zeros / sentinels.
1317
+ return "accuracy-failed", [
1318
+ f"Phase 7 token-usage substitution refused: {exc}"
1319
+ ]
1320
+
1321
+ # Phase 7 step 1.5 is BLOCKING and the autofix just mutated the
1322
+ # source MD — any pre-existing slim/html sibling is now stale by
1323
+ # construction. Re-render the derived views in lock-step so the
1324
+ # downstream report-views validator does not trip over the
1325
+ # autofix's own side effect.
1326
+ rerender_note = _rerender_report_views_after_autofix(report_path)
1327
+
816
1328
  detail = (
817
1329
  f"replaced {replaced} placeholder(s)"
818
1330
  if replaced > 0
@@ -820,7 +1332,52 @@ def attempt_token_usage_autofix(
820
1332
  if replaced == 0
821
1333
  else "report file missing"
822
1334
  )
823
- return "recovered", [f"usageSummary repopulated; {detail}"]
1335
+ msg = f"usageSummary repopulated; {detail}"
1336
+ if rerender_note:
1337
+ msg += f"; {rerender_note}"
1338
+ return "recovered", [msg]
1339
+
1340
+
1341
+ def _rerender_report_views_after_autofix(report_path: Path) -> str:
1342
+ """Re-render ``*.slim.md`` and ``*.html`` siblings against the
1343
+ just-substituted MD. Returns a short status note for the autofix
1344
+ message (empty on no-op, descriptive on failure).
1345
+ """
1346
+ if not report_path.is_file():
1347
+ return ""
1348
+ try:
1349
+ # Late import — keeps validate-run.py importable in environments
1350
+ # that don't ship report_views (older installs).
1351
+ scripts_dir = Path(__file__).resolve().parent.parent / "scripts"
1352
+ if str(scripts_dir) not in sys.path:
1353
+ sys.path.insert(0, str(scripts_dir))
1354
+ from okstra_ctl.report_views import RunMeta, render_both_views
1355
+ templates_dir = (
1356
+ Path(__file__).resolve().parent.parent / "templates" / "reports"
1357
+ )
1358
+ css = (templates_dir / "report.css").read_text(encoding="utf-8")
1359
+ js = (templates_dir / "report.js").read_text(encoding="utf-8")
1360
+ except Exception as exc: # noqa: BLE001 — best-effort
1361
+ return f"report-views re-render skipped ({exc})"
1362
+
1363
+ # Infer task-key / task-type / seq from path + body when possible;
1364
+ # fall back to placeholders so the digest comparison (the actual
1365
+ # contract) still works.
1366
+ text = report_path.read_text(encoding="utf-8")
1367
+ task_type_m = re.search(r"^- Task Type:\s*(\S+)", text, re.MULTILINE)
1368
+ task_key_m = re.search(r"^- Task Key:\s*(\S+)", text, re.MULTILINE)
1369
+ seq_m = re.search(r"-(\d+)\.md$", report_path.name)
1370
+ meta = RunMeta(
1371
+ task_key=task_key_m.group(1) if task_key_m else "unknown",
1372
+ task_type=task_type_m.group(1) if task_type_m else "unknown",
1373
+ seq=seq_m.group(1) if seq_m else "000",
1374
+ source_report=report_path.name,
1375
+ )
1376
+ try:
1377
+ render_both_views(report_path, run_meta=meta, css=css, js=js)
1378
+ except Exception as exc: # noqa: BLE001
1379
+ return f"report-views re-render failed: {exc}"
1380
+ return "report-views re-rendered"
824
1381
 
825
1382
 
826
1383
  def main() -> int:
@@ -893,6 +1450,9 @@ def main() -> int:
893
1450
 
894
1451
  task_type = str(task_manifest.get("taskType") or run_manifest.get("taskType") or "").strip()
895
1452
  validate_phase_boundary(task_type, report_path, failures)
1453
+ if task_type:
1454
+ validate_worker_results_audit(report_path, task_type, failures)
1455
+ validate_report_views(report_path, failures)
896
1456
 
897
1457
  validation_status = "passed" if not failures else "failed"
898
1458
  update_validation_metadata(
@@ -109,6 +109,10 @@ fi
109
109
  pass "Primary task discovery artifacts are valid"
110
110
 
111
111
  step "Preparing validator fixture artifacts for the primary task"
112
+ # Fixture needs to render Phase 7 step 1.5 sibling artifacts; pass the
113
+ # repo root so the heredoc can import okstra_ctl.report_views and load
114
+ # inline assets from templates/reports/.
115
+ export OKSTRA_WORKSPACE_ROOT_FOR_FIXTURE="$WORKSPACE_ROOT"
112
116
  if ! prepare_run_validator_fixture "$PRIMARY_TASK_GROUP" "$PRIMARY_TASK_ID" codex; then
113
117
  fail "Failed to prepare validator fixture artifacts for the primary task"
114
118
  fi
package/src/install.mjs CHANGED
@@ -24,6 +24,7 @@ const BIN_ENTRYPOINTS = [
24
24
  "okstra-central.sh",
25
25
  "okstra-token-usage.py",
26
26
  "okstra-error-log.py",
27
+ "okstra-render-report-views.py",
27
28
  ];
28
29
 
29
30
  const INSTALL_USAGE = `okstra install — install runtime into ~/.okstra