okstra 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/agents/workers/claude-worker.md +4 -3
- package/runtime/agents/workers/codex-worker.md +4 -3
- package/runtime/agents/workers/gemini-worker.md +4 -3
- package/runtime/agents/workers/report-writer-worker.md +7 -2
- package/runtime/prompts/launch.template.md +1 -1
- package/runtime/prompts/profiles/_common-contract.md +12 -4
- package/runtime/python/okstra_token_usage/cli.py +9 -2
- package/runtime/python/okstra_token_usage/report.py +32 -3
- package/runtime/skills/okstra-convergence/SKILL.md +2 -2
- package/runtime/skills/okstra-report-writer/SKILL.md +6 -4
- package/runtime/skills/okstra-team-contract/SKILL.md +14 -10
- package/runtime/templates/reports/final-report.template.md +227 -207
- package/runtime/validators/lib/fixtures.sh +37 -0
- package/runtime/validators/validate-run.py +313 -1
|
@@ -250,6 +250,24 @@ for worker in team_state.get("workers", []):
|
|
|
250
250
|
)
|
|
251
251
|
+ "\n"
|
|
252
252
|
)
|
|
253
|
+
# Mirror the audit sidecar contract — every completed worker-results
|
|
254
|
+
# file ships alongside `<worker>-audit-<task-type>-<seq>.md` carrying
|
|
255
|
+
# the Reading Confirmation block. Derive the sidecar path by
|
|
256
|
+
# inserting `-audit` after the worker-role segment of the
|
|
257
|
+
# result-file stem.
|
|
258
|
+
result_stem = result_path.stem # e.g. claude-worker-error-analysis-001
|
|
259
|
+
audit_stem = result_stem.replace("-worker-", "-worker-audit-", 1)
|
|
260
|
+
audit_path = result_path.with_name(f"{audit_stem}{result_path.suffix}")
|
|
261
|
+
audit_path.write_text(
|
|
262
|
+
"\n".join(
|
|
263
|
+
[
|
|
264
|
+
f"# {worker.get('role', worker_id)} Audit",
|
|
265
|
+
"",
|
|
266
|
+
"- Read task-brief.md end-to-end (validation fixture).",
|
|
267
|
+
]
|
|
268
|
+
)
|
|
269
|
+
+ "\n"
|
|
270
|
+
)
|
|
253
271
|
|
|
254
272
|
lead = team_state.get("lead")
|
|
255
273
|
if isinstance(lead, dict):
|
|
@@ -305,6 +323,16 @@ if not isinstance(required_status_entries, list):
|
|
|
305
323
|
report_lines = [
|
|
306
324
|
"# Validation Fixture Report",
|
|
307
325
|
"",
|
|
326
|
+
"## Verdict Card",
|
|
327
|
+
"",
|
|
328
|
+
"| 항목 | 값 |",
|
|
329
|
+
"|------|----|",
|
|
330
|
+
"| Final Conclusion | validation fixture |",
|
|
331
|
+
"| Verdict Token | `not-applicable` |",
|
|
332
|
+
"| Direction | `continue-investigation` |",
|
|
333
|
+
"| Approval Required? | `no` |",
|
|
334
|
+
"| Next Step | fixture |",
|
|
335
|
+
"",
|
|
308
336
|
"## Agent Execution Status",
|
|
309
337
|
]
|
|
310
338
|
for label in required_status_entries:
|
|
@@ -312,6 +340,15 @@ for label in required_status_entries:
|
|
|
312
340
|
report_lines.append(f"- {label}: fixture status recorded")
|
|
313
341
|
report_lines.extend(
|
|
314
342
|
[
|
|
343
|
+
"",
|
|
344
|
+
"## Token Usage Summary",
|
|
345
|
+
"",
|
|
346
|
+
"| 항목 | 처리 토큰 | 환산 토큰 | 비용 (USD) |",
|
|
347
|
+
"|------|-----------|-----------|------------|",
|
|
348
|
+
"| Lead | `1` | `1` | `$0.01` |",
|
|
349
|
+
"| Worker 합계 | `1` | `1` | `$0.01` |",
|
|
350
|
+
"| **전체 합계** | **`2`** | **`2`** | **`$0.02`** |",
|
|
351
|
+
"| Codex/Gemini CLI 추가 비용 | | | `$0.00` |",
|
|
315
352
|
"",
|
|
316
353
|
"## Final Verdict",
|
|
317
354
|
"- Validation fixture report generated.",
|
|
@@ -465,6 +465,187 @@ TOKEN_PLACEHOLDERS = (
|
|
|
465
465
|
)
|
|
466
466
|
|
|
467
467
|
|
|
468
|
+
# Token Usage Summary section between its `##` heading and the next `##`
|
|
469
|
+
# heading (or end-of-file). Matched non-greedily so the body of the next
|
|
470
|
+
# section never bleeds in.
|
|
471
|
+
_TOKEN_USAGE_SECTION_RE = re.compile(
|
|
472
|
+
r"^##[ \t]+Token Usage Summary[ \t]*$\n(?P<body>.*?)(?=^##[ \t]|\Z)",
|
|
473
|
+
re.DOTALL | re.MULTILINE,
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
# Backtick-wrapped cell values inside a Token Usage Summary row. We use
|
|
477
|
+
# this to inspect actual cell contents rather than fighting markdown
|
|
478
|
+
# table parsing rules.
|
|
479
|
+
_TOKEN_USAGE_BACKTICK_CELL_RE = re.compile(r"`([^`\n]*)`")
|
|
480
|
+
|
|
481
|
+
# Sentinel words workers have been observed typing INSTEAD of leaving the
|
|
482
|
+
# `{{...}}` placeholders verbatim. These bypass the placeholder check
|
|
483
|
+
# because they are valid string values; we must reject them by name.
|
|
484
|
+
_TOKEN_USAGE_SENTINEL_VALUES = frozenset(
|
|
485
|
+
{
|
|
486
|
+
"pending",
|
|
487
|
+
"n/a",
|
|
488
|
+
"na",
|
|
489
|
+
"tbd",
|
|
490
|
+
"tba",
|
|
491
|
+
"not-collected",
|
|
492
|
+
"not collected",
|
|
493
|
+
"--",
|
|
494
|
+
"?",
|
|
495
|
+
"unknown",
|
|
496
|
+
"",
|
|
497
|
+
}
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
# Numeric "valid zero" patterns. These ARE allowed in the CLI row when no
|
|
501
|
+
# Codex/Gemini CLI work was billed; rejected everywhere else.
|
|
502
|
+
_TOKEN_USAGE_ZERO_VALUES = frozenset({"0", "$0.00", "$0", "0.00"})
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _scan_token_usage_summary(content: str, failures: list[str]) -> None:
|
|
506
|
+
"""Reject sentinel / zero values that workers typed into the Token
|
|
507
|
+
Usage Summary table instead of leaving the `{{...}}` placeholders
|
|
508
|
+
verbatim for Phase 7 substitution.
|
|
509
|
+
|
|
510
|
+
The placeholder check (`TOKEN_PLACEHOLDERS`) above catches the
|
|
511
|
+
"didn't substitute" case; this scanner catches the "substituted with
|
|
512
|
+
a sentinel string" case which is invisible to that check and was the
|
|
513
|
+
real source of `0` / `$0.00` / `pending` shipping in real reports.
|
|
514
|
+
|
|
515
|
+
Rules:
|
|
516
|
+
- The Codex/Gemini CLI 추가 비용 row may carry an empty cell or
|
|
517
|
+
`$0.00` (no CLI work was billed). Sentinel words are still
|
|
518
|
+
rejected.
|
|
519
|
+
- Every other row's backtick-wrapped cells must be either a
|
|
520
|
+
comma-grouped integer (e.g. `1,234,567`) or a USD value (`$5.43`).
|
|
521
|
+
Zero values (`0` / `$0.00`) are rejected because no okstra run
|
|
522
|
+
consumes zero tokens — a zero there means the writer fabricated a
|
|
523
|
+
stub.
|
|
524
|
+
"""
|
|
525
|
+
match = _TOKEN_USAGE_SECTION_RE.search(content)
|
|
526
|
+
if match is None:
|
|
527
|
+
# The Token Usage Summary section is required in every report
|
|
528
|
+
# (the template emits it unconditionally). A missing section is
|
|
529
|
+
# surfaced elsewhere by the placeholder check (which would also
|
|
530
|
+
# not fire — so we add a dedicated failure here).
|
|
531
|
+
failures.append(
|
|
532
|
+
"final report is missing the `## Token Usage Summary` section — "
|
|
533
|
+
"the template renders it unconditionally and Phase 7 substitution "
|
|
534
|
+
"depends on it being present."
|
|
535
|
+
)
|
|
536
|
+
return
|
|
537
|
+
|
|
538
|
+
body = match.group("body")
|
|
539
|
+
for raw_line in body.splitlines():
|
|
540
|
+
line = raw_line.strip()
|
|
541
|
+
if not line.startswith("|") or line.startswith("|--"):
|
|
542
|
+
# Skip non-table lines, the header separator (`|------|`), and
|
|
543
|
+
# blank lines. Header rows have no backticks so they self-skip.
|
|
544
|
+
continue
|
|
545
|
+
cells = [c.strip() for c in line.strip("|").split("|")]
|
|
546
|
+
if not cells:
|
|
547
|
+
continue
|
|
548
|
+
label_cell = cells[0].strip("* `")
|
|
549
|
+
# The CLI row's label always contains the word "CLI" — matching
|
|
550
|
+
# `Codex/Gemini CLI 추가 비용` regardless of formatting variations.
|
|
551
|
+
is_cli_row = "CLI" in label_cell
|
|
552
|
+
for raw_cell in cells[1:]:
|
|
553
|
+
for value in _TOKEN_USAGE_BACKTICK_CELL_RE.findall(raw_cell):
|
|
554
|
+
stripped = value.strip()
|
|
555
|
+
lowered = stripped.lower()
|
|
556
|
+
if lowered in _TOKEN_USAGE_SENTINEL_VALUES:
|
|
557
|
+
failures.append(
|
|
558
|
+
"Token Usage Summary cell contains sentinel value "
|
|
559
|
+
f"`{stripped}` on row labelled `{label_cell or '<unlabeled>'}` — "
|
|
560
|
+
"leave the `{{...}}` placeholder verbatim until "
|
|
561
|
+
"`okstra-token-usage.py --substitute-final-report` runs "
|
|
562
|
+
"in Phase 7."
|
|
563
|
+
)
|
|
564
|
+
continue
|
|
565
|
+
if stripped in _TOKEN_USAGE_ZERO_VALUES and not is_cli_row:
|
|
566
|
+
failures.append(
|
|
567
|
+
f"Token Usage Summary row `{label_cell or '<unlabeled>'}` has "
|
|
568
|
+
f"a zero value `{stripped}` — no okstra run consumes zero "
|
|
569
|
+
"tokens. Re-run `python3 scripts/okstra-token-usage.py "
|
|
570
|
+
"<team-state> --write --summary --substitute-final-report "
|
|
571
|
+
"<report-path>` to repopulate from session jsonls. The "
|
|
572
|
+
"Codex/Gemini CLI row is the only place `$0.00` is "
|
|
573
|
+
"allowed (when no CLI work was billed)."
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
# Verdict Card heading (mandatory top-of-report at-a-glance block introduced
|
|
578
|
+
# with the report-format readability pass). Matches `## Verdict Card` only as
|
|
579
|
+
# a section heading line (not as inline text inside a paragraph or table).
|
|
580
|
+
_VERDICT_CARD_HEADING_RE = re.compile(r"^##[ \t]+Verdict Card\b", re.MULTILINE)
|
|
581
|
+
|
|
582
|
+
# Reading Confirmation heading must NOT appear in the final-report — it
|
|
583
|
+
# belongs in the worker audit sidecar (`<worker>-audit-<task-type>-<seq>.md`).
|
|
584
|
+
_READING_CONFIRMATION_HEADING_RE = re.compile(
|
|
585
|
+
r"^##[ \t]+0\.[ \t]+Reading Confirmation\b", re.MULTILINE
|
|
586
|
+
)
|
|
587
|
+
|
|
588
|
+
# Empty Section 0 (Clarification Response Carried In) stub. When no
|
|
589
|
+
# carry-in path is provided, the writer must OMIT the `## 0.` heading
|
|
590
|
+
# entirely — emitting the heading followed by the "No prior clarification
|
|
591
|
+
# response was provided" stub line is the recurring failure mode this
|
|
592
|
+
# regex catches. The 400-char window after the heading covers the stub
|
|
593
|
+
# line + any boilerplate without crossing into the next section.
|
|
594
|
+
_EMPTY_CARRY_IN_RE = re.compile(
|
|
595
|
+
r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
|
|
596
|
+
r"[\s\S]{0,400}?No prior clarification response was provided",
|
|
597
|
+
re.MULTILINE,
|
|
598
|
+
)
|
|
599
|
+
|
|
600
|
+
# Section 0 heading with an empty `Source file: \`\`` line — the second
|
|
601
|
+
# failure shape (writer keeps the heading + Source file row but with an
|
|
602
|
+
# empty backtick value because no carry-in was provided). Same remedy:
|
|
603
|
+
# omit the entire `## 0.` block when carry-in is absent.
|
|
604
|
+
_EMPTY_CARRY_IN_SOURCE_RE = re.compile(
|
|
605
|
+
r"^##[ \t]+0\.[ \t]+Clarification Response Carried In"
|
|
606
|
+
r"[\s\S]{0,400}?Source file:[ \t]*`\s*`",
|
|
607
|
+
re.MULTILINE,
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
# Deprecated section headings removed by the report-format readability
|
|
611
|
+
# pass. Each entry is (regex, human-readable remedy). The regexes are
|
|
612
|
+
# line-anchored to avoid false positives from inline references in prose
|
|
613
|
+
# (e.g. this file itself, or skill documentation that mentions the
|
|
614
|
+
# deprecated names).
|
|
615
|
+
_DEPRECATED_FINAL_REPORT_PATTERNS: tuple[tuple[re.Pattern, str], ...] = (
|
|
616
|
+
(
|
|
617
|
+
re.compile(r"^###[ \t]+4\.5\.8[ \t]+User Approval Request\b", re.MULTILINE),
|
|
618
|
+
"deprecated `### 4.5.8 User Approval Request` stub — the top-of-report "
|
|
619
|
+
"`## User Approval Request (사용자 승인 게이트)` block is the only one. "
|
|
620
|
+
"Delete the §4.5.8 heading + body.",
|
|
621
|
+
),
|
|
622
|
+
(
|
|
623
|
+
re.compile(r"^###[ \t]+4\.5\.9[ \t]+Open Questions\b", re.MULTILINE),
|
|
624
|
+
"deprecated `### 4.5.9 Open Questions` block — promote each row into "
|
|
625
|
+
"`## 5. Clarification Items` with `Kind=decision` (and `Blocks=approval` "
|
|
626
|
+
"if it gated the User Approval Request).",
|
|
627
|
+
),
|
|
628
|
+
(
|
|
629
|
+
re.compile(
|
|
630
|
+
r"^###[ \t]+5\.1[ \t]+(?:추가 자료 요청|Additional Materials)\b",
|
|
631
|
+
re.MULTILINE,
|
|
632
|
+
),
|
|
633
|
+
"deprecated `### 5.1 추가 자료 요청` / `Additional Materials` sub-section — "
|
|
634
|
+
"every clarification item lives as one row of the unified `## 5. "
|
|
635
|
+
"Clarification Items` 8-column table (`Kind=material`).",
|
|
636
|
+
),
|
|
637
|
+
(
|
|
638
|
+
re.compile(
|
|
639
|
+
r"^###[ \t]+5\.2[ \t]+(?:사용자 확인 질문|Questions for the User)\b",
|
|
640
|
+
re.MULTILINE,
|
|
641
|
+
),
|
|
642
|
+
"deprecated `### 5.2 사용자 확인 질문` / `Questions for the User` "
|
|
643
|
+
"sub-section — collapse into the unified `## 5. Clarification Items` "
|
|
644
|
+
"8-column table (`Kind=decision`).",
|
|
645
|
+
),
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
|
|
468
649
|
def validate_report(
|
|
469
650
|
report_path: Path, required_agent_status_entries: list[str], failures: list[str]
|
|
470
651
|
) -> None:
|
|
@@ -486,6 +667,126 @@ def validate_report(
|
|
|
486
667
|
"run `okstra-token-usage.py ... --substitute-final-report <report-path>` during Phase 7"
|
|
487
668
|
)
|
|
488
669
|
|
|
670
|
+
# Catch the "workers typed `0` / `pending` instead of the placeholder"
|
|
671
|
+
# failure mode that bypasses the placeholder check above.
|
|
672
|
+
_scan_token_usage_summary(content, failures)
|
|
673
|
+
|
|
674
|
+
# Verdict Card is mandatory in every final-report (introduced with the
|
|
675
|
+
# report-format readability pass). Missing card means the reader has no
|
|
676
|
+
# at-a-glance index — first decision lives 100+ lines down.
|
|
677
|
+
if _VERDICT_CARD_HEADING_RE.search(content) is None:
|
|
678
|
+
failures.append(
|
|
679
|
+
"final report is missing the top-of-report `## Verdict Card` block — "
|
|
680
|
+
"render it between the report header and the (conditional) Approval "
|
|
681
|
+
"block. Its Verdict Token / Direction / Next Step cells must byte-match "
|
|
682
|
+
"the corresponding cells in `## 2. Final Verdict` and `## 6.` first item."
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
# Reading Confirmation belongs in the worker audit sidecar, not the
|
|
686
|
+
# user-facing final-report.
|
|
687
|
+
if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
|
|
688
|
+
failures.append(
|
|
689
|
+
"final report contains a `## 0. Reading Confirmation` heading — "
|
|
690
|
+
"Reading Confirmation lives in the worker audit sidecar "
|
|
691
|
+
"(`runs/<task-type>/worker-results/<worker>-audit-<task-type>-<seq>.md`), "
|
|
692
|
+
"never in the final-report."
|
|
693
|
+
)
|
|
694
|
+
|
|
695
|
+
# Empty Section 0 stub — when no carry-in path was provided, the
|
|
696
|
+
# writer must OMIT the `## 0.` heading entirely.
|
|
697
|
+
if _EMPTY_CARRY_IN_RE.search(content) is not None or _EMPTY_CARRY_IN_SOURCE_RE.search(
|
|
698
|
+
content
|
|
699
|
+
) is not None:
|
|
700
|
+
failures.append(
|
|
701
|
+
"final report has an empty `## 0. Clarification Response Carried In "
|
|
702
|
+
"From Previous Run` stub (either the `Source file:` cell is empty or "
|
|
703
|
+
"the body contains `No prior clarification response was provided`). "
|
|
704
|
+
"When no carry-in path was provided, OMIT the entire `## 0.` heading "
|
|
705
|
+
"and body — do NOT emit a placeholder stub."
|
|
706
|
+
)
|
|
707
|
+
|
|
708
|
+
# Deprecated section headings — pre-1.0 hard removal.
|
|
709
|
+
for pattern, remedy in _DEPRECATED_FINAL_REPORT_PATTERNS:
|
|
710
|
+
if pattern.search(content) is not None:
|
|
711
|
+
failures.append(f"final report contains {remedy}")
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
# Worker-results filename pattern: `<worker-role>-<task-type>-<seq>.md`.
|
|
715
|
+
# Every analysis-worker role name ends in `-worker` (`claude-worker`,
|
|
716
|
+
# `codex-worker`, `gemini-worker`, `report-writer-worker`), so anchor the
|
|
717
|
+
# split on that suffix — otherwise `gemini-worker-error-analysis-001.md`
|
|
718
|
+
# ambiguously parses as `worker=gemini, task=worker-error-analysis`.
|
|
719
|
+
# Audit sidecars (`*-audit-*`) and errors sidecars (`.json`) are not
|
|
720
|
+
# matched here.
|
|
721
|
+
_WORKER_RESULT_BASENAME_RE = re.compile(
|
|
722
|
+
r"^(?P<worker>[a-z][a-z0-9-]*-worker)-(?P<task_type>[a-z][a-z-]*?)-(?P<seq>\d{3})\.md$"
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def validate_worker_results_audit(
|
|
727
|
+
report_path: Path, task_type: str, failures: list[str]
|
|
728
|
+
) -> None:
|
|
729
|
+
"""Enforce the worker audit sidecar contract.
|
|
730
|
+
|
|
731
|
+
For every `worker-results/<worker>-<task-type>-<seq>.md` produced by a
|
|
732
|
+
worker (skipping the audit sidecar itself), the validator checks:
|
|
733
|
+
|
|
734
|
+
1. The main worker-results file does NOT contain a `## 0. Reading
|
|
735
|
+
Confirmation` heading. That block moved to the audit sidecar with
|
|
736
|
+
the report-format readability pass.
|
|
737
|
+
2. The matching audit sidecar exists at
|
|
738
|
+
`<worker>-audit-<task-type>-<seq>.md`. Missing sidecar means the
|
|
739
|
+
worker silently skipped the reading-confirmation step.
|
|
740
|
+
"""
|
|
741
|
+
# `report_path` is `runs/<task-type>/reports/final-report-...md`; the
|
|
742
|
+
# sibling `worker-results/` directory holds every worker artifact.
|
|
743
|
+
worker_results_dir = report_path.parent.parent / "worker-results"
|
|
744
|
+
if not worker_results_dir.is_dir():
|
|
745
|
+
# No worker-results directory means no analysis workers ran (e.g.
|
|
746
|
+
# `release-handoff` which is single-lead). Nothing to enforce.
|
|
747
|
+
return
|
|
748
|
+
|
|
749
|
+
for path in sorted(worker_results_dir.glob("*.md")):
|
|
750
|
+
name = path.name
|
|
751
|
+
if "-audit-" in name:
|
|
752
|
+
continue
|
|
753
|
+
match = _WORKER_RESULT_BASENAME_RE.match(name)
|
|
754
|
+
if match is None:
|
|
755
|
+
# Files that don't match the canonical pattern (e.g. ad-hoc
|
|
756
|
+
# notes left by the operator) are out of contract scope.
|
|
757
|
+
continue
|
|
758
|
+
if match.group("task_type") != task_type:
|
|
759
|
+
# Cross-phase artifacts shouldn't appear here; skip rather
|
|
760
|
+
# than fail to keep the check focused on the current phase.
|
|
761
|
+
continue
|
|
762
|
+
|
|
763
|
+
worker_role = match.group("worker")
|
|
764
|
+
seq = match.group("seq")
|
|
765
|
+
rel = path.name
|
|
766
|
+
try:
|
|
767
|
+
content = path.read_text()
|
|
768
|
+
except OSError as exc:
|
|
769
|
+
failures.append(f"worker-results file unreadable: {rel} ({exc})")
|
|
770
|
+
continue
|
|
771
|
+
|
|
772
|
+
if _READING_CONFIRMATION_HEADING_RE.search(content) is not None:
|
|
773
|
+
failures.append(
|
|
774
|
+
f"worker-results file `{rel}` contains a `## 0. Reading "
|
|
775
|
+
f"Confirmation` heading — that block moved to the audit "
|
|
776
|
+
f"sidecar (`{worker_role}-audit-{task_type}-{seq}.md`). "
|
|
777
|
+
f"Remove the §0 heading + body from the main file and "
|
|
778
|
+
f"write a fresh sidecar."
|
|
779
|
+
)
|
|
780
|
+
|
|
781
|
+
audit_path = worker_results_dir / f"{worker_role}-audit-{task_type}-{seq}.md"
|
|
782
|
+
if not audit_path.exists():
|
|
783
|
+
failures.append(
|
|
784
|
+
f"worker `{worker_role}` produced `{rel}` but no audit sidecar "
|
|
785
|
+
f"at `{audit_path.name}` — the sidecar must carry the Reading "
|
|
786
|
+
f"Confirmation block (one short line per input file). Workers "
|
|
787
|
+
f"write this in the same step as the main worker-results file."
|
|
788
|
+
)
|
|
789
|
+
|
|
489
790
|
|
|
490
791
|
def validate_team_state_usage(team_state: dict, failures: list[str]) -> None:
|
|
491
792
|
summary = team_state.get("usageSummary") or {}
|
|
@@ -812,7 +1113,16 @@ def attempt_token_usage_autofix(
|
|
|
812
1113
|
team_state_path.write_text(
|
|
813
1114
|
json.dumps(updated, indent=2, ensure_ascii=False) + "\n"
|
|
814
1115
|
)
|
|
815
|
-
|
|
1116
|
+
try:
|
|
1117
|
+
replaced = substitute_final_report(report_path, updated)
|
|
1118
|
+
except Exception as exc: # noqa: BLE001
|
|
1119
|
+
# `SubstituteRefusedError` (or any unexpected substitution
|
|
1120
|
+
# failure) — report it as an accuracy failure so the validator
|
|
1121
|
+
# surfaces a concrete remediation instead of silently shipping
|
|
1122
|
+
# a report with zeros / sentinels.
|
|
1123
|
+
return "accuracy-failed", [
|
|
1124
|
+
f"Phase 7 token-usage substitution refused: {exc}"
|
|
1125
|
+
]
|
|
816
1126
|
detail = (
|
|
817
1127
|
f"replaced {replaced} placeholder(s)"
|
|
818
1128
|
if replaced > 0
|
|
@@ -893,6 +1203,8 @@ def main() -> int:
|
|
|
893
1203
|
|
|
894
1204
|
task_type = str(task_manifest.get("taskType") or run_manifest.get("taskType") or "").strip()
|
|
895
1205
|
validate_phase_boundary(task_type, report_path, failures)
|
|
1206
|
+
if task_type:
|
|
1207
|
+
validate_worker_results_audit(report_path, task_type, failures)
|
|
896
1208
|
|
|
897
1209
|
validation_status = "passed" if not failures else "failed"
|
|
898
1210
|
update_validation_metadata(
|