npm - okstra - Versions diffs - 0.26.0 → 0.28.0 - Mend

okstra 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/README.kr.md +15 -0
package/README.md +15 -0
package/docs/kr/architecture.md +2 -6
package/docs/kr/cli.md +40 -6
package/docs/kr/performance-improvement-plan-v2.md +23 -0
package/docs/kr/performance-improvement-plan.md +22 -0
package/package.json +1 -1
package/runtime/BUILD.json +2 -2
package/runtime/agents/workers/claude-worker.md +4 -3
package/runtime/agents/workers/codex-worker.md +4 -3
package/runtime/agents/workers/gemini-worker.md +4 -3
package/runtime/agents/workers/report-writer-worker.md +7 -2
package/runtime/bin/okstra.sh +0 -1
package/runtime/prompts/launch.template.md +1 -1
package/runtime/prompts/profiles/_common-contract.md +36 -4
package/runtime/prompts/profiles/error-analysis.md +12 -0
package/runtime/prompts/profiles/implementation-planning.md +20 -0
package/runtime/prompts/profiles/requirements-discovery.md +20 -0
package/runtime/python/lib/okstra/cli.sh +1 -7
package/runtime/python/lib/okstra/globals.sh +0 -1
package/runtime/python/lib/okstra/usage.sh +1 -4
package/runtime/python/okstra_ctl/render.py +3 -0
package/runtime/python/okstra_ctl/run.py +0 -6
package/runtime/python/okstra_ctl/run_context.py +1 -1
package/runtime/python/okstra_ctl/wizard.py +25 -2
package/runtime/python/okstra_token_usage/blocks.py +5 -1
package/runtime/python/okstra_token_usage/claude.py +16 -1
package/runtime/python/okstra_token_usage/cli.py +9 -2
package/runtime/python/okstra_token_usage/collect.py +17 -3
package/runtime/python/okstra_token_usage/pricing.py +159 -24
package/runtime/python/okstra_token_usage/report.py +32 -3
package/runtime/skills/okstra-brief/SKILL.md +532 -65
package/runtime/skills/okstra-context-loader/SKILL.md +25 -11
package/runtime/skills/okstra-convergence/SKILL.md +38 -14
package/runtime/skills/okstra-history/SKILL.md +68 -37
package/runtime/skills/okstra-logs/SKILL.md +26 -4
package/runtime/skills/okstra-report-finder/SKILL.md +49 -22
package/runtime/skills/okstra-report-writer/SKILL.md +62 -65
package/runtime/skills/okstra-run/SKILL.md +35 -34
package/runtime/skills/okstra-schedule/SKILL.md +51 -20
package/runtime/skills/okstra-setup/SKILL.md +31 -12
package/runtime/skills/okstra-status/SKILL.md +20 -8
package/runtime/skills/okstra-team-contract/SKILL.md +41 -25
package/runtime/skills/okstra-time-summary/SKILL.md +53 -16
package/runtime/templates/reports/final-report.template.md +227 -207
package/runtime/templates/reports/settings.template.json +7 -4
package/runtime/validators/lib/fixtures.sh +47 -2
package/runtime/validators/lib/validate-assets.sh +50 -24
package/runtime/validators/validate-brief.py +385 -0
package/runtime/validators/validate-brief.sh +35 -0
package/runtime/validators/validate-run.py +313 -1
package/runtime/validators/validate-workflow.sh +7 -33

package/runtime/python/lib/okstra/cli.sh CHANGED Viewed

@@ -102,12 +102,6 @@ while [[ $# -gt 0 ]]; do
       ASSUME_YES="true"
       shift
       ;;
-    --refresh-assets)
-      printf 'warning: --refresh-assets is deprecated. okstra now installs into ~/.claude and ~/.okstra via okstra-install.sh.\n' >&2
-      printf '         re-run "%s/scripts/okstra-install.sh --refresh" to refresh installed assets.\n' "$WORKSPACE_ROOT" >&2
-      REFRESH_OKSTRA_ASSETS="true"
-      shift
-      ;;
     --workers)
       WORKERS_OVERRIDE="$(require_option_value --workers "${2-}")"
       shift 2
@@ -231,7 +225,7 @@ while [[ $# -gt 0 ]]; do
           printf '  hint: did you mean --task-id?\n' >&2
           ;;
       esac
-      printf '  valid options: --render-only --resume-clarification --yes --refresh-assets --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve --no-plan-verification -h|--help\n' >&2
+      printf '  valid options: --render-only --resume-clarification --yes --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve --no-plan-verification -h|--help\n' >&2
       usage
       exit 1
       ;;

package/runtime/python/lib/okstra/globals.sh CHANGED Viewed

@@ -17,7 +17,6 @@ OKSTRA_TASK_CATALOG_RELATIVE_PATH=""
 RENDER_ONLY="false"
 ASSUME_YES="false"
 RESUME_CLARIFICATION_MODE="false"
-REFRESH_OKSTRA_ASSETS="false"
 WORKERS_OVERRIDE=""
 LEAD_MODEL_OVERRIDE=""
 CLAUDE_MODEL_OVERRIDE=""

package/runtime/python/lib/okstra/usage.sh CHANGED Viewed

@@ -3,7 +3,7 @@
 usage() {
   cat >&2 <<USAGE_EOF
 usage:
-  $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--refresh-assets] [--no-plan-verification] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
+  $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--no-plan-verification] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
 summary:
   $DISPLAY_TOOL_NAME prepares a task-keyed instruction bundle for Claude Code and launches an interactive Claude session by default.
@@ -69,9 +69,6 @@ options:
                        (--project-id/--task-group/--task-id or --task-key). Mutually
                        exclusive with --clarification-response and --approved-plan.
   --yes                Skip interactive prompting and confirmation. Requires all required arguments.
-  --refresh-assets    Deprecated. okstra now installs skills/agents into ~/.claude and the codex
-                      wrapper into ~/.okstra/bin via scripts/okstra-install.sh. Re-run that
-                      installer with --refresh to update installed assets.
   --workers            Comma-separated worker list for this run. Default: claude,codex,report-writer
                       (Gemini worker is optional; add `gemini` explicitly, e.g. --workers claude,codex,gemini,report-writer)
   --lead-model         Model for Claude lead. Default: OKSTRA_DEFAULT_LEAD_MODEL or opus

package/runtime/python/okstra_ctl/render.py CHANGED Viewed

@@ -338,6 +338,9 @@ def render_task_catalog_discovery(output_path: str, ctx: dict) -> None:
             "taskType": s(manifest, "taskType"),
             "workCategory": s(manifest, "workCategory"),
             "currentStatus": s(manifest, "currentStatus"),
+            "workStatus": s(manifest, "workStatus"),
+            "workStatusUpdatedAt": s(manifest, "workStatusUpdatedAt"),
+            "workStatusNote": s(manifest, "workStatusNote"),
             "updatedAt": s(manifest, "updatedAt"),
             "currentPhase": (workflow or {}).get("currentPhase", "") if isinstance(workflow, dict) else "",
             "currentPhaseState": (workflow or {}).get("currentPhaseState", "") if isinstance(workflow, dict) else "",

package/runtime/python/okstra_ctl/run.py CHANGED Viewed

@@ -113,7 +113,6 @@ class PrepareInputs:
     # project.json → global config → 스킬 디폴트 순으로 해석된다.
     pr_template_path: str = ""
     render_only: bool = False
-    refresh_assets: bool = False
     approve_plan_ack: bool = False
     # Phase 6 plan-body verification opt-out. Default True (round runs after
     # report-writer draft). Flipped to False by CLI `--no-plan-verification`.
@@ -385,8 +384,6 @@ def _canonical_argv(inp: PrepareInputs, ctx: dict) -> list[str]:
             argv.extend([flag, val])
     if inp.render_only:
         argv.append("--render-only")
-    if inp.refresh_assets:
-        argv.append("--refresh-assets")
     if not inp.plan_verification_enabled:
         argv.append("--no-plan-verification")
     argv.append("--yes")
@@ -806,7 +803,6 @@ def prepare_task_bundle(inp: PrepareInputs) -> PrepareOutputs:
             "approvedPlanPath": inp.approved_plan_path,
             "clarificationResponsePath": inp.clarification_response_path,
             "renderOnly": inp.render_only,
-            "refreshAssets": inp.refresh_assets,
         },
     )
@@ -923,7 +919,6 @@ def main(argv: list[str]) -> int:
         ),
     )
     p.add_argument("--render-only", action="store_true", dest="render_only")
-    p.add_argument("--refresh-assets", action="store_true", dest="refresh_assets")
     p.add_argument(
         "--no-plan-verification",
         action="store_false",
@@ -1000,7 +995,6 @@ def main(argv: list[str]) -> int:
         clarification_response_path=clarification_abs,
         pr_template_path=args.pr_template_path,
         render_only=args.render_only,
-        refresh_assets=args.refresh_assets,
         approve_plan_ack=args.approve_plan_ack,
         plan_verification_enabled=args.plan_verification_enabled,
     )

package/runtime/python/okstra_ctl/run_context.py CHANGED Viewed

@@ -140,7 +140,7 @@ def write_run_inputs(
     inputs schema (모든 키 optional):
       taskBriefPath, directive, workers, leadModel, claudeModel, codexModel,
       geminiModel, reportWriterModel, relatedTasks, approvedPlanPath,
-      clarificationResponsePath, renderOnly, refreshAssets
+      clarificationResponsePath, renderOnly
     """
     run_manifests_dir = Path(run_manifests_dir)
     path = run_manifests_dir / _run_inputs_filename(task_type_segment, seq)

package/runtime/python/okstra_ctl/wizard.py CHANGED Viewed

@@ -1398,7 +1398,7 @@ def _cli(argv: list[str]) -> int:
     Subcommands:
       init  --state-file PATH --workspace-root P --project-root P --project-id ID
-      step  --state-file PATH [--answer VALUE]
+      step  --state-file PATH (--answer VALUE | --no-submit)
       render-args --state-file PATH
       confirmation --state-file PATH
     """
@@ -1416,6 +1416,11 @@ def _cli(argv: list[str]) -> int:
     p_step = sub.add_parser("step")
     p_step.add_argument("--state-file", required=True)
     p_step.add_argument("--answer", default=None)
+    p_step.add_argument(
+        "--no-submit",
+        action="store_true",
+        help="Fetch the current prompt without submitting an answer.",
+    )
     p_render = sub.add_parser("render-args")
     p_render.add_argument("--state-file", required=True)
@@ -1440,8 +1445,26 @@ def _cli(argv: list[str]) -> int:
     if args.cmd == "step":
         state = load_state_file(state_path)
+        if args.no_submit and args.answer is not None:
+            print(json.dumps(
+                {"ok": False, "error": "--no-submit and --answer are mutually exclusive"},
+                ensure_ascii=False, indent=2,
+            ))
+            return 2
+        if not args.no_submit and args.answer is None:
+            print(json.dumps(
+                {
+                    "ok": False,
+                    "error": (
+                        "step requires --answer VALUE (use --answer '' to submit an "
+                        "empty value, or --no-submit to peek at the current prompt)"
+                    ),
+                },
+                ensure_ascii=False, indent=2,
+            ))
+            return 2
         try:
-            if args.answer is None:
+            if args.no_submit:
                 result = {"echo": "", "next": next_prompt(state).to_json()}
             else:
                 result = submit(state, args.answer)

package/runtime/python/okstra_token_usage/blocks.py CHANGED Viewed

@@ -18,18 +18,21 @@ def usage_block(totals: dict, source: str, note: str | None = None) -> dict:
         "source": source,
         "collectedAt": utc_now(),
     }
-    for key in ("cacheCreationTokens", "cacheReadTokens", "cachedInputTokens",
+    for key in ("cacheCreationTokens", "cacheCreation5mTokens", "cacheCreation1hTokens",
+                "cacheReadTokens", "cachedInputTokens",
                 "reasoningOutputTokens", "cachedTokens", "thoughtsTokens", "toolTokens"):
         if totals.get(key):
             block[key] = totals[key]
     # Billable-equivalent + cost.
     if source == "claude-jsonl":
+        cc_1h = totals.get("cacheCreation1hTokens", 0) or 0
         be = claude_billable_equivalent(
             totals.get("inputTokens", 0) or 0,
             totals.get("cacheCreationTokens", 0) or 0,
             totals.get("cacheReadTokens", 0) or 0,
             totals.get("outputTokens", 0) or 0,
+            cache_create_1h_t=cc_1h,
         )
         block["billableEquivalentTokens"] = be
         cost = claude_cost_usd(
@@ -38,6 +41,7 @@ def usage_block(totals: dict, source: str, note: str | None = None) -> dict:
             totals.get("cacheCreationTokens", 0) or 0,
             totals.get("cacheReadTokens", 0) or 0,
             totals.get("outputTokens", 0) or 0,
+            cache_create_1h_t=cc_1h,
         )
         if cost is not None:
             block["estimatedCostUsd"] = cost

package/runtime/python/okstra_token_usage/claude.py CHANGED Viewed

@@ -10,6 +10,7 @@ from .paths import claude_project_dir
 def claude_session_totals(jsonl_path: Path) -> dict:
     """Return totals + agentName + assistant model + time window for a Claude session jsonl."""
     input_t = output_t = cache_create_t = cache_read_t = 0
+    cache_create_5m_t = cache_create_1h_t = 0
     tool_uses = 0
     agent_name: str | None = None
     model: str | None = None
@@ -23,8 +24,20 @@ def claude_session_totals(jsonl_path: Path) -> dict:
         if usage:
             input_t += usage.get("input_tokens", 0) or 0
             output_t += usage.get("output_tokens", 0) or 0
-            cache_create_t += usage.get("cache_creation_input_tokens", 0) or 0
+            cc_total = usage.get("cache_creation_input_tokens", 0) or 0
+            cache_create_t += cc_total
             cache_read_t += usage.get("cache_read_input_tokens", 0) or 0
+            # Split into 5m / 1h ephemeral tiers when the API breakdown is
+            # present. If only the aggregate is given, attribute all of it to
+            # the 5m tier (1.25x — the cheaper assumption, matches prior
+            # behavior).
+            cc_break = usage.get("cache_creation") or {}
+            if isinstance(cc_break, dict) and (cc_break.get("ephemeral_5m_input_tokens") is not None
+                                               or cc_break.get("ephemeral_1h_input_tokens") is not None):
+                cache_create_5m_t += cc_break.get("ephemeral_5m_input_tokens", 0) or 0
+                cache_create_1h_t += cc_break.get("ephemeral_1h_input_tokens", 0) or 0
+            else:
+                cache_create_5m_t += cc_total
         if rec.get("type") == "assistant":
             if model is None and msg.get("model"):
                 model = msg["model"]
@@ -51,6 +64,8 @@ def claude_session_totals(jsonl_path: Path) -> dict:
         "inputTokens": input_t,
         "outputTokens": output_t,
         "cacheCreationTokens": cache_create_t,
+        "cacheCreation5mTokens": cache_create_5m_t,
+        "cacheCreation1hTokens": cache_create_1h_t,
         "cacheReadTokens": cache_read_t,
         "toolUses": tool_uses,
         "durationMs": duration_ms,

package/runtime/python/okstra_token_usage/cli.py CHANGED Viewed

@@ -6,7 +6,7 @@ import json
 import sys
 from pathlib import Path
 from .collect import collect
-from .report import substitute_final_report
+from .report import SubstituteRefusedError, substitute_final_report
 def main() -> int:
@@ -60,7 +60,14 @@ def main() -> int:
         print(f"sessions={s.get('sessionsFound', 0)} team={s.get('teamName', '')}", file=sys.stderr)
     if args.substitute_final_report is not None:
-        replaced = substitute_final_report(args.substitute_final_report, updated)
+        try:
+            replaced = substitute_final_report(args.substitute_final_report, updated)
+        except SubstituteRefusedError as exc:
+            print(
+                f"final-report substitution REFUSED: {exc}",
+                file=sys.stderr,
+            )
+            return 2
         if replaced < 0:
             print(
                 f"final-report substitution skipped: file not found at {args.substitute_final_report}",

package/runtime/python/okstra_token_usage/collect.py CHANGED Viewed

@@ -54,14 +54,16 @@ def _aggregate_totals(items: list[dict]) -> dict:
     """
     aggregate: dict = {
         "totalTokens": 0, "inputTokens": 0, "outputTokens": 0,
-        "cacheCreationTokens": 0, "cacheReadTokens": 0,
+        "cacheCreationTokens": 0, "cacheCreation5mTokens": 0, "cacheCreation1hTokens": 0,
+        "cacheReadTokens": 0,
         "toolUses": 0, "durationMs": 0,
         "agentName": None, "model": None,
         "startedAt": None, "endedAt": None,
     }
     for t in items:
         for k in ("totalTokens", "inputTokens", "outputTokens",
-                  "cacheCreationTokens", "cacheReadTokens", "toolUses"):
+                  "cacheCreationTokens", "cacheCreation5mTokens", "cacheCreation1hTokens",
+                  "cacheReadTokens", "toolUses"):
             aggregate[k] += t.get(k, 0) or 0
         if aggregate["agentName"] is None and t.get("agentName"):
             aggregate["agentName"] = t["agentName"]
@@ -210,6 +212,17 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
     worker_billable = sum((w.get("usage") or {}).get("billableEquivalentTokens", 0) or 0 for w in workers)
     worker_cost = sum((w.get("usage") or {}).get("estimatedCostUsd", 0) or 0 for w in workers)
     cli_cost = sum((w.get("usage") or {}).get("cliEstimatedCostUsd", 0) or 0 for w in workers)
+    # Surface models whose pricing lookup failed so the silent-zero case is visible.
+    unmatched_models: list[str] = []
+    if lead.get("model") and lead.get("estimatedCostUsd") is None and (lead.get("totalTokens") or 0) > 0:
+        unmatched_models.append(lead["model"])
+    for w in workers:
+        u = w.get("usage") or {}
+        if u.get("model") and u.get("estimatedCostUsd") is None and (u.get("totalTokens") or 0) > 0:
+            unmatched_models.append(u["model"])
+        if u.get("cliModel") and u.get("cliEstimatedCostUsd") is None and (u.get("cliTotalTokens") or 0) > 0:
+            unmatched_models.append(u["cliModel"])
     state["usageSummary"] = {
         "leadTotalTokens": lead_total,
         "workerTotalTokens": worker_total,
@@ -226,9 +239,10 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
         "collectedAt": utc_now(),
         "teamName": team_name,
         "sessionsFound": len(claude_sessions),
+        "unmatchedModels": sorted(set(unmatched_models)),
         "definitions": {
             "totalTokens": "Sum of input + output + cache_creation + cache_read tokens (raw processed volume; matches Anthropic API breakdown). Cache reads are 95%+ in long sessions.",
-            "billableEquivalentTokens": "Tokens normalized to base-input-price units (cache_creation x1.25, cache_read x0.1, output x5). Useful when comparing sessions across models or to gauge cost.",
+            "billableEquivalentTokens": "Tokens normalized to base-input-price units (cache_creation_5m x1.25, cache_creation_1h x2.0, cache_read x0.1, output x5). 5m vs 1h is split from usage.cache_creation when the API breakdown is present; otherwise all cache_creation falls into 5m.",
             "estimatedCostUsd": "USD cost using public list pricing for the model recorded in the session. cliWorkers covers Codex/Gemini CLI calls billed under those providers.",
         },
     }

package/runtime/python/okstra_token_usage/pricing.py CHANGED Viewed

@@ -1,31 +1,131 @@
-"""Public list pricing tables and per-provider cost helpers."""
+"""Public list pricing tables and per-provider cost helpers.
+Pricing is matched by substring against the model id recorded in the session
+transcript, so keys must reflect the *actual* model id form emitted by each
+provider:
+  * Anthropic — `claude-opus-4-*`, `claude-sonnet-4-*`, `claude-haiku-4-5-*`,
+    `claude-3-5-sonnet-*`, `claude-3-5-haiku-*`, `claude-3-opus-*`,
+    `claude-3-haiku-*`.
+  * OpenAI / Codex — `gpt-5*`, `gpt-4o*`, `gpt-4*`.
+  * Google / Gemini — `gemini-2.5-pro*`, `gemini-2.5-flash*`, `gemini-2.0-flash*`.
+Insertion order is the match order, so list more specific keys first. Update
+when providers change list pricing.
+Sources (last verified 2026-05-17, public list prices, USD per 1M tokens):
+  * Anthropic: https://www.anthropic.com/pricing
+  * OpenAI:    https://openai.com/api/pricing
+  * Google:    https://ai.google.dev/gemini-api/docs/pricing
+"""
 from __future__ import annotations
-# Public list pricing (USD per 1M tokens). Used for cost estimation only.
-# Update when Anthropic / OpenAI / Google change pricing.
-# Anthropic billing ratios relative to base input: cache_creation=1.25x, cache_read=0.1x, output=5x.
+# Anthropic billing ratios relative to base input: cache_creation (5m) = 1.25x,
+# cache_creation (1h) = 2x, cache_read = 0.1x, output = 5x. The CLAUDE_PRICING
+# entries below carry the 5m tier; the 1h price is derived as base_input * 2x
+# at call time so the table stays compact.
 CLAUDE_PRICING = {
-    # model substring -> (input, cache_creation, cache_read, output) USD/1M
-    "opus-4": (15.0, 18.75, 1.50, 75.0),
-    "sonnet-4": (3.0, 3.75, 0.30, 15.0),
-    "haiku-4": (1.0, 1.25, 0.10, 5.0),
-    "opus-3": (15.0, 18.75, 1.50, 75.0),
-    "sonnet-3": (3.0, 3.75, 0.30, 15.0),
-    "haiku-3": (0.80, 1.0, 0.08, 4.0),
+    # model substring -> (input, cache_creation_5m, cache_read, output) USD/1M.
+    #
+    # Order matters — list more specific keys (e.g. `opus-4-7`, `3-7-sonnet`)
+    # before the family fallbacks (`opus-4`, `3-5-sonnet`).
+    #
+    # For the newer 4.x point releases (Opus 4.7, Sonnet 4.6, Haiku 4.5),
+    # Anthropic's public price page only lists input/output. Cache-write and
+    # cache-read are filled in using Anthropic's published billing ratios
+    # (5m cache_creation = 1.25x input, cache_read = 0.1x input), which have
+    # been consistent across the Claude 3 / 4 families.
+    # Claude 3 series (legacy).
+    "3-7-sonnet": (3.0, 3.75, 0.30, 15.0),     # Sonnet 3.7
+    "3-5-sonnet": (3.0, 3.75, 0.30, 15.0),     # Sonnet 3.5
+    "3-5-haiku":  (0.80, 1.0, 0.08, 4.0),      # Haiku 3.5
+    "3-opus":     (15.0, 18.75, 1.50, 75.0),   # Opus 3
+    "3-sonnet":   (3.0, 3.75, 0.30, 15.0),     # legacy 3 Sonnet
+    "3-haiku":    (0.25, 0.30, 0.03, 1.25),    # Haiku 3
+    # Claude 4 point releases (explicit so future divergence is easy to see).
+    "opus-4-7":   (5.0, 6.25, 0.50, 25.0),     # Opus 4.7 (cache prices derived from ratios)
+    "sonnet-4-6": (3.0, 3.75, 0.30, 15.0),     # Sonnet 4.6 (cache prices derived from ratios)
+    "haiku-4-5":  (1.0, 1.25, 0.10, 5.0),      # Haiku 4.5  (cache prices derived from ratios)
+    # Claude 4 family fallbacks (Opus 4 / Sonnet 4 / Haiku 4 base).
+    "opus-4":     (15.0, 18.75, 1.50, 75.0),
+    "sonnet-4":   (3.0, 3.75, 0.30, 15.0),
+    "haiku-4":    (1.0, 1.25, 0.10, 5.0),
 }
+# Anthropic 1h ephemeral cache_creation multiplier on the base input rate.
+CLAUDE_CACHE_CREATE_1H_MULT = 2.0
 CODEX_PRICING = {
-    # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M)
-    "gpt-5": (1.25, 0.125, 10.0),
-    "gpt-4": (2.50, 0.625, 10.0),
+    # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M).
+    # IMPORTANT: substring match order is insertion order. List the most
+    # specific keys first (e.g. `gpt-5-mini` before `gpt-5`, `o3-mini` before
+    # `o3`, `gpt-4o-mini` before `gpt-4o`, `gpt-4o` before the legacy `gpt-4`).
+    # For models with no published cached-input rate (o1-pro, o3-pro), cached
+    # is set equal to input as a conservative no-discount default.
+    # GPT-5 series.
+    "gpt-5.5":      (5.00,  0.50,  30.0),
+    "gpt-5.4-mini": (0.75,  0.075, 4.50),
+    "gpt-5.4":      (2.50,  0.25,  15.0),
+    "gpt-5.2-pro":  (21.0,  2.10,  168.0),
+    "gpt-5.2":      (1.75,  0.175, 14.0),
+    "gpt-5.1":      (1.25,  0.125, 10.0),
+    "gpt-5-mini":   (0.25,  0.025, 2.00),
+    "gpt-5-nano":   (0.05,  0.005, 0.40),
+    "gpt-5":        (1.25,  0.125, 10.0),  # base GPT-5 (also matches gpt-5-codex)
+    # O-series reasoning models.
+    "o1-pro":  (150.0, 150.0, 600.0),  # no cached rate published
+    "o3-pro":  (20.0,  20.0,  80.0),   # no cached rate published
+    "o4-mini": (1.10,  0.275, 4.40),
+    "o3-mini": (1.10,  0.275, 4.40),
+    "o1":      (15.0,  7.50,  60.0),
+    "o3":      (2.00,  1.00,  8.00),
+    # GPT-4 series.
+    "gpt-4.1-nano": (0.10, 0.01,  0.40),
+    "gpt-4.1-mini": (0.40, 0.04,  1.60),
+    "gpt-4.1":      (2.00, 0.20,  8.00),
+    "gpt-4o-mini":  (0.15, 0.075, 0.60),
+    "gpt-4o":       (2.50, 1.25,  10.0),
+    "gpt-4":        (2.50, 0.625, 10.0),  # legacy gpt-4 fallback
 }
 GEMINI_PRICING = {
-    # model substring -> (input USD/1M, output USD/1M); cached not separately priced for short runs
-    "pro": (1.25, 5.0),
-    "flash": (0.075, 0.30),
-    "auto": (1.25, 5.0),  # treat unknown as pro
+    # model substring -> (input USD/1M, output USD/1M).
+    #
+    # Cached-input prices exist for some models but are not separately priced
+    # here because the Gemini transcript collector does not yet record cached
+    # input tokens. Models with two-tier context pricing (Gemini 2.5 Pro,
+    # Gemini 3.1 Pro) are charged at the ≤200K rate; runs above 200K input
+    # will be slightly undercounted.
+    #
+    # Both dotted (`gemini-3.1-pro`) and hyphenated (`gemini-3-1-pro`) id
+    # forms appear in the wild, so include both for the new 3.x families.
+    # Gemini 3 series (preview).
+    "3.1-pro":         (2.00, 12.0),
+    "3-1-pro":         (2.00, 12.0),
+    "3-flash":         (0.50, 3.00),
+    # Gemini 2.5 series.
+    "2.5-flash-lite":  (0.10, 0.40),
+    "2.5-flash":       (0.30, 2.50),
+    "2.5-pro":         (1.25, 10.0),
+    # Gemini 2.0 series.
+    "2.0-flash-lite":  (0.075, 0.30),
+    "2.0-flash":       (0.10,  0.40),
+    # Fallbacks for unspecified family names.
+    "flash-lite":      (0.10, 0.40),   # assume 2.5 Flash-Lite
+    "pro":             (1.25, 10.0),   # assume 2.5 Pro
+    "flash":           (0.30, 2.50),   # assume 2.5 Flash
+    "auto":            (1.25, 10.0),   # treat unknown/auto as 2.5 Pro
 }
@@ -39,17 +139,53 @@ def _match_pricing(model: str | None, table: dict) -> tuple | None:
     return None
-def claude_billable_equivalent(input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> int:
-    """Sum normalized to base-input units (cache_creation 1.25x, cache_read 0.1x, output 5x)."""
-    return int(round(input_t + 1.25 * cache_create_t + 0.1 * cache_read_t + 5.0 * output_t))
+def claude_billable_equivalent(
+    input_t: int,
+    cache_create_t: int,
+    cache_read_t: int,
+    output_t: int,
+    cache_create_1h_t: int = 0,
+) -> int:
+    """Sum normalized to base-input units.
+    Ratios: cache_creation_5m=1.25x, cache_creation_1h=2x, cache_read=0.1x,
+    output=5x. `cache_create_t` is the total cache_creation tokens; pass the
+    1h portion separately via `cache_create_1h_t` so the 5m vs 1h tiers are
+    weighted correctly (the 5m portion is the difference).
+    """
+    cc_1h = max(0, cache_create_1h_t)
+    cc_5m = max(0, cache_create_t - cc_1h)
+    return int(round(
+        input_t
+        + 1.25 * cc_5m
+        + CLAUDE_CACHE_CREATE_1H_MULT * cc_1h
+        + 0.1 * cache_read_t
+        + 5.0 * output_t
+    ))
-def claude_cost_usd(model: str | None, input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> float | None:
+def claude_cost_usd(
+    model: str | None,
+    input_t: int,
+    cache_create_t: int,
+    cache_read_t: int,
+    output_t: int,
+    cache_create_1h_t: int = 0,
+) -> float | None:
     p = _match_pricing(model, CLAUDE_PRICING)
     if p is None:
         return None
     pi, pcc, pcr, po = p
-    return round((input_t * pi + cache_create_t * pcc + cache_read_t * pcr + output_t * po) / 1_000_000, 4)
+    cc_1h = max(0, cache_create_1h_t)
+    cc_5m = max(0, cache_create_t - cc_1h)
+    pcc_1h = pi * CLAUDE_CACHE_CREATE_1H_MULT
+    return round((
+        input_t * pi
+        + cc_5m * pcc
+        + cc_1h * pcc_1h
+        + cache_read_t * pcr
+        + output_t * po
+    ) / 1_000_000, 4)
 def codex_cost_usd(model: str | None, input_t: int, cached_input_t: int, output_t: int) -> float | None:
@@ -68,4 +204,3 @@ def gemini_cost_usd(model: str | None, input_t: int, output_t: int) -> float | N
         return None
     pi, po = p
     return round((input_t * pi + output_t * po) / 1_000_000, 4)

package/runtime/python/okstra_token_usage/report.py CHANGED Viewed

@@ -18,19 +18,48 @@ def _format_usd(v) -> str:
         return "$0.00"
+class SubstituteRefusedError(RuntimeError):
+    """Raised when substitution would write a zero-only Token Usage Summary.
+    Shipping `0` / `$0.00` in the Lead / Worker / Grand rows is the
+    observed silent-failure mode where the collector ran but every
+    session jsonl was empty (or the writer fabricated zeros). The
+    validator catches it post-hoc, but raising here at the substitution
+    boundary surfaces the failure at the exact step where it can still
+    be retried with a re-collection.
+    """
 def substitute_final_report(report_path: Path, state: dict) -> int:
     """Replace token-usage placeholders in the final report file with concrete
     values from the freshly computed usageSummary.
     Returns the number of placeholder occurrences replaced. If the report file
-    does not exist, returns -1 without raising. If any required placeholder is
-    still present after substitution attempts (e.g. usageSummary missing), the
-    function still writes what it can and returns the count.
+    does not exist, returns -1 without raising.
+    Raises ``SubstituteRefusedError`` when ``usageSummary.grandTotalTokens``
+    is zero — substituting zeros into the report bakes in the most common
+    silent failure mode (collector ran but found nothing). Callers that
+    want to suppress the refusal (e.g. unit-test fixtures) can pass a
+    summary with ``grandTotalTokens`` > 0 or remove the summary entirely
+    so substitution is skipped.
     """
     if not report_path.is_file():
         return -1
     summary = state.get("usageSummary") or {}
+    grand_total = summary.get("grandTotalTokens", 0)
+    if isinstance(grand_total, (int, float)) and grand_total == 0 and summary:
+        raise SubstituteRefusedError(
+            "Refusing to substitute zero-only usageSummary into the final "
+            f"report at {report_path}. grandTotalTokens=0 means the "
+            "collector ran but every session jsonl was empty (or absent). "
+            "Re-run `python3 scripts/okstra-token-usage.py <team-state> "
+            "--write --summary --substitute-final-report <report-path>` "
+            "after locating the missing session jsonls. To intentionally "
+            "ship zeros (test fixtures only), omit `usageSummary` from the "
+            "team-state JSON before calling substitute_final_report."
+        )
     cost = summary.get("estimatedCostUsd") or {}
     lead_cost = cost.get("lead") or 0
     worker_cost = cost.get("claudeWorkers") or 0