okstra 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.kr.md +15 -0
  2. package/README.md +15 -0
  3. package/docs/kr/architecture.md +2 -6
  4. package/docs/kr/cli.md +40 -6
  5. package/docs/kr/performance-improvement-plan-v2.md +23 -0
  6. package/docs/kr/performance-improvement-plan.md +22 -0
  7. package/package.json +1 -1
  8. package/runtime/BUILD.json +2 -2
  9. package/runtime/agents/workers/claude-worker.md +4 -3
  10. package/runtime/agents/workers/codex-worker.md +4 -3
  11. package/runtime/agents/workers/gemini-worker.md +4 -3
  12. package/runtime/agents/workers/report-writer-worker.md +7 -2
  13. package/runtime/bin/okstra.sh +0 -1
  14. package/runtime/prompts/launch.template.md +1 -1
  15. package/runtime/prompts/profiles/_common-contract.md +36 -4
  16. package/runtime/prompts/profiles/error-analysis.md +12 -0
  17. package/runtime/prompts/profiles/implementation-planning.md +20 -0
  18. package/runtime/prompts/profiles/requirements-discovery.md +20 -0
  19. package/runtime/python/lib/okstra/cli.sh +1 -7
  20. package/runtime/python/lib/okstra/globals.sh +0 -1
  21. package/runtime/python/lib/okstra/usage.sh +1 -4
  22. package/runtime/python/okstra_ctl/render.py +3 -0
  23. package/runtime/python/okstra_ctl/run.py +0 -6
  24. package/runtime/python/okstra_ctl/run_context.py +1 -1
  25. package/runtime/python/okstra_ctl/wizard.py +25 -2
  26. package/runtime/python/okstra_token_usage/blocks.py +5 -1
  27. package/runtime/python/okstra_token_usage/claude.py +16 -1
  28. package/runtime/python/okstra_token_usage/cli.py +9 -2
  29. package/runtime/python/okstra_token_usage/collect.py +17 -3
  30. package/runtime/python/okstra_token_usage/pricing.py +159 -24
  31. package/runtime/python/okstra_token_usage/report.py +32 -3
  32. package/runtime/skills/okstra-brief/SKILL.md +532 -65
  33. package/runtime/skills/okstra-context-loader/SKILL.md +25 -11
  34. package/runtime/skills/okstra-convergence/SKILL.md +38 -14
  35. package/runtime/skills/okstra-history/SKILL.md +68 -37
  36. package/runtime/skills/okstra-logs/SKILL.md +26 -4
  37. package/runtime/skills/okstra-report-finder/SKILL.md +49 -22
  38. package/runtime/skills/okstra-report-writer/SKILL.md +62 -65
  39. package/runtime/skills/okstra-run/SKILL.md +35 -34
  40. package/runtime/skills/okstra-schedule/SKILL.md +51 -20
  41. package/runtime/skills/okstra-setup/SKILL.md +31 -12
  42. package/runtime/skills/okstra-status/SKILL.md +20 -8
  43. package/runtime/skills/okstra-team-contract/SKILL.md +41 -25
  44. package/runtime/skills/okstra-time-summary/SKILL.md +53 -16
  45. package/runtime/templates/reports/final-report.template.md +227 -207
  46. package/runtime/templates/reports/settings.template.json +7 -4
  47. package/runtime/validators/lib/fixtures.sh +47 -2
  48. package/runtime/validators/lib/validate-assets.sh +50 -24
  49. package/runtime/validators/validate-brief.py +385 -0
  50. package/runtime/validators/validate-brief.sh +35 -0
  51. package/runtime/validators/validate-run.py +313 -1
  52. package/runtime/validators/validate-workflow.sh +7 -33
@@ -102,12 +102,6 @@ while [[ $# -gt 0 ]]; do
102
102
  ASSUME_YES="true"
103
103
  shift
104
104
  ;;
105
- --refresh-assets)
106
- printf 'warning: --refresh-assets is deprecated. okstra now installs into ~/.claude and ~/.okstra via okstra-install.sh.\n' >&2
107
- printf ' re-run "%s/scripts/okstra-install.sh --refresh" to refresh installed assets.\n' "$WORKSPACE_ROOT" >&2
108
- REFRESH_OKSTRA_ASSETS="true"
109
- shift
110
- ;;
111
105
  --workers)
112
106
  WORKERS_OVERRIDE="$(require_option_value --workers "${2-}")"
113
107
  shift 2
@@ -231,7 +225,7 @@ while [[ $# -gt 0 ]]; do
231
225
  printf ' hint: did you mean --task-id?\n' >&2
232
226
  ;;
233
227
  esac
234
- printf ' valid options: --render-only --resume-clarification --yes --refresh-assets --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve --no-plan-verification -h|--help\n' >&2
228
+ printf ' valid options: --render-only --resume-clarification --yes --workers --lead-model --claude-model --codex-model --gemini-model --report-writer-model --related-tasks --task-type --project-id --project-root --task-group --task-id --task-brief --directive --clarification-response --approved-plan --approve --no-plan-verification -h|--help\n' >&2
235
229
  usage
236
230
  exit 1
237
231
  ;;
@@ -17,7 +17,6 @@ OKSTRA_TASK_CATALOG_RELATIVE_PATH=""
17
17
  RENDER_ONLY="false"
18
18
  ASSUME_YES="false"
19
19
  RESUME_CLARIFICATION_MODE="false"
20
- REFRESH_OKSTRA_ASSETS="false"
21
20
  WORKERS_OVERRIDE=""
22
21
  LEAD_MODEL_OVERRIDE=""
23
22
  CLAUDE_MODEL_OVERRIDE=""
@@ -3,7 +3,7 @@
3
3
  usage() {
4
4
  cat >&2 <<USAGE_EOF
5
5
  usage:
6
- $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--refresh-assets] [--no-plan-verification] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
6
+ $DISPLAY_COMMAND_NAME [--render-only] [--yes] [--no-plan-verification] --task-type <task-type> [--workers worker1,worker2] [--lead-model <model>] [--claude-model <model>] [--codex-model <model>] [--gemini-model <model>] [--report-writer-model <model>] [--executor claude|codex|gemini] [--related-tasks taskA,taskB] --project-id <project-id> [--project-root <path>] --task-group <task-group> --task-id <task-id> --task-brief <brief-path> [--directive <directive>]
7
7
 
8
8
  summary:
9
9
  $DISPLAY_TOOL_NAME prepares a task-keyed instruction bundle for Claude Code and launches an interactive Claude session by default.
@@ -69,9 +69,6 @@ options:
69
69
  (--project-id/--task-group/--task-id or --task-key). Mutually
70
70
  exclusive with --clarification-response and --approved-plan.
71
71
  --yes Skip interactive prompting and confirmation. Requires all required arguments.
72
- --refresh-assets Deprecated. okstra now installs skills/agents into ~/.claude and the codex
73
- wrapper into ~/.okstra/bin via scripts/okstra-install.sh. Re-run that
74
- installer with --refresh to update installed assets.
75
72
  --workers Comma-separated worker list for this run. Default: claude,codex,report-writer
76
73
  (Gemini worker is optional; add `gemini` explicitly, e.g. --workers claude,codex,gemini,report-writer)
77
74
  --lead-model Model for Claude lead. Default: OKSTRA_DEFAULT_LEAD_MODEL or opus
@@ -338,6 +338,9 @@ def render_task_catalog_discovery(output_path: str, ctx: dict) -> None:
338
338
  "taskType": s(manifest, "taskType"),
339
339
  "workCategory": s(manifest, "workCategory"),
340
340
  "currentStatus": s(manifest, "currentStatus"),
341
+ "workStatus": s(manifest, "workStatus"),
342
+ "workStatusUpdatedAt": s(manifest, "workStatusUpdatedAt"),
343
+ "workStatusNote": s(manifest, "workStatusNote"),
341
344
  "updatedAt": s(manifest, "updatedAt"),
342
345
  "currentPhase": (workflow or {}).get("currentPhase", "") if isinstance(workflow, dict) else "",
343
346
  "currentPhaseState": (workflow or {}).get("currentPhaseState", "") if isinstance(workflow, dict) else "",
@@ -113,7 +113,6 @@ class PrepareInputs:
113
113
  # project.json → global config → 스킬 디폴트 순으로 해석된다.
114
114
  pr_template_path: str = ""
115
115
  render_only: bool = False
116
- refresh_assets: bool = False
117
116
  approve_plan_ack: bool = False
118
117
  # Phase 6 plan-body verification opt-out. Default True (round runs after
119
118
  # report-writer draft). Flipped to False by CLI `--no-plan-verification`.
@@ -385,8 +384,6 @@ def _canonical_argv(inp: PrepareInputs, ctx: dict) -> list[str]:
385
384
  argv.extend([flag, val])
386
385
  if inp.render_only:
387
386
  argv.append("--render-only")
388
- if inp.refresh_assets:
389
- argv.append("--refresh-assets")
390
387
  if not inp.plan_verification_enabled:
391
388
  argv.append("--no-plan-verification")
392
389
  argv.append("--yes")
@@ -806,7 +803,6 @@ def prepare_task_bundle(inp: PrepareInputs) -> PrepareOutputs:
806
803
  "approvedPlanPath": inp.approved_plan_path,
807
804
  "clarificationResponsePath": inp.clarification_response_path,
808
805
  "renderOnly": inp.render_only,
809
- "refreshAssets": inp.refresh_assets,
810
806
  },
811
807
  )
812
808
 
@@ -923,7 +919,6 @@ def main(argv: list[str]) -> int:
923
919
  ),
924
920
  )
925
921
  p.add_argument("--render-only", action="store_true", dest="render_only")
926
- p.add_argument("--refresh-assets", action="store_true", dest="refresh_assets")
927
922
  p.add_argument(
928
923
  "--no-plan-verification",
929
924
  action="store_false",
@@ -1000,7 +995,6 @@ def main(argv: list[str]) -> int:
1000
995
  clarification_response_path=clarification_abs,
1001
996
  pr_template_path=args.pr_template_path,
1002
997
  render_only=args.render_only,
1003
- refresh_assets=args.refresh_assets,
1004
998
  approve_plan_ack=args.approve_plan_ack,
1005
999
  plan_verification_enabled=args.plan_verification_enabled,
1006
1000
  )
@@ -140,7 +140,7 @@ def write_run_inputs(
140
140
  inputs schema (모든 키 optional):
141
141
  taskBriefPath, directive, workers, leadModel, claudeModel, codexModel,
142
142
  geminiModel, reportWriterModel, relatedTasks, approvedPlanPath,
143
- clarificationResponsePath, renderOnly, refreshAssets
143
+ clarificationResponsePath, renderOnly
144
144
  """
145
145
  run_manifests_dir = Path(run_manifests_dir)
146
146
  path = run_manifests_dir / _run_inputs_filename(task_type_segment, seq)
@@ -1398,7 +1398,7 @@ def _cli(argv: list[str]) -> int:
1398
1398
 
1399
1399
  Subcommands:
1400
1400
  init --state-file PATH --workspace-root P --project-root P --project-id ID
1401
- step --state-file PATH [--answer VALUE]
1401
+ step --state-file PATH (--answer VALUE | --no-submit)
1402
1402
  render-args --state-file PATH
1403
1403
  confirmation --state-file PATH
1404
1404
  """
@@ -1416,6 +1416,11 @@ def _cli(argv: list[str]) -> int:
1416
1416
  p_step = sub.add_parser("step")
1417
1417
  p_step.add_argument("--state-file", required=True)
1418
1418
  p_step.add_argument("--answer", default=None)
1419
+ p_step.add_argument(
1420
+ "--no-submit",
1421
+ action="store_true",
1422
+ help="Fetch the current prompt without submitting an answer.",
1423
+ )
1419
1424
 
1420
1425
  p_render = sub.add_parser("render-args")
1421
1426
  p_render.add_argument("--state-file", required=True)
@@ -1440,8 +1445,26 @@ def _cli(argv: list[str]) -> int:
1440
1445
 
1441
1446
  if args.cmd == "step":
1442
1447
  state = load_state_file(state_path)
1448
+ if args.no_submit and args.answer is not None:
1449
+ print(json.dumps(
1450
+ {"ok": False, "error": "--no-submit and --answer are mutually exclusive"},
1451
+ ensure_ascii=False, indent=2,
1452
+ ))
1453
+ return 2
1454
+ if not args.no_submit and args.answer is None:
1455
+ print(json.dumps(
1456
+ {
1457
+ "ok": False,
1458
+ "error": (
1459
+ "step requires --answer VALUE (use --answer '' to submit an "
1460
+ "empty value, or --no-submit to peek at the current prompt)"
1461
+ ),
1462
+ },
1463
+ ensure_ascii=False, indent=2,
1464
+ ))
1465
+ return 2
1443
1466
  try:
1444
- if args.answer is None:
1467
+ if args.no_submit:
1445
1468
  result = {"echo": "", "next": next_prompt(state).to_json()}
1446
1469
  else:
1447
1470
  result = submit(state, args.answer)
@@ -18,18 +18,21 @@ def usage_block(totals: dict, source: str, note: str | None = None) -> dict:
18
18
  "source": source,
19
19
  "collectedAt": utc_now(),
20
20
  }
21
- for key in ("cacheCreationTokens", "cacheReadTokens", "cachedInputTokens",
21
+ for key in ("cacheCreationTokens", "cacheCreation5mTokens", "cacheCreation1hTokens",
22
+ "cacheReadTokens", "cachedInputTokens",
22
23
  "reasoningOutputTokens", "cachedTokens", "thoughtsTokens", "toolTokens"):
23
24
  if totals.get(key):
24
25
  block[key] = totals[key]
25
26
 
26
27
  # Billable-equivalent + cost.
27
28
  if source == "claude-jsonl":
29
+ cc_1h = totals.get("cacheCreation1hTokens", 0) or 0
28
30
  be = claude_billable_equivalent(
29
31
  totals.get("inputTokens", 0) or 0,
30
32
  totals.get("cacheCreationTokens", 0) or 0,
31
33
  totals.get("cacheReadTokens", 0) or 0,
32
34
  totals.get("outputTokens", 0) or 0,
35
+ cache_create_1h_t=cc_1h,
33
36
  )
34
37
  block["billableEquivalentTokens"] = be
35
38
  cost = claude_cost_usd(
@@ -38,6 +41,7 @@ def usage_block(totals: dict, source: str, note: str | None = None) -> dict:
38
41
  totals.get("cacheCreationTokens", 0) or 0,
39
42
  totals.get("cacheReadTokens", 0) or 0,
40
43
  totals.get("outputTokens", 0) or 0,
44
+ cache_create_1h_t=cc_1h,
41
45
  )
42
46
  if cost is not None:
43
47
  block["estimatedCostUsd"] = cost
@@ -10,6 +10,7 @@ from .paths import claude_project_dir
10
10
  def claude_session_totals(jsonl_path: Path) -> dict:
11
11
  """Return totals + agentName + assistant model + time window for a Claude session jsonl."""
12
12
  input_t = output_t = cache_create_t = cache_read_t = 0
13
+ cache_create_5m_t = cache_create_1h_t = 0
13
14
  tool_uses = 0
14
15
  agent_name: str | None = None
15
16
  model: str | None = None
@@ -23,8 +24,20 @@ def claude_session_totals(jsonl_path: Path) -> dict:
23
24
  if usage:
24
25
  input_t += usage.get("input_tokens", 0) or 0
25
26
  output_t += usage.get("output_tokens", 0) or 0
26
- cache_create_t += usage.get("cache_creation_input_tokens", 0) or 0
27
+ cc_total = usage.get("cache_creation_input_tokens", 0) or 0
28
+ cache_create_t += cc_total
27
29
  cache_read_t += usage.get("cache_read_input_tokens", 0) or 0
30
+ # Split into 5m / 1h ephemeral tiers when the API breakdown is
31
+ # present. If only the aggregate is given, attribute all of it to
32
+ # the 5m tier (1.25x — the cheaper assumption, matches prior
33
+ # behavior).
34
+ cc_break = usage.get("cache_creation") or {}
35
+ if isinstance(cc_break, dict) and (cc_break.get("ephemeral_5m_input_tokens") is not None
36
+ or cc_break.get("ephemeral_1h_input_tokens") is not None):
37
+ cache_create_5m_t += cc_break.get("ephemeral_5m_input_tokens", 0) or 0
38
+ cache_create_1h_t += cc_break.get("ephemeral_1h_input_tokens", 0) or 0
39
+ else:
40
+ cache_create_5m_t += cc_total
28
41
  if rec.get("type") == "assistant":
29
42
  if model is None and msg.get("model"):
30
43
  model = msg["model"]
@@ -51,6 +64,8 @@ def claude_session_totals(jsonl_path: Path) -> dict:
51
64
  "inputTokens": input_t,
52
65
  "outputTokens": output_t,
53
66
  "cacheCreationTokens": cache_create_t,
67
+ "cacheCreation5mTokens": cache_create_5m_t,
68
+ "cacheCreation1hTokens": cache_create_1h_t,
54
69
  "cacheReadTokens": cache_read_t,
55
70
  "toolUses": tool_uses,
56
71
  "durationMs": duration_ms,
@@ -6,7 +6,7 @@ import json
6
6
  import sys
7
7
  from pathlib import Path
8
8
  from .collect import collect
9
- from .report import substitute_final_report
9
+ from .report import SubstituteRefusedError, substitute_final_report
10
10
 
11
11
 
12
12
  def main() -> int:
@@ -60,7 +60,14 @@ def main() -> int:
60
60
  print(f"sessions={s.get('sessionsFound', 0)} team={s.get('teamName', '')}", file=sys.stderr)
61
61
 
62
62
  if args.substitute_final_report is not None:
63
- replaced = substitute_final_report(args.substitute_final_report, updated)
63
+ try:
64
+ replaced = substitute_final_report(args.substitute_final_report, updated)
65
+ except SubstituteRefusedError as exc:
66
+ print(
67
+ f"final-report substitution REFUSED: {exc}",
68
+ file=sys.stderr,
69
+ )
70
+ return 2
64
71
  if replaced < 0:
65
72
  print(
66
73
  f"final-report substitution skipped: file not found at {args.substitute_final_report}",
@@ -54,14 +54,16 @@ def _aggregate_totals(items: list[dict]) -> dict:
54
54
  """
55
55
  aggregate: dict = {
56
56
  "totalTokens": 0, "inputTokens": 0, "outputTokens": 0,
57
- "cacheCreationTokens": 0, "cacheReadTokens": 0,
57
+ "cacheCreationTokens": 0, "cacheCreation5mTokens": 0, "cacheCreation1hTokens": 0,
58
+ "cacheReadTokens": 0,
58
59
  "toolUses": 0, "durationMs": 0,
59
60
  "agentName": None, "model": None,
60
61
  "startedAt": None, "endedAt": None,
61
62
  }
62
63
  for t in items:
63
64
  for k in ("totalTokens", "inputTokens", "outputTokens",
64
- "cacheCreationTokens", "cacheReadTokens", "toolUses"):
65
+ "cacheCreationTokens", "cacheCreation5mTokens", "cacheCreation1hTokens",
66
+ "cacheReadTokens", "toolUses"):
65
67
  aggregate[k] += t.get(k, 0) or 0
66
68
  if aggregate["agentName"] is None and t.get("agentName"):
67
69
  aggregate["agentName"] = t["agentName"]
@@ -210,6 +212,17 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
210
212
  worker_billable = sum((w.get("usage") or {}).get("billableEquivalentTokens", 0) or 0 for w in workers)
211
213
  worker_cost = sum((w.get("usage") or {}).get("estimatedCostUsd", 0) or 0 for w in workers)
212
214
  cli_cost = sum((w.get("usage") or {}).get("cliEstimatedCostUsd", 0) or 0 for w in workers)
215
+
216
+ # Surface models whose pricing lookup failed so the silent-zero case is visible.
217
+ unmatched_models: list[str] = []
218
+ if lead.get("model") and lead.get("estimatedCostUsd") is None and (lead.get("totalTokens") or 0) > 0:
219
+ unmatched_models.append(lead["model"])
220
+ for w in workers:
221
+ u = w.get("usage") or {}
222
+ if u.get("model") and u.get("estimatedCostUsd") is None and (u.get("totalTokens") or 0) > 0:
223
+ unmatched_models.append(u["model"])
224
+ if u.get("cliModel") and u.get("cliEstimatedCostUsd") is None and (u.get("cliTotalTokens") or 0) > 0:
225
+ unmatched_models.append(u["cliModel"])
213
226
  state["usageSummary"] = {
214
227
  "leadTotalTokens": lead_total,
215
228
  "workerTotalTokens": worker_total,
@@ -226,9 +239,10 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
226
239
  "collectedAt": utc_now(),
227
240
  "teamName": team_name,
228
241
  "sessionsFound": len(claude_sessions),
242
+ "unmatchedModels": sorted(set(unmatched_models)),
229
243
  "definitions": {
230
244
  "totalTokens": "Sum of input + output + cache_creation + cache_read tokens (raw processed volume; matches Anthropic API breakdown). Cache reads are 95%+ in long sessions.",
231
- "billableEquivalentTokens": "Tokens normalized to base-input-price units (cache_creation x1.25, cache_read x0.1, output x5). Useful when comparing sessions across models or to gauge cost.",
245
+ "billableEquivalentTokens": "Tokens normalized to base-input-price units (cache_creation_5m x1.25, cache_creation_1h x2.0, cache_read x0.1, output x5). 5m vs 1h is split from usage.cache_creation when the API breakdown is present; otherwise all cache_creation falls into 5m.",
232
246
  "estimatedCostUsd": "USD cost using public list pricing for the model recorded in the session. cliWorkers covers Codex/Gemini CLI calls billed under those providers.",
233
247
  },
234
248
  }
@@ -1,31 +1,131 @@
1
- """Public list pricing tables and per-provider cost helpers."""
1
+ """Public list pricing tables and per-provider cost helpers.
2
+
3
+ Pricing is matched by substring against the model id recorded in the session
4
+ transcript, so keys must reflect the *actual* model id form emitted by each
5
+ provider:
6
+
7
+ * Anthropic — `claude-opus-4-*`, `claude-sonnet-4-*`, `claude-haiku-4-5-*`,
8
+ `claude-3-5-sonnet-*`, `claude-3-5-haiku-*`, `claude-3-opus-*`,
9
+ `claude-3-haiku-*`.
10
+ * OpenAI / Codex — `gpt-5*`, `gpt-4o*`, `gpt-4*`.
11
+ * Google / Gemini — `gemini-2.5-pro*`, `gemini-2.5-flash*`, `gemini-2.0-flash*`.
12
+
13
+ Insertion order is the match order, so list more specific keys first. Update
14
+ when providers change list pricing.
15
+
16
+ Sources (last verified 2026-05-17, public list prices, USD per 1M tokens):
17
+ * Anthropic: https://www.anthropic.com/pricing
18
+ * OpenAI: https://openai.com/api/pricing
19
+ * Google: https://ai.google.dev/gemini-api/docs/pricing
20
+ """
2
21
  from __future__ import annotations
3
22
 
4
23
 
5
- # Public list pricing (USD per 1M tokens). Used for cost estimation only.
6
- # Update when Anthropic / OpenAI / Google change pricing.
7
- # Anthropic billing ratios relative to base input: cache_creation=1.25x, cache_read=0.1x, output=5x.
24
+ # Anthropic billing ratios relative to base input: cache_creation (5m) = 1.25x,
25
+ # cache_creation (1h) = 2x, cache_read = 0.1x, output = 5x. The CLAUDE_PRICING
26
+ # entries below carry the 5m tier; the 1h price is derived as base_input * 2x
27
+ # at call time so the table stays compact.
8
28
  CLAUDE_PRICING = {
9
- # model substring -> (input, cache_creation, cache_read, output) USD/1M
10
- "opus-4": (15.0, 18.75, 1.50, 75.0),
11
- "sonnet-4": (3.0, 3.75, 0.30, 15.0),
12
- "haiku-4": (1.0, 1.25, 0.10, 5.0),
13
- "opus-3": (15.0, 18.75, 1.50, 75.0),
14
- "sonnet-3": (3.0, 3.75, 0.30, 15.0),
15
- "haiku-3": (0.80, 1.0, 0.08, 4.0),
29
+ # model substring -> (input, cache_creation_5m, cache_read, output) USD/1M.
30
+ #
31
+ # Order matters — list more specific keys (e.g. `opus-4-7`, `3-7-sonnet`)
32
+ # before the family fallbacks (`opus-4`, `3-5-sonnet`).
33
+ #
34
+ # For the newer 4.x point releases (Opus 4.7, Sonnet 4.6, Haiku 4.5),
35
+ # Anthropic's public price page only lists input/output. Cache-write and
36
+ # cache-read are filled in using Anthropic's published billing ratios
37
+ # (5m cache_creation = 1.25x input, cache_read = 0.1x input), which have
38
+ # been consistent across the Claude 3 / 4 families.
39
+
40
+ # Claude 3 series (legacy).
41
+ "3-7-sonnet": (3.0, 3.75, 0.30, 15.0), # Sonnet 3.7
42
+ "3-5-sonnet": (3.0, 3.75, 0.30, 15.0), # Sonnet 3.5
43
+ "3-5-haiku": (0.80, 1.0, 0.08, 4.0), # Haiku 3.5
44
+ "3-opus": (15.0, 18.75, 1.50, 75.0), # Opus 3
45
+ "3-sonnet": (3.0, 3.75, 0.30, 15.0), # legacy 3 Sonnet
46
+ "3-haiku": (0.25, 0.30, 0.03, 1.25), # Haiku 3
47
+
48
+ # Claude 4 point releases (explicit so future divergence is easy to see).
49
+ "opus-4-7": (5.0, 6.25, 0.50, 25.0), # Opus 4.7 (cache prices derived from ratios)
50
+ "sonnet-4-6": (3.0, 3.75, 0.30, 15.0), # Sonnet 4.6 (cache prices derived from ratios)
51
+ "haiku-4-5": (1.0, 1.25, 0.10, 5.0), # Haiku 4.5 (cache prices derived from ratios)
52
+
53
+ # Claude 4 family fallbacks (Opus 4 / Sonnet 4 / Haiku 4 base).
54
+ "opus-4": (15.0, 18.75, 1.50, 75.0),
55
+ "sonnet-4": (3.0, 3.75, 0.30, 15.0),
56
+ "haiku-4": (1.0, 1.25, 0.10, 5.0),
16
57
  }
17
58
 
59
+ # Anthropic 1h ephemeral cache_creation multiplier on the base input rate.
60
+ CLAUDE_CACHE_CREATE_1H_MULT = 2.0
61
+
18
62
  CODEX_PRICING = {
19
- # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M)
20
- "gpt-5": (1.25, 0.125, 10.0),
21
- "gpt-4": (2.50, 0.625, 10.0),
63
+ # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M).
64
+ # IMPORTANT: substring match order is insertion order. List the most
65
+ # specific keys first (e.g. `gpt-5-mini` before `gpt-5`, `o3-mini` before
66
+ # `o3`, `gpt-4o-mini` before `gpt-4o`, `gpt-4o` before the legacy `gpt-4`).
67
+ # For models with no published cached-input rate (o1-pro, o3-pro), cached
68
+ # is set equal to input as a conservative no-discount default.
69
+
70
+ # GPT-5 series.
71
+ "gpt-5.5": (5.00, 0.50, 30.0),
72
+ "gpt-5.4-mini": (0.75, 0.075, 4.50),
73
+ "gpt-5.4": (2.50, 0.25, 15.0),
74
+ "gpt-5.2-pro": (21.0, 2.10, 168.0),
75
+ "gpt-5.2": (1.75, 0.175, 14.0),
76
+ "gpt-5.1": (1.25, 0.125, 10.0),
77
+ "gpt-5-mini": (0.25, 0.025, 2.00),
78
+ "gpt-5-nano": (0.05, 0.005, 0.40),
79
+ "gpt-5": (1.25, 0.125, 10.0), # base GPT-5 (also matches gpt-5-codex)
80
+
81
+ # O-series reasoning models.
82
+ "o1-pro": (150.0, 150.0, 600.0), # no cached rate published
83
+ "o3-pro": (20.0, 20.0, 80.0), # no cached rate published
84
+ "o4-mini": (1.10, 0.275, 4.40),
85
+ "o3-mini": (1.10, 0.275, 4.40),
86
+ "o1": (15.0, 7.50, 60.0),
87
+ "o3": (2.00, 1.00, 8.00),
88
+
89
+ # GPT-4 series.
90
+ "gpt-4.1-nano": (0.10, 0.01, 0.40),
91
+ "gpt-4.1-mini": (0.40, 0.04, 1.60),
92
+ "gpt-4.1": (2.00, 0.20, 8.00),
93
+ "gpt-4o-mini": (0.15, 0.075, 0.60),
94
+ "gpt-4o": (2.50, 1.25, 10.0),
95
+ "gpt-4": (2.50, 0.625, 10.0), # legacy gpt-4 fallback
22
96
  }
23
97
 
24
98
  GEMINI_PRICING = {
25
- # model substring -> (input USD/1M, output USD/1M); cached not separately priced for short runs
26
- "pro": (1.25, 5.0),
27
- "flash": (0.075, 0.30),
28
- "auto": (1.25, 5.0), # treat unknown as pro
99
+ # model substring -> (input USD/1M, output USD/1M).
100
+ #
101
+ # Cached-input prices exist for some models but are not separately priced
102
+ # here because the Gemini transcript collector does not yet record cached
103
+ # input tokens. Models with two-tier context pricing (Gemini 2.5 Pro,
104
+ # Gemini 3.1 Pro) are charged at the ≤200K rate; runs above 200K input
105
+ # will be slightly undercounted.
106
+ #
107
+ # Both dotted (`gemini-3.1-pro`) and hyphenated (`gemini-3-1-pro`) id
108
+ # forms appear in the wild, so include both for the new 3.x families.
109
+
110
+ # Gemini 3 series (preview).
111
+ "3.1-pro": (2.00, 12.0),
112
+ "3-1-pro": (2.00, 12.0),
113
+ "3-flash": (0.50, 3.00),
114
+
115
+ # Gemini 2.5 series.
116
+ "2.5-flash-lite": (0.10, 0.40),
117
+ "2.5-flash": (0.30, 2.50),
118
+ "2.5-pro": (1.25, 10.0),
119
+
120
+ # Gemini 2.0 series.
121
+ "2.0-flash-lite": (0.075, 0.30),
122
+ "2.0-flash": (0.10, 0.40),
123
+
124
+ # Fallbacks for unspecified family names.
125
+ "flash-lite": (0.10, 0.40), # assume 2.5 Flash-Lite
126
+ "pro": (1.25, 10.0), # assume 2.5 Pro
127
+ "flash": (0.30, 2.50), # assume 2.5 Flash
128
+ "auto": (1.25, 10.0), # treat unknown/auto as 2.5 Pro
29
129
  }
30
130
 
31
131
 
@@ -39,17 +139,53 @@ def _match_pricing(model: str | None, table: dict) -> tuple | None:
39
139
  return None
40
140
 
41
141
 
42
- def claude_billable_equivalent(input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> int:
43
- """Sum normalized to base-input units (cache_creation 1.25x, cache_read 0.1x, output 5x)."""
44
- return int(round(input_t + 1.25 * cache_create_t + 0.1 * cache_read_t + 5.0 * output_t))
142
+ def claude_billable_equivalent(
143
+ input_t: int,
144
+ cache_create_t: int,
145
+ cache_read_t: int,
146
+ output_t: int,
147
+ cache_create_1h_t: int = 0,
148
+ ) -> int:
149
+ """Sum normalized to base-input units.
45
150
 
151
+ Ratios: cache_creation_5m=1.25x, cache_creation_1h=2x, cache_read=0.1x,
152
+ output=5x. `cache_create_t` is the total cache_creation tokens; pass the
153
+ 1h portion separately via `cache_create_1h_t` so the 5m vs 1h tiers are
154
+ weighted correctly (the 5m portion is the difference).
155
+ """
156
+ cc_1h = max(0, cache_create_1h_t)
157
+ cc_5m = max(0, cache_create_t - cc_1h)
158
+ return int(round(
159
+ input_t
160
+ + 1.25 * cc_5m
161
+ + CLAUDE_CACHE_CREATE_1H_MULT * cc_1h
162
+ + 0.1 * cache_read_t
163
+ + 5.0 * output_t
164
+ ))
46
165
 
47
- def claude_cost_usd(model: str | None, input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> float | None:
166
+
167
+ def claude_cost_usd(
168
+ model: str | None,
169
+ input_t: int,
170
+ cache_create_t: int,
171
+ cache_read_t: int,
172
+ output_t: int,
173
+ cache_create_1h_t: int = 0,
174
+ ) -> float | None:
48
175
  p = _match_pricing(model, CLAUDE_PRICING)
49
176
  if p is None:
50
177
  return None
51
178
  pi, pcc, pcr, po = p
52
- return round((input_t * pi + cache_create_t * pcc + cache_read_t * pcr + output_t * po) / 1_000_000, 4)
179
+ cc_1h = max(0, cache_create_1h_t)
180
+ cc_5m = max(0, cache_create_t - cc_1h)
181
+ pcc_1h = pi * CLAUDE_CACHE_CREATE_1H_MULT
182
+ return round((
183
+ input_t * pi
184
+ + cc_5m * pcc
185
+ + cc_1h * pcc_1h
186
+ + cache_read_t * pcr
187
+ + output_t * po
188
+ ) / 1_000_000, 4)
53
189
 
54
190
 
55
191
  def codex_cost_usd(model: str | None, input_t: int, cached_input_t: int, output_t: int) -> float | None:
@@ -68,4 +204,3 @@ def gemini_cost_usd(model: str | None, input_t: int, output_t: int) -> float | N
68
204
  return None
69
205
  pi, po = p
70
206
  return round((input_t * pi + output_t * po) / 1_000_000, 4)
71
-
@@ -18,19 +18,48 @@ def _format_usd(v) -> str:
18
18
  return "$0.00"
19
19
 
20
20
 
21
+ class SubstituteRefusedError(RuntimeError):
22
+ """Raised when substitution would write a zero-only Token Usage Summary.
23
+
24
+ Shipping `0` / `$0.00` in the Lead / Worker / Grand rows is the
25
+ observed silent-failure mode where the collector ran but every
26
+ session jsonl was empty (or the writer fabricated zeros). The
27
+ validator catches it post-hoc, but raising here at the substitution
28
+ boundary surfaces the failure at the exact step where it can still
29
+ be retried with a re-collection.
30
+ """
31
+
32
+
21
33
  def substitute_final_report(report_path: Path, state: dict) -> int:
22
34
  """Replace token-usage placeholders in the final report file with concrete
23
35
  values from the freshly computed usageSummary.
24
36
 
25
37
  Returns the number of placeholder occurrences replaced. If the report file
26
- does not exist, returns -1 without raising. If any required placeholder is
27
- still present after substitution attempts (e.g. usageSummary missing), the
28
- function still writes what it can and returns the count.
38
+ does not exist, returns -1 without raising.
39
+
40
+ Raises ``SubstituteRefusedError`` when ``usageSummary.grandTotalTokens``
41
+ is zero — substituting zeros into the report bakes in the most common
42
+ silent failure mode (collector ran but found nothing). Callers that
43
+ want to suppress the refusal (e.g. unit-test fixtures) can pass a
44
+ summary with ``grandTotalTokens`` > 0 or remove the summary entirely
45
+ so substitution is skipped.
29
46
  """
30
47
  if not report_path.is_file():
31
48
  return -1
32
49
 
33
50
  summary = state.get("usageSummary") or {}
51
+ grand_total = summary.get("grandTotalTokens", 0)
52
+ if isinstance(grand_total, (int, float)) and grand_total == 0 and summary:
53
+ raise SubstituteRefusedError(
54
+ "Refusing to substitute zero-only usageSummary into the final "
55
+ f"report at {report_path}. grandTotalTokens=0 means the "
56
+ "collector ran but every session jsonl was empty (or absent). "
57
+ "Re-run `python3 scripts/okstra-token-usage.py <team-state> "
58
+ "--write --summary --substitute-final-report <report-path>` "
59
+ "after locating the missing session jsonls. To intentionally "
60
+ "ship zeros (test fixtures only), omit `usageSummary` from the "
61
+ "team-state JSON before calling substitute_final_report."
62
+ )
34
63
  cost = summary.get("estimatedCostUsd") or {}
35
64
  lead_cost = cost.get("lead") or 0
36
65
  worker_cost = cost.get("claudeWorkers") or 0