okstra 0.25.1 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.kr.md +16 -0
  2. package/README.md +16 -0
  3. package/docs/kr/architecture.md +3 -7
  4. package/docs/kr/cli.md +47 -4
  5. package/docs/kr/performance-improvement-plan-v2.md +23 -0
  6. package/docs/kr/performance-improvement-plan.md +22 -0
  7. package/docs/superpowers/specs/2026-05-15-implementation-plan-verification-design.md +254 -0
  8. package/package.json +1 -1
  9. package/runtime/BUILD.json +2 -2
  10. package/runtime/agents/SKILL.md +30 -2
  11. package/runtime/bin/okstra.sh +1 -1
  12. package/runtime/prompts/profiles/_common-contract.md +30 -1
  13. package/runtime/prompts/profiles/error-analysis.md +12 -0
  14. package/runtime/prompts/profiles/implementation-planning.md +23 -0
  15. package/runtime/prompts/profiles/requirements-discovery.md +20 -0
  16. package/runtime/python/lib/okstra/cli.sh +8 -7
  17. package/runtime/python/lib/okstra/globals.sh +3 -1
  18. package/runtime/python/lib/okstra/usage.sh +8 -4
  19. package/runtime/python/okstra_ctl/render.py +35 -0
  20. package/runtime/python/okstra_ctl/run.py +27 -6
  21. package/runtime/python/okstra_ctl/run_context.py +1 -1
  22. package/runtime/python/okstra_ctl/wizard.py +259 -10
  23. package/runtime/python/okstra_token_usage/blocks.py +5 -1
  24. package/runtime/python/okstra_token_usage/claude.py +16 -1
  25. package/runtime/python/okstra_token_usage/collect.py +17 -3
  26. package/runtime/python/okstra_token_usage/pricing.py +159 -24
  27. package/runtime/skills/okstra-brief/SKILL.md +532 -65
  28. package/runtime/skills/okstra-context-loader/SKILL.md +25 -11
  29. package/runtime/skills/okstra-convergence/SKILL.md +235 -8
  30. package/runtime/skills/okstra-history/SKILL.md +68 -37
  31. package/runtime/skills/okstra-logs/SKILL.md +26 -4
  32. package/runtime/skills/okstra-report-finder/SKILL.md +49 -22
  33. package/runtime/skills/okstra-report-writer/SKILL.md +59 -64
  34. package/runtime/skills/okstra-run/SKILL.md +53 -39
  35. package/runtime/skills/okstra-schedule/SKILL.md +51 -20
  36. package/runtime/skills/okstra-setup/SKILL.md +31 -12
  37. package/runtime/skills/okstra-status/SKILL.md +20 -8
  38. package/runtime/skills/okstra-team-contract/SKILL.md +27 -15
  39. package/runtime/skills/okstra-time-summary/SKILL.md +53 -16
  40. package/runtime/templates/reports/final-report.template.md +34 -0
  41. package/runtime/templates/reports/settings.template.json +7 -4
  42. package/runtime/validators/lib/fixtures.sh +10 -2
  43. package/runtime/validators/lib/validate-assets.sh +50 -24
  44. package/runtime/validators/validate-brief.py +385 -0
  45. package/runtime/validators/validate-brief.sh +35 -0
  46. package/runtime/validators/validate-run.py +71 -0
  47. package/runtime/validators/validate-workflow.sh +7 -33
  48. package/src/wizard.mjs +21 -5
@@ -1,31 +1,131 @@
1
- """Public list pricing tables and per-provider cost helpers."""
1
+ """Public list pricing tables and per-provider cost helpers.
2
+
3
+ Pricing is matched by substring against the model id recorded in the session
4
+ transcript, so keys must reflect the *actual* model id form emitted by each
5
+ provider:
6
+
7
+ * Anthropic — `claude-opus-4-*`, `claude-sonnet-4-*`, `claude-haiku-4-5-*`,
8
+ `claude-3-5-sonnet-*`, `claude-3-5-haiku-*`, `claude-3-opus-*`,
9
+ `claude-3-haiku-*`.
10
+ * OpenAI / Codex — `gpt-5*`, `gpt-4o*`, `gpt-4*`, `o1*`, `o3*`, `o4-mini*`.
11
+ * Google / Gemini — `gemini-3.1-pro*`, `gemini-3-flash*`, `gemini-2.5-pro*`, `gemini-2.5-flash*`, `gemini-2.0-flash*`.
12
+
13
+ Insertion order is the match order, so list more specific keys first. Update
14
+ the tables below whenever a provider changes its list pricing.
15
+
16
+ Sources (last verified 2026-05-17, public list prices, USD per 1M tokens):
17
+ * Anthropic: https://www.anthropic.com/pricing
18
+ * OpenAI: https://openai.com/api/pricing
19
+ * Google: https://ai.google.dev/gemini-api/docs/pricing
20
+ """
2
21
  from __future__ import annotations
3
22
 
4
23
 
5
- # Public list pricing (USD per 1M tokens). Used for cost estimation only.
6
- # Update when Anthropic / OpenAI / Google change pricing.
7
- # Anthropic billing ratios relative to base input: cache_creation=1.25x, cache_read=0.1x, output=5x.
24
+ # Anthropic billing ratios relative to base input: cache_creation (5m) = 1.25x,
25
+ # cache_creation (1h) = 2x, cache_read = 0.1x, output = 5x. The CLAUDE_PRICING
26
+ # entries below carry the 5m tier; the 1h price is derived as base_input * 2x
27
+ # at call time so the table stays compact.
8
28
  CLAUDE_PRICING = {
9
- # model substring -> (input, cache_creation, cache_read, output) USD/1M
10
- "opus-4": (15.0, 18.75, 1.50, 75.0),
11
- "sonnet-4": (3.0, 3.75, 0.30, 15.0),
12
- "haiku-4": (1.0, 1.25, 0.10, 5.0),
13
- "opus-3": (15.0, 18.75, 1.50, 75.0),
14
- "sonnet-3": (3.0, 3.75, 0.30, 15.0),
15
- "haiku-3": (0.80, 1.0, 0.08, 4.0),
29
+ # model substring -> (input, cache_creation_5m, cache_read, output) USD/1M.
30
+ #
31
+ # Order matters — list more specific keys (e.g. `opus-4-7`, `sonnet-4-6`)
32
+ # before the family fallbacks they also match (`opus-4`, `sonnet-4`).
33
+ #
34
+ # For the newer 4.x point releases (Opus 4.7, Sonnet 4.6, Haiku 4.5),
35
+ # Anthropic's public price page only lists input/output. Cache-write and
36
+ # cache-read are filled in using Anthropic's published billing ratios
37
+ # (5m cache_creation = 1.25x input, cache_read = 0.1x input), which have
38
+ # been consistent across the Claude 3 / 4 families.
39
+
40
+ # Claude 3 series (legacy).
41
+ "3-7-sonnet": (3.0, 3.75, 0.30, 15.0), # Sonnet 3.7
42
+ "3-5-sonnet": (3.0, 3.75, 0.30, 15.0), # Sonnet 3.5
43
+ "3-5-haiku": (0.80, 1.0, 0.08, 4.0), # Haiku 3.5
44
+ "3-opus": (15.0, 18.75, 1.50, 75.0), # Opus 3
45
+ "3-sonnet": (3.0, 3.75, 0.30, 15.0), # legacy 3 Sonnet
46
+ "3-haiku": (0.25, 0.30, 0.03, 1.25), # Haiku 3
47
+
48
+ # Claude 4 point releases (explicit so future divergence is easy to see).
49
+ "opus-4-7": (5.0, 6.25, 0.50, 25.0), # Opus 4.7 (cache prices derived from ratios)
50
+ "sonnet-4-6": (3.0, 3.75, 0.30, 15.0), # Sonnet 4.6 (cache prices derived from ratios)
51
+ "haiku-4-5": (1.0, 1.25, 0.10, 5.0), # Haiku 4.5 (cache prices derived from ratios)
52
+
53
+ # Claude 4 family fallbacks (Opus 4 / Sonnet 4 / Haiku 4 base).
54
+ "opus-4": (15.0, 18.75, 1.50, 75.0),
55
+ "sonnet-4": (3.0, 3.75, 0.30, 15.0),
56
+ "haiku-4": (1.0, 1.25, 0.10, 5.0),
16
57
  }
17
58
 
59
+ # Anthropic 1h ephemeral cache_creation multiplier on the base input rate.
60
+ CLAUDE_CACHE_CREATE_1H_MULT = 2.0
61
+
18
62
  CODEX_PRICING = {
19
- # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M)
20
- "gpt-5": (1.25, 0.125, 10.0),
21
- "gpt-4": (2.50, 0.625, 10.0),
63
+ # model substring -> (input USD/1M, cached_input USD/1M, output USD/1M).
64
+ # IMPORTANT: substring match order is insertion order. List the most
65
+ # specific keys first (e.g. `gpt-5-mini` before `gpt-5`, `o3-mini` before
66
+ # `o3`, `gpt-4o-mini` before `gpt-4o`, `gpt-4o` before the legacy `gpt-4`).
67
+ # For models with no published cached-input rate (o1-pro, o3-pro), cached
68
+ # is set equal to input as a conservative no-discount default.
69
+
70
+ # GPT-5 series.
71
+ "gpt-5.5": (5.00, 0.50, 30.0),
72
+ "gpt-5.4-mini": (0.75, 0.075, 4.50),
73
+ "gpt-5.4": (2.50, 0.25, 15.0),
74
+ "gpt-5.2-pro": (21.0, 2.10, 168.0),
75
+ "gpt-5.2": (1.75, 0.175, 14.0),
76
+ "gpt-5.1": (1.25, 0.125, 10.0),
77
+ "gpt-5-mini": (0.25, 0.025, 2.00),
78
+ "gpt-5-nano": (0.05, 0.005, 0.40),
79
+ "gpt-5": (1.25, 0.125, 10.0), # base GPT-5 (also matches gpt-5-codex)
80
+
81
+ # O-series reasoning models.
82
+ "o1-pro": (150.0, 150.0, 600.0), # no cached rate published
83
+ "o3-pro": (20.0, 20.0, 80.0), # no cached rate published
84
+ "o4-mini": (1.10, 0.275, 4.40),
85
+ "o3-mini": (1.10, 0.275, 4.40),
86
+ "o1": (15.0, 7.50, 60.0),
87
+ "o3": (2.00, 1.00, 8.00),
88
+
89
+ # GPT-4 series.
90
+ "gpt-4.1-nano": (0.10, 0.01, 0.40),
91
+ "gpt-4.1-mini": (0.40, 0.04, 1.60),
92
+ "gpt-4.1": (2.00, 0.20, 8.00),
93
+ "gpt-4o-mini": (0.15, 0.075, 0.60),
94
+ "gpt-4o": (2.50, 1.25, 10.0),
95
+ "gpt-4": (2.50, 0.625, 10.0), # legacy gpt-4 fallback
22
96
  }
23
97
 
24
98
  GEMINI_PRICING = {
25
- # model substring -> (input USD/1M, output USD/1M); cached not separately priced for short runs
26
- "pro": (1.25, 5.0),
27
- "flash": (0.075, 0.30),
28
- "auto": (1.25, 5.0), # treat unknown as pro
99
+ # model substring -> (input USD/1M, output USD/1M).
100
+ #
101
+ # Cached-input rates exist for some models but are not applied separately
102
+ # here because the Gemini transcript collector does not yet record cached
103
+ # input tokens. Models with two-tier context pricing (Gemini 2.5 Pro,
104
+ # Gemini 3.1 Pro) are charged at the ≤200K rate; runs above 200K input
105
+ # will be slightly undercounted.
106
+ #
107
+ # Both dotted (`gemini-3.1-pro`) and hyphenated (`gemini-3-1-pro`) id
108
+ # forms appear in the wild, so include both for the new 3.x families.
109
+
110
+ # Gemini 3 series (preview).
111
+ "3.1-pro": (2.00, 12.0),
112
+ "3-1-pro": (2.00, 12.0),
113
+ "3-flash": (0.50, 3.00),
114
+
115
+ # Gemini 2.5 series.
116
+ "2.5-flash-lite": (0.10, 0.40),
117
+ "2.5-flash": (0.30, 2.50),
118
+ "2.5-pro": (1.25, 10.0),
119
+
120
+ # Gemini 2.0 series.
121
+ "2.0-flash-lite": (0.075, 0.30),
122
+ "2.0-flash": (0.10, 0.40),
123
+
124
+ # Fallbacks for unspecified family names.
125
+ "flash-lite": (0.10, 0.40), # assume 2.5 Flash-Lite
126
+ "pro": (1.25, 10.0), # assume 2.5 Pro
127
+ "flash": (0.30, 2.50), # assume 2.5 Flash
128
+ "auto": (1.25, 10.0), # treat unknown/auto as 2.5 Pro
29
129
  }
30
130
 
31
131
 
@@ -39,17 +139,53 @@ def _match_pricing(model: str | None, table: dict) -> tuple | None:
39
139
  return None
40
140
 
41
141
 
42
- def claude_billable_equivalent(input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> int:
43
- """Sum normalized to base-input units (cache_creation 1.25x, cache_read 0.1x, output 5x)."""
44
- return int(round(input_t + 1.25 * cache_create_t + 0.1 * cache_read_t + 5.0 * output_t))
142
+ def claude_billable_equivalent(
143
+ input_t: int,
144
+ cache_create_t: int,
145
+ cache_read_t: int,
146
+ output_t: int,
147
+ cache_create_1h_t: int = 0,
148
+ ) -> int:
149
+ """Sum normalized to base-input units.
45
150
 
151
+ Ratios: cache_creation_5m=1.25x, cache_creation_1h=2x, cache_read=0.1x,
152
+ output=5x. `cache_create_t` is the total cache_creation tokens; pass the
153
+ 1h portion separately via `cache_create_1h_t` so the 5m vs 1h tiers are
154
+ weighted correctly (the 5m portion is the difference).
155
+ """
156
+ cc_1h = max(0, cache_create_1h_t)
157
+ cc_5m = max(0, cache_create_t - cc_1h)
158
+ return int(round(
159
+ input_t
160
+ + 1.25 * cc_5m
161
+ + CLAUDE_CACHE_CREATE_1H_MULT * cc_1h
162
+ + 0.1 * cache_read_t
163
+ + 5.0 * output_t
164
+ ))
46
165
 
47
- def claude_cost_usd(model: str | None, input_t: int, cache_create_t: int, cache_read_t: int, output_t: int) -> float | None:
166
+
167
+ def claude_cost_usd(
168
+ model: str | None,
169
+ input_t: int,
170
+ cache_create_t: int,
171
+ cache_read_t: int,
172
+ output_t: int,
173
+ cache_create_1h_t: int = 0,
174
+ ) -> float | None:
48
175
  p = _match_pricing(model, CLAUDE_PRICING)
49
176
  if p is None:
50
177
  return None
51
178
  pi, pcc, pcr, po = p
52
- return round((input_t * pi + cache_create_t * pcc + cache_read_t * pcr + output_t * po) / 1_000_000, 4)
179
+ cc_1h = max(0, cache_create_1h_t)
180
+ cc_5m = max(0, cache_create_t - cc_1h)
181
+ pcc_1h = pi * CLAUDE_CACHE_CREATE_1H_MULT
182
+ return round((
183
+ input_t * pi
184
+ + cc_5m * pcc
185
+ + cc_1h * pcc_1h
186
+ + cache_read_t * pcr
187
+ + output_t * po
188
+ ) / 1_000_000, 4)
53
189
 
54
190
 
55
191
  def codex_cost_usd(model: str | None, input_t: int, cached_input_t: int, output_t: int) -> float | None:
@@ -68,4 +204,3 @@ def gemini_cost_usd(model: str | None, input_t: int, output_t: int) -> float | N
68
204
  return None
69
205
  pi, po = p
70
206
  return round((input_t * pi + output_t * po) / 1_000_000, 4)
71
-