llm_cost_tracker 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/README.md +21 -16
  4. data/app/assets/llm_cost_tracker/application.css +3 -0
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +22 -4
  6. data/app/controllers/llm_cost_tracker/calls_controller.rb +6 -11
  7. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +2 -1
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +5 -1
  9. data/app/controllers/llm_cost_tracker/models_controller.rb +0 -1
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +1 -8
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +2 -1
  12. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +1 -2
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -1
  14. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +10 -27
  15. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +58 -0
  16. data/app/models/llm_cost_tracker/ingestion/event.rb +13 -0
  17. data/app/models/llm_cost_tracker/ingestion/lease.rb +11 -0
  18. data/app/models/llm_cost_tracker/ledger/call.rb +45 -0
  19. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +66 -0
  20. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +71 -0
  21. data/app/models/llm_cost_tracker/ledger/period/total.rb +13 -0
  22. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +19 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +111 -94
  24. data/app/services/llm_cost_tracker/dashboard/date_range.rb +2 -2
  25. data/app/services/llm_cost_tracker/dashboard/filter.rb +7 -18
  26. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +58 -67
  27. data/app/services/llm_cost_tracker/dashboard/pagination.rb +59 -0
  28. data/app/services/llm_cost_tracker/dashboard/params.rb +26 -0
  29. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +18 -20
  30. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -13
  31. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +28 -61
  32. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +8 -21
  33. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/top_models.rb +12 -47
  35. data/app/views/llm_cost_tracker/calls/index.html.erb +12 -18
  36. data/app/views/llm_cost_tracker/calls/show.html.erb +30 -32
  37. data/app/views/llm_cost_tracker/dashboard/index.html.erb +17 -19
  38. data/app/views/llm_cost_tracker/data_quality/index.html.erb +108 -135
  39. data/app/views/llm_cost_tracker/models/index.html.erb +8 -9
  40. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +13 -2
  41. data/app/views/llm_cost_tracker/tags/show.html.erb +20 -20
  42. data/lib/llm_cost_tracker/budget.rb +8 -20
  43. data/lib/llm_cost_tracker/capture/stream.rb +9 -0
  44. data/lib/llm_cost_tracker/capture/stream_collector.rb +189 -0
  45. data/lib/llm_cost_tracker/{integrations → capture}/stream_tracker.rb +41 -73
  46. data/lib/llm_cost_tracker/configuration/instrumentation.rb +3 -7
  47. data/lib/llm_cost_tracker/configuration.rb +33 -36
  48. data/lib/llm_cost_tracker/doctor/capture_verifier.rb +61 -0
  49. data/lib/llm_cost_tracker/doctor/check.rb +7 -0
  50. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +22 -59
  51. data/lib/llm_cost_tracker/doctor/price_check.rb +60 -0
  52. data/lib/llm_cost_tracker/doctor.rb +63 -71
  53. data/lib/llm_cost_tracker/errors.rb +4 -15
  54. data/lib/llm_cost_tracker/event.rb +6 -6
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +42 -0
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +7 -7
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +3 -3
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +22 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +9 -14
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +0 -4
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +12 -1
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +2 -2
  64. data/lib/llm_cost_tracker/{storage/active_record_inbox_batch.rb → ingestion/batch.rb} +21 -20
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +105 -0
  66. data/lib/llm_cost_tracker/{storage/active_record_ingestor_lease.rb → ingestion/lease_claim.rb} +5 -7
  67. data/lib/llm_cost_tracker/{storage/active_record_ingestor.rb → ingestion/worker.rb} +38 -48
  68. data/lib/llm_cost_tracker/ingestion.rb +129 -0
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +66 -31
  70. data/lib/llm_cost_tracker/integrations/base.rb +73 -34
  71. data/lib/llm_cost_tracker/integrations/openai.rb +43 -37
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +40 -30
  73. data/lib/llm_cost_tracker/integrations.rb +43 -0
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +66 -0
  75. data/lib/llm_cost_tracker/{storage/active_record_periods.rb → ledger/period.rb} +2 -2
  76. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +43 -0
  77. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +46 -0
  78. data/lib/llm_cost_tracker/ledger/rollups.rb +87 -0
  79. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +51 -0
  80. data/lib/llm_cost_tracker/ledger/schema/calls.rb +101 -0
  81. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +32 -0
  82. data/lib/llm_cost_tracker/ledger/store.rb +60 -0
  83. data/lib/llm_cost_tracker/ledger/tags/query.rb +29 -0
  84. data/lib/llm_cost_tracker/ledger/tags/sql.rb +33 -0
  85. data/lib/llm_cost_tracker/ledger.rb +13 -0
  86. data/lib/llm_cost_tracker/logging.rb +3 -6
  87. data/lib/llm_cost_tracker/middleware/faraday.rb +88 -46
  88. data/lib/llm_cost_tracker/parsers/anthropic.rb +62 -29
  89. data/lib/llm_cost_tracker/parsers/base.rb +12 -21
  90. data/lib/llm_cost_tracker/parsers/gemini.rb +50 -25
  91. data/lib/llm_cost_tracker/parsers/openai.rb +27 -5
  92. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +14 -4
  93. data/lib/llm_cost_tracker/parsers/openai_usage.rb +58 -25
  94. data/lib/llm_cost_tracker/parsers/sse.rb +4 -7
  95. data/lib/llm_cost_tracker/parsers.rb +20 -0
  96. data/lib/llm_cost_tracker/prices.json +361 -36
  97. data/lib/llm_cost_tracker/pricing/components.rb +37 -0
  98. data/lib/llm_cost_tracker/pricing/effective_prices.rb +46 -50
  99. data/lib/llm_cost_tracker/pricing/explainer.rb +25 -30
  100. data/lib/llm_cost_tracker/pricing/lookup.rb +67 -46
  101. data/lib/llm_cost_tracker/pricing/registry.rb +156 -0
  102. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +107 -0
  103. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +53 -0
  104. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +63 -0
  105. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +31 -0
  106. data/lib/llm_cost_tracker/pricing/sync.rb +159 -0
  107. data/lib/llm_cost_tracker/pricing/unknown.rb +46 -0
  108. data/lib/llm_cost_tracker/pricing.rb +33 -32
  109. data/lib/llm_cost_tracker/railtie.rb +7 -8
  110. data/lib/llm_cost_tracker/report/data.rb +72 -0
  111. data/lib/llm_cost_tracker/report/formatter.rb +69 -0
  112. data/lib/llm_cost_tracker/report.rb +8 -8
  113. data/lib/llm_cost_tracker/retention.rb +27 -10
  114. data/lib/llm_cost_tracker/tags/context.rb +35 -0
  115. data/lib/llm_cost_tracker/tags/key.rb +18 -0
  116. data/lib/llm_cost_tracker/tags/sanitizer.rb +68 -0
  117. data/lib/llm_cost_tracker/token_usage.rb +67 -0
  118. data/lib/llm_cost_tracker/tracker.rb +39 -69
  119. data/lib/llm_cost_tracker/usage_capture.rb +37 -0
  120. data/lib/llm_cost_tracker/version.rb +1 -1
  121. data/lib/llm_cost_tracker.rb +56 -78
  122. data/lib/tasks/llm_cost_tracker.rake +18 -13
  123. metadata +54 -58
  124. data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +0 -81
  125. data/app/services/llm_cost_tracker/pagination.rb +0 -57
  126. data/lib/llm_cost_tracker/active_record_adapter.rb +0 -53
  127. data/lib/llm_cost_tracker/capture_verifier.rb +0 -64
  128. data/lib/llm_cost_tracker/cost.rb +0 -12
  129. data/lib/llm_cost_tracker/doctor/capture_check.rb +0 -39
  130. data/lib/llm_cost_tracker/event_metadata.rb +0 -52
  131. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +0 -29
  132. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +0 -29
  133. data/lib/llm_cost_tracker/inbox_event.rb +0 -9
  134. data/lib/llm_cost_tracker/ingestor_lease.rb +0 -9
  135. data/lib/llm_cost_tracker/integrations/object_reader.rb +0 -56
  136. data/lib/llm_cost_tracker/integrations/registry.rb +0 -71
  137. data/lib/llm_cost_tracker/llm_api_call.rb +0 -60
  138. data/lib/llm_cost_tracker/llm_api_call_metrics.rb +0 -63
  139. data/lib/llm_cost_tracker/parameter_hash.rb +0 -33
  140. data/lib/llm_cost_tracker/parsed_usage.rb +0 -72
  141. data/lib/llm_cost_tracker/parsers/registry.rb +0 -58
  142. data/lib/llm_cost_tracker/period_grouping.rb +0 -67
  143. data/lib/llm_cost_tracker/period_total.rb +0 -9
  144. data/lib/llm_cost_tracker/price_freshness.rb +0 -38
  145. data/lib/llm_cost_tracker/price_registry.rb +0 -144
  146. data/lib/llm_cost_tracker/price_sync/fetcher.rb +0 -104
  147. data/lib/llm_cost_tracker/price_sync/registry_diff.rb +0 -51
  148. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +0 -61
  149. data/lib/llm_cost_tracker/price_sync/registry_writer.rb +0 -29
  150. data/lib/llm_cost_tracker/price_sync.rb +0 -144
  151. data/lib/llm_cost_tracker/report_data.rb +0 -94
  152. data/lib/llm_cost_tracker/report_formatter.rb +0 -67
  153. data/lib/llm_cost_tracker/request_url.rb +0 -20
  154. data/lib/llm_cost_tracker/storage/active_record_backend.rb +0 -167
  155. data/lib/llm_cost_tracker/storage/active_record_connection_cleanup.rb +0 -13
  156. data/lib/llm_cost_tracker/storage/active_record_inbox.rb +0 -160
  157. data/lib/llm_cost_tracker/storage/active_record_period_totals.rb +0 -84
  158. data/lib/llm_cost_tracker/storage/active_record_rollup_batch.rb +0 -41
  159. data/lib/llm_cost_tracker/storage/active_record_rollup_upsert_sql.rb +0 -42
  160. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +0 -146
  161. data/lib/llm_cost_tracker/storage/active_record_store.rb +0 -145
  162. data/lib/llm_cost_tracker/storage/writer.rb +0 -35
  163. data/lib/llm_cost_tracker/stream_capture.rb +0 -7
  164. data/lib/llm_cost_tracker/stream_collector.rb +0 -199
  165. data/lib/llm_cost_tracker/tag_accessors.rb +0 -15
  166. data/lib/llm_cost_tracker/tag_context.rb +0 -52
  167. data/lib/llm_cost_tracker/tag_key.rb +0 -16
  168. data/lib/llm_cost_tracker/tag_query.rb +0 -43
  169. data/lib/llm_cost_tracker/tag_sanitizer.rb +0 -81
  170. data/lib/llm_cost_tracker/tag_sql.rb +0 -34
  171. data/lib/llm_cost_tracker/tags_column.rb +0 -105
  172. data/lib/llm_cost_tracker/unknown_pricing.rb +0 -54
  173. data/lib/llm_cost_tracker/usage_breakdown.rb +0 -30
  174. data/lib/llm_cost_tracker/value_helpers.rb +0 -40
@@ -1,12 +1,16 @@
1
1
  {
2
2
  "metadata": {
3
- "updated_at": "2026-04-27",
3
+ "updated_at": "2026-05-01",
4
4
  "currency": "USD",
5
5
  "unit": "1M tokens",
6
6
  "source_urls": [
7
7
  "https://developers.openai.com/api/docs/pricing",
8
8
  "https://platform.claude.com/docs/en/about-claude/pricing",
9
- "https://ai.google.dev/pricing"
9
+ "https://ai.google.dev/gemini-api/docs/pricing",
10
+ "https://console.groq.com/docs/models",
11
+ "https://console.groq.com/docs/prompt-caching",
12
+ "https://console.groq.com/docs/flex-processing",
13
+ "https://console.groq.com/docs/service-tiers"
10
14
  ],
11
15
  "schema_version": 1,
12
16
  "min_gem_version": "0.4.0"
@@ -17,6 +21,7 @@
17
21
  "output": 5.0,
18
22
  "cache_read_input": 0.1,
19
23
  "cache_write_input": 1.25,
24
+ "cache_write_1h_input": 2.0,
20
25
  "batch_input": 0.5,
21
26
  "batch_output": 2.5
22
27
  },
@@ -25,6 +30,7 @@
25
30
  "output": 75.0,
26
31
  "cache_read_input": 1.5,
27
32
  "cache_write_input": 18.75,
33
+ "cache_write_1h_input": 30.0,
28
34
  "batch_input": 7.5,
29
35
  "batch_output": 37.5
30
36
  },
@@ -33,6 +39,7 @@
33
39
  "output": 75.0,
34
40
  "cache_read_input": 1.5,
35
41
  "cache_write_input": 18.75,
42
+ "cache_write_1h_input": 30.0,
36
43
  "batch_input": 7.5,
37
44
  "batch_output": 37.5
38
45
  },
@@ -41,6 +48,7 @@
41
48
  "output": 25.0,
42
49
  "cache_read_input": 0.5,
43
50
  "cache_write_input": 6.25,
51
+ "cache_write_1h_input": 10.0,
44
52
  "batch_input": 2.5,
45
53
  "batch_output": 12.5
46
54
  },
@@ -49,22 +57,49 @@
49
57
  "output": 25.0,
50
58
  "cache_read_input": 0.5,
51
59
  "cache_write_input": 6.25,
60
+ "cache_write_1h_input": 10.0,
52
61
  "batch_input": 2.5,
53
- "batch_output": 12.5
62
+ "batch_output": 12.5,
63
+ "data_residency_input": 5.5,
64
+ "data_residency_cache_write_input": 6.875,
65
+ "data_residency_cache_write_1h_input": 11.0,
66
+ "data_residency_cache_read_input": 0.55,
67
+ "data_residency_output": 27.5,
68
+ "data_residency_batch_input": 2.75,
69
+ "data_residency_batch_output": 13.75,
70
+ "fast_input": 30.0,
71
+ "fast_cache_write_input": 37.5,
72
+ "fast_cache_write_1h_input": 60.0,
73
+ "fast_cache_read_input": 3.0,
74
+ "fast_output": 150.0,
75
+ "fast_data_residency_input": 33.0,
76
+ "fast_data_residency_cache_write_input": 41.25,
77
+ "fast_data_residency_cache_write_1h_input": 66.0,
78
+ "fast_data_residency_cache_read_input": 3.3,
79
+ "fast_data_residency_output": 165.0
54
80
  },
55
81
  "anthropic/claude-opus-4-7": {
56
82
  "input": 5.0,
57
83
  "output": 25.0,
58
84
  "cache_read_input": 0.5,
59
85
  "cache_write_input": 6.25,
86
+ "cache_write_1h_input": 10.0,
60
87
  "batch_input": 2.5,
61
- "batch_output": 12.5
88
+ "batch_output": 12.5,
89
+ "data_residency_input": 5.5,
90
+ "data_residency_cache_write_input": 6.875,
91
+ "data_residency_cache_write_1h_input": 11.0,
92
+ "data_residency_cache_read_input": 0.55,
93
+ "data_residency_output": 27.5,
94
+ "data_residency_batch_input": 2.75,
95
+ "data_residency_batch_output": 13.75
62
96
  },
63
97
  "anthropic/claude-sonnet-4": {
64
98
  "input": 3.0,
65
99
  "output": 15.0,
66
100
  "cache_read_input": 0.3,
67
101
  "cache_write_input": 3.75,
102
+ "cache_write_1h_input": 6.0,
68
103
  "batch_input": 1.5,
69
104
  "batch_output": 7.5
70
105
  },
@@ -73,6 +108,7 @@
73
108
  "output": 15.0,
74
109
  "cache_read_input": 0.3,
75
110
  "cache_write_input": 3.75,
111
+ "cache_write_1h_input": 6.0,
76
112
  "batch_input": 1.5,
77
113
  "batch_output": 7.5
78
114
  },
@@ -81,15 +117,24 @@
81
117
  "output": 15.0,
82
118
  "cache_read_input": 0.3,
83
119
  "cache_write_input": 3.75,
120
+ "cache_write_1h_input": 6.0,
84
121
  "batch_input": 1.5,
85
- "batch_output": 7.5
122
+ "batch_output": 7.5,
123
+ "data_residency_input": 3.3,
124
+ "data_residency_cache_write_input": 4.125,
125
+ "data_residency_cache_write_1h_input": 6.6,
126
+ "data_residency_cache_read_input": 0.33,
127
+ "data_residency_output": 16.5,
128
+ "data_residency_batch_input": 1.65,
129
+ "data_residency_batch_output": 8.25
86
130
  },
87
131
  "gemini/gemini-2.0-flash": {
88
132
  "input": 0.1,
89
133
  "cache_read_input": 0.025,
90
134
  "output": 0.4,
91
135
  "batch_input": 0.05,
92
- "batch_output": 0.2
136
+ "batch_output": 0.2,
137
+ "batch_cache_read_input": 0.025
93
138
  },
94
139
  "gemini/gemini-2.0-flash-lite": {
95
140
  "input": 0.075,
@@ -101,25 +146,94 @@
101
146
  "input": 0.3,
102
147
  "output": 2.5,
103
148
  "cache_read_input": 0.03,
104
- "cache_write_input": 0.083333333333,
105
149
  "batch_input": 0.15,
106
- "batch_output": 1.25
150
+ "batch_output": 1.25,
151
+ "batch_cache_read_input": 0.03,
152
+ "flex_input": 0.15,
153
+ "flex_output": 1.25,
154
+ "flex_cache_read_input": 0.03,
155
+ "priority_input": 0.54,
156
+ "priority_output": 4.5,
157
+ "priority_cache_read_input": 0.054
107
158
  },
108
159
  "gemini/gemini-2.5-flash-lite": {
109
160
  "input": 0.1,
110
161
  "output": 0.4,
111
162
  "cache_read_input": 0.01,
112
- "cache_write_input": 0.083333333333,
113
163
  "batch_input": 0.05,
114
- "batch_output": 0.2
164
+ "batch_output": 0.2,
165
+ "batch_cache_read_input": 0.01,
166
+ "flex_input": 0.05,
167
+ "flex_output": 0.2,
168
+ "flex_cache_read_input": 0.01,
169
+ "priority_input": 0.18,
170
+ "priority_output": 0.72,
171
+ "priority_cache_read_input": 0.018
115
172
  },
116
173
  "gemini/gemini-2.5-pro": {
117
174
  "input": 1.25,
118
175
  "output": 10.0,
119
176
  "cache_read_input": 0.125,
120
- "cache_write_input": 0.375,
121
177
  "batch_input": 0.625,
122
- "batch_output": 5.0
178
+ "batch_output": 5.0,
179
+ "batch_cache_read_input": 0.125,
180
+ "_context_price_threshold_tokens": 200000,
181
+ "above_context_input": 2.5,
182
+ "above_context_output": 15.0,
183
+ "above_context_cache_read_input": 0.25,
184
+ "above_context_batch_input": 1.25,
185
+ "above_context_batch_output": 7.5,
186
+ "above_context_batch_cache_read_input": 0.25,
187
+ "flex_input": 0.625,
188
+ "flex_output": 5.0,
189
+ "above_context_flex_input": 1.25,
190
+ "above_context_flex_output": 7.5,
191
+ "flex_cache_read_input": 0.125,
192
+ "above_context_flex_cache_read_input": 0.25,
193
+ "priority_input": 2.25,
194
+ "priority_output": 18.0,
195
+ "above_context_priority_input": 4.5,
196
+ "above_context_priority_output": 27.0,
197
+ "priority_cache_read_input": 0.225,
198
+ "above_context_priority_cache_read_input": 0.45
199
+ },
200
+ "groq/llama-3.1-8b-instant": {
201
+ "input": 0.05,
202
+ "output": 0.08,
203
+ "on_demand_input": 0.05,
204
+ "on_demand_output": 0.08,
205
+ "flex_input": 0.05,
206
+ "flex_output": 0.08
207
+ },
208
+ "groq/llama-3.3-70b-versatile": {
209
+ "input": 0.59,
210
+ "output": 0.79,
211
+ "on_demand_input": 0.59,
212
+ "on_demand_output": 0.79,
213
+ "flex_input": 0.59,
214
+ "flex_output": 0.79
215
+ },
216
+ "groq/openai/gpt-oss-120b": {
217
+ "input": 0.15,
218
+ "cache_read_input": 0.075,
219
+ "output": 0.6,
220
+ "on_demand_input": 0.15,
221
+ "on_demand_cache_read_input": 0.075,
222
+ "on_demand_output": 0.6,
223
+ "flex_input": 0.15,
224
+ "flex_cache_read_input": 0.075,
225
+ "flex_output": 0.6
226
+ },
227
+ "groq/openai/gpt-oss-20b": {
228
+ "input": 0.075,
229
+ "cache_read_input": 0.0375,
230
+ "output": 0.3,
231
+ "on_demand_input": 0.075,
232
+ "on_demand_cache_read_input": 0.0375,
233
+ "on_demand_output": 0.3,
234
+ "flex_input": 0.075,
235
+ "flex_cache_read_input": 0.0375,
236
+ "flex_output": 0.3
123
237
  },
124
238
  "openai/gpt-3.5-turbo": {
125
239
  "input": 0.5,
@@ -142,41 +256,58 @@
142
256
  "output": 8.0,
143
257
  "cache_read_input": 0.5,
144
258
  "batch_input": 1.0,
145
- "batch_output": 4.0
259
+ "batch_output": 4.0,
260
+ "priority_input": 3.5,
261
+ "priority_output": 14.0,
262
+ "priority_cache_read_input": 0.875
146
263
  },
147
264
  "openai/gpt-4.1-mini": {
148
265
  "input": 0.4,
149
266
  "output": 1.6,
150
267
  "cache_read_input": 0.1,
151
268
  "batch_input": 0.2,
152
- "batch_output": 0.8
269
+ "batch_output": 0.8,
270
+ "priority_input": 0.7,
271
+ "priority_output": 2.8,
272
+ "priority_cache_read_input": 0.175
153
273
  },
154
274
  "openai/gpt-4.1-nano": {
155
275
  "input": 0.1,
156
276
  "output": 0.4,
157
277
  "cache_read_input": 0.025,
158
278
  "batch_input": 0.05,
159
- "batch_output": 0.2
279
+ "batch_output": 0.2,
280
+ "priority_input": 0.2,
281
+ "priority_output": 0.8,
282
+ "priority_cache_read_input": 0.05
160
283
  },
161
284
  "openai/gpt-4o": {
162
285
  "input": 2.5,
163
286
  "output": 10.0,
164
287
  "cache_read_input": 1.25,
165
288
  "batch_input": 1.25,
166
- "batch_output": 5.0
289
+ "batch_output": 5.0,
290
+ "priority_input": 4.25,
291
+ "priority_output": 17.0,
292
+ "priority_cache_read_input": 2.125
167
293
  },
168
294
  "openai/gpt-4o-2024-05-13": {
169
295
  "input": 5.0,
170
296
  "output": 15.0,
171
297
  "batch_input": 2.5,
172
- "batch_output": 7.5
298
+ "batch_output": 7.5,
299
+ "priority_input": 8.75,
300
+ "priority_output": 26.25
173
301
  },
174
302
  "openai/gpt-4o-mini": {
175
303
  "input": 0.15,
176
304
  "output": 0.6,
177
305
  "cache_read_input": 0.075,
178
306
  "batch_input": 0.075,
179
- "batch_output": 0.3
307
+ "batch_output": 0.3,
308
+ "priority_input": 0.25,
309
+ "priority_output": 1.0,
310
+ "priority_cache_read_input": 0.125
180
311
  },
181
312
  "openai/gpt-5": {
182
313
  "input": 1.25,
@@ -184,7 +315,13 @@
184
315
  "cache_read_input": 0.125,
185
316
  "batch_input": 0.625,
186
317
  "batch_output": 5.0,
187
- "batch_cache_read_input": 0.0625
318
+ "batch_cache_read_input": 0.0625,
319
+ "flex_input": 0.625,
320
+ "flex_output": 5.0,
321
+ "flex_cache_read_input": 0.0625,
322
+ "priority_input": 2.5,
323
+ "priority_output": 20.0,
324
+ "priority_cache_read_input": 0.25
188
325
  },
189
326
  "openai/gpt-5-chat-latest": {
190
327
  "input": 1.25,
@@ -194,7 +331,10 @@
194
331
  "openai/gpt-5-codex": {
195
332
  "input": 1.25,
196
333
  "output": 10.0,
197
- "cache_read_input": 0.125
334
+ "cache_read_input": 0.125,
335
+ "priority_input": 2.5,
336
+ "priority_output": 20.0,
337
+ "priority_cache_read_input": 0.25
198
338
  },
199
339
  "openai/gpt-5-mini": {
200
340
  "input": 0.25,
@@ -202,7 +342,13 @@
202
342
  "cache_read_input": 0.025,
203
343
  "batch_input": 0.125,
204
344
  "batch_output": 1.0,
205
- "batch_cache_read_input": 0.0125
345
+ "batch_cache_read_input": 0.0125,
346
+ "flex_input": 0.125,
347
+ "flex_output": 1.0,
348
+ "flex_cache_read_input": 0.0125,
349
+ "priority_input": 0.45,
350
+ "priority_output": 3.6,
351
+ "priority_cache_read_input": 0.045
206
352
  },
207
353
  "openai/gpt-5-nano": {
208
354
  "input": 0.05,
@@ -210,7 +356,10 @@
210
356
  "cache_read_input": 0.005,
211
357
  "batch_input": 0.025,
212
358
  "batch_output": 0.2,
213
- "batch_cache_read_input": 0.0025
359
+ "batch_cache_read_input": 0.0025,
360
+ "flex_input": 0.025,
361
+ "flex_output": 0.2,
362
+ "flex_cache_read_input": 0.0025
214
363
  },
215
364
  "openai/gpt-5-pro": {
216
365
  "input": 15.0,
@@ -224,7 +373,13 @@
224
373
  "cache_read_input": 0.125,
225
374
  "batch_input": 0.625,
226
375
  "batch_output": 5.0,
227
- "batch_cache_read_input": 0.0625
376
+ "batch_cache_read_input": 0.0625,
377
+ "flex_input": 0.625,
378
+ "flex_output": 5.0,
379
+ "flex_cache_read_input": 0.0625,
380
+ "priority_input": 2.5,
381
+ "priority_output": 20.0,
382
+ "priority_cache_read_input": 0.25
228
383
  },
229
384
  "openai/gpt-5.1-chat-latest": {
230
385
  "input": 1.25,
@@ -234,12 +389,18 @@
234
389
  "openai/gpt-5.1-codex": {
235
390
  "input": 1.25,
236
391
  "output": 10.0,
237
- "cache_read_input": 0.125
392
+ "cache_read_input": 0.125,
393
+ "priority_input": 2.5,
394
+ "priority_output": 20.0,
395
+ "priority_cache_read_input": 0.25
238
396
  },
239
397
  "openai/gpt-5.1-codex-max": {
240
398
  "input": 1.25,
241
399
  "output": 10.0,
242
- "cache_read_input": 0.125
400
+ "cache_read_input": 0.125,
401
+ "priority_input": 2.5,
402
+ "priority_output": 20.0,
403
+ "priority_cache_read_input": 0.25
243
404
  },
244
405
  "openai/gpt-5.1-codex-mini": {
245
406
  "input": 0.25,
@@ -252,7 +413,13 @@
252
413
  "cache_read_input": 0.175,
253
414
  "batch_input": 0.875,
254
415
  "batch_output": 7.0,
255
- "batch_cache_read_input": 0.0875
416
+ "batch_cache_read_input": 0.0875,
417
+ "flex_input": 0.875,
418
+ "flex_output": 7.0,
419
+ "flex_cache_read_input": 0.0875,
420
+ "priority_input": 3.5,
421
+ "priority_output": 28.0,
422
+ "priority_cache_read_input": 0.35
256
423
  },
257
424
  "openai/gpt-5.2-chat-latest": {
258
425
  "input": 1.75,
@@ -262,7 +429,10 @@
262
429
  "openai/gpt-5.2-codex": {
263
430
  "input": 1.75,
264
431
  "output": 14.0,
265
- "cache_read_input": 0.175
432
+ "cache_read_input": 0.175,
433
+ "priority_input": 3.5,
434
+ "priority_output": 28.0,
435
+ "priority_cache_read_input": 0.35
266
436
  },
267
437
  "openai/gpt-5.2-pro": {
268
438
  "input": 21.0,
@@ -276,7 +446,44 @@
276
446
  "cache_read_input": 0.25,
277
447
  "batch_input": 1.25,
278
448
  "batch_output": 7.5,
279
- "batch_cache_read_input": 0.13
449
+ "batch_cache_read_input": 0.13,
450
+ "_context_price_threshold_tokens": 272000,
451
+ "above_context_input": 5.0,
452
+ "above_context_output": 22.5,
453
+ "above_context_cache_read_input": 0.5,
454
+ "above_context_batch_input": 2.5,
455
+ "above_context_batch_output": 11.25,
456
+ "above_context_batch_cache_read_input": 0.25,
457
+ "flex_input": 1.25,
458
+ "flex_output": 7.5,
459
+ "flex_cache_read_input": 0.13,
460
+ "above_context_flex_input": 2.5,
461
+ "above_context_flex_output": 11.25,
462
+ "above_context_flex_cache_read_input": 0.25,
463
+ "priority_input": 5.0,
464
+ "priority_output": 30.0,
465
+ "priority_cache_read_input": 0.5,
466
+ "data_residency_input": 2.75,
467
+ "data_residency_output": 16.5,
468
+ "data_residency_cache_read_input": 0.275,
469
+ "above_context_data_residency_input": 5.5,
470
+ "above_context_data_residency_output": 24.75,
471
+ "above_context_data_residency_cache_read_input": 0.55,
472
+ "batch_data_residency_input": 1.375,
473
+ "batch_data_residency_output": 8.25,
474
+ "batch_data_residency_cache_read_input": 0.143,
475
+ "above_context_batch_data_residency_input": 2.75,
476
+ "above_context_batch_data_residency_output": 12.375,
477
+ "above_context_batch_data_residency_cache_read_input": 0.275,
478
+ "flex_data_residency_input": 1.375,
479
+ "flex_data_residency_output": 8.25,
480
+ "flex_data_residency_cache_read_input": 0.143,
481
+ "above_context_flex_data_residency_input": 2.75,
482
+ "above_context_flex_data_residency_output": 12.375,
483
+ "above_context_flex_data_residency_cache_read_input": 0.275,
484
+ "priority_data_residency_input": 5.5,
485
+ "priority_data_residency_output": 33.0,
486
+ "priority_data_residency_cache_read_input": 0.55
280
487
  },
281
488
  "openai/gpt-5.4-mini": {
282
489
  "input": 0.75,
@@ -284,7 +491,25 @@
284
491
  "cache_read_input": 0.075,
285
492
  "batch_input": 0.375,
286
493
  "batch_output": 2.25,
287
- "batch_cache_read_input": 0.0375
494
+ "batch_cache_read_input": 0.0375,
495
+ "flex_input": 0.375,
496
+ "flex_output": 2.25,
497
+ "flex_cache_read_input": 0.0375,
498
+ "priority_input": 1.5,
499
+ "priority_output": 9.0,
500
+ "priority_cache_read_input": 0.15,
501
+ "data_residency_input": 0.825,
502
+ "data_residency_output": 4.95,
503
+ "data_residency_cache_read_input": 0.0825,
504
+ "batch_data_residency_input": 0.4125,
505
+ "batch_data_residency_output": 2.475,
506
+ "batch_data_residency_cache_read_input": 0.04125,
507
+ "flex_data_residency_input": 0.4125,
508
+ "flex_data_residency_output": 2.475,
509
+ "flex_data_residency_cache_read_input": 0.04125,
510
+ "priority_data_residency_input": 1.65,
511
+ "priority_data_residency_output": 9.9,
512
+ "priority_data_residency_cache_read_input": 0.165
288
513
  },
289
514
  "openai/gpt-5.4-nano": {
290
515
  "input": 0.2,
@@ -292,13 +517,46 @@
292
517
  "cache_read_input": 0.02,
293
518
  "batch_input": 0.1,
294
519
  "batch_output": 0.625,
295
- "batch_cache_read_input": 0.01
520
+ "batch_cache_read_input": 0.01,
521
+ "flex_input": 0.1,
522
+ "flex_output": 0.625,
523
+ "flex_cache_read_input": 0.01,
524
+ "data_residency_input": 0.22,
525
+ "data_residency_output": 1.375,
526
+ "data_residency_cache_read_input": 0.022,
527
+ "batch_data_residency_input": 0.11,
528
+ "batch_data_residency_output": 0.6875,
529
+ "batch_data_residency_cache_read_input": 0.011,
530
+ "flex_data_residency_input": 0.11,
531
+ "flex_data_residency_output": 0.6875,
532
+ "flex_data_residency_cache_read_input": 0.011
296
533
  },
297
534
  "openai/gpt-5.4-pro": {
298
535
  "input": 30.0,
299
536
  "output": 180.0,
300
537
  "batch_input": 15.0,
301
- "batch_output": 90.0
538
+ "batch_output": 90.0,
539
+ "_context_price_threshold_tokens": 272000,
540
+ "above_context_input": 60.0,
541
+ "above_context_output": 270.0,
542
+ "above_context_batch_input": 30.0,
543
+ "above_context_batch_output": 135.0,
544
+ "flex_input": 15.0,
545
+ "flex_output": 90.0,
546
+ "above_context_flex_input": 30.0,
547
+ "above_context_flex_output": 135.0,
548
+ "data_residency_input": 33.0,
549
+ "data_residency_output": 198.0,
550
+ "above_context_data_residency_input": 66.0,
551
+ "above_context_data_residency_output": 297.0,
552
+ "batch_data_residency_input": 16.5,
553
+ "batch_data_residency_output": 99.0,
554
+ "above_context_batch_data_residency_input": 33.0,
555
+ "above_context_batch_data_residency_output": 148.5,
556
+ "flex_data_residency_input": 16.5,
557
+ "flex_data_residency_output": 99.0,
558
+ "above_context_flex_data_residency_input": 33.0,
559
+ "above_context_flex_data_residency_output": 148.5
302
560
  },
303
561
  "openai/gpt-5.5": {
304
562
  "input": 5.0,
@@ -306,13 +564,63 @@
306
564
  "cache_read_input": 0.5,
307
565
  "batch_input": 2.5,
308
566
  "batch_output": 15.0,
309
- "batch_cache_read_input": 0.25
567
+ "batch_cache_read_input": 0.25,
568
+ "_context_price_threshold_tokens": 272000,
569
+ "above_context_input": 10.0,
570
+ "above_context_output": 45.0,
571
+ "above_context_cache_read_input": 1.0,
572
+ "above_context_batch_input": 5.0,
573
+ "above_context_batch_output": 22.5,
574
+ "above_context_batch_cache_read_input": 0.5,
575
+ "flex_input": 2.5,
576
+ "flex_output": 15.0,
577
+ "flex_cache_read_input": 0.25,
578
+ "above_context_flex_input": 5.0,
579
+ "above_context_flex_output": 22.5,
580
+ "above_context_flex_cache_read_input": 0.5,
581
+ "priority_input": 12.5,
582
+ "priority_output": 75.0,
583
+ "priority_cache_read_input": 1.25,
584
+ "data_residency_input": 5.5,
585
+ "data_residency_output": 33.0,
586
+ "data_residency_cache_read_input": 0.55,
587
+ "above_context_data_residency_input": 11.0,
588
+ "above_context_data_residency_output": 49.5,
589
+ "above_context_data_residency_cache_read_input": 1.1,
590
+ "batch_data_residency_input": 2.75,
591
+ "batch_data_residency_output": 16.5,
592
+ "batch_data_residency_cache_read_input": 0.275,
593
+ "above_context_batch_data_residency_input": 5.5,
594
+ "above_context_batch_data_residency_output": 24.75,
595
+ "above_context_batch_data_residency_cache_read_input": 0.55,
596
+ "flex_data_residency_input": 2.75,
597
+ "flex_data_residency_output": 16.5,
598
+ "flex_data_residency_cache_read_input": 0.275,
599
+ "above_context_flex_data_residency_input": 5.5,
600
+ "above_context_flex_data_residency_output": 24.75,
601
+ "above_context_flex_data_residency_cache_read_input": 0.55,
602
+ "priority_data_residency_input": 13.75,
603
+ "priority_data_residency_output": 82.5,
604
+ "priority_data_residency_cache_read_input": 1.375
310
605
  },
311
606
  "openai/gpt-5.5-pro": {
312
607
  "input": 30.0,
313
608
  "output": 180.0,
314
609
  "batch_input": 15.0,
315
- "batch_output": 90.0
610
+ "batch_output": 90.0,
611
+ "_context_price_threshold_tokens": 272000,
612
+ "above_context_input": 60.0,
613
+ "above_context_output": 270.0,
614
+ "flex_input": 15.0,
615
+ "flex_output": 90.0,
616
+ "data_residency_input": 33.0,
617
+ "data_residency_output": 198.0,
618
+ "above_context_data_residency_input": 66.0,
619
+ "above_context_data_residency_output": 297.0,
620
+ "batch_data_residency_input": 16.5,
621
+ "batch_data_residency_output": 99.0,
622
+ "flex_data_residency_input": 16.5,
623
+ "flex_data_residency_output": 99.0
316
624
  },
317
625
  "openai/o1": {
318
626
  "input": 15.0,
@@ -333,7 +641,13 @@
333
641
  "output": 8.0,
334
642
  "cache_read_input": 0.5,
335
643
  "batch_input": 1.0,
336
- "batch_output": 4.0
644
+ "batch_output": 4.0,
645
+ "flex_input": 1.0,
646
+ "flex_output": 4.0,
647
+ "flex_cache_read_input": 0.25,
648
+ "priority_input": 3.5,
649
+ "priority_output": 14.0,
650
+ "priority_cache_read_input": 0.875
337
651
  },
338
652
  "openai/o3-mini": {
339
653
  "input": 1.1,
@@ -347,11 +661,18 @@
347
661
  "output": 4.4,
348
662
  "cache_read_input": 0.275,
349
663
  "batch_input": 0.55,
350
- "batch_output": 2.2
664
+ "batch_output": 2.2,
665
+ "flex_input": 0.55,
666
+ "flex_output": 2.2,
667
+ "flex_cache_read_input": 0.138,
668
+ "priority_input": 2.0,
669
+ "priority_output": 8.0,
670
+ "priority_cache_read_input": 0.5
351
671
  },
352
672
  "anthropic/claude-haiku-3-5": {
353
673
  "input": 0.8,
354
674
  "cache_write_input": 1.0,
675
+ "cache_write_1h_input": 1.6,
355
676
  "cache_read_input": 0.08,
356
677
  "output": 4.0,
357
678
  "batch_input": 0.4,
@@ -360,6 +681,7 @@
360
681
  "anthropic/claude-haiku-3": {
361
682
  "input": 0.25,
362
683
  "cache_write_input": 0.3,
684
+ "cache_write_1h_input": 0.5,
363
685
  "cache_read_input": 0.03,
364
686
  "output": 1.25,
365
687
  "batch_input": 0.125,
@@ -389,7 +711,10 @@
389
711
  "openai/gpt-5.3-codex": {
390
712
  "input": 1.75,
391
713
  "output": 14.0,
392
- "cache_read_input": 0.175
714
+ "cache_read_input": 0.175,
715
+ "priority_input": 3.5,
716
+ "priority_output": 28.0,
717
+ "priority_cache_read_input": 0.35
393
718
  },
394
719
  "openai/codex-mini-latest": {
395
720
  "input": 1.5,
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Pricing
5
+ Component = Data.define(:price_key, :token_key, :cost_key)
6
+
7
+ COMPONENTS = [
8
+ Component.new(
9
+ price_key: :input,
10
+ token_key: :input_tokens,
11
+ cost_key: :input_cost
12
+ ),
13
+ Component.new(
14
+ price_key: :cache_read_input,
15
+ token_key: :cache_read_input_tokens,
16
+ cost_key: :cache_read_input_cost
17
+ ),
18
+ Component.new(
19
+ price_key: :cache_write_input,
20
+ token_key: :cache_write_input_tokens,
21
+ cost_key: :cache_write_input_cost
22
+ ),
23
+ Component.new(
24
+ price_key: :cache_write_1h_input,
25
+ token_key: :cache_write_1h_input_tokens,
26
+ cost_key: :cache_write_1h_input_cost
27
+ ),
28
+ Component.new(
29
+ price_key: :output,
30
+ token_key: :output_tokens,
31
+ cost_key: :output_cost
32
+ )
33
+ ].freeze
34
+
35
+ COST_KEYS = (COMPONENTS.map(&:cost_key) + %i[total_cost]).freeze
36
+ end
37
+ end