llm_cost_tracker 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/README.md +34 -14
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +101 -19
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +65 -0
- data/lib/llm_cost_tracker/budget.rb +85 -21
- data/lib/llm_cost_tracker/configuration.rb +4 -0
- data/lib/llm_cost_tracker/cost.rb +1 -2
- data/lib/llm_cost_tracker/errors.rb +22 -3
- data/lib/llm_cost_tracker/event.rb +4 -0
- data/lib/llm_cost_tracker/event_metadata.rb +21 -15
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +66 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +11 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +1 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +27 -9
- data/lib/llm_cost_tracker/parsed_usage.rb +16 -7
- data/lib/llm_cost_tracker/parsers/anthropic.rb +7 -6
- data/lib/llm_cost_tracker/parsers/base.rb +2 -1
- data/lib/llm_cost_tracker/parsers/gemini.rb +5 -2
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +18 -5
- data/lib/llm_cost_tracker/period_total.rb +9 -0
- data/lib/llm_cost_tracker/price_registry.rb +14 -4
- data/lib/llm_cost_tracker/price_sync/merger.rb +1 -1
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +3 -5
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +2 -3
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +2 -3
- data/lib/llm_cost_tracker/prices.json +30 -30
- data/lib/llm_cost_tracker/pricing.rb +44 -32
- data/lib/llm_cost_tracker/railtie.rb +2 -0
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +122 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +38 -13
- data/lib/llm_cost_tracker/stream_collector.rb +5 -3
- data/lib/llm_cost_tracker/tags_column.rb +19 -0
- data/lib/llm_cost_tracker/tracker.rb +58 -32
- data/lib/llm_cost_tracker/unknown_pricing.rb +14 -0
- data/lib/llm_cost_tracker/usage_breakdown.rb +30 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +12 -3
- metadata +10 -4
- data/llm_cost_tracker.gemspec +0 -50
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ccb9a8365f4a06026a4352385efa1318ac59ce403cb848e0c9aff992fc80f64c
|
|
4
|
+
data.tar.gz: f21503cd322e923dc5bde0139cc61bc1547cef01eac59fe7a3861e1ab33e9860
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 304ab6de6404f070b21b1dd72ce9eae2b44fb2fc7845eae8831a04971ed2b8ec2b6f740bc082fb36cfa42d90f0be59ab5800d43d72b68f04918a113b6d7d8cbd
|
|
7
|
+
data.tar.gz: afa2e92a99062bb1e0b4a00ab1d0762ca688f1890e0d76a29801881e2319e68db217c036d8f8c5d99558b580d5d4c039f8b3334283631763ee093fd12d369329
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,41 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.4.0] - 2026-04-24
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- BREAKING: Canonical usage and pricing now use `cache_read_input` / `cache_write_input` instead of `cached_input` / `cache_creation_input`.
|
|
12
|
+
- BREAKING: `Pricing.cost_for` now requires `provider:` and prefers provider-specific price entries before model-only entries.
|
|
13
|
+
- BREAKING: Fresh ActiveRecord installs include cache-read, cache-write, and hidden-output token/cost breakdown columns.
|
|
14
|
+
- BREAKING: ActiveRecord budget rollups now use `llm_cost_tracker_period_totals`.
|
|
15
|
+
- BREAKING: `llm_cost_tracker:add_monthly_totals` was replaced by `llm_cost_tracker:add_period_totals`.
|
|
16
|
+
- Added the `llm_cost_tracker:add_usage_breakdown` generator for upgrading existing ActiveRecord installs.
|
|
17
|
+
- Added the `llm_cost_tracker:add_period_totals` generator for upgrading existing ActiveRecord installs.
|
|
18
|
+
- Added generic `pricing_mode` support with mode-prefixed local price keys.
|
|
19
|
+
- Data Quality now shows usage bucket totals and hidden-output share.
|
|
20
|
+
- Added daily budget and per-call budget guardrails.
|
|
21
|
+
|
|
22
|
+
## [0.3.3] - 2026-04-24
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
|
|
26
|
+
- Monthly rollup totals for ActiveRecord budget checks, plus `llm_cost_tracker:add_monthly_totals` for upgrading existing installs.
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
|
|
30
|
+
- ActiveRecord monthly totals now update through a single atomic upsert.
|
|
31
|
+
- Faraday stream capture overflow now records `usage_source: "unknown"` instead of dropping the tracked event.
|
|
32
|
+
- Budget `:notify` callbacks now fire only on the first event that crosses the monthly limit.
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Treat `config.enabled = false` as a global kill switch for direct `track` and `track_stream` calls too.
|
|
37
|
+
- Deduplicate unknown-pricing warnings per model.
|
|
38
|
+
- Detect streaming requests from parsed JSON instead of raw body substring matching.
|
|
39
|
+
- Cap automatic SSE capture to avoid unbounded memory growth on large streaming responses.
|
|
40
|
+
- Warn that the generated PostgreSQL `tags -> jsonb` upgrade migration rewrites large tables and should run in a maintenance window.
|
|
41
|
+
|
|
7
42
|
## [0.3.2] - 2026-04-22
|
|
8
43
|
|
|
9
44
|
### Added
|
data/README.md
CHANGED
|
@@ -15,7 +15,7 @@ Every Rails app with LLM integrations eventually runs into the same question: wh
|
|
|
15
15
|
|
|
16
16
|
## What You Get
|
|
17
17
|
|
|
18
|
-
- A local ActiveRecord ledger of provider, model,
|
|
18
|
+
- A local ActiveRecord ledger of provider, model, usage breakdown, cost, latency, tags, streaming usage, and provider response IDs
|
|
19
19
|
- Faraday middleware plus explicit `track` / `track_stream` helpers for non-Faraday clients
|
|
20
20
|
- Server-rendered Rails dashboard with overview, calls, tags, CSV export, and data-quality pages
|
|
21
21
|
- Local pricing snapshots, price sync tasks, and budget guardrails
|
|
@@ -159,7 +159,7 @@ LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") d
|
|
|
159
159
|
end
|
|
160
160
|
```
|
|
161
161
|
|
|
162
|
-
Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
|
|
162
|
+
Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs. Run `bin/rails g llm_cost_tracker:add_usage_breakdown` to add cache-read, cache-write, hidden-output, and pricing-mode columns.
|
|
163
163
|
|
|
164
164
|
### Manual tracking
|
|
165
165
|
|
|
@@ -176,6 +176,10 @@ LlmCostTracker.track(
|
|
|
176
176
|
)
|
|
177
177
|
```
|
|
178
178
|
|
|
179
|
+
`input_tokens` is regular non-cache input. Put cache hits in
|
|
180
|
+
`cache_read_input_tokens` and cache writes in `cache_write_input_tokens`; total
|
|
181
|
+
tokens are calculated from the canonical billing breakdown.
|
|
182
|
+
|
|
179
183
|
## Configuration
|
|
180
184
|
|
|
181
185
|
```ruby
|
|
@@ -185,17 +189,19 @@ LlmCostTracker.configure do |config|
|
|
|
185
189
|
config.default_tags = { app: "my_app", environment: Rails.env }
|
|
186
190
|
|
|
187
191
|
config.monthly_budget = 500.00
|
|
192
|
+
config.daily_budget = 50.00
|
|
193
|
+
config.per_call_budget = 2.00
|
|
188
194
|
config.budget_exceeded_behavior = :notify # :notify, :raise, :block_requests
|
|
189
195
|
config.storage_error_behavior = :warn # :ignore, :warn, :raise
|
|
190
196
|
config.unknown_pricing_behavior = :warn # :ignore, :warn, :raise
|
|
191
197
|
|
|
192
198
|
config.on_budget_exceeded = ->(data) {
|
|
193
|
-
SlackNotifier.notify("#alerts", "🚨 LLM budget $#{data[:
|
|
199
|
+
SlackNotifier.notify("#alerts", "🚨 LLM #{data[:budget_type]} budget $#{data[:total].round(2)} / $#{data[:budget]}")
|
|
194
200
|
}
|
|
195
201
|
|
|
196
202
|
config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
|
|
197
203
|
config.pricing_overrides = {
|
|
198
|
-
"ft:gpt-4o-mini:my-org" => { input: 0.30,
|
|
204
|
+
"ft:gpt-4o-mini:my-org" => { input: 0.30, cache_read_input: 0.15, output: 1.20 }
|
|
199
205
|
}
|
|
200
206
|
|
|
201
207
|
# Built-in: openrouter.ai, api.deepseek.com
|
|
@@ -203,7 +209,9 @@ LlmCostTracker.configure do |config|
|
|
|
203
209
|
end
|
|
204
210
|
```
|
|
205
211
|
|
|
206
|
-
Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts,
|
|
212
|
+
Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, alternate pricing modes, or models the gem does not know.
|
|
213
|
+
Provider-specific entries like `openai/gpt-4o-mini` win over model-only entries like `gpt-4o-mini`.
|
|
214
|
+
Pass `pricing_mode: :batch` to use optional mode-specific keys such as `batch_input` / `batch_output`; missing mode-specific keys fall back to standard `input` / `output` rates. The same pattern works for custom modes, for example `contract_input`.
|
|
207
215
|
|
|
208
216
|
`storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
|
|
209
217
|
|
|
@@ -225,7 +233,7 @@ bin/rails generate llm_cost_tracker:prices
|
|
|
225
233
|
{
|
|
226
234
|
"metadata": { "updated_at": "2026-04-18", "currency": "USD", "unit": "1M tokens" },
|
|
227
235
|
"models": {
|
|
228
|
-
"my-gateway/gpt-4o-mini": { "input": 0.20, "
|
|
236
|
+
"my-gateway/gpt-4o-mini": { "input": 0.20, "cache_read_input": 0.10, "output": 0.80, "batch_input": 0.10, "batch_output": 0.40 }
|
|
229
237
|
}
|
|
230
238
|
}
|
|
231
239
|
```
|
|
@@ -256,16 +264,22 @@ Large price changes are flagged during sync. If a specific entry is expected to
|
|
|
256
264
|
```ruby
|
|
257
265
|
config.storage_backend = :active_record
|
|
258
266
|
config.monthly_budget = 100.00
|
|
267
|
+
config.daily_budget = 10.00
|
|
268
|
+
config.per_call_budget = 1.00
|
|
259
269
|
config.budget_exceeded_behavior = :block_requests
|
|
260
270
|
```
|
|
261
271
|
|
|
262
272
|
- `:notify` — fire `on_budget_exceeded` after an event pushes the monthly or daily total over budget, or when a single call reaches `per_call_budget`.
|
|
263
273
|
- `:raise` — record the event, then raise `BudgetExceededError`.
|
|
264
|
-
- `:block_requests` — block preflight when the stored monthly total is already over budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage.
|
|
274
|
+
- `:block_requests` — block requests at preflight when the stored monthly or daily total has already reached the budget; still raises post-response on the event that crosses the line. Needs `:active_record` storage for preflight.
|
|
275
|
+
|
|
276
|
+
`monthly_budget` and `daily_budget` are cumulative ledger limits. `per_call_budget` is a ceiling for a single priced event and is checked after the response cost is known.
|
|
277
|
+
|
|
278
|
+
ActiveRecord installs keep `llm_cost_tracker_period_totals` in sync with atomic upserts. Budget preflight reads period rollups instead of scanning `llm_api_calls`.
|
|
265
279
|
|
|
266
280
|
```ruby
|
|
267
281
|
rescue LlmCostTracker::BudgetExceededError => e
|
|
268
|
-
# e.
|
|
282
|
+
# e.budget_type, e.total, e.budget, e.monthly_total, e.daily_total, e.call_cost, e.last_event
|
|
269
283
|
```
|
|
270
284
|
|
|
271
285
|
`:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
|
|
@@ -343,12 +357,15 @@ On other adapters tags fall back to JSON in a text column. `by_tag` uses JSONB c
|
|
|
343
357
|
Upgrade an existing install:
|
|
344
358
|
|
|
345
359
|
```bash
|
|
360
|
+
bin/rails generate llm_cost_tracker:add_period_totals # shared budget rollups
|
|
346
361
|
bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb # PG: text → jsonb + GIN
|
|
347
362
|
bin/rails generate llm_cost_tracker:upgrade_cost_precision # widen cost columns
|
|
348
363
|
bin/rails generate llm_cost_tracker:add_latency_ms
|
|
349
364
|
bin/rails db:migrate
|
|
350
365
|
```
|
|
351
366
|
|
|
367
|
+
On PostgreSQL, the generated `upgrade_tags_to_jsonb` migration rewrites `llm_api_calls`. Run it during a maintenance window on large tables, or replace it with a two-phase backfill for zero-downtime deploys.
|
|
368
|
+
|
|
352
369
|
## Mounting the dashboard
|
|
353
370
|
|
|
354
371
|
Optional Rails Engine. Plain ERB, no JavaScript framework, no asset pipeline required. Requires Rails 7.1+; the core middleware works without Rails.
|
|
@@ -400,12 +417,14 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
|
|
|
400
417
|
# payload =>
|
|
401
418
|
# {
|
|
402
419
|
# provider: "openai", model: "gpt-4o",
|
|
403
|
-
# input_tokens: 150,
|
|
420
|
+
# input_tokens: 150, cache_read_input_tokens: 0, cache_write_input_tokens: 0,
|
|
421
|
+
# hidden_output_tokens: 0, output_tokens: 42, total_tokens: 192, latency_ms: 248,
|
|
404
422
|
# cost: {
|
|
405
|
-
# input_cost: 0.000375,
|
|
406
|
-
#
|
|
407
|
-
#
|
|
423
|
+
# input_cost: 0.000375, cache_read_input_cost: 0.0,
|
|
424
|
+
# cache_write_input_cost: 0.0, output_cost: 0.00042,
|
|
425
|
+
# total_cost: 0.000795, currency: "USD"
|
|
408
426
|
# },
|
|
427
|
+
# pricing_mode: "batch",
|
|
409
428
|
# tags: { feature: "chat", user_id: 42 },
|
|
410
429
|
# tracked_at: 2026-04-16 14:30:00 UTC
|
|
411
430
|
# }
|
|
@@ -508,11 +527,12 @@ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and
|
|
|
508
527
|
- `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
|
|
509
528
|
- Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
|
|
510
529
|
- `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
|
|
511
|
-
-
|
|
512
|
-
- OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
|
|
530
|
+
- Cache write TTL variants (1h vs 5min writes) are not modeled separately.
|
|
513
531
|
|
|
514
532
|
## Development
|
|
515
533
|
|
|
534
|
+
Architecture rules for future changes live in [`docs/architecture.md`](docs/architecture.md).
|
|
535
|
+
|
|
516
536
|
```bash
|
|
517
537
|
bundle install
|
|
518
538
|
bundle exec rspec
|
|
@@ -13,6 +13,16 @@ module LlmCostTracker
|
|
|
13
13
|
:stream_column_present,
|
|
14
14
|
:missing_provider_response_id_count,
|
|
15
15
|
:provider_response_id_column_present,
|
|
16
|
+
:usage_breakdown_column_present,
|
|
17
|
+
:input_tokens,
|
|
18
|
+
:cache_read_input_tokens,
|
|
19
|
+
:cache_write_input_tokens,
|
|
20
|
+
:output_tokens,
|
|
21
|
+
:hidden_output_tokens,
|
|
22
|
+
:input_cost,
|
|
23
|
+
:cache_read_input_cost,
|
|
24
|
+
:cache_write_input_cost,
|
|
25
|
+
:output_cost,
|
|
16
26
|
:unknown_pricing_by_model
|
|
17
27
|
)
|
|
18
28
|
|
|
@@ -20,32 +30,104 @@ module LlmCostTracker
|
|
|
20
30
|
class << self
|
|
21
31
|
def call(scope: LlmCostTracker::LlmApiCall.all)
|
|
22
32
|
total = scope.count
|
|
23
|
-
latency_present = LlmCostTracker::LlmApiCall.latency_column?
|
|
24
|
-
stream_present = LlmCostTracker::LlmApiCall.stream_column?
|
|
25
|
-
provider_response_id_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
|
|
26
33
|
|
|
27
34
|
DataQualityStats.new(
|
|
28
35
|
total_calls: total,
|
|
29
36
|
unknown_pricing_count: scope.unknown_pricing.count,
|
|
30
37
|
untagged_calls_count: total - scope.with_json_tags.count,
|
|
38
|
+
**latency_stats(scope),
|
|
39
|
+
**stream_stats(scope),
|
|
40
|
+
**provider_response_id_stats(scope),
|
|
41
|
+
**usage_stats(scope),
|
|
42
|
+
unknown_pricing_by_model: unknown_pricing_by_model(scope)
|
|
43
|
+
)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def latency_stats(scope)
|
|
49
|
+
latency_present = LlmCostTracker::LlmApiCall.latency_column?
|
|
50
|
+
|
|
51
|
+
{
|
|
31
52
|
missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
|
|
32
|
-
latency_column_present: latency_present
|
|
53
|
+
latency_column_present: latency_present
|
|
54
|
+
}
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def stream_stats(scope)
|
|
58
|
+
stream_present = LlmCostTracker::LlmApiCall.stream_column?
|
|
59
|
+
|
|
60
|
+
{
|
|
33
61
|
streaming_count: stream_present ? scope.streaming.count : nil,
|
|
34
|
-
streaming_missing_usage_count:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
62
|
+
streaming_missing_usage_count: streaming_missing_usage_count(scope, stream_present),
|
|
63
|
+
stream_column_present: stream_present
|
|
64
|
+
}
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def provider_response_id_stats(scope)
|
|
68
|
+
column_present = LlmCostTracker::LlmApiCall.provider_response_id_column?
|
|
69
|
+
|
|
70
|
+
{
|
|
71
|
+
missing_provider_response_id_count: column_present ? scope.missing_provider_response_id.count : nil,
|
|
72
|
+
provider_response_id_column_present: column_present
|
|
73
|
+
}
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def usage_stats(scope)
|
|
77
|
+
usage_breakdown_present = LlmCostTracker::LlmApiCall.usage_breakdown_columns?
|
|
78
|
+
usage_breakdown_cost_present = LlmCostTracker::LlmApiCall.usage_breakdown_cost_columns?
|
|
79
|
+
sums = sum_columns(scope, usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present))
|
|
80
|
+
|
|
81
|
+
{
|
|
82
|
+
usage_breakdown_column_present: usage_breakdown_present,
|
|
83
|
+
input_tokens: sums[:input_tokens].to_i,
|
|
84
|
+
cache_read_input_tokens: usage_breakdown_present ? sums[:cache_read_input_tokens].to_i : nil,
|
|
85
|
+
cache_write_input_tokens: usage_breakdown_present ? sums[:cache_write_input_tokens].to_i : nil,
|
|
86
|
+
output_tokens: sums[:output_tokens].to_i,
|
|
87
|
+
hidden_output_tokens: usage_breakdown_present ? sums[:hidden_output_tokens].to_i : nil,
|
|
88
|
+
input_cost: decimal_sum(sums[:input_cost]),
|
|
89
|
+
cache_read_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_read_input_cost]) : nil,
|
|
90
|
+
cache_write_input_cost: usage_breakdown_cost_present ? decimal_sum(sums[:cache_write_input_cost]) : nil,
|
|
91
|
+
output_cost: decimal_sum(sums[:output_cost])
|
|
92
|
+
}
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def usage_sum_columns(usage_breakdown_present, usage_breakdown_cost_present)
|
|
96
|
+
columns = %i[input_tokens output_tokens input_cost output_cost]
|
|
97
|
+
if usage_breakdown_present
|
|
98
|
+
columns += %i[cache_read_input_tokens cache_write_input_tokens hidden_output_tokens]
|
|
99
|
+
end
|
|
100
|
+
columns += %i[cache_read_input_cost cache_write_input_cost] if usage_breakdown_cost_present
|
|
101
|
+
columns
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def streaming_missing_usage_count(scope, stream_present)
|
|
105
|
+
return unless stream_present && LlmCostTracker::LlmApiCall.usage_source_column?
|
|
106
|
+
|
|
107
|
+
scope.streaming_missing_usage.count
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def unknown_pricing_by_model(scope)
|
|
111
|
+
scope.unknown_pricing
|
|
112
|
+
.group(:model)
|
|
113
|
+
.order(Arel.sql("COUNT(*) DESC"))
|
|
114
|
+
.count
|
|
115
|
+
.first(10)
|
|
116
|
+
.to_h
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def sum_columns(scope, columns)
|
|
120
|
+
values = scope.unscope(:order).pick(*columns.map { |column| sum_expression(scope, column) })
|
|
121
|
+
|
|
122
|
+
columns.zip(values).to_h
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def sum_expression(scope, column)
|
|
126
|
+
Arel.sql("COALESCE(SUM(#{scope.connection.quote_column_name(column)}), 0)")
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def decimal_sum(value)
|
|
130
|
+
value.to_f.round(8)
|
|
49
131
|
end
|
|
50
132
|
end
|
|
51
133
|
end
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
<% streaming_count = @stats.streaming_count %>
|
|
3
3
|
<% streaming_missing_usage = @stats.streaming_missing_usage_count %>
|
|
4
4
|
<% calls_with_provider_response_id = @stats.provider_response_id_column_present ? total - @stats.missing_provider_response_id_count : nil %>
|
|
5
|
+
<% billable_tokens = @stats.input_tokens + @stats.output_tokens + @stats.cache_read_input_tokens.to_i + @stats.cache_write_input_tokens.to_i %>
|
|
6
|
+
<% hidden_output_share = coverage_percent(@stats.hidden_output_tokens.to_i, @stats.output_tokens) %>
|
|
5
7
|
|
|
6
8
|
<section class="lct-panel lct-toolbar">
|
|
7
9
|
<div class="lct-toolbar-head">
|
|
@@ -118,6 +120,14 @@
|
|
|
118
120
|
<p class="lct-stat-sub"><%= percent(coverage_percent(calls_with_provider_response_id, total)) %> of calls</p>
|
|
119
121
|
</article>
|
|
120
122
|
<% end %>
|
|
123
|
+
|
|
124
|
+
<% if @stats.usage_breakdown_column_present && @stats.output_tokens.positive? %>
|
|
125
|
+
<article class="lct-stat">
|
|
126
|
+
<p class="lct-stat-label">Hidden output share</p>
|
|
127
|
+
<p class="lct-stat-value"><%= percent(hidden_output_share) %></p>
|
|
128
|
+
<p class="lct-stat-sub"><%= number(@stats.hidden_output_tokens) %> of <%= number(@stats.output_tokens) %> output tokens</p>
|
|
129
|
+
</article>
|
|
130
|
+
<% end %>
|
|
121
131
|
</div>
|
|
122
132
|
</div>
|
|
123
133
|
</section>
|
|
@@ -243,6 +253,61 @@
|
|
|
243
253
|
</section>
|
|
244
254
|
</section>
|
|
245
255
|
|
|
256
|
+
<% if @stats.usage_breakdown_column_present %>
|
|
257
|
+
<section class="lct-panel">
|
|
258
|
+
<div class="lct-section-head">
|
|
259
|
+
<div>
|
|
260
|
+
<h2 class="lct-section-title">Usage breakdown</h2>
|
|
261
|
+
</div>
|
|
262
|
+
</div>
|
|
263
|
+
|
|
264
|
+
<div class="lct-table-wrap">
|
|
265
|
+
<table class="lct-table lct-table-compact">
|
|
266
|
+
<thead>
|
|
267
|
+
<tr>
|
|
268
|
+
<th>Bucket</th>
|
|
269
|
+
<th class="lct-num">Tokens</th>
|
|
270
|
+
<th class="lct-num">Share</th>
|
|
271
|
+
<th class="lct-num">Cost</th>
|
|
272
|
+
</tr>
|
|
273
|
+
</thead>
|
|
274
|
+
<tbody>
|
|
275
|
+
<tr>
|
|
276
|
+
<td>Regular input</td>
|
|
277
|
+
<td class="lct-num"><%= number(@stats.input_tokens) %></td>
|
|
278
|
+
<td class="lct-num"><%= percent(coverage_percent(@stats.input_tokens, billable_tokens)) %></td>
|
|
279
|
+
<td class="lct-num"><%= money(@stats.input_cost) %></td>
|
|
280
|
+
</tr>
|
|
281
|
+
<tr>
|
|
282
|
+
<td>Cache read input</td>
|
|
283
|
+
<td class="lct-num"><%= number(@stats.cache_read_input_tokens) %></td>
|
|
284
|
+
<td class="lct-num"><%= percent(coverage_percent(@stats.cache_read_input_tokens, billable_tokens)) %></td>
|
|
285
|
+
<td class="lct-num<%= ' lct-num-muted' if @stats.cache_read_input_cost.nil? %>"><%= optional_money(@stats.cache_read_input_cost) %></td>
|
|
286
|
+
</tr>
|
|
287
|
+
<tr>
|
|
288
|
+
<td>Cache write input</td>
|
|
289
|
+
<td class="lct-num"><%= number(@stats.cache_write_input_tokens) %></td>
|
|
290
|
+
<td class="lct-num"><%= percent(coverage_percent(@stats.cache_write_input_tokens, billable_tokens)) %></td>
|
|
291
|
+
<td class="lct-num<%= ' lct-num-muted' if @stats.cache_write_input_cost.nil? %>"><%= optional_money(@stats.cache_write_input_cost) %></td>
|
|
292
|
+
</tr>
|
|
293
|
+
<tr>
|
|
294
|
+
<td>Output</td>
|
|
295
|
+
<td class="lct-num"><%= number(@stats.output_tokens) %></td>
|
|
296
|
+
<td class="lct-num"><%= percent(coverage_percent(@stats.output_tokens, billable_tokens)) %></td>
|
|
297
|
+
<td class="lct-num"><%= money(@stats.output_cost) %></td>
|
|
298
|
+
</tr>
|
|
299
|
+
<tr>
|
|
300
|
+
<td>Hidden output</td>
|
|
301
|
+
<td class="lct-num"><%= number(@stats.hidden_output_tokens) %></td>
|
|
302
|
+
<td class="lct-num"><%= percent(hidden_output_share) %> of output</td>
|
|
303
|
+
<td class="lct-num lct-num-muted">n/a</td>
|
|
304
|
+
</tr>
|
|
305
|
+
</tbody>
|
|
306
|
+
</table>
|
|
307
|
+
</div>
|
|
308
|
+
</section>
|
|
309
|
+
<% end %>
|
|
310
|
+
|
|
246
311
|
<% unless @stats.unknown_pricing_by_model.empty? %>
|
|
247
312
|
<section class="lct-panel">
|
|
248
313
|
<div class="lct-section-head">
|
|
@@ -7,52 +7,116 @@ module LlmCostTracker
|
|
|
7
7
|
class << self
|
|
8
8
|
def enforce!
|
|
9
9
|
config = LlmCostTracker.configuration
|
|
10
|
-
return unless config.monthly_budget
|
|
11
10
|
return unless config.budget_exceeded_behavior == :block_requests
|
|
12
11
|
return unless config.active_record?
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
handle_exceeded(monthly_total: monthly_total)
|
|
13
|
+
enforce_period_budget(:monthly, config.monthly_budget)
|
|
14
|
+
enforce_period_budget(:daily, config.daily_budget)
|
|
18
15
|
end
|
|
19
16
|
|
|
20
17
|
def check!(event)
|
|
21
18
|
config = LlmCostTracker.configuration
|
|
22
|
-
return unless config.monthly_budget
|
|
23
19
|
return unless event.cost
|
|
24
20
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
event.cost.total_cost
|
|
29
|
-
end
|
|
30
|
-
return unless monthly_total >= config.monthly_budget
|
|
31
|
-
|
|
32
|
-
handle_exceeded(monthly_total: monthly_total, last_event: event)
|
|
21
|
+
check_per_call_budget(event, config)
|
|
22
|
+
check_period_budget(event, config, :daily, config.daily_budget)
|
|
23
|
+
check_period_budget(event, config, :monthly, config.monthly_budget)
|
|
33
24
|
end
|
|
34
25
|
|
|
35
26
|
private
|
|
36
27
|
|
|
37
|
-
def
|
|
28
|
+
def enforce_period_budget(period, budget)
|
|
29
|
+
return unless budget
|
|
30
|
+
|
|
31
|
+
total = active_record_total(period, time: Time.now.utc)
|
|
32
|
+
return unless total >= budget
|
|
33
|
+
|
|
34
|
+
handle_exceeded(budget_type: period, total: total, budget: budget)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def check_per_call_budget(event, config)
|
|
38
|
+
budget = config.per_call_budget
|
|
39
|
+
return unless budget
|
|
40
|
+
|
|
41
|
+
call_cost = event.cost.total_cost
|
|
42
|
+
return unless call_cost >= budget
|
|
43
|
+
|
|
44
|
+
handle_exceeded(budget_type: :per_call, total: call_cost, budget: budget, last_event: event)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def check_period_budget(event, config, period, budget)
|
|
48
|
+
return unless budget
|
|
49
|
+
|
|
50
|
+
total = if config.active_record?
|
|
51
|
+
active_record_total(period, time: event.tracked_at)
|
|
52
|
+
else
|
|
53
|
+
event.cost.total_cost
|
|
54
|
+
end
|
|
55
|
+
return unless total >= budget
|
|
56
|
+
|
|
57
|
+
handle_exceeded(budget_type: period, total: total, budget: budget, last_event: event)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def active_record_total(period, time:)
|
|
61
|
+
case period
|
|
62
|
+
when :monthly then active_record_monthly_total(time: time)
|
|
63
|
+
when :daily then active_record_daily_total(time: time)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def active_record_monthly_total(time: Time.now.utc)
|
|
38
68
|
require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
|
|
39
69
|
require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
|
|
40
70
|
|
|
41
|
-
LlmCostTracker::Storage::ActiveRecordStore.monthly_total
|
|
71
|
+
LlmCostTracker::Storage::ActiveRecordStore.monthly_total(time: time)
|
|
42
72
|
rescue LoadError => e
|
|
43
73
|
raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
|
|
44
74
|
end
|
|
45
75
|
|
|
46
|
-
def
|
|
76
|
+
def active_record_daily_total(time: Time.now.utc)
|
|
77
|
+
require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
|
|
78
|
+
require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
|
|
79
|
+
|
|
80
|
+
LlmCostTracker::Storage::ActiveRecordStore.daily_total(time: time)
|
|
81
|
+
rescue LoadError => e
|
|
82
|
+
raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def handle_exceeded(budget_type:, total:, budget:, last_event: nil)
|
|
47
86
|
config = LlmCostTracker.configuration
|
|
87
|
+
payload = budget_payload(
|
|
88
|
+
budget_type: budget_type,
|
|
89
|
+
total: total,
|
|
90
|
+
budget: budget,
|
|
91
|
+
last_event: last_event
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
|
|
95
|
+
config.on_budget_exceeded&.call(payload)
|
|
96
|
+
end
|
|
97
|
+
raise BudgetExceededError.new(**payload) if raise_on_exceeded?(config)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def budget_payload(budget_type:, total:, budget:, last_event:)
|
|
48
101
|
payload = {
|
|
49
|
-
|
|
50
|
-
|
|
102
|
+
budget_type: budget_type,
|
|
103
|
+
total: total,
|
|
104
|
+
budget: budget,
|
|
51
105
|
last_event: last_event
|
|
52
106
|
}
|
|
107
|
+
payload[:monthly_total] = total if budget_type == :monthly
|
|
108
|
+
payload[:daily_total] = total if budget_type == :daily
|
|
109
|
+
payload[:call_cost] = total if budget_type == :per_call
|
|
110
|
+
payload
|
|
111
|
+
end
|
|
53
112
|
|
|
54
|
-
|
|
55
|
-
|
|
113
|
+
def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
|
|
114
|
+
return false unless config.on_budget_exceeded
|
|
115
|
+
return true unless config.budget_exceeded_behavior == :notify
|
|
116
|
+
return true unless last_event&.cost
|
|
117
|
+
return true if budget_type == :per_call
|
|
118
|
+
|
|
119
|
+
total - last_event.cost.total_cost < budget
|
|
56
120
|
end
|
|
57
121
|
|
|
58
122
|
def raise_on_exceeded?(config)
|
|
@@ -19,6 +19,8 @@ module LlmCostTracker
|
|
|
19
19
|
custom_storage
|
|
20
20
|
on_budget_exceeded
|
|
21
21
|
monthly_budget
|
|
22
|
+
daily_budget
|
|
23
|
+
per_call_budget
|
|
22
24
|
log_level
|
|
23
25
|
prices_file
|
|
24
26
|
].freeze
|
|
@@ -48,6 +50,8 @@ module LlmCostTracker
|
|
|
48
50
|
@default_tags = {}
|
|
49
51
|
@on_budget_exceeded = nil
|
|
50
52
|
@monthly_budget = nil
|
|
53
|
+
@daily_budget = nil
|
|
54
|
+
@per_call_budget = nil
|
|
51
55
|
self.budget_exceeded_behavior = :notify
|
|
52
56
|
self.storage_error_behavior = :warn
|
|
53
57
|
self.unknown_pricing_behavior = :warn
|
|
@@ -6,14 +6,33 @@ module LlmCostTracker
|
|
|
6
6
|
class InvalidFilterError < Error; end
|
|
7
7
|
|
|
8
8
|
class BudgetExceededError < Error
|
|
9
|
-
attr_reader :monthly_total, :budget, :last_event
|
|
9
|
+
attr_reader :monthly_total, :daily_total, :call_cost, :total, :budget, :budget_type, :last_event
|
|
10
10
|
|
|
11
|
-
def initialize(
|
|
11
|
+
def initialize(budget:, last_event: nil, budget_type: nil, total: nil, monthly_total: nil, daily_total: nil,
|
|
12
|
+
call_cost: nil)
|
|
12
13
|
@monthly_total = monthly_total
|
|
14
|
+
@daily_total = daily_total
|
|
15
|
+
@call_cost = call_cost
|
|
16
|
+
@total = total || monthly_total || daily_total || call_cost
|
|
13
17
|
@budget = budget
|
|
18
|
+
@budget_type = budget_type || inferred_budget_type
|
|
14
19
|
@last_event = last_event
|
|
15
20
|
|
|
16
|
-
super("LLM
|
|
21
|
+
super("LLM #{budget_label} budget exceeded: $#{format('%.6f', @total)} / $#{format('%.6f', budget)}")
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
def inferred_budget_type
|
|
27
|
+
return :monthly if monthly_total
|
|
28
|
+
return :daily if daily_total
|
|
29
|
+
return :per_call if call_cost
|
|
30
|
+
|
|
31
|
+
:unknown
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def budget_label
|
|
35
|
+
budget_type.to_s.tr("_", "-")
|
|
17
36
|
end
|
|
18
37
|
end
|
|
19
38
|
|