llm_cost_tracker 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/README.md +124 -68
- data/Rakefile +2 -0
- data/app/assets/llm_cost_tracker/application.css +1 -4
- data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -2
- data/app/controllers/llm_cost_tracker/calls_controller.rb +9 -13
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +8 -19
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +1 -2
- data/app/controllers/llm_cost_tracker/models_controller.rb +5 -2
- data/app/controllers/llm_cost_tracker/tags_controller.rb +2 -4
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +6 -1
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -7
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +5 -9
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +26 -24
- data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +0 -3
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +0 -2
- data/app/services/llm_cost_tracker/pagination.rb +1 -9
- data/app/views/layouts/llm_cost_tracker/application.html.erb +1 -16
- data/app/views/llm_cost_tracker/calls/index.html.erb +23 -13
- data/app/views/llm_cost_tracker/calls/show.html.erb +8 -3
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +11 -1
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +78 -10
- data/app/views/llm_cost_tracker/models/index.html.erb +10 -9
- data/app/views/llm_cost_tracker/shared/_spend_chart.html.erb +0 -1
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +0 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +1 -1
- data/lib/llm_cost_tracker/assets.rb +6 -11
- data/lib/llm_cost_tracker/configuration.rb +78 -43
- data/lib/llm_cost_tracker/event.rb +3 -0
- data/lib/llm_cost_tracker/event_metadata.rb +1 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_provider_response_id_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +6 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +8 -1
- data/lib/llm_cost_tracker/llm_api_call.rb +14 -2
- data/lib/llm_cost_tracker/middleware/faraday.rb +58 -9
- data/lib/llm_cost_tracker/parameter_hash.rb +33 -0
- data/lib/llm_cost_tracker/parsed_usage.rb +18 -3
- data/lib/llm_cost_tracker/parsers/anthropic.rb +98 -1
- data/lib/llm_cost_tracker/parsers/base.rb +17 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +83 -6
- data/lib/llm_cost_tracker/parsers/openai.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +12 -5
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +69 -1
- data/lib/llm_cost_tracker/parsers/registry.rb +15 -3
- data/lib/llm_cost_tracker/parsers/sse.rb +81 -0
- data/lib/llm_cost_tracker/price_registry.rb +23 -8
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/merger.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/model_catalog.rb +77 -0
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +35 -0
- data/lib/llm_cost_tracker/price_sync/refresh_plan_builder.rb +162 -0
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +55 -0
- data/lib/llm_cost_tracker/price_sync/registry_writer.rb +25 -0
- data/lib/llm_cost_tracker/price_sync/source.rb +29 -0
- data/lib/llm_cost_tracker/price_sync/source_result.rb +7 -0
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +91 -0
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +94 -0
- data/lib/llm_cost_tracker/price_sync/validator.rb +66 -0
- data/lib/llm_cost_tracker/price_sync.rb +142 -0
- data/lib/llm_cost_tracker/pricing.rb +0 -11
- data/lib/llm_cost_tracker/railtie.rb +0 -1
- data/lib/llm_cost_tracker/report.rb +0 -5
- data/lib/llm_cost_tracker/storage/active_record_store.rb +10 -9
- data/lib/llm_cost_tracker/stream_collector.rb +162 -0
- data/lib/llm_cost_tracker/tags_column.rb +12 -0
- data/lib/llm_cost_tracker/tracker.rb +23 -12
- data/lib/llm_cost_tracker/value_helpers.rb +40 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +48 -35
- data/lib/tasks/llm_cost_tracker.rake +116 -0
- data/llm_cost_tracker.gemspec +8 -6
- metadata +30 -8
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 93ef8bc5c6bc0e850398b7555499a4667d1cc3d8ba2328c1fb926204a794a5a7
|
|
4
|
+
data.tar.gz: e7208b7bf518332040837498b5de7d1e5e6c761a276d6fb732d14133d38d8c74
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5d19b85e0a4398332a0161f75bc561b79c6ebf12546fe21013b12f2b7f5ff931179fcb8d5610faccd4d84063bf10f4297bd1688df04c24e41ffb63d4ff38b851
|
|
7
|
+
data.tar.gz: e7b4f3a2164cc9f6e9545e123fe4aeabac356ab66becfc94466f8f25d54329ed5af4056339cfda1c71e20bcba1c4de30a9922ff3b7b5664528bde515834e19f1
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,42 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.3.1] - 2026-04-22
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- `provider_response_id` persistence, parser extraction, and Data Quality coverage for provider-issued response object IDs.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Simplified dashboard helpers, filter normalization, and view templates without changing dashboard behavior.
|
|
16
|
+
- Split `PriceSync` internals into smaller components and removed redundant internal wrapper layers.
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Removed inline dashboard JavaScript to keep the engine server-rendered.
|
|
21
|
+
- Reset ActiveRecord model column information in storage specs to avoid stale schema state across recreated tables.
|
|
22
|
+
|
|
23
|
+
## [0.3.0] - 2026-04-22
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- Streaming capture across OpenAI, Anthropic, and Gemini, including `LlmCostTracker.track_stream` for non-Faraday clients.
|
|
28
|
+
- `stream` / `usage_source` persistence and dashboard coverage for streamed calls.
|
|
29
|
+
- `llm_cost_tracker:prices:sync` and `llm_cost_tracker:prices:check` for keeping local price snapshots current.
|
|
30
|
+
- `LlmCostTracker.enforce_budget!` and opt-in `enforce_budget:` keyword for `track` / `track_stream`.
|
|
31
|
+
|
|
32
|
+
### Changed
|
|
33
|
+
|
|
34
|
+
- Price refresh now uses structured JSON sources (LiteLLM primary, OpenRouter secondary) instead of scraping provider HTML pages.
|
|
35
|
+
- Synced price entries now carry source provenance (`_source`, `_source_version`, `_fetched_at`), while `_source: "manual"` entries remain untouched.
|
|
36
|
+
- Manual stream parsing now resolves parsers through the shared registry, so configured OpenAI-compatible providers work the same way as built-in ones.
|
|
37
|
+
- `LlmCostTracker.configure` now treats configuration as an immutable snapshot after the block returns; mutating or replacing shared fields through `LlmCostTracker.configuration` raises `FrozenError`.
|
|
38
|
+
|
|
39
|
+
### Removed
|
|
40
|
+
|
|
41
|
+
- Public `LlmCostTracker.configuration=` writer; use `LlmCostTracker.configure` to replace configuration snapshots.
|
|
42
|
+
|
|
7
43
|
## [0.2.0] - 2026-04-20
|
|
8
44
|
|
|
9
45
|
### Added
|
data/README.md
CHANGED
|
@@ -1,35 +1,17 @@
|
|
|
1
|
-
#
|
|
1
|
+
# LLM Cost Tracker
|
|
2
2
|
|
|
3
|
-
**Self-hosted LLM cost tracking for Ruby and Rails.** Intercepts Faraday LLM responses, prices
|
|
3
|
+
**Self-hosted LLM cost tracking for Ruby and Rails.** Intercepts Faraday LLM responses or records usage explicitly, prices events locally, and stores them in your database. No proxy, no SaaS.
|
|
4
4
|
|
|
5
5
|
[Gem Version](https://rubygems.org/gems/llm_cost_tracker)
|
|
6
6
|
[CI](https://github.com/sergey-homenko/llm_cost_tracker/actions)
|
|
7
7
|
|
|
8
|
-
```text
|
|
9
|
-
LLM Cost Report (last 30 days)
|
|
10
|
-
|
|
11
|
-
Total cost: $127.420000
|
|
12
|
-
Requests: 4,218
|
|
13
|
-
Avg latency: 812ms
|
|
14
|
-
Unknown pricing: 0
|
|
15
|
-
|
|
16
|
-
By model:
|
|
17
|
-
gpt-4o $82.100000
|
|
18
|
-
claude-sonnet-4-6 $31.200000
|
|
19
|
-
gemini-2.5-flash $14.120000
|
|
20
|
-
|
|
21
|
-
By tag key "env":
|
|
22
|
-
production $119.300000
|
|
23
|
-
staging $8.120000
|
|
24
|
-
```
|
|
25
|
-
|
|
26
8
|
## Why
|
|
27
9
|
|
|
28
|
-
Every Rails app with LLM integrations eventually runs into the same question: where did that invoice come from? Full observability platforms like Langfuse and Helicone
|
|
10
|
+
Every Rails app with LLM integrations eventually runs into the same question: where did that invoice come from? Full observability platforms like Langfuse and Helicone solve a broader set of problems; sometimes you just need a small Rails-native ledger in your own database.
|
|
29
11
|
|
|
30
|
-
`llm_cost_tracker` is
|
|
12
|
+
`llm_cost_tracker` is built for that. It plugs into Faraday or lets you record usage explicitly with `track` / `track_stream`, looks up pricing locally, and writes an event. You end up with a ledger you can query with plain ActiveRecord, slice by any tag dimension, and optionally surface on a built-in dashboard. No proxy, no SaaS, no separate service to run.
|
|
31
13
|
|
|
32
|
-
It
|
|
14
|
+
It is not a tracing platform, prompt CMS, eval system, or gateway. The goal is to answer _"what did this app spend on LLM APIs, and where did that spend come from?"_ clearly enough to make spend review routine.
|
|
33
15
|
|
|
34
16
|
## Installation
|
|
35
17
|
|
|
@@ -44,23 +26,6 @@ bin/rails generate llm_cost_tracker:install
|
|
|
44
26
|
bin/rails db:migrate
|
|
45
27
|
```
|
|
46
28
|
|
|
47
|
-
## Quick try (no database)
|
|
48
|
-
|
|
49
|
-
```ruby
|
|
50
|
-
require "llm_cost_tracker"
|
|
51
|
-
|
|
52
|
-
LlmCostTracker.configure { |c| c.storage_backend = :log }
|
|
53
|
-
|
|
54
|
-
LlmCostTracker.track(
|
|
55
|
-
provider: :openai,
|
|
56
|
-
model: "gpt-4o",
|
|
57
|
-
input_tokens: 1000,
|
|
58
|
-
output_tokens: 200,
|
|
59
|
-
feature: "demo"
|
|
60
|
-
)
|
|
61
|
-
# => [LlmCostTracker] openai/gpt-4o tokens=1000+200 cost=$0.004500 tags={:feature=>"demo"}
|
|
62
|
-
```
|
|
63
|
-
|
|
64
29
|
## Usage
|
|
65
30
|
|
|
66
31
|
### Patch an existing client's Faraday connection
|
|
@@ -78,19 +43,7 @@ OpenAI.configure do |config|
|
|
|
78
43
|
end
|
|
79
44
|
```
|
|
80
45
|
|
|
81
|
-
`tags:` can be a callable
|
|
82
|
-
|
|
83
|
-
```ruby
|
|
84
|
-
class Current < ActiveSupport::CurrentAttributes
|
|
85
|
-
attribute :user, :tenant, :workflow
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
# application_controller.rb
|
|
89
|
-
before_action do
|
|
90
|
-
Current.user = current_user
|
|
91
|
-
Current.workflow = "chat"
|
|
92
|
-
end
|
|
93
|
-
```
|
|
46
|
+
`tags:` can be a callable and is evaluated on each request.
|
|
94
47
|
|
|
95
48
|
### Raw Faraday
|
|
96
49
|
|
|
@@ -105,7 +58,52 @@ end
|
|
|
105
58
|
conn.post("/v1/responses", { model: "gpt-5-mini", input: "Hello!" })
|
|
106
59
|
```
|
|
107
60
|
|
|
108
|
-
Place `llm_cost_tracker` inside the Faraday stack where it can see the final response body.
|
|
61
|
+
Place `llm_cost_tracker` inside the Faraday stack where it can see the final response body.
|
|
62
|
+
|
|
63
|
+
### Streaming
|
|
64
|
+
|
|
65
|
+
Streaming is captured automatically for OpenAI, Anthropic, and Gemini when the request goes through the Faraday middleware. The middleware tees the `on_data` callback, keeps the stream flowing to your code, and records the final usage block once the response completes.
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
# OpenAI: include usage in the final chunk
|
|
69
|
+
client.chat(parameters: {
|
|
70
|
+
model: "gpt-4o",
|
|
71
|
+
messages: [...],
|
|
72
|
+
stream: proc { |chunk| ... },
|
|
73
|
+
stream_options: { include_usage: true }
|
|
74
|
+
})
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Anthropic emits usage in `message_start` + `message_delta` events. Gemini's `:streamGenerateContent` endpoint includes `usageMetadata`; usage from the final chunk is used.
|
|
78
|
+
|
|
79
|
+
Streamed calls are stored with `stream: true` and `usage_source: "stream_final"`. If the provider never sends final usage, the call is still recorded with `usage_source: "unknown"` so those calls surface on the Data Quality page.
|
|
80
|
+
|
|
81
|
+
When the provider emits a stable response object ID, LLM Cost Tracker stores it as `provider_response_id`. OpenAI and Anthropic are covered end-to-end; Gemini is best effort and may vary by endpoint or API version.
|
|
82
|
+
|
|
83
|
+
For non-Faraday clients (raw `Net::HTTP`, custom SSE code, Azure OpenAI), use the explicit helper:
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o") do |stream|
|
|
87
|
+
my_client.stream(...) { |chunk| stream.event(chunk) }
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Or skip the chunk parsing entirely if you already know the totals:
|
|
91
|
+
LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o") do |stream|
|
|
92
|
+
# ... your streaming loop ...
|
|
93
|
+
stream.usage(input_tokens: 120, output_tokens: 45)
|
|
94
|
+
end
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
If your custom streaming client exposes the provider's response object ID after the stream starts, set it explicitly:
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
LlmCostTracker.track_stream(provider: "anthropic", model: "claude-sonnet-4-6") do |stream|
|
|
101
|
+
stream.provider_response_id = response.id
|
|
102
|
+
stream.usage(input_tokens: 120, output_tokens: 45)
|
|
103
|
+
end
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns. Run `bin/rails g llm_cost_tracker:add_provider_response_id` to persist provider-issued response IDs.
|
|
109
107
|
|
|
110
108
|
### Manual tracking
|
|
111
109
|
|
|
@@ -115,6 +113,7 @@ LlmCostTracker.track(
|
|
|
115
113
|
model: "claude-sonnet-4-6",
|
|
116
114
|
input_tokens: 1500,
|
|
117
115
|
output_tokens: 320,
|
|
116
|
+
provider_response_id: "msg_01XFDUDYJgAACzvnptvVoYEL",
|
|
118
117
|
cache_read_input_tokens: 1200,
|
|
119
118
|
feature: "summarizer",
|
|
120
119
|
user_id: current_user.id
|
|
@@ -148,7 +147,7 @@ LlmCostTracker.configure do |config|
|
|
|
148
147
|
end
|
|
149
148
|
```
|
|
150
149
|
|
|
151
|
-
Pricing is best
|
|
150
|
+
Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
|
|
152
151
|
|
|
153
152
|
`storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
|
|
154
153
|
|
|
@@ -160,7 +159,7 @@ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
|
|
|
160
159
|
|
|
161
160
|
### Keeping prices current
|
|
162
161
|
|
|
163
|
-
Built-in prices
|
|
162
|
+
Built-in prices live in `lib/llm_cost_tracker/prices.json`. The gem never fetches pricing on boot. For production, keep a local snapshot under `config/` and point the gem at it:
|
|
164
163
|
|
|
165
164
|
```bash
|
|
166
165
|
bin/rails generate llm_cost_tracker:prices
|
|
@@ -175,7 +174,26 @@ bin/rails generate llm_cost_tracker:prices
|
|
|
175
174
|
}
|
|
176
175
|
```
|
|
177
176
|
|
|
178
|
-
`pricing_overrides` has the highest precedence
|
|
177
|
+
`pricing_overrides` has the highest precedence. Use it for a handful of Ruby-side overrides; use `prices_file` when you want a local pricing table under source control.
|
|
178
|
+
|
|
179
|
+
To refresh prices on demand:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
bin/rails llm_cost_tracker:prices:sync
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
`llm_cost_tracker:prices:sync` refreshes the current registry from two structured sources in priority order: LiteLLM is the primary source, and OpenRouter is the secondary source that fills gaps and helps surface discrepancies.
|
|
186
|
+
|
|
187
|
+
`llm_cost_tracker:prices:sync` / `llm_cost_tracker:prices:check` perform HTTP GET requests to:
|
|
188
|
+
|
|
189
|
+
- LiteLLM pricing JSON: `https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json`
|
|
190
|
+
- OpenRouter Models API: `https://openrouter.ai/api/v1/models`
|
|
191
|
+
|
|
192
|
+
If `config.prices_file` is configured, the task syncs that file automatically; otherwise it works from the built-in snapshot. `_source: "manual"` entries are never touched. Models that are still in your file but missing from both upstream sources are left alone and reported as orphaned. For intentional custom entries, set `_source: "manual"` on them so they stop showing up in orphaned warnings.
|
|
193
|
+
|
|
194
|
+
Use `PREVIEW=1` to see the diff without writing. Use `STRICT=1` to fail instead of applying a partial refresh when a source fails or the validator rejects a price. Use `bin/rails llm_cost_tracker:prices:check` in CI to print the current diff and exit non-zero when the snapshot has drifted or refresh fails.
|
|
195
|
+
|
|
196
|
+
Large price changes are flagged during sync. If a specific entry is expected to move by more than 3x, add `_validator_override: ["skip_relative_change"]` to that entry in your local price file.
|
|
179
197
|
|
|
180
198
|
## Budget enforcement
|
|
181
199
|
|
|
@@ -194,14 +212,31 @@ rescue LlmCostTracker::BudgetExceededError => e
|
|
|
194
212
|
# e.monthly_total, e.budget, e.last_event
|
|
195
213
|
```
|
|
196
214
|
|
|
197
|
-
`:block_requests` is
|
|
215
|
+
`:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
|
|
216
|
+
|
|
217
|
+
Preflight is wired into the Faraday middleware automatically. When you record events via `LlmCostTracker.track` / `track_stream` and also want the same preflight, opt in:
|
|
218
|
+
|
|
219
|
+
```ruby
|
|
220
|
+
LlmCostTracker.track(
|
|
221
|
+
provider: "openai",
|
|
222
|
+
model: "gpt-4o",
|
|
223
|
+
input_tokens: 120,
|
|
224
|
+
output_tokens: 45,
|
|
225
|
+
enforce_budget: true
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o", enforce_budget: true) do |stream|
|
|
229
|
+
# raises BudgetExceededError before the block runs when over budget
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
LlmCostTracker.enforce_budget! # standalone preflight
|
|
233
|
+
```
|
|
198
234
|
|
|
199
235
|
## Querying costs
|
|
200
236
|
|
|
201
237
|
```bash
|
|
202
238
|
bin/rails llm_cost_tracker:report
|
|
203
239
|
DAYS=7 bin/rails llm_cost_tracker:report
|
|
204
|
-
DAYS=90 bin/rails llm_cost_tracker:prune # delete calls older than N days in batches
|
|
205
240
|
```
|
|
206
241
|
|
|
207
242
|
```ruby
|
|
@@ -230,7 +265,15 @@ LlmCostTracker::LlmApiCall.by_tags(user_id: 42, feature: "chat").this_month.tota
|
|
|
230
265
|
LlmCostTracker::LlmApiCall.between(1.week.ago, Time.current).cost_by_model
|
|
231
266
|
```
|
|
232
267
|
|
|
233
|
-
|
|
268
|
+
## Retention
|
|
269
|
+
|
|
270
|
+
Retention is not enforced automatically. Use the rake task below if you need to delete older records in batches.
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
DAYS=90 bin/rails llm_cost_tracker:prune # delete calls older than N days in batches
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## Tag storage
|
|
234
277
|
|
|
235
278
|
New installs use `jsonb` + GIN on PostgreSQL:
|
|
236
279
|
|
|
@@ -252,7 +295,7 @@ bin/rails db:migrate
|
|
|
252
295
|
|
|
253
296
|
## Dashboard (optional)
|
|
254
297
|
|
|
255
|
-
|
|
298
|
+
Optional Rails Engine. Plain ERB, no JavaScript framework, no asset pipeline required. Requires Rails 7.1+; the core middleware works without Rails.
|
|
256
299
|
|
|
257
300
|
```ruby
|
|
258
301
|
# config/application.rb (or an initializer)
|
|
@@ -272,7 +315,7 @@ Routes (GET-only; CSV export included):
|
|
|
272
315
|
- `/llm-costs/tags/:key` — breakdown by values of a given tag key
|
|
273
316
|
- `/llm-costs/data_quality` — unknown pricing share, untagged calls, missing latency
|
|
274
317
|
|
|
275
|
-
> ⚠️ **No built-in auth.** Tags carry whatever your app puts in them. Protect the mount point with your
|
|
318
|
+
> ⚠️ **No built-in auth.** Tags carry whatever your app puts in them. Protect the mount point with your application's authentication.
|
|
276
319
|
|
|
277
320
|
### Basic auth
|
|
278
321
|
|
|
@@ -331,7 +374,7 @@ config.custom_storage = ->(event) {
|
|
|
331
374
|
config.openai_compatible_providers["gateway.example.com"] = "internal_gateway"
|
|
332
375
|
```
|
|
333
376
|
|
|
334
|
-
Configured hosts are parsed
|
|
377
|
+
Configured hosts are parsed using the OpenAI-compatible usage shape (`prompt_tokens` / `completion_tokens` / `total_tokens`, `input_tokens` / `output_tokens`, and optional cached-input details). This covers OpenRouter, DeepSeek, and private gateways exposing Chat Completions / Responses / Completions / Embeddings.
|
|
335
378
|
|
|
336
379
|
## Custom parser
|
|
337
380
|
|
|
@@ -373,20 +416,33 @@ LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
|
|
|
373
416
|
| Google Gemini | ✅ | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite, 1.5 Pro/Flash |
|
|
374
417
|
| Any other | 🔧 | Custom parser |
|
|
375
418
|
|
|
376
|
-
Endpoints: OpenAI Chat Completions / Responses / Completions / Embeddings; OpenAI-compatible equivalents; Anthropic Messages; Gemini `generateContent`
|
|
419
|
+
Endpoints: OpenAI Chat Completions / Responses / Completions / Embeddings; OpenAI-compatible equivalents; Anthropic Messages; Gemini `generateContent` and `streamGenerateContent`. All endpoints support streaming capture.
|
|
377
420
|
|
|
378
421
|
## Safety
|
|
379
422
|
|
|
380
|
-
- No external HTTP calls.
|
|
423
|
+
- No external HTTP calls at request-tracking time.
|
|
381
424
|
- No prompt or response bodies stored.
|
|
382
425
|
- Faraday responses not modified.
|
|
383
426
|
- Storage failures non-fatal by default (`storage_error_behavior = :warn`).
|
|
384
|
-
- Budget
|
|
427
|
+
- Budget and unknown-pricing errors are raised only when you opt in.
|
|
428
|
+
|
|
429
|
+
## Thread safety (Puma, Sidekiq)
|
|
430
|
+
|
|
431
|
+
The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and Sidekiq with `concurrency > 1` are both supported. A few rules:
|
|
432
|
+
|
|
433
|
+
- **Configure once at boot.** `LlmCostTracker.configure` deep-freezes `default_tags`, `pricing_overrides`, `report_tag_breakdowns`, and `openai_compatible_providers` when the block returns. Mutating or replacing shared fields through `LlmCostTracker.configuration` raises `FrozenError`.
|
|
434
|
+
- **Use `:active_record` storage for shared ledgers.** Puma workers and Sidekiq processes do not share memory; `:log` and `:custom` backends see per-process state only. `:active_record` writes to a single table and is the right choice for dashboards and budget checks across processes.
|
|
435
|
+
- **Size your connection pool.** Each tracked call on the middleware path issues up to three SQL queries (preflight `SUM`, `INSERT`, post-check `SUM`). Make sure the AR pool covers `puma max_threads + sidekiq concurrency` plus your app's own usage.
|
|
436
|
+
- **Don't share a `StreamCollector` across threads you don't own.** The collector itself is thread-safe — `event`, `usage`, and `finish!` synchronize internally and `finish!` is idempotent — but the documented pattern is one collector per stream.
|
|
437
|
+
- **`finish!` is a barrier.** Once a stream is finished, later `event`, `usage`, or `model=` calls raise `FrozenError` instead of mutating a closed collector.
|
|
438
|
+
- **`ActiveSupport::Notifications` subscribers run synchronously** in the caller's thread. Keep them fast or hand off to a background job; otherwise they add latency to every tracked call.
|
|
439
|
+
- **`storage_error_behavior = :raise` inside Sidekiq** will retry the job, which can duplicate an expensive LLM call. Prefer `:warn` plus a Notifications subscriber, or `:ignore`, for worker contexts.
|
|
385
440
|
|
|
386
441
|
## Known limitations
|
|
387
442
|
|
|
388
|
-
- `:block_requests` is best-effort
|
|
389
|
-
- Streaming
|
|
443
|
+
- `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
|
|
444
|
+
- Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
|
|
445
|
+
- `provider_response_id` is stored only when the provider exposes a stable response object ID. Missing IDs stay `nil` and surface on the Data Quality page.
|
|
390
446
|
- Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
|
|
391
447
|
- OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
|
|
392
448
|
|
data/Rakefile
CHANGED
|
(hunk not shown)
|
data/app/assets/llm_cost_tracker/application.css
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
--mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
.lct-body { margin: 0; }
|
|
38
38
|
|
|
39
39
|
.lct-app {
|
|
40
40
|
background: var(--lct-bg);
|
|
@@ -183,7 +183,6 @@
|
|
|
183
183
|
|
|
184
184
|
.lct-stat-sub { color: var(--lct-muted); font-size: var(--fs-xs); margin: 4px 0 0; }
|
|
185
185
|
|
|
186
|
-
/* Shared "small uppercase-ish label" recipe */
|
|
187
186
|
.lct-stat-label,
|
|
188
187
|
.lct-field label,
|
|
189
188
|
.lct-dl dt,
|
|
@@ -201,7 +200,6 @@
|
|
|
201
200
|
.lct-chip-label { color: var(--lct-accent); font-weight: 700; }
|
|
202
201
|
.lct-field label { color: var(--lct-text); font-size: var(--fs-md); font-weight: 500; }
|
|
203
202
|
|
|
204
|
-
/* Shared "muted body copy" recipe */
|
|
205
203
|
.lct-section-copy,
|
|
206
204
|
.lct-stat-copy,
|
|
207
205
|
.lct-banner-copy,
|
|
@@ -285,7 +283,6 @@
|
|
|
285
283
|
.lct-calls-table td:last-child,
|
|
286
284
|
.lct-calls-table th:last-child { text-align: right; }
|
|
287
285
|
|
|
288
|
-
/* Track + fill primitives — shared by bar / budget / stack */
|
|
289
286
|
.lct-bar-track,
|
|
290
287
|
.lct-budget-track,
|
|
291
288
|
.lct-stack-track {
|
|
data/app/controllers/llm_cost_tracker/assets_controller.rb
CHANGED
|
@@ -5,9 +5,8 @@ module LlmCostTracker
|
|
|
5
5
|
skip_forgery_protection if respond_to?(:skip_forgery_protection)
|
|
6
6
|
|
|
7
7
|
def stylesheet
|
|
8
|
-
path = File.join(LlmCostTracker::Assets.root, LlmCostTracker::Assets::STYLESHEET)
|
|
9
8
|
response.set_header("Cache-Control", "public, max-age=31536000, immutable")
|
|
10
|
-
send_file
|
|
9
|
+
send_file LlmCostTracker::Assets::STYLESHEET_PATH, type: "text/css", disposition: "inline"
|
|
11
10
|
end
|
|
12
11
|
end
|
|
13
12
|
end
|
|
data/app/controllers/llm_cost_tracker/calls_controller.rb
CHANGED
|
@@ -6,6 +6,7 @@ module LlmCostTracker
|
|
|
6
6
|
class CallsController < ApplicationController
|
|
7
7
|
CSV_EXPORT_LIMIT = 10_000
|
|
8
8
|
CSV_FORMULA_PREFIXES = ["=", "+", "-", "@", "\t", "\r"].freeze
|
|
9
|
+
DEFAULT_ORDER = "tracked_at DESC, id DESC"
|
|
9
10
|
|
|
10
11
|
def index
|
|
11
12
|
@sort = params[:sort].to_s
|
|
@@ -30,9 +31,6 @@ module LlmCostTracker
|
|
|
30
31
|
|
|
31
32
|
def show
|
|
32
33
|
@call = LlmApiCall.find(params[:id])
|
|
33
|
-
@tags = @call.parsed_tags
|
|
34
|
-
@metadata_available = @call.has_attribute?("metadata")
|
|
35
|
-
@metadata = @call.read_attribute("metadata") if @metadata_available
|
|
36
34
|
@latency_available = LlmApiCall.latency_column?
|
|
37
35
|
end
|
|
38
36
|
|
|
@@ -41,29 +39,26 @@ module LlmCostTracker
|
|
|
41
39
|
def calls_order(sort)
|
|
42
40
|
case sort
|
|
43
41
|
when "expensive"
|
|
44
|
-
"CASE WHEN total_cost IS NULL THEN 1 ELSE 0 END ASC, total_cost DESC, #{
|
|
42
|
+
"CASE WHEN total_cost IS NULL THEN 1 ELSE 0 END ASC, total_cost DESC, #{DEFAULT_ORDER}"
|
|
45
43
|
when "input"
|
|
46
|
-
"input_tokens DESC, #{
|
|
44
|
+
"input_tokens DESC, #{DEFAULT_ORDER}"
|
|
47
45
|
when "output"
|
|
48
|
-
"output_tokens DESC, #{
|
|
46
|
+
"output_tokens DESC, #{DEFAULT_ORDER}"
|
|
49
47
|
when "slow"
|
|
50
|
-
return
|
|
48
|
+
return DEFAULT_ORDER unless LlmApiCall.latency_column?
|
|
51
49
|
|
|
52
|
-
"CASE WHEN latency_ms IS NULL THEN 1 ELSE 0 END ASC, latency_ms DESC, #{
|
|
50
|
+
"CASE WHEN latency_ms IS NULL THEN 1 ELSE 0 END ASC, latency_ms DESC, #{DEFAULT_ORDER}"
|
|
53
51
|
else
|
|
54
|
-
|
|
52
|
+
DEFAULT_ORDER
|
|
55
53
|
end
|
|
56
54
|
end
|
|
57
55
|
|
|
58
|
-
def default_order
|
|
59
|
-
"tracked_at DESC, id DESC"
|
|
60
|
-
end
|
|
61
|
-
|
|
62
56
|
def render_csv(relation)
|
|
63
57
|
latency = LlmApiCall.latency_column?
|
|
64
58
|
CSV.generate do |csv|
|
|
65
59
|
headers = %w[tracked_at provider model input_tokens output_tokens total_tokens total_cost]
|
|
66
60
|
headers << "latency_ms" if latency
|
|
61
|
+
headers << "provider_response_id" if LlmApiCall.provider_response_id_column?
|
|
67
62
|
headers << "tags"
|
|
68
63
|
csv << headers
|
|
69
64
|
|
|
@@ -78,6 +73,7 @@ module LlmCostTracker
|
|
|
78
73
|
call.total_cost
|
|
79
74
|
]
|
|
80
75
|
row << call.latency_ms if latency
|
|
76
|
+
row << csv_safe(call.provider_response_id) if LlmApiCall.provider_response_id_column?
|
|
81
77
|
row << csv_safe(call.parsed_tags.to_json)
|
|
82
78
|
csv << row
|
|
83
79
|
end
|
|
data/app/controllers/llm_cost_tracker/dashboard_controller.rb
CHANGED
|
@@ -5,15 +5,19 @@ module LlmCostTracker
|
|
|
5
5
|
def index
|
|
6
6
|
@from_date, @to_date = overview_range
|
|
7
7
|
prev_from, prev_to = previous_range
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
filter_params = LlmCostTracker::ParameterHash.to_hash(params)
|
|
9
|
+
scope = Dashboard::Filter.call(
|
|
10
|
+
params: filter_params.merge("from" => @from_date.iso8601, "to" => @to_date.iso8601)
|
|
11
|
+
)
|
|
12
|
+
previous_scope = Dashboard::Filter.call(
|
|
13
|
+
params: filter_params.merge("from" => prev_from.iso8601, "to" => prev_to.iso8601)
|
|
14
|
+
)
|
|
11
15
|
|
|
12
16
|
@stats = Dashboard::OverviewStats.call(scope: scope, previous_scope: previous_scope)
|
|
13
17
|
@time_series = Dashboard::TimeSeries.call(scope: scope, from: @from_date, to: @to_date)
|
|
14
18
|
@comparison_series = Dashboard::TimeSeries.call(scope: previous_scope, from: prev_from, to: prev_to)
|
|
15
19
|
@spend_anomaly = Dashboard::SpendAnomaly.call(from: @from_date, to: @to_date, scope: scope)
|
|
16
|
-
@top_models =
|
|
20
|
+
@top_models = Dashboard::TopModels.call(scope: scope)
|
|
17
21
|
@providers = Dashboard::ProviderBreakdown.call(scope: scope)
|
|
18
22
|
end
|
|
19
23
|
|
|
@@ -32,21 +36,6 @@ module LlmCostTracker
|
|
|
32
36
|
[prev_from, prev_to]
|
|
33
37
|
end
|
|
34
38
|
|
|
35
|
-
def overview_filter_params
|
|
36
|
-
params.to_unsafe_h.merge(
|
|
37
|
-
"from" => @from_date.iso8601,
|
|
38
|
-
"to" => @to_date.iso8601
|
|
39
|
-
)
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
def previous_filter_params
|
|
43
|
-
prev_from, prev_to = previous_range
|
|
44
|
-
params.to_unsafe_h.merge(
|
|
45
|
-
"from" => prev_from.iso8601,
|
|
46
|
-
"to" => prev_to.iso8601
|
|
47
|
-
)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
39
|
def parsed_date(value)
|
|
51
40
|
return nil if value.to_s.strip.empty?
|
|
52
41
|
|
|
@@ -3,8 +3,7 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class DataQualityController < ApplicationController
|
|
5
5
|
def index
|
|
6
|
-
|
|
7
|
-
@stats = Dashboard::DataQuality.call(scope: scope)
|
|
6
|
+
@stats = Dashboard::DataQuality.call(scope: Dashboard::Filter.call(params: params))
|
|
8
7
|
end
|
|
9
8
|
end
|
|
10
9
|
end
|
|
@@ -3,9 +3,12 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class ModelsController < ApplicationController
|
|
5
5
|
def index
|
|
6
|
-
scope = Dashboard::Filter.call(params: params)
|
|
7
6
|
@sort = params[:sort].to_s
|
|
8
|
-
@rows = Dashboard::TopModels.call(
|
|
7
|
+
@rows = Dashboard::TopModels.call(
|
|
8
|
+
scope: Dashboard::Filter.call(params: params),
|
|
9
|
+
limit: nil,
|
|
10
|
+
sort: @sort
|
|
11
|
+
)
|
|
9
12
|
@latency_available = LlmApiCall.latency_column?
|
|
10
13
|
end
|
|
11
14
|
end
|
|
@@ -3,14 +3,12 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class TagsController < ApplicationController
|
|
5
5
|
def index
|
|
6
|
-
|
|
7
|
-
@rows = Dashboard::TagKeyExplorer.call(scope: scope)
|
|
6
|
+
@rows = Dashboard::TagKeyExplorer.call(scope: Dashboard::Filter.call(params: params))
|
|
8
7
|
end
|
|
9
8
|
|
|
10
9
|
def show
|
|
11
10
|
@tag_key = params[:key]
|
|
12
|
-
|
|
13
|
-
@rows = Dashboard::TagBreakdown.call(scope: scope, key: @tag_key)
|
|
11
|
+
@rows = Dashboard::TagBreakdown.call(scope: Dashboard::Filter.call(params: params), key: @tag_key)
|
|
14
12
|
@total_calls = @rows.sum(&:calls)
|
|
15
13
|
|
|
16
14
|
tagged_rows = @rows.reject { |r| r.value == "(untagged)" }
|
|
@@ -2,7 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
module DashboardFilterHelper
|
|
5
|
-
FILTER_PARAM_KEYS = %i[from to provider model tag sort page per].freeze
|
|
5
|
+
FILTER_PARAM_KEYS = %i[from to provider model stream usage_source tag sort page per].freeze
|
|
6
|
+
|
|
7
|
+
STREAM_FILTER_OPTIONS = [
|
|
8
|
+
["Streaming only", "yes"],
|
|
9
|
+
["Non-streaming only", "no"]
|
|
10
|
+
].freeze
|
|
6
11
|
|
|
7
12
|
def any_filter_applied?
|
|
8
13
|
FILTER_PARAM_KEYS.any? { |key| params[key].present? }
|
|
@@ -13,7 +13,7 @@ module LlmCostTracker
|
|
|
13
13
|
private
|
|
14
14
|
|
|
15
15
|
def filter_options_for(column, filter_params:)
|
|
16
|
-
source =
|
|
16
|
+
source = LlmCostTracker::ParameterHash.to_hash(filter_params)
|
|
17
17
|
scope_params = source.stringify_keys.merge(
|
|
18
18
|
column.to_s => nil, "format" => nil, "page" => nil, "per" => nil, "sort" => nil
|
|
19
19
|
)
|
|
@@ -24,11 +24,5 @@ module LlmCostTracker
|
|
|
24
24
|
values.unshift(current) if current && !values.include?(current)
|
|
25
25
|
values
|
|
26
26
|
end
|
|
27
|
-
|
|
28
|
-
def filter_source_hash(filter_params)
|
|
29
|
-
return filter_params.to_unsafe_h if filter_params.respond_to?(:to_unsafe_h)
|
|
30
|
-
|
|
31
|
-
filter_params.to_h
|
|
32
|
-
end
|
|
33
27
|
end
|
|
34
28
|
end
|
|
@@ -19,18 +19,14 @@ module LlmCostTracker
|
|
|
19
19
|
private
|
|
20
20
|
|
|
21
21
|
def normalized_query_tags(tags)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
tags = tags.to_unsafe_h if tags.respond_to?(:to_unsafe_h)
|
|
25
|
-
tags = tags.to_h if tags.respond_to?(:to_h)
|
|
26
|
-
return {} unless tags.is_a?(Hash)
|
|
27
|
-
|
|
28
|
-
tags.transform_keys(&:to_s).transform_values(&:to_s)
|
|
22
|
+
LlmCostTracker::ParameterHash.to_hash(tags).transform_keys(&:to_s).transform_values(&:to_s)
|
|
29
23
|
end
|
|
30
24
|
|
|
31
25
|
def clean_dashboard_query(value)
|
|
32
|
-
|
|
33
|
-
|
|
26
|
+
if LlmCostTracker::ParameterHash.hash_like?(value)
|
|
27
|
+
return clean_dashboard_hash(LlmCostTracker::ParameterHash.to_hash(value))
|
|
28
|
+
end
|
|
29
|
+
|
|
34
30
|
return clean_dashboard_array(value) if value.is_a?(Array)
|
|
35
31
|
return clean_dashboard_string(value) if value.is_a?(String)
|
|
36
32
|
|