llm_cost_tracker 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/README.md +111 -68
  4. data/Rakefile +2 -0
  5. data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -2
  6. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +6 -1
  7. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +16 -1
  8. data/app/services/llm_cost_tracker/dashboard/filter.rb +22 -0
  9. data/app/views/llm_cost_tracker/calls/index.html.erb +10 -0
  10. data/app/views/llm_cost_tracker/dashboard/index.html.erb +10 -0
  11. data/app/views/llm_cost_tracker/data_quality/index.html.erb +46 -0
  12. data/lib/llm_cost_tracker/assets.rb +6 -11
  13. data/lib/llm_cost_tracker/configuration.rb +78 -42
  14. data/lib/llm_cost_tracker/event.rb +2 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +29 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +25 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +4 -0
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +8 -1
  19. data/lib/llm_cost_tracker/llm_api_call.rb +8 -0
  20. data/lib/llm_cost_tracker/middleware/faraday.rb +57 -9
  21. data/lib/llm_cost_tracker/parsed_usage.rb +7 -3
  22. data/lib/llm_cost_tracker/parsers/anthropic.rb +79 -1
  23. data/lib/llm_cost_tracker/parsers/base.rb +17 -5
  24. data/lib/llm_cost_tracker/parsers/gemini.rb +59 -6
  25. data/lib/llm_cost_tracker/parsers/openai.rb +8 -0
  26. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +8 -0
  27. data/lib/llm_cost_tracker/parsers/openai_usage.rb +55 -1
  28. data/lib/llm_cost_tracker/parsers/registry.rb +15 -3
  29. data/lib/llm_cost_tracker/parsers/sse.rb +81 -0
  30. data/lib/llm_cost_tracker/price_registry.rb +1 -1
  31. data/lib/llm_cost_tracker/price_sync/fetcher.rb +72 -0
  32. data/lib/llm_cost_tracker/price_sync/merger.rb +72 -0
  33. data/lib/llm_cost_tracker/price_sync/model_catalog.rb +77 -0
  34. data/lib/llm_cost_tracker/price_sync/raw_price.rb +35 -0
  35. data/lib/llm_cost_tracker/price_sync/source.rb +29 -0
  36. data/lib/llm_cost_tracker/price_sync/source_result.rb +7 -0
  37. data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +91 -0
  38. data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +94 -0
  39. data/lib/llm_cost_tracker/price_sync/validator.rb +66 -0
  40. data/lib/llm_cost_tracker/price_sync.rb +310 -0
  41. data/lib/llm_cost_tracker/storage/active_record_store.rb +3 -1
  42. data/lib/llm_cost_tracker/stream_collector.rb +158 -0
  43. data/lib/llm_cost_tracker/tags_column.rb +8 -0
  44. data/lib/llm_cost_tracker/tracker.rb +15 -12
  45. data/lib/llm_cost_tracker/value_helpers.rb +40 -0
  46. data/lib/llm_cost_tracker/version.rb +1 -1
  47. data/lib/llm_cost_tracker.rb +50 -29
  48. data/lib/tasks/llm_cost_tracker.rake +116 -0
  49. data/llm_cost_tracker.gemspec +8 -6
  50. metadata +24 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eba077a3fb9b0dc146673535769b8be4d34dcbde133ae41e2910652b7833e306
4
- data.tar.gz: 3b384ad865a23598566ee46974bedda14af88673bbe4822501fb5eb65980655a
3
+ metadata.gz: 8b20da957651521f022866af9d4735a4ef53d52a2dc3c278b8b2a90e1d7a7f98
4
+ data.tar.gz: ea98b2a7505d99c5f78d7756d0adc50224c4fdc88000fa5ec81be4450c9200f1
5
5
  SHA512:
6
- metadata.gz: 2c5d72d58222d3b3f66fe7d1511ce69c8a008b45a31351827d59a13a982b5478f24ce7e14afe1417150e51653452ec1cc43c8a9fc8f1f069148da7489e0c2698
7
- data.tar.gz: a5a70b59d1622080271d0a1a24259c8630938c132e93cfeb041ef258d3d0a720bb44a1e0b8b5be65c6b9801847f031395f2e1d7adc48d9cf71b83c3f276ae281
6
+ metadata.gz: 9ca709080d46395ac32b9a2931b4b3cb7d4df6016b73bad3579cb1decdd046be21a2fb67c06e96876013a754a113e9ce5987ed0e27792b312716324bdb5f9adb
7
+ data.tar.gz: 445b77222180802f208246a2e25b30e5e0a5679d2d5b84a2ba00d1e2fc97a5cf3127521be13f415c6d76bbcc056dd0bdfe6ade937eb9d67d737ce6b6548665fa
data/CHANGELOG.md CHANGED
@@ -4,6 +4,26 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.3.0] - 2026-04-22
8
+
9
+ ### Added
10
+
11
+ - Streaming capture across OpenAI, Anthropic, and Gemini, including `LlmCostTracker.track_stream` for non-Faraday clients.
12
+ - `stream` / `usage_source` persistence and dashboard coverage for streamed calls.
13
+ - `llm_cost_tracker:prices:sync` and `llm_cost_tracker:prices:check` for keeping local price snapshots current.
14
+ - `LlmCostTracker.enforce_budget!` and opt-in `enforce_budget:` keyword for `track` / `track_stream`.
15
+
16
+ ### Changed
17
+
18
+ - Price refresh now uses structured JSON sources (LiteLLM primary, OpenRouter secondary) instead of scraping provider HTML pages.
19
+ - Synced price entries now carry source provenance (`_source`, `_source_version`, `_fetched_at`), while `_source: "manual"` entries remain untouched.
20
+ - Manual stream parsing now resolves parsers through the shared registry, so configured OpenAI-compatible providers work the same way as built-in ones.
21
+ - `LlmCostTracker.configure` now treats configuration as an immutable snapshot after the block returns; mutating or replacing shared fields through `LlmCostTracker.configuration` raises `FrozenError`.
22
+
23
+ ### Removed
24
+
25
+ - Public `LlmCostTracker.configuration=` writer; use `LlmCostTracker.configure` to replace configuration snapshots.
26
+
7
27
  ## [0.2.0] - 2026-04-20
8
28
 
9
29
  ### Added
data/README.md CHANGED
@@ -1,35 +1,17 @@
1
- # LlmCostTracker
1
+ # LLM Cost Tracker
2
2
 
3
- **Self-hosted LLM cost tracking for Ruby and Rails.** Intercepts Faraday LLM responses, prices them locally, stores events in your database. No proxy, no SaaS.
3
+ **Self-hosted LLM cost tracking for Ruby and Rails.** Intercepts Faraday LLM responses or records usage explicitly, prices events locally, and stores them in your database. No proxy, no SaaS.
4
4
 
5
5
  [![Gem Version](https://img.shields.io/gem/v/llm_cost_tracker.svg)](https://rubygems.org/gems/llm_cost_tracker)
6
6
  [![CI](https://github.com/sergey-homenko/llm_cost_tracker/actions/workflows/ruby.yml/badge.svg)](https://github.com/sergey-homenko/llm_cost_tracker/actions)
7
7
 
8
- ```text
9
- LLM Cost Report (last 30 days)
10
-
11
- Total cost: $127.420000
12
- Requests: 4,218
13
- Avg latency: 812ms
14
- Unknown pricing: 0
15
-
16
- By model:
17
- gpt-4o $82.100000
18
- claude-sonnet-4-6 $31.200000
19
- gemini-2.5-flash $14.120000
20
-
21
- By tag key "env":
22
- production $119.300000
23
- staging $8.120000
24
- ```
25
-
26
8
  ## Why
27
9
 
28
- Every Rails app with LLM integrations eventually runs into the same question: where did that invoice come from? Full observability platforms like Langfuse and Helicone cover a lot more than cost, and sometimes you just want a small Rails-native ledger that lives in your own database.
10
+ Every Rails app with LLM integrations eventually runs into the same question: where did that invoice come from? Full observability platforms like Langfuse and Helicone solve a broader set of problems; sometimes you just need a small Rails-native ledger in your own database.
29
11
 
30
- `llm_cost_tracker` is scoped to that. It plugs into Faraday, parses provider usage out of the response, looks up pricing locally, and writes an event. You end up with a ledger you can query with plain ActiveRecord, slice by any tag dimension, and optionally surface on a built-in dashboard. No proxy, no SaaS, no separate service to run.
12
+ `llm_cost_tracker` is built for that. It plugs into Faraday or lets you record usage explicitly with `track` / `track_stream`, looks up pricing locally, and writes an event. You end up with a ledger you can query with plain ActiveRecord, slice by any tag dimension, and optionally surface on a built-in dashboard. No proxy, no SaaS, no separate service to run.
31
13
 
32
- It's not a tracing platform, prompt CMS, eval system, or gateway — and doesn't want to be. The goal is answering _"what did this app spend on LLM APIs, and where did that spend come from?"_ well enough that you stop worrying about it.
14
+ It is not a tracing platform, prompt CMS, eval system, or gateway. The goal is to answer _"what did this app spend on LLM APIs, and where did that spend come from?"_ clearly enough to make spend review routine.
33
15
 
34
16
  ## Installation
35
17
 
@@ -44,23 +26,6 @@ bin/rails generate llm_cost_tracker:install
44
26
  bin/rails db:migrate
45
27
  ```
46
28
 
47
- ## Quick try (no database)
48
-
49
- ```ruby
50
- require "llm_cost_tracker"
51
-
52
- LlmCostTracker.configure { |c| c.storage_backend = :log }
53
-
54
- LlmCostTracker.track(
55
- provider: :openai,
56
- model: "gpt-4o",
57
- input_tokens: 1000,
58
- output_tokens: 200,
59
- feature: "demo"
60
- )
61
- # => [LlmCostTracker] openai/gpt-4o tokens=1000+200 cost=$0.004500 tags={:feature=>"demo"}
62
- ```
63
-
64
29
  ## Usage
65
30
 
66
31
  ### Patch an existing client's Faraday connection
@@ -78,19 +43,7 @@ OpenAI.configure do |config|
78
43
  end
79
44
  ```
80
45
 
81
- `tags:` can be a callable so `Current` attributes are evaluated per request:
82
-
83
- ```ruby
84
- class Current < ActiveSupport::CurrentAttributes
85
- attribute :user, :tenant, :workflow
86
- end
87
-
88
- # application_controller.rb
89
- before_action do
90
- Current.user = current_user
91
- Current.workflow = "chat"
92
- end
93
- ```
46
+ `tags:` can be a callable and is evaluated on each request.
94
47
 
95
48
  ### Raw Faraday
96
49
 
@@ -105,7 +58,41 @@ end
105
58
  conn.post("/v1/responses", { model: "gpt-5-mini", input: "Hello!" })
106
59
  ```
107
60
 
108
- Place `llm_cost_tracker` inside the Faraday stack where it can see the final response body. For streaming APIs, tracking requires the final body to expose provider usage; otherwise the gem warns and skips — use manual tracking there.
61
+ Place `llm_cost_tracker` inside the Faraday stack where it can see the final response body.
62
+
63
+ ### Streaming
64
+
65
+ Streaming is captured automatically for OpenAI, Anthropic, and Gemini when the request goes through the Faraday middleware. The middleware tees the `on_data` callback, keeps the stream flowing to your code, and records the final usage block once the response completes.
66
+
67
+ ```ruby
68
+ # OpenAI: include usage in the final chunk
69
+ client.chat(parameters: {
70
+ model: "gpt-4o",
71
+ messages: [...],
72
+ stream: proc { |chunk| ... },
73
+ stream_options: { include_usage: true }
74
+ })
75
+ ```
76
+
77
+ Anthropic emits usage in `message_start` + `message_delta` events. Gemini's `:streamGenerateContent` endpoint includes `usageMetadata`; usage from the final chunk is used.
78
+
79
+ Streamed calls are stored with `stream: true` and `usage_source: "stream_final"`. If the provider never sends final usage, the call is still recorded with `usage_source: "unknown"` so those calls surface on the Data Quality page.
80
+
81
+ For non-Faraday clients (raw `Net::HTTP`, custom SSE code, Azure OpenAI), use the explicit helper:
82
+
83
+ ```ruby
84
+ LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o") do |stream|
85
+ my_client.stream(...) { |chunk| stream.event(chunk) }
86
+ end
87
+
88
+ # Or skip the chunk parsing entirely if you already know the totals:
89
+ LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o") do |stream|
90
+ # ... your streaming loop ...
91
+ stream.usage(input_tokens: 120, output_tokens: 45)
92
+ end
93
+ ```
94
+
95
+ Run `bin/rails g llm_cost_tracker:add_streaming` once on existing installs to add the `stream` and `usage_source` columns.
109
96
 
110
97
  ### Manual tracking
111
98
 
@@ -148,7 +135,7 @@ LlmCostTracker.configure do |config|
148
135
  end
149
136
  ```
150
137
 
151
- Pricing is best-effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem doesn't know.
138
+ Pricing is best effort. OpenRouter-style IDs like `openai/gpt-4o-mini` are normalized to built-in names when possible. Use `prices_file` / `pricing_overrides` for fine-tunes, gateway-specific IDs, enterprise discounts, batch pricing, or models the gem does not know.
152
139
 
153
140
  `storage_error_behavior = :warn` (default) lets LLM responses continue if storage fails; `:raise` exposes `StorageError#original_error`.
154
141
 
@@ -160,7 +147,7 @@ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
160
147
 
161
148
  ### Keeping prices current
162
149
 
163
- Built-in prices are in `lib/llm_cost_tracker/prices.json`. The gem never fetches pricing on boot. For production, generate a local overrides file and point the gem at it:
150
+ Built-in prices live in `lib/llm_cost_tracker/prices.json`. The gem never fetches pricing on boot. For production, keep a local snapshot under `config/` and point the gem at it:
164
151
 
165
152
  ```bash
166
153
  bin/rails generate llm_cost_tracker:prices
@@ -175,7 +162,26 @@ bin/rails generate llm_cost_tracker:prices
175
162
  }
176
163
  ```
177
164
 
178
- `pricing_overrides` has the highest precedence; use it for small Ruby-only tweaks, `prices_file` for broader tables.
165
+ `pricing_overrides` has the highest precedence. Use it for a handful of Ruby-side overrides; use `prices_file` when you want a local pricing table under source control.
166
+
167
+ To refresh prices on demand:
168
+
169
+ ```bash
170
+ bin/rails llm_cost_tracker:prices:sync
171
+ ```
172
+
173
+ `llm_cost_tracker:prices:sync` refreshes the current registry from two structured sources: LiteLLM first, OpenRouter second. LiteLLM is the primary source; OpenRouter fills gaps and helps surface discrepancies.
174
+
175
+ `llm_cost_tracker:prices:sync` / `llm_cost_tracker:prices:check` perform HTTP GET requests to:
176
+
177
+ - LiteLLM pricing JSON: `https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json`
178
+ - OpenRouter Models API: `https://openrouter.ai/api/v1/models`
179
+
180
+ If `config.prices_file` is configured, the task syncs that file automatically; otherwise it works from the built-in snapshot. `_source: "manual"` entries are never touched. Models that are still in your file but missing from both upstream sources are left alone and reported as orphaned. For intentional custom entries, mark them as manual so they stop showing up in orphaned warnings.
181
+
182
+ Use `PREVIEW=1` to see the diff without writing. Use `STRICT=1` to fail instead of applying a partial refresh when a source fails or the validator rejects a price. Use `bin/rails llm_cost_tracker:prices:check` in CI to print the current diff and exit non-zero when the snapshot has drifted or refresh fails.
183
+
184
+ Large price changes are flagged during sync. If a specific entry is expected to move by more than 3x, add `_validator_override: ["skip_relative_change"]` to that entry in your local price file.
179
185
 
180
186
  ## Budget enforcement
181
187
 
@@ -194,14 +200,31 @@ rescue LlmCostTracker::BudgetExceededError => e
194
200
  # e.monthly_total, e.budget, e.last_event
195
201
  ```
196
202
 
197
- `:block_requests` is best-effort under concurrency, not a transactional cap. Use provider/gateway-level limits for strict quotas.
203
+ `:block_requests` is a **guardrail, not a hard cap**. The preflight and the spend-recording write are separate statements, so under Puma / Sidekiq concurrency multiple workers can all pass the preflight and then collectively overshoot the budget. The setting reliably *stops new requests after the overshoot is visible* — it does not prevent the overshoot itself. For strict quotas use a provider- or gateway-level limit, or a database-backed counter outside this gem.
204
+
205
+ Preflight is wired into the Faraday middleware automatically. When you record events via `LlmCostTracker.track` / `track_stream` and also want the same preflight, opt in:
206
+
207
+ ```ruby
208
+ LlmCostTracker.track(
209
+ provider: "openai",
210
+ model: "gpt-4o",
211
+ input_tokens: 120,
212
+ output_tokens: 45,
213
+ enforce_budget: true
214
+ )
215
+
216
+ LlmCostTracker.track_stream(provider: "openai", model: "gpt-4o", enforce_budget: true) do |stream|
217
+ # raises BudgetExceededError before the block runs when over budget
218
+ end
219
+
220
+ LlmCostTracker.enforce_budget! # standalone preflight
221
+ ```
198
222
 
199
223
  ## Querying costs
200
224
 
201
225
  ```bash
202
226
  bin/rails llm_cost_tracker:report
203
227
  DAYS=7 bin/rails llm_cost_tracker:report
204
- DAYS=90 bin/rails llm_cost_tracker:prune # delete calls older than N days in batches
205
228
  ```
206
229
 
207
230
  ```ruby
@@ -230,7 +253,15 @@ LlmCostTracker::LlmApiCall.by_tags(user_id: 42, feature: "chat").this_month.tota
230
253
  LlmCostTracker::LlmApiCall.between(1.week.ago, Time.current).cost_by_model
231
254
  ```
232
255
 
233
- ### Tag storage
256
+ ## Retention
257
+
258
+ Retention is not enforced automatically. Use the rake task below if you need to delete older records in batches.
259
+
260
+ ```bash
261
+ DAYS=90 bin/rails llm_cost_tracker:prune # delete calls older than N days in batches
262
+ ```
263
+
264
+ ## Tag storage
234
265
 
235
266
  New installs use `jsonb` + GIN on PostgreSQL:
236
267
 
@@ -252,7 +283,7 @@ bin/rails db:migrate
252
283
 
253
284
  ## Dashboard (optional)
254
285
 
255
- Opt-in Rails Engine. Plain ERB, inline CSS, no JS. Requires Rails 7.1+; the core middleware works without Rails.
286
+ Optional Rails Engine. Plain ERB, no JavaScript framework, no asset pipeline required. Requires Rails 7.1+; the core middleware works without Rails.
256
287
 
257
288
  ```ruby
258
289
  # config/application.rb (or an initializer)
@@ -272,7 +303,7 @@ Routes (GET-only; CSV export included):
272
303
  - `/llm-costs/tags/:key` — breakdown by values of a given tag key
273
304
  - `/llm-costs/data_quality` — unknown pricing share, untagged calls, missing latency
274
305
 
275
- > ⚠️ **No built-in auth.** Tags carry whatever your app puts in them. Protect the mount point with your app's auth.
306
+ > ⚠️ **No built-in auth.** Tags carry whatever your app puts in them. Protect the mount point with your application's authentication.
276
307
 
277
308
  ### Basic auth
278
309
 
@@ -331,7 +362,7 @@ config.custom_storage = ->(event) {
331
362
  config.openai_compatible_providers["gateway.example.com"] = "internal_gateway"
332
363
  ```
333
364
 
334
- Configured hosts are parsed with the OpenAI-compatible usage shape (`prompt_tokens` / `completion_tokens` / `total_tokens`, `input_tokens` / `output_tokens`, and optional cached-input details). Covers OpenRouter, DeepSeek, and private gateways exposing Chat Completions / Responses / Completions / Embeddings.
365
+ Configured hosts are parsed using the OpenAI-compatible usage shape (`prompt_tokens` / `completion_tokens` / `total_tokens`, `input_tokens` / `output_tokens`, and optional cached-input details). This covers OpenRouter, DeepSeek, and private gateways exposing Chat Completions / Responses / Completions / Embeddings.
335
366
 
336
367
  ## Custom parser
337
368
 
@@ -373,20 +404,32 @@ LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
373
404
  | Google Gemini | ✅ | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite, 1.5 Pro/Flash |
374
405
  | Any other | 🔧 | Custom parser |
375
406
 
376
- Endpoints: OpenAI Chat Completions / Responses / Completions / Embeddings; OpenAI-compatible equivalents; Anthropic Messages; Gemini `generateContent` with `usageMetadata`.
407
+ Endpoints: OpenAI Chat Completions / Responses / Completions / Embeddings; OpenAI-compatible equivalents; Anthropic Messages; Gemini `generateContent` and `streamGenerateContent`. All endpoints support streaming capture.
377
408
 
378
409
  ## Safety
379
410
 
380
- - No external HTTP calls.
411
+ - No external HTTP calls at request-tracking time.
381
412
  - No prompt or response bodies stored.
382
413
  - Faraday responses not modified.
383
414
  - Storage failures non-fatal by default (`storage_error_behavior = :warn`).
384
- - Budget / unknown-pricing errors are raised only when you opt in.
415
+ - Budget and unknown-pricing errors are raised only when you opt in.
416
+
417
+ ## Thread safety (Puma, Sidekiq)
418
+
419
+ The gem is designed for multi-threaded hosts — Puma with `max_threads > 1` and Sidekiq with `concurrency > 1` are both supported. A few rules:
420
+
421
+ - **Configure once at boot.** `LlmCostTracker.configure` deep-freezes `default_tags`, `pricing_overrides`, `report_tag_breakdowns`, and `openai_compatible_providers` when the block returns. Mutating or replacing shared fields through `LlmCostTracker.configuration` raises `FrozenError`.
422
+ - **Use `:active_record` storage for shared ledgers.** Puma workers and Sidekiq processes do not share memory; `:log` and `:custom` backends see per-process state only. `:active_record` writes to a single table and is the right choice for dashboards and budget checks across processes.
423
+ - **Size your connection pool.** Each tracked call on the middleware path issues up to three SQL queries (preflight `SUM`, `INSERT`, post-check `SUM`). Make sure the AR pool covers `puma max_threads + sidekiq concurrency` plus your app's own usage.
424
+ - **Don't share a `StreamCollector` across threads you don't own.** The collector itself is thread-safe — `event`, `usage`, and `finish!` synchronize internally and `finish!` is idempotent — but the documented pattern is one collector per stream.
425
+ - **`finish!` is a barrier.** Once a stream is finished, later `event`, `usage`, or `model=` calls raise `FrozenError` instead of mutating a closed collector.
426
+ - **`ActiveSupport::Notifications` subscribers run synchronously** in the caller's thread. Keep them fast or hand off to a background job; otherwise they add latency to every tracked call.
427
+ - **`storage_error_behavior = :raise` inside Sidekiq** will retry the job, which can duplicate an expensive LLM call. Prefer `:warn` plus a Notifications subscriber, or `:ignore`, for worker contexts.
385
428
 
386
429
  ## Known limitations
387
430
 
388
- - `:block_requests` is best-effort under concurrency; use an external quota system for hard caps.
389
- - Streaming/SSE tracked only when Faraday exposes a final body with usage.
431
+ - `:block_requests` is a best-effort guardrail, not a hard cap. Concurrent workers can pass preflight simultaneously and collectively overshoot the budget. Use an external quota system if you need a transactional cap.
432
+ - Streaming capture relies on the provider emitting a final-usage event (OpenAI needs `stream_options: { include_usage: true }`); missing events are recorded with `usage_source: "unknown"` so they surface on the Data Quality page.
390
433
  - Anthropic cache TTL variants (1h vs 5min writes) not modeled separately.
391
434
  - OpenAI reasoning tokens included in output totals; separate reasoning-token attribution not stored.
392
435
 
data/Rakefile CHANGED
@@ -4,6 +4,8 @@ require "bundler/gem_tasks"
4
4
  require "rspec/core/rake_task"
5
5
  require "rubocop/rake_task"
6
6
 
7
+ Dir[File.expand_path("lib/tasks/**/*.rake", __dir__)].each { |path| load path }
8
+
7
9
  RSpec::Core::RakeTask.new(:spec)
8
10
  RuboCop::RakeTask.new(:rubocop)
9
11
 
data/app/controllers/llm_cost_tracker/assets_controller.rb CHANGED
@@ -5,9 +5,8 @@ module LlmCostTracker
5
5
  skip_forgery_protection if respond_to?(:skip_forgery_protection)
6
6
 
7
7
  def stylesheet
8
- path = File.join(LlmCostTracker::Assets.root, LlmCostTracker::Assets::STYLESHEET)
9
8
  response.set_header("Cache-Control", "public, max-age=31536000, immutable")
10
- send_file path, type: "text/css", disposition: "inline"
9
+ send_file LlmCostTracker::Assets::STYLESHEET_PATH, type: "text/css", disposition: "inline"
11
10
  end
12
11
  end
13
12
  end
data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb CHANGED
@@ -2,7 +2,12 @@
2
2
 
3
3
  module LlmCostTracker
4
4
  module DashboardFilterHelper
5
- FILTER_PARAM_KEYS = %i[from to provider model tag sort page per].freeze
5
+ FILTER_PARAM_KEYS = %i[from to provider model stream usage_source tag sort page per].freeze
6
+
7
+ STREAM_FILTER_OPTIONS = [
8
+ ["Streaming only", "yes"],
9
+ ["Non-streaming only", "no"]
10
+ ].freeze
6
11
 
7
12
  def any_filter_applied?
8
13
  FILTER_PARAM_KEYS.any? { |key| params[key].present? }
data/app/services/llm_cost_tracker/dashboard/data_quality.rb CHANGED
@@ -8,15 +8,18 @@ module LlmCostTracker
8
8
  :untagged_calls_count,
9
9
  :missing_latency_count,
10
10
  :latency_column_present,
11
+ :streaming_count,
12
+ :streaming_missing_usage_count,
13
+ :stream_column_present,
11
14
  :unknown_pricing_by_model
12
15
  )
13
16
 
14
- # Computes data quality metrics: coverage of cost, tags, and latency.
15
17
  class DataQuality
16
18
  class << self
17
19
  def call(scope: LlmCostTracker::LlmApiCall.all)
18
20
  total = scope.count
19
21
  latency_present = LlmCostTracker::LlmApiCall.latency_column?
22
+ stream_present = LlmCostTracker::LlmApiCall.stream_column?
20
23
 
21
24
  DataQualityStats.new(
22
25
  total_calls: total,
@@ -24,6 +27,9 @@ module LlmCostTracker
24
27
  untagged_calls_count: total - scope.with_json_tags.count,
25
28
  missing_latency_count: latency_present ? scope.where(latency_ms: nil).count : nil,
26
29
  latency_column_present: latency_present,
30
+ streaming_count: stream_present ? scope.streaming.count : nil,
31
+ streaming_missing_usage_count: streaming_missing_usage_count(scope, stream_present),
32
+ stream_column_present: stream_present,
27
33
  unknown_pricing_by_model: scope.unknown_pricing
28
34
  .group(:model)
29
35
  .order(Arel.sql("COUNT(*) DESC"))
@@ -32,6 +38,15 @@ module LlmCostTracker
32
38
  .to_h
33
39
  )
34
40
  end
41
+
42
+ private
43
+
44
+ def streaming_missing_usage_count(scope, stream_present)
45
+ return nil unless stream_present
46
+ return nil unless LlmCostTracker::LlmApiCall.usage_source_column?
47
+
48
+ scope.streaming_missing_usage.count
49
+ end
35
50
  end
36
51
  end
37
52
  end
data/app/services/llm_cost_tracker/dashboard/filter.rb CHANGED
@@ -25,6 +25,8 @@ module LlmCostTracker
25
25
  filtered_scope = apply_date_filters(filtered_scope)
26
26
  filtered_scope = apply_exact_filter(filtered_scope, :provider)
27
27
  filtered_scope = apply_exact_filter(filtered_scope, :model)
28
+ filtered_scope = apply_stream_filter(filtered_scope)
29
+ filtered_scope = apply_usage_source_filter(filtered_scope)
28
30
  apply_tag_filters(filtered_scope)
29
31
  end
30
32
 
@@ -64,6 +66,26 @@ module LlmCostTracker
64
66
  relation.by_tags(tags)
65
67
  end
66
68
 
69
+ def apply_stream_filter(relation)
70
+ value = string_param(:stream)
71
+ return relation if value.nil?
72
+ return relation unless relation.klass.stream_column?
73
+
74
+ case value.downcase
75
+ when "yes", "true", "1" then relation.where(stream: true)
76
+ when "no", "false", "0" then relation.where(stream: [false, nil])
77
+ else relation
78
+ end
79
+ end
80
+
81
+ def apply_usage_source_filter(relation)
82
+ value = string_param(:usage_source)
83
+ return relation if value.nil?
84
+ return relation unless relation.klass.usage_source_column?
85
+
86
+ relation.where(usage_source: value)
87
+ end
88
+
67
89
  def tag_params
68
90
  tags = hash_param(:tag)
69
91
 
data/app/views/llm_cost_tracker/calls/index.html.erb CHANGED
@@ -34,6 +34,16 @@
34
34
  id: "lct-model" %>
35
35
  </div>
36
36
 
37
+ <% if LlmCostTracker::LlmApiCall.stream_column? %>
38
+ <div class="lct-field">
39
+ <label for="lct-stream">Stream</label>
40
+ <%= select_tag :stream,
41
+ options_for_select(LlmCostTracker::DashboardFilterHelper::STREAM_FILTER_OPTIONS, params[:stream]),
42
+ include_blank: "All calls",
43
+ id: "lct-stream" %>
44
+ </div>
45
+ <% end %>
46
+
37
47
  <div class="lct-field">
38
48
  <label for="lct-sort">Sort</label>
39
49
  <select id="lct-sort" name="sort">
data/app/views/llm_cost_tracker/dashboard/index.html.erb CHANGED
@@ -29,6 +29,16 @@
29
29
  id: "lct-overview-model" %>
30
30
  </div>
31
31
 
32
+ <% if LlmCostTracker::LlmApiCall.stream_column? %>
33
+ <div class="lct-field">
34
+ <label for="lct-overview-stream">Stream</label>
35
+ <%= select_tag :stream,
36
+ options_for_select(LlmCostTracker::DashboardFilterHelper::STREAM_FILTER_OPTIONS, params[:stream]),
37
+ include_blank: "All calls",
38
+ id: "lct-overview-stream" %>
39
+ </div>
40
+ <% end %>
41
+
32
42
  <div class="lct-filter-actions">
33
43
  <button class="lct-button" type="submit">Apply</button>
34
44
  <%= link_to("Reset", root_path, class: "lct-button lct-button-secondary") if any_filter_applied? %>
data/app/views/llm_cost_tracker/data_quality/index.html.erb CHANGED
@@ -2,6 +2,9 @@
2
2
  <% known_pricing_calls = total - @stats.unknown_pricing_count %>
3
3
  <% tagged_calls = total - @stats.untagged_calls_count %>
4
4
  <% latency_calls = @stats.latency_column_present ? total - @stats.missing_latency_count : nil %>
5
+ <% streaming_count = @stats.streaming_count %>
6
+ <% streaming_missing_usage = @stats.streaming_missing_usage_count %>
7
+ <% streams_with_usage = streaming_count && streaming_missing_usage ? streaming_count - streaming_missing_usage : nil %>
5
8
 
6
9
  <section class="lct-panel lct-toolbar">
7
10
  <div class="lct-toolbar-head">
@@ -36,6 +39,16 @@
36
39
  id: "lct-quality-model" %>
37
40
  </div>
38
41
 
42
+ <% if LlmCostTracker::LlmApiCall.stream_column? %>
43
+ <div class="lct-field">
44
+ <label for="lct-quality-stream">Stream</label>
45
+ <%= select_tag :stream,
46
+ options_for_select(LlmCostTracker::DashboardFilterHelper::STREAM_FILTER_OPTIONS, params[:stream]),
47
+ include_blank: "All calls",
48
+ id: "lct-quality-stream" %>
49
+ </div>
50
+ <% end %>
51
+
39
52
  <div class="lct-filter-actions">
40
53
  <button class="lct-button" type="submit">Apply</button>
41
54
  <%= link_to("Reset", data_quality_path, class: "lct-button lct-button-secondary") if any_filter_applied? %>
@@ -84,6 +97,22 @@
84
97
  <p class="lct-stat-sub"><%= percent(coverage_percent(@stats.missing_latency_count, total)) %> of calls</p>
85
98
  </article>
86
99
  <% end %>
100
+
101
+ <% if @stats.stream_column_present %>
102
+ <article class="lct-stat">
103
+ <p class="lct-stat-label">Streaming calls</p>
104
+ <p class="lct-stat-value"><%= number(streaming_count) %></p>
105
+ <p class="lct-stat-sub"><%= percent(coverage_percent(streaming_count, total)) %> of calls</p>
106
+ </article>
107
+
108
+ <% if streaming_missing_usage && streaming_count.positive? %>
109
+ <article class="lct-stat">
110
+ <p class="lct-stat-label">Streams without usage</p>
111
+ <p class="lct-stat-value"><%= number(streaming_missing_usage) %></p>
112
+ <p class="lct-stat-sub"><%= percent(coverage_percent(streaming_missing_usage, streaming_count)) %> of streams</p>
113
+ </article>
114
+ <% end %>
115
+ <% end %>
87
116
  </div>
88
117
  </div>
89
118
  </section>
@@ -132,6 +161,16 @@
132
161
  <td><%= render "llm_cost_tracker/shared/bar", value: latency_coverage, max: 100.0 %></td>
133
162
  </tr>
134
163
  <% end %>
164
+
165
+ <% if @stats.stream_column_present && streams_with_usage && streaming_count.to_i.positive? %>
166
+ <% stream_coverage = coverage_percent(streams_with_usage, streaming_count) %>
167
+ <tr>
168
+ <td>Streaming usage captured</td>
169
+ <td class="lct-num"><%= percent(stream_coverage) %></td>
170
+ <td class="lct-num"><%= number(streams_with_usage) %> / <%= number(streaming_count) %></td>
171
+ <td><%= render "llm_cost_tracker/shared/bar", value: stream_coverage, max: 100.0 %></td>
172
+ </tr>
173
+ <% end %>
135
174
  </tbody>
136
175
  </table>
137
176
  </section>
@@ -170,6 +209,13 @@
170
209
  <td>Make sure latency capture is enabled on every tracked request.</td>
171
210
  </tr>
172
211
  <% end %>
212
+ <% if @stats.stream_column_present && streaming_missing_usage.to_i.positive? %>
213
+ <tr>
214
+ <td>Streams without usage</td>
215
+ <td>Token totals undercount when streaming responses drop the final usage event.</td>
216
+ <td>Send OpenAI requests with <code class="lct-code">stream_options: { include_usage: true }</code>, or wrap custom clients with <code class="lct-code">LlmCostTracker.track_stream</code>.</td>
217
+ </tr>
218
+ <% end %>
173
219
  </tbody>
174
220
  </table>
175
221
  </section>
data/lib/llm_cost_tracker/assets.rb CHANGED
@@ -6,19 +6,14 @@ module LlmCostTracker
6
6
  module Assets
7
7
  ROOT = File.expand_path("../../app/assets/llm_cost_tracker", __dir__)
8
8
  STYLESHEET = "application.css"
9
+ STYLESHEET_PATH = File.join(ROOT, STYLESHEET).freeze
10
+ STYLESHEET_FINGERPRINT = Digest::SHA256.file(STYLESHEET_PATH).hexdigest[0, 12].freeze
11
+ STYLESHEET_FILENAME = "application-#{STYLESHEET_FINGERPRINT}.css".freeze
9
12
 
10
13
  class << self
11
- def root
12
- ROOT
13
- end
14
-
15
- def stylesheet_fingerprint
16
- @stylesheet_fingerprint ||= Digest::SHA256.file(File.join(ROOT, STYLESHEET)).hexdigest[0, 12]
17
- end
18
-
19
- def stylesheet_filename
20
- "application-#{stylesheet_fingerprint}.css"
21
- end
14
+ def root = ROOT
15
+ def stylesheet_fingerprint = STYLESHEET_FINGERPRINT
16
+ def stylesheet_filename = STYLESHEET_FILENAME
22
17
  end
23
18
  end
24
19
  end