llm_cost_tracker 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +69 -0
  3. data/README.md +333 -30
  4. data/lib/llm_cost_tracker/budget.rb +85 -0
  5. data/lib/llm_cost_tracker/configuration.rb +82 -3
  6. data/lib/llm_cost_tracker/cost.rb +15 -0
  7. data/lib/llm_cost_tracker/errors.rb +37 -0
  8. data/lib/llm_cost_tracker/event.rb +24 -0
  9. data/lib/llm_cost_tracker/event_metadata.rb +54 -0
  10. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
  11. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +20 -0
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +36 -0
  16. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
  19. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
  20. data/lib/llm_cost_tracker/llm_api_call.rb +45 -14
  21. data/lib/llm_cost_tracker/logging.rb +44 -0
  22. data/lib/llm_cost_tracker/middleware/faraday.rb +54 -13
  23. data/lib/llm_cost_tracker/parsed_usage.rb +45 -0
  24. data/lib/llm_cost_tracker/parsers/anthropic.rb +6 -4
  25. data/lib/llm_cost_tracker/parsers/base.rb +2 -0
  26. data/lib/llm_cost_tracker/parsers/gemini.rb +12 -5
  27. data/lib/llm_cost_tracker/parsers/openai.rb +11 -22
  28. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +48 -0
  29. data/lib/llm_cost_tracker/parsers/openai_usage.rb +33 -0
  30. data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
  31. data/lib/llm_cost_tracker/price_registry.rb +99 -0
  32. data/lib/llm_cost_tracker/prices.json +51 -0
  33. data/lib/llm_cost_tracker/pricing.rb +103 -77
  34. data/lib/llm_cost_tracker/railtie.rb +8 -0
  35. data/lib/llm_cost_tracker/report.rb +29 -0
  36. data/lib/llm_cost_tracker/report_data.rb +84 -0
  37. data/lib/llm_cost_tracker/report_formatter.rb +59 -0
  38. data/lib/llm_cost_tracker/storage/active_record_backend.rb +19 -0
  39. data/lib/llm_cost_tracker/storage/active_record_store.rb +21 -12
  40. data/lib/llm_cost_tracker/storage/backends.rb +26 -0
  41. data/lib/llm_cost_tracker/storage/custom_backend.rb +16 -0
  42. data/lib/llm_cost_tracker/storage/log_backend.rb +28 -0
  43. data/lib/llm_cost_tracker/tag_accessors.rb +23 -0
  44. data/lib/llm_cost_tracker/tag_query.rb +38 -0
  45. data/lib/llm_cost_tracker/tags_column.rb +16 -0
  46. data/lib/llm_cost_tracker/tracker.rb +43 -97
  47. data/lib/llm_cost_tracker/unknown_pricing.rb +40 -0
  48. data/lib/llm_cost_tracker/value_object.rb +45 -0
  49. data/lib/llm_cost_tracker/version.rb +1 -1
  50. data/lib/llm_cost_tracker.rb +49 -6
  51. data/lib/tasks/llm_cost_tracker.rake +9 -0
  52. data/llm_cost_tracker.gemspec +4 -3
  53. metadata +39 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7b40f1010c79358da89ffdd10637f59fa90e24aa0f50aec364828d2e2cbf5b9
4
- data.tar.gz: d12d1cf407b87afd6e1084c22ceda143c7ab9bf5e6ea6825d70a8e24969cafa5
3
+ metadata.gz: 0d1192ed209333057bd2522173d05530b4f45c6bb63242189c75354a83b5a746
4
+ data.tar.gz: 486555221b66a0da6cb867d207fb76349f0d56a23da623418da35aab7672875f
5
5
  SHA512:
6
- metadata.gz: 949157f0a6718bc03f8f0d825982ed732df2754ddf1e4ee07b18522b0e20cc4367a97c599071bcda95bbdda4dde0e160f5d586a9b42a0dd8b1f3c89910286547
7
- data.tar.gz: 9ea9007142d157446271bcf81bc4786e4b22a00f6e353dc2e3dc26c1be12d9abf88aed8d8852da37778b5fe3f71fcd4422c6153d8a531f195adaf0d0b9bb8dd2
6
+ metadata.gz: 8e74531effe3fc425de0384c13c7717c54b8be8d683493c92665b356ed8142d2629c9ce555f5fff703c2cd4a676d69e82efb39de767fc75fdbdcabdca9289f2c
7
+ data.tar.gz: 38e9744e157248e67bebcdd818b356c06b923d75a216b406f96f0b1b10368d4a118cbb9593461e2a19d19bdb5b10fc115c2f871d15fcf8669e656ad8ea8e034a
data/CHANGELOG.md CHANGED
@@ -5,6 +5,75 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.3] - 2026-04-18
9
+
10
+ ### Thread-safety, pricing UX, and internal hardening
11
+
12
+ **Thread-safety**
13
+
14
+ - Guard `PriceRegistry.file_prices` and `Pricing.sorted_price_keys` memoization with mutexes.
15
+
16
+ **Pricing UX**
17
+
18
+ - Warn on unknown keys in local prices files.
19
+ - Add `llm_cost_tracker:prices` generator for creating a local price override template.
20
+ - Document that budget enforcement skips events with unknown pricing.
21
+
22
+ **Onboarding UX**
23
+
24
+ - Add callable Faraday `tags:` support for per-request Rails attribution with `Current`.
25
+ - Add `llm_cost_tracker:report` rake task for a quick terminal cost report.
26
+ - Rework README with a no-database quick try, report output, and safety guarantees.
27
+
28
+ **Internal refactor (no behavior change)**
29
+
30
+ - Extract `Logging` module and remove duplicated warning helpers.
31
+ - Extract `TagQuery`, `TagsColumn`, and `TagAccessors` helpers from `LlmApiCall`.
32
+ - Introduce typed `Cost`, `Event`, and `ParsedUsage` value objects while preserving hash-like access.
33
+ - Move storage dispatch into dedicated backend objects with a uniform save contract.
34
+ - Split `Report` into `ReportData` and `ReportFormatter`.
35
+ - Use `OpenaiUsage` composition for OpenAI-compatible providers instead of parser inheritance.
36
+ - Move config enum validation into `Configuration` setters.
37
+ - Memoize the merged built-in/file/override prices table.
38
+ - Restrict the Gemini parser to `generateContent` and `streamGenerateContent` paths.
39
+
40
+ ## [0.1.2] - 2026-04-18
41
+
42
+ ### Added
43
+
44
+ - Auto-detect OpenRouter and DeepSeek as OpenAI-compatible providers.
45
+ - Add `openai_compatible_providers` configuration for private OpenAI-compatible gateways.
46
+ - Add `BudgetExceededError` and `budget_exceeded_behavior` for best-effort budget guardrails.
47
+ - Add `:raise` and `:block_requests` budget behaviors; `:block_requests` is not a hard cap under concurrency.
48
+ - Add `StorageError` and `storage_error_behavior` so storage failures do not have to break host LLM calls.
49
+ - Add `UnknownPricingError` and `unknown_pricing_behavior` for unknown model pricing.
50
+ - Add built-in `prices.json` registry with metadata and source URLs.
51
+ - Add `prices_file` configuration for local JSON/YAML pricing overrides.
52
+ - Add `with_cost`, `without_cost`, and `unknown_pricing` ActiveRecord scopes.
53
+ - Add `latency_ms` tracking for Faraday calls, manual tracking, notifications, and ActiveRecord storage.
54
+ - Add `with_latency`, `average_latency_ms`, `latency_by_model`, and `latency_by_provider`.
55
+ - Use PostgreSQL `jsonb` storage for tags in newly generated migrations.
56
+ - Add a GIN index on `llm_api_calls.tags` for PostgreSQL installs.
57
+ - Add adapter-aware `by_tag` querying with JSONB containment on PostgreSQL and text fallback elsewhere.
58
+ - Add `by_tags`, `by_user`, and `by_feature` scopes for common attribution queries.
59
+ - Add `llm_cost_tracker:upgrade_tags_to_jsonb` generator for existing PostgreSQL installs.
60
+ - Add `llm_cost_tracker:upgrade_cost_precision` generator for widening stored cost columns.
61
+ - Add `llm_cost_tracker:add_latency_ms` generator for existing installs.
62
+
63
+ ### Changed
64
+
65
+ - Store tags as a Hash for JSON-backed columns and as JSON text for fallback columns.
66
+ - Keep internal usage metadata such as cache token counts out of stored attribution tags.
67
+ - Normalize provider-prefixed model IDs like `openai/gpt-4o-mini` for built-in price lookup.
68
+ - Normalize configured OpenAI-compatible host keys to lowercase after configuration.
69
+ - Avoid double fuzzy-match passes during price lookup.
70
+ - Widen generated cost decimal columns to `precision: 20, scale: 8`.
71
+ - Count Gemini `thoughtsTokenCount` as output tokens for better thinking-mode cost estimates.
72
+ - Warn when Faraday exposes an unreadable streaming/SSE response body.
73
+ - Document tag storage behavior, budget guardrail limits, known limitations, common tag scopes, and upgrade flows.
74
+ - Clarify that budget errors raised after a response occur after the event has been recorded.
75
+ - Route custom storage exceptions that inherit from `LlmCostTracker::Error` through `storage_error_behavior`.
76
+
8
77
  ## [0.1.1] - 2026-04-17
9
78
 
10
79
  ### Fixed
data/README.md CHANGED
@@ -1,12 +1,31 @@
1
1
  # LlmCostTracker
2
2
 
3
- **Self-hosted LLM API cost tracking for Ruby and Rails apps.**
3
+ **See where your Rails app spends money on LLM APIs.**
4
4
 
5
- Track token usage and estimated costs for OpenAI, Anthropic, and Google Gemini calls from Faraday-based Ruby clients. Store the data in your own database, tag calls by user or feature, and get budget alerts without adding an external SaaS or proxy.
5
+ Track cost by user, tenant, feature, provider, and model, all in your own database. No proxy. No SaaS required.
6
6
 
7
- [![Gem Version](https://badge.fury.io/rb/llm_cost_tracker.svg)](https://rubygems.org/gems/llm_cost_tracker)
7
+ [![Gem Version](https://img.shields.io/gem/v/llm_cost_tracker.svg)](https://rubygems.org/gems/llm_cost_tracker)
8
8
  [![CI](https://github.com/sergey-homenko/llm_cost_tracker/actions/workflows/ruby.yml/badge.svg)](https://github.com/sergey-homenko/llm_cost_tracker/actions)
9
9
 
10
+ ```text
11
+ LLM Cost Report (last 30 days)
12
+
13
+ Total cost: $127.420000
14
+ Requests: 4,218
15
+ Avg latency: 812ms
16
+ Unknown pricing: 0
17
+
18
+ By model:
19
+ gpt-4o $82.100000
20
+ claude-sonnet-4-6 $31.200000
21
+ gemini-2.5-flash $14.120000
22
+
23
+ By feature:
24
+ chat $73.500000
25
+ summarizer $29.220000
26
+ translate $24.700000
27
+ ```
28
+
10
29
  ## Why?
11
30
 
12
31
  Every Rails app integrating LLMs faces the same problem: **you don't know how much AI is costing you** until the invoice arrives. Full observability platforms like Langfuse and Helicone are powerful, but sometimes you just need a small Rails-native cost ledger that lives in your app database.
@@ -17,7 +36,9 @@ Every Rails app integrating LLMs faces the same problem: **you don't know how mu
17
36
  - 🏠 **Self-hosted** — your data stays in your database
18
37
  - 🧩 **Client-light** — works with raw Faraday and LLM gems that expose their Faraday connection
19
38
  - 🏷️ **Attribution-first** — tag spend by feature, tenant, user, job, or environment
20
- - 💸 **Budget-aware** — emit notifications and callbacks before spend surprises you
39
+ - 🌐 **OpenAI-compatible** — auto-detect OpenRouter and DeepSeek, with custom compatible hosts configurable
40
+ - 🛑 **Budget guardrails** — notify, raise, or block requests when monthly spend is exhausted
41
+ - 📊 **Quick reports** — print a terminal cost report with one rake task
21
42
 
22
43
  This gem is intentionally not a tracing platform, prompt CMS, eval system, or gateway. It focuses on the boring but valuable question: "What did this app spend on LLM APIs, and where did that spend come from?"
23
44
 
@@ -36,28 +57,41 @@ bin/rails generate llm_cost_tracker:install
36
57
  bin/rails db:migrate
37
58
  ```
38
59
 
39
- ## Quick Start
40
-
41
- ### Option 1: Faraday Middleware
60
+ ## Try It In 30 Seconds
42
61
 
43
- If your LLM client uses Faraday, add the middleware to that connection:
62
+ Try cost calculation without a database or migration:
44
63
 
45
64
  ```ruby
46
- conn = Faraday.new(url: "https://api.openai.com") do |f|
47
- f.use :llm_cost_tracker, tags: { feature: "chat", user_id: current_user.id }
48
- f.request :json
49
- f.response :json
50
- f.adapter Faraday.default_adapter
65
+ require "llm_cost_tracker"
66
+
67
+ LlmCostTracker.configure do |config|
68
+ config.storage_backend = :log
51
69
  end
52
70
 
53
- # Every supported LLM request through this connection is tracked
54
- response = conn.post("/v1/responses", {
55
- model: "gpt-5-mini",
56
- input: "Hello!"
57
- })
71
+ LlmCostTracker.track(
72
+ provider: :openai,
73
+ model: "gpt-4o",
74
+ input_tokens: 1000,
75
+ output_tokens: 200,
76
+ feature: "demo"
77
+ )
78
+ ```
79
+
80
+ Output:
81
+
82
+ ```text
83
+ [LlmCostTracker] openai/gpt-4o tokens=1000+200 cost=$0.004500 tags={:feature=>"demo"}
58
84
  ```
59
85
 
60
- ### Option 2: Patch an existing client
86
+ ## Quick Start
87
+
88
+ Use the path that matches your app:
89
+
90
+ - Using `ruby-openai`, `ruby_llm`, or another client that exposes Faraday? Patch that client's Faraday connection.
91
+ - Using raw Faraday? Add the middleware directly.
92
+ - Using a client without Faraday access? Use manual tracking.
93
+
94
+ ### Option 1: Patch An Existing Client
61
95
 
62
96
  Some LLM gems expose their Faraday connection. For example, with `ruby-openai`:
63
97
 
@@ -67,11 +101,51 @@ OpenAI.configure do |config|
67
101
  config.access_token = ENV["OPENAI_API_KEY"]
68
102
 
69
103
  config.faraday do |f|
70
- f.use :llm_cost_tracker, tags: { feature: "openai_default" }
104
+ f.use :llm_cost_tracker, tags: -> {
105
+ {
106
+ user_id: Current.user&.id,
107
+ feature: Current.llm_feature || "openai"
108
+ }
109
+ }
71
110
  end
72
111
  end
73
112
  ```
74
113
 
114
+ For Rails apps, `tags:` can be a callable so request-local values are evaluated per request:
115
+
116
+ ```ruby
117
+ # app/models/current.rb
118
+ class Current < ActiveSupport::CurrentAttributes
119
+ attribute :user, :tenant, :llm_feature
120
+ end
121
+
122
+ # app/controllers/application_controller.rb
123
+ before_action do
124
+ Current.user = current_user
125
+ Current.tenant = current_tenant if respond_to?(:current_tenant, true)
126
+ Current.llm_feature = "chat"
127
+ end
128
+ ```
129
+
130
+ ### Option 2: Faraday Middleware
131
+
132
+ If your LLM client uses Faraday, add the middleware to that connection:
133
+
134
+ ```ruby
135
+ conn = Faraday.new(url: "https://api.openai.com") do |f|
136
+ f.use :llm_cost_tracker, tags: -> { { feature: "chat", user_id: Current.user&.id } }
137
+ f.request :json
138
+ f.response :json
139
+ f.adapter Faraday.default_adapter
140
+ end
141
+
142
+ # Every supported LLM request through this connection is tracked
143
+ response = conn.post("/v1/responses", {
144
+ model: "gpt-5-mini",
145
+ input: "Hello!"
146
+ })
147
+ ```
148
+
75
149
  If a client does not expose its HTTP connection, use manual tracking or register a custom parser around the HTTP layer you control.
76
150
 
77
151
  ### Option 3: Manual tracking
@@ -103,6 +177,9 @@ LlmCostTracker.configure do |config|
103
177
 
104
178
  # Monthly budget in USD
105
179
  config.monthly_budget = 500.00
180
+ config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
181
+ config.storage_error_behavior = :warn # :ignore, :warn, or :raise
182
+ config.unknown_pricing_behavior = :warn # :ignore, :warn, or :raise
106
183
 
107
184
  # Alert callback
108
185
  config.on_budget_exceeded = ->(data) {
@@ -113,16 +190,135 @@ LlmCostTracker.configure do |config|
113
190
  }
114
191
 
115
192
  # Override pricing for custom/fine-tuned models (per 1M tokens)
193
+ config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
116
194
  config.pricing_overrides = {
117
195
  "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
118
196
  }
197
+
198
+ # OpenAI-compatible APIs. OpenRouter and DeepSeek are included by default.
199
+ config.openai_compatible_providers["llm.my-company.com"] = "internal_gateway"
200
+ end
201
+ ```
202
+
203
+ Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. OpenRouter-style model IDs such as `openai/gpt-4o-mini` are normalized to built-in model names when possible. Use `prices_file` or `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
204
+
205
+ Storage errors are non-fatal by default:
206
+
207
+ ```ruby
208
+ config.storage_error_behavior = :warn # default
209
+ config.storage_error_behavior = :raise # fail fast with StorageError
210
+ config.storage_error_behavior = :ignore # skip storage failures silently
211
+ ```
212
+
213
+ With the default `:warn` behavior, tracking emits a warning and lets the LLM response continue if ActiveRecord or custom storage fails. `LlmCostTracker::StorageError` exposes `original_error` when `:raise` is enabled.
214
+
215
+ Unknown model pricing is visible by default:
216
+
217
+ ```ruby
218
+ config.unknown_pricing_behavior = :warn # default
219
+ config.unknown_pricing_behavior = :raise # fail fast with UnknownPricingError
220
+ config.unknown_pricing_behavior = :ignore # keep tracking tokens silently
221
+ ```
222
+
223
+ When pricing is unknown, the event can still be recorded with token counts, but `cost` is `nil` and budget enforcement is skipped for that event. Use `prices_file` or `pricing_overrides` to ensure all production models are priced. Run this ActiveRecord query to list the unpriced models in your data:
224
+
225
+ ```ruby
226
+ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
227
+ ```
228
+
229
+ ### Keeping Prices Current
230
+
231
+ Built-in prices live in `lib/llm_cost_tracker/prices.json`, with `updated_at`, `unit`, `currency`, and source URLs in the file metadata. The gem does not fetch pricing on boot; that keeps it self-hosted and avoids hidden external dependencies.
232
+
233
+ For production apps, keep a local JSON or YAML price file and point the gem at it:
234
+
235
+ ```bash
236
+ bin/rails generate llm_cost_tracker:prices
237
+ ```
238
+
239
+ ```ruby
240
+ config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
241
+ ```
242
+
243
+ Example JSON:
244
+
245
+ ```json
246
+ {
247
+ "metadata": {
248
+ "updated_at": "2026-04-18",
249
+ "currency": "USD",
250
+ "unit": "1M tokens"
251
+ },
252
+ "models": {
253
+ "my-gateway/gpt-4o-mini": {
254
+ "input": 0.20,
255
+ "cached_input": 0.10,
256
+ "output": 0.80
257
+ }
258
+ }
259
+ }
260
+ ```
261
+
262
+ `pricing_overrides` still has the highest precedence, so you can use it for small Ruby-only overrides and keep broader provider tables in the file. A practical release rhythm is to refresh built-in `prices.json` quarterly and use `prices_file` for urgent provider changes between gem releases.
263
+
264
+ ## Budget Enforcement
265
+
266
+ ```ruby
267
+ LlmCostTracker.configure do |config|
268
+ config.storage_backend = :active_record
269
+ config.monthly_budget = 100.00
270
+ config.budget_exceeded_behavior = :block_requests
271
+ end
272
+ ```
273
+
274
+ Budget behavior options:
275
+
276
+ - `:notify` — default. Calls `on_budget_exceeded` after a tracked event pushes the month over budget.
277
+ - `:raise` — records the event, then raises `LlmCostTracker::BudgetExceededError` when the month is over budget.
278
+ - `:block_requests` — blocks Faraday LLM requests before the HTTP call when the ActiveRecord monthly total has already reached the budget. If a request pushes the month over budget, it also raises after recording the event.
279
+
280
+ `BudgetExceededError` exposes `monthly_total`, `budget`, and `last_event`:
281
+
282
+ ```ruby
283
+ begin
284
+ client.chat(...)
285
+ rescue LlmCostTracker::BudgetExceededError => e
286
+ Rails.logger.warn("LLM budget exhausted: #{e.monthly_total} / #{e.budget}")
119
287
  end
120
288
  ```
121
289
 
122
- Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. Use `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
290
+ Pre-request blocking needs `storage_backend = :active_record` because the middleware must query your stored monthly total before sending the request. With `:log` or `:custom` storage, `:raise` and the post-response part of `:block_requests` still work for the event being tracked.
291
+
292
+ `:block_requests` is a best-effort guardrail, not a transactional hard quota. In highly concurrent deployments, multiple workers can pass the preflight check at the same time before any of them records its final cost. The request that first pushes the month over budget is stored before the post-response `BudgetExceededError` is raised; later Faraday requests are blocked during preflight once the stored monthly total is exhausted. Use provider-side limits or a gateway-level quota if you need strict cross-process enforcement.
123
293
 
124
294
  ## Querying Costs (ActiveRecord)
125
295
 
296
+ Print a quick terminal report:
297
+
298
+ ```bash
299
+ bin/rails llm_cost_tracker:report
300
+
301
+ # Optional: change the window
302
+ DAYS=7 bin/rails llm_cost_tracker:report
303
+ ```
304
+
305
+ Example:
306
+
307
+ ```text
308
+ LLM Cost Report (last 30 days)
309
+
310
+ Total cost: $127.420000
311
+ Requests: 4,218
312
+ Avg latency: 812ms
313
+ Unknown pricing: 0
314
+
315
+ By provider:
316
+ openai $96.220000
317
+ anthropic $31.200000
318
+ ```
319
+
320
+ Or query the ledger directly:
321
+
126
322
  ```ruby
127
323
  # Today's total spend
128
324
  LlmCostTracker::LlmApiCall.today.total_cost
@@ -140,16 +336,65 @@ LlmCostTracker::LlmApiCall.this_month.cost_by_provider
140
336
  LlmCostTracker::LlmApiCall.daily_costs(days: 7)
141
337
  # => { "2026-04-10" => 1.5, "2026-04-11" => 2.3, ... }
142
338
 
339
+ # Latency overview
340
+ LlmCostTracker::LlmApiCall.with_latency.average_latency_ms
341
+ LlmCostTracker::LlmApiCall.this_month.latency_by_model
342
+
143
343
  # Filter by feature
144
344
  LlmCostTracker::LlmApiCall.by_tag("feature", "chat").this_month.total_cost
145
345
 
146
346
  # Filter by user
147
347
  LlmCostTracker::LlmApiCall.by_tag("user_id", "42").today.total_cost
348
+ LlmCostTracker::LlmApiCall.by_user(42).today.total_cost
349
+
350
+ # Filter by multiple tags
351
+ LlmCostTracker::LlmApiCall.by_tags(user_id: 42, feature: "chat").this_month.total_cost
352
+
353
+ # Feature shortcut
354
+ LlmCostTracker::LlmApiCall.by_feature("summarizer").this_month.total_cost
355
+
356
+ # Find models without pricing
357
+ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
358
+ LlmCostTracker::LlmApiCall.with_cost.this_month.total_cost
148
359
 
149
360
  # Custom date range
150
361
  LlmCostTracker::LlmApiCall.between(1.week.ago, Time.current).cost_by_model
151
362
  ```
152
363
 
364
+ ### Tag Storage
365
+
366
+ The install generator uses `jsonb` tags with a GIN index on PostgreSQL:
367
+
368
+ ```ruby
369
+ t.jsonb :tags, null: false, default: {}
370
+ add_index :llm_api_calls, :tags, using: :gin
371
+ ```
372
+
373
+ On SQLite, MySQL, and other adapters, tags fall back to JSON stored in a text column. The `by_tag` scope automatically uses PostgreSQL JSONB containment when the column supports it, and the text fallback otherwise. The text fallback works, but its tag queries are less efficient than PostgreSQL JSONB containment.
374
+
375
+ If you installed `llm_cost_tracker` before JSONB tags were available and your app uses PostgreSQL, generate an upgrade migration:
376
+
377
+ ```bash
378
+ bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb
379
+ bin/rails db:migrate
380
+ ```
381
+
382
+ This converts the existing `tags` text column to `jsonb`, keeps existing tag data, and adds the GIN index.
383
+
384
+ If you installed an earlier version with `precision: 12, scale: 8` cost columns, widen them for larger production ledgers:
385
+
386
+ ```bash
387
+ bin/rails generate llm_cost_tracker:upgrade_cost_precision
388
+ bin/rails db:migrate
389
+ ```
390
+
391
+ If you installed before `latency_ms` was available, add the latency column:
392
+
393
+ ```bash
394
+ bin/rails generate llm_cost_tracker:add_latency_ms
395
+ bin/rails db:migrate
396
+ ```
397
+
153
398
  ## ActiveSupport::Notifications
154
399
 
155
400
  Every tracked call emits an `llm_request.llm_cost_tracker` event:
@@ -163,6 +408,7 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
163
408
  # input_tokens: 150,
164
409
  # output_tokens: 42,
165
410
  # total_tokens: 192,
411
+ # latency_ms: 248,
166
412
  # cost: {
167
413
  # input_cost: 0.000375,
168
414
  # cached_input_cost: 0.0,
@@ -188,19 +434,72 @@ LlmCostTracker.configure do |config|
188
434
  config.storage_backend = :custom
189
435
  config.custom_storage = ->(event) {
190
436
  InfluxDB.write("llm_costs", {
191
- values: { cost: event[:cost][:total_cost], tokens: event[:total_tokens] },
437
+ values: {
438
+ cost: event[:cost]&.fetch(:total_cost, nil),
439
+ tokens: event[:total_tokens],
440
+ latency_ms: event[:latency_ms]
441
+ },
192
442
  tags: { provider: event[:provider], model: event[:model] }
193
443
  })
194
444
  }
195
445
  end
196
446
  ```
197
447
 
448
+ ## OpenAI-Compatible Providers
449
+
450
+ ```ruby
451
+ LlmCostTracker.configure do |config|
452
+ # Built in:
453
+ # "openrouter.ai" => "openrouter"
454
+ # "api.deepseek.com" => "deepseek"
455
+ config.openai_compatible_providers["gateway.example.com"] = "internal_gateway"
456
+ end
457
+ ```
458
+
459
+ Any configured host is parsed with the OpenAI-compatible usage shape:
460
+
461
+ - `prompt_tokens` / `completion_tokens` / `total_tokens`
462
+ - `input_tokens` / `output_tokens` / `total_tokens`
463
+ - optional cached input details when the response includes them
464
+
465
+ This covers OpenRouter, DeepSeek, and private gateways that expose OpenAI-style Chat Completions, Responses, Completions, or Embeddings endpoints.
466
+
467
+ ## Safety Guarantees
468
+
469
+ - `llm_cost_tracker` does not make external HTTP calls.
470
+ - It does not store prompt or response bodies.
471
+ - Faraday responses are not modified.
472
+ - Storage failures are non-fatal by default via `storage_error_behavior = :warn`.
473
+ - Budget and unknown-pricing errors are raised only when you opt into `:raise` or `:block_requests`.
474
+ - Pricing is local and best-effort; use `prices_file` or `pricing_overrides` for production-specific rates.
475
+ - Streaming/SSE calls are skipped with a warning when the final usage payload is not readable by Faraday.
476
+
477
+ ## Production Checklist
478
+
479
+ - Use `storage_backend = :active_record` in production.
480
+ - Set `monthly_budget` and choose `budget_exceeded_behavior`.
481
+ - Treat `:block_requests` as best-effort in concurrent systems, not a strict quota.
482
+ - Keep `unknown_pricing_behavior = :warn` or `:raise` until pricing overrides are complete.
483
+ - Add `pricing_overrides` for custom, fine-tuned, gateway-specific, or newly released models.
484
+ - Tag calls with `tenant_id`, `user_id`, and `feature` where possible.
485
+ - Check `LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count` after deploys.
486
+ - Track `latency_ms` and watch `latency_by_model` for slow or degraded providers.
487
+
488
+ ## Known Limitations
489
+
490
+ - `:block_requests` is best-effort under concurrency. For hard caps, use an external quota system, provider-side limits, or a gateway-level budget.
491
+ - Streaming/SSE calls are tracked only when Faraday exposes a final response body with usage data. Otherwise the gem warns and skips automatic tracking.
492
+ - Anthropic cache creation TTL variants are not modeled separately yet; 1-hour cache writes may be underestimated compared with the default 5-minute cache write rate.
493
+ - OpenAI reasoning tokens are included in output-token totals when providers report them that way, but separate reasoning-token attribution is not stored yet.
494
+
198
495
  ## Adding a Custom Provider Parser
199
496
 
497
+ Use this for providers that are not OpenAI-compatible and return a different usage shape.
498
+
200
499
  ```ruby
201
- class DeepSeekParser < LlmCostTracker::Parsers::Base
500
+ class AcmeParser < LlmCostTracker::Parsers::Base
202
501
  def match?(url)
203
- url.to_s.include?("api.deepseek.com")
502
+ url.to_s.include?("api.acme-llm.example")
204
503
  end
205
504
 
206
505
  def parse(request_url, request_body, response_status, response_body)
@@ -211,16 +510,16 @@ class DeepSeekParser < LlmCostTracker::Parsers::Base
211
510
  return nil unless usage
212
511
 
213
512
  {
214
- provider: "deepseek",
513
+ provider: "acme",
215
514
  model: response["model"],
216
- input_tokens: usage["prompt_tokens"] || 0,
217
- output_tokens: usage["completion_tokens"] || 0
515
+ input_tokens: usage["input"] || 0,
516
+ output_tokens: usage["output"] || 0
218
517
  }
219
518
  end
220
519
  end
221
520
 
222
521
  # Register it
223
- LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
522
+ LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
224
523
  ```
225
524
 
226
525
  ## Supported Providers
@@ -228,6 +527,9 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
228
527
  | Provider | Auto-detected | Models with pricing |
229
528
  |----------|:---:|---|
230
529
  | OpenAI | ✅ | GPT-5.2/5.1/5, GPT-5 mini/nano, GPT-4.1, GPT-4o, o1/o3/o4-mini |
530
+ | OpenRouter | ✅ | Uses OpenAI-compatible usage; provider-prefixed OpenAI model IDs are normalized when possible |
531
+ | DeepSeek | ✅ | Uses OpenAI-compatible usage; add `pricing_overrides` for DeepSeek model pricing |
532
+ | OpenAI-compatible hosts | 🔧 | Configure `openai_compatible_providers` |
231
533
  | Anthropic | ✅ | Claude Opus 4.6/4.1/4, Sonnet 4.6/4.5/4, Haiku 4.5, Claude 3.x |
232
534
  | Google Gemini | ✅ | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite, 1.5 Pro/Flash |
233
535
  | Any other | 🔧 | Via custom parser (see above) |
@@ -235,6 +537,7 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
235
537
  Supported endpoint families:
236
538
 
237
539
  - OpenAI: Chat Completions, Responses, Completions, Embeddings
540
+ - OpenAI-compatible: Chat Completions, Responses, Completions, Embeddings
238
541
  - Anthropic: Messages
239
542
  - Google Gemini: `generateContent` responses with `usageMetadata`
240
543
 
@@ -251,9 +554,9 @@ Your App → Faraday → [LlmCostTracker Middleware] → LLM API
251
554
  ActiveRecord / Log / Custom
252
555
  ```
253
556
 
254
- The middleware intercepts **outgoing** HTTP responses (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses.
557
+ The middleware intercepts responses to **outgoing** HTTP requests (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses. Put `llm_cost_tracker` inside the Faraday stack where it can see the final response body; if another middleware consumes or transforms streaming bodies, use manual tracking.
255
558
 
256
- For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, use manual tracking.
559
+ For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, the gem logs a warning and skips tracking; use manual tracking for those calls.
257
560
 
258
561
  ## Development
259
562
 
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "logging"
4
+
5
module LlmCostTracker
  # Monthly budget guardrail driven by the values in LlmCostTracker.configuration.
  #
  # Two class-level entry points are exposed:
  #
  # * enforce! - preflight check; it only acts for the :block_requests behavior.
  # * check!   - check performed for a single tracked event that carries a cost.
  #
  # When a limit is hit, the configured on_budget_exceeded callback (if any) is
  # called with a payload hash, and BudgetExceededError is raised when the
  # configured behavior is :raise or :block_requests.
  class Budget
    # Guards the "already warned" flag so the preflight warning is logged once.
    WARNING_MUTEX = Mutex.new
    private_constant :WARNING_MUTEX

    class << self
      # Preflight budget check.
      #
      # Does nothing unless a monthly budget is configured and the behavior is
      # :block_requests. Without ActiveRecord storage it only logs a one-time
      # warning. Otherwise the stored monthly total is compared with the budget
      # and the exceeded handler runs once the total has reached it (>=).
      #
      # @raise [BudgetExceededError] when the monthly total has reached the budget
      def enforce!
        config = LlmCostTracker.configuration
        return unless config.monthly_budget
        return unless behavior == :block_requests

        unless config.active_record?
          return warn_non_active_record_block_requests
        end

        spent = calculate_monthly_total(0)
        handle_exceeded(monthly_total: spent) if spent >= config.monthly_budget
      end

      # Budget check for one tracked event.
      #
      # Events without a cost are skipped, as is everything when no monthly
      # budget is configured. The exceeded handler runs only when the total is
      # strictly greater than the budget.
      #
      # @param event [#[]] tracked event; read via event[:cost][:total_cost]
      # @raise [BudgetExceededError] when over budget and the behavior is
      #   :raise or :block_requests
      def check!(event)
        config = LlmCostTracker.configuration
        return unless config.monthly_budget

        cost = event[:cost]
        return unless cost

        spent = calculate_monthly_total(cost[:total_cost])
        handle_exceeded(monthly_total: spent, last_event: event) if spent > config.monthly_budget
      end

      private

      # With ActiveRecord storage the total comes from the store and
      # latest_cost is not used; with any other backend the total is just
      # latest_cost itself.
      def calculate_monthly_total(latest_cost)
        return latest_cost unless LlmCostTracker.configuration.active_record?

        active_record_monthly_total
      end

      # Lazily loads the ActiveRecord-backed classes (only when they are not
      # defined yet) and asks the store for the monthly total.
      #
      # @raise [Error] when loading fails with a LoadError
      def active_record_monthly_total
        defined?(LlmCostTracker::LlmApiCall) || require_relative("llm_api_call")
        defined?(LlmCostTracker::Storage::ActiveRecordStore) || require_relative("storage/active_record_store")

        LlmCostTracker::Storage::ActiveRecordStore.monthly_total
      rescue LoadError => e
        raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
      end

      # Logs the "preflight needs ActiveRecord" warning the first time it is
      # reached; later calls are silent. The flag is flipped under the mutex.
      def warn_non_active_record_block_requests
        first_time = WARNING_MUTEX.synchronize do
          next false if @warned_non_active_record_block_requests

          @warned_non_active_record_block_requests = true
        end

        Logging.warn(":block_requests preflight requires storage_backend = :active_record; request was not blocked.") if first_time
      end

      # Notifies the configured callback, then raises for the :raise and
      # :block_requests behaviors. The same payload feeds both the callback
      # and the error.
      def handle_exceeded(monthly_total:, last_event: nil)
        config = LlmCostTracker.configuration
        details = { monthly_total: monthly_total, budget: config.monthly_budget, last_event: last_event }

        callback = config.on_budget_exceeded
        callback.call(details) unless callback.nil?

        return unless raise_on_exceeded?

        raise BudgetExceededError.new(**details)
      end

      # True when the configured behavior is one of the raising modes.
      def raise_on_exceeded?
        case behavior
        when :raise, :block_requests then true
        else false
        end
      end

      # Currently configured budget_exceeded_behavior.
      def behavior
        LlmCostTracker.configuration.budget_exceeded_behavior
      end
    end
  end
end