llm_cost_tracker 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -0
  3. data/README.md +202 -11
  4. data/lib/llm_cost_tracker/budget.rb +97 -0
  5. data/lib/llm_cost_tracker/configuration.rb +37 -0
  6. data/lib/llm_cost_tracker/errors.rb +37 -0
  7. data/lib/llm_cost_tracker/event_metadata.rb +54 -0
  8. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
  9. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
  10. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
  11. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
  12. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
  13. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
  14. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
  15. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
  16. data/lib/llm_cost_tracker/llm_api_call.rb +68 -2
  17. data/lib/llm_cost_tracker/middleware/faraday.rb +50 -12
  18. data/lib/llm_cost_tracker/parsers/anthropic.rb +4 -1
  19. data/lib/llm_cost_tracker/parsers/gemini.rb +9 -2
  20. data/lib/llm_cost_tracker/parsers/openai.rb +10 -3
  21. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
  22. data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
  23. data/lib/llm_cost_tracker/price_registry.rb +69 -0
  24. data/lib/llm_cost_tracker/prices.json +51 -0
  25. data/lib/llm_cost_tracker/pricing.rb +74 -74
  26. data/lib/llm_cost_tracker/railtie.rb +3 -0
  27. data/lib/llm_cost_tracker/storage/active_record_store.rb +12 -3
  28. data/lib/llm_cost_tracker/tracker.rb +49 -54
  29. data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
  30. data/lib/llm_cost_tracker/version.rb +1 -1
  31. data/lib/llm_cost_tracker.rb +33 -5
  32. data/llm_cost_tracker.gemspec +4 -3
  33. metadata +20 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7b40f1010c79358da89ffdd10637f59fa90e24aa0f50aec364828d2e2cbf5b9
4
- data.tar.gz: d12d1cf407b87afd6e1084c22ceda143c7ab9bf5e6ea6825d70a8e24969cafa5
3
+ metadata.gz: 6a014d7c3de26b91ba6a0b99d300a803d04ab8a95c55b374377d6b8cdf631e50
4
+ data.tar.gz: 7e042cf740a65c1019d0ee986eeec9fc2266a1ec59c55d4807f306521f95f869
5
5
  SHA512:
6
- metadata.gz: 949157f0a6718bc03f8f0d825982ed732df2754ddf1e4ee07b18522b0e20cc4367a97c599071bcda95bbdda4dde0e160f5d586a9b42a0dd8b1f3c89910286547
7
- data.tar.gz: 9ea9007142d157446271bcf81bc4786e4b22a00f6e353dc2e3dc26c1be12d9abf88aed8d8852da37778b5fe3f71fcd4422c6153d8a531f195adaf0d0b9bb8dd2
6
+ metadata.gz: 1c41d7a9002fb484df80b6d3e5c7ce1fc14a3d481443f0bbefe1c74a4e2a0f92a039f56677a7a354dbfaeaaae6b5d4727bb5ca9466b06b77d574d26efd477fdb
7
+ data.tar.gz: d8017bbf7975f5bafbc4328dc8df76a614bc5e7700bb14569cead5ffc06aac6a4828707a9a609b9b7af69e4b7e3a08543712a750ed79d7b125ab2c96336cefa1
data/CHANGELOG.md CHANGED
@@ -5,6 +5,43 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.2] - 2026-04-18
9
+
10
+ ### Added
11
+
12
+ - Auto-detect OpenRouter and DeepSeek as OpenAI-compatible providers.
13
+ - Add `openai_compatible_providers` configuration for private OpenAI-compatible gateways.
14
+ - Add `BudgetExceededError` and `budget_exceeded_behavior` for best-effort budget guardrails.
15
+ - Add `:raise` and `:block_requests` budget behaviors; `:block_requests` is not a hard cap under concurrency.
16
+ - Add `StorageError` and `storage_error_behavior` so storage failures do not have to break host LLM calls.
17
+ - Add `UnknownPricingError` and `unknown_pricing_behavior` for unknown model pricing.
18
+ - Add built-in `prices.json` registry with metadata and source URLs.
19
+ - Add `prices_file` configuration for local JSON/YAML pricing overrides.
20
+ - Add `with_cost`, `without_cost`, and `unknown_pricing` ActiveRecord scopes.
21
+ - Add `latency_ms` tracking for Faraday calls, manual tracking, notifications, and ActiveRecord storage.
22
+ - Add `with_latency`, `average_latency_ms`, `latency_by_model`, and `latency_by_provider`.
23
+ - Use PostgreSQL `jsonb` storage for tags in newly generated migrations.
24
+ - Add a GIN index on `llm_api_calls.tags` for PostgreSQL installs.
25
+ - Add adapter-aware `by_tag` querying with JSONB containment on PostgreSQL and text fallback elsewhere.
26
+ - Add `by_tags`, `by_user`, and `by_feature` scopes for common attribution queries.
27
+ - Add `llm_cost_tracker:upgrade_tags_to_jsonb` generator for existing PostgreSQL installs.
28
+ - Add `llm_cost_tracker:upgrade_cost_precision` generator for widening stored cost columns.
29
+ - Add `llm_cost_tracker:add_latency_ms` generator for existing installs.
30
+
31
+ ### Changed
32
+
33
+ - Store tags as a Hash for JSON-backed columns and as JSON text for fallback columns.
34
+ - Keep internal usage metadata such as cache token counts out of stored attribution tags.
35
+ - Normalize provider-prefixed model IDs like `openai/gpt-4o-mini` for built-in price lookup.
36
+ - Normalize configured OpenAI-compatible host keys to lowercase after configuration.
37
+ - Avoid double fuzzy-match passes during price lookup.
38
+ - Widen generated cost decimal columns to `precision: 20, scale: 8`.
39
+ - Count Gemini `thoughtsTokenCount` as output tokens for better thinking-mode cost estimates.
40
+ - Warn when Faraday exposes an unreadable streaming/SSE response body.
41
+ - Document tag storage behavior, budget guardrail limits, known limitations, common tag scopes, and upgrade flows.
42
+ - Clarify that budget errors raised after a response occur after the event has been recorded.
43
+ - Route custom storage exceptions that inherit from `LlmCostTracker::Error` through `storage_error_behavior`.
44
+
8
45
  ## [0.1.1] - 2026-04-17
9
46
 
10
47
  ### Fixed
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Self-hosted LLM API cost tracking for Ruby and Rails apps.**
4
4
 
5
- Track token usage and estimated costs for OpenAI, Anthropic, and Google Gemini calls from Faraday-based Ruby clients. Store the data in your own database, tag calls by user or feature, and get budget alerts without adding an external SaaS or proxy.
5
+ Track, attribute, and enforce AI costs for OpenAI, Anthropic, Google Gemini, OpenRouter, DeepSeek, and OpenAI-compatible calls from Faraday-based Ruby clients. Store the data in your own database, tag calls by user or feature, and get budget alerts without adding an external SaaS or proxy.
6
6
 
7
7
  [![Gem Version](https://badge.fury.io/rb/llm_cost_tracker.svg)](https://rubygems.org/gems/llm_cost_tracker)
8
8
  [![CI](https://github.com/sergey-homenko/llm_cost_tracker/actions/workflows/ruby.yml/badge.svg)](https://github.com/sergey-homenko/llm_cost_tracker/actions)
@@ -17,6 +17,8 @@ Every Rails app integrating LLMs faces the same problem: **you don't know how mu
17
17
  - 🏠 **Self-hosted** — your data stays in your database
18
18
  - 🧩 **Client-light** — works with raw Faraday and LLM gems that expose their Faraday connection
19
19
  - 🏷️ **Attribution-first** — tag spend by feature, tenant, user, job, or environment
20
+ - 🌐 **OpenAI-compatible** — auto-detect OpenRouter and DeepSeek, with custom compatible hosts configurable
21
+ - 🛑 **Budget guardrails** — notify, raise, or block requests when monthly spend is exhausted
20
22
  - 💸 **Budget-aware** — emit notifications and callbacks before spend surprises you
21
23
 
22
24
  This gem is intentionally not a tracing platform, prompt CMS, eval system, or gateway. It focuses on the boring but valuable question: "What did this app spend on LLM APIs, and where did that spend come from?"
@@ -103,6 +105,9 @@ LlmCostTracker.configure do |config|
103
105
 
104
106
  # Monthly budget in USD
105
107
  config.monthly_budget = 500.00
108
+ config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
109
+ config.storage_error_behavior = :warn # :ignore, :warn, or :raise
110
+ config.unknown_pricing_behavior = :warn # :ignore, :warn, or :raise
106
111
 
107
112
  # Alert callback
108
113
  config.on_budget_exceeded = ->(data) {
@@ -113,13 +118,102 @@ LlmCostTracker.configure do |config|
113
118
  }
114
119
 
115
120
  # Override pricing for custom/fine-tuned models (per 1M tokens)
121
+ config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.json")
116
122
  config.pricing_overrides = {
117
123
  "ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
118
124
  }
125
+
126
+ # OpenAI-compatible APIs. OpenRouter and DeepSeek are included by default.
127
+ config.openai_compatible_providers["llm.my-company.com"] = "internal_gateway"
119
128
  end
120
129
  ```
121
130
 
122
- Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. Use `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
131
+ Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. OpenRouter-style model IDs such as `openai/gpt-4o-mini` are normalized to built-in model names when possible. Use `prices_file` or `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
132
+
133
+ Storage errors are non-fatal by default:
134
+
135
+ ```ruby
136
+ config.storage_error_behavior = :warn # default
137
+ config.storage_error_behavior = :raise # fail fast with StorageError
138
+ config.storage_error_behavior = :ignore # skip storage failures silently
139
+ ```
140
+
141
+ With the default `:warn` behavior, tracking emits a warning and lets the LLM response continue if ActiveRecord or custom storage fails. `LlmCostTracker::StorageError` exposes `original_error` when `:raise` is enabled.
142
+
143
+ Unknown model pricing is visible by default:
144
+
145
+ ```ruby
146
+ config.unknown_pricing_behavior = :warn # default
147
+ config.unknown_pricing_behavior = :raise # fail fast with UnknownPricingError
148
+ config.unknown_pricing_behavior = :ignore # keep tracking tokens silently
149
+ ```
150
+
151
+ When pricing is unknown, the event can still be recorded with token counts, but `cost` is `nil` and budget enforcement is skipped for that event. Use this ActiveRecord query to find the gaps:
152
+
153
+ ```ruby
154
+ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
155
+ ```
156
+
157
+ ### Keeping Prices Current
158
+
159
+ Built-in prices live in `lib/llm_cost_tracker/prices.json`, with `updated_at`, `unit`, `currency`, and source URLs in the file metadata. The gem does not fetch pricing on boot; that keeps it self-hosted and avoids hidden external dependencies.
160
+
161
+ For production apps, keep a local JSON or YAML price file and point the gem at it:
162
+
163
+ ```ruby
164
+ config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.json")
165
+ ```
166
+
167
+ Example JSON:
168
+
169
+ ```json
170
+ {
171
+ "metadata": {
172
+ "updated_at": "2026-04-18",
173
+ "currency": "USD",
174
+ "unit": "1M tokens"
175
+ },
176
+ "models": {
177
+ "my-gateway/gpt-4o-mini": {
178
+ "input": 0.20,
179
+ "cached_input": 0.10,
180
+ "output": 0.80
181
+ }
182
+ }
183
+ }
184
+ ```
185
+
186
+ `pricing_overrides` still has the highest precedence, so you can use it for small Ruby-only overrides and keep broader provider tables in the file. A practical release rhythm is to refresh built-in `prices.json` quarterly and use `prices_file` for urgent provider changes between gem releases.
187
+
188
+ ## Budget Enforcement
189
+
190
+ ```ruby
191
+ LlmCostTracker.configure do |config|
192
+ config.storage_backend = :active_record
193
+ config.monthly_budget = 100.00
194
+ config.budget_exceeded_behavior = :block_requests
195
+ end
196
+ ```
197
+
198
+ Budget behavior options:
199
+
200
+ - `:notify` — default. Calls `on_budget_exceeded` after a tracked event pushes the month over budget.
201
+ - `:raise` — records the event, calls `on_budget_exceeded`, then raises `LlmCostTracker::BudgetExceededError` when the month is over budget.
202
+ - `:block_requests` — blocks Faraday LLM requests before the HTTP call when the ActiveRecord monthly total has already reached the budget. If a request pushes the month over budget, it also raises after recording the event.
203
+
204
+ `BudgetExceededError` exposes `monthly_total`, `budget`, and `last_event`:
205
+
206
+ ```ruby
207
+ begin
208
+ client.chat(...)
209
+ rescue LlmCostTracker::BudgetExceededError => e
210
+ Rails.logger.warn("LLM budget exhausted: #{e.monthly_total} / #{e.budget}")
211
+ end
212
+ ```
213
+
214
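+ Pre-request blocking needs `storage_backend = :active_record` because the middleware must query your stored monthly total before sending the request. With `:log` or `:custom` storage there is no stored monthly total: the preflight check logs a one-time warning instead of blocking, and `:raise` and the post-response part of `:block_requests` compare only the cost of the event being tracked against `monthly_budget`, so they fire only when a single event exceeds the budget on its own.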
215
+
216
+ `:block_requests` is a best-effort guardrail, not a transactional hard quota. In highly concurrent deployments, multiple workers can pass the preflight check at the same time before any of them records its final cost. The request that first pushes the month over budget is stored before the post-response `BudgetExceededError` is raised; later Faraday requests are blocked during preflight once the stored monthly total is exhausted. Use provider-side limits or a gateway-level quota if you need strict cross-process enforcement.
123
217
 
124
218
  ## Querying Costs (ActiveRecord)
125
219
 
@@ -140,16 +234,65 @@ LlmCostTracker::LlmApiCall.this_month.cost_by_provider
140
234
  LlmCostTracker::LlmApiCall.daily_costs(days: 7)
141
235
  # => { "2026-04-10" => 1.5, "2026-04-11" => 2.3, ... }
142
236
 
237
+ # Latency overview
238
+ LlmCostTracker::LlmApiCall.with_latency.average_latency_ms
239
+ LlmCostTracker::LlmApiCall.this_month.latency_by_model
240
+
143
241
  # Filter by feature
144
242
  LlmCostTracker::LlmApiCall.by_tag("feature", "chat").this_month.total_cost
145
243
 
146
244
  # Filter by user
147
245
  LlmCostTracker::LlmApiCall.by_tag("user_id", "42").today.total_cost
246
+ LlmCostTracker::LlmApiCall.by_user(42).today.total_cost
247
+
248
+ # Filter by multiple tags
249
+ LlmCostTracker::LlmApiCall.by_tags(user_id: 42, feature: "chat").this_month.total_cost
250
+
251
+ # Feature shortcut
252
+ LlmCostTracker::LlmApiCall.by_feature("summarizer").this_month.total_cost
253
+
254
+ # Find models without pricing
255
+ LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
256
+ LlmCostTracker::LlmApiCall.with_cost.this_month.total_cost
148
257
 
149
258
  # Custom date range
150
259
  LlmCostTracker::LlmApiCall.between(1.week.ago, Time.current).cost_by_model
151
260
  ```
152
261
 
262
+ ### Tag Storage
263
+
264
+ The install generator uses `jsonb` tags with a GIN index on PostgreSQL:
265
+
266
+ ```ruby
267
+ t.jsonb :tags, null: false, default: {}
268
+ add_index :llm_api_calls, :tags, using: :gin
269
+ ```
270
+
271
+ On SQLite and other adapters, tags fall back to JSON stored in a text column. The `by_tag` scope automatically uses PostgreSQL JSONB containment when the column supports it, and the text fallback otherwise.
272
+
273
+ If you installed `llm_cost_tracker` before JSONB tags were available and your app uses PostgreSQL, generate an upgrade migration:
274
+
275
+ ```bash
276
+ bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb
277
+ bin/rails db:migrate
278
+ ```
279
+
280
+ This converts the existing `tags` text column to `jsonb`, keeps existing tag data, and adds the GIN index.
281
+
282
+ If you installed an earlier version with `precision: 12, scale: 8` cost columns, widen them for larger production ledgers:
283
+
284
+ ```bash
285
+ bin/rails generate llm_cost_tracker:upgrade_cost_precision
286
+ bin/rails db:migrate
287
+ ```
288
+
289
+ If you installed before `latency_ms` was available, add the latency column:
290
+
291
+ ```bash
292
+ bin/rails generate llm_cost_tracker:add_latency_ms
293
+ bin/rails db:migrate
294
+ ```
295
+
153
296
  ## ActiveSupport::Notifications
154
297
 
155
298
  Every tracked call emits an `llm_request.llm_cost_tracker` event:
@@ -163,6 +306,7 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
163
306
  # input_tokens: 150,
164
307
  # output_tokens: 42,
165
308
  # total_tokens: 192,
309
+ # latency_ms: 248,
166
310
  # cost: {
167
311
  # input_cost: 0.000375,
168
312
  # cached_input_cost: 0.0,
@@ -188,19 +332,62 @@ LlmCostTracker.configure do |config|
188
332
  config.storage_backend = :custom
189
333
  config.custom_storage = ->(event) {
190
334
  InfluxDB.write("llm_costs", {
191
- values: { cost: event[:cost][:total_cost], tokens: event[:total_tokens] },
335
+ values: {
336
+ cost: event[:cost]&.fetch(:total_cost, nil),
337
+ tokens: event[:total_tokens],
338
+ latency_ms: event[:latency_ms]
339
+ },
192
340
  tags: { provider: event[:provider], model: event[:model] }
193
341
  })
194
342
  }
195
343
  end
196
344
  ```
197
345
 
346
+ ## OpenAI-Compatible Providers
347
+
348
+ ```ruby
349
+ LlmCostTracker.configure do |config|
350
+ # Built in:
351
+ # "openrouter.ai" => "openrouter"
352
+ # "api.deepseek.com" => "deepseek"
353
+ config.openai_compatible_providers["gateway.example.com"] = "internal_gateway"
354
+ end
355
+ ```
356
+
357
+ Any configured host is parsed with the OpenAI-compatible usage shape:
358
+
359
+ - `prompt_tokens` / `completion_tokens` / `total_tokens`
360
+ - `input_tokens` / `output_tokens` / `total_tokens`
361
+ - optional cached input details when the response includes them
362
+
363
+ This covers OpenRouter, DeepSeek, and private gateways that expose OpenAI-style Chat Completions, Responses, Completions, or Embeddings endpoints.
364
+
365
+ ## Production Checklist
366
+
367
+ - Use `storage_backend = :active_record` in production.
368
+ - Set `monthly_budget` and choose `budget_exceeded_behavior`.
369
+ - Treat `:block_requests` as best-effort in concurrent systems, not a strict quota.
370
+ - Keep `unknown_pricing_behavior = :warn` or `:raise` until pricing overrides are complete.
371
+ - Add `pricing_overrides` for custom, fine-tuned, gateway-specific, or newly released models.
372
+ - Tag calls with `tenant_id`, `user_id`, and `feature` where possible.
373
+ - Check `LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count` after deploys.
374
+ - Track `latency_ms` and watch `latency_by_model` for slow or degraded providers.
375
+
376
+ ## Known Limitations
377
+
378
+ - `:block_requests` is best-effort under concurrency. For hard caps, use an external quota system, provider-side limits, or a gateway-level budget.
379
+ - Streaming/SSE calls are tracked only when Faraday exposes a final response body with usage data. Otherwise the gem warns and skips automatic tracking.
380
+ - Anthropic cache creation TTL variants are not modeled separately yet; 1-hour cache writes may be underestimated compared with the default 5-minute cache write rate.
381
+ - OpenAI reasoning tokens are included in output-token totals when providers report them that way, but separate reasoning-token attribution is not stored yet.
382
+
198
383
  ## Adding a Custom Provider Parser
199
384
 
385
+ Use this for providers that are not OpenAI-compatible and return a different usage shape.
386
+
200
387
  ```ruby
201
- class DeepSeekParser < LlmCostTracker::Parsers::Base
388
+ class AcmeParser < LlmCostTracker::Parsers::Base
202
389
  def match?(url)
203
- url.to_s.include?("api.deepseek.com")
390
+ url.to_s.include?("api.acme-llm.example")
204
391
  end
205
392
 
206
393
  def parse(request_url, request_body, response_status, response_body)
@@ -211,16 +398,16 @@ class DeepSeekParser < LlmCostTracker::Parsers::Base
211
398
  return nil unless usage
212
399
 
213
400
  {
214
- provider: "deepseek",
401
+ provider: "acme",
215
402
  model: response["model"],
216
- input_tokens: usage["prompt_tokens"] || 0,
217
- output_tokens: usage["completion_tokens"] || 0
403
+ input_tokens: usage["input"] || 0,
404
+ output_tokens: usage["output"] || 0
218
405
  }
219
406
  end
220
407
  end
221
408
 
222
409
  # Register it
223
- LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
410
+ LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
224
411
  ```
225
412
 
226
413
  ## Supported Providers
@@ -228,6 +415,9 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
228
415
  | Provider | Auto-detected | Models with pricing |
229
416
  |----------|:---:|---|
230
417
  | OpenAI | ✅ | GPT-5.2/5.1/5, GPT-5 mini/nano, GPT-4.1, GPT-4o, o1/o3/o4-mini |
418
+ | OpenRouter | ✅ | Uses OpenAI-compatible usage; provider-prefixed OpenAI model IDs are normalized when possible |
419
+ | DeepSeek | ✅ | Uses OpenAI-compatible usage; add `pricing_overrides` for DeepSeek model pricing |
420
+ | OpenAI-compatible hosts | 🔧 | Configure `openai_compatible_providers` |
231
421
  | Anthropic | ✅ | Claude Opus 4.6/4.1/4, Sonnet 4.6/4.5/4, Haiku 4.5, Claude 3.x |
232
422
  | Google Gemini | ✅ | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite, 1.5 Pro/Flash |
233
423
  | Any other | 🔧 | Via custom parser (see above) |
@@ -235,6 +425,7 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
235
425
  Supported endpoint families:
236
426
 
237
427
  - OpenAI: Chat Completions, Responses, Completions, Embeddings
428
+ - OpenAI-compatible: Chat Completions, Responses, Completions, Embeddings
238
429
  - Anthropic: Messages
239
430
  - Google Gemini: `generateContent` responses with `usageMetadata`
240
431
 
@@ -251,9 +442,9 @@ Your App → Faraday → [LlmCostTracker Middleware] → LLM API
251
442
  ActiveRecord / Log / Custom
252
443
  ```
253
444
 
254
- The middleware intercepts **outgoing** HTTP responses (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses.
445
+ The middleware intercepts responses to **outgoing** HTTP requests (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses. Put `llm_cost_tracker` inside the Faraday stack where it can see the final response body; if another middleware consumes or transforms streaming bodies, use manual tracking.
255
446
 
256
- For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, use manual tracking.
447
+ For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, the gem logs a warning and skips tracking; use manual tracking for those calls.
257
448
 
258
449
  ## Development
259
450
 
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ class Budget
5
+ class << self
6
+ WARNING_MUTEX = Mutex.new
7
+
8
+ def enforce!
9
+ return unless LlmCostTracker.configuration.monthly_budget
10
+ return unless behavior == :block_requests
11
+ return warn_non_active_record_block_requests unless LlmCostTracker.configuration.active_record?
12
+
13
+ monthly_total = calculate_monthly_total(0)
14
+ return unless monthly_total >= LlmCostTracker.configuration.monthly_budget
15
+
16
+ handle_exceeded(monthly_total: monthly_total)
17
+ end
18
+
19
+ def check!(event)
20
+ config = LlmCostTracker.configuration
21
+ return unless config.monthly_budget
22
+ return unless event[:cost]
23
+
24
+ monthly_total = calculate_monthly_total(event[:cost][:total_cost])
25
+ return unless monthly_total > config.monthly_budget
26
+
27
+ handle_exceeded(monthly_total: monthly_total, last_event: event)
28
+ end
29
+
30
+ private
31
+
32
+ def calculate_monthly_total(latest_cost)
33
+ if LlmCostTracker.configuration.active_record?
34
+ active_record_monthly_total
35
+ else
36
+ latest_cost
37
+ end
38
+ end
39
+
40
+ def active_record_monthly_total
41
+ require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
42
+ require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)
43
+
44
+ LlmCostTracker::Storage::ActiveRecordStore.monthly_total
45
+ rescue LoadError => e
46
+ raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
47
+ end
48
+
49
+ def warn_non_active_record_block_requests
50
+ should_warn = WARNING_MUTEX.synchronize do
51
+ unless @warned_non_active_record_block_requests
52
+ @warned_non_active_record_block_requests = true
53
+ true
54
+ end
55
+ end
56
+ return unless should_warn
57
+
58
+ log_warning(":block_requests preflight requires storage_backend = :active_record; request was not blocked.")
59
+ end
60
+
61
+ def handle_exceeded(monthly_total:, last_event: nil)
62
+ config = LlmCostTracker.configuration
63
+ payload = {
64
+ monthly_total: monthly_total,
65
+ budget: config.monthly_budget,
66
+ last_event: last_event
67
+ }
68
+
69
+ config.on_budget_exceeded&.call(payload)
70
+ raise BudgetExceededError.new(**payload) if raise_on_exceeded?
71
+ end
72
+
73
+ def raise_on_exceeded?
74
+ %i[raise block_requests].include?(behavior)
75
+ end
76
+
77
+ def behavior
78
+ behavior = (LlmCostTracker.configuration.budget_exceeded_behavior || :notify).to_sym
79
+ return behavior if Configuration::BUDGET_EXCEEDED_BEHAVIORS.include?(behavior)
80
+
81
+ raise Error,
82
+ "Unknown budget_exceeded_behavior: #{behavior.inspect}. " \
83
+ "Use one of: #{Configuration::BUDGET_EXCEEDED_BEHAVIORS.join(', ')}"
84
+ end
85
+
86
+ def log_warning(message)
87
+ message = "[LlmCostTracker] #{message}"
88
+
89
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
90
+ Rails.logger.warn(message)
91
+ else
92
+ warn message
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
@@ -2,15 +2,31 @@
2
2
 
3
3
  module LlmCostTracker
4
4
  class Configuration
5
+ # Hostname => provider name for OpenAI-compatible APIs.
6
+ OPENAI_COMPATIBLE_PROVIDERS = {
7
+ "openrouter.ai" => "openrouter",
8
+ "api.deepseek.com" => "deepseek"
9
+ }.freeze
10
+
11
+ BUDGET_EXCEEDED_BEHAVIORS = %i[notify raise block_requests].freeze
12
+ STORAGE_ERROR_BEHAVIORS = %i[ignore warn raise].freeze
13
+ UNKNOWN_PRICING_BEHAVIORS = %i[ignore warn raise].freeze
14
+
5
15
  attr_accessor :enabled,
6
16
  :storage_backend, # :log, :active_record, :custom
7
17
  :custom_storage, # callable object for :custom backend
8
18
  :default_tags, # Hash of default tags added to every event
9
19
  :on_budget_exceeded, # callable, receives event hash
10
20
  :monthly_budget, # Float, in USD — nil means no limit
21
+ :budget_exceeded_behavior, # :notify, :raise, :block_requests
22
+ :storage_error_behavior, # :ignore, :warn, :raise
23
+ :unknown_pricing_behavior, # :ignore, :warn, :raise
11
24
  :log_level, # :debug, :info, :warn
25
+ :prices_file, # JSON/YAML file that overrides built-in prices
12
26
  :pricing_overrides # Hash to override built-in pricing
13
27
 
28
+ attr_reader :openai_compatible_providers
29
+
14
30
  def initialize
15
31
  @enabled = true
16
32
  @storage_backend = :log
@@ -18,8 +34,21 @@ module LlmCostTracker
18
34
  @default_tags = {}
19
35
  @on_budget_exceeded = nil
20
36
  @monthly_budget = nil
37
+ @budget_exceeded_behavior = :notify
38
+ @storage_error_behavior = :warn
39
+ @unknown_pricing_behavior = :warn
21
40
  @log_level = :info
41
+ @prices_file = nil
22
42
  @pricing_overrides = {}
43
+ self.openai_compatible_providers = OPENAI_COMPATIBLE_PROVIDERS
44
+ end
45
+
46
+ def openai_compatible_providers=(providers)
47
+ @openai_compatible_providers = normalize_openai_compatible_providers(providers)
48
+ end
49
+
50
+ def normalize_openai_compatible_providers!
51
+ self.openai_compatible_providers = openai_compatible_providers
23
52
  end
24
53
 
25
54
  def active_record?
@@ -29,5 +58,13 @@ module LlmCostTracker
29
58
  def log?
30
59
  storage_backend == :log
31
60
  end
61
+
62
+ private
63
+
64
+ def normalize_openai_compatible_providers(providers)
65
+ (providers || {}).each_with_object({}) do |(host, provider), normalized|
66
+ normalized[host.to_s.downcase] = provider.to_s
67
+ end
68
+ end
32
69
  end
33
70
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ class Error < StandardError; end
5
+
6
+ class BudgetExceededError < Error
7
+ attr_reader :monthly_total, :budget, :last_event
8
+
9
+ def initialize(monthly_total:, budget:, last_event: nil)
10
+ @monthly_total = monthly_total
11
+ @budget = budget
12
+ @last_event = last_event
13
+
14
+ super("LLM monthly budget exceeded: $#{format('%.6f', monthly_total)} / $#{format('%.6f', budget)}")
15
+ end
16
+ end
17
+
18
+ class UnknownPricingError < Error
19
+ attr_reader :model
20
+
21
+ def initialize(model:)
22
+ @model = model
23
+
24
+ super("No pricing configured for LLM model: #{model.inspect}")
25
+ end
26
+ end
27
+
28
+ class StorageError < Error
29
+ attr_reader :original_error
30
+
31
+ def initialize(original_error)
32
+ @original_error = original_error
33
+
34
+ super("Failed to store LLM cost event: #{original_error.class}: #{original_error.message}")
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module EventMetadata
5
+ INTERNAL_TAG_KEYS = %w[
6
+ cache_creation_input_tokens
7
+ cache_creation_tokens
8
+ cache_read_input_tokens
9
+ cache_read_tokens
10
+ cached_input_tokens
11
+ input_tokens
12
+ output_tokens
13
+ reasoning_tokens
14
+ total_tokens
15
+ ].freeze
16
+
17
+ class << self
18
+ def usage_data(input_tokens, output_tokens, metadata)
19
+ cache_read_input_tokens = integer_metadata(metadata, :cache_read_input_tokens, :cache_read_tokens)
20
+ cache_creation_input_tokens = integer_metadata(
21
+ metadata,
22
+ :cache_creation_input_tokens,
23
+ :cache_creation_tokens
24
+ )
25
+ cached_input_tokens = integer_metadata(metadata, :cached_input_tokens)
26
+
27
+ {
28
+ input_tokens: input_tokens.to_i,
29
+ output_tokens: output_tokens.to_i,
30
+ cached_input_tokens: cached_input_tokens,
31
+ cache_read_input_tokens: cache_read_input_tokens,
32
+ cache_creation_input_tokens: cache_creation_input_tokens,
33
+ total_tokens: input_tokens.to_i + output_tokens.to_i +
34
+ cache_read_input_tokens + cache_creation_input_tokens
35
+ }
36
+ end
37
+
38
+ def tags(metadata)
39
+ metadata.reject { |key, _value| INTERNAL_TAG_KEYS.include?(key.to_s) }
40
+ end
41
+
42
+ private
43
+
44
+ def integer_metadata(metadata, *keys)
45
+ keys.each do |key|
46
+ value = metadata[key] || metadata[key.to_s]
47
+ return value.to_i unless value.nil?
48
+ end
49
+
50
+ 0
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/active_record"
5
+
6
+ module LlmCostTracker
7
+ module Generators
8
+ class AddLatencyMsGenerator < Rails::Generators::Base
9
+ include ActiveRecord::Generators::Migration
10
+
11
+ source_root File.expand_path("templates", __dir__)
12
+
13
+ desc "Creates a migration to add llm_api_calls.latency_ms"
14
+
15
+ def create_migration_file
16
+ migration_template(
17
+ "add_latency_ms_to_llm_api_calls.rb.erb",
18
+ "db/migrate/add_latency_ms_to_llm_api_calls.rb"
19
+ )
20
+ end
21
+
22
+ private
23
+
24
+ def migration_version
25
+ "[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]"
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,9 @@
1
+ class AddLatencyMsToLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
2
+ def up
3
+ add_column :llm_api_calls, :latency_ms, :integer unless column_exists?(:llm_api_calls, :latency_ms)
4
+ end
5
+
6
+ def down
7
+ remove_column :llm_api_calls, :latency_ms if column_exists?(:llm_api_calls, :latency_ms)
8
+ end
9
+ end