llm_cost_tracker 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +202 -11
- data/lib/llm_cost_tracker/budget.rb +97 -0
- data/lib/llm_cost_tracker/configuration.rb +37 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +68 -2
- data/lib/llm_cost_tracker/middleware/faraday.rb +50 -12
- data/lib/llm_cost_tracker/parsers/anthropic.rb +4 -1
- data/lib/llm_cost_tracker/parsers/gemini.rb +9 -2
- data/lib/llm_cost_tracker/parsers/openai.rb +10 -3
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +69 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +74 -74
- data/lib/llm_cost_tracker/railtie.rb +3 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +12 -3
- data/lib/llm_cost_tracker/tracker.rb +49 -54
- data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +33 -5
- data/llm_cost_tracker.gemspec +4 -3
- metadata +20 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6a014d7c3de26b91ba6a0b99d300a803d04ab8a95c55b374377d6b8cdf631e50
|
|
4
|
+
data.tar.gz: 7e042cf740a65c1019d0ee986eeec9fc2266a1ec59c55d4807f306521f95f869
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1c41d7a9002fb484df80b6d3e5c7ce1fc14a3d481443f0bbefe1c74a4e2a0f92a039f56677a7a354dbfaeaaae6b5d4727bb5ca9466b06b77d574d26efd477fdb
|
|
7
|
+
data.tar.gz: d8017bbf7975f5bafbc4328dc8df76a614bc5e7700bb14569cead5ffc06aac6a4828707a9a609b9b7af69e4b7e3a08543712a750ed79d7b125ab2c96336cefa1
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,43 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.2] - 2026-04-18
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Auto-detect OpenRouter and DeepSeek as OpenAI-compatible providers.
|
|
13
|
+
- Add `openai_compatible_providers` configuration for private OpenAI-compatible gateways.
|
|
14
|
+
- Add `BudgetExceededError` and `budget_exceeded_behavior` for best-effort budget guardrails.
|
|
15
|
+
- Add `:raise` and `:block_requests` budget behaviors; `:block_requests` is not a hard cap under concurrency.
|
|
16
|
+
- Add `StorageError` and `storage_error_behavior` so storage failures do not have to break host LLM calls.
|
|
17
|
+
- Add `UnknownPricingError` and `unknown_pricing_behavior` for unknown model pricing.
|
|
18
|
+
- Add built-in `prices.json` registry with metadata and source URLs.
|
|
19
|
+
- Add `prices_file` configuration for local JSON/YAML pricing overrides.
|
|
20
|
+
- Add `with_cost`, `without_cost`, and `unknown_pricing` ActiveRecord scopes.
|
|
21
|
+
- Add `latency_ms` tracking for Faraday calls, manual tracking, notifications, and ActiveRecord storage.
|
|
22
|
+
- Add `with_latency`, `average_latency_ms`, `latency_by_model`, and `latency_by_provider`.
|
|
23
|
+
- Use PostgreSQL `jsonb` storage for tags in newly generated migrations.
|
|
24
|
+
- Add a GIN index on `llm_api_calls.tags` for PostgreSQL installs.
|
|
25
|
+
- Add adapter-aware `by_tag` querying with JSONB containment on PostgreSQL and text fallback elsewhere.
|
|
26
|
+
- Add `by_tags`, `by_user`, and `by_feature` scopes for common attribution queries.
|
|
27
|
+
- Add `llm_cost_tracker:upgrade_tags_to_jsonb` generator for existing PostgreSQL installs.
|
|
28
|
+
- Add `llm_cost_tracker:upgrade_cost_precision` generator for widening stored cost columns.
|
|
29
|
+
- Add `llm_cost_tracker:add_latency_ms` generator for existing installs.
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
|
|
33
|
+
- Store tags as a Hash for JSON-backed columns and as JSON text for fallback columns.
|
|
34
|
+
- Keep internal usage metadata such as cache token counts out of stored attribution tags.
|
|
35
|
+
- Normalize provider-prefixed model IDs like `openai/gpt-4o-mini` for built-in price lookup.
|
|
36
|
+
- Normalize configured OpenAI-compatible host keys to lowercase after configuration.
|
|
37
|
+
- Avoid double fuzzy-match passes during price lookup.
|
|
38
|
+
- Widen generated cost decimal columns to `precision: 20, scale: 8`.
|
|
39
|
+
- Count Gemini `thoughtsTokenCount` as output tokens for better thinking-mode cost estimates.
|
|
40
|
+
- Warn when Faraday exposes an unreadable streaming/SSE response body.
|
|
41
|
+
- Document tag storage behavior, budget guardrail limits, known limitations, common tag scopes, and upgrade flows.
|
|
42
|
+
- Clarify that budget errors raised after a response occur after the event has been recorded.
|
|
43
|
+
- Route custom storage exceptions that inherit from `LlmCostTracker::Error` through `storage_error_behavior`.
|
|
44
|
+
|
|
8
45
|
## [0.1.1] - 2026-04-17
|
|
9
46
|
|
|
10
47
|
### Fixed
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Self-hosted LLM API cost tracking for Ruby and Rails apps.**
|
|
4
4
|
|
|
5
|
-
Track
|
|
5
|
+
Track, attribute, and enforce AI costs for OpenAI, Anthropic, Google Gemini, OpenRouter, DeepSeek, and OpenAI-compatible calls from Faraday-based Ruby clients. Store the data in your own database, tag calls by user or feature, and get budget alerts without adding an external SaaS or proxy.
|
|
6
6
|
|
|
7
7
|
[Gem Version](https://rubygems.org/gems/llm_cost_tracker)
|
|
8
8
|
[CI](https://github.com/sergey-homenko/llm_cost_tracker/actions)
|
|
@@ -17,6 +17,8 @@ Every Rails app integrating LLMs faces the same problem: **you don't know how mu
|
|
|
17
17
|
- 🏠 **Self-hosted** — your data stays in your database
|
|
18
18
|
- 🧩 **Client-light** — works with raw Faraday and LLM gems that expose their Faraday connection
|
|
19
19
|
- 🏷️ **Attribution-first** — tag spend by feature, tenant, user, job, or environment
|
|
20
|
+
- 🌐 **OpenAI-compatible** — auto-detect OpenRouter and DeepSeek, with custom compatible hosts configurable
|
|
21
|
+
- 🛑 **Budget guardrails** — notify, raise, or block requests when monthly spend is exhausted
|
|
20
22
|
- 💸 **Budget-aware** — emit notifications and callbacks before spend surprises you
|
|
21
23
|
|
|
22
24
|
This gem is intentionally not a tracing platform, prompt CMS, eval system, or gateway. It focuses on the boring but valuable question: "What did this app spend on LLM APIs, and where did that spend come from?"
|
|
@@ -103,6 +105,9 @@ LlmCostTracker.configure do |config|
|
|
|
103
105
|
|
|
104
106
|
# Monthly budget in USD
|
|
105
107
|
config.monthly_budget = 500.00
|
|
108
|
+
config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
|
|
109
|
+
config.storage_error_behavior = :warn # :ignore, :warn, or :raise
|
|
110
|
+
config.unknown_pricing_behavior = :warn # :ignore, :warn, or :raise
|
|
106
111
|
|
|
107
112
|
# Alert callback
|
|
108
113
|
config.on_budget_exceeded = ->(data) {
|
|
@@ -113,13 +118,102 @@ LlmCostTracker.configure do |config|
|
|
|
113
118
|
}
|
|
114
119
|
|
|
115
120
|
# Override pricing for custom/fine-tuned models (per 1M tokens)
|
|
121
|
+
config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.json")
|
|
116
122
|
config.pricing_overrides = {
|
|
117
123
|
"ft:gpt-4o-mini:my-org" => { input: 0.30, cached_input: 0.15, output: 1.20 }
|
|
118
124
|
}
|
|
125
|
+
|
|
126
|
+
# OpenAI-compatible APIs. OpenRouter and DeepSeek are included by default.
|
|
127
|
+
config.openai_compatible_providers["llm.my-company.com"] = "internal_gateway"
|
|
119
128
|
end
|
|
120
129
|
```
|
|
121
130
|
|
|
122
|
-
Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. Use `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
|
|
131
|
+
Pricing is best-effort and based on public provider pricing for standard token usage. Providers change pricing frequently, and some features have extra charges or tiered pricing. OpenRouter-style model IDs such as `openai/gpt-4o-mini` are normalized to built-in model names when possible. Use `prices_file` or `pricing_overrides` for fine-tunes, gateway-specific model IDs, enterprise discounts, batch pricing, long-context premiums, and any model this gem does not know yet.
|
|
132
|
+
|
|
133
|
+
Storage errors are non-fatal by default:
|
|
134
|
+
|
|
135
|
+
```ruby
|
|
136
|
+
config.storage_error_behavior = :warn # default
|
|
137
|
+
config.storage_error_behavior = :raise # fail fast with StorageError
|
|
138
|
+
config.storage_error_behavior = :ignore # skip storage failures silently
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
With the default `:warn` behavior, tracking emits a warning and lets the LLM response continue if ActiveRecord or custom storage fails. `LlmCostTracker::StorageError` exposes `original_error` when `:raise` is enabled.
|
|
142
|
+
|
|
143
|
+
Unknown model pricing is visible by default:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
config.unknown_pricing_behavior = :warn # default
|
|
147
|
+
config.unknown_pricing_behavior = :raise # fail fast with UnknownPricingError
|
|
148
|
+
config.unknown_pricing_behavior = :ignore # keep tracking tokens silently
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
When pricing is unknown, the event can still be recorded with token counts, but `cost` is `nil` and budget enforcement is skipped for that event. Use this ActiveRecord query to find the gaps:
|
|
152
|
+
|
|
153
|
+
```ruby
|
|
154
|
+
LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Keeping Prices Current
|
|
158
|
+
|
|
159
|
+
Built-in prices live in `lib/llm_cost_tracker/prices.json`, with `updated_at`, `unit`, `currency`, and source URLs in the file metadata. The gem does not fetch pricing on boot; that keeps it self-hosted and avoids hidden external dependencies.
|
|
160
|
+
|
|
161
|
+
For production apps, keep a local JSON or YAML price file and point the gem at it:
|
|
162
|
+
|
|
163
|
+
```ruby
|
|
164
|
+
config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.json")
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Example JSON:
|
|
168
|
+
|
|
169
|
+
```json
|
|
170
|
+
{
|
|
171
|
+
"metadata": {
|
|
172
|
+
"updated_at": "2026-04-18",
|
|
173
|
+
"currency": "USD",
|
|
174
|
+
"unit": "1M tokens"
|
|
175
|
+
},
|
|
176
|
+
"models": {
|
|
177
|
+
"my-gateway/gpt-4o-mini": {
|
|
178
|
+
"input": 0.20,
|
|
179
|
+
"cached_input": 0.10,
|
|
180
|
+
"output": 0.80
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
`pricing_overrides` still has the highest precedence, so you can use it for small Ruby-only overrides and keep broader provider tables in the file. A practical release rhythm is to refresh built-in `prices.json` quarterly and use `prices_file` for urgent provider changes between gem releases.
|
|
187
|
+
|
|
188
|
+
## Budget Enforcement
|
|
189
|
+
|
|
190
|
+
```ruby
|
|
191
|
+
LlmCostTracker.configure do |config|
|
|
192
|
+
config.storage_backend = :active_record
|
|
193
|
+
config.monthly_budget = 100.00
|
|
194
|
+
config.budget_exceeded_behavior = :block_requests
|
|
195
|
+
end
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Budget behavior options:
|
|
199
|
+
|
|
200
|
+
- `:notify` — default. Calls `on_budget_exceeded` after a tracked event pushes the month over budget.
|
|
201
|
+
- `:raise` — records the event, then raises `LlmCostTracker::BudgetExceededError` when the month is over budget.
|
|
202
|
+
- `:block_requests` — blocks Faraday LLM requests before the HTTP call when the ActiveRecord monthly total has already reached the budget. If a request pushes the month over budget, it also raises after recording the event.
|
|
203
|
+
|
|
204
|
+
`BudgetExceededError` exposes `monthly_total`, `budget`, and `last_event`:
|
|
205
|
+
|
|
206
|
+
```ruby
|
|
207
|
+
begin
|
|
208
|
+
client.chat(...)
|
|
209
|
+
rescue LlmCostTracker::BudgetExceededError => e
|
|
210
|
+
Rails.logger.warn("LLM budget exhausted: #{e.monthly_total} / #{e.budget}")
|
|
211
|
+
end
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Pre-request blocking needs `storage_backend = :active_record` because the middleware must query your stored monthly total before sending the request. With `:log` or `:custom` storage, `:raise` and the post-response part of `:block_requests` still work for the event being tracked.
|
|
215
|
+
|
|
216
|
+
`:block_requests` is a best-effort guardrail, not a transactional hard quota. In highly concurrent deployments, multiple workers can pass the preflight check at the same time before any of them records its final cost. The request that first pushes the month over budget is stored before the post-response `BudgetExceededError` is raised; later Faraday requests are blocked during preflight once the stored monthly total is exhausted. Use provider-side limits or a gateway-level quota if you need strict cross-process enforcement.
|
|
123
217
|
|
|
124
218
|
## Querying Costs (ActiveRecord)
|
|
125
219
|
|
|
@@ -140,16 +234,65 @@ LlmCostTracker::LlmApiCall.this_month.cost_by_provider
|
|
|
140
234
|
LlmCostTracker::LlmApiCall.daily_costs(days: 7)
|
|
141
235
|
# => { "2026-04-10" => 1.5, "2026-04-11" => 2.3, ... }
|
|
142
236
|
|
|
237
|
+
# Latency overview
|
|
238
|
+
LlmCostTracker::LlmApiCall.with_latency.average_latency_ms
|
|
239
|
+
LlmCostTracker::LlmApiCall.this_month.latency_by_model
|
|
240
|
+
|
|
143
241
|
# Filter by feature
|
|
144
242
|
LlmCostTracker::LlmApiCall.by_tag("feature", "chat").this_month.total_cost
|
|
145
243
|
|
|
146
244
|
# Filter by user
|
|
147
245
|
LlmCostTracker::LlmApiCall.by_tag("user_id", "42").today.total_cost
|
|
246
|
+
LlmCostTracker::LlmApiCall.by_user(42).today.total_cost
|
|
247
|
+
|
|
248
|
+
# Filter by multiple tags
|
|
249
|
+
LlmCostTracker::LlmApiCall.by_tags(user_id: 42, feature: "chat").this_month.total_cost
|
|
250
|
+
|
|
251
|
+
# Feature shortcut
|
|
252
|
+
LlmCostTracker::LlmApiCall.by_feature("summarizer").this_month.total_cost
|
|
253
|
+
|
|
254
|
+
# Find models without pricing
|
|
255
|
+
LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count
|
|
256
|
+
LlmCostTracker::LlmApiCall.with_cost.this_month.total_cost
|
|
148
257
|
|
|
149
258
|
# Custom date range
|
|
150
259
|
LlmCostTracker::LlmApiCall.between(1.week.ago, Time.current).cost_by_model
|
|
151
260
|
```
|
|
152
261
|
|
|
262
|
+
### Tag Storage
|
|
263
|
+
|
|
264
|
+
The install generator uses `jsonb` tags with a GIN index on PostgreSQL:
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
t.jsonb :tags, null: false, default: {}
|
|
268
|
+
add_index :llm_api_calls, :tags, using: :gin
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
On SQLite and other adapters, tags fall back to JSON stored in a text column. The `by_tag` scope automatically uses PostgreSQL JSONB containment when the column supports it, and the text fallback otherwise.
|
|
272
|
+
|
|
273
|
+
If you installed `llm_cost_tracker` before JSONB tags were available and your app uses PostgreSQL, generate an upgrade migration:
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
bin/rails generate llm_cost_tracker:upgrade_tags_to_jsonb
|
|
277
|
+
bin/rails db:migrate
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
This converts the existing `tags` text column to `jsonb`, keeps existing tag data, and adds the GIN index.
|
|
281
|
+
|
|
282
|
+
If you installed an earlier version with `precision: 12, scale: 8` cost columns, widen them for larger production ledgers:
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
bin/rails generate llm_cost_tracker:upgrade_cost_precision
|
|
286
|
+
bin/rails db:migrate
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
If you installed before `latency_ms` was available, add the latency column:
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
bin/rails generate llm_cost_tracker:add_latency_ms
|
|
293
|
+
bin/rails db:migrate
|
|
294
|
+
```
|
|
295
|
+
|
|
153
296
|
## ActiveSupport::Notifications
|
|
154
297
|
|
|
155
298
|
Every tracked call emits an `llm_request.llm_cost_tracker` event:
|
|
@@ -163,6 +306,7 @@ ActiveSupport::Notifications.subscribe("llm_request.llm_cost_tracker") do |*, pa
|
|
|
163
306
|
# input_tokens: 150,
|
|
164
307
|
# output_tokens: 42,
|
|
165
308
|
# total_tokens: 192,
|
|
309
|
+
# latency_ms: 248,
|
|
166
310
|
# cost: {
|
|
167
311
|
# input_cost: 0.000375,
|
|
168
312
|
# cached_input_cost: 0.0,
|
|
@@ -188,19 +332,62 @@ LlmCostTracker.configure do |config|
|
|
|
188
332
|
config.storage_backend = :custom
|
|
189
333
|
config.custom_storage = ->(event) {
|
|
190
334
|
InfluxDB.write("llm_costs", {
|
|
191
|
-
values: {
|
|
335
|
+
values: {
|
|
336
|
+
cost: event[:cost]&.fetch(:total_cost, nil),
|
|
337
|
+
tokens: event[:total_tokens],
|
|
338
|
+
latency_ms: event[:latency_ms]
|
|
339
|
+
},
|
|
192
340
|
tags: { provider: event[:provider], model: event[:model] }
|
|
193
341
|
})
|
|
194
342
|
}
|
|
195
343
|
end
|
|
196
344
|
```
|
|
197
345
|
|
|
346
|
+
## OpenAI-Compatible Providers
|
|
347
|
+
|
|
348
|
+
```ruby
|
|
349
|
+
LlmCostTracker.configure do |config|
|
|
350
|
+
# Built in:
|
|
351
|
+
# "openrouter.ai" => "openrouter"
|
|
352
|
+
# "api.deepseek.com" => "deepseek"
|
|
353
|
+
config.openai_compatible_providers["gateway.example.com"] = "internal_gateway"
|
|
354
|
+
end
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
Any configured host is parsed with the OpenAI-compatible usage shape:
|
|
358
|
+
|
|
359
|
+
- `prompt_tokens` / `completion_tokens` / `total_tokens`
|
|
360
|
+
- `input_tokens` / `output_tokens` / `total_tokens`
|
|
361
|
+
- optional cached input details when the response includes them
|
|
362
|
+
|
|
363
|
+
This covers OpenRouter, DeepSeek, and private gateways that expose OpenAI-style Chat Completions, Responses, Completions, or Embeddings endpoints.
|
|
364
|
+
|
|
365
|
+
## Production Checklist
|
|
366
|
+
|
|
367
|
+
- Use `storage_backend = :active_record` in production.
|
|
368
|
+
- Set `monthly_budget` and choose `budget_exceeded_behavior`.
|
|
369
|
+
- Treat `:block_requests` as best-effort in concurrent systems, not a strict quota.
|
|
370
|
+
- Keep `unknown_pricing_behavior = :warn` or `:raise` until pricing overrides are complete.
|
|
371
|
+
- Add `pricing_overrides` for custom, fine-tuned, gateway-specific, or newly released models.
|
|
372
|
+
- Tag calls with `tenant_id`, `user_id`, and `feature` where possible.
|
|
373
|
+
- Check `LlmCostTracker::LlmApiCall.unknown_pricing.group(:model).count` after deploys.
|
|
374
|
+
- Track `latency_ms` and watch `latency_by_model` for slow or degraded providers.
|
|
375
|
+
|
|
376
|
+
## Known Limitations
|
|
377
|
+
|
|
378
|
+
- `:block_requests` is best-effort under concurrency. For hard caps, use an external quota system, provider-side limits, or a gateway-level budget.
|
|
379
|
+
- Streaming/SSE calls are tracked only when Faraday exposes a final response body with usage data. Otherwise the gem warns and skips automatic tracking.
|
|
380
|
+
- Anthropic cache creation TTL variants are not modeled separately yet; 1-hour cache writes may be underestimated compared with the default 5-minute cache write rate.
|
|
381
|
+
- OpenAI reasoning tokens are included in output-token totals when providers report them that way, but separate reasoning-token attribution is not stored yet.
|
|
382
|
+
|
|
198
383
|
## Adding a Custom Provider Parser
|
|
199
384
|
|
|
385
|
+
Use this for providers that are not OpenAI-compatible and return a different usage shape.
|
|
386
|
+
|
|
200
387
|
```ruby
|
|
201
|
-
class DeepSeekParser < LlmCostTracker::Parsers::Base
|
|
388
|
+
class AcmeParser < LlmCostTracker::Parsers::Base
|
|
202
389
|
def match?(url)
|
|
203
|
-
url.to_s.include?("api.
|
|
390
|
+
url.to_s.include?("api.acme-llm.example")
|
|
204
391
|
end
|
|
205
392
|
|
|
206
393
|
def parse(request_url, request_body, response_status, response_body)
|
|
@@ -211,16 +398,16 @@ class DeepSeekParser < LlmCostTracker::Parsers::Base
|
|
|
211
398
|
return nil unless usage
|
|
212
399
|
|
|
213
400
|
{
|
|
214
|
-
provider: "
|
|
401
|
+
provider: "acme",
|
|
215
402
|
model: response["model"],
|
|
216
|
-
input_tokens: usage["
|
|
217
|
-
output_tokens: usage["
|
|
403
|
+
input_tokens: usage["input"] || 0,
|
|
404
|
+
output_tokens: usage["output"] || 0
|
|
218
405
|
}
|
|
219
406
|
end
|
|
220
407
|
end
|
|
221
408
|
|
|
222
409
|
# Register it
|
|
223
|
-
LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
|
|
410
|
+
LlmCostTracker::Parsers::Registry.register(AcmeParser.new)
|
|
224
411
|
```
|
|
225
412
|
|
|
226
413
|
## Supported Providers
|
|
@@ -228,6 +415,9 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
|
|
|
228
415
|
| Provider | Auto-detected | Models with pricing |
|
|
229
416
|
|----------|:---:|---|
|
|
230
417
|
| OpenAI | ✅ | GPT-5.2/5.1/5, GPT-5 mini/nano, GPT-4.1, GPT-4o, o1/o3/o4-mini |
|
|
418
|
+
| OpenRouter | ✅ | Uses OpenAI-compatible usage; provider-prefixed OpenAI model IDs are normalized when possible |
|
|
419
|
+
| DeepSeek | ✅ | Uses OpenAI-compatible usage; add `pricing_overrides` for DeepSeek model pricing |
|
|
420
|
+
| OpenAI-compatible hosts | 🔧 | Configure `openai_compatible_providers` |
|
|
231
421
|
| Anthropic | ✅ | Claude Opus 4.6/4.1/4, Sonnet 4.6/4.5/4, Haiku 4.5, Claude 3.x |
|
|
232
422
|
| Google Gemini | ✅ | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite, 1.5 Pro/Flash |
|
|
233
423
|
| Any other | 🔧 | Via custom parser (see above) |
|
|
@@ -235,6 +425,7 @@ LlmCostTracker::Parsers::Registry.register(DeepSeekParser.new)
|
|
|
235
425
|
Supported endpoint families:
|
|
236
426
|
|
|
237
427
|
- OpenAI: Chat Completions, Responses, Completions, Embeddings
|
|
428
|
+
- OpenAI-compatible: Chat Completions, Responses, Completions, Embeddings
|
|
238
429
|
- Anthropic: Messages
|
|
239
430
|
- Google Gemini: `generateContent` responses with `usageMetadata`
|
|
240
431
|
|
|
@@ -251,9 +442,9 @@ Your App → Faraday → [LlmCostTracker Middleware] → LLM API
|
|
|
251
442
|
ActiveRecord / Log / Custom
|
|
252
443
|
```
|
|
253
444
|
|
|
254
|
-
The middleware intercepts **outgoing** HTTP responses (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses.
|
|
445
|
+
The middleware intercepts **outgoing** HTTP responses (not incoming Rails requests), parses the provider usage object, looks up pricing, and records the event. It never modifies requests or responses. Put `llm_cost_tracker` inside the Faraday stack where it can see the final response body; if another middleware consumes or transforms streaming bodies, use manual tracking.
|
|
255
446
|
|
|
256
|
-
For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, use manual tracking.
|
|
447
|
+
For streaming APIs, tracking depends on the final response body including provider usage data. If the client consumes server-sent events without exposing the final usage payload to Faraday, the gem logs a warning and skips tracking; use manual tracking for those calls.
|
|
257
448
|
|
|
258
449
|
## Development
|
|
259
450
|
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Best-effort monthly budget guardrail.
  #
  # Two entry points are exposed on the class itself:
  #
  # * +enforce!+ - preflight check that only acts for the +:block_requests+
  #   behavior and only when ActiveRecord storage is configured.
  # * +check!+   - evaluates a tracked event against the monthly budget.
  #
  # Both read their settings from +LlmCostTracker.configuration+ on every call.
  class Budget
    class << self
      # Serializes access to the "already warned" flag so the preflight
      # warning is logged at most once per process.
      WARNING_MUTEX = Mutex.new

      # Preflight budget check.
      #
      # Does nothing unless a monthly budget is set and the configured behavior
      # is +:block_requests+. Without ActiveRecord storage there is no stored
      # total to consult, so a one-time warning is logged instead of blocking.
      #
      # @return [nil, Object] nil, or whatever the logger returned for the warning
      # @raise [BudgetExceededError] when the stored monthly total has reached the budget
      # @raise [Error] when +budget_exceeded_behavior+ is not a known value
      def enforce!
        settings = LlmCostTracker.configuration
        limit = settings.monthly_budget
        return unless limit
        return unless behavior == :block_requests
        return warn_non_active_record_block_requests unless settings.active_record?

        spent = calculate_monthly_total(0)
        handle_exceeded(monthly_total: spent) if spent >= limit
      end

      # Evaluates one tracked event against the monthly budget.
      #
      # Events without a cost (+event[:cost]+ is nil/false) are skipped.
      #
      # @param event [Hash] tracked event; +event[:cost][:total_cost]+ is read
      # @return [nil]
      # @raise [BudgetExceededError] when over budget and the behavior is
      #   +:raise+ or +:block_requests+
      # @raise [Error] when +budget_exceeded_behavior+ is not a known value
      def check!(event)
        limit = LlmCostTracker.configuration.monthly_budget
        return unless limit

        cost = event[:cost]
        return unless cost

        spent = calculate_monthly_total(cost[:total_cost])
        handle_exceeded(monthly_total: spent, last_event: event) if spent > limit
      end

      private

      # With ActiveRecord storage the stored monthly total is authoritative and
      # +latest_cost+ is ignored (presumably the event is already persisted by
      # the time this runs - confirm against the tracker). Otherwise the only
      # figure available is the cost that was passed in.
      def calculate_monthly_total(latest_cost)
        LlmCostTracker.configuration.active_record? ? active_record_monthly_total : latest_cost
      end

      # Lazily loads the ActiveRecord pieces, then asks the store for the
      # month-to-date total. A LoadError is re-raised as LlmCostTracker::Error.
      def active_record_monthly_total
        require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
        require_relative "storage/active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)

        LlmCostTracker::Storage::ActiveRecordStore.monthly_total
      rescue LoadError => e
        raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
      end

      # Logs, once per process, that preflight blocking is unavailable for the
      # current storage backend.
      def warn_non_active_record_block_requests
        first_time = WARNING_MUTEX.synchronize do
          next false if @warned_non_active_record_block_requests

          @warned_non_active_record_block_requests = true
        end

        log_warning(":block_requests preflight requires storage_backend = :active_record; request was not blocked.") if first_time
      end

      # Invokes the +on_budget_exceeded+ callback (if any) and then raises when
      # the configured behavior asks for it. The callback always runs first.
      def handle_exceeded(monthly_total:, last_event: nil)
        settings = LlmCostTracker.configuration
        details = { monthly_total: monthly_total, budget: settings.monthly_budget, last_event: last_event }

        callback = settings.on_budget_exceeded
        callback&.call(details)

        raise BudgetExceededError.new(**details) if raise_on_exceeded?
      end

      # True for the two behaviors that surface an exception to the caller.
      def raise_on_exceeded?
        mode = behavior
        mode == :raise || mode == :block_requests
      end

      # Returns the configured behavior as a symbol (defaulting to +:notify+),
      # raising Error for anything outside Configuration::BUDGET_EXCEEDED_BEHAVIORS.
      def behavior
        behavior = (LlmCostTracker.configuration.budget_exceeded_behavior || :notify).to_sym

        unless Configuration::BUDGET_EXCEEDED_BEHAVIORS.include?(behavior)
          raise Error,
                "Unknown budget_exceeded_behavior: #{behavior.inspect}. " \
                "Use one of: #{Configuration::BUDGET_EXCEEDED_BEHAVIORS.join(', ')}"
        end

        behavior
      end

      # Sends the prefixed message to Rails.logger when one is available,
      # falling back to Kernel#warn.
      def log_warning(message)
        message = "[LlmCostTracker] #{message}"
        rails_logger = defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        if rails_logger
          Rails.logger.warn(message)
        else
          warn message
        end
      end
    end
  end
end
|
|
@@ -2,15 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Configuration
|
|
5
|
+
# Hostname => provider name for OpenAI-compatible APIs.
|
|
6
|
+
OPENAI_COMPATIBLE_PROVIDERS = {
|
|
7
|
+
"openrouter.ai" => "openrouter",
|
|
8
|
+
"api.deepseek.com" => "deepseek"
|
|
9
|
+
}.freeze
|
|
10
|
+
|
|
11
|
+
BUDGET_EXCEEDED_BEHAVIORS = %i[notify raise block_requests].freeze
|
|
12
|
+
STORAGE_ERROR_BEHAVIORS = %i[ignore warn raise].freeze
|
|
13
|
+
UNKNOWN_PRICING_BEHAVIORS = %i[ignore warn raise].freeze
|
|
14
|
+
|
|
5
15
|
attr_accessor :enabled,
|
|
6
16
|
:storage_backend, # :log, :active_record, :custom
|
|
7
17
|
:custom_storage, # callable object for :custom backend
|
|
8
18
|
:default_tags, # Hash of default tags added to every event
|
|
9
19
|
:on_budget_exceeded, # callable, receives event hash
|
|
10
20
|
:monthly_budget, # Float, in USD — nil means no limit
|
|
21
|
+
:budget_exceeded_behavior, # :notify, :raise, :block_requests
|
|
22
|
+
:storage_error_behavior, # :ignore, :warn, :raise
|
|
23
|
+
:unknown_pricing_behavior, # :ignore, :warn, :raise
|
|
11
24
|
:log_level, # :debug, :info, :warn
|
|
25
|
+
:prices_file, # JSON/YAML file that overrides built-in prices
|
|
12
26
|
:pricing_overrides # Hash to override built-in pricing
|
|
13
27
|
|
|
28
|
+
attr_reader :openai_compatible_providers
|
|
29
|
+
|
|
14
30
|
def initialize
|
|
15
31
|
@enabled = true
|
|
16
32
|
@storage_backend = :log
|
|
@@ -18,8 +34,21 @@ module LlmCostTracker
|
|
|
18
34
|
@default_tags = {}
|
|
19
35
|
@on_budget_exceeded = nil
|
|
20
36
|
@monthly_budget = nil
|
|
37
|
+
@budget_exceeded_behavior = :notify
|
|
38
|
+
@storage_error_behavior = :warn
|
|
39
|
+
@unknown_pricing_behavior = :warn
|
|
21
40
|
@log_level = :info
|
|
41
|
+
@prices_file = nil
|
|
22
42
|
@pricing_overrides = {}
|
|
43
|
+
self.openai_compatible_providers = OPENAI_COMPATIBLE_PROVIDERS
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def openai_compatible_providers=(providers)
|
|
47
|
+
@openai_compatible_providers = normalize_openai_compatible_providers(providers)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def normalize_openai_compatible_providers!
|
|
51
|
+
self.openai_compatible_providers = openai_compatible_providers
|
|
23
52
|
end
|
|
24
53
|
|
|
25
54
|
def active_record?
|
|
@@ -29,5 +58,13 @@ module LlmCostTracker
|
|
|
29
58
|
def log?
|
|
30
59
|
storage_backend == :log
|
|
31
60
|
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def normalize_openai_compatible_providers(providers)
|
|
65
|
+
(providers || {}).each_with_object({}) do |(host, provider), normalized|
|
|
66
|
+
normalized[host.to_s.downcase] = provider.to_s
|
|
67
|
+
end
|
|
68
|
+
end
|
|
32
69
|
end
|
|
33
70
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Root of the gem's exception hierarchy; rescue this to catch every error below.
  Error = Class.new(StandardError)

  # Raised when the monthly budget has been reached or exceeded.
  #
  # Carries the figures that triggered it: +monthly_total+, +budget+, and the
  # optional +last_event+ that was being tracked.
  class BudgetExceededError < Error
    attr_reader :monthly_total, :budget, :last_event

    # @param monthly_total [Numeric] amount used for the comparison
    # @param budget [Numeric] configured monthly budget
    # @param last_event [Hash, nil] event being tracked, when there is one
    def initialize(monthly_total:, budget:, last_event: nil)
      @monthly_total = monthly_total
      @budget = budget
      @last_event = last_event

      spent = format('%.6f', monthly_total)
      limit = format('%.6f', budget)
      super("LLM monthly budget exceeded: $#{spent} / $#{limit}")
    end
  end

  # Raised when no pricing is configured for a model; exposes the +model+.
  class UnknownPricingError < Error
    attr_reader :model

    # @param model [Object] model identifier, shown via #inspect in the message
    def initialize(model:)
      @model = model

      description = model.inspect
      super("No pricing configured for LLM model: #{description}")
    end
  end

  # Wraps a failure raised while storing an event; the wrapped exception is
  # available as +original_error+.
  class StorageError < Error
    attr_reader :original_error

    # @param original_error [Exception] the underlying storage failure
    def initialize(original_error)
      @original_error = original_error

      cause_class = original_error.class
      cause_text = original_error.message
      super("Failed to store LLM cost event: #{cause_class}: #{cause_text}")
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Helpers that split event metadata into token-usage figures and
  # attribution tags.
  module EventMetadata
    # Metadata keys that carry usage counts; +tags+ filters these out so they
    # are not treated as attribution tags.
    INTERNAL_TAG_KEYS = %w[
      cache_creation_input_tokens
      cache_creation_tokens
      cache_read_input_tokens
      cache_read_tokens
      cached_input_tokens
      input_tokens
      output_tokens
      reasoning_tokens
      total_tokens
    ].freeze

    class << self
      # Builds the normalized usage hash for an event.
      #
      # Cache counts are looked up in +metadata+ under symbol or string keys,
      # accepting the short aliases (+cache_read_tokens+,
      # +cache_creation_tokens+). +total_tokens+ sums input, output, cache-read
      # and cache-creation counts; +cached_input_tokens+ is reported but not
      # added to the total.
      #
      # @param input_tokens [#to_i]
      # @param output_tokens [#to_i]
      # @param metadata [Hash] caller-supplied metadata
      # @return [Hash{Symbol => Integer}]
      def usage_data(input_tokens, output_tokens, metadata)
        cache_reads = integer_metadata(metadata, :cache_read_input_tokens, :cache_read_tokens)
        cache_writes = integer_metadata(metadata, :cache_creation_input_tokens, :cache_creation_tokens)
        cached = integer_metadata(metadata, :cached_input_tokens)
        prompt = input_tokens.to_i
        completion = output_tokens.to_i

        {
          input_tokens: prompt,
          output_tokens: completion,
          cached_input_tokens: cached,
          cache_read_input_tokens: cache_reads,
          cache_creation_input_tokens: cache_writes,
          total_tokens: prompt + completion + cache_reads + cache_writes
        }
      end

      # Returns +metadata+ without the internal usage keys, matching keys by
      # their string form so symbol and string keys are both filtered.
      #
      # @param metadata [Hash]
      # @return [Hash]
      def tags(metadata)
        metadata.reject { |name, _value| internal_key?(name) }
      end

      private

      # True when +name+ (symbol or string) is one of INTERNAL_TAG_KEYS.
      def internal_key?(name)
        INTERNAL_TAG_KEYS.include?(name.to_s)
      end

      # Returns, as an Integer, the first non-nil value found under any of
      # +keys+ (each tried as given, then as a string); 0 when none is present.
      # The lazy chain stops at the first hit, so later keys are not read.
      def integer_metadata(metadata, *keys)
        hit = keys.lazy
                  .map { |key| metadata[key] || metadata[key.to_s] }
                  .find { |value| !value.nil? }

        hit.nil? ? 0 : hit.to_i
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Generators
    # Rails generator that writes a migration adding +llm_api_calls.latency_ms+.
    #
    # The migration is rendered from the ERB template in the +templates+
    # directory that sits next to this file.
    class AddLatencyMsGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates a migration to add llm_api_calls.latency_ms"

      # Generator step: renders the template into db/migrate.
      def create_migration_file
        template_name = "add_latency_ms_to_llm_api_calls.rb.erb"
        destination = "db/migrate/add_latency_ms_to_llm_api_calls.rb"

        migration_template(template_name, destination)
      end

      private

      # Version suffix interpolated into the template's superclass, built from
      # the running ActiveRecord major and minor version, e.g. "[7.1]".
      def migration_version
        major = ActiveRecord::VERSION::MAJOR
        minor = ActiveRecord::VERSION::MINOR

        "[#{major}.#{minor}]"
      end
    end
  end
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Adds the integer latency_ms column to llm_api_calls. Both directions check
# column_exists? first, so running against a schema that is already in the
# target state is a no-op.
class AddLatencyMsToLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
  def up
    return if column_exists?(:llm_api_calls, :latency_ms)

    add_column :llm_api_calls, :latency_ms, :integer
  end

  def down
    return unless column_exists?(:llm_api_calls, :latency_ms)

    remove_column :llm_api_calls, :latency_ms
  end
end
|