llm_cost_tracker 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +10 -7
- data/lib/llm_cost_tracker/capture/stream_collector.rb +11 -4
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +1 -1
- data/lib/llm_cost_tracker/configuration.rb +5 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +25 -8
- data/lib/llm_cost_tracker/integrations/openai.rb +4 -4
- data/lib/llm_cost_tracker/middleware/faraday.rb +56 -13
- data/lib/llm_cost_tracker/parsers/anthropic.rb +35 -13
- data/lib/llm_cost_tracker/parsers/base.rb +2 -2
- data/lib/llm_cost_tracker/parsers/gemini.rb +38 -12
- data/lib/llm_cost_tracker/parsers/openai.rb +27 -5
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +14 -4
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +41 -13
- data/lib/llm_cost_tracker/prices.json +316 -32
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +23 -17
- data/lib/llm_cost_tracker/pricing/explainer.rb +17 -11
- data/lib/llm_cost_tracker/pricing/lookup.rb +44 -22
- data/lib/llm_cost_tracker/pricing/sync.rb +19 -3
- data/lib/llm_cost_tracker/tracker.rb +6 -4
- data/lib/llm_cost_tracker/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a5d394087953583d254479b4fe162adbb5b5a0f4de09c535428d514a6c623e76
+  data.tar.gz: b3269262ceec2e1f622780e3e44ac33adb1df703e077bee43fcddd7c251a21dc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 93ce84108bea091e89df70b28a192e280a1e3de92bdb14b8d47ca4057527ebcd4ec1ffa25fc37fc5216df4804539f5b1b9483d6f1fb1afdd292fd19e836431e5
+  data.tar.gz: f69fed55512f322118e93493b9069821e3cd9b372940b6163b7f80578afc3b95799126b91178b25125c7eba871ebe8b3a8fd32e607f8103649c1f3d4d606923d
data/CHANGELOG.md
CHANGED

@@ -4,6 +4,21 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning: [S
 
 ## [Unreleased]
 
+## [0.7.2] - 2026-05-01
+
+### Added
+
+- Groq auto-detection, price scraping, and bundled production text model prices.
+
+### Changed
+
+- Bundled prices refreshed from official provider pricing as of 2026-05-01.
+- Bundled prices now include OpenAI Flex/Priority/regional processing, Gemini Flex/Priority, and Anthropic fast/data residency rates.
+
+### Fixed
+
+- Streaming capture now snapshots tags when the stream starts.
+
 ## [0.7.1] - 2026-04-30
 
 ### Changed
data/README.md
CHANGED

@@ -35,7 +35,7 @@ Drop this into `config/initializers/llm_cost_tracker.rb`:
 
 ```ruby
 LlmCostTracker.configure do |config|
-  config.default_tags
+  config.default_tags = -> { { environment: Rails.env } }
   config.instrument :openai
 end
 ```
@@ -78,7 +78,7 @@ Drop-in for RubyLLM and the official `openai` and `anthropic` gems. `config.inst
 
 ```ruby
 LlmCostTracker.configure do |config|
-  config.instrument :openai
+  config.instrument :openai # or :anthropic / :ruby_llm
 end
 
 LlmCostTracker.with_tags(feature: "support_chat") do
@@ -98,7 +98,7 @@ This patches **only** RubyLLM and the official Ruby SDKs. `ruby-openai` (alexrud
 
 ### 2. Faraday middleware
 
-For `ruby-openai`, the Gemini REST API, custom Faraday clients, or anything OpenAI-compatible (OpenRouter, DeepSeek, LiteLLM proxies):
+For `ruby-openai`, the Gemini REST API, custom Faraday clients, or anything OpenAI-compatible (OpenRouter, DeepSeek, Groq, LiteLLM proxies):
 
 ```ruby
 conn = Faraday.new(url: "https://api.openai.com") do |f|
@@ -137,13 +137,15 @@ For streaming the same way, `track_stream` accepts a block, parses provider even
 Tags answer the only question that matters in attribution: which feature, which user, which job, which tenant. They're free-form strings, stored as JSONB on PostgreSQL or JSON on MySQL, and queryable from both Ruby and the dashboard.
 
 ```ruby
-LlmCostTracker.with_tags(user_id: current_user.id, feature: "support_chat"
+LlmCostTracker.with_tags(user_id: current_user.id, feature: "support_chat") do
   client.chat(parameters: { model: "gpt-4o", messages: [...] })
 end
 ```
 
 `with_tags` is thread- and fiber-isolated, so concurrent requests in Puma or jobs in Sidekiq don't bleed into each other. A `default_tags` callable on configuration runs on every event for things you always want — `environment`, `region`, deployment SHA. Explicit tags passed to `track` win over scoped tags, scoped tags win over defaults.
 
+Streaming capture snapshots tags when the stream starts, so attribution survives delayed or cross-thread stream consumption.
+
 What you put in tags is **your** input — they're queryable strings. Don't put prompts, completions, emails, or secrets there. Use IDs.
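The precedence chain reads best as a worked example. Everything here except the `tags:` keyword on `track` appears in this README; that keyword and the merged result in the final comment are illustrative assumptions:

```ruby
LlmCostTracker.configure do |config|
  config.default_tags = -> { { environment: Rails.env } }   # lowest precedence
end

LlmCostTracker.with_tags(feature: "support_chat") do        # beats defaults
  # Hypothetical explicit tags on track, which beat the scoped ones:
  LlmCostTracker.track(tags: { feature: "billing_export" }) do
    client.chat(parameters: { model: "gpt-4o", messages: [{ role: "user", content: "Hi" }] })
  end
end
# Assumed recorded tags: { environment: "production", feature: "billing_export" }
```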
 
 ## Pricing
@@ -184,7 +186,7 @@ Budgets are guardrails, not transactional caps:
   config.monthly_budget = 500.00
   config.daily_budget = 50.00
   config.per_call_budget = 2.00
-  config.budget_exceeded_behavior = :block_requests
+  config.budget_exceeded_behavior = :block_requests # or :notify, :raise
   config.on_budget_exceeded = ->(data) { SlackNotifier.notify("#alerts", "...") }
 ```
 
@@ -233,6 +235,7 @@ Auth is your job. Examples for basic auth and Devise: [`docs/dashboard.md`](docs
 | Google Gemini | Yes | Gemini 2.5 Pro/Flash/Flash-Lite, 2.0 Flash/Flash-Lite |
 | OpenRouter | Yes | OpenAI-compatible usage; provider-prefixed model IDs are normalized |
 | DeepSeek | Yes | OpenAI-compatible usage; add `pricing_overrides` for DeepSeek-specific rates |
+| Groq | Yes | OpenAI-compatible usage with bundled prices for production text models |
 | Other OpenAI-compatible hosts | Configurable | Register the host via `config.openai_compatible_providers` |
 | Anything else | Manual | Use `LlmCostTracker.track` / `track_stream` |
 
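For the "Configurable" row, a sketch of host registration. The `config.openai_compatible_providers` name comes from this README, and the host-to-provider-key shape mirrors the bundled `OPENAI_COMPATIBLE_PROVIDERS` map in `configuration.rb`, but whether it is merged like this is an assumption:

```ruby
LlmCostTracker.configure do |config|
  # Hypothetical: register a self-hosted gateway under a provider key,
  # the same "host" => "provider" shape as the bundled defaults.
  config.openai_compatible_providers["llm.internal.example"] = "openai_compatible"
end
```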
@@ -275,10 +278,10 @@ is still brief.
 
 ```bash
 bundle install
-bin/check
+bin/check # rubocop + rspec + coverage gate
 ```
 
-Architecture rules and conventions for contributions live in [`
+Architecture rules and conventions for contributions live in [`docs/architecture.md`](docs/architecture.md).
 
 ## License
 
data/lib/llm_cost_tracker/capture/stream_collector.rb
CHANGED

@@ -10,13 +10,15 @@ module LlmCostTracker
     class StreamCollector
       attr_reader :provider
 
-      def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, pricing_mode: nil, metadata: {}
+      def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, pricing_mode: nil, metadata: {},
+                     context_tags: nil)
         @provider = provider.to_s
         @model = model
         @latency_ms = latency_ms
         @provider_response_id = provider_response_id
         @pricing_mode = pricing_mode
         @metadata = (metadata || {}).deep_dup
+        @context_tags = (context_tags || LlmCostTracker::Tags::Context.tags).deep_dup
         @events = []
         @captured_bytes = 0
         @overflowed = false
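This initializer change is the mechanism behind the changelog's "snapshots tags when the stream starts" fix: absent an explicit `context_tags`, the collector deep-dups the ambient `Tags::Context` at construction time. A sketch of the effect; only the constructor keywords shown in this hunk are used:

```ruby
collector = nil
LlmCostTracker.with_tags(user_id: 42) do
  # Built inside the scope, so { user_id: 42 } is copied into the collector now.
  collector = LlmCostTracker::Capture::StreamCollector.new(provider: "openai", model: "gpt-4o")
end

# Even when the stream is consumed later, on another thread, after with_tags
# has exited, the snapshot still attributes the event to user_id 42.
```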
@@ -85,7 +87,8 @@ module LlmCostTracker
           latency_ms: @latency_ms,
           provider_response_id: @provider_response_id,
           pricing_mode: @pricing_mode,
-          metadata: @metadata.deep_dup
+          metadata: @metadata.deep_dup,
+          context_tags: @context_tags.deep_dup
         }
       end
 
@@ -98,7 +101,8 @@ module LlmCostTracker
           latency_ms: snapshot[:latency_ms] ||
                       ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @started_at) * 1000).round,
           pricing_mode: snapshot[:pricing_mode],
-          metadata: (errored ? { stream_errored: true } : {}).merge(snapshot[:metadata])
+          metadata: (errored ? { stream_errored: true } : {}).merge(snapshot[:metadata]),
+          context_tags: snapshot[:context_tags]
         )
       end
 
@@ -114,7 +118,10 @@ module LlmCostTracker
         return build_from_explicit_usage(snapshot) if snapshot[:explicit_usage]
         return build_unknown_usage(snapshot) if snapshot[:overflowed]
 
-        capture = Parsers.find_for_provider(@provider)&.parse_stream(
+        capture = Parsers.find_for_provider(@provider)&.parse_stream(
+          response_status: 200,
+          events: snapshot[:events]
+        )
         if capture
           model = present_model(capture.model) || present_model(snapshot[:model]) || UsageCapture::UNKNOWN_MODEL
           return capture.with(provider: @provider, model: model)
data/lib/llm_cost_tracker/capture/stream_tracker.rb
CHANGED

@@ -8,7 +8,7 @@ require_relative "../logging"
 module LlmCostTracker
   module Capture
     class StreamTracker
-      def initialize(stream
+      def initialize(stream:, collector:, active:, finish: nil)
         @stream = stream
         @collector = collector
         @active = active
data/lib/llm_cost_tracker/configuration.rb
CHANGED

@@ -8,7 +8,11 @@ module LlmCostTracker
   class Configuration
     include ConfigurationInstrumentation
 
-    OPENAI_COMPATIBLE_PROVIDERS = {
+    OPENAI_COMPATIBLE_PROVIDERS = {
+      "openrouter.ai" => "openrouter",
+      "api.deepseek.com" => "deepseek",
+      "api.groq.com" => "groq"
+    }.freeze
 
     BUDGET_EXCEEDED_BEHAVIORS = %i[notify raise block_requests].freeze
     UNKNOWN_PRICING_BEHAVIORS = %i[ignore warn raise].freeze
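A small illustration of what the frozen map above enables. The constant and its pairs come from this diff; the idea that detection keys off the request host is inferred from the README's auto-detection claim:

```ruby
require "uri"

host = URI.parse("https://api.groq.com/openai/v1/chat/completions").host
LlmCostTracker::Configuration::OPENAI_COMPATIBLE_PROVIDERS[host]
# => "groq"
```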
data/lib/llm_cost_tracker/integrations/anthropic.rb
CHANGED

@@ -49,9 +49,8 @@ module LlmCostTracker
           capture: UsageCapture.build(
             provider: "anthropic",
             model: object_value(message, :model) || request[:model],
-            pricing_mode:
-
-            token_usage: token_usage(usage, input_tokens, output_tokens),
+            pricing_mode: pricing_mode(message: message, request: request, usage: usage),
+            token_usage: token_usage(usage: usage, input_tokens: input_tokens, output_tokens: output_tokens),
             usage_source: :sdk_response,
             provider_response_id: object_value(message, :id)
           ),
@@ -60,7 +59,7 @@ module LlmCostTracker
         end
       end
 
-      def token_usage(usage
+      def token_usage(usage:, input_tokens:, output_tokens:)
         cache_write_1h = object_dig(usage, :cache_creation, :ephemeral_1h_input_tokens).to_i
         cache_write_5m = object_dig(usage, :cache_creation, :ephemeral_5m_input_tokens)
         cache_write = if cache_write_5m.nil?
@@ -84,14 +83,32 @@ module LlmCostTracker
         )
       end
 
+      def pricing_mode(message:, request:, usage:)
+        modes = [
+          Pricing.normalize_mode(object_value(usage, :speed) || object_value(message, :speed) || request[:speed]),
+          Pricing.normalize_mode(
+            object_value(usage, :service_tier) || object_value(message, :service_tier) || request[:service_tier]
+          )
+        ]
+        modes << "data_residency" if inference_geo(message: message, request: request, usage: usage).to_s == "us"
+        modes = modes.compact.uniq
+        modes.empty? ? nil : modes.join("_")
+      end
+
+      def inference_geo(message:, request:, usage:)
+        object_value(usage, :inference_geo) ||
+          object_value(message, :inference_geo) ||
+          request[:inference_geo]
+      end
+
       def track_stream(stream, collector:)
         return stream unless active?
 
         LlmCostTracker::Capture::StreamTracker.new(
-          stream,
-          collector,
-          -> { active? },
-          ->(errored:) { finish_stream(collector, errored: errored) }
+          stream: stream,
+          collector: collector,
+          active: -> { active? },
+          finish: ->(errored:) { finish_stream(collector, errored: errored) }
         ).wrap
       end
 
data/lib/llm_cost_tracker/integrations/openai.rb
CHANGED

@@ -90,10 +90,10 @@ module LlmCostTracker
         return stream unless active?
 
         LlmCostTracker::Capture::StreamTracker.new(
-          stream,
-          collector,
-          -> { active? },
-          ->(errored:) { finish_stream(collector, errored: errored) }
+          stream: stream,
+          collector: collector,
+          active: -> { active? },
+          finish: ->(errored:) { finish_stream(collector, errored: errored) }
         ).wrap
       end
 
data/lib/llm_cost_tracker/middleware/faraday.rb
CHANGED

@@ -25,40 +25,54 @@ module LlmCostTracker
         stream_buffer = install_stream_tap(request_env) if streaming
 
         Tracker.enforce_budget! if parser
+        context_tags, metadata = tag_snapshot(request_env) if parser
         started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
 
         @app.call(request_env).on_complete do |response_env|
           process(
             parser: parser,
-            request_env: request_env,
             request_url: request_url,
             request_body: request_body,
             response_env: response_env,
             latency_ms: ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round,
             streaming: streaming,
-            stream_buffer: stream_buffer
+            stream_buffer: stream_buffer,
+            context_tags: context_tags,
+            metadata: metadata
           )
         end
       end
 
       private
 
-      def process(parser:,
-                  latency_ms:, streaming:, stream_buffer:)
+      def process(parser:, request_url:, request_body:, response_env:,
+                  latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
         return unless parser
 
         parsed =
           if streaming
-            parse_stream(
+            parse_stream(
+              parser: parser,
+              request_url: request_url,
+              request_body: request_body,
+              response_env: response_env,
+              stream_buffer: stream_buffer
+            )
           else
-            parse_response(
+            parse_response(
+              parser: parser,
+              request_url: request_url,
+              request_body: request_body,
+              response_env: response_env
+            )
           end
         return unless parsed
 
         Tracker.record(
           capture: parsed,
           latency_ms: latency_ms,
-          metadata:
+          metadata: metadata,
+          context_tags: context_tags
         )
       rescue LlmCostTracker::Error
         raise
@@ -66,7 +80,7 @@ module LlmCostTracker
         Logging.warn("Error processing response: #{e.class}: #{e.message}")
       end
 
-      def parse_response(parser
+      def parse_response(parser:, request_url:, request_body:, response_env:)
         response_body = read_body(response_env.body)
         unless response_body
           Logging.warn(
@@ -77,13 +91,24 @@ module LlmCostTracker
           return nil
         end
 
-        parser.parse(
+        parser.parse(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_env.status,
+          response_body: response_body,
+          response_headers: response_env.response_headers
+        )
       end
 
-      def parse_stream(parser
+      def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
         if stream_buffer&.dig(:overflowed)
           Logging.warn(capture_warning(request_url, stream_buffer))
-          return parser.parse_stream(
+          return parser.parse_stream(
+            request_url: request_url,
+            request_body: request_body,
+            response_status: response_env.status,
+            response_headers: response_env.response_headers
+          )
         end
 
         body = stream_buffer&.dig(:buffer)&.string
@@ -91,11 +116,22 @@ module LlmCostTracker
 
         if body.blank?
           Logging.warn(capture_warning(request_url, stream_buffer))
-          return parser.parse_stream(
+          return parser.parse_stream(
+            request_url: request_url,
+            request_body: request_body,
+            response_status: response_env.status,
+            response_headers: response_env.response_headers
+          )
         end
 
         events = Parsers::SSE.parse(body)
-        parser.parse_stream(
+        parser.parse_stream(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_env.status,
+          events: events,
+          response_headers: response_env.response_headers
+        )
       end
 
       def install_stream_tap(request_env)
@@ -147,6 +183,13 @@ module LlmCostTracker
         tags.to_h
       end
 
+      def tag_snapshot(request_env)
+        [LlmCostTracker::Tags::Context.tags, resolved_tags(request_env)]
+      rescue StandardError => e
+        Logging.warn("Error resolving request tags: #{e.class}: #{e.message}")
+        [{}, {}]
+      end
+
       def capture_warning(request_url, stream_buffer)
         unless stream_buffer&.dig(:overflowed)
           return "Unable to capture streaming response for #{request_url_label(request_url)}; " \
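Net effect of the middleware hunks: tags are resolved once on the request thread (`tag_snapshot`, guarded by a rescue so a tag failure never breaks the request) and carried through `process` into `Tracker.record`. A mounting sketch; the middleware constant is assumed from the file path, and the README's Faraday section is the authoritative form:

```ruby
require "faraday"
require "json"

conn = Faraday.new(url: "https://api.openai.com") do |f|
  f.use LlmCostTracker::Middleware::Faraday  # assumed constant name
  f.adapter Faraday.default_adapter
end

LlmCostTracker.with_tags(feature: "search") do
  # Tags are snapshotted before @app.call, so attribution holds even if the
  # SSE body is drained after this block has exited.
  conn.post("/v1/chat/completions",
            { model: "gpt-4o", messages: [], stream: true }.to_json,
            "Content-Type" => "application/json")
end
```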
data/lib/llm_cost_tracker/parsers/anthropic.rb
CHANGED

@@ -15,7 +15,7 @@
         %w[anthropic]
       end
 
-      def parse(
+      def parse(request_body:, response_status:, response_body:, **)
         return nil unless response_status == 200
 
         response = safe_json_parse(response_body)
@@ -28,14 +28,14 @@
         UsageCapture.build(
           provider: "anthropic",
           provider_response_id: response["id"],
-          pricing_mode: pricing_mode(request, response, usage),
+          pricing_mode: pricing_mode(request: request, response: response, usage: usage),
           model: response["model"] || request["model"],
-          token_usage: token_usage(usage, cache_read),
+          token_usage: token_usage(usage: usage, cache_read: cache_read),
           usage_source: :response
         )
       end
 
-      def parse_stream(
+      def parse_stream(response_status:, request_body: nil, events: [], **)
         return nil unless response_status == 200
 
         request = safe_json_parse(request_body)
@@ -44,13 +44,18 @@
         response_id = find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
 
         if usage
-          build_stream_result(
+          build_stream_result(
+            model: model,
+            usage: usage,
+            response_id: response_id,
+            pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
+          )
         else
           build_unknown_stream_usage(
             provider: "anthropic",
             model: model,
             provider_response_id: response_id,
-            pricing_mode: pricing_mode(request, nil, usage)
+            pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
           )
         end
       end
@@ -72,7 +77,7 @@
         end
       end
 
-      def build_stream_result(model
+      def build_stream_result(model:, usage:, response_id:, pricing_mode:)
         cache_read = usage["cache_read_input_tokens"].to_i
 
         UsageCapture.build(
@@ -80,13 +85,13 @@
           provider_response_id: response_id,
           pricing_mode: pricing_mode,
           model: model,
-          token_usage: token_usage(usage, cache_read),
+          token_usage: token_usage(usage: usage, cache_read: cache_read),
           stream: true,
           usage_source: :stream_final
         )
       end
 
-      def token_usage(usage
+      def token_usage(usage:, cache_read:)
         input = usage["input_tokens"].to_i
         output = usage["output_tokens"].to_i
         cache_creation = usage["cache_creation"]
@@ -108,10 +113,27 @@
         )
       end
 
-      def pricing_mode(request
-
-
-
+      def pricing_mode(request:, response:, usage:)
+        modes = []
+        speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
+        service_tier = usage&.fetch("service_tier", nil) ||
+                       response&.fetch("service_tier", nil) ||
+                       request["service_tier"]
+
+        modes << Pricing.normalize_mode(speed)
+        modes << Pricing.normalize_mode(service_tier)
+        modes << "data_residency" if inference_geo(request: request, response: response, usage: usage) == "us"
+
+        modes = modes.compact.uniq
+        modes.empty? ? nil : modes.join("_")
+      end
+
+      def inference_geo(request:, response:, usage:)
+        (
+          usage&.fetch("inference_geo", nil) ||
+          response&.fetch("inference_geo", nil) ||
+          request["inference_geo"]
+        ).to_s
       end
     end
   end
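Reconstructed, the parser-side `pricing_mode` mirrors the integration-side one: speed and service tier are normalized independently, `data_residency` is appended for US-pinned inference, and the surviving modes are joined with underscores. A worked trace under assumed `Pricing.normalize_mode` outputs (pass-through for known tiers, `nil` otherwise):

```ruby
# usage   = { "speed" => "fast" }
# request = { "inference_geo" => "us" }
#
# modes = [normalize_mode("fast"), normalize_mode(nil)]  # => ["fast", nil]
# modes << "data_residency"                              # inference_geo == "us"
# modes.compact.uniq.join("_")                           # => "fast_data_residency"
#
# With no speed, tier, or US geo present, pricing_mode returns nil, which
# presumably selects the model's standard bundled rates.
```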
data/lib/llm_cost_tracker/parsers/base.rb
CHANGED

@@ -7,7 +7,7 @@ require "uri"
 module LlmCostTracker
   module Parsers
     class Base
-      def parse(
+      def parse(**)
         raise NotImplementedError
       end
 
@@ -28,7 +28,7 @@
         request.is_a?(Hash) && request["stream"] == true
       end
 
-      def parse_stream(
+      def parse_stream(**)
         nil
       end
 
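These `(**)` signatures are what let the Faraday middleware pass one uniform keyword set (`request_url:`, `request_body:`, `response_status:`, `response_body:` or `events:`, `response_headers:`) to every parser: each subclass declares only the keywords it reads and lets the anonymous `**` absorb the rest. A generic illustration of the pattern, not the gem's classes:

```ruby
class Base
  def parse(**)            # accepts and ignores any keyword arguments
    raise NotImplementedError
  end
end

class MinimalParser < Base
  # Declares only what it needs; `**` swallows request_url:, response_headers:, etc.
  def parse(response_status:, response_body:, **)
    response_status == 200 ? response_body : nil
  end
end

MinimalParser.new.parse(request_url: "https://example.test",
                        response_status: 200, response_body: "{}")
# => "{}"
```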
data/lib/llm_cost_tracker/parsers/gemini.rb
CHANGED

@@ -23,58 +23,66 @@
         super
       end
 
-      def parse(request_url
+      def parse(request_url:, request_body:, response_status:, response_body:, response_headers: nil)
         return nil unless response_status == 200
 
         response = safe_json_parse(response_body)
         usage = response["usageMetadata"]
         return nil unless usage
 
+        request = safe_json_parse(request_body)
         build_usage_capture(
-          request_url,
-          usage,
+          request_url: request_url,
+          usage: usage,
           usage_source: :response,
-          provider_response_id: response["responseId"]
+          provider_response_id: response["responseId"],
+          pricing_mode: pricing_mode(request: request, response_headers: response_headers)
         )
       end
 
-      def parse_stream(request_url,
+      def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], response_headers: nil)
         return nil unless response_status == 200
 
+        request = safe_json_parse(request_body)
         usage = merged_stream_usage(events)
         model = extract_model_from_url(request_url)
         response_id = stream_response_id(events)
+        mode = pricing_mode(request: request, response_headers: response_headers)
 
         if usage
           build_usage_capture(
-            request_url,
-            usage,
+            request_url: request_url,
+            usage: usage,
             stream: true,
             usage_source: :stream_final,
-            provider_response_id: response_id
+            provider_response_id: response_id,
+            pricing_mode: mode
           )
         else
           build_unknown_stream_usage(
             provider: "gemini",
             model: model,
-            provider_response_id: response_id
+            provider_response_id: response_id,
+            pricing_mode: mode
           )
         end
       end
 
       private
 
-      def build_usage_capture(request_url
+      def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
+                              pricing_mode: nil)
         cache_read = usage["cachedContentTokenCount"].to_i
         tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
 
         UsageCapture.build(
           provider: "gemini",
           model: extract_model_from_url(request_url),
+          pricing_mode: pricing_mode,
           token_usage: TokenUsage.build(
             input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
             output_tokens: output_tokens(usage),
-            total_tokens: total_tokens(usage, cache_read, tool_use_prompt),
+            total_tokens: total_tokens(usage: usage, cache_read: cache_read, tool_use_prompt: tool_use_prompt),
             cache_read_input_tokens: usage["cachedContentTokenCount"],
             hidden_output_tokens: usage["thoughtsTokenCount"]
           ),
@@ -95,7 +103,7 @@
         usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
       end
 
-      def total_tokens(usage
+      def total_tokens(usage:, cache_read:, tool_use_prompt:)
         total = usage["totalTokenCount"]
         return total.to_i unless total.nil?
 
@@ -113,6 +121,24 @@
         match = uri.path.match(%r{/models/([^/:]+)})
         match && match[1]
       end
+
+      def pricing_mode(request:, response_headers:)
+        response_tier = response_header(response_headers, "x-gemini-service-tier")
+        response_mode = Pricing.normalize_mode(response_tier)
+        return response_mode if response_mode
+
+        request_mode = Pricing.normalize_mode(
+          request["service_tier"] ||
+          request["serviceTier"] ||
+          request.dig("config", "service_tier") ||
+          request.dig("config", "serviceTier")
+        )
+        request_mode == "flex" ? request_mode : nil
+      end
+
+      def response_header(headers, name)
+        headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
+      end
     end
   end
 end
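Per the reconstructed logic above, a tier echoed in the `x-gemini-service-tier` response header (matched case-insensitively by `response_header`) wins outright; from the request body, only a `flex` tier is honored. Illustrative traces, assuming `Pricing.normalize_mode` passes known tiers through:

```ruby
# Header present (any casing) beats the request body:
#   response_headers = { "X-Gemini-Service-Tier" => "priority" }  # => "priority"
#
# No header; flex requested in either key style:
#   request = { "config" => { "serviceTier" => "flex" } }         # => "flex"
#
# No header; a client-side priority request is not trusted:
#   request = { "service_tier" => "priority" }                    # => nil
```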