llm_cost_tracker 0.7.1 → 0.7.3
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +16 -9
- data/app/models/llm_cost_tracker/ledger/call.rb +1 -1
- data/app/models/llm_cost_tracker/ledger/call_metrics.rb +1 -1
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +9 -9
- data/lib/llm_cost_tracker/capture/stream_collector.rb +11 -4
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +1 -1
- data/lib/llm_cost_tracker/configuration.rb +5 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +25 -8
- data/lib/llm_cost_tracker/integrations/openai.rb +4 -4
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +4 -10
- data/lib/llm_cost_tracker/ledger/rollups.rb +7 -7
- data/lib/llm_cost_tracker/ledger/store.rb +22 -13
- data/lib/llm_cost_tracker/ledger/tags/query.rb +5 -5
- data/lib/llm_cost_tracker/ledger/tags/sql.rb +8 -7
- data/lib/llm_cost_tracker/middleware/faraday.rb +56 -13
- data/lib/llm_cost_tracker/parsers/anthropic.rb +35 -13
- data/lib/llm_cost_tracker/parsers/base.rb +2 -2
- data/lib/llm_cost_tracker/parsers/gemini.rb +39 -13
- data/lib/llm_cost_tracker/parsers/openai.rb +27 -5
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +14 -4
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +41 -13
- data/lib/llm_cost_tracker/prices.json +316 -32
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +23 -17
- data/lib/llm_cost_tracker/pricing/explainer.rb +17 -11
- data/lib/llm_cost_tracker/pricing/lookup.rb +44 -22
- data/lib/llm_cost_tracker/pricing/sync.rb +19 -3
- data/lib/llm_cost_tracker/tracker.rb +6 -4
- data/lib/llm_cost_tracker/version.rb +1 -1
- metadata +2 -2
data/lib/llm_cost_tracker/middleware/faraday.rb

@@ -25,40 +25,54 @@ module LlmCostTracker
         stream_buffer = install_stream_tap(request_env) if streaming
 
         Tracker.enforce_budget! if parser
+        context_tags, metadata = tag_snapshot(request_env) if parser
         started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
 
         @app.call(request_env).on_complete do |response_env|
           process(
             parser: parser,
-            request_env: request_env,
             request_url: request_url,
             request_body: request_body,
             response_env: response_env,
             latency_ms: ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round,
             streaming: streaming,
-            stream_buffer: stream_buffer
+            stream_buffer: stream_buffer,
+            context_tags: context_tags,
+            metadata: metadata
           )
         end
       end
 
       private
 
-      def process(parser:,
-                  latency_ms:, streaming:, stream_buffer:)
+      def process(parser:, request_url:, request_body:, response_env:,
+                  latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
        return unless parser
 
        parsed =
          if streaming
-            parse_stream(
+            parse_stream(
+              parser: parser,
+              request_url: request_url,
+              request_body: request_body,
+              response_env: response_env,
+              stream_buffer: stream_buffer
+            )
          else
-            parse_response(
+            parse_response(
+              parser: parser,
+              request_url: request_url,
+              request_body: request_body,
+              response_env: response_env
+            )
          end
        return unless parsed
 
        Tracker.record(
          capture: parsed,
          latency_ms: latency_ms,
-          metadata:
+          metadata: metadata,
+          context_tags: context_tags
        )
      rescue LlmCostTracker::Error
        raise
@@ -66,7 +80,7 @@ module LlmCostTracker
        Logging.warn("Error processing response: #{e.class}: #{e.message}")
      end
 
-      def parse_response(parser
+      def parse_response(parser:, request_url:, request_body:, response_env:)
        response_body = read_body(response_env.body)
        unless response_body
          Logging.warn(
@@ -77,13 +91,24 @@ module LlmCostTracker
          return nil
        end
 
-        parser.parse(
+        parser.parse(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_env.status,
+          response_body: response_body,
+          response_headers: response_env.response_headers
+        )
      end
 
-      def parse_stream(parser
+      def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
        if stream_buffer&.dig(:overflowed)
          Logging.warn(capture_warning(request_url, stream_buffer))
-          return parser.parse_stream(
+          return parser.parse_stream(
+            request_url: request_url,
+            request_body: request_body,
+            response_status: response_env.status,
+            response_headers: response_env.response_headers
+          )
        end
 
        body = stream_buffer&.dig(:buffer)&.string
@@ -91,11 +116,22 @@ module LlmCostTracker
 
        if body.blank?
          Logging.warn(capture_warning(request_url, stream_buffer))
-          return parser.parse_stream(
+          return parser.parse_stream(
+            request_url: request_url,
+            request_body: request_body,
+            response_status: response_env.status,
+            response_headers: response_env.response_headers
+          )
        end
 
        events = Parsers::SSE.parse(body)
-        parser.parse_stream(
+        parser.parse_stream(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_env.status,
+          events: events,
+          response_headers: response_env.response_headers
+        )
      end
 
      def install_stream_tap(request_env)
@@ -147,6 +183,13 @@ module LlmCostTracker
        tags.to_h
      end
 
+      def tag_snapshot(request_env)
+        [LlmCostTracker::Tags::Context.tags, resolved_tags(request_env)]
+      rescue StandardError => e
+        Logging.warn("Error resolving request tags: #{e.class}: #{e.message}")
+        [{}, {}]
+      end
+
      def capture_warning(request_url, stream_buffer)
        unless stream_buffer&.dig(:overflowed)
          return "Unable to capture streaming response for #{request_url_label(request_url)}; " \
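The middleware change has two themes: process and the parse helpers now pass everything by keyword, and tag_snapshot captures the tag context before @app.call dispatches the request, so Tracker.record sees the request-time tags even though it runs later inside on_complete. A plain-Ruby illustration of why that snapshot ordering matters (the thread-local below is illustrative only, not the gem's actual tag store):

# Illustrative tag context: by completion time the block that set the tags
# has exited, so only the snapshot taken at request time still sees them.
def with_tags(tags)
  previous = Thread.current[:llm_tags]
  Thread.current[:llm_tags] = (previous || {}).merge(tags)
  yield
ensure
  Thread.current[:llm_tags] = previous
end

on_complete = nil
with_tags(team: "search") do
  snapshot = Thread.current[:llm_tags].dup          # what tag_snapshot captures
  on_complete = -> { [snapshot, Thread.current[:llm_tags]] }
end

at_request, at_completion = on_complete.call
p at_request     # => {:team=>"search"}
p at_completion  # => nil (the tag context has already unwound)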
data/lib/llm_cost_tracker/parsers/anthropic.rb

@@ -15,7 +15,7 @@ module LlmCostTracker
        %w[anthropic]
      end
 
-      def parse(
+      def parse(request_body:, response_status:, response_body:, **)
        return nil unless response_status == 200
 
        response = safe_json_parse(response_body)
@@ -28,14 +28,14 @@ module LlmCostTracker
        UsageCapture.build(
          provider: "anthropic",
          provider_response_id: response["id"],
-          pricing_mode: pricing_mode(request, response, usage),
+          pricing_mode: pricing_mode(request: request, response: response, usage: usage),
          model: response["model"] || request["model"],
-          token_usage: token_usage(usage, cache_read),
+          token_usage: token_usage(usage: usage, cache_read: cache_read),
          usage_source: :response
        )
      end
 
-      def parse_stream(
+      def parse_stream(response_status:, request_body: nil, events: [], **)
        return nil unless response_status == 200
 
        request = safe_json_parse(request_body)
@@ -44,13 +44,18 @@ module LlmCostTracker
        response_id = find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
 
        if usage
-          build_stream_result(
+          build_stream_result(
+            model: model,
+            usage: usage,
+            response_id: response_id,
+            pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
+          )
        else
          build_unknown_stream_usage(
            provider: "anthropic",
            model: model,
            provider_response_id: response_id,
-            pricing_mode: pricing_mode(request, nil, usage)
+            pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
          )
        end
      end
@@ -72,7 +77,7 @@ module LlmCostTracker
        end
      end
 
-      def build_stream_result(model
+      def build_stream_result(model:, usage:, response_id:, pricing_mode:)
        cache_read = usage["cache_read_input_tokens"].to_i
 
        UsageCapture.build(
@@ -80,13 +85,13 @@ module LlmCostTracker
          provider_response_id: response_id,
          pricing_mode: pricing_mode,
          model: model,
-          token_usage: token_usage(usage, cache_read),
+          token_usage: token_usage(usage: usage, cache_read: cache_read),
          stream: true,
          usage_source: :stream_final
        )
      end
 
-      def token_usage(usage
+      def token_usage(usage:, cache_read:)
        input = usage["input_tokens"].to_i
        output = usage["output_tokens"].to_i
        cache_creation = usage["cache_creation"]
@@ -108,10 +113,27 @@ module LlmCostTracker
        )
      end
 
-      def pricing_mode(request
-
-
-
+      def pricing_mode(request:, response:, usage:)
+        modes = []
+        speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
+        service_tier = usage&.fetch("service_tier", nil) ||
+                       response&.fetch("service_tier", nil) ||
+                       request["service_tier"]
+
+        modes << Pricing.normalize_mode(speed)
+        modes << Pricing.normalize_mode(service_tier)
+        modes << "data_residency" if inference_geo(request: request, response: response, usage: usage) == "us"
+
+        modes = modes.compact.uniq
+        modes.empty? ? nil : modes.join("_")
+      end
+
+      def inference_geo(request:, response:, usage:)
+        (
+          usage&.fetch("inference_geo", nil) ||
+            response&.fetch("inference_geo", nil) ||
+            request["inference_geo"]
+        ).to_s
      end
    end
  end
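The rewritten Anthropic pricing_mode composes up to three signals — a speed value, a service_tier, and a "data_residency" marker when inference_geo resolves to "us" — into one underscore-joined mode string, preferring usage over response over request as the source for each field. A standalone sketch of the composition; the stand-in normalize_mode below is an assumption, since Pricing.normalize_mode itself is not part of this diff:

# Stand-in for Pricing.normalize_mode (assumed behavior: lowercase tier
# string, or nil for blank/default tiers).
def normalize_mode(value)
  mode = value.to_s.strip.downcase
  mode.empty? || mode == "standard" ? nil : mode
end

# Mirrors the mode-joining logic added above, reading from a usage hash only.
def pricing_mode(usage)
  modes = [normalize_mode(usage["speed"]), normalize_mode(usage["service_tier"])]
  modes << "data_residency" if usage["inference_geo"].to_s == "us"
  modes = modes.compact.uniq
  modes.empty? ? nil : modes.join("_")
end

p pricing_mode({ "service_tier" => "priority" })               # => "priority"
p pricing_mode({ "speed" => "fast", "inference_geo" => "us" }) # => "fast_data_residency"
p pricing_mode({})                                             # => nil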
data/lib/llm_cost_tracker/parsers/base.rb

@@ -7,7 +7,7 @@ require "uri"
 module LlmCostTracker
   module Parsers
     class Base
-      def parse(
+      def parse(**)
        raise NotImplementedError
      end
 
@@ -28,7 +28,7 @@ module LlmCostTracker
        request.is_a?(Hash) && request["stream"] == true
      end
 
-      def parse_stream(
+      def parse_stream(**)
        nil
      end
 
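Base#parse and Base#parse_stream now take a bare double splat, which defines the contract for every parser: accept arbitrary keywords, name only the ones you use, and let ** absorb the rest so the middleware can add keywords (as it does with response_headers in this release) without breaking existing parsers. The mechanics in plain Ruby:

# The `**` contract in isolation: callers may pass extra keywords freely;
# the method names only what it actually uses and discards the rest.
def parse(response_status:, response_body:, **)
  response_status == 200 ? response_body.bytesize : nil
end

p parse(response_status: 200, response_body: "{}",
        request_url: "https://api.example.invalid", response_headers: {})
# => 2 -- the unnamed keywords are accepted and ignored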
data/lib/llm_cost_tracker/parsers/gemini.rb

@@ -23,58 +23,66 @@ module LlmCostTracker
        super
      end
 
-      def parse(request_url
+      def parse(request_url:, request_body:, response_status:, response_body:, response_headers: nil)
        return nil unless response_status == 200
 
        response = safe_json_parse(response_body)
        usage = response["usageMetadata"]
        return nil unless usage
 
+        request = safe_json_parse(request_body)
        build_usage_capture(
-          request_url,
-          usage,
+          request_url: request_url,
+          usage: usage,
          usage_source: :response,
-          provider_response_id: response["responseId"]
+          provider_response_id: response["responseId"],
+          pricing_mode: pricing_mode(request: request, response_headers: response_headers)
        )
      end
 
-      def parse_stream(request_url,
+      def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], response_headers: nil)
        return nil unless response_status == 200
 
+        request = safe_json_parse(request_body)
        usage = merged_stream_usage(events)
        model = extract_model_from_url(request_url)
        response_id = stream_response_id(events)
+        mode = pricing_mode(request: request, response_headers: response_headers)
 
        if usage
          build_usage_capture(
-            request_url,
-            usage,
+            request_url: request_url,
+            usage: usage,
            stream: true,
            usage_source: :stream_final,
-            provider_response_id: response_id
+            provider_response_id: response_id,
+            pricing_mode: mode
          )
        else
          build_unknown_stream_usage(
            provider: "gemini",
            model: model,
-            provider_response_id: response_id
+            provider_response_id: response_id,
+            pricing_mode: mode
          )
        end
      end
 
      private
 
-      def build_usage_capture(request_url
+      def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
+                              pricing_mode: nil)
        cache_read = usage["cachedContentTokenCount"].to_i
        tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
 
        UsageCapture.build(
          provider: "gemini",
          model: extract_model_from_url(request_url),
+          pricing_mode: pricing_mode,
          token_usage: TokenUsage.build(
            input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
            output_tokens: output_tokens(usage),
-            total_tokens: total_tokens(usage, cache_read, tool_use_prompt),
+            total_tokens: total_tokens(usage: usage, cache_read: cache_read, tool_use_prompt: tool_use_prompt),
            cache_read_input_tokens: usage["cachedContentTokenCount"],
            hidden_output_tokens: usage["thoughtsTokenCount"]
          ),
@@ -92,10 +100,10 @@ module LlmCostTracker
      end
 
      def output_tokens(usage)
-        usage["candidatesTokenCount"].to_i
+        usage["candidatesTokenCount"].to_i
      end
 
-      def total_tokens(usage
+      def total_tokens(usage:, cache_read:, tool_use_prompt:)
        total = usage["totalTokenCount"]
        return total.to_i unless total.nil?
 
@@ -113,6 +121,24 @@ module LlmCostTracker
        match = uri.path.match(%r{/models/([^/:]+)})
        match && match[1]
      end
+
+      def pricing_mode(request:, response_headers:)
+        response_tier = response_header(response_headers, "x-gemini-service-tier")
+        response_mode = Pricing.normalize_mode(response_tier)
+        return response_mode if response_mode
+
+        request_mode = Pricing.normalize_mode(
+          request["service_tier"] ||
+            request["serviceTier"] ||
+            request.dig("config", "service_tier") ||
+            request.dig("config", "serviceTier")
+        )
+        request_mode == "flex" ? request_mode : nil
+      end
+
+      def response_header(headers, name)
+        headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
+      end
    end
  end
 end
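Gemini's pricing mode now prefers a tier echoed back in the x-gemini-service-tier response header over one named in the request, and the header lookup is case-insensitive because adapters disagree on header capitalization. The helper as added above, with a usage example (note that `name` must already be lowercase):

def response_header(headers, name)
  headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
end

headers = { "X-Gemini-Service-Tier" => "flex", "Content-Type" => "application/json" }
p response_header(headers, "x-gemini-service-tier") # => "flex"
p response_header(headers, "x-missing-header")      # => nil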
data/lib/llm_cost_tracker/parsers/openai.rb

@@ -8,7 +8,19 @@ module LlmCostTracker
    class Openai < Base
      include OpenaiUsage
 
-      HOSTS = %w[
+      HOSTS = %w[
+        api.openai.com
+        us.api.openai.com
+        eu.api.openai.com
+        au.api.openai.com
+        ca.api.openai.com
+        jp.api.openai.com
+        in.api.openai.com
+        sg.api.openai.com
+        kr.api.openai.com
+        gb.api.openai.com
+        ae.api.openai.com
+      ].freeze
      TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
 
      def match?(url)
@@ -19,12 +31,22 @@ module LlmCostTracker
        %w[openai]
      end
 
-      def parse(request_url
-        parse_openai_usage(
+      def parse(request_url:, request_body:, response_status:, response_body:, **)
+        parse_openai_usage(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_status,
+          response_body: response_body
+        )
      end
 
-      def parse_stream(request_url, request_body,
-        parse_openai_stream_usage(
+      def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
+        parse_openai_stream_usage(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_status,
+          events: events
+        )
      end
 
      private
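The HOSTS expansion teaches the OpenAI parser to recognize the regional endpoints (us., eu., au., and so on) alongside api.openai.com. The parser's match? body is unchanged and not shown in this diff, so the following is only a sketch of what a host-and-path check against these constants could look like:

require "uri"

HOSTS = %w[api.openai.com us.api.openai.com eu.api.openai.com].freeze # abridged
TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze

# Illustrative only: true when both the host and the path are tracked.
def tracked?(url)
  uri = URI.parse(url.to_s)
  HOSTS.include?(uri.host.to_s.downcase) && TRACKED_PATHS.include?(uri.path)
rescue URI::InvalidURIError
  false
end

p tracked?("https://eu.api.openai.com/v1/responses") # => true
p tracked?("https://api.openai.com/v1/files")        # => false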
data/lib/llm_cost_tracker/parsers/openai_compatible.rb

@@ -21,12 +21,22 @@ module LlmCostTracker
        ].uniq.freeze
      end
 
-      def parse(request_url
-        parse_openai_usage(
+      def parse(request_url:, request_body:, response_status:, response_body:, **)
+        parse_openai_usage(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_status,
+          response_body: response_body
+        )
      end
 
-      def parse_stream(request_url, request_body,
-        parse_openai_stream_usage(
+      def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
+        parse_openai_stream_usage(
+          request_url: request_url,
+          request_body: request_body,
+          response_status: response_status,
+          events: events
+        )
      end
 
      private
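OpenaiCompatible mirrors the Openai parser exactly: both delegate to the shared OpenaiUsage mixin, and both stream entry points default request_url/request_body to nil and events to [] so they remain callable when the stream buffer overflowed and no SSE events were captured. A minimal stand-in showing what those defaults buy:

# Minimal stand-in for the new stream signature: everything except
# response_status is optional, so an overflowed or empty stream degrades
# to an "unknown usage" result instead of raising ArgumentError.
def parse_stream(response_status:, request_url: nil, request_body: nil, events: [])
  return nil unless response_status == 200

  events.empty? ? :unknown_stream_usage : :usage_from_events
end

p parse_stream(response_status: 200)                         # => :unknown_stream_usage
p parse_stream(response_status: 200, events: [{ "u" => 1 }]) # => :usage_from_events
p parse_stream(response_status: 502)                         # => nil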
data/lib/llm_cost_tracker/parsers/openai_usage.rb

@@ -5,7 +5,7 @@ module LlmCostTracker
  module OpenaiUsage
    private
 
-    def parse_openai_usage(request_url
+    def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
      return nil unless response_status == 200
 
      response = safe_json_parse(response_body)
@@ -15,17 +15,23 @@ module LlmCostTracker
      request = safe_json_parse(request_body)
      cache_read = cache_read_input_tokens(usage)
 
+      model = response["model"] || request["model"]
+
      UsageCapture.build(
        provider: provider_for(request_url),
        provider_response_id: response["id"],
-        pricing_mode:
-
-
+        pricing_mode: pricing_mode(
+          request_url: request_url,
+          model: model,
+          service_tier: response["service_tier"] || request["service_tier"]
+        ),
+        model: model,
+        token_usage: token_usage(usage: usage, cache_read: cache_read),
        usage_source: :response
      )
    end
 
-    def parse_openai_stream_usage(request_url, request_body
+    def parse_openai_stream_usage(response_status:, request_url: nil, request_body: nil, events: [])
      return nil unless response_status == 200
 
      request = safe_json_parse(request_body)
@@ -33,7 +39,11 @@ module LlmCostTracker
      find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
      usage = detect_stream_usage(events)
      response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
-      pricing_mode =
+      pricing_mode = pricing_mode(
+        request_url: request_url,
+        model: model,
+        service_tier: stream_pricing_mode(events) || request["service_tier"]
+      )
 
      if usage
        cache_read = cache_read_input_tokens(usage)
@@ -42,7 +52,7 @@ module LlmCostTracker
        provider_response_id: response_id,
        pricing_mode: pricing_mode,
        model: model,
-        token_usage: token_usage(usage, cache_read),
+        token_usage: token_usage(usage: usage, cache_read: cache_read),
        stream: true,
        usage_source: :stream_final
      )
@@ -69,17 +79,35 @@ module LlmCostTracker
      end
    end
 
-    def
+    def pricing_mode(request_url:, model:, service_tier:)
+      modes = [Pricing.normalize_mode(service_tier)]
+      modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
+      modes = modes.compact.uniq
+      modes.empty? ? nil : modes.join("_")
+    end
+
+    def openai_regional_processing?(request_url:, model:)
+      uri = parsed_uri(request_url)
+      return false unless %w[us.api.openai.com eu.api.openai.com].include?(uri&.host.to_s.downcase)
+
+      openai_data_residency_model?(model)
+    end
+
+    def openai_data_residency_model?(model)
+      model.to_s.match?(/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?\z/)
+    end
+
+    def token_usage(usage:, cache_read:)
      TokenUsage.build(
-        input_tokens: regular_input_tokens(usage, cache_read),
+        input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
        output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
-        total_tokens: total_tokens(usage, cache_read),
+        total_tokens: total_tokens(usage: usage, cache_read: cache_read),
        cache_read_input_tokens: cache_read,
        hidden_output_tokens: hidden_output_tokens(usage)
      )
    end
 
-    def regular_input_tokens(usage
+    def regular_input_tokens(usage:, cache_read:)
      [(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
    end
 
@@ -93,11 +121,11 @@ module LlmCostTracker
      details["reasoning_tokens"]
    end
 
-    def total_tokens(usage
+    def total_tokens(usage:, cache_read:)
      total = usage["total_tokens"]
      return total.to_i unless total.nil?
 
-      regular_input_tokens(usage, cache_read) +
+      regular_input_tokens(usage: usage, cache_read: cache_read) +
        cache_read.to_i +
        (usage["completion_tokens"] || usage["output_tokens"]).to_i
    end