llm_cost_tracker 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +10 -7
- data/lib/llm_cost_tracker/capture/stream_collector.rb +11 -4
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +1 -1
- data/lib/llm_cost_tracker/configuration.rb +5 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +25 -8
- data/lib/llm_cost_tracker/integrations/openai.rb +4 -4
- data/lib/llm_cost_tracker/middleware/faraday.rb +56 -13
- data/lib/llm_cost_tracker/parsers/anthropic.rb +35 -13
- data/lib/llm_cost_tracker/parsers/base.rb +2 -2
- data/lib/llm_cost_tracker/parsers/gemini.rb +38 -12
- data/lib/llm_cost_tracker/parsers/openai.rb +27 -5
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +14 -4
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +41 -13
- data/lib/llm_cost_tracker/prices.json +316 -32
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +23 -17
- data/lib/llm_cost_tracker/pricing/explainer.rb +17 -11
- data/lib/llm_cost_tracker/pricing/lookup.rb +44 -22
- data/lib/llm_cost_tracker/pricing/sync.rb +19 -3
- data/lib/llm_cost_tracker/tracker.rb +6 -4
- data/lib/llm_cost_tracker/version.rb +1 -1
- metadata +2 -2
|
@@ -8,7 +8,19 @@ module LlmCostTracker
|
|
|
8
8
|
class Openai < Base
|
|
9
9
|
include OpenaiUsage
|
|
10
10
|
|
|
11
|
-
HOSTS = %w[
|
|
11
|
+
HOSTS = %w[
|
|
12
|
+
api.openai.com
|
|
13
|
+
us.api.openai.com
|
|
14
|
+
eu.api.openai.com
|
|
15
|
+
au.api.openai.com
|
|
16
|
+
ca.api.openai.com
|
|
17
|
+
jp.api.openai.com
|
|
18
|
+
in.api.openai.com
|
|
19
|
+
sg.api.openai.com
|
|
20
|
+
kr.api.openai.com
|
|
21
|
+
gb.api.openai.com
|
|
22
|
+
ae.api.openai.com
|
|
23
|
+
].freeze
|
|
12
24
|
TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
|
|
13
25
|
|
|
14
26
|
def match?(url)
|
|
@@ -19,12 +31,22 @@ module LlmCostTracker
|
|
|
19
31
|
%w[openai]
|
|
20
32
|
end
|
|
21
33
|
|
|
22
|
-
def parse(request_url
|
|
23
|
-
parse_openai_usage(
|
|
34
|
+
def parse(request_url:, request_body:, response_status:, response_body:, **)
|
|
35
|
+
parse_openai_usage(
|
|
36
|
+
request_url: request_url,
|
|
37
|
+
request_body: request_body,
|
|
38
|
+
response_status: response_status,
|
|
39
|
+
response_body: response_body
|
|
40
|
+
)
|
|
24
41
|
end
|
|
25
42
|
|
|
26
|
-
def parse_stream(request_url, request_body,
|
|
27
|
-
parse_openai_stream_usage(
|
|
43
|
+
def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
|
|
44
|
+
parse_openai_stream_usage(
|
|
45
|
+
request_url: request_url,
|
|
46
|
+
request_body: request_body,
|
|
47
|
+
response_status: response_status,
|
|
48
|
+
events: events
|
|
49
|
+
)
|
|
28
50
|
end
|
|
29
51
|
|
|
30
52
|
private
|
|
@@ -21,12 +21,22 @@ module LlmCostTracker
|
|
|
21
21
|
].uniq.freeze
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
def parse(request_url
|
|
25
|
-
parse_openai_usage(
|
|
24
|
+
def parse(request_url:, request_body:, response_status:, response_body:, **)
|
|
25
|
+
parse_openai_usage(
|
|
26
|
+
request_url: request_url,
|
|
27
|
+
request_body: request_body,
|
|
28
|
+
response_status: response_status,
|
|
29
|
+
response_body: response_body
|
|
30
|
+
)
|
|
26
31
|
end
|
|
27
32
|
|
|
28
|
-
def parse_stream(request_url, request_body,
|
|
29
|
-
parse_openai_stream_usage(
|
|
33
|
+
def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
|
|
34
|
+
parse_openai_stream_usage(
|
|
35
|
+
request_url: request_url,
|
|
36
|
+
request_body: request_body,
|
|
37
|
+
response_status: response_status,
|
|
38
|
+
events: events
|
|
39
|
+
)
|
|
30
40
|
end
|
|
31
41
|
|
|
32
42
|
private
|
|
@@ -5,7 +5,7 @@ module LlmCostTracker
|
|
|
5
5
|
module OpenaiUsage
|
|
6
6
|
private
|
|
7
7
|
|
|
8
|
-
def parse_openai_usage(request_url
|
|
8
|
+
def parse_openai_usage(request_url:, request_body:, response_status:, response_body:)
|
|
9
9
|
return nil unless response_status == 200
|
|
10
10
|
|
|
11
11
|
response = safe_json_parse(response_body)
|
|
@@ -15,17 +15,23 @@ module LlmCostTracker
|
|
|
15
15
|
request = safe_json_parse(request_body)
|
|
16
16
|
cache_read = cache_read_input_tokens(usage)
|
|
17
17
|
|
|
18
|
+
model = response["model"] || request["model"]
|
|
19
|
+
|
|
18
20
|
UsageCapture.build(
|
|
19
21
|
provider: provider_for(request_url),
|
|
20
22
|
provider_response_id: response["id"],
|
|
21
|
-
pricing_mode:
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
pricing_mode: pricing_mode(
|
|
24
|
+
request_url: request_url,
|
|
25
|
+
model: model,
|
|
26
|
+
service_tier: response["service_tier"] || request["service_tier"]
|
|
27
|
+
),
|
|
28
|
+
model: model,
|
|
29
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
24
30
|
usage_source: :response
|
|
25
31
|
)
|
|
26
32
|
end
|
|
27
33
|
|
|
28
|
-
def parse_openai_stream_usage(request_url, request_body
|
|
34
|
+
def parse_openai_stream_usage(response_status:, request_url: nil, request_body: nil, events: [])
|
|
29
35
|
return nil unless response_status == 200
|
|
30
36
|
|
|
31
37
|
request = safe_json_parse(request_body)
|
|
@@ -33,7 +39,11 @@ module LlmCostTracker
|
|
|
33
39
|
find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
|
|
34
40
|
usage = detect_stream_usage(events)
|
|
35
41
|
response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
|
|
36
|
-
pricing_mode =
|
|
42
|
+
pricing_mode = pricing_mode(
|
|
43
|
+
request_url: request_url,
|
|
44
|
+
model: model,
|
|
45
|
+
service_tier: stream_pricing_mode(events) || request["service_tier"]
|
|
46
|
+
)
|
|
37
47
|
|
|
38
48
|
if usage
|
|
39
49
|
cache_read = cache_read_input_tokens(usage)
|
|
@@ -42,7 +52,7 @@ module LlmCostTracker
|
|
|
42
52
|
provider_response_id: response_id,
|
|
43
53
|
pricing_mode: pricing_mode,
|
|
44
54
|
model: model,
|
|
45
|
-
token_usage: token_usage(usage, cache_read),
|
|
55
|
+
token_usage: token_usage(usage: usage, cache_read: cache_read),
|
|
46
56
|
stream: true,
|
|
47
57
|
usage_source: :stream_final
|
|
48
58
|
)
|
|
@@ -69,17 +79,35 @@ module LlmCostTracker
|
|
|
69
79
|
end
|
|
70
80
|
end
|
|
71
81
|
|
|
72
|
-
def
|
|
82
|
+
def pricing_mode(request_url:, model:, service_tier:)
|
|
83
|
+
modes = [Pricing.normalize_mode(service_tier)]
|
|
84
|
+
modes << "data_residency" if openai_regional_processing?(request_url: request_url, model: model)
|
|
85
|
+
modes = modes.compact.uniq
|
|
86
|
+
modes.empty? ? nil : modes.join("_")
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def openai_regional_processing?(request_url:, model:)
|
|
90
|
+
uri = parsed_uri(request_url)
|
|
91
|
+
return false unless %w[us.api.openai.com eu.api.openai.com].include?(uri&.host.to_s.downcase)
|
|
92
|
+
|
|
93
|
+
openai_data_residency_model?(model)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def openai_data_residency_model?(model)
|
|
97
|
+
model.to_s.match?(/\Agpt-5\.(?:4|5)(?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?\z/)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def token_usage(usage:, cache_read:)
|
|
73
101
|
TokenUsage.build(
|
|
74
|
-
input_tokens: regular_input_tokens(usage, cache_read),
|
|
102
|
+
input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read),
|
|
75
103
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
76
|
-
total_tokens: total_tokens(usage, cache_read),
|
|
104
|
+
total_tokens: total_tokens(usage: usage, cache_read: cache_read),
|
|
77
105
|
cache_read_input_tokens: cache_read,
|
|
78
106
|
hidden_output_tokens: hidden_output_tokens(usage)
|
|
79
107
|
)
|
|
80
108
|
end
|
|
81
109
|
|
|
82
|
-
def regular_input_tokens(usage
|
|
110
|
+
def regular_input_tokens(usage:, cache_read:)
|
|
83
111
|
[(usage["prompt_tokens"] || usage["input_tokens"]).to_i - cache_read.to_i, 0].max
|
|
84
112
|
end
|
|
85
113
|
|
|
@@ -93,11 +121,11 @@ module LlmCostTracker
|
|
|
93
121
|
details["reasoning_tokens"]
|
|
94
122
|
end
|
|
95
123
|
|
|
96
|
-
def total_tokens(usage
|
|
124
|
+
def total_tokens(usage:, cache_read:)
|
|
97
125
|
total = usage["total_tokens"]
|
|
98
126
|
return total.to_i unless total.nil?
|
|
99
127
|
|
|
100
|
-
regular_input_tokens(usage, cache_read) +
|
|
128
|
+
regular_input_tokens(usage: usage, cache_read: cache_read) +
|
|
101
129
|
cache_read.to_i +
|
|
102
130
|
(usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
103
131
|
end
|