llm_cost_tracker 0.2.0.alpha2 → 0.3.0
This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those versions.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/README.md +114 -70
- data/Rakefile +2 -0
- data/app/assets/llm_cost_tracker/application.css +760 -0
- data/app/controllers/llm_cost_tracker/application_controller.rb +1 -7
- data/app/controllers/llm_cost_tracker/assets_controller.rb +12 -0
- data/app/controllers/llm_cost_tracker/calls_controller.rb +29 -12
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +5 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +46 -5
- data/app/helpers/llm_cost_tracker/chart_helper.rb +133 -0
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +47 -0
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +34 -0
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +58 -0
- data/app/helpers/llm_cost_tracker/pagination_helper.rb +18 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +22 -3
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +79 -0
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +19 -46
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +17 -8
- data/app/services/llm_cost_tracker/pagination.rb +6 -0
- data/app/views/layouts/llm_cost_tracker/application.html.erb +35 -333
- data/app/views/llm_cost_tracker/calls/index.html.erb +116 -74
- data/app/views/llm_cost_tracker/calls/show.html.erb +58 -1
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +211 -111
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +224 -78
- data/app/views/llm_cost_tracker/errors/database.html.erb +3 -3
- data/app/views/llm_cost_tracker/errors/invalid_filter.html.erb +3 -3
- data/app/views/llm_cost_tracker/errors/not_found.html.erb +3 -3
- data/app/views/llm_cost_tracker/models/index.html.erb +66 -58
- data/app/views/llm_cost_tracker/shared/_active_filters.html.erb +16 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +23 -0
- data/app/views/llm_cost_tracker/shared/_spend_chart.html.erb +18 -0
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +15 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +3 -2
- data/app/views/llm_cost_tracker/tags/index.html.erb +55 -12
- data/app/views/llm_cost_tracker/tags/show.html.erb +88 -39
- data/config/routes.rb +3 -0
- data/lib/llm_cost_tracker/assets.rb +19 -0
- data/lib/llm_cost_tracker/configuration.rb +78 -42
- data/lib/llm_cost_tracker/engine.rb +2 -0
- data/lib/llm_cost_tracker/event.rb +2 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +4 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +8 -1
- data/lib/llm_cost_tracker/llm_api_call.rb +9 -1
- data/lib/llm_cost_tracker/middleware/faraday.rb +57 -9
- data/lib/llm_cost_tracker/parsed_usage.rb +7 -3
- data/lib/llm_cost_tracker/parsers/anthropic.rb +79 -1
- data/lib/llm_cost_tracker/parsers/base.rb +17 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +59 -6
- data/lib/llm_cost_tracker/parsers/openai.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +55 -1
- data/lib/llm_cost_tracker/parsers/registry.rb +15 -3
- data/lib/llm_cost_tracker/parsers/sse.rb +81 -0
- data/lib/llm_cost_tracker/price_registry.rb +18 -7
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/merger.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/model_catalog.rb +77 -0
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +35 -0
- data/lib/llm_cost_tracker/price_sync/source.rb +29 -0
- data/lib/llm_cost_tracker/price_sync/source_result.rb +7 -0
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +91 -0
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +94 -0
- data/lib/llm_cost_tracker/price_sync/validator.rb +66 -0
- data/lib/llm_cost_tracker/price_sync.rb +310 -0
- data/lib/llm_cost_tracker/pricing.rb +19 -6
- data/lib/llm_cost_tracker/retention.rb +34 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +3 -1
- data/lib/llm_cost_tracker/stream_collector.rb +158 -0
- data/lib/llm_cost_tracker/tag_query.rb +7 -2
- data/lib/llm_cost_tracker/tags_column.rb +21 -1
- data/lib/llm_cost_tracker/tracker.rb +15 -12
- data/lib/llm_cost_tracker/value_helpers.rb +40 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +51 -29
- data/lib/tasks/llm_cost_tracker.rake +124 -0
- data/llm_cost_tracker.gemspec +9 -8
- metadata +40 -12
- data/PLAN_0.2.md +0 -488

data/lib/llm_cost_tracker/middleware/faraday.rb

@@ -18,22 +18,39 @@ module LlmCostTracker

         request_url = request_env.url.to_s
         request_body = read_body(request_env.body) || ""
+        parser = Parsers::Registry.find_for(request_url)
+        streaming = parser&.streaming_request?(request_url, request_body)
+        stream_buffer = install_stream_tap(request_env) if streaming

-
+        Tracker.enforce_budget! if parser
         started_at = monotonic_time

         @app.call(request_env).on_complete do |response_env|
-          process(
+          process(
+            parser: parser,
+            request_env: request_env,
+            request_url: request_url,
+            request_body: request_body,
+            response_env: response_env,
+            latency_ms: elapsed_ms(started_at),
+            streaming: streaming,
+            stream_buffer: stream_buffer
+          )
         end
       end

       private

-      def process(request_env
-
+      def process(parser:, request_env:, request_url:, request_body:, response_env:,
+                  latency_ms:, streaming:, stream_buffer:)
         return unless parser

-        parsed =
+        parsed =
+          if streaming
+            parse_stream(parser, request_url, request_body, response_env, stream_buffer)
+          else
+            parse_response(parser, request_url, request_body, response_env)
+          end
         return unless parsed

         Tracker.record(
@@ -42,6 +59,8 @@ module LlmCostTracker
           input_tokens: parsed.input_tokens,
           output_tokens: parsed.output_tokens,
           latency_ms: latency_ms,
+          stream: parsed.stream,
+          usage_source: parsed.usage_source,
           metadata: resolved_tags(request_env).merge(parsed.metadata)
         )
       rescue LlmCostTracker::Error
@@ -54,7 +73,9 @@ module LlmCostTracker
         response_body = read_body(response_env.body)
         unless response_body
           Logging.warn(
-            "Unable to read response body for #{request_url};
+            "Unable to read response body for #{request_url}; " \
+            "streaming responses are captured automatically for OpenAI/Anthropic/Gemini " \
+            "or via LlmCostTracker.track_stream for custom clients."
           )
           return nil
         end
@@ -62,10 +83,37 @@ module LlmCostTracker
         parser.parse(request_url, request_body, response_env.status, response_body)
       end

-      def
-
+      def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
+        body = stream_buffer&.string
+        body = read_body(response_env.body) if body.nil? || body.empty?
+
+        if body.nil? || body.empty?
+          Logging.warn(
+            "Unable to capture streaming response for #{request_url}; " \
+            "fall back to LlmCostTracker.track_stream for manual capture."
+          )
+          return nil
+        end
+
+        events = Parsers::SSE.parse(body)
+        parser.parse_stream(request_url, request_body, response_env.status, events)
+      end
+
+      def install_stream_tap(request_env)
+        return nil unless request_env.respond_to?(:request) && request_env.request

-
+        original = request_env.request.on_data
+        return nil unless original
+
+        buffer = StringIO.new
+        request_env.request.on_data = proc do |chunk, size, env|
+          buffer << chunk.to_s
+          original.call(chunk, size, env)
+        end
+        buffer
+      rescue StandardError => e
+        Logging.warn("Unable to install streaming tap: #{e.class}: #{e.message}")
+        nil
      end

       def read_body(body)
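
The stream tap above only engages when the caller has already set a Faraday on_data callback (install_stream_tap returns nil otherwise). A hedged sketch of a request the middleware could observe, assuming the middleware class is LlmCostTracker::Middleware::Faraday and using a made-up endpoint and model:

    require "faraday"

    conn = Faraday.new(url: "https://api.openai.com") do |f|
      f.request :json
      f.use LlmCostTracker::Middleware::Faraday # assumed class name for the gem's middleware
    end

    conn.post("/v1/chat/completions") do |req|
      req.body = { model: "gpt-4o-mini", stream: true,
                   messages: [{ role: "user", content: "ping" }] }
      # Without an on_data callback the tap is skipped and the warning path runs instead.
      req.options.on_data = proc do |chunk, _bytes_received, _env|
        print chunk
      end
    end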

data/lib/llm_cost_tracker/parsed_usage.rb

@@ -10,11 +10,13 @@ module LlmCostTracker
     :cached_input_tokens,
     :cache_read_input_tokens,
     :cache_creation_input_tokens,
-    :reasoning_tokens
+    :reasoning_tokens,
+    :stream,
+    :usage_source
   )

   class ParsedUsage
-    TRACKING_KEYS = %i[provider model input_tokens output_tokens total_tokens].freeze
+    TRACKING_KEYS = %i[provider model input_tokens output_tokens total_tokens stream usage_source].freeze

     def self.build(**attributes)
       new(
@@ -26,7 +28,9 @@ module LlmCostTracker
         cached_input_tokens: attributes[:cached_input_tokens],
         cache_read_input_tokens: attributes[:cache_read_input_tokens],
         cache_creation_input_tokens: attributes[:cache_creation_input_tokens],
-        reasoning_tokens: attributes[:reasoning_tokens]
+        reasoning_tokens: attributes[:reasoning_tokens],
+        stream: attributes[:stream] || false,
+        usage_source: attributes[:usage_source]
       )
     end


data/lib/llm_cost_tracker/parsers/anthropic.rb

@@ -16,6 +16,10 @@ module LlmCostTracker
         false
       end

+      def provider_names
+        %w[anthropic]
+      end
+
       def parse(_request_url, request_body, response_status, response_body)
         return nil unless response_status == 200

@@ -33,9 +37,83 @@ module LlmCostTracker
           total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i +
                         usage["cache_read_input_tokens"].to_i + usage["cache_creation_input_tokens"].to_i,
           cache_read_input_tokens: usage["cache_read_input_tokens"],
-          cache_creation_input_tokens: usage["cache_creation_input_tokens"]
+          cache_creation_input_tokens: usage["cache_creation_input_tokens"],
+          usage_source: :response
         )
       end
+
+      def parse_stream(_request_url, request_body, response_status, events)
+        return nil unless response_status == 200
+
+        request = safe_json_parse(request_body)
+        model = stream_model(events) || request["model"]
+        usage = stream_usage(events)
+
+        if usage
+          input = usage["input_tokens"].to_i
+          output = usage["output_tokens"].to_i
+          cache_read = usage["cache_read_input_tokens"].to_i
+          cache_creation = usage["cache_creation_input_tokens"].to_i
+
+          ParsedUsage.build(
+            provider: "anthropic",
+            model: model,
+            input_tokens: input,
+            output_tokens: output,
+            total_tokens: input + output + cache_read + cache_creation,
+            cache_read_input_tokens: usage["cache_read_input_tokens"],
+            cache_creation_input_tokens: usage["cache_creation_input_tokens"],
+            stream: true,
+            usage_source: :stream_final
+          )
+        else
+          ParsedUsage.build(
+            provider: "anthropic",
+            model: model,
+            input_tokens: 0,
+            output_tokens: 0,
+            total_tokens: 0,
+            stream: true,
+            usage_source: :unknown
+          )
+        end
+      end
+
+      private
+
+      def stream_usage(events)
+        start_usage = nil
+        latest_delta = nil
+
+        events.each do |event|
+          data = event[:data]
+          next unless data.is_a?(Hash)
+
+          case data["type"]
+          when "message_start"
+            start_usage = data.dig("message", "usage")
+          when "message_delta"
+            latest_delta = data["usage"] if data["usage"].is_a?(Hash)
+          end
+        end
+
+        return nil unless start_usage || latest_delta
+
+        (start_usage || {}).merge(latest_delta || {}) do |_key, start_val, delta_val|
+          delta_val.nil? ? start_val : delta_val
+        end
+      end
+
+      def stream_model(events)
+        events.each do |event|
+          data = event[:data]
+          next unless data.is_a?(Hash)
+
+          model = data.dig("message", "model")
+          return model if model && !model.empty?
+        end
+        nil
+      end
     end
   end
 end
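
For reference, the merge at the end of stream_usage prefers message_delta values over message_start values unless the delta value is nil; in plain Ruby:

    start_usage  = { "input_tokens" => 12, "output_tokens" => 1 }
    latest_delta = { "output_tokens" => 87 }

    start_usage.merge(latest_delta) do |_key, start_val, delta_val|
      delta_val.nil? ? start_val : delta_val
    end
    # => { "input_tokens" => 12, "output_tokens" => 87 }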

data/lib/llm_cost_tracker/parsers/base.rb

@@ -5,19 +5,31 @@ require "json"
 module LlmCostTracker
   module Parsers
     class Base
-      # Parse a provider response into a {LlmCostTracker::ParsedUsage}, or return
-      # nil when the response is not trackable (non-200, missing usage, etc).
-      #
-      # @return [LlmCostTracker::ParsedUsage, nil]
       def parse(request_url, request_body, response_status, response_body)
         raise NotImplementedError
       end

-
+      def provider_names
+        []
+      end
+
       def match?(url)
         raise NotImplementedError
       end

+      def streaming_request?(_request_url, request_body)
+        return false if request_body.nil?
+
+        body = request_body.to_s
+        return false if body.empty?
+
+        body.include?('"stream":true') || body.include?('"stream": true') || body.include?("stream: true")
+      end
+
+      def parse_stream(_request_url, _request_body, _response_status, _events)
+        nil
+      end
+
       private

       def safe_json_parse(body)
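
These hooks (provider_names, streaming_request?, parse_stream) are the extension surface for custom parsers. A hedged sketch of a subclass; the class name, host, and response fields are invented for illustration:

    require "uri"

    module LlmCostTracker
      module Parsers
        class Acme < Base
          def provider_names
            %w[acme]
          end

          def match?(url)
            URI.parse(url.to_s).host == "api.acme.example"
          rescue URI::InvalidURIError
            false
          end

          def parse(_request_url, _request_body, response_status, response_body)
            return nil unless response_status == 200

            data = safe_json_parse(response_body)
            return nil unless data.is_a?(Hash)

            usage = data["usage"]
            return nil unless usage.is_a?(Hash)

            ParsedUsage.build(
              provider: "acme",
              model: data["model"],
              input_tokens: usage["input"].to_i,
              output_tokens: usage["output"].to_i,
              total_tokens: usage["input"].to_i + usage["output"].to_i,
              usage_source: :response
            )
          end
        end
      end
    end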

data/lib/llm_cost_tracker/parsers/gemini.rb

@@ -9,6 +9,7 @@ module LlmCostTracker
     class Gemini < Base
       HOSTS = %w[generativelanguage.googleapis.com].freeze
       TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
+      STREAM_PATH_PATTERN = /:streamGenerateContent\z/

       def match?(url)
         uri = URI.parse(url.to_s)
@@ -17,6 +18,16 @@ module LlmCostTracker
         false
       end

+      def provider_names
+        %w[gemini]
+      end
+
+      def streaming_request?(request_url, request_body)
+        return true if streaming_url?(request_url)
+
+        super
+      end
+
       def parse(request_url, _request_body, response_status, response_body)
         return nil unless response_status == 200

@@ -24,31 +35,73 @@ module LlmCostTracker
         usage = response["usageMetadata"]
         return nil unless usage

-
+        build_parsed_usage(request_url, usage, usage_source: :response)
+      end
+
+      def parse_stream(request_url, _request_body, response_status, events)
+        return nil unless response_status == 200
+
+        usage = merged_stream_usage(events)
         model = extract_model_from_url(request_url)

+        if usage
+          build_parsed_usage(request_url, usage, stream: true, usage_source: :stream_final)
+        else
+          ParsedUsage.build(
+            provider: "gemini",
+            model: model,
+            input_tokens: 0,
+            output_tokens: 0,
+            total_tokens: 0,
+            stream: true,
+            usage_source: :unknown
+          )
+        end
+      end
+
+      private
+
+      def build_parsed_usage(request_url, usage, usage_source:, stream: false)
         ParsedUsage.build(
           provider: "gemini",
-          model:
+          model: extract_model_from_url(request_url),
           input_tokens: usage["promptTokenCount"].to_i,
           output_tokens: output_tokens(usage),
           total_tokens: usage["totalTokenCount"].to_i,
-          cached_input_tokens: usage["cachedContentTokenCount"]
+          cached_input_tokens: usage["cachedContentTokenCount"],
+          stream: stream,
+          usage_source: usage_source
         )
       end

-
+      def merged_stream_usage(events)
+        latest = nil
+        events.each do |event|
+          data = event[:data]
+          next unless data.is_a?(Hash)
+
+          meta = data["usageMetadata"]
+          latest = meta if meta.is_a?(Hash)
+        end
+        latest
+      end

       def output_tokens(usage)
         usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
       end

+      def streaming_url?(request_url)
+        URI.parse(request_url.to_s).path.match?(STREAM_PATH_PATTERN)
+      rescue URI::InvalidURIError
+        false
+      end
+
       def extract_model_from_url(url)
         uri = URI.parse(url.to_s)
         match = uri.path.match(%r{/models/([^/:]+)})
-        match
+        match && match[1]
       rescue URI::InvalidURIError
-
+        nil
       end
     end
   end
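
A quick illustration of the URL handling above, with a made-up request URL:

    require "uri"

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent"
    path = URI.parse(url).path

    path.match?(/:streamGenerateContent\z/) # => true, so the request is treated as streaming
    path.match(%r{/models/([^/:]+)})[1]     # => "gemini-1.5-flash"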

data/lib/llm_cost_tracker/parsers/openai.rb

@@ -20,10 +20,18 @@ module LlmCostTracker
         false
       end

+      def provider_names
+        %w[openai]
+      end
+
       def parse(request_url, request_body, response_status, response_body)
         parse_openai_usage(request_url, request_body, response_status, response_body)
       end

+      def parse_stream(request_url, request_body, response_status, events)
+        parse_openai_stream_usage(request_url, request_body, response_status, events)
+      end
+
       private

       def provider_for(_request_url)

data/lib/llm_cost_tracker/parsers/openai_compatible.rb

@@ -19,10 +19,18 @@ module LlmCostTracker
         false
       end

+      def provider_names
+        ["openai_compatible", *configured_providers.each_value.map(&:to_s)].uniq.freeze
+      end
+
       def parse(request_url, request_body, response_status, response_body)
         parse_openai_usage(request_url, request_body, response_status, response_body)
       end

+      def parse_stream(request_url, request_body, response_status, events)
+        parse_openai_stream_usage(request_url, request_body, response_status, events)
+      end
+
       private

       def provider_for(request_url)

data/lib/llm_cost_tracker/parsers/openai_usage.rb

@@ -20,10 +20,64 @@ module LlmCostTracker
           input_tokens: (usage["prompt_tokens"] || usage["input_tokens"]).to_i,
           output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
           total_tokens: usage["total_tokens"].to_i,
-          cached_input_tokens: cached_input_tokens(usage)
+          cached_input_tokens: cached_input_tokens(usage),
+          usage_source: :response
         )
       end

+      def parse_openai_stream_usage(request_url, request_body, response_status, events)
+        return nil unless response_status == 200
+
+        request = safe_json_parse(request_body)
+        model = detect_stream_model(events) || request["model"]
+        usage = detect_stream_usage(events)
+
+        if usage
+          ParsedUsage.build(
+            provider: provider_for(request_url),
+            model: model,
+            input_tokens: (usage["prompt_tokens"] || usage["input_tokens"]).to_i,
+            output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
+            total_tokens: usage["total_tokens"].to_i,
+            cached_input_tokens: cached_input_tokens(usage),
+            stream: true,
+            usage_source: :stream_final
+          )
+        else
+          ParsedUsage.build(
+            provider: provider_for(request_url),
+            model: model,
+            input_tokens: 0,
+            output_tokens: 0,
+            total_tokens: 0,
+            stream: true,
+            usage_source: :unknown
+          )
+        end
+      end
+
+      def detect_stream_usage(events)
+        events.reverse_each do |event|
+          data = event[:data]
+          next unless data.is_a?(Hash)
+
+          usage = data["usage"]
+          return usage if usage.is_a?(Hash) && !usage.empty?
+        end
+        nil
+      end
+
+      def detect_stream_model(events)
+        events.each do |event|
+          data = event[:data]
+          next unless data.is_a?(Hash)
+
+          model = data["model"]
+          return model if model && !model.to_s.empty?
+        end
+        nil
+      end
+
       def cached_input_tokens(usage)
         details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
         details["cached_tokens"]

data/lib/llm_cost_tracker/parsers/registry.rb

@@ -1,23 +1,35 @@
 # frozen_string_literal: true

+require "monitor"
+
 module LlmCostTracker
   module Parsers
     class Registry
+      MUTEX = Monitor.new
+
       class << self
         def parsers
-          @parsers ||= default_parsers
+          @parsers || MUTEX.synchronize { @parsers ||= default_parsers.freeze }
         end

         def register(parser)
-
+          MUTEX.synchronize do
+            current = @parsers || default_parsers.freeze
+            @parsers = ([parser] + current).freeze
+          end
         end

         def find_for(url)
           parsers.find { |parser| parser.match?(url) }
         end

+        def find_for_provider(provider)
+          provider_name = provider.to_s
+          parsers.find { |parser| parser.provider_names.include?(provider_name) }
+        end
+
         def reset!
-          @parsers = nil
+          MUTEX.synchronize { @parsers = nil }
         end

         private
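
A hedged usage sketch of the registry changes: register still takes a parser instance, and find_for_provider is the new lookup by provider name. Acme refers to the hypothetical parser sketched after the Base diff above, and the second lookup assumes the default parser set includes the built-in Anthropic parser:

    LlmCostTracker::Parsers::Registry.register(LlmCostTracker::Parsers::Acme.new)

    LlmCostTracker::Parsers::Registry.find_for_provider(:acme)       # => the Acme instance
    LlmCostTracker::Parsers::Registry.find_for_provider("anthropic") # => the built-in Anthropic parser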

data/lib/llm_cost_tracker/parsers/sse.rb

@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+require "json"
+
+module LlmCostTracker
+  module Parsers
+    module SSE
+      DONE_MARKER = "[DONE]"
+
+      class << self
+        def parse(body)
+          return [] if body.nil? || body.empty?
+
+          return parse_json_array(body) if probably_json_array?(body)
+
+          parse_event_stream(body)
+        end
+
+        private
+
+        def parse_event_stream(body)
+          events = []
+          current_event = nil
+          data_lines = []
+
+          body.each_line do |raw|
+            line = raw.chomp
+
+            if line.empty?
+              events << finalize_event(current_event, data_lines) if data_lines.any?
+              current_event = nil
+              data_lines = []
+              next
+            end
+
+            next if line.start_with?(":")
+
+            field, _, value = line.partition(":")
+            value = value[1..] if value.start_with?(" ")
+
+            case field
+            when "event" then current_event = value
+            when "data" then data_lines << value
+            end
+          end
+
+          events << finalize_event(current_event, data_lines) if data_lines.any?
+          events.compact
+        end
+
+        def parse_json_array(body)
+          parsed = JSON.parse(body)
+          return [] unless parsed.is_a?(Array)
+
+          parsed.map { |entry| { event: nil, data: entry } }
+        rescue JSON::ParserError
+          []
+        end
+
+        def finalize_event(event_name, data_lines)
+          payload = data_lines.join("\n")
+          return nil if payload == DONE_MARKER
+
+          { event: event_name, data: decode_data(payload) }
+        end
+
+        def decode_data(payload)
+          return payload if payload.empty?
+
+          JSON.parse(payload)
+        rescue JSON::ParserError
+          payload
+        end
+
+        def probably_json_array?(body)
+          body.lstrip.start_with?("[")
+        end
+      end
+    end
+  end
+end
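
A minimal check of the new SSE helper, using a made-up fragment in Anthropic's event shape: fields are grouped per blank line, data payloads are JSON-decoded when possible, and [DONE] markers are dropped:

    body = <<~SSE
      event: message_start
      data: {"type":"message_start","message":{"model":"claude-3-5-sonnet","usage":{"input_tokens":10}}}

      data: [DONE]
    SSE

    LlmCostTracker::Parsers::SSE.parse(body)
    # => [{ event: "message_start",
    #       data: { "type"    => "message_start",
    #               "message" => { "model" => "claude-3-5-sonnet",
    #                              "usage" => { "input_tokens" => 10 } } } }]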

data/lib/llm_cost_tracker/price_registry.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "json"
+require "monitor"
 require "yaml"

 require_relative "logging"
@@ -10,15 +11,18 @@ module LlmCostTracker
     DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
     EMPTY_PRICES = {}.freeze
     PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
-    METADATA_KEYS = %w[_source _updated _notes].freeze
+    METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
+    MUTEX = Monitor.new

     class << self
       def builtin_prices
-        @builtin_prices ||=
+        @builtin_prices ||= MUTEX.synchronize do
+          @builtin_prices || normalize_price_table(raw_registry.fetch("models", {})).freeze
+        end
       end

       def metadata
-        @metadata ||= raw_registry.fetch("metadata", {}).freeze
+        @metadata ||= MUTEX.synchronize { @metadata || raw_registry.fetch("metadata", {}).freeze }
       end

       def normalize_price_table(table)
@@ -35,9 +39,14 @@ module LlmCostTracker
        cached = @file_prices_cache
        return cached[:value] if cached && cached[:key] == cache_key

-
-
-
+        MUTEX.synchronize do
+          cached = @file_prices_cache
+          return cached[:value] if cached && cached[:key] == cache_key
+
+          value = normalize_file_prices(price_file_models(load_price_file(path)), path: path).freeze
+          @file_prices_cache = { key: cache_key, value: value }.freeze
+          value
+        end
       rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError, NoMethodError => e
         raise Error, "Unable to load prices_file #{path.inspect}: #{e.message}"
       end
@@ -45,7 +54,9 @@ module LlmCostTracker
       private

       def raw_registry
-        @raw_registry ||=
+        @raw_registry ||= MUTEX.synchronize do
+          @raw_registry || JSON.parse(File.read(DEFAULT_PRICES_PATH)).freeze
+        end
       end

       def normalize_price_entry(price)