llm_cost_tracker 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +116 -467
- data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
- data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
- data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
- data/lib/llm_cost_tracker/configuration.rb +22 -16
- data/lib/llm_cost_tracker/doctor.rb +1 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +8 -2
- data/lib/llm_cost_tracker/integrations/anthropic.rb +12 -3
- data/lib/llm_cost_tracker/integrations/base.rb +77 -6
- data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
- data/lib/llm_cost_tracker/integrations/openai.rb +14 -5
- data/lib/llm_cost_tracker/integrations/registry.rb +3 -1
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +10 -9
- data/lib/llm_cost_tracker/middleware/faraday.rb +10 -6
- data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +11 -2
- data/lib/llm_cost_tracker/price_freshness.rb +3 -3
- data/lib/llm_cost_tracker/price_registry.rb +3 -0
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +43 -12
- data/lib/llm_cost_tracker/price_sync/registry_diff.rb +51 -0
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
- data/lib/llm_cost_tracker/price_sync/registry_writer.rb +5 -1
- data/lib/llm_cost_tracker/price_sync.rb +103 -111
- data/lib/llm_cost_tracker/prices.json +225 -229
- data/lib/llm_cost_tracker/pricing.rb +27 -15
- data/lib/llm_cost_tracker/report.rb +8 -1
- data/lib/llm_cost_tracker/report_data.rb +25 -9
- data/lib/llm_cost_tracker/retention.rb +30 -7
- data/lib/llm_cost_tracker/storage/dispatcher.rb +68 -0
- data/lib/llm_cost_tracker/stream_capture.rb +7 -0
- data/lib/llm_cost_tracker/stream_collector.rb +25 -1
- data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
- data/lib/llm_cost_tracker/tracker.rb +7 -59
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +1 -0
- data/lib/tasks/llm_cost_tracker.rake +24 -78
- metadata +26 -15
- data/lib/llm_cost_tracker/price_sync/merger.rb +0 -72
- data/lib/llm_cost_tracker/price_sync/model_catalog.rb +0 -77
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +0 -33
- data/lib/llm_cost_tracker/price_sync/refresh_plan_builder.rb +0 -164
- data/lib/llm_cost_tracker/price_sync/source.rb +0 -29
- data/lib/llm_cost_tracker/price_sync/source_result.rb +0 -7
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +0 -90
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +0 -93
- data/lib/llm_cost_tracker/price_sync/validator.rb +0 -66
|
@@ -6,6 +6,7 @@ require_relative "object_reader"
|
|
|
6
6
|
module LlmCostTracker
|
|
7
7
|
module Integrations
|
|
8
8
|
module Base
|
|
9
|
+
PatchTarget = Data.define(:constant_name, :patch, :method_names, :optional)
|
|
9
10
|
Result = Data.define(:name, :status, :message)
|
|
10
11
|
|
|
11
12
|
def active?
|
|
@@ -13,15 +14,23 @@ module LlmCostTracker
|
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
# Installs every patch declared by the integration.
#
# The contract is validated first, so a failed check raises before any
# target is touched. A target whose constant resolves to nil at this point
# is skipped instead of being patched.
def install
  validate_contract!

  patch_targets.each do |target|
    target_class = constant(target.constant_name)
    install_patch(target_class, target.patch) if target_class
  end
end
|
|
18
23
|
|
|
19
24
|
# Reports whether the integration is usable and installed.
#
# Returns a Result whose status is:
# * :warn when the contract has problems (the message lists them),
# * :ok   when every non-optional target already carries its patch,
# * :warn otherwise (enabled but not installed).
def status
  name = integration_name
  problems = contract_problems
  if problems.any?
    return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
  end

  # Optional targets do not count towards the "installed" verdict.
  required_targets = patch_targets.reject(&:optional)
  installed = required_targets.all? do |target|
    patch_installed?(constant(target.constant_name), target.patch)
  end
  return Result.new(name, :ok, "#{name} integration installed") if installed

  Result.new(name, :warn, "#{name} integration is enabled but not installed")
end
|
|
@@ -55,10 +64,72 @@ module LlmCostTracker
|
|
|
55
64
|
end
|
|
56
65
|
end
|
|
57
66
|
|
|
67
|
+
# Default contract hooks. These defaults declare no version requirement and
# no patch targets; the OpenAI and RubyLLM integrations override them.

# Minimum supported version string of the wrapped library, or nil for none.
def minimum_version = nil

# Name of the constant holding the library version, or nil when unknown.
def version_constant = nil

# PatchTarget entries describing what the integration prepends and where.
def patch_targets = []

# Builds a PatchTarget.
#
# constant_name - String name of the class to patch.
# with:         - module that gets prepended onto the class.
# methods:      - one method name or a list; always stored as an Array.
# optional:     - when true, a missing constant is not reported as a problem.
def patch_target(constant_name, with:, methods:, optional: false)
  PatchTarget.new(constant_name, with, Array(methods), optional)
end
|
|
76
|
+
|
|
58
77
|
private
|
|
59
78
|
|
|
79
|
+
# Raises Error listing every contract problem; returns nil when there are none.
def validate_contract!
  problems = contract_problems
  return if problems.empty?

  raise Error, "#{integration_name} integration cannot be installed: #{problems.join('; ')}"
end
|
|
85
|
+
|
|
86
|
+
# All reasons the integration cannot be installed: version problems first,
# then per-target problems. Empty when the contract is satisfied.
def contract_problems
  version_problems + target_problems
end
|
|
89
|
+
|
|
90
|
+
# Checks the detected library version against minimum_version.
#
# Returns [] when no minimum is declared or the requirement is met,
# otherwise a one-element Array with a human-readable explanation.
def version_problems
  return [] unless minimum_version

  name = integration_name.to_s
  version = installed_version
  return ["#{name} >= #{minimum_version} is required, but #{name} is not loaded"] unless version
  return [] if version >= Gem::Version.new(minimum_version)

  ["#{name} >= #{minimum_version} is required, detected #{version}"]
end
|
|
100
|
+
|
|
101
|
+
# Version of the wrapped library: the loaded gem spec named after the
# integration when there is one, otherwise the version constant.
def installed_version
  Gem.loaded_specs[integration_name.to_s]&.version || constant_version
end
|
|
104
|
+
|
|
105
|
+
# Reads the version from the constant named by version_constant.
#
# Returns a Gem::Version, or nil when no constant is declared, the constant
# does not resolve, or its value is not a valid version string.
def constant_version
  return nil unless version_constant

  value = constant(version_constant)
  value ? Gem::Version.new(value.to_s) : nil
rescue ArgumentError
  # Gem::Version.new rejects malformed version strings with ArgumentError.
  nil
end
|
|
113
|
+
|
|
114
|
+
# Collects problems for every patch target, in declaration order.
#
# A missing optional target is ignored, a missing required target is
# reported as "not loaded", and a loaded target is checked for the methods
# the patch wraps.
def target_problems
  patch_targets.flat_map do |target|
    target_class = constant(target.constant_name)
    next [] if target_class.nil? && target.optional
    next ["#{target.constant_name} is not loaded"] unless target_class

    missing_methods(target_class, target)
  end
end
|
|
123
|
+
|
|
124
|
+
# Lists the methods from target.method_names that target_class does not
# define. Private instance methods count as available.
def missing_methods(target_class, target)
  target.method_names.filter_map do |method_name|
    next if target_class.method_defined?(method_name) || target_class.private_method_defined?(method_name)

    "#{target.constant_name}##{method_name} is not available"
  end
end
|
|
131
|
+
|
|
60
132
|
def install_patch(target, patch)
|
|
61
|
-
return unless target
|
|
62
133
|
return if patch_installed?(target, patch)
|
|
63
134
|
|
|
64
135
|
target.prepend(patch)
|
|
@@ -10,10 +10,14 @@ module LlmCostTracker
|
|
|
10
10
|
class << self
|
|
11
11
|
def integration_name = :openai
|
|
12
12
|
|
|
13
|
-
def
|
|
13
|
+
# Minimum version string of the OpenAI library this integration accepts.
def minimum_version = "0.59.0"

# Name of the constant the library version is read from.
def version_constant = "OpenAI::VERSION"

# The Responses and Chat Completions resources are both patched on #create.
def patch_targets
  [
    patch_target("OpenAI::Resources::Responses", with: ResponsesPatch, methods: :create),
    patch_target("OpenAI::Resources::Chat::Completions", with: ChatCompletionsPatch, methods: :create)
  ]
end
|
|
19
23
|
|
|
@@ -28,15 +32,16 @@ module LlmCostTracker
|
|
|
28
32
|
output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
|
|
29
33
|
next if input_tokens.nil? && output_tokens.nil?
|
|
30
34
|
|
|
35
|
+
metadata = usage_metadata(usage)
|
|
31
36
|
LlmCostTracker::Tracker.record(
|
|
32
37
|
provider: "openai",
|
|
33
38
|
model: ObjectReader.first(response, :model) || request[:model],
|
|
34
|
-
input_tokens:
|
|
39
|
+
input_tokens: regular_input_tokens(input_tokens, metadata[:cache_read_input_tokens]),
|
|
35
40
|
output_tokens: ObjectReader.integer(output_tokens),
|
|
36
41
|
latency_ms: latency_ms,
|
|
37
42
|
usage_source: :sdk_response,
|
|
38
43
|
provider_response_id: ObjectReader.first(response, :id),
|
|
39
|
-
metadata:
|
|
44
|
+
metadata: metadata
|
|
40
45
|
)
|
|
41
46
|
end
|
|
42
47
|
end
|
|
@@ -61,6 +66,10 @@ module LlmCostTracker
|
|
|
61
66
|
ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
|
|
62
67
|
)
|
|
63
68
|
end
|
|
69
|
+
|
|
70
|
+
# Input tokens excluding cache reads, never negative.
#
# The caller only skips recording when BOTH token counts are nil, so
# input_tokens may be nil here. The result of ObjectReader.integer is
# coerced with to_i so a nil (NOTE(review): assumed possible for missing
# values - confirm) counts as zero instead of raising NoMethodError.
def regular_input_tokens(input_tokens, cache_read)
  [ObjectReader.integer(input_tokens).to_i - cache_read.to_i, 0].max
end
|
|
64
73
|
end
|
|
65
74
|
|
|
66
75
|
module ResponsesPatch
|
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "openai"
|
|
4
4
|
require_relative "anthropic"
|
|
5
|
+
require_relative "ruby_llm"
|
|
5
6
|
|
|
6
7
|
module LlmCostTracker
|
|
7
8
|
module Integrations
|
|
8
9
|
module Registry
|
|
9
10
|
INTEGRATIONS = {
|
|
10
11
|
openai: Openai,
|
|
11
|
-
anthropic: Anthropic
|
|
12
|
+
anthropic: Anthropic,
|
|
13
|
+
ruby_llm: RubyLlm
|
|
12
14
|
}.freeze
|
|
13
15
|
|
|
14
16
|
module_function
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  module Integrations
    # RubyLLM integration: prepends ProviderPatch onto RubyLLM::Provider so
    # that complete/embed/transcribe calls are reported to the Tracker.
    module RubyLlm
      extend Base

      class << self
        def integration_name = :ruby_llm

        def minimum_version = "1.14.1"

        def version_constant = "RubyLLM::VERSION"

        # A single target: the provider base class, which must expose all of
        # the listed methods for the contract check to pass.
        def patch_targets
          [
            patch_target(
              "RubyLLM::Provider",
              with: ProviderPatch,
              methods: %i[slug complete embed transcribe]
            )
          ]
        end

        # Records usage for a chat completion. `stream` is passed through to
        # the tracker unchanged.
        def record_completion(provider, response, request:, latency_ms:, stream:)
          record_usage(
            provider: provider_slug(provider),
            model: response_model_id(response) || model_id(request[:model]),
            response: response,
            latency_ms: latency_ms,
            stream: stream
          )
        end

        # A completion counts as streamed when a block was given or the
        # request explicitly sets stream to true.
        def streaming_request?(request, has_block:)
          has_block || request[:stream] == true
        end

        # Records usage for an embedding call; output tokens are forced to 0.
        def record_embedding(provider, response, request:, latency_ms:)
          record_usage(
            provider: provider_slug(provider),
            model: response_model_id(response) || model_id(request[:model]),
            response: response,
            latency_ms: latency_ms,
            stream: false,
            output_tokens: 0
          )
        end

        # Records usage for a transcription call.
        def record_transcription(provider, response, request:, latency_ms:)
          record_usage(
            provider: provider_slug(provider),
            model: response_model_id(response) || model_id(request[:model]),
            response: response,
            latency_ms: latency_ms,
            stream: false
          )
        end

        # Reads token counts off the response and forwards them to the
        # Tracker. Does nothing when the integration is not active, and
        # records nothing when neither an input nor an output token count is
        # available. When output_tokens is given it overrides the value read
        # from the response.
        def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
          return unless active?

          record_safely do
            input_tokens = ObjectReader.first(response, :input_tokens)
            output_tokens = ObjectReader.first(response, :output_tokens) if output_tokens.nil?
            next if input_tokens.nil? && output_tokens.nil?

            cache_read = ObjectReader.integer(ObjectReader.first(response, :cached_tokens))

            LlmCostTracker::Tracker.record(
              provider: provider,
              model: model,
              input_tokens: regular_input_tokens(input_tokens, cache_read),
              output_tokens: ObjectReader.integer(output_tokens),
              latency_ms: latency_ms,
              stream: stream,
              usage_source: :ruby_llm,
              provider_response_id: provider_response_id(response),
              metadata: usage_metadata(response, cache_read)
            )
          end
        end

        # Cache and hidden-output token counts passed to the tracker as metadata.
        def usage_metadata(response, cache_read)
          {
            cache_read_input_tokens: cache_read,
            cache_write_input_tokens: ObjectReader.integer(ObjectReader.first(response, :cache_creation_tokens)),
            hidden_output_tokens: ObjectReader.integer(
              ObjectReader.first(response, :thinking_tokens, :reasoning_tokens)
            )
          }
        end

        # Input tokens excluding cache reads, never negative.
        #
        # input_tokens can be nil here: record_embedding forces
        # output_tokens to 0, so the "both nil" guard in record_usage does
        # not fire. The to_i on the ObjectReader.integer result keeps a nil
        # (NOTE(review): assumed possible for missing values - confirm) from
        # raising NoMethodError.
        def regular_input_tokens(input_tokens, cache_read)
          [ObjectReader.integer(input_tokens).to_i - cache_read.to_i, 0].max
        end

        # Provider name as a String, read from the provider's slug.
        def provider_slug(provider)
          ObjectReader.first(provider, :slug).to_s
        end

        # Model identifier from a request value: read from :id, :model_id or
        # :model, falling back to the value itself when it is a String or
        # Symbol. Returns nil when nothing usable is found.
        def model_id(object)
          return nil if object.nil?

          value = ObjectReader.first(object, :id, :model_id, :model)
          value ||= object if object.is_a?(String) || object.is_a?(Symbol)
          value&.to_s
        end

        # Model identifier reported by the response, if any.
        def response_model_id(object)
          value = ObjectReader.first(object, :model_id, :model)
          value&.to_s
        end

        # Response id, falling back to the id nested under :raw.
        def provider_response_id(response)
          ObjectReader.first(response, :id, :provider_response_id) || ObjectReader.nested(response, :raw, :id)
        end
      end

      # Prepended onto RubyLLM::Provider. Each wrapper captures the request
      # parameters, starts a monotonic timer, enforces the budget, delegates
      # to the original method via super, records usage, and returns the
      # original response untouched.
      module ProviderPatch
        def complete(*args, **kwargs, &)
          integration = LlmCostTracker::Integrations::RubyLlm
          request = integration.request_params(args, kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration.enforce_budget!
          response = super
          integration.record_completion(
            self,
            response,
            request: request,
            latency_ms: integration.elapsed_ms(started_at),
            stream: integration.streaming_request?(request, has_block: block_given?)
          )
          response
        end

        def embed(*args, **kwargs)
          integration = LlmCostTracker::Integrations::RubyLlm
          request = integration.request_params(args, kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration.enforce_budget!
          response = super
          integration.record_embedding(
            self,
            response,
            request: request,
            latency_ms: integration.elapsed_ms(started_at)
          )
          response
        end

        def transcribe(*args, **kwargs)
          integration = LlmCostTracker::Integrations::RubyLlm
          request = integration.request_params(args, kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration.enforce_budget!
          response = super
          integration.record_transcription(
            self,
            response,
            request: request,
            latency_ms: integration.elapsed_ms(started_at)
          )
          response
        end
      end
    end
  end
end
|
|
@@ -73,12 +73,15 @@ module LlmCostTracker
|
|
|
73
73
|
end
|
|
74
74
|
|
|
75
75
|
# Groups the relation by the SQL expression that extracts the given tag key.
def self.group_by_tag(key)
  group(Arel.sql(tag_value_expression(key)))
end
|
|
78
78
|
|
|
79
|
-
# Total cost per tag value for the given tag key, highest cost first.
#
# key   - tag key to group by.
# limit - optional maximum number of grouped rows to fetch.
#
# Returns a Hash of label => Float cost. Nil and empty tag values share one
# label, so their costs are summed and the result may contain fewer entries
# than the number of rows fetched.
def self.cost_by_tag(key, limit: nil)
  relation = group_by_tag(key).order(Arel.sql("COALESCE(SUM(total_cost), 0) DESC"))
  relation = relation.limit(limit) if limit

  costs = relation.sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
    grouped[tag_value_label(tag_value)] += cost.to_f
  end
  # Re-sort after merging labels, since merged totals can change the order.
  costs.sort_by { |_label, cost| -cost }.to_h
end
|
|
@@ -101,14 +104,13 @@ module LlmCostTracker
|
|
|
101
104
|
group(:provider).average(:latency_ms).transform_values(&:to_f)
|
|
102
105
|
end
|
|
103
106
|
|
|
104
|
-
def self.
|
|
107
|
+
# Display label for a tag value: nil and "" become "(untagged)", anything
# else is rendered with to_s.
def self.tag_value_label(value)
  (value.nil? || value == "") ? "(untagged)" : value.to_s
end
|
|
107
|
-
private_class_method :tag_label
|
|
108
110
|
|
|
109
|
-
def self.
|
|
111
|
+
def self.tag_value_expression(key, table_name: quoted_table_name)
|
|
110
112
|
key = validated_tag_key(key)
|
|
111
|
-
column = "#{
|
|
113
|
+
column = "#{table_name}.#{connection.quote_column_name('tags')}"
|
|
112
114
|
|
|
113
115
|
case connection.adapter_name
|
|
114
116
|
when /postgres/i
|
|
@@ -120,7 +122,6 @@ module LlmCostTracker
|
|
|
120
122
|
"json_extract(#{column}, #{connection.quote(json_path(key))})"
|
|
121
123
|
end
|
|
122
124
|
end
|
|
123
|
-
private_class_method :tag_group_expression
|
|
124
125
|
|
|
125
126
|
def self.validated_tag_key(key)
|
|
126
127
|
TagKey.validate!(key)
|
|
@@ -5,12 +5,11 @@ require "json"
|
|
|
5
5
|
|
|
6
6
|
require_relative "../logging"
|
|
7
7
|
require_relative "../request_url"
|
|
8
|
+
require_relative "../stream_capture"
|
|
8
9
|
|
|
9
10
|
module LlmCostTracker
|
|
10
11
|
module Middleware
|
|
11
12
|
class Faraday < ::Faraday::Middleware
|
|
12
|
-
STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
|
|
13
|
-
|
|
14
13
|
def initialize(app, **options)
|
|
15
14
|
super(app)
|
|
16
15
|
@tags = options.fetch(:tags, {})
|
|
@@ -78,8 +77,8 @@ module LlmCostTracker
|
|
|
78
77
|
unless response_body
|
|
79
78
|
Logging.warn(
|
|
80
79
|
"Unable to read response body for #{RequestUrl.label(request_url)}; " \
|
|
81
|
-
"streaming responses are captured automatically
|
|
82
|
-
"
|
|
80
|
+
"known streaming responses are captured automatically, or via LlmCostTracker.track_stream " \
|
|
81
|
+
"for custom clients."
|
|
83
82
|
)
|
|
84
83
|
return nil
|
|
85
84
|
end
|
|
@@ -88,6 +87,11 @@ module LlmCostTracker
|
|
|
88
87
|
end
|
|
89
88
|
|
|
90
89
|
def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
|
|
90
|
+
if stream_buffer&.dig(:overflowed)
|
|
91
|
+
Logging.warn(capture_warning(request_url, stream_buffer))
|
|
92
|
+
return parser.parse_stream(request_url, request_body, response_env.status, [])
|
|
93
|
+
end
|
|
94
|
+
|
|
91
95
|
body = stream_buffer&.dig(:buffer)&.string
|
|
92
96
|
body = read_body(response_env.body) if body.nil? || body.empty?
|
|
93
97
|
|
|
@@ -110,7 +114,7 @@ module LlmCostTracker
|
|
|
110
114
|
request_env.request.on_data = proc do |chunk, size, env|
|
|
111
115
|
chunk = chunk.to_s
|
|
112
116
|
unless state[:overflowed]
|
|
113
|
-
if state[:bytes] + chunk.bytesize <=
|
|
117
|
+
if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
|
|
114
118
|
state[:buffer] << chunk
|
|
115
119
|
state[:bytes] += chunk.bytesize
|
|
116
120
|
else
|
|
@@ -161,7 +165,7 @@ module LlmCostTracker
|
|
|
161
165
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
162
166
|
end
|
|
163
167
|
|
|
164
|
-
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{
|
|
168
|
+
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
|
|
165
169
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
166
170
|
end
|
|
167
171
|
end
|
|
@@ -72,7 +72,7 @@ module LlmCostTracker
|
|
|
72
72
|
model: extract_model_from_url(request_url),
|
|
73
73
|
input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
|
|
74
74
|
output_tokens: output_tokens(usage),
|
|
75
|
-
total_tokens: usage
|
|
75
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
76
76
|
cache_read_input_tokens: usage["cachedContentTokenCount"],
|
|
77
77
|
hidden_output_tokens: usage["thoughtsTokenCount"],
|
|
78
78
|
stream: stream,
|
|
@@ -92,6 +92,13 @@ module LlmCostTracker
|
|
|
92
92
|
usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
# Total token count for a Gemini usage payload.
#
# Uses totalTokenCount when the payload carries it. Otherwise the total is
# rebuilt as non-cached input + cached input + output tokens. cache_read is
# coerced with to_i so a nil counts as zero, matching the OpenAI usage parser.
def total_tokens(usage, cache_read)
  reported = usage["totalTokenCount"]
  return reported.to_i unless reported.nil?

  cached = cache_read.to_i
  [usage["promptTokenCount"].to_i - cached, 0].max + cached + output_tokens(usage)
end
|
|
101
|
+
|
|
95
102
|
def stream_response_id(events)
|
|
96
103
|
find_event_value(events) { |data| data["responseId"] }
|
|
97
104
|
end
|
|
@@ -21,7 +21,7 @@ module LlmCostTracker
|
|
|
21
21
|
model: response["model"] || request["model"],
|
|
22
22
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
23
23
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
24
|
-
total_tokens: usage
|
|
24
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
25
25
|
cache_read_input_tokens: cache_read,
|
|
26
26
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
27
27
|
usage_source: :response
|
|
@@ -44,7 +44,7 @@ module LlmCostTracker
|
|
|
44
44
|
model: model,
|
|
45
45
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
46
46
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
47
|
-
total_tokens: usage
|
|
47
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
48
48
|
cache_read_input_tokens: cache_read,
|
|
49
49
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
50
50
|
stream: true,
|
|
@@ -87,6 +87,15 @@ module LlmCostTracker
|
|
|
87
87
|
details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
|
|
88
88
|
details["reasoning_tokens"]
|
|
89
89
|
end
|
|
90
|
+
|
|
91
|
+
# Total token count for an OpenAI-style usage payload.
#
# Uses total_tokens when the payload carries it. Otherwise the total is
# rebuilt as regular input + cached input + output tokens, accepting either
# the completion_tokens or the output_tokens key for the output count.
def total_tokens(usage, cache_read)
  reported = usage["total_tokens"]
  return reported.to_i unless reported.nil?

  output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
  regular_input_tokens(usage, cache_read) + cache_read.to_i + output
end
|
|
90
99
|
end
|
|
91
100
|
end
|
|
92
101
|
end
|
|
@@ -17,20 +17,20 @@ module LlmCostTracker
|
|
|
17
17
|
|
|
18
18
|
[:ok, "updated_at=#{updated_at}"]
|
|
19
19
|
rescue Date::Error
|
|
20
|
-
[:warn, "metadata.updated_at=#{updated_at.inspect} is invalid; run bin/rails llm_cost_tracker:prices:
|
|
20
|
+
[:warn, "metadata.updated_at=#{updated_at.inspect} is invalid; run bin/rails llm_cost_tracker:prices:refresh"]
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
private
|
|
24
24
|
|
|
25
25
|
# Warning returned when the price metadata carries no updated_at value.
def missing
  [:warn, "metadata.updated_at missing; run bin/rails llm_cost_tracker:prices:refresh"]
end
|
|
28
28
|
|
|
29
29
|
# Warning returned when updated_at is older than STALE_AFTER_DAYS days.
def stale(updated_at)
  [
    :warn,
    "updated_at=#{updated_at} is older than #{STALE_AFTER_DAYS} days; " \
    "run bin/rails llm_cost_tracker:prices:refresh"
  ]
end
|
|
36
36
|
end
|
|
@@ -12,6 +12,7 @@ module LlmCostTracker
|
|
|
12
12
|
EMPTY_PRICES = {}.freeze
|
|
13
13
|
PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
|
|
14
14
|
METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
|
|
15
|
+
MAX_FILE_BYTES = 2_097_152
|
|
15
16
|
MUTEX = Monitor.new
|
|
16
17
|
|
|
17
18
|
class << self
|
|
@@ -114,6 +115,8 @@ module LlmCostTracker
|
|
|
114
115
|
end
|
|
115
116
|
|
|
116
117
|
def load_price_file(path)
|
|
118
|
+
raise ArgumentError, "prices_file exceeds #{MAX_FILE_BYTES} bytes" if File.size(path) > MAX_FILE_BYTES
|
|
119
|
+
|
|
117
120
|
contents = File.read(path)
|
|
118
121
|
return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
|
|
119
122
|
|
|
@@ -15,8 +15,9 @@ module LlmCostTracker
|
|
|
15
15
|
end
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
USER_AGENT = "llm_cost_tracker price
|
|
18
|
+
USER_AGENT = "llm_cost_tracker price refresh"
|
|
19
19
|
MAX_REDIRECTS = 5
|
|
20
|
+
MAX_BODY_BYTES = 2_097_152
|
|
20
21
|
OPEN_TIMEOUT = 5
|
|
21
22
|
READ_TIMEOUT = 10
|
|
22
23
|
WRITE_TIMEOUT = 10
|
|
@@ -25,24 +26,17 @@ module LlmCostTracker
|
|
|
25
26
|
raise Error, "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS
|
|
26
27
|
|
|
27
28
|
uri = URI.parse(url)
|
|
29
|
+
raise Error, "Pricing snapshot URL must use https" unless uri.scheme == "https"
|
|
30
|
+
|
|
28
31
|
request = Net::HTTP::Get.new(uri)
|
|
29
32
|
request["User-Agent"] = USER_AGENT
|
|
30
33
|
request["If-None-Match"] = etag if etag
|
|
31
34
|
|
|
32
|
-
response =
|
|
33
|
-
uri.host,
|
|
34
|
-
uri.port,
|
|
35
|
-
use_ssl: uri.scheme == "https",
|
|
36
|
-
open_timeout: OPEN_TIMEOUT,
|
|
37
|
-
read_timeout: READ_TIMEOUT,
|
|
38
|
-
write_timeout: WRITE_TIMEOUT
|
|
39
|
-
) do |http|
|
|
40
|
-
http.request(request)
|
|
41
|
-
end
|
|
35
|
+
response, body = fetch_response(uri, request)
|
|
42
36
|
|
|
43
37
|
case response
|
|
44
38
|
when Net::HTTPSuccess
|
|
45
|
-
build_response(response, not_modified: false)
|
|
39
|
+
build_response(response, body: body || limited_body(response), not_modified: false)
|
|
46
40
|
when Net::HTTPNotModified
|
|
47
41
|
build_response(response, body: nil, not_modified: true)
|
|
48
42
|
when Net::HTTPRedirection
|
|
@@ -59,6 +53,43 @@ module LlmCostTracker
|
|
|
59
53
|
|
|
60
54
|
private
|
|
61
55
|
|
|
56
|
+
# Performs the HTTP request and reads a successful body with a size cap.
#
# The body has to be consumed inside the request block, while the
# connection is still open, so it is read there through limited_body. For
# non-success responses (redirects, 304, errors) the body is left untouched
# and nil is returned in its place.
#
# Returns [response, body].
def fetch_response(uri, request)
  body = nil
  response = Net::HTTP.start(
    uri.host,
    uri.port,
    use_ssl: uri.scheme == "https",
    open_timeout: OPEN_TIMEOUT,
    read_timeout: READ_TIMEOUT,
    write_timeout: WRITE_TIMEOUT
  ) do |http|
    http.request(request) do |streamed_response|
      body = limited_body(streamed_response) if streamed_response.is_a?(Net::HTTPSuccess)
    end
  end

  [response, body]
end
|
|
73
|
+
|
|
74
|
+
# Reads the response body, refusing anything larger than MAX_BODY_BYTES.
#
# Responses that support read_body are consumed chunk by chunk, and reading
# aborts as soon as the accumulated size would exceed the cap. Other
# objects fall back to #body and are checked after the fact.
#
# Returns the body as a String. Raises Error when the cap is exceeded.
def limited_body(response)
  unless response.respond_to?(:read_body)
    whole = response.body.to_s
    raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes" if whole.bytesize > MAX_BODY_BYTES

    return whole
  end

  body = +""
  response.read_body do |chunk|
    chunk = chunk.to_s
    if body.bytesize + chunk.bytesize > MAX_BODY_BYTES
      raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes"
    end

    body << chunk
  end
  body
end
|
|
92
|
+
|
|
62
93
|
def build_response(response, not_modified:, body: response.body)
|
|
63
94
|
Response.new(
|
|
64
95
|
body: body,
|