llm_cost_tracker 0.5.1 → 0.5.2
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/README.md +11 -7
- data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
- data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
- data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
- data/lib/llm_cost_tracker/configuration.rb +22 -16
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +12 -3
- data/lib/llm_cost_tracker/integrations/base.rb +77 -6
- data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
- data/lib/llm_cost_tracker/integrations/openai.rb +14 -5
- data/lib/llm_cost_tracker/integrations/registry.rb +3 -1
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +10 -9
- data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
- data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +11 -2
- data/lib/llm_cost_tracker/price_registry.rb +3 -0
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
- data/lib/llm_cost_tracker/report.rb +8 -1
- data/lib/llm_cost_tracker/report_data.rb +25 -9
- data/lib/llm_cost_tracker/retention.rb +30 -7
- data/lib/llm_cost_tracker/stream_capture.rb +7 -0
- data/lib/llm_cost_tracker/stream_collector.rb +25 -1
- data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
- data/lib/llm_cost_tracker/tracker.rb +6 -2
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +1 -0
- metadata +9 -5
|
@@ -11,10 +11,16 @@ LlmCostTracker.configure do |config|
|
|
|
11
11
|
# Tags are merged into every event. Use a callable for request/job-time context.
|
|
12
12
|
config.default_tags = -> { { environment: Rails.env } }
|
|
13
13
|
|
|
14
|
+
# Tag guardrails keep accidental high-cardinality or sensitive values out of the ledger.
|
|
15
|
+
# config.max_tag_count = 50
|
|
16
|
+
# config.max_tag_value_bytesize = 1024
|
|
17
|
+
# config.redacted_tag_keys = %w[api_key access_token authorization credential password refresh_token secret]
|
|
18
|
+
|
|
14
19
|
# Optional SDK integrations. Provider SDK gems are not installed by LLM Cost Tracker.
|
|
15
|
-
#
|
|
20
|
+
# Enabled integrations are checked at boot, so enable only clients your app loads.
|
|
16
21
|
# config.instrument :openai
|
|
17
22
|
# config.instrument :anthropic
|
|
23
|
+
# config.instrument :ruby_llm
|
|
18
24
|
|
|
19
25
|
# Budget behavior: :notify calls on_budget_exceeded, :raise raises after recording,
|
|
20
26
|
# :block_requests preflights monthly/daily budgets before supported requests.
|
|
@@ -10,10 +10,19 @@ module LlmCostTracker
|
|
|
10
10
|
class << self
|
|
11
11
|
def integration_name = :anthropic
|
|
12
12
|
|
|
13
|
-
def
|
|
13
|
+
def minimum_version = "1.36.0"
|
|
14
|
+
|
|
15
|
+
def version_constant = "Anthropic::VERSION"
|
|
16
|
+
|
|
17
|
+
def patch_targets
|
|
14
18
|
[
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
patch_target("Anthropic::Resources::Messages", with: MessagesPatch, methods: :create),
|
|
20
|
+
patch_target(
|
|
21
|
+
"Anthropic::Resources::Beta::Messages",
|
|
22
|
+
with: MessagesPatch,
|
|
23
|
+
methods: :create,
|
|
24
|
+
optional: true
|
|
25
|
+
)
|
|
17
26
|
]
|
|
18
27
|
end
|
|
19
28
|
|
|
@@ -6,6 +6,7 @@ require_relative "object_reader"
|
|
|
6
6
|
module LlmCostTracker
|
|
7
7
|
module Integrations
|
|
8
8
|
module Base
|
|
9
|
+
PatchTarget = Data.define(:constant_name, :patch, :method_names, :optional)
|
|
9
10
|
Result = Data.define(:name, :status, :message)
|
|
10
11
|
|
|
11
12
|
def active?
|
|
@@ -13,15 +14,23 @@ module LlmCostTracker
|
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def install
|
|
16
|
-
|
|
17
|
+
validate_contract!
|
|
18
|
+
patch_targets.each do |target|
|
|
19
|
+
target_class = constant(target.constant_name)
|
|
20
|
+
install_patch(target_class, target.patch) if target_class
|
|
21
|
+
end
|
|
17
22
|
end
|
|
18
23
|
|
|
19
24
|
def status
|
|
20
25
|
name = integration_name
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
26
|
+
problems = contract_problems
|
|
27
|
+
if problems.any?
|
|
28
|
+
return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
required_targets = patch_targets.reject(&:optional)
|
|
32
|
+
installed = required_targets.count { |target| patch_installed?(constant(target.constant_name), target.patch) }
|
|
33
|
+
return Result.new(name, :ok, "#{name} integration installed") if installed == required_targets.count
|
|
25
34
|
|
|
26
35
|
Result.new(name, :warn, "#{name} integration is enabled but not installed")
|
|
27
36
|
end
|
|
@@ -55,10 +64,72 @@ module LlmCostTracker
|
|
|
55
64
|
end
|
|
56
65
|
end
|
|
57
66
|
|
|
67
|
+
def minimum_version = nil
|
|
68
|
+
|
|
69
|
+
def version_constant = nil
|
|
70
|
+
|
|
71
|
+
def patch_targets = []
|
|
72
|
+
|
|
73
|
+
def patch_target(constant_name, with:, methods:, optional: false)
|
|
74
|
+
PatchTarget.new(constant_name, with, Array(methods), optional)
|
|
75
|
+
end
|
|
76
|
+
|
|
58
77
|
private
|
|
59
78
|
|
|
79
|
+
def validate_contract!
|
|
80
|
+
problems = contract_problems
|
|
81
|
+
return if problems.empty?
|
|
82
|
+
|
|
83
|
+
raise Error, "#{integration_name} integration cannot be installed: #{problems.join('; ')}"
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def contract_problems
|
|
87
|
+
version_problems + target_problems
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def version_problems
|
|
91
|
+
return [] unless minimum_version
|
|
92
|
+
|
|
93
|
+
name = integration_name.to_s
|
|
94
|
+
version = installed_version
|
|
95
|
+
return ["#{name} >= #{minimum_version} is required, but #{name} is not loaded"] unless version
|
|
96
|
+
return [] if version >= Gem::Version.new(minimum_version)
|
|
97
|
+
|
|
98
|
+
["#{name} >= #{minimum_version} is required, detected #{version}"]
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def installed_version
|
|
102
|
+
Gem.loaded_specs[integration_name.to_s]&.version || constant_version
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def constant_version
|
|
106
|
+
return nil unless version_constant
|
|
107
|
+
|
|
108
|
+
value = constant(version_constant)
|
|
109
|
+
value ? Gem::Version.new(value.to_s) : nil
|
|
110
|
+
rescue ArgumentError
|
|
111
|
+
nil
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def target_problems
|
|
115
|
+
patch_targets.flat_map do |target|
|
|
116
|
+
target_class = constant(target.constant_name)
|
|
117
|
+
next [] if target_class.nil? && target.optional
|
|
118
|
+
next ["#{target.constant_name} is not loaded"] unless target_class
|
|
119
|
+
|
|
120
|
+
missing_methods(target_class, target)
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def missing_methods(target_class, target)
|
|
125
|
+
target.method_names.filter_map do |method_name|
|
|
126
|
+
next if target_class.method_defined?(method_name) || target_class.private_method_defined?(method_name)
|
|
127
|
+
|
|
128
|
+
"#{target.constant_name}##{method_name} is not available"
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
60
132
|
def install_patch(target, patch)
|
|
61
|
-
return unless target
|
|
62
133
|
return if patch_installed?(target, patch)
|
|
63
134
|
|
|
64
135
|
target.prepend(patch)
|
|
@@ -10,10 +10,14 @@ module LlmCostTracker
|
|
|
10
10
|
class << self
|
|
11
11
|
def integration_name = :openai
|
|
12
12
|
|
|
13
|
-
def
|
|
13
|
+
def minimum_version = "0.59.0"
|
|
14
|
+
|
|
15
|
+
def version_constant = "OpenAI::VERSION"
|
|
16
|
+
|
|
17
|
+
def patch_targets
|
|
14
18
|
[
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
patch_target("OpenAI::Resources::Responses", with: ResponsesPatch, methods: :create),
|
|
20
|
+
patch_target("OpenAI::Resources::Chat::Completions", with: ChatCompletionsPatch, methods: :create)
|
|
17
21
|
]
|
|
18
22
|
end
|
|
19
23
|
|
|
@@ -28,15 +32,16 @@ module LlmCostTracker
|
|
|
28
32
|
output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
|
|
29
33
|
next if input_tokens.nil? && output_tokens.nil?
|
|
30
34
|
|
|
35
|
+
metadata = usage_metadata(usage)
|
|
31
36
|
LlmCostTracker::Tracker.record(
|
|
32
37
|
provider: "openai",
|
|
33
38
|
model: ObjectReader.first(response, :model) || request[:model],
|
|
34
|
-
input_tokens:
|
|
39
|
+
input_tokens: regular_input_tokens(input_tokens, metadata[:cache_read_input_tokens]),
|
|
35
40
|
output_tokens: ObjectReader.integer(output_tokens),
|
|
36
41
|
latency_ms: latency_ms,
|
|
37
42
|
usage_source: :sdk_response,
|
|
38
43
|
provider_response_id: ObjectReader.first(response, :id),
|
|
39
|
-
metadata:
|
|
44
|
+
metadata: metadata
|
|
40
45
|
)
|
|
41
46
|
end
|
|
42
47
|
end
|
|
@@ -61,6 +66,10 @@ module LlmCostTracker
|
|
|
61
66
|
ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
|
|
62
67
|
)
|
|
63
68
|
end
|
|
69
|
+
|
|
70
|
+
def regular_input_tokens(input_tokens, cache_read)
|
|
71
|
+
[ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
|
|
72
|
+
end
|
|
64
73
|
end
|
|
65
74
|
|
|
66
75
|
module ResponsesPatch
|
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "openai"
|
|
4
4
|
require_relative "anthropic"
|
|
5
|
+
require_relative "ruby_llm"
|
|
5
6
|
|
|
6
7
|
module LlmCostTracker
|
|
7
8
|
module Integrations
|
|
8
9
|
module Registry
|
|
9
10
|
INTEGRATIONS = {
|
|
10
11
|
openai: Openai,
|
|
11
|
-
anthropic: Anthropic
|
|
12
|
+
anthropic: Anthropic,
|
|
13
|
+
ruby_llm: RubyLlm
|
|
12
14
|
}.freeze
|
|
13
15
|
|
|
14
16
|
module_function
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
|
|
6
|
+
module Integrations
|
|
7
|
+
module RubyLlm
|
|
8
|
+
extend Base
|
|
9
|
+
|
|
10
|
+
class << self
|
|
11
|
+
def integration_name = :ruby_llm
|
|
12
|
+
|
|
13
|
+
def minimum_version = "1.14.1"
|
|
14
|
+
|
|
15
|
+
def version_constant = "RubyLLM::VERSION"
|
|
16
|
+
|
|
17
|
+
def patch_targets
|
|
18
|
+
[
|
|
19
|
+
patch_target(
|
|
20
|
+
"RubyLLM::Provider",
|
|
21
|
+
with: ProviderPatch,
|
|
22
|
+
methods: %i[slug complete embed transcribe]
|
|
23
|
+
)
|
|
24
|
+
]
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def record_completion(provider, response, request:, latency_ms:, stream:)
|
|
28
|
+
record_usage(
|
|
29
|
+
provider: provider_slug(provider),
|
|
30
|
+
model: response_model_id(response) || model_id(request[:model]),
|
|
31
|
+
response: response,
|
|
32
|
+
latency_ms: latency_ms,
|
|
33
|
+
stream: stream
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def streaming_request?(request, has_block:)
|
|
38
|
+
has_block || request[:stream] == true
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def record_embedding(provider, response, request:, latency_ms:)
|
|
42
|
+
record_usage(
|
|
43
|
+
provider: provider_slug(provider),
|
|
44
|
+
model: response_model_id(response) || model_id(request[:model]),
|
|
45
|
+
response: response,
|
|
46
|
+
latency_ms: latency_ms,
|
|
47
|
+
stream: false,
|
|
48
|
+
output_tokens: 0
|
|
49
|
+
)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def record_transcription(provider, response, request:, latency_ms:)
|
|
53
|
+
record_usage(
|
|
54
|
+
provider: provider_slug(provider),
|
|
55
|
+
model: response_model_id(response) || model_id(request[:model]),
|
|
56
|
+
response: response,
|
|
57
|
+
latency_ms: latency_ms,
|
|
58
|
+
stream: false
|
|
59
|
+
)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
|
|
63
|
+
return unless active?
|
|
64
|
+
|
|
65
|
+
record_safely do
|
|
66
|
+
input_tokens = ObjectReader.first(response, :input_tokens)
|
|
67
|
+
output_tokens = ObjectReader.first(response, :output_tokens) if output_tokens.nil?
|
|
68
|
+
next if input_tokens.nil? && output_tokens.nil?
|
|
69
|
+
|
|
70
|
+
cache_read = ObjectReader.integer(ObjectReader.first(response, :cached_tokens))
|
|
71
|
+
|
|
72
|
+
LlmCostTracker::Tracker.record(
|
|
73
|
+
provider: provider,
|
|
74
|
+
model: model,
|
|
75
|
+
input_tokens: regular_input_tokens(input_tokens, cache_read),
|
|
76
|
+
output_tokens: ObjectReader.integer(output_tokens),
|
|
77
|
+
latency_ms: latency_ms,
|
|
78
|
+
stream: stream,
|
|
79
|
+
usage_source: :ruby_llm,
|
|
80
|
+
provider_response_id: provider_response_id(response),
|
|
81
|
+
metadata: usage_metadata(response, cache_read)
|
|
82
|
+
)
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def usage_metadata(response, cache_read)
|
|
87
|
+
{
|
|
88
|
+
cache_read_input_tokens: cache_read,
|
|
89
|
+
cache_write_input_tokens: ObjectReader.integer(ObjectReader.first(response, :cache_creation_tokens)),
|
|
90
|
+
hidden_output_tokens: ObjectReader.integer(
|
|
91
|
+
ObjectReader.first(response, :thinking_tokens, :reasoning_tokens)
|
|
92
|
+
)
|
|
93
|
+
}
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def regular_input_tokens(input_tokens, cache_read)
|
|
97
|
+
[ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def provider_slug(provider)
|
|
101
|
+
ObjectReader.first(provider, :slug).to_s
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def model_id(object)
|
|
105
|
+
return nil if object.nil?
|
|
106
|
+
|
|
107
|
+
value = ObjectReader.first(object, :id, :model_id, :model)
|
|
108
|
+
value ||= object if object.is_a?(String) || object.is_a?(Symbol)
|
|
109
|
+
value&.to_s
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def response_model_id(object)
|
|
113
|
+
value = ObjectReader.first(object, :model_id, :model)
|
|
114
|
+
value&.to_s
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def provider_response_id(response)
|
|
118
|
+
ObjectReader.first(response, :id, :provider_response_id) || ObjectReader.nested(response, :raw, :id)
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
module ProviderPatch
|
|
123
|
+
def complete(*args, **kwargs, &)
|
|
124
|
+
integration = LlmCostTracker::Integrations::RubyLlm
|
|
125
|
+
request = integration.request_params(args, kwargs)
|
|
126
|
+
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
127
|
+
integration.enforce_budget!
|
|
128
|
+
response = super
|
|
129
|
+
integration.record_completion(
|
|
130
|
+
self,
|
|
131
|
+
response,
|
|
132
|
+
request: request,
|
|
133
|
+
latency_ms: integration.elapsed_ms(started_at),
|
|
134
|
+
stream: integration.streaming_request?(request, has_block: block_given?)
|
|
135
|
+
)
|
|
136
|
+
response
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def embed(*args, **kwargs)
|
|
140
|
+
integration = LlmCostTracker::Integrations::RubyLlm
|
|
141
|
+
request = integration.request_params(args, kwargs)
|
|
142
|
+
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
143
|
+
integration.enforce_budget!
|
|
144
|
+
response = super
|
|
145
|
+
integration.record_embedding(
|
|
146
|
+
self,
|
|
147
|
+
response,
|
|
148
|
+
request: request,
|
|
149
|
+
latency_ms: integration.elapsed_ms(started_at)
|
|
150
|
+
)
|
|
151
|
+
response
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def transcribe(*args, **kwargs)
|
|
155
|
+
integration = LlmCostTracker::Integrations::RubyLlm
|
|
156
|
+
request = integration.request_params(args, kwargs)
|
|
157
|
+
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
158
|
+
integration.enforce_budget!
|
|
159
|
+
response = super
|
|
160
|
+
integration.record_transcription(
|
|
161
|
+
self,
|
|
162
|
+
response,
|
|
163
|
+
request: request,
|
|
164
|
+
latency_ms: integration.elapsed_ms(started_at)
|
|
165
|
+
)
|
|
166
|
+
response
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -73,12 +73,15 @@ module LlmCostTracker
|
|
|
73
73
|
end
|
|
74
74
|
|
|
75
75
|
def self.group_by_tag(key)
|
|
76
|
-
group(Arel.sql(
|
|
76
|
+
group(Arel.sql(tag_value_expression(key)))
|
|
77
77
|
end
|
|
78
78
|
|
|
79
|
-
def self.cost_by_tag(key)
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
def self.cost_by_tag(key, limit: nil)
|
|
80
|
+
relation = group_by_tag(key).order(Arel.sql("COALESCE(SUM(total_cost), 0) DESC"))
|
|
81
|
+
relation = relation.limit(limit) if limit
|
|
82
|
+
|
|
83
|
+
costs = relation.sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
|
|
84
|
+
grouped[tag_value_label(tag_value)] += cost.to_f
|
|
82
85
|
end
|
|
83
86
|
costs.sort_by { |_label, cost| -cost }.to_h
|
|
84
87
|
end
|
|
@@ -101,14 +104,13 @@ module LlmCostTracker
|
|
|
101
104
|
group(:provider).average(:latency_ms).transform_values(&:to_f)
|
|
102
105
|
end
|
|
103
106
|
|
|
104
|
-
def self.
|
|
107
|
+
def self.tag_value_label(value)
|
|
105
108
|
value.nil? || value == "" ? "(untagged)" : value.to_s
|
|
106
109
|
end
|
|
107
|
-
private_class_method :tag_label
|
|
108
110
|
|
|
109
|
-
def self.
|
|
111
|
+
def self.tag_value_expression(key, table_name: quoted_table_name)
|
|
110
112
|
key = validated_tag_key(key)
|
|
111
|
-
column = "#{
|
|
113
|
+
column = "#{table_name}.#{connection.quote_column_name('tags')}"
|
|
112
114
|
|
|
113
115
|
case connection.adapter_name
|
|
114
116
|
when /postgres/i
|
|
@@ -120,7 +122,6 @@ module LlmCostTracker
|
|
|
120
122
|
"json_extract(#{column}, #{connection.quote(json_path(key))})"
|
|
121
123
|
end
|
|
122
124
|
end
|
|
123
|
-
private_class_method :tag_group_expression
|
|
124
125
|
|
|
125
126
|
def self.validated_tag_key(key)
|
|
126
127
|
TagKey.validate!(key)
|
|
@@ -5,12 +5,11 @@ require "json"
|
|
|
5
5
|
|
|
6
6
|
require_relative "../logging"
|
|
7
7
|
require_relative "../request_url"
|
|
8
|
+
require_relative "../stream_capture"
|
|
8
9
|
|
|
9
10
|
module LlmCostTracker
|
|
10
11
|
module Middleware
|
|
11
12
|
class Faraday < ::Faraday::Middleware
|
|
12
|
-
STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
|
|
13
|
-
|
|
14
13
|
def initialize(app, **options)
|
|
15
14
|
super(app)
|
|
16
15
|
@tags = options.fetch(:tags, {})
|
|
@@ -88,6 +87,11 @@ module LlmCostTracker
|
|
|
88
87
|
end
|
|
89
88
|
|
|
90
89
|
def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
|
|
90
|
+
if stream_buffer&.dig(:overflowed)
|
|
91
|
+
Logging.warn(capture_warning(request_url, stream_buffer))
|
|
92
|
+
return parser.parse_stream(request_url, request_body, response_env.status, [])
|
|
93
|
+
end
|
|
94
|
+
|
|
91
95
|
body = stream_buffer&.dig(:buffer)&.string
|
|
92
96
|
body = read_body(response_env.body) if body.nil? || body.empty?
|
|
93
97
|
|
|
@@ -110,7 +114,7 @@ module LlmCostTracker
|
|
|
110
114
|
request_env.request.on_data = proc do |chunk, size, env|
|
|
111
115
|
chunk = chunk.to_s
|
|
112
116
|
unless state[:overflowed]
|
|
113
|
-
if state[:bytes] + chunk.bytesize <=
|
|
117
|
+
if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
|
|
114
118
|
state[:buffer] << chunk
|
|
115
119
|
state[:bytes] += chunk.bytesize
|
|
116
120
|
else
|
|
@@ -161,7 +165,7 @@ module LlmCostTracker
|
|
|
161
165
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
162
166
|
end
|
|
163
167
|
|
|
164
|
-
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{
|
|
168
|
+
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
|
|
165
169
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
166
170
|
end
|
|
167
171
|
end
|
|
@@ -72,7 +72,7 @@ module LlmCostTracker
|
|
|
72
72
|
model: extract_model_from_url(request_url),
|
|
73
73
|
input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
|
|
74
74
|
output_tokens: output_tokens(usage),
|
|
75
|
-
total_tokens: usage
|
|
75
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
76
76
|
cache_read_input_tokens: usage["cachedContentTokenCount"],
|
|
77
77
|
hidden_output_tokens: usage["thoughtsTokenCount"],
|
|
78
78
|
stream: stream,
|
|
@@ -92,6 +92,13 @@ module LlmCostTracker
|
|
|
92
92
|
usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
def total_tokens(usage, cache_read)
|
|
96
|
+
total = usage["totalTokenCount"]
|
|
97
|
+
return total.to_i unless total.nil?
|
|
98
|
+
|
|
99
|
+
[usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
|
|
100
|
+
end
|
|
101
|
+
|
|
95
102
|
def stream_response_id(events)
|
|
96
103
|
find_event_value(events) { |data| data["responseId"] }
|
|
97
104
|
end
|
|
@@ -21,7 +21,7 @@ module LlmCostTracker
|
|
|
21
21
|
model: response["model"] || request["model"],
|
|
22
22
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
23
23
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
24
|
-
total_tokens: usage
|
|
24
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
25
25
|
cache_read_input_tokens: cache_read,
|
|
26
26
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
27
27
|
usage_source: :response
|
|
@@ -44,7 +44,7 @@ module LlmCostTracker
|
|
|
44
44
|
model: model,
|
|
45
45
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
46
46
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
47
|
-
total_tokens: usage
|
|
47
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
48
48
|
cache_read_input_tokens: cache_read,
|
|
49
49
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
50
50
|
stream: true,
|
|
@@ -87,6 +87,15 @@ module LlmCostTracker
|
|
|
87
87
|
details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
|
|
88
88
|
details["reasoning_tokens"]
|
|
89
89
|
end
|
|
90
|
+
|
|
91
|
+
def total_tokens(usage, cache_read)
|
|
92
|
+
total = usage["total_tokens"]
|
|
93
|
+
return total.to_i unless total.nil?
|
|
94
|
+
|
|
95
|
+
regular_input_tokens(usage, cache_read) +
|
|
96
|
+
cache_read.to_i +
|
|
97
|
+
(usage["completion_tokens"] || usage["output_tokens"]).to_i
|
|
98
|
+
end
|
|
90
99
|
end
|
|
91
100
|
end
|
|
92
101
|
end
|
|
@@ -12,6 +12,7 @@ module LlmCostTracker
|
|
|
12
12
|
EMPTY_PRICES = {}.freeze
|
|
13
13
|
PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
|
|
14
14
|
METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
|
|
15
|
+
MAX_FILE_BYTES = 2_097_152
|
|
15
16
|
MUTEX = Monitor.new
|
|
16
17
|
|
|
17
18
|
class << self
|
|
@@ -114,6 +115,8 @@ module LlmCostTracker
|
|
|
114
115
|
end
|
|
115
116
|
|
|
116
117
|
def load_price_file(path)
|
|
118
|
+
raise ArgumentError, "prices_file exceeds #{MAX_FILE_BYTES} bytes" if File.size(path) > MAX_FILE_BYTES
|
|
119
|
+
|
|
117
120
|
contents = File.read(path)
|
|
118
121
|
return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
|
|
119
122
|
|
|
@@ -17,6 +17,7 @@ module LlmCostTracker
|
|
|
17
17
|
|
|
18
18
|
USER_AGENT = "llm_cost_tracker price refresh"
|
|
19
19
|
MAX_REDIRECTS = 5
|
|
20
|
+
MAX_BODY_BYTES = 2_097_152
|
|
20
21
|
OPEN_TIMEOUT = 5
|
|
21
22
|
READ_TIMEOUT = 10
|
|
22
23
|
WRITE_TIMEOUT = 10
|
|
@@ -25,26 +26,17 @@ module LlmCostTracker
|
|
|
25
26
|
raise Error, "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS
|
|
26
27
|
|
|
27
28
|
uri = URI.parse(url)
|
|
28
|
-
raise Error, "Pricing snapshot URL must use
|
|
29
|
+
raise Error, "Pricing snapshot URL must use https" unless uri.scheme == "https"
|
|
29
30
|
|
|
30
31
|
request = Net::HTTP::Get.new(uri)
|
|
31
32
|
request["User-Agent"] = USER_AGENT
|
|
32
33
|
request["If-None-Match"] = etag if etag
|
|
33
34
|
|
|
34
|
-
response =
|
|
35
|
-
uri.host,
|
|
36
|
-
uri.port,
|
|
37
|
-
use_ssl: uri.scheme == "https",
|
|
38
|
-
open_timeout: OPEN_TIMEOUT,
|
|
39
|
-
read_timeout: READ_TIMEOUT,
|
|
40
|
-
write_timeout: WRITE_TIMEOUT
|
|
41
|
-
) do |http|
|
|
42
|
-
http.request(request)
|
|
43
|
-
end
|
|
35
|
+
response, body = fetch_response(uri, request)
|
|
44
36
|
|
|
45
37
|
case response
|
|
46
38
|
when Net::HTTPSuccess
|
|
47
|
-
build_response(response, not_modified: false)
|
|
39
|
+
build_response(response, body: body || limited_body(response), not_modified: false)
|
|
48
40
|
when Net::HTTPNotModified
|
|
49
41
|
build_response(response, body: nil, not_modified: true)
|
|
50
42
|
when Net::HTTPRedirection
|
|
@@ -61,6 +53,43 @@ module LlmCostTracker
|
|
|
61
53
|
|
|
62
54
|
private
|
|
63
55
|
|
|
56
|
+
def fetch_response(uri, request)
|
|
57
|
+
body = nil
|
|
58
|
+
response = Net::HTTP.start(
|
|
59
|
+
uri.host,
|
|
60
|
+
uri.port,
|
|
61
|
+
use_ssl: uri.scheme == "https",
|
|
62
|
+
open_timeout: OPEN_TIMEOUT,
|
|
63
|
+
read_timeout: READ_TIMEOUT,
|
|
64
|
+
write_timeout: WRITE_TIMEOUT
|
|
65
|
+
) do |http|
|
|
66
|
+
http.request(request) do |streamed_response|
|
|
67
|
+
body = limited_body(streamed_response) if streamed_response.is_a?(Net::HTTPSuccess)
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
[response, body]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def limited_body(response)
|
|
75
|
+
body = +""
|
|
76
|
+
if response.respond_to?(:read_body)
|
|
77
|
+
response.read_body do |chunk|
|
|
78
|
+
chunk = chunk.to_s
|
|
79
|
+
if body.bytesize + chunk.bytesize > MAX_BODY_BYTES
|
|
80
|
+
raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes"
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
body << chunk
|
|
84
|
+
end
|
|
85
|
+
else
|
|
86
|
+
body = response.body.to_s
|
|
87
|
+
end
|
|
88
|
+
raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes" if body.bytesize > MAX_BODY_BYTES
|
|
89
|
+
|
|
90
|
+
body
|
|
91
|
+
end
|
|
92
|
+
|
|
64
93
|
def build_response(response, not_modified:, body: response.body)
|
|
65
94
|
Response.new(
|
|
66
95
|
body: body,
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
require "json"
|
|
4
4
|
require "yaml"
|
|
5
5
|
|
|
6
|
+
require_relative "../price_registry"
|
|
7
|
+
|
|
6
8
|
module LlmCostTracker
|
|
7
9
|
module PriceSync
|
|
8
10
|
class RegistryLoader
|
|
@@ -18,6 +20,10 @@ module LlmCostTracker
|
|
|
18
20
|
private
|
|
19
21
|
|
|
20
22
|
def load_registry_file(path)
|
|
23
|
+
if File.size(path) > PriceRegistry::MAX_FILE_BYTES
|
|
24
|
+
raise ArgumentError, "pricing registry exceeds #{PriceRegistry::MAX_FILE_BYTES} bytes"
|
|
25
|
+
end
|
|
26
|
+
|
|
21
27
|
contents = File.read(path)
|
|
22
28
|
registry = yaml_file?(path) ? (YAML.safe_load(contents, aliases: false) || {}) : JSON.parse(contents)
|
|
23
29
|
raise ArgumentError, "pricing registry must be a hash" unless registry.is_a?(Hash)
|
|
@@ -9,7 +9,14 @@ module LlmCostTracker
|
|
|
9
9
|
|
|
10
10
|
class << self
|
|
11
11
|
def generate(days: DEFAULT_DAYS, now: Time.now.utc, tag_breakdowns: nil)
|
|
12
|
-
|
|
12
|
+
report_data = ReportData.build(
|
|
13
|
+
days: days,
|
|
14
|
+
now: now,
|
|
15
|
+
tag_breakdowns: tag_breakdowns,
|
|
16
|
+
breakdown_limit: ReportFormatter::TOP_LIMIT
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
ReportFormatter.new(report_data).to_s
|
|
13
20
|
rescue LoadError => e
|
|
14
21
|
"Unable to build LLM cost report: ActiveRecord storage is unavailable (#{e.message})"
|
|
15
22
|
rescue StandardError => e
|