llm_cost_tracker 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/README.md +11 -7
  4. data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
  5. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
  6. data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
  7. data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
  8. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
  9. data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
  10. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
  11. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
  12. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
  13. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
  14. data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
  15. data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
  16. data/lib/llm_cost_tracker/configuration.rb +22 -16
  17. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
  18. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
  19. data/lib/llm_cost_tracker/integrations/anthropic.rb +12 -3
  20. data/lib/llm_cost_tracker/integrations/base.rb +77 -6
  21. data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
  22. data/lib/llm_cost_tracker/integrations/openai.rb +14 -5
  23. data/lib/llm_cost_tracker/integrations/registry.rb +3 -1
  24. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
  25. data/lib/llm_cost_tracker/llm_api_call.rb +10 -9
  26. data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
  27. data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
  28. data/lib/llm_cost_tracker/parsers/openai_usage.rb +11 -2
  29. data/lib/llm_cost_tracker/price_registry.rb +3 -0
  30. data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
  31. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
  32. data/lib/llm_cost_tracker/report.rb +8 -1
  33. data/lib/llm_cost_tracker/report_data.rb +25 -9
  34. data/lib/llm_cost_tracker/retention.rb +30 -7
  35. data/lib/llm_cost_tracker/stream_capture.rb +7 -0
  36. data/lib/llm_cost_tracker/stream_collector.rb +25 -1
  37. data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
  38. data/lib/llm_cost_tracker/tracker.rb +6 -2
  39. data/lib/llm_cost_tracker/version.rb +1 -1
  40. data/lib/llm_cost_tracker.rb +1 -0
  41. metadata +9 -5
@@ -11,10 +11,16 @@ LlmCostTracker.configure do |config|
11
11
  # Tags are merged into every event. Use a callable for request/job-time context.
12
12
  config.default_tags = -> { { environment: Rails.env } }
13
13
 
14
+ # Tag guardrails keep accidental high-cardinality or sensitive values out of the ledger.
15
+ # config.max_tag_count = 50
16
+ # config.max_tag_value_bytesize = 1024
17
+ # config.redacted_tag_keys = %w[api_key access_token authorization credential password refresh_token secret]
18
+
14
19
  # Optional SDK integrations. Provider SDK gems are not installed by LLM Cost Tracker.
15
- # Enable only the SDKs your app already uses.
20
+ # Enabled integrations are checked at boot, so enable only clients your app loads.
16
21
  # config.instrument :openai
17
22
  # config.instrument :anthropic
23
+ # config.instrument :ruby_llm
18
24
 
19
25
  # Budget behavior: :notify calls on_budget_exceeded, :raise raises after recording,
20
26
  # :block_requests preflights monthly/daily budgets before supported requests.
@@ -10,10 +10,19 @@ module LlmCostTracker
10
10
  class << self
11
11
  def integration_name = :anthropic
12
12
 
13
- def target_patches
13
+ def minimum_version = "1.36.0"
14
+
15
+ def version_constant = "Anthropic::VERSION"
16
+
17
+ def patch_targets
14
18
  [
15
- [constant("Anthropic::Resources::Messages"), MessagesPatch],
16
- [constant("Anthropic::Resources::Beta::Messages"), MessagesPatch]
19
+ patch_target("Anthropic::Resources::Messages", with: MessagesPatch, methods: :create),
20
+ patch_target(
21
+ "Anthropic::Resources::Beta::Messages",
22
+ with: MessagesPatch,
23
+ methods: :create,
24
+ optional: true
25
+ )
17
26
  ]
18
27
  end
19
28
 
@@ -6,6 +6,7 @@ require_relative "object_reader"
6
6
  module LlmCostTracker
7
7
  module Integrations
8
8
  module Base
9
+ PatchTarget = Data.define(:constant_name, :patch, :method_names, :optional)
9
10
  Result = Data.define(:name, :status, :message)
10
11
 
11
12
  def active?
@@ -13,15 +14,23 @@ module LlmCostTracker
13
14
  end
14
15
 
15
16
  def install
16
- target_patches.each { |target, patch| install_patch(target, patch) }
17
+ validate_contract!
18
+ patch_targets.each do |target|
19
+ target_class = constant(target.constant_name)
20
+ install_patch(target_class, target.patch) if target_class
21
+ end
17
22
  end
18
23
 
19
24
  def status
20
25
  name = integration_name
21
- installed = target_patches.count { |target, patch| patch_installed?(target, patch) }
22
- available = target_patches.count { |target, _patch| target }
23
- return Result.new(name, :ok, "#{name} integration installed") if installed.positive?
24
- return Result.new(name, :warn, "#{name} SDK classes are not loaded") if available.zero?
26
+ problems = contract_problems
27
+ if problems.any?
28
+ return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
29
+ end
30
+
31
+ required_targets = patch_targets.reject(&:optional)
32
+ installed = required_targets.count { |target| patch_installed?(constant(target.constant_name), target.patch) }
33
+ return Result.new(name, :ok, "#{name} integration installed") if installed == required_targets.count
25
34
 
26
35
  Result.new(name, :warn, "#{name} integration is enabled but not installed")
27
36
  end
@@ -55,10 +64,72 @@ module LlmCostTracker
55
64
  end
56
65
  end
57
66
 
67
+ def minimum_version = nil
68
+
69
+ def version_constant = nil
70
+
71
+ def patch_targets = []
72
+
73
+ def patch_target(constant_name, with:, methods:, optional: false)
74
+ PatchTarget.new(constant_name, with, Array(methods), optional)
75
+ end
76
+
58
77
  private
59
78
 
79
+ def validate_contract!
80
+ problems = contract_problems
81
+ return if problems.empty?
82
+
83
+ raise Error, "#{integration_name} integration cannot be installed: #{problems.join('; ')}"
84
+ end
85
+
86
+ def contract_problems
87
+ version_problems + target_problems
88
+ end
89
+
90
+ def version_problems
91
+ return [] unless minimum_version
92
+
93
+ name = integration_name.to_s
94
+ version = installed_version
95
+ return ["#{name} >= #{minimum_version} is required, but #{name} is not loaded"] unless version
96
+ return [] if version >= Gem::Version.new(minimum_version)
97
+
98
+ ["#{name} >= #{minimum_version} is required, detected #{version}"]
99
+ end
100
+
101
+ def installed_version
102
+ Gem.loaded_specs[integration_name.to_s]&.version || constant_version
103
+ end
104
+
105
+ def constant_version
106
+ return nil unless version_constant
107
+
108
+ value = constant(version_constant)
109
+ value ? Gem::Version.new(value.to_s) : nil
110
+ rescue ArgumentError
111
+ nil
112
+ end
113
+
114
+ def target_problems
115
+ patch_targets.flat_map do |target|
116
+ target_class = constant(target.constant_name)
117
+ next [] if target_class.nil? && target.optional
118
+ next ["#{target.constant_name} is not loaded"] unless target_class
119
+
120
+ missing_methods(target_class, target)
121
+ end
122
+ end
123
+
124
+ def missing_methods(target_class, target)
125
+ target.method_names.filter_map do |method_name|
126
+ next if target_class.method_defined?(method_name) || target_class.private_method_defined?(method_name)
127
+
128
+ "#{target.constant_name}##{method_name} is not available"
129
+ end
130
+ end
131
+
60
132
  def install_patch(target, patch)
61
- return unless target
62
133
  return if patch_installed?(target, patch)
63
134
 
64
135
  target.prepend(patch)
@@ -48,7 +48,7 @@ module LlmCostTracker
48
48
  return unless object.respond_to?(:[])
49
49
 
50
50
  object[key]
51
- rescue IndexError, TypeError, NoMethodError
51
+ rescue IndexError, NameError, TypeError
52
52
  nil
53
53
  end
54
54
  end
@@ -10,10 +10,14 @@ module LlmCostTracker
10
10
  class << self
11
11
  def integration_name = :openai
12
12
 
13
- def target_patches
13
+ def minimum_version = "0.59.0"
14
+
15
+ def version_constant = "OpenAI::VERSION"
16
+
17
+ def patch_targets
14
18
  [
15
- [constant("OpenAI::Resources::Responses"), ResponsesPatch],
16
- [constant("OpenAI::Resources::Chat::Completions"), ChatCompletionsPatch]
19
+ patch_target("OpenAI::Resources::Responses", with: ResponsesPatch, methods: :create),
20
+ patch_target("OpenAI::Resources::Chat::Completions", with: ChatCompletionsPatch, methods: :create)
17
21
  ]
18
22
  end
19
23
 
@@ -28,15 +32,16 @@ module LlmCostTracker
28
32
  output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
29
33
  next if input_tokens.nil? && output_tokens.nil?
30
34
 
35
+ metadata = usage_metadata(usage)
31
36
  LlmCostTracker::Tracker.record(
32
37
  provider: "openai",
33
38
  model: ObjectReader.first(response, :model) || request[:model],
34
- input_tokens: ObjectReader.integer(input_tokens),
39
+ input_tokens: regular_input_tokens(input_tokens, metadata[:cache_read_input_tokens]),
35
40
  output_tokens: ObjectReader.integer(output_tokens),
36
41
  latency_ms: latency_ms,
37
42
  usage_source: :sdk_response,
38
43
  provider_response_id: ObjectReader.first(response, :id),
39
- metadata: usage_metadata(usage)
44
+ metadata: metadata
40
45
  )
41
46
  end
42
47
  end
@@ -61,6 +66,10 @@ module LlmCostTracker
61
66
  ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
62
67
  )
63
68
  end
69
+
70
+ def regular_input_tokens(input_tokens, cache_read)
71
+ [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
72
+ end
64
73
  end
65
74
 
66
75
  module ResponsesPatch
@@ -2,13 +2,15 @@
2
2
 
3
3
  require_relative "openai"
4
4
  require_relative "anthropic"
5
+ require_relative "ruby_llm"
5
6
 
6
7
  module LlmCostTracker
7
8
  module Integrations
8
9
  module Registry
9
10
  INTEGRATIONS = {
10
11
  openai: Openai,
11
- anthropic: Anthropic
12
+ anthropic: Anthropic,
13
+ ruby_llm: RubyLlm
12
14
  }.freeze
13
15
 
14
16
  module_function
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+
5
+ module LlmCostTracker
6
+ module Integrations
7
+ module RubyLlm
8
+ extend Base
9
+
10
+ class << self
11
+ def integration_name = :ruby_llm
12
+
13
+ def minimum_version = "1.14.1"
14
+
15
+ def version_constant = "RubyLLM::VERSION"
16
+
17
+ def patch_targets
18
+ [
19
+ patch_target(
20
+ "RubyLLM::Provider",
21
+ with: ProviderPatch,
22
+ methods: %i[slug complete embed transcribe]
23
+ )
24
+ ]
25
+ end
26
+
27
+ def record_completion(provider, response, request:, latency_ms:, stream:)
28
+ record_usage(
29
+ provider: provider_slug(provider),
30
+ model: response_model_id(response) || model_id(request[:model]),
31
+ response: response,
32
+ latency_ms: latency_ms,
33
+ stream: stream
34
+ )
35
+ end
36
+
37
+ def streaming_request?(request, has_block:)
38
+ has_block || request[:stream] == true
39
+ end
40
+
41
+ def record_embedding(provider, response, request:, latency_ms:)
42
+ record_usage(
43
+ provider: provider_slug(provider),
44
+ model: response_model_id(response) || model_id(request[:model]),
45
+ response: response,
46
+ latency_ms: latency_ms,
47
+ stream: false,
48
+ output_tokens: 0
49
+ )
50
+ end
51
+
52
+ def record_transcription(provider, response, request:, latency_ms:)
53
+ record_usage(
54
+ provider: provider_slug(provider),
55
+ model: response_model_id(response) || model_id(request[:model]),
56
+ response: response,
57
+ latency_ms: latency_ms,
58
+ stream: false
59
+ )
60
+ end
61
+
62
+ def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
63
+ return unless active?
64
+
65
+ record_safely do
66
+ input_tokens = ObjectReader.first(response, :input_tokens)
67
+ output_tokens = ObjectReader.first(response, :output_tokens) if output_tokens.nil?
68
+ next if input_tokens.nil? && output_tokens.nil?
69
+
70
+ cache_read = ObjectReader.integer(ObjectReader.first(response, :cached_tokens))
71
+
72
+ LlmCostTracker::Tracker.record(
73
+ provider: provider,
74
+ model: model,
75
+ input_tokens: regular_input_tokens(input_tokens, cache_read),
76
+ output_tokens: ObjectReader.integer(output_tokens),
77
+ latency_ms: latency_ms,
78
+ stream: stream,
79
+ usage_source: :ruby_llm,
80
+ provider_response_id: provider_response_id(response),
81
+ metadata: usage_metadata(response, cache_read)
82
+ )
83
+ end
84
+ end
85
+
86
+ def usage_metadata(response, cache_read)
87
+ {
88
+ cache_read_input_tokens: cache_read,
89
+ cache_write_input_tokens: ObjectReader.integer(ObjectReader.first(response, :cache_creation_tokens)),
90
+ hidden_output_tokens: ObjectReader.integer(
91
+ ObjectReader.first(response, :thinking_tokens, :reasoning_tokens)
92
+ )
93
+ }
94
+ end
95
+
96
+ def regular_input_tokens(input_tokens, cache_read)
97
+ [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
98
+ end
99
+
100
+ def provider_slug(provider)
101
+ ObjectReader.first(provider, :slug).to_s
102
+ end
103
+
104
+ def model_id(object)
105
+ return nil if object.nil?
106
+
107
+ value = ObjectReader.first(object, :id, :model_id, :model)
108
+ value ||= object if object.is_a?(String) || object.is_a?(Symbol)
109
+ value&.to_s
110
+ end
111
+
112
+ def response_model_id(object)
113
+ value = ObjectReader.first(object, :model_id, :model)
114
+ value&.to_s
115
+ end
116
+
117
+ def provider_response_id(response)
118
+ ObjectReader.first(response, :id, :provider_response_id) || ObjectReader.nested(response, :raw, :id)
119
+ end
120
+ end
121
+
122
+ module ProviderPatch
123
+ def complete(*args, **kwargs, &)
124
+ integration = LlmCostTracker::Integrations::RubyLlm
125
+ request = integration.request_params(args, kwargs)
126
+ started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
127
+ integration.enforce_budget!
128
+ response = super
129
+ integration.record_completion(
130
+ self,
131
+ response,
132
+ request: request,
133
+ latency_ms: integration.elapsed_ms(started_at),
134
+ stream: integration.streaming_request?(request, has_block: block_given?)
135
+ )
136
+ response
137
+ end
138
+
139
+ def embed(*args, **kwargs)
140
+ integration = LlmCostTracker::Integrations::RubyLlm
141
+ request = integration.request_params(args, kwargs)
142
+ started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
143
+ integration.enforce_budget!
144
+ response = super
145
+ integration.record_embedding(
146
+ self,
147
+ response,
148
+ request: request,
149
+ latency_ms: integration.elapsed_ms(started_at)
150
+ )
151
+ response
152
+ end
153
+
154
+ def transcribe(*args, **kwargs)
155
+ integration = LlmCostTracker::Integrations::RubyLlm
156
+ request = integration.request_params(args, kwargs)
157
+ started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
158
+ integration.enforce_budget!
159
+ response = super
160
+ integration.record_transcription(
161
+ self,
162
+ response,
163
+ request: request,
164
+ latency_ms: integration.elapsed_ms(started_at)
165
+ )
166
+ response
167
+ end
168
+ end
169
+ end
170
+ end
171
+ end
@@ -73,12 +73,15 @@ module LlmCostTracker
73
73
  end
74
74
 
75
75
  def self.group_by_tag(key)
76
- group(Arel.sql(tag_group_expression(key)))
76
+ group(Arel.sql(tag_value_expression(key)))
77
77
  end
78
78
 
79
- def self.cost_by_tag(key)
80
- costs = group_by_tag(key).sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
81
- grouped[tag_label(tag_value)] += cost.to_f
79
+ def self.cost_by_tag(key, limit: nil)
80
+ relation = group_by_tag(key).order(Arel.sql("COALESCE(SUM(total_cost), 0) DESC"))
81
+ relation = relation.limit(limit) if limit
82
+
83
+ costs = relation.sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
84
+ grouped[tag_value_label(tag_value)] += cost.to_f
82
85
  end
83
86
  costs.sort_by { |_label, cost| -cost }.to_h
84
87
  end
@@ -101,14 +104,13 @@ module LlmCostTracker
101
104
  group(:provider).average(:latency_ms).transform_values(&:to_f)
102
105
  end
103
106
 
104
- def self.tag_label(value)
107
+ def self.tag_value_label(value)
105
108
  value.nil? || value == "" ? "(untagged)" : value.to_s
106
109
  end
107
- private_class_method :tag_label
108
110
 
109
- def self.tag_group_expression(key)
111
+ def self.tag_value_expression(key, table_name: quoted_table_name)
110
112
  key = validated_tag_key(key)
111
- column = "#{quoted_table_name}.#{connection.quote_column_name('tags')}"
113
+ column = "#{table_name}.#{connection.quote_column_name('tags')}"
112
114
 
113
115
  case connection.adapter_name
114
116
  when /postgres/i
@@ -120,7 +122,6 @@ module LlmCostTracker
120
122
  "json_extract(#{column}, #{connection.quote(json_path(key))})"
121
123
  end
122
124
  end
123
- private_class_method :tag_group_expression
124
125
 
125
126
  def self.validated_tag_key(key)
126
127
  TagKey.validate!(key)
@@ -5,12 +5,11 @@ require "json"
5
5
 
6
6
  require_relative "../logging"
7
7
  require_relative "../request_url"
8
+ require_relative "../stream_capture"
8
9
 
9
10
  module LlmCostTracker
10
11
  module Middleware
11
12
  class Faraday < ::Faraday::Middleware
12
- STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
13
-
14
13
  def initialize(app, **options)
15
14
  super(app)
16
15
  @tags = options.fetch(:tags, {})
@@ -88,6 +87,11 @@ module LlmCostTracker
88
87
  end
89
88
 
90
89
  def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
90
+ if stream_buffer&.dig(:overflowed)
91
+ Logging.warn(capture_warning(request_url, stream_buffer))
92
+ return parser.parse_stream(request_url, request_body, response_env.status, [])
93
+ end
94
+
91
95
  body = stream_buffer&.dig(:buffer)&.string
92
96
  body = read_body(response_env.body) if body.nil? || body.empty?
93
97
 
@@ -110,7 +114,7 @@ module LlmCostTracker
110
114
  request_env.request.on_data = proc do |chunk, size, env|
111
115
  chunk = chunk.to_s
112
116
  unless state[:overflowed]
113
- if state[:bytes] + chunk.bytesize <= STREAM_CAPTURE_LIMIT_BYTES
117
+ if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
114
118
  state[:buffer] << chunk
115
119
  state[:bytes] += chunk.bytesize
116
120
  else
@@ -161,7 +165,7 @@ module LlmCostTracker
161
165
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
162
166
  end
163
167
 
164
- "Streaming response for #{RequestUrl.label(request_url)} exceeded #{STREAM_CAPTURE_LIMIT_BYTES} bytes; " \
168
+ "Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
165
169
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
166
170
  end
167
171
  end
@@ -72,7 +72,7 @@ module LlmCostTracker
72
72
  model: extract_model_from_url(request_url),
73
73
  input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
74
74
  output_tokens: output_tokens(usage),
75
- total_tokens: usage["totalTokenCount"].to_i,
75
+ total_tokens: total_tokens(usage, cache_read),
76
76
  cache_read_input_tokens: usage["cachedContentTokenCount"],
77
77
  hidden_output_tokens: usage["thoughtsTokenCount"],
78
78
  stream: stream,
@@ -92,6 +92,13 @@ module LlmCostTracker
92
92
  usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
93
93
  end
94
94
 
95
+ def total_tokens(usage, cache_read)
96
+ total = usage["totalTokenCount"]
97
+ return total.to_i unless total.nil?
98
+
99
+ [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
100
+ end
101
+
95
102
  def stream_response_id(events)
96
103
  find_event_value(events) { |data| data["responseId"] }
97
104
  end
@@ -21,7 +21,7 @@ module LlmCostTracker
21
21
  model: response["model"] || request["model"],
22
22
  input_tokens: regular_input_tokens(usage, cache_read),
23
23
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
24
- total_tokens: usage["total_tokens"].to_i,
24
+ total_tokens: total_tokens(usage, cache_read),
25
25
  cache_read_input_tokens: cache_read,
26
26
  hidden_output_tokens: hidden_output_tokens(usage),
27
27
  usage_source: :response
@@ -44,7 +44,7 @@ module LlmCostTracker
44
44
  model: model,
45
45
  input_tokens: regular_input_tokens(usage, cache_read),
46
46
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
47
- total_tokens: usage["total_tokens"].to_i,
47
+ total_tokens: total_tokens(usage, cache_read),
48
48
  cache_read_input_tokens: cache_read,
49
49
  hidden_output_tokens: hidden_output_tokens(usage),
50
50
  stream: true,
@@ -87,6 +87,15 @@ module LlmCostTracker
87
87
  details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
88
88
  details["reasoning_tokens"]
89
89
  end
90
+
91
+ def total_tokens(usage, cache_read)
92
+ total = usage["total_tokens"]
93
+ return total.to_i unless total.nil?
94
+
95
+ regular_input_tokens(usage, cache_read) +
96
+ cache_read.to_i +
97
+ (usage["completion_tokens"] || usage["output_tokens"]).to_i
98
+ end
90
99
  end
91
100
  end
92
101
  end
@@ -12,6 +12,7 @@ module LlmCostTracker
12
12
  EMPTY_PRICES = {}.freeze
13
13
  PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
14
14
  METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
15
+ MAX_FILE_BYTES = 2_097_152
15
16
  MUTEX = Monitor.new
16
17
 
17
18
  class << self
@@ -114,6 +115,8 @@ module LlmCostTracker
114
115
  end
115
116
 
116
117
  def load_price_file(path)
118
+ raise ArgumentError, "prices_file exceeds #{MAX_FILE_BYTES} bytes" if File.size(path) > MAX_FILE_BYTES
119
+
117
120
  contents = File.read(path)
118
121
  return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
119
122
 
@@ -17,6 +17,7 @@ module LlmCostTracker
17
17
 
18
18
  USER_AGENT = "llm_cost_tracker price refresh"
19
19
  MAX_REDIRECTS = 5
20
+ MAX_BODY_BYTES = 2_097_152
20
21
  OPEN_TIMEOUT = 5
21
22
  READ_TIMEOUT = 10
22
23
  WRITE_TIMEOUT = 10
@@ -25,26 +26,17 @@ module LlmCostTracker
25
26
  raise Error, "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS
26
27
 
27
28
  uri = URI.parse(url)
28
- raise Error, "Pricing snapshot URL must use http or https" unless %w[http https].include?(uri.scheme)
29
+ raise Error, "Pricing snapshot URL must use https" unless uri.scheme == "https"
29
30
 
30
31
  request = Net::HTTP::Get.new(uri)
31
32
  request["User-Agent"] = USER_AGENT
32
33
  request["If-None-Match"] = etag if etag
33
34
 
34
- response = Net::HTTP.start(
35
- uri.host,
36
- uri.port,
37
- use_ssl: uri.scheme == "https",
38
- open_timeout: OPEN_TIMEOUT,
39
- read_timeout: READ_TIMEOUT,
40
- write_timeout: WRITE_TIMEOUT
41
- ) do |http|
42
- http.request(request)
43
- end
35
+ response, body = fetch_response(uri, request)
44
36
 
45
37
  case response
46
38
  when Net::HTTPSuccess
47
- build_response(response, not_modified: false)
39
+ build_response(response, body: body || limited_body(response), not_modified: false)
48
40
  when Net::HTTPNotModified
49
41
  build_response(response, body: nil, not_modified: true)
50
42
  when Net::HTTPRedirection
@@ -61,6 +53,43 @@ module LlmCostTracker
61
53
 
62
54
  private
63
55
 
56
+ def fetch_response(uri, request)
57
+ body = nil
58
+ response = Net::HTTP.start(
59
+ uri.host,
60
+ uri.port,
61
+ use_ssl: uri.scheme == "https",
62
+ open_timeout: OPEN_TIMEOUT,
63
+ read_timeout: READ_TIMEOUT,
64
+ write_timeout: WRITE_TIMEOUT
65
+ ) do |http|
66
+ http.request(request) do |streamed_response|
67
+ body = limited_body(streamed_response) if streamed_response.is_a?(Net::HTTPSuccess)
68
+ end
69
+ end
70
+
71
+ [response, body]
72
+ end
73
+
74
+ def limited_body(response)
75
+ body = +""
76
+ if response.respond_to?(:read_body)
77
+ response.read_body do |chunk|
78
+ chunk = chunk.to_s
79
+ if body.bytesize + chunk.bytesize > MAX_BODY_BYTES
80
+ raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes"
81
+ end
82
+
83
+ body << chunk
84
+ end
85
+ else
86
+ body = response.body.to_s
87
+ end
88
+ raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes" if body.bytesize > MAX_BODY_BYTES
89
+
90
+ body
91
+ end
92
+
64
93
  def build_response(response, not_modified:, body: response.body)
65
94
  Response.new(
66
95
  body: body,
@@ -3,6 +3,8 @@
3
3
  require "json"
4
4
  require "yaml"
5
5
 
6
+ require_relative "../price_registry"
7
+
6
8
  module LlmCostTracker
7
9
  module PriceSync
8
10
  class RegistryLoader
@@ -18,6 +20,10 @@ module LlmCostTracker
18
20
  private
19
21
 
20
22
  def load_registry_file(path)
23
+ if File.size(path) > PriceRegistry::MAX_FILE_BYTES
24
+ raise ArgumentError, "pricing registry exceeds #{PriceRegistry::MAX_FILE_BYTES} bytes"
25
+ end
26
+
21
27
  contents = File.read(path)
22
28
  registry = yaml_file?(path) ? (YAML.safe_load(contents, aliases: false) || {}) : JSON.parse(contents)
23
29
  raise ArgumentError, "pricing registry must be a hash" unless registry.is_a?(Hash)
@@ -9,7 +9,14 @@ module LlmCostTracker
9
9
 
10
10
  class << self
11
11
  def generate(days: DEFAULT_DAYS, now: Time.now.utc, tag_breakdowns: nil)
12
- ReportFormatter.new(data(days: days, now: now, tag_breakdowns: tag_breakdowns)).to_s
12
+ report_data = ReportData.build(
13
+ days: days,
14
+ now: now,
15
+ tag_breakdowns: tag_breakdowns,
16
+ breakdown_limit: ReportFormatter::TOP_LIMIT
17
+ )
18
+
19
+ ReportFormatter.new(report_data).to_s
13
20
  rescue LoadError => e
14
21
  "Unable to build LLM cost report: ActiveRecord storage is unavailable (#{e.message})"
15
22
  rescue StandardError => e