llm_cost_tracker 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +43 -0
  3. data/README.md +18 -9
  4. data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
  5. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
  6. data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
  7. data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
  8. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
  9. data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
  10. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
  11. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
  12. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
  13. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
  14. data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
  15. data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
  16. data/docs/architecture.md +28 -0
  17. data/docs/budgets.md +45 -0
  18. data/docs/configuration.md +65 -0
  19. data/docs/cookbook.md +185 -0
  20. data/docs/dashboard-overview.png +0 -0
  21. data/docs/dashboard.md +38 -0
  22. data/docs/extending.md +32 -0
  23. data/docs/operations.md +44 -0
  24. data/docs/pricing.md +94 -0
  25. data/docs/querying.md +36 -0
  26. data/docs/streaming.md +70 -0
  27. data/docs/technical/README.md +10 -0
  28. data/docs/technical/data-flow.md +67 -0
  29. data/docs/technical/extension-points.md +111 -0
  30. data/docs/technical/module-map.md +197 -0
  31. data/docs/technical/operational-notes.md +77 -0
  32. data/docs/upgrading.md +46 -0
  33. data/lib/llm_cost_tracker/capture_verifier.rb +71 -0
  34. data/lib/llm_cost_tracker/configuration/instrumentation.rb +1 -1
  35. data/lib/llm_cost_tracker/configuration/storage_backend.rb +26 -0
  36. data/lib/llm_cost_tracker/configuration.rb +24 -17
  37. data/lib/llm_cost_tracker/doctor/capture_check.rb +39 -0
  38. data/lib/llm_cost_tracker/doctor.rb +6 -1
  39. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
  40. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
  41. data/lib/llm_cost_tracker/integrations/anthropic.rb +51 -3
  42. data/lib/llm_cost_tracker/integrations/base.rb +77 -6
  43. data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
  44. data/lib/llm_cost_tracker/integrations/openai.rb +78 -5
  45. data/lib/llm_cost_tracker/integrations/registry.rb +36 -4
  46. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
  47. data/lib/llm_cost_tracker/integrations/stream_tracker.rb +166 -0
  48. data/lib/llm_cost_tracker/llm_api_call.rb +2 -77
  49. data/lib/llm_cost_tracker/llm_api_call_metrics.rb +63 -0
  50. data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
  51. data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
  52. data/lib/llm_cost_tracker/parsers/openai_usage.rb +12 -3
  53. data/lib/llm_cost_tracker/price_registry.rb +3 -0
  54. data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
  55. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
  56. data/lib/llm_cost_tracker/pricing/effective_prices.rb +75 -0
  57. data/lib/llm_cost_tracker/pricing/explainer.rb +77 -0
  58. data/lib/llm_cost_tracker/pricing/lookup.rb +110 -0
  59. data/lib/llm_cost_tracker/pricing.rb +25 -108
  60. data/lib/llm_cost_tracker/report.rb +8 -1
  61. data/lib/llm_cost_tracker/report_data.rb +25 -9
  62. data/lib/llm_cost_tracker/retention.rb +33 -16
  63. data/lib/llm_cost_tracker/storage/active_record_backend.rb +115 -0
  64. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +42 -0
  65. data/lib/llm_cost_tracker/storage/active_record_store.rb +26 -0
  66. data/lib/llm_cost_tracker/storage/custom_backend.rb +32 -0
  67. data/lib/llm_cost_tracker/storage/dispatcher.rb +11 -34
  68. data/lib/llm_cost_tracker/storage/log_backend.rb +38 -0
  69. data/lib/llm_cost_tracker/storage/registry.rb +63 -0
  70. data/lib/llm_cost_tracker/stream_capture.rb +7 -0
  71. data/lib/llm_cost_tracker/stream_collector.rb +25 -1
  72. data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
  73. data/lib/llm_cost_tracker/tag_sql.rb +34 -0
  74. data/lib/llm_cost_tracker/tracker.rb +6 -2
  75. data/lib/llm_cost_tracker/version.rb +1 -1
  76. data/lib/llm_cost_tracker.rb +4 -0
  77. data/lib/tasks/llm_cost_tracker.rake +49 -0
  78. metadata +40 -6
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "monitor"
4
+
5
+ require_relative "../logging"
6
+ require_relative "../stream_collector"
7
+ require_relative "../value_helpers"
8
+ require_relative "object_reader"
9
+
10
+ module LlmCostTracker
11
+ module Integrations
12
+ class StreamTracker
13
+ def self.wrap(stream, collector:, active:, finish: nil) = new(stream, collector, active, finish).wrap
14
+
15
+ def initialize(stream, collector, active, finish)
16
+ @stream = stream
17
+ @collector = collector
18
+ @active = active
19
+ @finish = finish || proc { |errored:| @collector.finish!(errored: errored) }
20
+ @finished = false
21
+ @capture_failed = false
22
+ @monitor = Monitor.new
23
+ end
24
+
25
+ def wrap
26
+ return @stream unless @stream
27
+
28
+ iterator_wrapped = @stream.instance_variable_defined?(:@iterator) && wrap_iterator?
29
+ wrap_each if !iterator_wrapped && @stream.respond_to?(:each)
30
+
31
+ @stream
32
+ rescue StandardError => e
33
+ Logging.warn("stream integration failed to install wrapper: #{e.class}: #{e.message}")
34
+ @stream
35
+ end
36
+
37
+ private
38
+
39
+ def wrap_iterator?
40
+ iterator = @stream.instance_variable_get(:@iterator)
41
+ return false unless iterator.respond_to?(:each)
42
+
43
+ @stream.instance_variable_set(:@iterator, tracked_iterator(iterator))
44
+ true
45
+ end
46
+
47
+ def wrap_each
48
+ tracker = self
49
+ original_each = @stream.method(:each)
50
+ @stream.define_singleton_method(:each) do |&block|
51
+ next enum_for(:each) unless block
52
+
53
+ tracker.__send__(:each_from, original_each, &block)
54
+ end
55
+ end
56
+
57
+ def tracked_iterator(iterator)
58
+ Enumerator.new do |yielder|
59
+ each_from(iterator) { |event| yielder << event }
60
+ end
61
+ end
62
+
63
+ def each_from(iterable)
64
+ errored = false
65
+ iterate(iterable) do |event|
66
+ capture(event)
67
+ yield event
68
+ end
69
+ rescue StandardError
70
+ errored = true
71
+ raise
72
+ ensure
73
+ finish!(errored: errored)
74
+ end
75
+
76
+ def iterate(iterable, &)
77
+ if iterable.respond_to?(:each)
78
+ iterable.each(&)
79
+ else
80
+ iterable.call(&)
81
+ end
82
+ end
83
+
84
+ def capture(event)
85
+ payload = normalize(event_payload(event))
86
+ @collector.event(payload, type: event_type(event, payload))
87
+ rescue StandardError => e
88
+ warn_capture_failure(e)
89
+ end
90
+
91
+ def event_payload(event)
92
+ if event.respond_to?(:deep_to_h)
93
+ event.deep_to_h
94
+ elsif event.respond_to?(:to_h)
95
+ event.to_h
96
+ else
97
+ event_attributes(event)
98
+ end
99
+ end
100
+
101
+ def event_attributes(event)
102
+ %i[type id model usage response message].each_with_object({}) do |key, attributes|
103
+ value = ObjectReader.read(event, key)
104
+ attributes[key] = value unless value.nil?
105
+ end
106
+ end
107
+
108
+ def event_type(event, payload)
109
+ value = ObjectReader.first(event, :type) || payload["type"]
110
+ value&.to_s
111
+ end
112
+
113
+ def normalize(value)
114
+ case value
115
+ when Hash
116
+ value.each_with_object({}) do |(key, nested), normalized|
117
+ normalized[key.to_s] = normalize(nested)
118
+ end
119
+ when Array
120
+ value.map { |nested| normalize(nested) }
121
+ when Symbol
122
+ value.to_s
123
+ when NilClass
124
+ nil
125
+ else
126
+ converted = object_hash(value)
127
+ converted ? normalize(converted) : ValueHelpers.deep_dup(value)
128
+ end
129
+ end
130
+
131
+ def object_hash(value)
132
+ if value.respond_to?(:deep_to_h)
133
+ value.deep_to_h
134
+ elsif value.respond_to?(:to_h)
135
+ value.to_h
136
+ end
137
+ rescue StandardError
138
+ nil
139
+ end
140
+
141
+ def warn_capture_failure(error)
142
+ should_warn = @monitor.synchronize do
143
+ next false if @capture_failed
144
+
145
+ @capture_failed = true
146
+ true
147
+ end
148
+ return unless should_warn
149
+
150
+ Logging.warn("stream integration failed to capture event: #{error.class}: #{error.message}")
151
+ end
152
+
153
+ def finish!(errored:)
154
+ should_finish = @monitor.synchronize do
155
+ next false if @finished
156
+
157
+ @finished = true
158
+ true
159
+ end
160
+ return unless should_finish && @active.call
161
+
162
+ @finish.call(errored: errored)
163
+ end
164
+ end
165
+ end
166
+ end
@@ -2,9 +2,9 @@
2
2
 
3
3
  require "active_record"
4
4
 
5
+ require_relative "llm_api_call_metrics"
5
6
  require_relative "period_grouping"
6
7
  require_relative "tag_accessors"
7
- require_relative "tag_key"
8
8
  require_relative "tag_query"
9
9
  require_relative "tags_column"
10
10
 
@@ -12,6 +12,7 @@ module LlmCostTracker
12
12
  class LlmApiCall < ActiveRecord::Base
13
13
  extend PeriodGrouping
14
14
  extend TagsColumn
15
+ extend LlmApiCallMetrics
15
16
  include TagAccessors
16
17
 
17
18
  self.table_name = "llm_api_calls"
@@ -55,81 +56,5 @@ module LlmCostTracker
55
56
  def self.by_tags(tags)
56
57
  TagQuery.apply(self, tags)
57
58
  end
58
-
59
- def self.total_cost
60
- sum(:total_cost).to_f
61
- end
62
-
63
- def self.total_tokens
64
- sum(:total_tokens).to_i
65
- end
66
-
67
- def self.cost_by_model
68
- group(:model).sum(:total_cost)
69
- end
70
-
71
- def self.cost_by_provider
72
- group(:provider).sum(:total_cost)
73
- end
74
-
75
- def self.group_by_tag(key)
76
- group(Arel.sql(tag_group_expression(key)))
77
- end
78
-
79
- def self.cost_by_tag(key)
80
- costs = group_by_tag(key).sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
81
- grouped[tag_label(tag_value)] += cost.to_f
82
- end
83
- costs.sort_by { |_label, cost| -cost }.to_h
84
- end
85
-
86
- def self.average_latency_ms
87
- return nil unless latency_column?
88
-
89
- average(:latency_ms)&.to_f
90
- end
91
-
92
- def self.latency_by_model
93
- return {} unless latency_column?
94
-
95
- group(:model).average(:latency_ms).transform_values(&:to_f)
96
- end
97
-
98
- def self.latency_by_provider
99
- return {} unless latency_column?
100
-
101
- group(:provider).average(:latency_ms).transform_values(&:to_f)
102
- end
103
-
104
- def self.tag_label(value)
105
- value.nil? || value == "" ? "(untagged)" : value.to_s
106
- end
107
- private_class_method :tag_label
108
-
109
- def self.tag_group_expression(key)
110
- key = validated_tag_key(key)
111
- column = "#{quoted_table_name}.#{connection.quote_column_name('tags')}"
112
-
113
- case connection.adapter_name
114
- when /postgres/i
115
- json_column = tags_jsonb_column? ? column : "(#{column})::jsonb"
116
- "#{json_column}->>#{connection.quote(key)}"
117
- when /mysql/i
118
- "JSON_UNQUOTE(JSON_EXTRACT(#{column}, #{connection.quote(json_path(key))}))"
119
- else
120
- "json_extract(#{column}, #{connection.quote(json_path(key))})"
121
- end
122
- end
123
- private_class_method :tag_group_expression
124
-
125
- def self.validated_tag_key(key)
126
- TagKey.validate!(key)
127
- end
128
- private_class_method :validated_tag_key
129
-
130
- def self.json_path(key)
131
- "$.\"#{key}\""
132
- end
133
- private_class_method :json_path
134
59
  end
135
60
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "tag_sql"
4
+
5
+ module LlmCostTracker
6
+ module LlmApiCallMetrics
7
+ def total_cost
8
+ sum(:total_cost).to_f
9
+ end
10
+
11
+ def total_tokens
12
+ sum(:total_tokens).to_i
13
+ end
14
+
15
+ def cost_by_model
16
+ group(:model).sum(:total_cost)
17
+ end
18
+
19
+ def cost_by_provider
20
+ group(:provider).sum(:total_cost)
21
+ end
22
+
23
+ def group_by_tag(key)
24
+ group(Arel.sql(tag_value_expression(key)))
25
+ end
26
+
27
+ def cost_by_tag(key, limit: nil)
28
+ relation = group_by_tag(key).order(Arel.sql("COALESCE(SUM(total_cost), 0) DESC"))
29
+ relation = relation.limit(limit) if limit
30
+
31
+ costs = relation.sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
32
+ grouped[tag_value_label(tag_value)] += cost.to_f
33
+ end
34
+ costs.sort_by { |_label, cost| -cost }.to_h
35
+ end
36
+
37
+ def average_latency_ms
38
+ return nil unless latency_column?
39
+
40
+ average(:latency_ms)&.to_f
41
+ end
42
+
43
+ def latency_by_model
44
+ return {} unless latency_column?
45
+
46
+ group(:model).average(:latency_ms).transform_values(&:to_f)
47
+ end
48
+
49
+ def latency_by_provider
50
+ return {} unless latency_column?
51
+
52
+ group(:provider).average(:latency_ms).transform_values(&:to_f)
53
+ end
54
+
55
+ def tag_value_label(value)
56
+ TagSql.value_label(value)
57
+ end
58
+
59
+ def tag_value_expression(key, table_name: quoted_table_name)
60
+ TagSql.value_expression(self, key, table_name: table_name)
61
+ end
62
+ end
63
+ end
@@ -5,12 +5,11 @@ require "json"
5
5
 
6
6
  require_relative "../logging"
7
7
  require_relative "../request_url"
8
+ require_relative "../stream_capture"
8
9
 
9
10
  module LlmCostTracker
10
11
  module Middleware
11
12
  class Faraday < ::Faraday::Middleware
12
- STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
13
-
14
13
  def initialize(app, **options)
15
14
  super(app)
16
15
  @tags = options.fetch(:tags, {})
@@ -88,6 +87,11 @@ module LlmCostTracker
88
87
  end
89
88
 
90
89
  def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
90
+ if stream_buffer&.dig(:overflowed)
91
+ Logging.warn(capture_warning(request_url, stream_buffer))
92
+ return parser.parse_stream(request_url, request_body, response_env.status, [])
93
+ end
94
+
91
95
  body = stream_buffer&.dig(:buffer)&.string
92
96
  body = read_body(response_env.body) if body.nil? || body.empty?
93
97
 
@@ -110,7 +114,7 @@ module LlmCostTracker
110
114
  request_env.request.on_data = proc do |chunk, size, env|
111
115
  chunk = chunk.to_s
112
116
  unless state[:overflowed]
113
- if state[:bytes] + chunk.bytesize <= STREAM_CAPTURE_LIMIT_BYTES
117
+ if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
114
118
  state[:buffer] << chunk
115
119
  state[:bytes] += chunk.bytesize
116
120
  else
@@ -161,7 +165,7 @@ module LlmCostTracker
161
165
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
162
166
  end
163
167
 
164
- "Streaming response for #{RequestUrl.label(request_url)} exceeded #{STREAM_CAPTURE_LIMIT_BYTES} bytes; " \
168
+ "Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
165
169
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
166
170
  end
167
171
  end
@@ -72,7 +72,7 @@ module LlmCostTracker
72
72
  model: extract_model_from_url(request_url),
73
73
  input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
74
74
  output_tokens: output_tokens(usage),
75
- total_tokens: usage["totalTokenCount"].to_i,
75
+ total_tokens: total_tokens(usage, cache_read),
76
76
  cache_read_input_tokens: usage["cachedContentTokenCount"],
77
77
  hidden_output_tokens: usage["thoughtsTokenCount"],
78
78
  stream: stream,
@@ -92,6 +92,13 @@ module LlmCostTracker
92
92
  usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
93
93
  end
94
94
 
95
+ def total_tokens(usage, cache_read)
96
+ total = usage["totalTokenCount"]
97
+ return total.to_i unless total.nil?
98
+
99
+ [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
100
+ end
101
+
95
102
  def stream_response_id(events)
96
103
  find_event_value(events) { |data| data["responseId"] }
97
104
  end
@@ -21,7 +21,7 @@ module LlmCostTracker
21
21
  model: response["model"] || request["model"],
22
22
  input_tokens: regular_input_tokens(usage, cache_read),
23
23
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
24
- total_tokens: usage["total_tokens"].to_i,
24
+ total_tokens: total_tokens(usage, cache_read),
25
25
  cache_read_input_tokens: cache_read,
26
26
  hidden_output_tokens: hidden_output_tokens(usage),
27
27
  usage_source: :response
@@ -44,7 +44,7 @@ module LlmCostTracker
44
44
  model: model,
45
45
  input_tokens: regular_input_tokens(usage, cache_read),
46
46
  output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
47
- total_tokens: usage["total_tokens"].to_i,
47
+ total_tokens: total_tokens(usage, cache_read),
48
48
  cache_read_input_tokens: cache_read,
49
49
  hidden_output_tokens: hidden_output_tokens(usage),
50
50
  stream: true,
@@ -61,7 +61,7 @@ module LlmCostTracker
61
61
 
62
62
  def detect_stream_usage(events)
63
63
  find_event_value(events, reverse: true) do |data|
64
- usage = data["usage"]
64
+ usage = data["usage"] || data.dig("response", "usage")
65
65
  usage if usage.is_a?(Hash)
66
66
  end
67
67
  end
@@ -87,6 +87,15 @@ module LlmCostTracker
87
87
  details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
88
88
  details["reasoning_tokens"]
89
89
  end
90
+
91
+ def total_tokens(usage, cache_read)
92
+ total = usage["total_tokens"]
93
+ return total.to_i unless total.nil?
94
+
95
+ regular_input_tokens(usage, cache_read) +
96
+ cache_read.to_i +
97
+ (usage["completion_tokens"] || usage["output_tokens"]).to_i
98
+ end
90
99
  end
91
100
  end
92
101
  end
@@ -12,6 +12,7 @@ module LlmCostTracker
12
12
  EMPTY_PRICES = {}.freeze
13
13
  PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
14
14
  METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
15
+ MAX_FILE_BYTES = 2_097_152
15
16
  MUTEX = Monitor.new
16
17
 
17
18
  class << self
@@ -114,6 +115,8 @@ module LlmCostTracker
114
115
  end
115
116
 
116
117
  def load_price_file(path)
118
+ raise ArgumentError, "prices_file exceeds #{MAX_FILE_BYTES} bytes" if File.size(path) > MAX_FILE_BYTES
119
+
117
120
  contents = File.read(path)
118
121
  return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
119
122
 
@@ -17,6 +17,7 @@ module LlmCostTracker
17
17
 
18
18
  USER_AGENT = "llm_cost_tracker price refresh"
19
19
  MAX_REDIRECTS = 5
20
+ MAX_BODY_BYTES = 2_097_152
20
21
  OPEN_TIMEOUT = 5
21
22
  READ_TIMEOUT = 10
22
23
  WRITE_TIMEOUT = 10
@@ -25,26 +26,17 @@ module LlmCostTracker
25
26
  raise Error, "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS
26
27
 
27
28
  uri = URI.parse(url)
28
- raise Error, "Pricing snapshot URL must use http or https" unless %w[http https].include?(uri.scheme)
29
+ raise Error, "Pricing snapshot URL must use https" unless uri.scheme == "https"
29
30
 
30
31
  request = Net::HTTP::Get.new(uri)
31
32
  request["User-Agent"] = USER_AGENT
32
33
  request["If-None-Match"] = etag if etag
33
34
 
34
- response = Net::HTTP.start(
35
- uri.host,
36
- uri.port,
37
- use_ssl: uri.scheme == "https",
38
- open_timeout: OPEN_TIMEOUT,
39
- read_timeout: READ_TIMEOUT,
40
- write_timeout: WRITE_TIMEOUT
41
- ) do |http|
42
- http.request(request)
43
- end
35
+ response, body = fetch_response(uri, request)
44
36
 
45
37
  case response
46
38
  when Net::HTTPSuccess
47
- build_response(response, not_modified: false)
39
+ build_response(response, body: body || limited_body(response), not_modified: false)
48
40
  when Net::HTTPNotModified
49
41
  build_response(response, body: nil, not_modified: true)
50
42
  when Net::HTTPRedirection
@@ -61,6 +53,43 @@ module LlmCostTracker
61
53
 
62
54
  private
63
55
 
56
+ def fetch_response(uri, request)
57
+ body = nil
58
+ response = Net::HTTP.start(
59
+ uri.host,
60
+ uri.port,
61
+ use_ssl: uri.scheme == "https",
62
+ open_timeout: OPEN_TIMEOUT,
63
+ read_timeout: READ_TIMEOUT,
64
+ write_timeout: WRITE_TIMEOUT
65
+ ) do |http|
66
+ http.request(request) do |streamed_response|
67
+ body = limited_body(streamed_response) if streamed_response.is_a?(Net::HTTPSuccess)
68
+ end
69
+ end
70
+
71
+ [response, body]
72
+ end
73
+
74
+ def limited_body(response)
75
+ body = +""
76
+ if response.respond_to?(:read_body)
77
+ response.read_body do |chunk|
78
+ chunk = chunk.to_s
79
+ if body.bytesize + chunk.bytesize > MAX_BODY_BYTES
80
+ raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes"
81
+ end
82
+
83
+ body << chunk
84
+ end
85
+ else
86
+ body = response.body.to_s
87
+ end
88
+ raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes" if body.bytesize > MAX_BODY_BYTES
89
+
90
+ body
91
+ end
92
+
64
93
  def build_response(response, not_modified:, body: response.body)
65
94
  Response.new(
66
95
  body: body,
@@ -3,6 +3,8 @@
3
3
  require "json"
4
4
  require "yaml"
5
5
 
6
+ require_relative "../price_registry"
7
+
6
8
  module LlmCostTracker
7
9
  module PriceSync
8
10
  class RegistryLoader
@@ -18,6 +20,10 @@ module LlmCostTracker
18
20
  private
19
21
 
20
22
  def load_registry_file(path)
23
+ if File.size(path) > PriceRegistry::MAX_FILE_BYTES
24
+ raise ArgumentError, "pricing registry exceeds #{PriceRegistry::MAX_FILE_BYTES} bytes"
25
+ end
26
+
21
27
  contents = File.read(path)
22
28
  registry = yaml_file?(path) ? (YAML.safe_load(contents, aliases: false) || {}) : JSON.parse(contents)
23
29
  raise ArgumentError, "pricing registry must be a hash" unless registry.is_a?(Hash)
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Pricing
5
+ EffectivePriceSet = Data.define(:input, :cache_read_input, :cache_write_input, :output) do
6
+ def to_h
7
+ {
8
+ input: input,
9
+ cache_read_input: cache_read_input,
10
+ cache_write_input: cache_write_input,
11
+ output: output
12
+ }
13
+ end
14
+
15
+ def complete?
16
+ missing_keys.empty?
17
+ end
18
+
19
+ def missing_keys
20
+ to_h.filter_map { |key, value| key if value.nil? }
21
+ end
22
+ end
23
+
24
+ module EffectivePrices
25
+ class << self
26
+ def call(usage:, prices:, pricing_mode:)
27
+ EffectivePriceSet.new(
28
+ input: price_for_usage(usage.input_tokens, prices, :input, pricing_mode),
29
+ cache_read_input: price_for_cache_usage(
30
+ usage.cache_read_input_tokens,
31
+ prices,
32
+ :cache_read_input,
33
+ pricing_mode
34
+ ),
35
+ cache_write_input: price_for_cache_usage(
36
+ usage.cache_write_input_tokens,
37
+ prices,
38
+ :cache_write_input,
39
+ pricing_mode
40
+ ),
41
+ output: price_for_usage(usage.output_tokens, prices, :output, pricing_mode)
42
+ )
43
+ end
44
+
45
+ private
46
+
47
+ def price_for_cache_usage(tokens, prices, key, pricing_mode)
48
+ return 0.0 unless tokens.positive?
49
+
50
+ price_for(prices, key, pricing_mode) || price_for(prices, :input, pricing_mode)
51
+ end
52
+
53
+ def price_for_usage(tokens, prices, key, pricing_mode)
54
+ tokens.positive? ? price_for(prices, key, pricing_mode) : 0.0
55
+ end
56
+
57
+ def price_for(prices, key, pricing_mode)
58
+ mode = normalized_pricing_mode(pricing_mode)
59
+ return prices[key] unless mode
60
+
61
+ prices[:"#{mode}_#{key}"] || prices[key]
62
+ end
63
+
64
+ def normalized_pricing_mode(value)
65
+ return nil if value.nil?
66
+
67
+ mode = value.to_s.strip
68
+ return nil if mode.empty? || mode == "standard"
69
+
70
+ mode
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end