llm_cost_tracker 0.7.0 → 0.7.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/README.md +21 -16
  4. data/app/assets/llm_cost_tracker/application.css +3 -0
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +22 -4
  6. data/app/controllers/llm_cost_tracker/calls_controller.rb +6 -11
  7. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +2 -1
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +5 -1
  9. data/app/controllers/llm_cost_tracker/models_controller.rb +0 -1
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +1 -8
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +2 -1
  12. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +1 -2
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -1
  14. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +10 -27
  15. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +58 -0
  16. data/app/models/llm_cost_tracker/ingestion/event.rb +13 -0
  17. data/app/models/llm_cost_tracker/ingestion/lease.rb +11 -0
  18. data/app/models/llm_cost_tracker/ledger/call.rb +45 -0
  19. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +66 -0
  20. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +71 -0
  21. data/app/models/llm_cost_tracker/ledger/period/total.rb +13 -0
  22. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +19 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +111 -94
  24. data/app/services/llm_cost_tracker/dashboard/date_range.rb +2 -2
  25. data/app/services/llm_cost_tracker/dashboard/filter.rb +7 -18
  26. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +58 -67
  27. data/app/services/llm_cost_tracker/dashboard/pagination.rb +59 -0
  28. data/app/services/llm_cost_tracker/dashboard/params.rb +26 -0
  29. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +18 -20
  30. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -13
  31. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +28 -61
  32. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +8 -21
  33. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/top_models.rb +12 -47
  35. data/app/views/llm_cost_tracker/calls/index.html.erb +12 -18
  36. data/app/views/llm_cost_tracker/calls/show.html.erb +30 -32
  37. data/app/views/llm_cost_tracker/dashboard/index.html.erb +17 -19
  38. data/app/views/llm_cost_tracker/data_quality/index.html.erb +108 -135
  39. data/app/views/llm_cost_tracker/models/index.html.erb +8 -9
  40. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +13 -2
  41. data/app/views/llm_cost_tracker/tags/show.html.erb +20 -20
  42. data/lib/llm_cost_tracker/budget.rb +8 -20
  43. data/lib/llm_cost_tracker/capture/stream.rb +9 -0
  44. data/lib/llm_cost_tracker/capture/stream_collector.rb +189 -0
  45. data/lib/llm_cost_tracker/{integrations → capture}/stream_tracker.rb +41 -73
  46. data/lib/llm_cost_tracker/configuration/instrumentation.rb +3 -7
  47. data/lib/llm_cost_tracker/configuration.rb +33 -36
  48. data/lib/llm_cost_tracker/doctor/capture_verifier.rb +61 -0
  49. data/lib/llm_cost_tracker/doctor/check.rb +7 -0
  50. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +22 -59
  51. data/lib/llm_cost_tracker/doctor/price_check.rb +60 -0
  52. data/lib/llm_cost_tracker/doctor.rb +63 -71
  53. data/lib/llm_cost_tracker/errors.rb +4 -15
  54. data/lib/llm_cost_tracker/event.rb +6 -6
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +42 -0
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +7 -7
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +3 -3
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +22 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +9 -14
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +0 -4
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +12 -1
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +2 -2
  64. data/lib/llm_cost_tracker/{storage/active_record_inbox_batch.rb → ingestion/batch.rb} +21 -20
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +105 -0
  66. data/lib/llm_cost_tracker/{storage/active_record_ingestor_lease.rb → ingestion/lease_claim.rb} +5 -7
  67. data/lib/llm_cost_tracker/{storage/active_record_ingestor.rb → ingestion/worker.rb} +38 -48
  68. data/lib/llm_cost_tracker/ingestion.rb +129 -0
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +66 -31
  70. data/lib/llm_cost_tracker/integrations/base.rb +73 -34
  71. data/lib/llm_cost_tracker/integrations/openai.rb +43 -37
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +40 -30
  73. data/lib/llm_cost_tracker/integrations.rb +43 -0
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +66 -0
  75. data/lib/llm_cost_tracker/{storage/active_record_periods.rb → ledger/period.rb} +2 -2
  76. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +43 -0
  77. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +46 -0
  78. data/lib/llm_cost_tracker/ledger/rollups.rb +87 -0
  79. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +51 -0
  80. data/lib/llm_cost_tracker/ledger/schema/calls.rb +101 -0
  81. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +32 -0
  82. data/lib/llm_cost_tracker/ledger/store.rb +60 -0
  83. data/lib/llm_cost_tracker/ledger/tags/query.rb +29 -0
  84. data/lib/llm_cost_tracker/ledger/tags/sql.rb +33 -0
  85. data/lib/llm_cost_tracker/ledger.rb +13 -0
  86. data/lib/llm_cost_tracker/logging.rb +3 -6
  87. data/lib/llm_cost_tracker/middleware/faraday.rb +88 -46
  88. data/lib/llm_cost_tracker/parsers/anthropic.rb +62 -29
  89. data/lib/llm_cost_tracker/parsers/base.rb +12 -21
  90. data/lib/llm_cost_tracker/parsers/gemini.rb +50 -25
  91. data/lib/llm_cost_tracker/parsers/openai.rb +27 -5
  92. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +14 -4
  93. data/lib/llm_cost_tracker/parsers/openai_usage.rb +58 -25
  94. data/lib/llm_cost_tracker/parsers/sse.rb +4 -7
  95. data/lib/llm_cost_tracker/parsers.rb +20 -0
  96. data/lib/llm_cost_tracker/prices.json +361 -36
  97. data/lib/llm_cost_tracker/pricing/components.rb +37 -0
  98. data/lib/llm_cost_tracker/pricing/effective_prices.rb +46 -50
  99. data/lib/llm_cost_tracker/pricing/explainer.rb +25 -30
  100. data/lib/llm_cost_tracker/pricing/lookup.rb +67 -46
  101. data/lib/llm_cost_tracker/pricing/registry.rb +156 -0
  102. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +107 -0
  103. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +53 -0
  104. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +63 -0
  105. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +31 -0
  106. data/lib/llm_cost_tracker/pricing/sync.rb +159 -0
  107. data/lib/llm_cost_tracker/pricing/unknown.rb +46 -0
  108. data/lib/llm_cost_tracker/pricing.rb +33 -32
  109. data/lib/llm_cost_tracker/railtie.rb +7 -8
  110. data/lib/llm_cost_tracker/report/data.rb +72 -0
  111. data/lib/llm_cost_tracker/report/formatter.rb +69 -0
  112. data/lib/llm_cost_tracker/report.rb +8 -8
  113. data/lib/llm_cost_tracker/retention.rb +27 -10
  114. data/lib/llm_cost_tracker/tags/context.rb +35 -0
  115. data/lib/llm_cost_tracker/tags/key.rb +18 -0
  116. data/lib/llm_cost_tracker/tags/sanitizer.rb +68 -0
  117. data/lib/llm_cost_tracker/token_usage.rb +67 -0
  118. data/lib/llm_cost_tracker/tracker.rb +39 -69
  119. data/lib/llm_cost_tracker/usage_capture.rb +37 -0
  120. data/lib/llm_cost_tracker/version.rb +1 -1
  121. data/lib/llm_cost_tracker.rb +56 -78
  122. data/lib/tasks/llm_cost_tracker.rake +18 -13
  123. metadata +54 -58
  124. data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +0 -81
  125. data/app/services/llm_cost_tracker/pagination.rb +0 -57
  126. data/lib/llm_cost_tracker/active_record_adapter.rb +0 -53
  127. data/lib/llm_cost_tracker/capture_verifier.rb +0 -64
  128. data/lib/llm_cost_tracker/cost.rb +0 -12
  129. data/lib/llm_cost_tracker/doctor/capture_check.rb +0 -39
  130. data/lib/llm_cost_tracker/event_metadata.rb +0 -52
  131. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +0 -29
  132. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +0 -29
  133. data/lib/llm_cost_tracker/inbox_event.rb +0 -9
  134. data/lib/llm_cost_tracker/ingestor_lease.rb +0 -9
  135. data/lib/llm_cost_tracker/integrations/object_reader.rb +0 -56
  136. data/lib/llm_cost_tracker/integrations/registry.rb +0 -71
  137. data/lib/llm_cost_tracker/llm_api_call.rb +0 -60
  138. data/lib/llm_cost_tracker/llm_api_call_metrics.rb +0 -63
  139. data/lib/llm_cost_tracker/parameter_hash.rb +0 -33
  140. data/lib/llm_cost_tracker/parsed_usage.rb +0 -72
  141. data/lib/llm_cost_tracker/parsers/registry.rb +0 -58
  142. data/lib/llm_cost_tracker/period_grouping.rb +0 -67
  143. data/lib/llm_cost_tracker/period_total.rb +0 -9
  144. data/lib/llm_cost_tracker/price_freshness.rb +0 -38
  145. data/lib/llm_cost_tracker/price_registry.rb +0 -144
  146. data/lib/llm_cost_tracker/price_sync/fetcher.rb +0 -104
  147. data/lib/llm_cost_tracker/price_sync/registry_diff.rb +0 -51
  148. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +0 -61
  149. data/lib/llm_cost_tracker/price_sync/registry_writer.rb +0 -29
  150. data/lib/llm_cost_tracker/price_sync.rb +0 -144
  151. data/lib/llm_cost_tracker/report_data.rb +0 -94
  152. data/lib/llm_cost_tracker/report_formatter.rb +0 -67
  153. data/lib/llm_cost_tracker/request_url.rb +0 -20
  154. data/lib/llm_cost_tracker/storage/active_record_backend.rb +0 -167
  155. data/lib/llm_cost_tracker/storage/active_record_connection_cleanup.rb +0 -13
  156. data/lib/llm_cost_tracker/storage/active_record_inbox.rb +0 -160
  157. data/lib/llm_cost_tracker/storage/active_record_period_totals.rb +0 -84
  158. data/lib/llm_cost_tracker/storage/active_record_rollup_batch.rb +0 -41
  159. data/lib/llm_cost_tracker/storage/active_record_rollup_upsert_sql.rb +0 -42
  160. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +0 -146
  161. data/lib/llm_cost_tracker/storage/active_record_store.rb +0 -145
  162. data/lib/llm_cost_tracker/storage/writer.rb +0 -35
  163. data/lib/llm_cost_tracker/stream_capture.rb +0 -7
  164. data/lib/llm_cost_tracker/stream_collector.rb +0 -199
  165. data/lib/llm_cost_tracker/tag_accessors.rb +0 -15
  166. data/lib/llm_cost_tracker/tag_context.rb +0 -52
  167. data/lib/llm_cost_tracker/tag_key.rb +0 -16
  168. data/lib/llm_cost_tracker/tag_query.rb +0 -43
  169. data/lib/llm_cost_tracker/tag_sanitizer.rb +0 -81
  170. data/lib/llm_cost_tracker/tag_sql.rb +0 -34
  171. data/lib/llm_cost_tracker/tags_column.rb +0 -105
  172. data/lib/llm_cost_tracker/unknown_pricing.rb +0 -54
  173. data/lib/llm_cost_tracker/usage_breakdown.rb +0 -30
  174. data/lib/llm_cost_tracker/value_helpers.rb +0 -40
@@ -2,10 +2,10 @@
2
2
 
3
3
  require "faraday"
4
4
  require "json"
5
+ require "uri"
5
6
 
6
7
  require_relative "../logging"
7
- require_relative "../request_url"
8
- require_relative "../stream_capture"
8
+ require_relative "../capture/stream"
9
9
 
10
10
  module LlmCostTracker
11
11
  module Middleware
@@ -20,51 +20,59 @@ module LlmCostTracker
20
20
 
21
21
  request_url = request_env.url.to_s
22
22
  request_body = read_body(request_env.body) || ""
23
- parser = Parsers::Registry.find_for(request_url)
23
+ parser = Parsers.find_for(request_url)
24
24
  streaming = parser&.streaming_request?(request_url, request_body)
25
25
  stream_buffer = install_stream_tap(request_env) if streaming
26
26
 
27
27
  Tracker.enforce_budget! if parser
28
- started_at = monotonic_time
28
+ context_tags, metadata = tag_snapshot(request_env) if parser
29
+ started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
29
30
 
30
31
  @app.call(request_env).on_complete do |response_env|
31
32
  process(
32
33
  parser: parser,
33
- request_env: request_env,
34
34
  request_url: request_url,
35
35
  request_body: request_body,
36
36
  response_env: response_env,
37
- latency_ms: elapsed_ms(started_at),
37
+ latency_ms: ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round,
38
38
  streaming: streaming,
39
- stream_buffer: stream_buffer
39
+ stream_buffer: stream_buffer,
40
+ context_tags: context_tags,
41
+ metadata: metadata
40
42
  )
41
43
  end
42
44
  end
43
45
 
44
46
  private
45
47
 
46
- def process(parser:, request_env:, request_url:, request_body:, response_env:,
47
- latency_ms:, streaming:, stream_buffer:)
48
+ def process(parser:, request_url:, request_body:, response_env:,
49
+ latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
48
50
  return unless parser
49
51
 
50
52
  parsed =
51
53
  if streaming
52
- parse_stream(parser, request_url, request_body, response_env, stream_buffer)
54
+ parse_stream(
55
+ parser: parser,
56
+ request_url: request_url,
57
+ request_body: request_body,
58
+ response_env: response_env,
59
+ stream_buffer: stream_buffer
60
+ )
53
61
  else
54
- parse_response(parser, request_url, request_body, response_env)
62
+ parse_response(
63
+ parser: parser,
64
+ request_url: request_url,
65
+ request_body: request_body,
66
+ response_env: response_env
67
+ )
55
68
  end
56
69
  return unless parsed
57
70
 
58
71
  Tracker.record(
59
- provider: parsed.provider,
60
- model: parsed.model,
61
- input_tokens: parsed.input_tokens,
62
- output_tokens: parsed.output_tokens,
72
+ capture: parsed,
63
73
  latency_ms: latency_ms,
64
- stream: parsed.stream,
65
- usage_source: parsed.usage_source,
66
- provider_response_id: parsed.provider_response_id,
67
- metadata: resolved_tags(request_env).merge(parsed.metadata)
74
+ metadata: metadata,
75
+ context_tags: context_tags
68
76
  )
69
77
  rescue LlmCostTracker::Error
70
78
  raise
@@ -72,49 +80,72 @@ module LlmCostTracker
72
80
  Logging.warn("Error processing response: #{e.class}: #{e.message}")
73
81
  end
74
82
 
75
- def parse_response(parser, request_url, request_body, response_env)
83
+ def parse_response(parser:, request_url:, request_body:, response_env:)
76
84
  response_body = read_body(response_env.body)
77
85
  unless response_body
78
86
  Logging.warn(
79
- "Unable to read response body for #{RequestUrl.label(request_url)}; " \
87
+ "Unable to read response body for #{request_url_label(request_url)}; " \
80
88
  "known streaming responses are captured automatically, or via LlmCostTracker.track_stream " \
81
89
  "for custom clients."
82
90
  )
83
91
  return nil
84
92
  end
85
93
 
86
- parser.parse(request_url, request_body, response_env.status, response_body)
94
+ parser.parse(
95
+ request_url: request_url,
96
+ request_body: request_body,
97
+ response_status: response_env.status,
98
+ response_body: response_body,
99
+ response_headers: response_env.response_headers
100
+ )
87
101
  end
88
102
 
89
- def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
103
+ def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
90
104
  if stream_buffer&.dig(:overflowed)
91
105
  Logging.warn(capture_warning(request_url, stream_buffer))
92
- return parser.parse_stream(request_url, request_body, response_env.status, [])
106
+ return parser.parse_stream(
107
+ request_url: request_url,
108
+ request_body: request_body,
109
+ response_status: response_env.status,
110
+ response_headers: response_env.response_headers
111
+ )
93
112
  end
94
113
 
95
114
  body = stream_buffer&.dig(:buffer)&.string
96
- body = read_body(response_env.body) if body.nil? || body.empty?
115
+ body = read_body(response_env.body) if body.blank?
97
116
 
98
- if body.nil? || body.empty?
117
+ if body.blank?
99
118
  Logging.warn(capture_warning(request_url, stream_buffer))
100
- return parser.parse_stream(request_url, request_body, response_env.status, [])
119
+ return parser.parse_stream(
120
+ request_url: request_url,
121
+ request_body: request_body,
122
+ response_status: response_env.status,
123
+ response_headers: response_env.response_headers
124
+ )
101
125
  end
102
126
 
103
127
  events = Parsers::SSE.parse(body)
104
- parser.parse_stream(request_url, request_body, response_env.status, events)
128
+ parser.parse_stream(
129
+ request_url: request_url,
130
+ request_body: request_body,
131
+ response_status: response_env.status,
132
+ events: events,
133
+ response_headers: response_env.response_headers
134
+ )
105
135
  end
106
136
 
107
137
  def install_stream_tap(request_env)
108
- return nil unless request_env.respond_to?(:request) && request_env.request
138
+ request = request_env.try(:request)
139
+ return nil unless request
109
140
 
110
- original = request_env.request.on_data
141
+ original = request.on_data
111
142
  return nil unless original
112
143
 
113
144
  state = { buffer: StringIO.new, bytes: 0, overflowed: false }
114
- request_env.request.on_data = proc do |chunk, size, env|
145
+ request.on_data = proc do |chunk, size, env|
115
146
  chunk = chunk.to_s
116
147
  unless state[:overflowed]
117
- if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
148
+ if state[:bytes] + chunk.bytesize <= Capture::Stream::LIMIT_BYTES
118
149
  state[:buffer] << chunk
119
150
  state[:bytes] += chunk.bytesize
120
151
  else
@@ -136,38 +167,49 @@ module LlmCostTracker
136
167
  when nil then ""
137
168
  when Hash, Array then body.to_json
138
169
  else
139
- body.respond_to?(:to_str) ? body.to_str : nil
170
+ body.try(:to_str)
140
171
  end
141
172
  end
142
173
 
143
174
  def resolved_tags(request_env)
144
- tags = @tags.respond_to?(:call) ? call_tags(request_env) : @tags
175
+ tags =
176
+ if @tags.respond_to?(:call)
177
+ @tags.arity.zero? ? @tags.call : @tags.call(request_env)
178
+ else
179
+ @tags
180
+ end
145
181
  return {} if tags.nil?
146
182
 
147
183
  tags.to_h
148
184
  end
149
185
 
150
- def call_tags(request_env)
151
- @tags.arity.zero? ? @tags.call : @tags.call(request_env)
152
- end
153
-
154
- def monotonic_time
155
- Process.clock_gettime(Process::CLOCK_MONOTONIC)
156
- end
157
-
158
- def elapsed_ms(started_at)
159
- ((monotonic_time - started_at) * 1000).round
186
+ def tag_snapshot(request_env)
187
+ [LlmCostTracker::Tags::Context.tags, resolved_tags(request_env)]
188
+ rescue StandardError => e
189
+ Logging.warn("Error resolving request tags: #{e.class}: #{e.message}")
190
+ [{}, {}]
160
191
  end
161
192
 
162
193
  def capture_warning(request_url, stream_buffer)
163
194
  unless stream_buffer&.dig(:overflowed)
164
- return "Unable to capture streaming response for #{RequestUrl.label(request_url)}; " \
195
+ return "Unable to capture streaming response for #{request_url_label(request_url)}; " \
165
196
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
166
197
  end
167
198
 
168
- "Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
199
+ "Streaming response for #{request_url_label(request_url)} exceeded #{Capture::Stream::LIMIT_BYTES} bytes; " \
169
200
  "recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
170
201
  end
202
+
203
+ def request_url_label(value)
204
+ uri = URI.parse(value.to_s)
205
+ uri.query = nil
206
+ uri.fragment = nil
207
+ uri.try(:user=, nil)
208
+ uri.try(:password=, nil)
209
+ uri.to_s
210
+ rescue URI::InvalidURIError
211
+ value.to_s.split("?", 2).first
212
+ end
171
213
  end
172
214
  end
173
215
  end
@@ -15,7 +15,7 @@ module LlmCostTracker
15
15
  %w[anthropic]
16
16
  end
17
17
 
18
- def parse(_request_url, request_body, response_status, response_body)
18
+ def parse(request_body:, response_status:, response_body:, **)
19
19
  return nil unless response_status == 200
20
20
 
21
21
  response = safe_json_parse(response_body)
@@ -24,36 +24,38 @@ module LlmCostTracker
24
24
 
25
25
  request = safe_json_parse(request_body)
26
26
  cache_read = usage["cache_read_input_tokens"].to_i
27
- cache_write = usage["cache_creation_input_tokens"].to_i
28
27
 
29
- ParsedUsage.build(
28
+ UsageCapture.build(
30
29
  provider: "anthropic",
31
30
  provider_response_id: response["id"],
31
+ pricing_mode: pricing_mode(request: request, response: response, usage: usage),
32
32
  model: response["model"] || request["model"],
33
- input_tokens: usage["input_tokens"].to_i,
34
- output_tokens: usage["output_tokens"].to_i,
35
- total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i + cache_read + cache_write,
36
- cache_read_input_tokens: usage["cache_read_input_tokens"],
37
- cache_write_input_tokens: usage["cache_creation_input_tokens"],
33
+ token_usage: token_usage(usage: usage, cache_read: cache_read),
38
34
  usage_source: :response
39
35
  )
40
36
  end
41
37
 
42
- def parse_stream(_request_url, request_body, response_status, events)
38
+ def parse_stream(response_status:, request_body: nil, events: [], **)
43
39
  return nil unless response_status == 200
44
40
 
45
41
  request = safe_json_parse(request_body)
46
- model = stream_model(events) || request["model"]
42
+ model = find_event_value(events) { |data| data.dig("message", "model") } || request["model"]
47
43
  usage = stream_usage(events)
48
- response_id = stream_response_id(events)
44
+ response_id = find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
49
45
 
50
46
  if usage
51
- build_stream_result(model, usage, response_id)
47
+ build_stream_result(
48
+ model: model,
49
+ usage: usage,
50
+ response_id: response_id,
51
+ pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
52
+ )
52
53
  else
53
54
  build_unknown_stream_usage(
54
55
  provider: "anthropic",
55
56
  model: model,
56
- provider_response_id: response_id
57
+ provider_response_id: response_id,
58
+ pricing_mode: pricing_mode(request: request, response: nil, usage: usage)
57
59
  )
58
60
  end
59
61
  end
@@ -75,33 +77,64 @@ module LlmCostTracker
75
77
  end
76
78
  end
77
79
 
78
- def stream_model(events)
79
- find_event_value(events) { |data| data.dig("message", "model") }
80
- end
80
+ def build_stream_result(model:, usage:, response_id:, pricing_mode:)
81
+ cache_read = usage["cache_read_input_tokens"].to_i
81
82
 
82
- def stream_response_id(events)
83
- find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
83
+ UsageCapture.build(
84
+ provider: "anthropic",
85
+ provider_response_id: response_id,
86
+ pricing_mode: pricing_mode,
87
+ model: model,
88
+ token_usage: token_usage(usage: usage, cache_read: cache_read),
89
+ stream: true,
90
+ usage_source: :stream_final
91
+ )
84
92
  end
85
93
 
86
- def build_stream_result(model, usage, response_id)
94
+ def token_usage(usage:, cache_read:)
87
95
  input = usage["input_tokens"].to_i
88
96
  output = usage["output_tokens"].to_i
89
- cache_read = usage["cache_read_input_tokens"].to_i
90
- cache_write = usage["cache_creation_input_tokens"].to_i
97
+ cache_creation = usage["cache_creation"]
98
+ if cache_creation.is_a?(Hash)
99
+ cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
100
+ cache_write_1h = cache_creation["ephemeral_1h_input_tokens"].to_i
101
+ else
102
+ cache_write = usage["cache_creation_input_tokens"].to_i
103
+ cache_write_1h = 0
104
+ end
91
105
 
92
- ParsedUsage.build(
93
- provider: "anthropic",
94
- provider_response_id: response_id,
95
- model: model,
106
+ TokenUsage.build(
96
107
  input_tokens: input,
97
108
  output_tokens: output,
98
- total_tokens: input + output + cache_read + cache_write,
109
+ total_tokens: input + output + cache_read + cache_write + cache_write_1h,
99
110
  cache_read_input_tokens: usage["cache_read_input_tokens"],
100
- cache_write_input_tokens: usage["cache_creation_input_tokens"],
101
- stream: true,
102
- usage_source: :stream_final
111
+ cache_write_input_tokens: cache_write,
112
+ cache_write_1h_input_tokens: cache_write_1h
103
113
  )
104
114
  end
115
+
116
+ def pricing_mode(request:, response:, usage:)
117
+ modes = []
118
+ speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
119
+ service_tier = usage&.fetch("service_tier", nil) ||
120
+ response&.fetch("service_tier", nil) ||
121
+ request["service_tier"]
122
+
123
+ modes << Pricing.normalize_mode(speed)
124
+ modes << Pricing.normalize_mode(service_tier)
125
+ modes << "data_residency" if inference_geo(request: request, response: response, usage: usage) == "us"
126
+
127
+ modes = modes.compact.uniq
128
+ modes.empty? ? nil : modes.join("_")
129
+ end
130
+
131
+ def inference_geo(request:, response:, usage:)
132
+ (
133
+ usage&.fetch("inference_geo", nil) ||
134
+ response&.fetch("inference_geo", nil) ||
135
+ request["inference_geo"]
136
+ ).to_s
137
+ end
105
138
  end
106
139
  end
107
140
  end
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "active_support/core_ext/object/blank"
3
4
  require "json"
4
5
  require "uri"
5
6
 
6
7
  module LlmCostTracker
7
8
  module Parsers
8
9
  class Base
9
- def parse(request_url, request_body, response_status, response_body)
10
+ def parse(**)
10
11
  raise NotImplementedError
11
12
  end
12
13
 
@@ -19,23 +20,22 @@ module LlmCostTracker
19
20
  end
20
21
 
21
22
  def streaming_request?(_request_url, request_body)
22
- return false if request_body.nil?
23
+ return false if request_body.blank?
23
24
 
24
25
  body = request_body.to_s
25
- return false if body.empty?
26
26
 
27
27
  request = safe_json_parse(body)
28
28
  request.is_a?(Hash) && request["stream"] == true
29
29
  end
30
30
 
31
- def parse_stream(_request_url, _request_body, _response_status, _events)
31
+ def parse_stream(**)
32
32
  nil
33
33
  end
34
34
 
35
35
  private
36
36
 
37
37
  def safe_json_parse(body)
38
- return {} if body.nil? || body.empty?
38
+ return {} if body.blank?
39
39
 
40
40
  JSON.parse(body)
41
41
  rescue JSON::ParserError
@@ -49,7 +49,7 @@ module LlmCostTracker
49
49
 
50
50
  def match_uri?(url, hosts: nil, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
51
51
  uri_matches?(url) do |uri|
52
- host_match = hosts.nil? || host_matches?(uri, hosts)
52
+ host_match = hosts.nil? || hosts.include?(uri.host.to_s.downcase)
53
53
  path_match = path_matches?(
54
54
  uri,
55
55
  exact_paths: exact_paths,
@@ -69,10 +69,6 @@ module LlmCostTracker
69
69
  nil
70
70
  end
71
71
 
72
- def host_matches?(uri, hosts)
73
- hosts.include?(uri.host.to_s.downcase)
74
- end
75
-
76
72
  def path_matches?(uri, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
77
73
  path = uri.path.to_s
78
74
  matches = true
@@ -98,28 +94,23 @@ module LlmCostTracker
98
94
  def find_event_value(events, reverse: false)
99
95
  each_event_data(events, reverse:) do |data|
100
96
  value = yield(data)
101
- return value if event_value_present?(value)
97
+ return value if value.present?
102
98
  end
103
99
 
104
100
  nil
105
101
  end
106
102
 
107
- def build_unknown_stream_usage(provider:, model:, provider_response_id:)
108
- ParsedUsage.build(
103
+ def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil)
104
+ UsageCapture.build(
109
105
  provider: provider,
110
106
  provider_response_id: provider_response_id,
111
- model: model || ParsedUsage::UNKNOWN_MODEL,
112
- input_tokens: 0,
113
- output_tokens: 0,
114
- total_tokens: 0,
107
+ pricing_mode: pricing_mode,
108
+ model: model || UsageCapture::UNKNOWN_MODEL,
109
+ token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
115
110
  stream: true,
116
111
  usage_source: :unknown
117
112
  )
118
113
  end
119
-
120
- def event_value_present?(value)
121
- !value.nil? && (!value.respond_to?(:empty?) || !value.empty?)
122
- end
123
114
  end
124
115
  end
125
116
  end
@@ -18,63 +18,74 @@ module LlmCostTracker
18
18
  end
19
19
 
20
20
  def streaming_request?(request_url, request_body)
21
- return true if streaming_url?(request_url)
21
+ return true if match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
22
22
 
23
23
  super
24
24
  end
25
25
 
26
- def parse(request_url, _request_body, response_status, response_body)
26
+ def parse(request_url:, request_body:, response_status:, response_body:, response_headers: nil)
27
27
  return nil unless response_status == 200
28
28
 
29
29
  response = safe_json_parse(response_body)
30
30
  usage = response["usageMetadata"]
31
31
  return nil unless usage
32
32
 
33
- build_parsed_usage(
34
- request_url,
35
- usage,
33
+ request = safe_json_parse(request_body)
34
+ build_usage_capture(
35
+ request_url: request_url,
36
+ usage: usage,
36
37
  usage_source: :response,
37
- provider_response_id: response["responseId"]
38
+ provider_response_id: response["responseId"],
39
+ pricing_mode: pricing_mode(request: request, response_headers: response_headers)
38
40
  )
39
41
  end
40
42
 
41
- def parse_stream(request_url, _request_body, response_status, events)
43
+ def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], response_headers: nil)
42
44
  return nil unless response_status == 200
43
45
 
46
+ request = safe_json_parse(request_body)
44
47
  usage = merged_stream_usage(events)
45
48
  model = extract_model_from_url(request_url)
46
49
  response_id = stream_response_id(events)
50
+ mode = pricing_mode(request: request, response_headers: response_headers)
47
51
 
48
52
  if usage
49
- build_parsed_usage(
50
- request_url,
51
- usage,
53
+ build_usage_capture(
54
+ request_url: request_url,
55
+ usage: usage,
52
56
  stream: true,
53
57
  usage_source: :stream_final,
54
- provider_response_id: response_id
58
+ provider_response_id: response_id,
59
+ pricing_mode: mode
55
60
  )
56
61
  else
57
62
  build_unknown_stream_usage(
58
63
  provider: "gemini",
59
64
  model: model,
60
- provider_response_id: response_id
65
+ provider_response_id: response_id,
66
+ pricing_mode: mode
61
67
  )
62
68
  end
63
69
  end
64
70
 
65
71
  private
66
72
 
67
- def build_parsed_usage(request_url, usage, usage_source:, stream: false, provider_response_id: nil)
73
+ def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
74
+ pricing_mode: nil)
68
75
  cache_read = usage["cachedContentTokenCount"].to_i
76
+ tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
69
77
 
70
- ParsedUsage.build(
78
+ UsageCapture.build(
71
79
  provider: "gemini",
72
80
  model: extract_model_from_url(request_url),
73
- input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
74
- output_tokens: output_tokens(usage),
75
- total_tokens: total_tokens(usage, cache_read),
76
- cache_read_input_tokens: usage["cachedContentTokenCount"],
77
- hidden_output_tokens: usage["thoughtsTokenCount"],
81
+ pricing_mode: pricing_mode,
82
+ token_usage: TokenUsage.build(
83
+ input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
84
+ output_tokens: output_tokens(usage),
85
+ total_tokens: total_tokens(usage: usage, cache_read: cache_read, tool_use_prompt: tool_use_prompt),
86
+ cache_read_input_tokens: usage["cachedContentTokenCount"],
87
+ hidden_output_tokens: usage["thoughtsTokenCount"]
88
+ ),
78
89
  stream: stream,
79
90
  usage_source: usage_source,
80
91
  provider_response_id: provider_response_id
@@ -92,21 +103,17 @@ module LlmCostTracker
92
103
  usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
93
104
  end
94
105
 
95
- def total_tokens(usage, cache_read)
106
+ def total_tokens(usage:, cache_read:, tool_use_prompt:)
96
107
  total = usage["totalTokenCount"]
97
108
  return total.to_i unless total.nil?
98
109
 
99
- [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
110
+ [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + tool_use_prompt + output_tokens(usage)
100
111
  end
101
112
 
102
113
  def stream_response_id(events)
103
114
  find_event_value(events) { |data| data["responseId"] }
104
115
  end
105
116
 
106
- def streaming_url?(request_url)
107
- match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
108
- end
109
-
110
117
  def extract_model_from_url(url)
111
118
  uri = parsed_uri(url)
112
119
  return nil unless uri
@@ -114,6 +121,24 @@ module LlmCostTracker
114
121
  match = uri.path.match(%r{/models/([^/:]+)})
115
122
  match && match[1]
116
123
  end
124
+
125
+ def pricing_mode(request:, response_headers:)
126
+ response_tier = response_header(response_headers, "x-gemini-service-tier")
127
+ response_mode = Pricing.normalize_mode(response_tier)
128
+ return response_mode if response_mode
129
+
130
+ request_mode = Pricing.normalize_mode(
131
+ request["service_tier"] ||
132
+ request["serviceTier"] ||
133
+ request.dig("config", "service_tier") ||
134
+ request.dig("config", "serviceTier")
135
+ )
136
+ request_mode == "flex" ? request_mode : nil
137
+ end
138
+
139
+ def response_header(headers, name)
140
+ headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
141
+ end
117
142
  end
118
143
  end
119
144
  end
@@ -8,7 +8,19 @@ module LlmCostTracker
8
8
  class Openai < Base
9
9
  include OpenaiUsage
10
10
 
11
- HOSTS = %w[api.openai.com].freeze
11
+ HOSTS = %w[
12
+ api.openai.com
13
+ us.api.openai.com
14
+ eu.api.openai.com
15
+ au.api.openai.com
16
+ ca.api.openai.com
17
+ jp.api.openai.com
18
+ in.api.openai.com
19
+ sg.api.openai.com
20
+ kr.api.openai.com
21
+ gb.api.openai.com
22
+ ae.api.openai.com
23
+ ].freeze
12
24
  TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
13
25
 
14
26
  def match?(url)
@@ -19,12 +31,22 @@ module LlmCostTracker
19
31
  %w[openai]
20
32
  end
21
33
 
22
- def parse(request_url, request_body, response_status, response_body)
23
- parse_openai_usage(request_url, request_body, response_status, response_body)
34
+ def parse(request_url:, request_body:, response_status:, response_body:, **)
35
+ parse_openai_usage(
36
+ request_url: request_url,
37
+ request_body: request_body,
38
+ response_status: response_status,
39
+ response_body: response_body
40
+ )
24
41
  end
25
42
 
26
- def parse_stream(request_url, request_body, response_status, events)
27
- parse_openai_stream_usage(request_url, request_body, response_status, events)
43
+ def parse_stream(response_status:, request_url: nil, request_body: nil, events: [], **)
44
+ parse_openai_stream_usage(
45
+ request_url: request_url,
46
+ request_body: request_body,
47
+ response_status: response_status,
48
+ events: events
49
+ )
28
50
  end
29
51
 
30
52
  private