llm_cost_tracker 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +173 -0
  4. data/README.md +60 -220
  5. data/app/assets/llm_cost_tracker/application.css +282 -45
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +25 -20
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +22 -19
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +14 -2
  10. data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
  11. data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
  12. data/app/helpers/llm_cost_tracker/application_helper.rb +18 -21
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  14. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  15. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  16. data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
  17. data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
  18. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +24 -7
  19. data/app/models/llm_cost_tracker/call.rb +166 -0
  20. data/app/models/llm_cost_tracker/call_line_item.rb +18 -0
  21. data/app/models/llm_cost_tracker/call_rollup.rb +6 -0
  22. data/app/models/llm_cost_tracker/call_tag.rb +12 -0
  23. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +9 -0
  24. data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
  25. data/app/models/llm_cost_tracker/provider_invoice.rb +13 -0
  26. data/app/models/llm_cost_tracker/provider_invoice_import.rb +24 -0
  27. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +152 -32
  28. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/filter.rb +8 -6
  30. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  31. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  32. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  33. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  35. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  36. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  37. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  38. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  39. data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
  40. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  41. data/app/views/llm_cost_tracker/calls/show.html.erb +73 -33
  42. data/app/views/llm_cost_tracker/dashboard/index.html.erb +16 -57
  43. data/app/views/llm_cost_tracker/data_quality/index.html.erb +183 -167
  44. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  45. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  46. data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
  47. data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
  48. data/app/views/llm_cost_tracker/shared/_filters.html.erb +66 -0
  49. data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
  50. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  51. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  52. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  53. data/app/views/llm_cost_tracker/tags/show.html.erb +64 -36
  54. data/config/routes.rb +3 -2
  55. data/lib/llm_cost_tracker/billing/components.rb +95 -0
  56. data/lib/llm_cost_tracker/billing/components.yml +188 -0
  57. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  58. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  59. data/lib/llm_cost_tracker/budget.rb +26 -36
  60. data/lib/llm_cost_tracker/capture/stream_collector.rb +125 -38
  61. data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
  62. data/lib/llm_cost_tracker/configuration.rb +86 -17
  63. data/lib/llm_cost_tracker/dashboard_setup_state.rb +109 -0
  64. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +56 -0
  65. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +48 -30
  66. data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
  67. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  68. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  69. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  70. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  71. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  72. data/lib/llm_cost_tracker/doctor/schema_check.rb +34 -0
  73. data/lib/llm_cost_tracker/doctor.rb +111 -44
  74. data/lib/llm_cost_tracker/engine.rb +9 -0
  75. data/lib/llm_cost_tracker/errors.rb +5 -19
  76. data/lib/llm_cost_tracker/event.rb +11 -3
  77. data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
  78. data/lib/llm_cost_tracker/generators/llm_cost_tracker/durable_ingestion_generator.rb +43 -0
  79. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -5
  80. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  81. data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
  82. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
  83. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +104 -0
  84. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_durable_ingestion.rb.erb +29 -0
  85. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +55 -0
  86. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +28 -25
  87. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +20 -0
  88. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
  89. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
  90. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +38 -0
  91. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_provider_response_id_generator.rb → upgrade_call_tags_key_value_index_generator.rb} +5 -4
  92. data/lib/llm_cost_tracker/generators/llm_cost_tracker/{add_streaming_generator.rb → upgrade_image_tokens_generator.rb} +4 -4
  93. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  94. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -24
  95. data/lib/llm_cost_tracker/ingestion/inline.rb +22 -0
  96. data/lib/llm_cost_tracker/ingestion/worker.rb +24 -7
  97. data/lib/llm_cost_tracker/ingestion.rb +66 -22
  98. data/lib/llm_cost_tracker/integrations/anthropic.rb +68 -42
  99. data/lib/llm_cost_tracker/integrations/base.rb +56 -32
  100. data/lib/llm_cost_tracker/integrations/openai.rb +342 -63
  101. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +110 -11
  102. data/lib/llm_cost_tracker/integrations.rb +21 -3
  103. data/lib/llm_cost_tracker/ledger/period/totals.rb +30 -11
  104. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  105. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +2 -2
  106. data/lib/llm_cost_tracker/ledger/rollups.rb +90 -25
  107. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  108. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +79 -0
  109. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  110. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +41 -0
  111. data/lib/llm_cost_tracker/ledger/schema/calls.rb +36 -23
  112. data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +47 -0
  113. data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +42 -0
  114. data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +46 -0
  115. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  116. data/lib/llm_cost_tracker/ledger/store.rb +103 -20
  117. data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
  118. data/lib/llm_cost_tracker/ledger/tags/query.rb +6 -11
  119. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -15
  120. data/lib/llm_cost_tracker/ledger.rb +5 -2
  121. data/lib/llm_cost_tracker/logging.rb +2 -5
  122. data/lib/llm_cost_tracker/masking.rb +39 -0
  123. data/lib/llm_cost_tracker/middleware/faraday.rb +95 -35
  124. data/lib/llm_cost_tracker/parsers/anthropic.rb +74 -14
  125. data/lib/llm_cost_tracker/parsers/base.rb +13 -4
  126. data/lib/llm_cost_tracker/parsers/gemini.rb +105 -15
  127. data/lib/llm_cost_tracker/parsers/openai.rb +16 -2
  128. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +15 -3
  129. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +126 -0
  130. data/lib/llm_cost_tracker/parsers/openai_usage.rb +157 -59
  131. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  132. data/lib/llm_cost_tracker/parsers.rb +1 -1
  133. data/lib/llm_cost_tracker/prices.json +198 -22
  134. data/lib/llm_cost_tracker/pricing/effective_prices.rb +28 -21
  135. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  136. data/lib/llm_cost_tracker/pricing/lookup.rb +73 -36
  137. data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
  138. data/lib/llm_cost_tracker/pricing/registry.rb +67 -45
  139. data/lib/llm_cost_tracker/pricing/service_charges.rb +210 -0
  140. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  141. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  142. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +50 -1
  143. data/lib/llm_cost_tracker/pricing/sync.rb +59 -10
  144. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  145. data/lib/llm_cost_tracker/pricing.rb +220 -28
  146. data/lib/llm_cost_tracker/railtie.rb +6 -8
  147. data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
  148. data/lib/llm_cost_tracker/reconciliation/diff.rb +428 -0
  149. data/lib/llm_cost_tracker/reconciliation/diff_result.rb +48 -0
  150. data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
  151. data/lib/llm_cost_tracker/reconciliation/importer.rb +253 -0
  152. data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +171 -0
  153. data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
  154. data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
  155. data/lib/llm_cost_tracker/reconciliation.rb +118 -0
  156. data/lib/llm_cost_tracker/report/data.rb +19 -8
  157. data/lib/llm_cost_tracker/report.rb +0 -4
  158. data/lib/llm_cost_tracker/retention.rb +22 -9
  159. data/lib/llm_cost_tracker/tags/context.rb +2 -5
  160. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  161. data/lib/llm_cost_tracker/tags/sanitizer.rb +71 -20
  162. data/lib/llm_cost_tracker/timing.rb +15 -0
  163. data/lib/llm_cost_tracker/token_usage.rb +64 -42
  164. data/lib/llm_cost_tracker/tracker.rb +97 -27
  165. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  166. data/lib/llm_cost_tracker/version.rb +1 -1
  167. data/lib/llm_cost_tracker.rb +45 -35
  168. data/lib/tasks/llm_cost_tracker.rake +45 -17
  169. metadata +71 -41
  170. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  171. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  172. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  173. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  174. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  175. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  176. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  177. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  178. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  179. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  180. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  181. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  182. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  183. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  184. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  185. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  186. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  187. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  188. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  189. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  190. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  191. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  192. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  193. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  194. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  195. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require "set"

module LlmCostTracker
  # Redacts provider account identifiers so that only a short tail of each
  # value remains visible.
  module Masking
    # Keys whose values are redacted. Compared after converting the key with
    # +to_sym+, so both String and Symbol keys match.
    SENSITIVE_KEYS = %i[
      provider_api_key_id provider_workspace_id provider_organization_id provider_project_id
    ].to_set.freeze
    # Number of trailing characters left visible in a masked value.
    MASK_TAIL_LENGTH = 4

    module_function

    # @param key [Object] attribute name
    # @return [Boolean] true when +key+ names a sensitive attribute
    def sensitive_key?(key)
      # Hash keys are not guaranteed to be symbolizable (Integer, nil, ...);
      # such keys can never be sensitive and must not raise.
      key.respond_to?(:to_sym) && SENSITIVE_KEYS.include?(key.to_sym)
    end

    # Returns +value+ as a String, redacted when +key+ is sensitive.
    #
    # Values no longer than MASK_TAIL_LENGTH are returned in full.
    #
    # @param key [Object] attribute name
    # @param value [Object] attribute value; converted with +to_s+
    # @return [String] e.g. "***1234" for a sensitive "key_abcdef1234"
    def mask_value(key, value)
      string = value.to_s
      return string unless sensitive_key?(key)
      return string if string.length <= MASK_TAIL_LENGTH

      "***#{string[-MASK_TAIL_LENGTH, MASK_TAIL_LENGTH]}"
    end

    # Renders an attribution hash as "key=value" pairs with sensitive values masked.
    #
    # @param attribution [Hash, nil]
    # @param separator [String] placed between pairs
    # @return [String] empty string when +attribution+ is nil or empty
    def format_attribution(attribution, separator: ", ")
      return "" if attribution.nil? || attribution.empty?

      attribution.map { |key, value| "#{key}=#{mask_value(key, value)}" }.join(separator)
    end

    # Returns a copy of +hash+ with sensitive values masked, descending into
    # nested hashes and into arrays. Scalar values are converted to Strings
    # by #mask_value. Non-Hash input is returned unchanged.
    #
    # @param hash [Hash, Object]
    # @return [Hash, Object]
    def mask_hash(hash)
      return hash unless hash.is_a?(Hash)

      hash.each_with_object({}) do |(key, value), masked|
        masked[key] = case value
                      when Hash then mask_hash(value)
                      when Array then value.map { |entry| mask_array_entry(key, entry) }
                      else
                        mask_value(key, value)
                      end
      end
    end

    # Masks one element of an array stored under +key+.
    #
    # Hash elements are masked recursively. Other elements are masked only
    # when the array itself sits under a sensitive key, so a list of
    # identifiers does not leak; elements under other keys pass through as-is.
    def mask_array_entry(key, entry)
      return mask_hash(entry) if entry.is_a?(Hash)
      return mask_value(key, entry) if sensitive_key?(key)

      entry
    end
  end
end
@@ -2,10 +2,12 @@
2
2
 
3
3
  require "faraday"
4
4
  require "json"
5
+ require "stringio"
5
6
  require "uri"
6
7
 
7
8
  require_relative "../logging"
8
9
  require_relative "../capture/stream"
10
+ require_relative "../timing"
9
11
 
10
12
  module LlmCostTracker
11
13
  module Middleware
@@ -19,31 +21,85 @@ module LlmCostTracker
19
21
  return @app.call(request_env) unless LlmCostTracker.configuration.enabled
20
22
 
21
23
  request_url = request_env.url.to_s
22
- request_body = read_body(request_env.body) || ""
24
+ request_body = read_body(request_env.body)
23
25
  parser = Parsers.find_for(request_url)
24
26
  streaming = parser&.streaming_request?(request_url, request_body)
27
+ request_body = inject_stream_usage_flag(request_env, parser, request_url) if streaming
25
28
  stream_buffer = install_stream_tap(request_env) if streaming
26
29
 
27
30
  Tracker.enforce_budget! if parser
28
31
  context_tags, metadata = tag_snapshot(request_env) if parser
29
- started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
32
+ started_at = LlmCostTracker::Timing.now_monotonic
30
33
 
34
+ invoke_app_with_capture(
35
+ request_env: request_env, parser: parser, request_url: request_url,
36
+ request_body: request_body, streaming: streaming, stream_buffer: stream_buffer,
37
+ context_tags: context_tags, metadata: metadata, started_at: started_at
38
+ )
39
+ end
40
+
41
+ private
42
+
43
+ def invoke_app_with_capture(request_env:, parser:, request_url:, request_body:, streaming:,
44
+ stream_buffer:, context_tags:, metadata:, started_at:)
45
+ response_received = false
31
46
  @app.call(request_env).on_complete do |response_env|
47
+ response_received = true
32
48
  process(
33
- parser: parser,
34
- request_url: request_url,
35
- request_body: request_body,
36
- response_env: response_env,
37
- latency_ms: ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round,
38
- streaming: streaming,
39
- stream_buffer: stream_buffer,
40
- context_tags: context_tags,
41
- metadata: metadata
49
+ parser: parser, request_url: request_url, request_body: request_body,
50
+ response_env: response_env, latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
51
+ streaming: streaming, stream_buffer: stream_buffer,
52
+ context_tags: context_tags, metadata: metadata
53
+ )
54
+ end
55
+ rescue StandardError => e
56
+ if streaming && parser && !response_received
57
+ process_interrupted_stream(
58
+ parser: parser, request_url: request_url, request_body: request_body,
59
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
60
+ context_tags: context_tags, metadata: metadata, error: e
42
61
  )
43
62
  end
63
+ raise
44
64
  end
45
65
 
46
- private
# Adds "stream_options": {"include_usage": true} to a streaming request
# body when auto-enabling is switched on in the configuration and the
# parser opts in for this URL.
#
# The body is left untouched when the caller already set "include_usage"
# (to any value), or when it cannot be read as a JSON object. This method
# runs inside the request path, so an unreadable body must never raise here.
#
# @param request_env [#body, #body=] Faraday request environment
# @param parser [#auto_enable_stream_usage?, nil] parser matched for the URL
# @param request_url [String]
# @return [String, nil] the body that will be sent (rewritten or original)
def inject_stream_usage_flag(request_env, parser, request_url)
  body_string = read_body(request_env.body)
  return body_string unless LlmCostTracker.configuration.auto_enable_stream_usage
  return body_string unless parser&.auto_enable_stream_usage?(request_url)

  body = begin
    JSON.parse(body_string)
  rescue JSON::ParserError, TypeError
    # nil, empty, or malformed body: nothing to inject into.
    nil
  end
  return body_string unless body.is_a?(Hash)

  stream_options = body["stream_options"] || {}
  # A non-object "stream_options" is the caller's mistake; pass it through
  # and let the provider report it instead of failing in the middleware.
  return body_string unless stream_options.is_a?(Hash)
  return body_string if stream_options.key?("include_usage")

  body["stream_options"] = stream_options.merge("include_usage" => true)
  new_body = body.to_json
  request_env.body = new_body
  new_body
end
79
+
# Records a placeholder call (zero tokens, usage source :unknown) for a
# streaming request that raised, tagging the metadata with the error.
# Any failure while recording is logged as a warning and swallowed.
#
# @param parser [#safe_json_parse, #provider_for]
# @param request_url [String]
# @param request_body [String, nil]
# @param latency_ms [Integer]
# @param context_tags [Object] passed through to Tracker.record
# @param metadata [Hash, nil]
# @param error [Exception] the error that interrupted the stream
def process_interrupted_stream(parser:, request_url:, request_body:, latency_ms:,
                               context_tags:, metadata:, error:)
  parsed_request = parser.safe_json_parse(request_body)
  placeholder = UsageCapture.build(
    provider: parser.provider_for(request_url),
    model: parsed_request["model"] || UsageCapture::UNKNOWN_MODEL,
    token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
    stream: true,
    usage_source: :unknown
  )
  interruption_details = {
    stream_interrupted: true,
    stream_interrupted_error: "#{error.class}: #{error.message}"
  }

  Tracker.record(
    capture: placeholder,
    latency_ms: latency_ms,
    metadata: (metadata || {}).merge(interruption_details),
    context_tags: context_tags
  )
rescue StandardError => e
  Logging.warn("Error recording interrupted stream: #{e.class}: #{e.message}")
end
47
103
 
48
104
  def process(parser:, request_url:, request_body:, response_env:,
49
105
  latency_ms:, streaming:, stream_buffer:, context_tags:, metadata:)
@@ -101,21 +157,14 @@ module LlmCostTracker
101
157
  end
102
158
 
103
159
  def parse_stream(parser:, request_url:, request_body:, response_env:, stream_buffer:)
104
- if stream_buffer&.dig(:overflowed)
105
- Logging.warn(capture_warning(request_url, stream_buffer))
106
- return parser.parse_stream(
107
- request_url: request_url,
108
- request_body: request_body,
109
- response_status: response_env.status,
110
- response_headers: response_env.response_headers
111
- )
112
- end
160
+ overflowed = stream_buffer&.dig(:overflowed) == true
161
+ Logging.warn(capture_warning(request_url, stream_buffer)) if overflowed
113
162
 
114
163
  body = stream_buffer&.dig(:buffer)&.string
115
164
  body = read_body(response_env.body) if body.blank?
116
165
 
117
166
  if body.blank?
118
- Logging.warn(capture_warning(request_url, stream_buffer))
167
+ Logging.warn(capture_warning(request_url, stream_buffer)) unless overflowed
119
168
  return parser.parse_stream(
120
169
  request_url: request_url,
121
170
  request_body: request_body,
@@ -124,7 +173,7 @@ module LlmCostTracker
124
173
  )
125
174
  end
126
175
 
127
- events = Parsers::SSE.parse(body)
176
+ events = overflowed ? [] : Parsers::SSE.parse(body)
128
177
  parser.parse_stream(
129
178
  request_url: request_url,
130
179
  request_body: request_body,
@@ -134,8 +183,19 @@ module LlmCostTracker
134
183
  )
135
184
  end
136
185
 
# Invokes the caller's original on_data handler with as many of
# (chunk, size, env) as it can accept.
#
# Plain procs ignore surplus arguments, so they always receive all three;
# trimming by arity would starve a proc that declares optional parameters.
# Lambdas and Method objects check their argument count, so the list is
# trimmed to what they declare.
#
# @param callable [#call, #arity] the handler that was installed on the request
# @param chunk [String]
# @param size [Integer] passed through unchanged
# @param env [Object] passed through unchanged
# @return [Object] whatever the handler returns
def forward_on_data_chunk(callable, chunk, size, env)
  arguments = [chunk, size, env]
  return callable.call(*arguments) if callable.respond_to?(:lambda?) && !callable.lambda?

  callable.call(*arguments.first(on_data_argument_count(callable, arguments.size)))
end

# Largest number of positional arguments (capped at +limit+) that a
# strict callable accepts.
def on_data_argument_count(callable, limit)
  arity = callable.arity
  return [arity, limit].min unless arity.negative?
  return limit unless callable.respond_to?(:parameters)

  parameters = callable.parameters
  # A splat accepts everything.
  return limit if parameters.any? { |kind, _name| kind == :rest }

  positional = parameters.count { |kind, _name| kind == :req || kind == :opt }
  [positional, limit].min
end
196
+
137
197
  def install_stream_tap(request_env)
138
- request = request_env.try(:request)
198
+ request = request_env.request
139
199
  return nil unless request
140
200
 
141
201
  original = request.on_data
@@ -144,16 +204,16 @@ module LlmCostTracker
144
204
  state = { buffer: StringIO.new, bytes: 0, overflowed: false }
145
205
  request.on_data = proc do |chunk, size, env|
146
206
  chunk = chunk.to_s
147
- unless state[:overflowed]
148
- if state[:bytes] + chunk.bytesize <= Capture::Stream::LIMIT_BYTES
149
- state[:buffer] << chunk
150
- state[:bytes] += chunk.bytesize
151
- else
152
- state[:overflowed] = true
153
- state[:buffer] = nil
154
- end
207
+ remaining = Capture::Stream::LIMIT_BYTES - state[:bytes]
208
+ if chunk.bytesize <= remaining
209
+ state[:buffer] << chunk
210
+ state[:bytes] += chunk.bytesize
211
+ else
212
+ state[:buffer] << chunk.byteslice(0, remaining) if remaining.positive?
213
+ state[:bytes] += [remaining, 0].max
214
+ state[:overflowed] = true
155
215
  end
156
- original.call(chunk, size, env)
216
+ forward_on_data_chunk(original, chunk, size, env)
157
217
  end
158
218
  state
159
219
  rescue StandardError => e
@@ -204,8 +264,8 @@ module LlmCostTracker
204
264
  uri = URI.parse(value.to_s)
205
265
  uri.query = nil
206
266
  uri.fragment = nil
207
- uri.try(:user=, nil)
208
- uri.try(:password=, nil)
267
+ uri.user = nil
268
+ uri.password = nil
209
269
  uri.to_s
210
270
  rescue URI::InvalidURIError
211
271
  value.to_s.split("?", 2).first
@@ -31,7 +31,8 @@ module LlmCostTracker
31
31
  pricing_mode: pricing_mode(request: request, response: response, usage: usage),
32
32
  model: response["model"] || request["model"],
33
33
  token_usage: token_usage(usage: usage, cache_read: cache_read),
34
- usage_source: :response
34
+ usage_source: :response,
35
+ service_line_items: service_line_items(usage)
35
36
  )
36
37
  end
37
38
 
@@ -60,20 +61,28 @@ module LlmCostTracker
60
61
  end
61
62
  end
62
63
 
64
+ def provider_for(_request_url)
65
+ "anthropic"
66
+ end
67
+
68
+ DATA_RESIDENCY_GEOS = %w[us].freeze
69
+ STANDARD_EQUIVALENT_SERVICE_TIERS = %w[standard standard_only priority].freeze
70
+ private_constant :DATA_RESIDENCY_GEOS, :STANDARD_EQUIVALENT_SERVICE_TIERS
71
+
63
72
  private
64
73
 
65
74
  def stream_usage(events)
66
- start_usage = find_event_value(events, reverse: true) do |data|
67
- data.dig("message", "usage") if data["type"] == "message_start"
68
- end
69
75
  latest_delta = find_event_value(events, reverse: true) do |data|
70
76
  data["usage"] if data["type"] == "message_delta" && data["usage"].is_a?(Hash)
71
77
  end
78
+ return nil unless latest_delta
72
79
 
73
- return nil unless start_usage || latest_delta
80
+ start_usage = find_event_value(events, reverse: true) do |data|
81
+ data.dig("message", "usage") if data["type"] == "message_start"
82
+ end
74
83
 
75
- (start_usage || {}).merge(latest_delta || {}) do |_key, start_val, delta_val|
76
- delta_val.nil? ? start_val : delta_val
84
+ (start_usage || {}).merge(latest_delta) do |_key, start_val, delta_val|
85
+ delta_val || start_val
77
86
  end
78
87
  end
79
88
 
@@ -87,7 +96,44 @@ module LlmCostTracker
87
96
  model: model,
88
97
  token_usage: token_usage(usage: usage, cache_read: cache_read),
89
98
  stream: true,
90
- usage_source: :stream_final
99
+ usage_source: :stream_final,
100
+ service_line_items: service_line_items(usage)
101
+ )
102
+ end
103
+
# Builds one line item per server-side tool counter found under
# usage["server_tool_use"], dropping the entries for which
# service_line_item returns nil.
#
# @param usage [Hash] provider usage payload
# @return [Array] empty when "server_tool_use" is not a Hash
def service_line_items(usage)
  tool_counts = usage["server_tool_use"]
  return [] unless tool_counts.is_a?(Hash)

  # [component key, counter name in server_tool_use, provider field path]
  counters = [
    [:web_search_request, "web_search_requests", "usage.server_tool_use.web_search_requests"],
    [:web_fetch_request, "web_fetch_requests", "usage.server_tool_use.web_fetch_requests"],
    [:code_execution_request, "code_execution_requests", "usage.server_tool_use.code_execution_requests"]
  ]

  items = counters.map do |component_key, counter_name, provider_field|
    service_line_item(
      component_key: component_key,
      quantity: tool_counts[counter_name],
      provider_field: provider_field
    )
  end
  items.compact
end
126
+
127
+ def service_line_item(component_key:, quantity:, provider_field:)
128
+ quantity = quantity.to_i
129
+ return if quantity.zero?
130
+
131
+ Billing::LineItem.build(
132
+ component_key: component_key,
133
+ quantity: quantity,
134
+ cost_status: Billing::CostStatus::UNKNOWN,
135
+ pricing_basis: :provider_usage,
136
+ provider_field: provider_field
91
137
  )
92
138
  end
93
139
 
@@ -97,32 +143,46 @@ module LlmCostTracker
97
143
  cache_creation = usage["cache_creation"]
98
144
  if cache_creation.is_a?(Hash)
99
145
  cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
100
- cache_write_1h = cache_creation["ephemeral_1h_input_tokens"].to_i
146
+ cache_write_extended = cache_creation["ephemeral_1h_input_tokens"].to_i
101
147
  else
148
+ warn_unexpected_cache_creation(cache_creation, usage)
102
149
  cache_write = usage["cache_creation_input_tokens"].to_i
103
- cache_write_1h = 0
150
+ cache_write_extended = 0
104
151
  end
152
+ hidden_output = (
153
+ usage["thinking_tokens"] || usage["thinking_output_tokens"] ||
154
+ usage.dig("output_tokens_details", "reasoning_tokens")
155
+ ).to_i
105
156
 
106
157
  TokenUsage.build(
107
158
  input_tokens: input,
108
159
  output_tokens: output,
109
- total_tokens: input + output + cache_read + cache_write + cache_write_1h,
110
- cache_read_input_tokens: usage["cache_read_input_tokens"],
160
+ total_tokens: input + output + cache_read + cache_write + cache_write_extended,
161
+ cache_read_input_tokens: cache_read,
111
162
  cache_write_input_tokens: cache_write,
112
- cache_write_1h_input_tokens: cache_write_1h
163
+ cache_write_extended_input_tokens: cache_write_extended,
164
+ hidden_output_tokens: hidden_output
113
165
  )
114
166
  end
115
167
 
# Logs a warning when usage["cache_creation"] is present but the flat
# "cache_creation_input_tokens" key is absent from +usage+.
#
# @param cache_creation [Object, nil] raw value of usage["cache_creation"]
# @param usage [Hash] provider usage payload
def warn_unexpected_cache_creation(cache_creation, usage)
  unexpected_shape = !cache_creation.nil? && !usage.key?("cache_creation_input_tokens")
  Logging.warn("Anthropic usage.cache_creation has unexpected shape: #{cache_creation.class}") if unexpected_shape
end
173
+
116
174
  def pricing_mode(request:, response:, usage:)
117
175
  modes = []
118
176
  speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
119
177
  service_tier = usage&.fetch("service_tier", nil) ||
120
178
  response&.fetch("service_tier", nil) ||
121
179
  request["service_tier"]
180
+ service_tier = nil if STANDARD_EQUIVALENT_SERVICE_TIERS.include?(service_tier.to_s)
122
181
 
123
182
  modes << Pricing.normalize_mode(speed)
124
183
  modes << Pricing.normalize_mode(service_tier)
125
- modes << "data_residency" if inference_geo(request: request, response: response, usage: usage) == "us"
184
+ geo = inference_geo(request: request, response: response, usage: usage).downcase
185
+ modes << "data_residency" if DATA_RESIDENCY_GEOS.include?(geo)
126
186
 
127
187
  modes = modes.compact.uniq
128
188
  modes.empty? ? nil : modes.join("_")
@@ -32,7 +32,9 @@ module LlmCostTracker
32
32
  nil
33
33
  end
34
34
 
35
- private
35
+ def auto_enable_stream_usage?(_request_url)
36
+ false
37
+ end
36
38
 
37
39
  def safe_json_parse(body)
38
40
  return {} if body.blank?
@@ -42,6 +44,8 @@ module LlmCostTracker
42
44
  {}
43
45
  end
44
46
 
47
+ private
48
+
45
49
  def uri_matches?(url)
46
50
  uri = parsed_uri(url)
47
51
  uri ? yield(uri) : false
@@ -59,7 +63,10 @@ module LlmCostTracker
59
63
  )
60
64
  extra_match = block_given? ? yield(uri) : true
61
65
 
62
- host_match && path_match && extra_match ? true : false
66
+ next false unless host_match && path_match
67
+ next false unless extra_match
68
+
69
+ true
63
70
  end
64
71
  end
65
72
 
@@ -100,7 +107,8 @@ module LlmCostTracker
100
107
  nil
101
108
  end
102
109
 
103
- def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil)
110
+ def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil,
111
+ service_line_items: nil)
104
112
  UsageCapture.build(
105
113
  provider: provider,
106
114
  provider_response_id: provider_response_id,
@@ -108,7 +116,8 @@ module LlmCostTracker
108
116
  model: model || UsageCapture::UNKNOWN_MODEL,
109
117
  token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
110
118
  stream: true,
111
- usage_source: :unknown
119
+ usage_source: :unknown,
120
+ service_line_items: service_line_items
112
121
  )
113
122
  end
114
123
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../billing/line_item"
3
4
  require_relative "base"
4
5
 
5
6
  module LlmCostTracker
@@ -8,6 +9,7 @@ module LlmCostTracker
8
9
  HOSTS = %w[generativelanguage.googleapis.com].freeze
9
10
  TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
10
11
  STREAM_PATH_PATTERN = /:streamGenerateContent\z/
12
+ PER_QUERY_GROUNDING_MODEL_PATTERN = /\bgemini-(?:[3-9]|[1-9]\d)\b/i
11
13
 
12
14
  def match?(url)
13
15
  match_uri?(url, hosts: HOSTS, path_pattern: TRACKED_PATH_PATTERN)
@@ -31,12 +33,14 @@ module LlmCostTracker
31
33
  return nil unless usage
32
34
 
33
35
  request = safe_json_parse(request_body)
36
+ model = extract_model_from_url(request_url)
34
37
  build_usage_capture(
35
38
  request_url: request_url,
36
39
  usage: usage,
37
40
  usage_source: :response,
38
41
  provider_response_id: response["responseId"],
39
- pricing_mode: pricing_mode(request: request, response_headers: response_headers)
42
+ pricing_mode: pricing_mode(request: request, response_headers: response_headers),
43
+ service_line_items: grounding_line_items_for_response(response, model: model)
40
44
  )
41
45
  end
42
46
 
@@ -48,6 +52,7 @@ module LlmCostTracker
48
52
  model = extract_model_from_url(request_url)
49
53
  response_id = stream_response_id(events)
50
54
  mode = pricing_mode(request: request, response_headers: response_headers)
55
+ service_line_items = grounding_line_items_for_stream(events, model: model)
51
56
 
52
57
  if usage
53
58
  build_usage_capture(
@@ -56,39 +61,51 @@ module LlmCostTracker
56
61
  stream: true,
57
62
  usage_source: :stream_final,
58
63
  provider_response_id: response_id,
59
- pricing_mode: mode
64
+ pricing_mode: mode,
65
+ service_line_items: service_line_items
60
66
  )
61
67
  else
62
68
  build_unknown_stream_usage(
63
69
  provider: "gemini",
64
70
  model: model,
65
71
  provider_response_id: response_id,
66
- pricing_mode: mode
72
+ pricing_mode: mode,
73
+ service_line_items: service_line_items
67
74
  )
68
75
  end
69
76
  end
70
77
 
78
+ def provider_for(_request_url)
79
+ "gemini"
80
+ end
81
+
71
82
  private
72
83
 
73
84
  def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
74
- pricing_mode: nil)
85
+ pricing_mode: nil, service_line_items: nil)
75
86
  cache_read = usage["cachedContentTokenCount"].to_i
76
87
  tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
88
+ audio_input = audio_input_tokens(usage)
89
+ audio_output = audio_output_tokens(usage)
77
90
 
78
91
  UsageCapture.build(
79
92
  provider: "gemini",
80
93
  model: extract_model_from_url(request_url),
81
94
  pricing_mode: pricing_mode,
82
95
  token_usage: TokenUsage.build(
83
- input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
84
- output_tokens: output_tokens(usage),
85
- total_tokens: total_tokens(usage: usage, cache_read: cache_read, tool_use_prompt: tool_use_prompt),
86
- cache_read_input_tokens: usage["cachedContentTokenCount"],
96
+ input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input) +
97
+ tool_use_prompt,
98
+ output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
99
+ total_tokens: usage["totalTokenCount"],
100
+ cache_read_input_tokens: cache_read,
101
+ audio_input_tokens: audio_input,
102
+ audio_output_tokens: audio_output,
87
103
  hidden_output_tokens: usage["thoughtsTokenCount"]
88
104
  ),
89
105
  stream: stream,
90
106
  usage_source: usage_source,
91
- provider_response_id: provider_response_id
107
+ provider_response_id: provider_response_id,
108
+ service_line_items: service_line_items
92
109
  )
93
110
  end
94
111
 
@@ -100,14 +117,41 @@ module LlmCostTracker
100
117
  end
101
118
 
102
119
  def output_tokens(usage)
103
- usage["candidatesTokenCount"].to_i
120
+ (usage["candidatesTokenCount"] || usage["responseTokenCount"]).to_i + usage["thoughtsTokenCount"].to_i
121
+ end
122
+
123
+ def regular_input_tokens(usage:, cache_read:, audio_input:)
124
+ [usage["promptTokenCount"].to_i - cache_read - audio_input, 0].max
104
125
  end
105
126
 
106
- def total_tokens(usage:, cache_read:, tool_use_prompt:)
107
- total = usage["totalTokenCount"]
108
- return total.to_i unless total.nil?
127
+ def regular_output_tokens(usage:, audio_output:)
128
+ [output_tokens(usage) - audio_output, 0].max
129
+ end
109
130
 
110
- [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + tool_use_prompt + output_tokens(usage)
131
+ def audio_input_tokens(usage)
132
+ prompt_audio = modality_tokens(usage["promptTokensDetails"] || usage["prompt_tokens_details"], "AUDIO")
133
+ cache_audio = modality_tokens(usage["cacheTokensDetails"] || usage["cache_tokens_details"], "AUDIO")
134
+ [prompt_audio - cache_audio, 0].max
135
+ end
136
+
137
+ def audio_output_tokens(usage)
138
+ modality_tokens(
139
+ usage["candidatesTokensDetails"] ||
140
+ usage["candidates_tokens_details"] ||
141
+ usage["responseTokensDetails"] ||
142
+ usage["response_tokens_details"],
143
+ "AUDIO"
144
+ )
145
+ end
146
+
147
+ def modality_tokens(details, modality)
148
+ Array(details).sum do |detail|
149
+ next 0 unless detail.is_a?(Hash)
150
+
151
+ next 0 unless detail["modality"] == modality
152
+
153
+ (detail["tokenCount"] || detail["token_count"]).to_i
154
+ end
111
155
  end
112
156
 
113
157
  def stream_response_id(events)
@@ -133,12 +177,58 @@ module LlmCostTracker
133
177
  request.dig("config", "service_tier") ||
134
178
  request.dig("config", "serviceTier")
135
179
  )
136
- request_mode == "flex" ? request_mode : nil
180
+ request_mode == :flex ? request_mode : nil
137
181
  end
138
182
 
139
183
  def response_header(headers, name)
140
184
  headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
141
185
  end
186
+
187
+ def grounding_line_items_for_response(response, model:)
188
+ grounding_line_items(grounding_request_count(response["candidates"]), model: model)
189
+ end
190
+
191
+ def grounding_line_items_for_stream(events, model:)
192
+ quantity = find_event_value(events, reverse: true) do |data|
193
+ count = grounding_request_count(data["candidates"])
194
+ count if count.positive?
195
+ end
196
+ grounding_line_items(quantity || 0, model: model)
197
+ end
198
+
199
+ def grounding_request_count(candidates)
200
+ Array(candidates).sum do |candidate|
201
+ next 0 unless candidate.is_a?(Hash)
202
+
203
+ metadata = candidate["groundingMetadata"] || candidate["grounding_metadata"] || {}
204
+ queries = metadata["webSearchQueries"] || metadata["web_search_queries"] || []
205
+ Array(queries).size
206
+ end
207
+ end
208
+
209
+ def grounding_line_items(query_count, model:)
210
+ return [] unless query_count.positive?
211
+
212
+ billed_quantity = grounding_billed_quantity(query_count, model: model)
213
+ [
214
+ Billing::LineItem.build(
215
+ component_key: :grounding_request,
216
+ quantity: billed_quantity,
217
+ cost_status: Billing::CostStatus::UNKNOWN,
218
+ pricing_basis: :provider_usage,
219
+ provider_field: "response.candidates.groundingMetadata.webSearchQueries",
220
+ details: { web_search_queries: query_count }
221
+ )
222
+ ]
223
+ end
224
+
225
+ def grounding_billed_quantity(query_count, model:)
226
+ per_query_billing?(model) ? query_count : 1
227
+ end
228
+
229
+ def per_query_billing?(model)
230
+ model.to_s.match?(PER_QUERY_GROUNDING_MODEL_PATTERN)
231
+ end
142
232
  end
143
233
  end
144
234
  end
@@ -21,7 +21,19 @@ module LlmCostTracker
21
21
  gb.api.openai.com
22
22
  ae.api.openai.com
23
23
  ].freeze
24
- TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
24
+ TRACKED_PATHS = %w[
25
+ /v1/chat/completions
26
+ /v1/completions
27
+ /v1/embeddings
28
+ /v1/responses
29
+ /v1/images/generations
30
+ /v1/images/edits
31
+ /v1/images/variations
32
+ /v1/audio/transcriptions
33
+ /v1/audio/translations
34
+ /v1/audio/speech
35
+ /v1/moderations
36
+ ].freeze
25
37
 
26
38
  def match?(url)
27
39
  match_uri?(url, hosts: HOSTS, exact_paths: TRACKED_PATHS)
@@ -49,7 +61,9 @@ module LlmCostTracker
49
61
  )
50
62
  end
51
63
 
52
- private
64
+ def auto_enable_stream_usage?(request_url)
65
+ openai_chat_completions_url?(request_url)
66
+ end
53
67
 
54
68
  def provider_for(_request_url)
55
69
  "openai"