llm_cost_tracker 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -0
  3. data/CHANGELOG.md +72 -1
  4. data/README.md +58 -221
  5. data/app/assets/llm_cost_tracker/application.css +218 -41
  6. data/app/controllers/llm_cost_tracker/application_controller.rb +30 -17
  7. data/app/controllers/llm_cost_tracker/assets_controller.rb +11 -1
  8. data/app/controllers/llm_cost_tracker/calls_controller.rb +19 -14
  9. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +10 -2
  10. data/app/helpers/llm_cost_tracker/application_helper.rb +11 -24
  11. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +3 -21
  12. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +4 -4
  13. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +1 -1
  14. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +20 -7
  15. data/app/models/llm_cost_tracker/call.rb +169 -0
  16. data/app/models/llm_cost_tracker/call_line_item.rb +22 -0
  17. data/app/models/llm_cost_tracker/call_rollup.rb +9 -0
  18. data/app/models/llm_cost_tracker/call_tag.rb +16 -0
  19. data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +13 -0
  20. data/app/models/llm_cost_tracker/ingestion/lease.rb +1 -1
  21. data/app/models/llm_cost_tracker/provider_invoice.rb +9 -0
  22. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +125 -34
  23. data/app/services/llm_cost_tracker/dashboard/date_range.rb +1 -1
  24. data/app/services/llm_cost_tracker/dashboard/filter.rb +2 -2
  25. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +74 -21
  26. data/app/services/llm_cost_tracker/dashboard/pagination.rb +6 -4
  27. data/app/services/llm_cost_tracker/dashboard/params.rb +8 -2
  28. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +1 -1
  29. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -3
  30. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +42 -9
  31. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +14 -37
  32. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  33. data/app/services/llm_cost_tracker/dashboard/top_models.rb +1 -1
  34. data/app/views/llm_cost_tracker/calls/index.html.erb +33 -75
  35. data/app/views/llm_cost_tracker/calls/show.html.erb +62 -7
  36. data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -50
  37. data/app/views/llm_cost_tracker/data_quality/index.html.erb +103 -126
  38. data/app/views/llm_cost_tracker/errors/database.html.erb +1 -1
  39. data/app/views/llm_cost_tracker/models/index.html.erb +18 -50
  40. data/app/views/llm_cost_tracker/shared/_filters.html.erb +63 -0
  41. data/app/views/llm_cost_tracker/shared/_sort.html.erb +13 -0
  42. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +1 -1
  43. data/app/views/llm_cost_tracker/tags/index.html.erb +3 -34
  44. data/app/views/llm_cost_tracker/tags/show.html.erb +5 -37
  45. data/lib/llm_cost_tracker/billing/components.rb +53 -0
  46. data/lib/llm_cost_tracker/billing/components.yml +117 -0
  47. data/lib/llm_cost_tracker/billing/cost_status.rb +45 -0
  48. data/lib/llm_cost_tracker/billing/line_item.rb +189 -0
  49. data/lib/llm_cost_tracker/budget.rb +23 -35
  50. data/lib/llm_cost_tracker/capture/stream_collector.rb +47 -33
  51. data/lib/llm_cost_tracker/configuration.rb +36 -19
  52. data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +54 -0
  53. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +24 -32
  54. data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +36 -0
  55. data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +22 -0
  56. data/lib/llm_cost_tracker/doctor/price_check.rb +2 -2
  57. data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +85 -0
  58. data/lib/llm_cost_tracker/doctor/probe.rb +17 -0
  59. data/lib/llm_cost_tracker/doctor/schema_check.rb +31 -0
  60. data/lib/llm_cost_tracker/doctor.rb +43 -45
  61. data/lib/llm_cost_tracker/errors.rb +5 -19
  62. data/lib/llm_cost_tracker/event.rb +10 -2
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +4 -2
  64. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +2 -6
  65. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +157 -0
  66. data/lib/llm_cost_tracker/ingestion/batch.rb +11 -12
  67. data/lib/llm_cost_tracker/ingestion/inbox.rb +39 -23
  68. data/lib/llm_cost_tracker/ingestion/worker.rb +14 -5
  69. data/lib/llm_cost_tracker/ingestion.rb +28 -22
  70. data/lib/llm_cost_tracker/integrations/anthropic.rb +45 -38
  71. data/lib/llm_cost_tracker/integrations/base.rb +36 -29
  72. data/lib/llm_cost_tracker/integrations/openai.rb +85 -40
  73. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +5 -5
  74. data/lib/llm_cost_tracker/integrations.rb +2 -2
  75. data/lib/llm_cost_tracker/ledger/period/totals.rb +12 -9
  76. data/lib/llm_cost_tracker/ledger/period.rb +5 -5
  77. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +4 -10
  78. data/lib/llm_cost_tracker/ledger/rollups.rb +76 -25
  79. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +18 -0
  80. data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +50 -0
  81. data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +37 -0
  82. data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +26 -0
  83. data/lib/llm_cost_tracker/ledger/schema/calls.rb +34 -23
  84. data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +57 -0
  85. data/lib/llm_cost_tracker/ledger/store.rb +110 -18
  86. data/lib/llm_cost_tracker/ledger/tags/query.rb +5 -11
  87. data/lib/llm_cost_tracker/ledger/tags/sql.rb +27 -14
  88. data/lib/llm_cost_tracker/ledger.rb +4 -2
  89. data/lib/llm_cost_tracker/logging.rb +2 -5
  90. data/lib/llm_cost_tracker/middleware/faraday.rb +7 -6
  91. data/lib/llm_cost_tracker/parsers/anthropic.rb +52 -7
  92. data/lib/llm_cost_tracker/parsers/base.rb +8 -3
  93. data/lib/llm_cost_tracker/parsers/gemini.rb +101 -15
  94. data/lib/llm_cost_tracker/parsers/openai_compatible.rb +10 -2
  95. data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +87 -0
  96. data/lib/llm_cost_tracker/parsers/openai_usage.rb +48 -21
  97. data/lib/llm_cost_tracker/parsers/sse.rb +1 -1
  98. data/lib/llm_cost_tracker/parsers.rb +1 -1
  99. data/lib/llm_cost_tracker/prices.json +105 -20
  100. data/lib/llm_cost_tracker/pricing/effective_prices.rb +57 -19
  101. data/lib/llm_cost_tracker/pricing/explainer.rb +4 -5
  102. data/lib/llm_cost_tracker/pricing/lookup.rb +38 -34
  103. data/lib/llm_cost_tracker/pricing/registry.rb +65 -45
  104. data/lib/llm_cost_tracker/pricing/service_charges.rb +204 -0
  105. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +26 -17
  106. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +6 -15
  107. data/lib/llm_cost_tracker/pricing/sync.rb +57 -10
  108. data/lib/llm_cost_tracker/pricing/sync_change_printer.rb +32 -0
  109. data/lib/llm_cost_tracker/pricing.rb +190 -26
  110. data/lib/llm_cost_tracker/railtie.rb +0 -8
  111. data/lib/llm_cost_tracker/report/data.rb +16 -8
  112. data/lib/llm_cost_tracker/report.rb +0 -4
  113. data/lib/llm_cost_tracker/retention.rb +8 -8
  114. data/lib/llm_cost_tracker/tags/context.rb +2 -4
  115. data/lib/llm_cost_tracker/tags/key.rb +4 -0
  116. data/lib/llm_cost_tracker/tags/sanitizer.rb +12 -17
  117. data/lib/llm_cost_tracker/timing.rb +15 -0
  118. data/lib/llm_cost_tracker/token_usage.rb +56 -42
  119. data/lib/llm_cost_tracker/tracker.rb +67 -24
  120. data/lib/llm_cost_tracker/usage_capture.rb +29 -8
  121. data/lib/llm_cost_tracker/version.rb +1 -1
  122. data/lib/llm_cost_tracker.rb +36 -35
  123. data/lib/tasks/llm_cost_tracker.rake +22 -17
  124. metadata +36 -41
  125. data/app/models/llm_cost_tracker/ingestion/event.rb +0 -13
  126. data/app/models/llm_cost_tracker/ledger/call.rb +0 -45
  127. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +0 -66
  128. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +0 -71
  129. data/app/models/llm_cost_tracker/ledger/period/total.rb +0 -13
  130. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +0 -19
  131. data/lib/llm_cost_tracker/configuration/instrumentation.rb +0 -33
  132. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_ingestion_generator.rb +0 -29
  133. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +0 -29
  134. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_period_totals_generator.rb +0 -29
  135. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_provider_response_id_generator.rb +0 -29
  136. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +0 -29
  137. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +0 -42
  138. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_ingestion_to_llm_cost_tracker.rb.erb +0 -33
  139. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +0 -9
  140. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +0 -104
  141. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_provider_response_id_to_llm_api_calls.rb.erb +0 -15
  142. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +0 -21
  143. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +0 -22
  144. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +0 -83
  145. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +0 -26
  146. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +0 -44
  147. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +0 -29
  148. data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +0 -29
  149. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +0 -43
  150. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +0 -32
  151. data/lib/llm_cost_tracker/pricing/components.rb +0 -37
  152. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +0 -63
@@ -1,30 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../schema/adapter"
4
3
  require_relative "../../tags/key"
5
4
 
6
5
  module LlmCostTracker
7
6
  module Ledger
8
7
  module Tags
9
8
  module Sql
9
+ UNTAGGED_LABEL = "(untagged)"
10
+
10
11
  class << self
11
- def value_expression(model, key, table_name:)
12
- key = LlmCostTracker::Tags::Key.validate!(key)
13
- column = "#{table_name}.#{model.connection.quote_column_name('tags')}"
14
-
15
- if Ledger::Schema::Adapter.postgresql?(model.connection)
16
- "#{column}->>#{model.connection.quote(key)}"
17
- elsif Ledger::Schema::Adapter.mysql?(model.connection)
18
- "JSON_UNQUOTE(JSON_EXTRACT(#{column}, #{model.connection.quote(json_path(key))}))"
19
- else
20
- Ledger::Schema::Adapter.ensure_supported!(model.connection)
21
- end
12
+ def join_relation(scope, key)
13
+ validated_key = LlmCostTracker::Tags::Key.validate!(key)
14
+ connection = scope.connection
15
+ join = "LEFT OUTER JOIN #{call_tag_table} ON " \
16
+ "#{call_tag_table}.llm_cost_tracker_call_id = #{scope.quoted_table_name}.id AND " \
17
+ "#{call_tag_table}.#{connection.quote_column_name('key')} = #{connection.quote(validated_key)}"
18
+ scope.joins(join)
19
+ end
20
+
21
+ def value_arel
22
+ Arel.sql("#{call_tag_table}.#{quote_column('value')}")
23
+ end
24
+
25
+ def label_sql(connection)
26
+ "COALESCE(NULLIF(#{raw_value_sql(connection)}, ''), #{connection.quote(UNTAGGED_LABEL)})"
27
+ end
28
+
29
+ def raw_value_sql(connection)
30
+ "#{call_tag_table}.#{connection.quote_column_name('value')}"
22
31
  end
23
32
 
24
33
  private
25
34
 
26
- def json_path(key)
27
- "$.\"#{key}\""
35
+ def call_tag_table
36
+ LlmCostTracker::CallTag.quoted_table_name
37
+ end
38
+
39
+ def quote_column(name)
40
+ LlmCostTracker::CallTag.connection.quote_column_name(name)
28
41
  end
29
42
  end
30
43
  end
@@ -2,11 +2,13 @@
2
2
 
3
3
  require_relative "ledger/schema/adapter"
4
4
  require_relative "ledger/schema/calls"
5
- require_relative "ledger/schema/period_totals"
5
+ require_relative "ledger/schema/call_rollups"
6
+ require_relative "ledger/schema/call_line_items"
7
+ require_relative "ledger/schema/call_tags"
8
+ require_relative "ledger/schema/provider_invoices"
6
9
  require_relative "ledger/tags/query"
7
10
  require_relative "ledger/tags/sql"
8
11
  require_relative "ledger/period"
9
- require_relative "ledger/rollups/batch"
10
12
  require_relative "ledger/rollups/upsert_sql"
11
13
  require_relative "ledger/rollups"
12
14
  require_relative "ledger/store"
@@ -20,12 +20,9 @@ module LlmCostTracker
20
20
  def log(level, message)
21
21
  message = prefixed(message)
22
22
  logger = Rails.logger
23
+ return Kernel.warn(message) unless logger
23
24
 
24
- if logger
25
- logger.try(level, message)
26
- else
27
- Kernel.warn(message)
28
- end
25
+ logger.public_send(level, message)
29
26
  end
30
27
 
31
28
  private
@@ -6,6 +6,7 @@ require "uri"
6
6
 
7
7
  require_relative "../logging"
8
8
  require_relative "../capture/stream"
9
+ require_relative "../timing"
9
10
 
10
11
  module LlmCostTracker
11
12
  module Middleware
@@ -19,14 +20,14 @@ module LlmCostTracker
19
20
  return @app.call(request_env) unless LlmCostTracker.configuration.enabled
20
21
 
21
22
  request_url = request_env.url.to_s
22
- request_body = read_body(request_env.body) || ""
23
+ request_body = read_body(request_env.body)
23
24
  parser = Parsers.find_for(request_url)
24
25
  streaming = parser&.streaming_request?(request_url, request_body)
25
26
  stream_buffer = install_stream_tap(request_env) if streaming
26
27
 
27
28
  Tracker.enforce_budget! if parser
28
29
  context_tags, metadata = tag_snapshot(request_env) if parser
29
- started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
30
+ started_at = LlmCostTracker::Timing.now_monotonic
30
31
 
31
32
  @app.call(request_env).on_complete do |response_env|
32
33
  process(
@@ -34,7 +35,7 @@ module LlmCostTracker
34
35
  request_url: request_url,
35
36
  request_body: request_body,
36
37
  response_env: response_env,
37
- latency_ms: ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round,
38
+ latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
38
39
  streaming: streaming,
39
40
  stream_buffer: stream_buffer,
40
41
  context_tags: context_tags,
@@ -135,7 +136,7 @@ module LlmCostTracker
135
136
  end
136
137
 
137
138
  def install_stream_tap(request_env)
138
- request = request_env.try(:request)
139
+ request = request_env.request
139
140
  return nil unless request
140
141
 
141
142
  original = request.on_data
@@ -204,8 +205,8 @@ module LlmCostTracker
204
205
  uri = URI.parse(value.to_s)
205
206
  uri.query = nil
206
207
  uri.fragment = nil
207
- uri.try(:user=, nil)
208
- uri.try(:password=, nil)
208
+ uri.user = nil
209
+ uri.password = nil
209
210
  uri.to_s
210
211
  rescue URI::InvalidURIError
211
212
  value.to_s.split("?", 2).first
@@ -31,7 +31,8 @@ module LlmCostTracker
31
31
  pricing_mode: pricing_mode(request: request, response: response, usage: usage),
32
32
  model: response["model"] || request["model"],
33
33
  token_usage: token_usage(usage: usage, cache_read: cache_read),
34
- usage_source: :response
34
+ usage_source: :response,
35
+ service_line_items: service_line_items(usage)
35
36
  )
36
37
  end
37
38
 
@@ -87,7 +88,39 @@ module LlmCostTracker
87
88
  model: model,
88
89
  token_usage: token_usage(usage: usage, cache_read: cache_read),
89
90
  stream: true,
90
- usage_source: :stream_final
91
+ usage_source: :stream_final,
92
+ service_line_items: service_line_items(usage)
93
+ )
94
+ end
95
+
96
+ def service_line_items(usage)
97
+ server_tool_use = usage["server_tool_use"]
98
+ return [] unless server_tool_use.is_a?(Hash)
99
+
100
+ [
101
+ service_line_item(
102
+ component_key: :web_search_request,
103
+ quantity: server_tool_use["web_search_requests"],
104
+ provider_field: "usage.server_tool_use.web_search_requests"
105
+ ),
106
+ service_line_item(
107
+ component_key: :code_execution_request,
108
+ quantity: server_tool_use["code_execution_requests"],
109
+ provider_field: "usage.server_tool_use.code_execution_requests"
110
+ )
111
+ ].compact
112
+ end
113
+
114
+ def service_line_item(component_key:, quantity:, provider_field:)
115
+ quantity = quantity.to_i
116
+ return if quantity.zero?
117
+
118
+ Billing::LineItem.build(
119
+ component_key: component_key,
120
+ quantity: quantity,
121
+ cost_status: Billing::CostStatus::UNKNOWN,
122
+ pricing_basis: :provider_usage,
123
+ provider_field: provider_field
91
124
  )
92
125
  end
93
126
 
@@ -97,22 +130,34 @@ module LlmCostTracker
97
130
  cache_creation = usage["cache_creation"]
98
131
  if cache_creation.is_a?(Hash)
99
132
  cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
100
- cache_write_1h = cache_creation["ephemeral_1h_input_tokens"].to_i
133
+ cache_write_extended = cache_creation["ephemeral_1h_input_tokens"].to_i
101
134
  else
135
+ warn_unexpected_cache_creation(cache_creation, usage)
102
136
  cache_write = usage["cache_creation_input_tokens"].to_i
103
- cache_write_1h = 0
137
+ cache_write_extended = 0
104
138
  end
139
+ hidden_output = (
140
+ usage["thinking_tokens"] || usage["thinking_output_tokens"] ||
141
+ usage.dig("output_tokens_details", "reasoning_tokens")
142
+ ).to_i
105
143
 
106
144
  TokenUsage.build(
107
145
  input_tokens: input,
108
146
  output_tokens: output,
109
- total_tokens: input + output + cache_read + cache_write + cache_write_1h,
110
- cache_read_input_tokens: usage["cache_read_input_tokens"],
147
+ total_tokens: input + output + cache_read + cache_write + cache_write_extended,
148
+ cache_read_input_tokens: cache_read,
111
149
  cache_write_input_tokens: cache_write,
112
- cache_write_1h_input_tokens: cache_write_1h
150
+ cache_write_extended_input_tokens: cache_write_extended,
151
+ hidden_output_tokens: hidden_output
113
152
  )
114
153
  end
115
154
 
155
+ def warn_unexpected_cache_creation(cache_creation, usage)
156
+ return if cache_creation.nil? || usage.key?("cache_creation_input_tokens")
157
+
158
+ Logging.warn("Anthropic usage.cache_creation has unexpected shape: #{cache_creation.class}")
159
+ end
160
+
116
161
  def pricing_mode(request:, response:, usage:)
117
162
  modes = []
118
163
  speed = usage&.fetch("speed", nil) || response&.fetch("speed", nil) || request["speed"]
@@ -59,7 +59,10 @@ module LlmCostTracker
59
59
  )
60
60
  extra_match = block_given? ? yield(uri) : true
61
61
 
62
- host_match && path_match && extra_match ? true : false
62
+ next false unless host_match && path_match
63
+ next false unless extra_match
64
+
65
+ true
63
66
  end
64
67
  end
65
68
 
@@ -100,7 +103,8 @@ module LlmCostTracker
100
103
  nil
101
104
  end
102
105
 
103
- def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil)
106
+ def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil,
107
+ service_line_items: nil)
104
108
  UsageCapture.build(
105
109
  provider: provider,
106
110
  provider_response_id: provider_response_id,
@@ -108,7 +112,8 @@ module LlmCostTracker
108
112
  model: model || UsageCapture::UNKNOWN_MODEL,
109
113
  token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
110
114
  stream: true,
111
- usage_source: :unknown
115
+ usage_source: :unknown,
116
+ service_line_items: service_line_items
112
117
  )
113
118
  end
114
119
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../billing/line_item"
3
4
  require_relative "base"
4
5
 
5
6
  module LlmCostTracker
@@ -8,6 +9,7 @@ module LlmCostTracker
8
9
  HOSTS = %w[generativelanguage.googleapis.com].freeze
9
10
  TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
10
11
  STREAM_PATH_PATTERN = /:streamGenerateContent\z/
12
+ PER_QUERY_GROUNDING_MODEL_PATTERN = /\bgemini-(?:[3-9]|[1-9]\d)\b/i
11
13
 
12
14
  def match?(url)
13
15
  match_uri?(url, hosts: HOSTS, path_pattern: TRACKED_PATH_PATTERN)
@@ -31,12 +33,14 @@ module LlmCostTracker
31
33
  return nil unless usage
32
34
 
33
35
  request = safe_json_parse(request_body)
36
+ model = extract_model_from_url(request_url)
34
37
  build_usage_capture(
35
38
  request_url: request_url,
36
39
  usage: usage,
37
40
  usage_source: :response,
38
41
  provider_response_id: response["responseId"],
39
- pricing_mode: pricing_mode(request: request, response_headers: response_headers)
42
+ pricing_mode: pricing_mode(request: request, response_headers: response_headers),
43
+ service_line_items: grounding_line_items_for_response(response, model: model)
40
44
  )
41
45
  end
42
46
 
@@ -48,6 +52,7 @@ module LlmCostTracker
48
52
  model = extract_model_from_url(request_url)
49
53
  response_id = stream_response_id(events)
50
54
  mode = pricing_mode(request: request, response_headers: response_headers)
55
+ service_line_items = grounding_line_items_for_stream(events, model: model)
51
56
 
52
57
  if usage
53
58
  build_usage_capture(
@@ -56,14 +61,16 @@ module LlmCostTracker
56
61
  stream: true,
57
62
  usage_source: :stream_final,
58
63
  provider_response_id: response_id,
59
- pricing_mode: mode
64
+ pricing_mode: mode,
65
+ service_line_items: service_line_items
60
66
  )
61
67
  else
62
68
  build_unknown_stream_usage(
63
69
  provider: "gemini",
64
70
  model: model,
65
71
  provider_response_id: response_id,
66
- pricing_mode: mode
72
+ pricing_mode: mode,
73
+ service_line_items: service_line_items
67
74
  )
68
75
  end
69
76
  end
@@ -71,24 +78,30 @@ module LlmCostTracker
71
78
  private
72
79
 
73
80
  def build_usage_capture(request_url:, usage:, usage_source:, stream: false, provider_response_id: nil,
74
- pricing_mode: nil)
81
+ pricing_mode: nil, service_line_items: nil)
75
82
  cache_read = usage["cachedContentTokenCount"].to_i
76
83
  tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
84
+ audio_input = audio_input_tokens(usage)
85
+ audio_output = audio_output_tokens(usage)
77
86
 
78
87
  UsageCapture.build(
79
88
  provider: "gemini",
80
89
  model: extract_model_from_url(request_url),
81
90
  pricing_mode: pricing_mode,
82
91
  token_usage: TokenUsage.build(
83
- input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
84
- output_tokens: output_tokens(usage),
85
- total_tokens: total_tokens(usage: usage, cache_read: cache_read, tool_use_prompt: tool_use_prompt),
86
- cache_read_input_tokens: usage["cachedContentTokenCount"],
92
+ input_tokens: regular_input_tokens(usage: usage, cache_read: cache_read, audio_input: audio_input) +
93
+ tool_use_prompt,
94
+ output_tokens: regular_output_tokens(usage: usage, audio_output: audio_output),
95
+ total_tokens: usage["totalTokenCount"],
96
+ cache_read_input_tokens: cache_read,
97
+ audio_input_tokens: audio_input,
98
+ audio_output_tokens: audio_output,
87
99
  hidden_output_tokens: usage["thoughtsTokenCount"]
88
100
  ),
89
101
  stream: stream,
90
102
  usage_source: usage_source,
91
- provider_response_id: provider_response_id
103
+ provider_response_id: provider_response_id,
104
+ service_line_items: service_line_items
92
105
  )
93
106
  end
94
107
 
@@ -100,14 +113,41 @@ module LlmCostTracker
100
113
  end
101
114
 
102
115
  def output_tokens(usage)
103
- usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
116
+ (usage["candidatesTokenCount"] || usage["responseTokenCount"]).to_i + usage["thoughtsTokenCount"].to_i
117
+ end
118
+
119
+ def regular_input_tokens(usage:, cache_read:, audio_input:)
120
+ [usage["promptTokenCount"].to_i - cache_read - audio_input, 0].max
121
+ end
122
+
123
+ def regular_output_tokens(usage:, audio_output:)
124
+ [output_tokens(usage) - audio_output, 0].max
125
+ end
126
+
127
+ def audio_input_tokens(usage)
128
+ prompt_audio = modality_tokens(usage["promptTokensDetails"] || usage["prompt_tokens_details"], "AUDIO")
129
+ cache_audio = modality_tokens(usage["cacheTokensDetails"] || usage["cache_tokens_details"], "AUDIO")
130
+ [prompt_audio - cache_audio, 0].max
104
131
  end
105
132
 
106
- def total_tokens(usage:, cache_read:, tool_use_prompt:)
107
- total = usage["totalTokenCount"]
108
- return total.to_i unless total.nil?
133
+ def audio_output_tokens(usage)
134
+ modality_tokens(
135
+ usage["candidatesTokensDetails"] ||
136
+ usage["candidates_tokens_details"] ||
137
+ usage["responseTokensDetails"] ||
138
+ usage["response_tokens_details"],
139
+ "AUDIO"
140
+ )
141
+ end
142
+
143
+ def modality_tokens(details, modality)
144
+ Array(details).sum do |detail|
145
+ next 0 unless detail.is_a?(Hash)
146
+
147
+ next 0 unless detail["modality"] == modality
109
148
 
110
- [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + tool_use_prompt + output_tokens(usage)
149
+ (detail["tokenCount"] || detail["token_count"]).to_i
150
+ end
111
151
  end
112
152
 
113
153
  def stream_response_id(events)
@@ -133,12 +173,58 @@ module LlmCostTracker
133
173
  request.dig("config", "service_tier") ||
134
174
  request.dig("config", "serviceTier")
135
175
  )
136
- request_mode == "flex" ? request_mode : nil
176
+ request_mode == :flex ? request_mode : nil
137
177
  end
138
178
 
139
179
  def response_header(headers, name)
140
180
  headers.to_h.find { |key, _value| key.to_s.downcase == name }&.last
141
181
  end
182
+
183
+ def grounding_line_items_for_response(response, model:)
184
+ grounding_line_items(grounding_request_count(response["candidates"]), model: model)
185
+ end
186
+
187
+ def grounding_line_items_for_stream(events, model:)
188
+ quantity = find_event_value(events, reverse: true) do |data|
189
+ count = grounding_request_count(data["candidates"])
190
+ count if count.positive?
191
+ end
192
+ grounding_line_items(quantity || 0, model: model)
193
+ end
194
+
195
+ def grounding_request_count(candidates)
196
+ Array(candidates).sum do |candidate|
197
+ next 0 unless candidate.is_a?(Hash)
198
+
199
+ metadata = candidate["groundingMetadata"] || candidate["grounding_metadata"] || {}
200
+ queries = metadata["webSearchQueries"] || metadata["web_search_queries"] || []
201
+ Array(queries).size
202
+ end
203
+ end
204
+
205
+ def grounding_line_items(query_count, model:)
206
+ return [] unless query_count.positive?
207
+
208
+ billed_quantity = grounding_billed_quantity(query_count, model: model)
209
+ [
210
+ Billing::LineItem.build(
211
+ component_key: :grounding_request,
212
+ quantity: billed_quantity,
213
+ cost_status: Billing::CostStatus::UNKNOWN,
214
+ pricing_basis: :provider_usage,
215
+ provider_field: "response.candidates.groundingMetadata.webSearchQueries",
216
+ details: { web_search_queries: query_count }
217
+ )
218
+ ]
219
+ end
220
+
221
+ def grounding_billed_quantity(query_count, model:)
222
+ per_query_billing?(model) ? query_count : 1
223
+ end
224
+
225
+ def per_query_billing?(model)
226
+ model.to_s.match?(PER_QUERY_GROUNDING_MODEL_PATTERN)
227
+ end
142
228
  end
143
229
  end
144
230
  end
@@ -15,10 +15,18 @@ module LlmCostTracker
15
15
  end
16
16
 
17
17
  def provider_names
18
- [
18
+ providers = LlmCostTracker.configuration.openai_compatible_providers
19
+ cached = @provider_names
20
+ return cached if cached && @provider_names_providers.equal?(providers)
21
+
22
+ names = [
19
23
  "openai_compatible",
20
- *LlmCostTracker.configuration.openai_compatible_providers.each_value.map(&:to_s)
24
+ *providers.each_value.map { |provider| provider.to_s.downcase }
21
25
  ].uniq.freeze
26
+ return names unless providers.frozen?
27
+
28
+ @provider_names_providers = providers
29
+ @provider_names = names
22
30
  end
23
31
 
24
32
  def parse(request_url:, request_body:, response_status:, response_body:, **)
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../billing/line_item"
4
+
5
+ module LlmCostTracker
6
+ module Parsers
7
+ module OpenaiServiceCharges
8
+ RESPONSE_OUTPUT_COMPONENTS = {
9
+ "web_search_call" => :web_search_request,
10
+ "file_search_call" => :file_search_call,
11
+ "code_interpreter_call" => :container_session
12
+ }.freeze
13
+
14
+ module_function
15
+
16
+ def line_items_from_output(output_items)
17
+ deduped = {}
18
+ Array(output_items).each { |item| store_output_item(deduped, item) }
19
+ deduped.values.filter_map { |item| build_line_item(item) }
20
+ end
21
+
22
+ def billable?(item)
23
+ return false unless item.is_a?(Hash)
24
+
25
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
26
+ return false unless component
27
+ return true unless component == :web_search_request
28
+
29
+ action_type = item.dig("action", "type")
30
+ action_type.nil? || action_type == "search"
31
+ end
32
+
33
+ def store_output_item(output_items, item)
34
+ return unless billable?(item)
35
+
36
+ component = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
37
+ key = if component == :container_session && item["container_id"]
38
+ "#{component}:#{item['container_id']}"
39
+ else
40
+ item["id"] || "#{item['type']}:#{output_items.length}"
41
+ end
42
+ output_items[key] = item
43
+ end
44
+
45
+ def build_line_item(item)
46
+ component_key = RESPONSE_OUTPUT_COMPONENTS[item["type"]]
47
+ return nil unless component_key
48
+
49
+ provider_item_id = if component_key == :container_session
50
+ item["container_id"] || item["id"]
51
+ else
52
+ item["id"]
53
+ end
54
+ Billing::LineItem.build(
55
+ component_key: component_key,
56
+ quantity: 1,
57
+ cost_status: Billing::CostStatus::UNKNOWN,
58
+ pricing_basis: :provider_usage,
59
+ provider_field: "response.output.#{item['type']}",
60
+ provider_item_id: provider_item_id,
61
+ details: line_item_details(item)
62
+ )
63
+ end
64
+
65
+ def line_item_details(item)
66
+ {
67
+ "status" => item["status"],
68
+ "action_type" => item.dig("action", "type"),
69
+ "container_id" => item["container_id"]
70
+ }.compact
71
+ end
72
+
73
+ def openai_service_line_items(response)
74
+ line_items_from_output(response["output"])
75
+ end
76
+
77
+ def openai_stream_service_line_items(events)
78
+ output_items = []
79
+ each_event_data(events) do |data|
80
+ output_items.concat(Array(data.dig("response", "output")))
81
+ output_items << data["item"] if data["item"]
82
+ end
83
+ line_items_from_output(output_items)
84
+ end
85
+ end
86
+ end
87
+ end