llm_cost_tracker 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.md +11 -9
  4. data/app/assets/llm_cost_tracker/application.css +3 -0
  5. data/app/controllers/llm_cost_tracker/application_controller.rb +22 -4
  6. data/app/controllers/llm_cost_tracker/calls_controller.rb +6 -11
  7. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +2 -1
  8. data/app/controllers/llm_cost_tracker/data_quality_controller.rb +5 -1
  9. data/app/controllers/llm_cost_tracker/models_controller.rb +0 -1
  10. data/app/controllers/llm_cost_tracker/tags_controller.rb +1 -8
  11. data/app/helpers/llm_cost_tracker/application_helper.rb +2 -1
  12. data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +1 -2
  13. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -1
  14. data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +10 -27
  15. data/app/helpers/llm_cost_tracker/token_usage_helper.rb +58 -0
  16. data/app/models/llm_cost_tracker/ingestion/event.rb +13 -0
  17. data/app/models/llm_cost_tracker/ingestion/lease.rb +11 -0
  18. data/app/models/llm_cost_tracker/ledger/call.rb +45 -0
  19. data/app/models/llm_cost_tracker/ledger/call_metrics.rb +66 -0
  20. data/app/models/llm_cost_tracker/ledger/period/grouping.rb +71 -0
  21. data/app/models/llm_cost_tracker/ledger/period/total.rb +13 -0
  22. data/app/models/llm_cost_tracker/ledger/tags/accessors.rb +19 -0
  23. data/app/services/llm_cost_tracker/dashboard/data_quality.rb +111 -94
  24. data/app/services/llm_cost_tracker/dashboard/date_range.rb +2 -2
  25. data/app/services/llm_cost_tracker/dashboard/filter.rb +7 -18
  26. data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +58 -67
  27. data/app/services/llm_cost_tracker/dashboard/pagination.rb +59 -0
  28. data/app/services/llm_cost_tracker/dashboard/params.rb +26 -0
  29. data/app/services/llm_cost_tracker/dashboard/provider_breakdown.rb +18 -20
  30. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +4 -13
  31. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +28 -61
  32. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +8 -21
  33. data/app/services/llm_cost_tracker/dashboard/time_series.rb +1 -1
  34. data/app/services/llm_cost_tracker/dashboard/top_models.rb +12 -47
  35. data/app/views/llm_cost_tracker/calls/index.html.erb +12 -18
  36. data/app/views/llm_cost_tracker/calls/show.html.erb +30 -32
  37. data/app/views/llm_cost_tracker/dashboard/index.html.erb +17 -19
  38. data/app/views/llm_cost_tracker/data_quality/index.html.erb +108 -135
  39. data/app/views/llm_cost_tracker/models/index.html.erb +8 -9
  40. data/app/views/llm_cost_tracker/shared/setup_required.html.erb +13 -2
  41. data/app/views/llm_cost_tracker/tags/show.html.erb +20 -20
  42. data/lib/llm_cost_tracker/budget.rb +8 -20
  43. data/lib/llm_cost_tracker/capture/stream.rb +9 -0
  44. data/lib/llm_cost_tracker/capture/stream_collector.rb +182 -0
  45. data/lib/llm_cost_tracker/{integrations → capture}/stream_tracker.rb +40 -72
  46. data/lib/llm_cost_tracker/configuration/instrumentation.rb +3 -7
  47. data/lib/llm_cost_tracker/configuration.rb +28 -35
  48. data/lib/llm_cost_tracker/doctor/capture_verifier.rb +61 -0
  49. data/lib/llm_cost_tracker/doctor/check.rb +7 -0
  50. data/lib/llm_cost_tracker/doctor/ingestion_check.rb +22 -59
  51. data/lib/llm_cost_tracker/doctor/price_check.rb +60 -0
  52. data/lib/llm_cost_tracker/doctor.rb +63 -71
  53. data/lib/llm_cost_tracker/errors.rb +4 -15
  54. data/lib/llm_cost_tracker/event.rb +6 -6
  55. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_token_usage_generator.rb +42 -0
  56. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -0
  57. data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +7 -7
  58. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +3 -3
  59. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_token_usage_to_llm_api_calls.rb.erb +22 -0
  60. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +9 -14
  61. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +0 -4
  62. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +12 -1
  63. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +2 -2
  64. data/lib/llm_cost_tracker/{storage/active_record_inbox_batch.rb → ingestion/batch.rb} +21 -20
  65. data/lib/llm_cost_tracker/ingestion/inbox.rb +105 -0
  66. data/lib/llm_cost_tracker/{storage/active_record_ingestor_lease.rb → ingestion/lease_claim.rb} +5 -7
  67. data/lib/llm_cost_tracker/{storage/active_record_ingestor.rb → ingestion/worker.rb} +38 -48
  68. data/lib/llm_cost_tracker/ingestion.rb +129 -0
  69. data/lib/llm_cost_tracker/integrations/anthropic.rb +52 -34
  70. data/lib/llm_cost_tracker/integrations/base.rb +73 -34
  71. data/lib/llm_cost_tracker/integrations/openai.rb +45 -39
  72. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +40 -30
  73. data/lib/llm_cost_tracker/integrations.rb +43 -0
  74. data/lib/llm_cost_tracker/ledger/period/totals.rb +66 -0
  75. data/lib/llm_cost_tracker/{storage/active_record_periods.rb → ledger/period.rb} +2 -2
  76. data/lib/llm_cost_tracker/ledger/rollups/batch.rb +43 -0
  77. data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +46 -0
  78. data/lib/llm_cost_tracker/ledger/rollups.rb +87 -0
  79. data/lib/llm_cost_tracker/ledger/schema/adapter.rb +51 -0
  80. data/lib/llm_cost_tracker/ledger/schema/calls.rb +101 -0
  81. data/lib/llm_cost_tracker/ledger/schema/period_totals.rb +32 -0
  82. data/lib/llm_cost_tracker/ledger/store.rb +60 -0
  83. data/lib/llm_cost_tracker/ledger/tags/query.rb +29 -0
  84. data/lib/llm_cost_tracker/ledger/tags/sql.rb +33 -0
  85. data/lib/llm_cost_tracker/ledger.rb +13 -0
  86. data/lib/llm_cost_tracker/logging.rb +3 -6
  87. data/lib/llm_cost_tracker/middleware/faraday.rb +35 -36
  88. data/lib/llm_cost_tracker/parsers/anthropic.rb +38 -27
  89. data/lib/llm_cost_tracker/parsers/base.rb +10 -19
  90. data/lib/llm_cost_tracker/parsers/gemini.rb +15 -16
  91. data/lib/llm_cost_tracker/parsers/openai_usage.rb +24 -19
  92. data/lib/llm_cost_tracker/parsers/sse.rb +4 -7
  93. data/lib/llm_cost_tracker/parsers.rb +20 -0
  94. data/lib/llm_cost_tracker/prices.json +52 -11
  95. data/lib/llm_cost_tracker/pricing/components.rb +37 -0
  96. data/lib/llm_cost_tracker/pricing/effective_prices.rb +40 -50
  97. data/lib/llm_cost_tracker/pricing/explainer.rb +12 -23
  98. data/lib/llm_cost_tracker/pricing/lookup.rb +24 -25
  99. data/lib/llm_cost_tracker/pricing/registry.rb +156 -0
  100. data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +107 -0
  101. data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +53 -0
  102. data/lib/llm_cost_tracker/pricing/sync/registry_loader.rb +63 -0
  103. data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +31 -0
  104. data/lib/llm_cost_tracker/pricing/sync.rb +143 -0
  105. data/lib/llm_cost_tracker/pricing/unknown.rb +46 -0
  106. data/lib/llm_cost_tracker/pricing.rb +33 -32
  107. data/lib/llm_cost_tracker/railtie.rb +7 -8
  108. data/lib/llm_cost_tracker/report/data.rb +72 -0
  109. data/lib/llm_cost_tracker/report/formatter.rb +69 -0
  110. data/lib/llm_cost_tracker/report.rb +8 -8
  111. data/lib/llm_cost_tracker/retention.rb +27 -10
  112. data/lib/llm_cost_tracker/tags/context.rb +35 -0
  113. data/lib/llm_cost_tracker/tags/key.rb +18 -0
  114. data/lib/llm_cost_tracker/tags/sanitizer.rb +68 -0
  115. data/lib/llm_cost_tracker/token_usage.rb +67 -0
  116. data/lib/llm_cost_tracker/tracker.rb +38 -70
  117. data/lib/llm_cost_tracker/usage_capture.rb +37 -0
  118. data/lib/llm_cost_tracker/version.rb +1 -1
  119. data/lib/llm_cost_tracker.rb +56 -78
  120. data/lib/tasks/llm_cost_tracker.rake +18 -13
  121. metadata +54 -58
  122. data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +0 -81
  123. data/app/services/llm_cost_tracker/pagination.rb +0 -57
  124. data/lib/llm_cost_tracker/active_record_adapter.rb +0 -53
  125. data/lib/llm_cost_tracker/capture_verifier.rb +0 -64
  126. data/lib/llm_cost_tracker/cost.rb +0 -12
  127. data/lib/llm_cost_tracker/doctor/capture_check.rb +0 -39
  128. data/lib/llm_cost_tracker/event_metadata.rb +0 -52
  129. data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_usage_breakdown_generator.rb +0 -29
  130. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_usage_breakdown_to_llm_api_calls.rb.erb +0 -29
  131. data/lib/llm_cost_tracker/inbox_event.rb +0 -9
  132. data/lib/llm_cost_tracker/ingestor_lease.rb +0 -9
  133. data/lib/llm_cost_tracker/integrations/object_reader.rb +0 -56
  134. data/lib/llm_cost_tracker/integrations/registry.rb +0 -71
  135. data/lib/llm_cost_tracker/llm_api_call.rb +0 -60
  136. data/lib/llm_cost_tracker/llm_api_call_metrics.rb +0 -63
  137. data/lib/llm_cost_tracker/parameter_hash.rb +0 -33
  138. data/lib/llm_cost_tracker/parsed_usage.rb +0 -72
  139. data/lib/llm_cost_tracker/parsers/registry.rb +0 -58
  140. data/lib/llm_cost_tracker/period_grouping.rb +0 -67
  141. data/lib/llm_cost_tracker/period_total.rb +0 -9
  142. data/lib/llm_cost_tracker/price_freshness.rb +0 -38
  143. data/lib/llm_cost_tracker/price_registry.rb +0 -144
  144. data/lib/llm_cost_tracker/price_sync/fetcher.rb +0 -104
  145. data/lib/llm_cost_tracker/price_sync/registry_diff.rb +0 -51
  146. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +0 -61
  147. data/lib/llm_cost_tracker/price_sync/registry_writer.rb +0 -29
  148. data/lib/llm_cost_tracker/price_sync.rb +0 -144
  149. data/lib/llm_cost_tracker/report_data.rb +0 -94
  150. data/lib/llm_cost_tracker/report_formatter.rb +0 -67
  151. data/lib/llm_cost_tracker/request_url.rb +0 -20
  152. data/lib/llm_cost_tracker/storage/active_record_backend.rb +0 -167
  153. data/lib/llm_cost_tracker/storage/active_record_connection_cleanup.rb +0 -13
  154. data/lib/llm_cost_tracker/storage/active_record_inbox.rb +0 -160
  155. data/lib/llm_cost_tracker/storage/active_record_period_totals.rb +0 -84
  156. data/lib/llm_cost_tracker/storage/active_record_rollup_batch.rb +0 -41
  157. data/lib/llm_cost_tracker/storage/active_record_rollup_upsert_sql.rb +0 -42
  158. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +0 -146
  159. data/lib/llm_cost_tracker/storage/active_record_store.rb +0 -145
  160. data/lib/llm_cost_tracker/storage/writer.rb +0 -35
  161. data/lib/llm_cost_tracker/stream_capture.rb +0 -7
  162. data/lib/llm_cost_tracker/stream_collector.rb +0 -199
  163. data/lib/llm_cost_tracker/tag_accessors.rb +0 -15
  164. data/lib/llm_cost_tracker/tag_context.rb +0 -52
  165. data/lib/llm_cost_tracker/tag_key.rb +0 -16
  166. data/lib/llm_cost_tracker/tag_query.rb +0 -43
  167. data/lib/llm_cost_tracker/tag_sanitizer.rb +0 -81
  168. data/lib/llm_cost_tracker/tag_sql.rb +0 -34
  169. data/lib/llm_cost_tracker/tags_column.rb +0 -105
  170. data/lib/llm_cost_tracker/unknown_pricing.rb +0 -54
  171. data/lib/llm_cost_tracker/usage_breakdown.rb +0 -30
  172. data/lib/llm_cost_tracker/value_helpers.rb +0 -40
@@ -24,17 +24,13 @@ module LlmCostTracker
24
24
 
25
25
  request = safe_json_parse(request_body)
26
26
  cache_read = usage["cache_read_input_tokens"].to_i
27
- cache_write = usage["cache_creation_input_tokens"].to_i
28
27
 
29
- ParsedUsage.build(
28
+ UsageCapture.build(
30
29
  provider: "anthropic",
31
30
  provider_response_id: response["id"],
31
+ pricing_mode: pricing_mode(request, response, usage),
32
32
  model: response["model"] || request["model"],
33
- input_tokens: usage["input_tokens"].to_i,
34
- output_tokens: usage["output_tokens"].to_i,
35
- total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i + cache_read + cache_write,
36
- cache_read_input_tokens: usage["cache_read_input_tokens"],
37
- cache_write_input_tokens: usage["cache_creation_input_tokens"],
33
+ token_usage: token_usage(usage, cache_read),
38
34
  usage_source: :response
39
35
  )
40
36
  end
@@ -43,17 +39,18 @@ module LlmCostTracker
43
39
  return nil unless response_status == 200
44
40
 
45
41
  request = safe_json_parse(request_body)
46
- model = stream_model(events) || request["model"]
42
+ model = find_event_value(events) { |data| data.dig("message", "model") } || request["model"]
47
43
  usage = stream_usage(events)
48
- response_id = stream_response_id(events)
44
+ response_id = find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
49
45
 
50
46
  if usage
51
- build_stream_result(model, usage, response_id)
47
+ build_stream_result(model, usage, response_id, pricing_mode(request, nil, usage))
52
48
  else
53
49
  build_unknown_stream_usage(
54
50
  provider: "anthropic",
55
51
  model: model,
56
- provider_response_id: response_id
52
+ provider_response_id: response_id,
53
+ pricing_mode: pricing_mode(request, nil, usage)
57
54
  )
58
55
  end
59
56
  end
@@ -75,33 +72,47 @@ module LlmCostTracker
75
72
  end
76
73
  end
77
74
 
78
- def stream_model(events)
79
- find_event_value(events) { |data| data.dig("message", "model") }
80
- end
75
+ def build_stream_result(model, usage, response_id, pricing_mode)
76
+ cache_read = usage["cache_read_input_tokens"].to_i
81
77
 
82
- def stream_response_id(events)
83
- find_event_value(events) { |data| data.dig("message", "id") || data["id"] }
78
+ UsageCapture.build(
79
+ provider: "anthropic",
80
+ provider_response_id: response_id,
81
+ pricing_mode: pricing_mode,
82
+ model: model,
83
+ token_usage: token_usage(usage, cache_read),
84
+ stream: true,
85
+ usage_source: :stream_final
86
+ )
84
87
  end
85
88
 
86
- def build_stream_result(model, usage, response_id)
89
+ def token_usage(usage, cache_read)
87
90
  input = usage["input_tokens"].to_i
88
91
  output = usage["output_tokens"].to_i
89
- cache_read = usage["cache_read_input_tokens"].to_i
90
- cache_write = usage["cache_creation_input_tokens"].to_i
92
+ cache_creation = usage["cache_creation"]
93
+ if cache_creation.is_a?(Hash)
94
+ cache_write = cache_creation["ephemeral_5m_input_tokens"].to_i
95
+ cache_write_1h = cache_creation["ephemeral_1h_input_tokens"].to_i
96
+ else
97
+ cache_write = usage["cache_creation_input_tokens"].to_i
98
+ cache_write_1h = 0
99
+ end
91
100
 
92
- ParsedUsage.build(
93
- provider: "anthropic",
94
- provider_response_id: response_id,
95
- model: model,
101
+ TokenUsage.build(
96
102
  input_tokens: input,
97
103
  output_tokens: output,
98
- total_tokens: input + output + cache_read + cache_write,
104
+ total_tokens: input + output + cache_read + cache_write + cache_write_1h,
99
105
  cache_read_input_tokens: usage["cache_read_input_tokens"],
100
- cache_write_input_tokens: usage["cache_creation_input_tokens"],
101
- stream: true,
102
- usage_source: :stream_final
106
+ cache_write_input_tokens: cache_write,
107
+ cache_write_1h_input_tokens: cache_write_1h
103
108
  )
104
109
  end
110
+
111
+ def pricing_mode(request, response, usage)
112
+ usage&.fetch("service_tier", nil) ||
113
+ response&.fetch("service_tier", nil) ||
114
+ request["service_tier"]
115
+ end
105
116
  end
106
117
  end
107
118
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "active_support/core_ext/object/blank"
3
4
  require "json"
4
5
  require "uri"
5
6
 
@@ -19,10 +20,9 @@ module LlmCostTracker
19
20
  end
20
21
 
21
22
  def streaming_request?(_request_url, request_body)
22
- return false if request_body.nil?
23
+ return false if request_body.blank?
23
24
 
24
25
  body = request_body.to_s
25
- return false if body.empty?
26
26
 
27
27
  request = safe_json_parse(body)
28
28
  request.is_a?(Hash) && request["stream"] == true
@@ -35,7 +35,7 @@ module LlmCostTracker
35
35
  private
36
36
 
37
37
  def safe_json_parse(body)
38
- return {} if body.nil? || body.empty?
38
+ return {} if body.blank?
39
39
 
40
40
  JSON.parse(body)
41
41
  rescue JSON::ParserError
@@ -49,7 +49,7 @@ module LlmCostTracker
49
49
 
50
50
  def match_uri?(url, hosts: nil, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
51
51
  uri_matches?(url) do |uri|
52
- host_match = hosts.nil? || host_matches?(uri, hosts)
52
+ host_match = hosts.nil? || hosts.include?(uri.host.to_s.downcase)
53
53
  path_match = path_matches?(
54
54
  uri,
55
55
  exact_paths: exact_paths,
@@ -69,10 +69,6 @@ module LlmCostTracker
69
69
  nil
70
70
  end
71
71
 
72
- def host_matches?(uri, hosts)
73
- hosts.include?(uri.host.to_s.downcase)
74
- end
75
-
76
72
  def path_matches?(uri, exact_paths: nil, path_includes: nil, path_suffixes: nil, path_pattern: nil)
77
73
  path = uri.path.to_s
78
74
  matches = true
@@ -98,28 +94,23 @@ module LlmCostTracker
98
94
  def find_event_value(events, reverse: false)
99
95
  each_event_data(events, reverse:) do |data|
100
96
  value = yield(data)
101
- return value if event_value_present?(value)
97
+ return value if value.present?
102
98
  end
103
99
 
104
100
  nil
105
101
  end
106
102
 
107
- def build_unknown_stream_usage(provider:, model:, provider_response_id:)
108
- ParsedUsage.build(
103
+ def build_unknown_stream_usage(provider:, model:, provider_response_id:, pricing_mode: nil)
104
+ UsageCapture.build(
109
105
  provider: provider,
110
106
  provider_response_id: provider_response_id,
111
- model: model || ParsedUsage::UNKNOWN_MODEL,
112
- input_tokens: 0,
113
- output_tokens: 0,
114
- total_tokens: 0,
107
+ pricing_mode: pricing_mode,
108
+ model: model || UsageCapture::UNKNOWN_MODEL,
109
+ token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
115
110
  stream: true,
116
111
  usage_source: :unknown
117
112
  )
118
113
  end
119
-
120
- def event_value_present?(value)
121
- !value.nil? && (!value.respond_to?(:empty?) || !value.empty?)
122
- end
123
114
  end
124
115
  end
125
116
  end
@@ -18,7 +18,7 @@ module LlmCostTracker
18
18
  end
19
19
 
20
20
  def streaming_request?(request_url, request_body)
21
- return true if streaming_url?(request_url)
21
+ return true if match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
22
22
 
23
23
  super
24
24
  end
@@ -30,7 +30,7 @@ module LlmCostTracker
30
30
  usage = response["usageMetadata"]
31
31
  return nil unless usage
32
32
 
33
- build_parsed_usage(
33
+ build_usage_capture(
34
34
  request_url,
35
35
  usage,
36
36
  usage_source: :response,
@@ -46,7 +46,7 @@ module LlmCostTracker
46
46
  response_id = stream_response_id(events)
47
47
 
48
48
  if usage
49
- build_parsed_usage(
49
+ build_usage_capture(
50
50
  request_url,
51
51
  usage,
52
52
  stream: true,
@@ -64,17 +64,20 @@ module LlmCostTracker
64
64
 
65
65
  private
66
66
 
67
- def build_parsed_usage(request_url, usage, usage_source:, stream: false, provider_response_id: nil)
67
+ def build_usage_capture(request_url, usage, usage_source:, stream: false, provider_response_id: nil)
68
68
  cache_read = usage["cachedContentTokenCount"].to_i
69
+ tool_use_prompt = usage["toolUsePromptTokenCount"].to_i
69
70
 
70
- ParsedUsage.build(
71
+ UsageCapture.build(
71
72
  provider: "gemini",
72
73
  model: extract_model_from_url(request_url),
73
- input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
74
- output_tokens: output_tokens(usage),
75
- total_tokens: total_tokens(usage, cache_read),
76
- cache_read_input_tokens: usage["cachedContentTokenCount"],
77
- hidden_output_tokens: usage["thoughtsTokenCount"],
74
+ token_usage: TokenUsage.build(
75
+ input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max + tool_use_prompt,
76
+ output_tokens: output_tokens(usage),
77
+ total_tokens: total_tokens(usage, cache_read, tool_use_prompt),
78
+ cache_read_input_tokens: usage["cachedContentTokenCount"],
79
+ hidden_output_tokens: usage["thoughtsTokenCount"]
80
+ ),
78
81
  stream: stream,
79
82
  usage_source: usage_source,
80
83
  provider_response_id: provider_response_id
@@ -92,21 +95,17 @@ module LlmCostTracker
92
95
  usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
93
96
  end
94
97
 
95
- def total_tokens(usage, cache_read)
98
+ def total_tokens(usage, cache_read, tool_use_prompt)
96
99
  total = usage["totalTokenCount"]
97
100
  return total.to_i unless total.nil?
98
101
 
99
- [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + output_tokens(usage)
102
+ [usage["promptTokenCount"].to_i - cache_read, 0].max + cache_read + tool_use_prompt + output_tokens(usage)
100
103
  end
101
104
 
102
105
  def stream_response_id(events)
103
106
  find_event_value(events) { |data| data["responseId"] }
104
107
  end
105
108
 
106
- def streaming_url?(request_url)
107
- match_uri?(request_url, path_pattern: STREAM_PATH_PATTERN)
108
- end
109
-
110
109
  def extract_model_from_url(url)
111
110
  uri = parsed_uri(url)
112
111
  return nil unless uri
@@ -15,15 +15,12 @@ module LlmCostTracker
15
15
  request = safe_json_parse(request_body)
16
16
  cache_read = cache_read_input_tokens(usage)
17
17
 
18
- ParsedUsage.build(
18
+ UsageCapture.build(
19
19
  provider: provider_for(request_url),
20
20
  provider_response_id: response["id"],
21
+ pricing_mode: response["service_tier"] || request["service_tier"],
21
22
  model: response["model"] || request["model"],
22
- input_tokens: regular_input_tokens(usage, cache_read),
23
- output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
24
- total_tokens: total_tokens(usage, cache_read),
25
- cache_read_input_tokens: cache_read,
26
- hidden_output_tokens: hidden_output_tokens(usage),
23
+ token_usage: token_usage(usage, cache_read),
27
24
  usage_source: :response
28
25
  )
29
26
  end
@@ -32,21 +29,20 @@ module LlmCostTracker
32
29
  return nil unless response_status == 200
33
30
 
34
31
  request = safe_json_parse(request_body)
35
- model = detect_stream_model(events) || request["model"]
32
+ model =
33
+ find_event_value(events) { |data| data["model"] || data.dig("response", "model") } || request["model"]
36
34
  usage = detect_stream_usage(events)
37
- response_id = detect_stream_response_id(events)
35
+ response_id = find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
36
+ pricing_mode = stream_pricing_mode(events) || request["service_tier"]
38
37
 
39
38
  if usage
40
39
  cache_read = cache_read_input_tokens(usage)
41
- ParsedUsage.build(
40
+ UsageCapture.build(
42
41
  provider: provider_for(request_url),
43
42
  provider_response_id: response_id,
43
+ pricing_mode: pricing_mode,
44
44
  model: model,
45
- input_tokens: regular_input_tokens(usage, cache_read),
46
- output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
47
- total_tokens: total_tokens(usage, cache_read),
48
- cache_read_input_tokens: cache_read,
49
- hidden_output_tokens: hidden_output_tokens(usage),
45
+ token_usage: token_usage(usage, cache_read),
50
46
  stream: true,
51
47
  usage_source: :stream_final
52
48
  )
@@ -54,7 +50,8 @@ module LlmCostTracker
54
50
  build_unknown_stream_usage(
55
51
  provider: provider_for(request_url),
56
52
  model: model,
57
- provider_response_id: response_id
53
+ provider_response_id: response_id,
54
+ pricing_mode: pricing_mode
58
55
  )
59
56
  end
60
57
  end
@@ -66,12 +63,20 @@ module LlmCostTracker
66
63
  end
67
64
  end
68
65
 
69
- def detect_stream_model(events)
70
- find_event_value(events) { |data| data["model"] || data.dig("response", "model") }
66
+ def stream_pricing_mode(events)
67
+ find_event_value(events, reverse: true) do |data|
68
+ data["service_tier"] || data.dig("response", "service_tier")
69
+ end
71
70
  end
72
71
 
73
- def detect_stream_response_id(events)
74
- find_event_value(events) { |data| data["id"] || data.dig("response", "id") }
72
+ def token_usage(usage, cache_read)
73
+ TokenUsage.build(
74
+ input_tokens: regular_input_tokens(usage, cache_read),
75
+ output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
76
+ total_tokens: total_tokens(usage, cache_read),
77
+ cache_read_input_tokens: cache_read,
78
+ hidden_output_tokens: hidden_output_tokens(usage)
79
+ )
75
80
  end
76
81
 
77
82
  def regular_input_tokens(usage, cache_read)
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "active_support/core_ext/object/blank"
3
4
  require "json"
4
5
 
5
6
  module LlmCostTracker
@@ -9,9 +10,9 @@ module LlmCostTracker
9
10
 
10
11
  class << self
11
12
  def parse(body)
12
- return [] if body.nil? || body.empty?
13
+ return [] if body.blank?
13
14
 
14
- return parse_json_array(body) if probably_json_array?(body)
15
+ return parse_json_array(body) if body.lstrip.start_with?("[")
15
16
 
16
17
  parse_event_stream(body)
17
18
  end
@@ -65,16 +66,12 @@ module LlmCostTracker
65
66
  end
66
67
 
67
68
  def decode_data(payload)
68
- return payload if payload.empty?
69
+ return payload if payload.blank?
69
70
 
70
71
  JSON.parse(payload)
71
72
  rescue JSON::ParserError
72
73
  payload
73
74
  end
74
-
75
- def probably_json_array?(body)
76
- body.lstrip.start_with?("[")
77
- end
78
75
  end
79
76
  end
80
77
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Parsers
5
+ BUILT_INS = [Openai.new, OpenaiCompatible.new, Anthropic.new, Gemini.new].freeze
6
+
7
+ module_function
8
+
9
+ def find_for(url)
10
+ BUILT_INS.find { |parser| parser.match?(url) }
11
+ end
12
+
13
+ def find_for_provider(provider)
14
+ provider_name = provider.to_s.downcase
15
+ BUILT_INS.find do |parser|
16
+ Array(parser.provider_names).map { |name| name.to_s.downcase }.include?(provider_name)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -17,6 +17,7 @@
17
17
  "output": 5.0,
18
18
  "cache_read_input": 0.1,
19
19
  "cache_write_input": 1.25,
20
+ "cache_write_1h_input": 2.0,
20
21
  "batch_input": 0.5,
21
22
  "batch_output": 2.5
22
23
  },
@@ -25,6 +26,7 @@
25
26
  "output": 75.0,
26
27
  "cache_read_input": 1.5,
27
28
  "cache_write_input": 18.75,
29
+ "cache_write_1h_input": 30.0,
28
30
  "batch_input": 7.5,
29
31
  "batch_output": 37.5
30
32
  },
@@ -33,6 +35,7 @@
33
35
  "output": 75.0,
34
36
  "cache_read_input": 1.5,
35
37
  "cache_write_input": 18.75,
38
+ "cache_write_1h_input": 30.0,
36
39
  "batch_input": 7.5,
37
40
  "batch_output": 37.5
38
41
  },
@@ -41,6 +44,7 @@
41
44
  "output": 25.0,
42
45
  "cache_read_input": 0.5,
43
46
  "cache_write_input": 6.25,
47
+ "cache_write_1h_input": 10.0,
44
48
  "batch_input": 2.5,
45
49
  "batch_output": 12.5
46
50
  },
@@ -49,6 +53,7 @@
49
53
  "output": 25.0,
50
54
  "cache_read_input": 0.5,
51
55
  "cache_write_input": 6.25,
56
+ "cache_write_1h_input": 10.0,
52
57
  "batch_input": 2.5,
53
58
  "batch_output": 12.5
54
59
  },
@@ -57,6 +62,7 @@
57
62
  "output": 25.0,
58
63
  "cache_read_input": 0.5,
59
64
  "cache_write_input": 6.25,
65
+ "cache_write_1h_input": 10.0,
60
66
  "batch_input": 2.5,
61
67
  "batch_output": 12.5
62
68
  },
@@ -65,6 +71,7 @@
65
71
  "output": 15.0,
66
72
  "cache_read_input": 0.3,
67
73
  "cache_write_input": 3.75,
74
+ "cache_write_1h_input": 6.0,
68
75
  "batch_input": 1.5,
69
76
  "batch_output": 7.5
70
77
  },
@@ -73,6 +80,7 @@
73
80
  "output": 15.0,
74
81
  "cache_read_input": 0.3,
75
82
  "cache_write_input": 3.75,
83
+ "cache_write_1h_input": 6.0,
76
84
  "batch_input": 1.5,
77
85
  "batch_output": 7.5
78
86
  },
@@ -81,6 +89,7 @@
81
89
  "output": 15.0,
82
90
  "cache_read_input": 0.3,
83
91
  "cache_write_input": 3.75,
92
+ "cache_write_1h_input": 6.0,
84
93
  "batch_input": 1.5,
85
94
  "batch_output": 7.5
86
95
  },
@@ -89,7 +98,8 @@
89
98
  "cache_read_input": 0.025,
90
99
  "output": 0.4,
91
100
  "batch_input": 0.05,
92
- "batch_output": 0.2
101
+ "batch_output": 0.2,
102
+ "batch_cache_read_input": 0.025
93
103
  },
94
104
  "gemini/gemini-2.0-flash-lite": {
95
105
  "input": 0.075,
@@ -101,25 +111,32 @@
101
111
  "input": 0.3,
102
112
  "output": 2.5,
103
113
  "cache_read_input": 0.03,
104
- "cache_write_input": 0.083333333333,
105
114
  "batch_input": 0.15,
106
- "batch_output": 1.25
115
+ "batch_output": 1.25,
116
+ "batch_cache_read_input": 0.03
107
117
  },
108
118
  "gemini/gemini-2.5-flash-lite": {
109
119
  "input": 0.1,
110
120
  "output": 0.4,
111
121
  "cache_read_input": 0.01,
112
- "cache_write_input": 0.083333333333,
113
122
  "batch_input": 0.05,
114
- "batch_output": 0.2
123
+ "batch_output": 0.2,
124
+ "batch_cache_read_input": 0.01
115
125
  },
116
126
  "gemini/gemini-2.5-pro": {
117
127
  "input": 1.25,
118
128
  "output": 10.0,
119
129
  "cache_read_input": 0.125,
120
- "cache_write_input": 0.375,
121
130
  "batch_input": 0.625,
122
- "batch_output": 5.0
131
+ "batch_output": 5.0,
132
+ "batch_cache_read_input": 0.125,
133
+ "_context_price_threshold_tokens": 200000,
134
+ "above_context_input": 2.5,
135
+ "above_context_output": 15.0,
136
+ "above_context_cache_read_input": 0.25,
137
+ "above_context_batch_input": 1.25,
138
+ "above_context_batch_output": 7.5,
139
+ "above_context_batch_cache_read_input": 0.25
123
140
  },
124
141
  "openai/gpt-3.5-turbo": {
125
142
  "input": 0.5,
@@ -276,7 +293,14 @@
276
293
  "cache_read_input": 0.25,
277
294
  "batch_input": 1.25,
278
295
  "batch_output": 7.5,
279
- "batch_cache_read_input": 0.13
296
+ "batch_cache_read_input": 0.13,
297
+ "_context_price_threshold_tokens": 272000,
298
+ "above_context_input": 5.0,
299
+ "above_context_output": 22.5,
300
+ "above_context_cache_read_input": 0.5,
301
+ "above_context_batch_input": 2.5,
302
+ "above_context_batch_output": 11.25,
303
+ "above_context_batch_cache_read_input": 0.25
280
304
  },
281
305
  "openai/gpt-5.4-mini": {
282
306
  "input": 0.75,
@@ -298,7 +322,12 @@
298
322
  "input": 30.0,
299
323
  "output": 180.0,
300
324
  "batch_input": 15.0,
301
- "batch_output": 90.0
325
+ "batch_output": 90.0,
326
+ "_context_price_threshold_tokens": 272000,
327
+ "above_context_input": 60.0,
328
+ "above_context_output": 270.0,
329
+ "above_context_batch_input": 30.0,
330
+ "above_context_batch_output": 135.0
302
331
  },
303
332
  "openai/gpt-5.5": {
304
333
  "input": 5.0,
@@ -306,13 +335,23 @@
306
335
  "cache_read_input": 0.5,
307
336
  "batch_input": 2.5,
308
337
  "batch_output": 15.0,
309
- "batch_cache_read_input": 0.25
338
+ "batch_cache_read_input": 0.25,
339
+ "_context_price_threshold_tokens": 272000,
340
+ "above_context_input": 10.0,
341
+ "above_context_output": 45.0,
342
+ "above_context_cache_read_input": 1.0,
343
+ "above_context_batch_input": 5.0,
344
+ "above_context_batch_output": 22.5,
345
+ "above_context_batch_cache_read_input": 0.5
310
346
  },
311
347
  "openai/gpt-5.5-pro": {
312
348
  "input": 30.0,
313
349
  "output": 180.0,
314
350
  "batch_input": 15.0,
315
- "batch_output": 90.0
351
+ "batch_output": 90.0,
352
+ "_context_price_threshold_tokens": 272000,
353
+ "above_context_input": 60.0,
354
+ "above_context_output": 270.0
316
355
  },
317
356
  "openai/o1": {
318
357
  "input": 15.0,
@@ -352,6 +391,7 @@
352
391
  "anthropic/claude-haiku-3-5": {
353
392
  "input": 0.8,
354
393
  "cache_write_input": 1.0,
394
+ "cache_write_1h_input": 1.6,
355
395
  "cache_read_input": 0.08,
356
396
  "output": 4.0,
357
397
  "batch_input": 0.4,
@@ -360,6 +400,7 @@
360
400
  "anthropic/claude-haiku-3": {
361
401
  "input": 0.25,
362
402
  "cache_write_input": 0.3,
403
+ "cache_write_1h_input": 0.5,
363
404
  "cache_read_input": 0.03,
364
405
  "output": 1.25,
365
406
  "batch_input": 0.125,
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmCostTracker
4
+ module Pricing
5
+ Component = Data.define(:price_key, :token_key, :cost_key)
6
+
7
+ COMPONENTS = [
8
+ Component.new(
9
+ price_key: :input,
10
+ token_key: :input_tokens,
11
+ cost_key: :input_cost
12
+ ),
13
+ Component.new(
14
+ price_key: :cache_read_input,
15
+ token_key: :cache_read_input_tokens,
16
+ cost_key: :cache_read_input_cost
17
+ ),
18
+ Component.new(
19
+ price_key: :cache_write_input,
20
+ token_key: :cache_write_input_tokens,
21
+ cost_key: :cache_write_input_cost
22
+ ),
23
+ Component.new(
24
+ price_key: :cache_write_1h_input,
25
+ token_key: :cache_write_1h_input_tokens,
26
+ cost_key: :cache_write_1h_input_cost
27
+ ),
28
+ Component.new(
29
+ price_key: :output,
30
+ token_key: :output_tokens,
31
+ cost_key: :output_cost
32
+ )
33
+ ].freeze
34
+
35
+ COST_KEYS = (COMPONENTS.map(&:cost_key) + %i[total_cost]).freeze
36
+ end
37
+ end