llm_cost_tracker 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +43 -0
  3. data/README.md +18 -9
  4. data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
  5. data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
  6. data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
  7. data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
  8. data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
  9. data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
  10. data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
  11. data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
  12. data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
  13. data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
  14. data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
  15. data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
  16. data/docs/architecture.md +28 -0
  17. data/docs/budgets.md +45 -0
  18. data/docs/configuration.md +65 -0
  19. data/docs/cookbook.md +185 -0
  20. data/docs/dashboard-overview.png +0 -0
  21. data/docs/dashboard.md +38 -0
  22. data/docs/extending.md +32 -0
  23. data/docs/operations.md +44 -0
  24. data/docs/pricing.md +94 -0
  25. data/docs/querying.md +36 -0
  26. data/docs/streaming.md +70 -0
  27. data/docs/technical/README.md +10 -0
  28. data/docs/technical/data-flow.md +67 -0
  29. data/docs/technical/extension-points.md +111 -0
  30. data/docs/technical/module-map.md +197 -0
  31. data/docs/technical/operational-notes.md +77 -0
  32. data/docs/upgrading.md +46 -0
  33. data/lib/llm_cost_tracker/capture_verifier.rb +71 -0
  34. data/lib/llm_cost_tracker/configuration/instrumentation.rb +1 -1
  35. data/lib/llm_cost_tracker/configuration/storage_backend.rb +26 -0
  36. data/lib/llm_cost_tracker/configuration.rb +24 -17
  37. data/lib/llm_cost_tracker/doctor/capture_check.rb +39 -0
  38. data/lib/llm_cost_tracker/doctor.rb +6 -1
  39. data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
  40. data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
  41. data/lib/llm_cost_tracker/integrations/anthropic.rb +51 -3
  42. data/lib/llm_cost_tracker/integrations/base.rb +77 -6
  43. data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
  44. data/lib/llm_cost_tracker/integrations/openai.rb +78 -5
  45. data/lib/llm_cost_tracker/integrations/registry.rb +36 -4
  46. data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
  47. data/lib/llm_cost_tracker/integrations/stream_tracker.rb +166 -0
  48. data/lib/llm_cost_tracker/llm_api_call.rb +2 -77
  49. data/lib/llm_cost_tracker/llm_api_call_metrics.rb +63 -0
  50. data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
  51. data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
  52. data/lib/llm_cost_tracker/parsers/openai_usage.rb +12 -3
  53. data/lib/llm_cost_tracker/price_registry.rb +3 -0
  54. data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
  55. data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
  56. data/lib/llm_cost_tracker/pricing/effective_prices.rb +75 -0
  57. data/lib/llm_cost_tracker/pricing/explainer.rb +77 -0
  58. data/lib/llm_cost_tracker/pricing/lookup.rb +110 -0
  59. data/lib/llm_cost_tracker/pricing.rb +25 -108
  60. data/lib/llm_cost_tracker/report.rb +8 -1
  61. data/lib/llm_cost_tracker/report_data.rb +25 -9
  62. data/lib/llm_cost_tracker/retention.rb +33 -16
  63. data/lib/llm_cost_tracker/storage/active_record_backend.rb +115 -0
  64. data/lib/llm_cost_tracker/storage/active_record_rollups.rb +42 -0
  65. data/lib/llm_cost_tracker/storage/active_record_store.rb +26 -0
  66. data/lib/llm_cost_tracker/storage/custom_backend.rb +32 -0
  67. data/lib/llm_cost_tracker/storage/dispatcher.rb +11 -34
  68. data/lib/llm_cost_tracker/storage/log_backend.rb +38 -0
  69. data/lib/llm_cost_tracker/storage/registry.rb +63 -0
  70. data/lib/llm_cost_tracker/stream_capture.rb +7 -0
  71. data/lib/llm_cost_tracker/stream_collector.rb +25 -1
  72. data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
  73. data/lib/llm_cost_tracker/tag_sql.rb +34 -0
  74. data/lib/llm_cost_tracker/tracker.rb +6 -2
  75. data/lib/llm_cost_tracker/version.rb +1 -1
  76. data/lib/llm_cost_tracker.rb +4 -0
  77. data/lib/tasks/llm_cost_tracker.rake +49 -0
  78. metadata +40 -6
data/lib/llm_cost_tracker/doctor.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative "price_freshness"
+require_relative "doctor/capture_check"
 
 module LlmCostTracker
   class Doctor
@@ -38,6 +39,7 @@ module LlmCostTracker
     def checks
       [
         configuration_check,
+        capture_check,
         *integration_checks,
         active_record_check,
         table_check,
@@ -51,9 +53,12 @@ module LlmCostTracker
     private
 
     def configuration_check
-      Check.new(:ok, "configuration", "storage_backend=#{LlmCostTracker.configuration.storage_backend.inspect}")
+      config = LlmCostTracker.configuration
+      Check.new(:ok, "configuration", "storage_backend=#{config.storage_backend.inspect}, enabled=#{config.enabled}")
     end
 
+    def capture_check = CaptureCheck.call(Check)
+
     def integration_checks
       LlmCostTracker::Integrations.checks.map do |check|
         Check.new(check.status, check.name.to_s, check.message)

data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb

@@ -39,6 +39,7 @@ module LlmCostTracker
 
      add_engine_require
      route %(mount LlmCostTracker::Engine => "/llm-costs")
+     say "Mount /llm-costs behind your app's admin auth before deploying.", :yellow
    end
 
    private
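
The new generator notice is only a reminder; the engine does not add authentication on its own. A minimal sketch of one way to follow it, assuming the host app uses Devise (the authenticate routes helper and the admin? predicate are illustrative, not part of this gem):

    # config/routes.rb
    authenticate :user, ->(user) { user.admin? } do
      mount LlmCostTracker::Engine => "/llm-costs"
    end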

data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb

@@ -11,10 +11,16 @@ LlmCostTracker.configure do |config|
   # Tags are merged into every event. Use a callable for request/job-time context.
   config.default_tags = -> { { environment: Rails.env } }
 
+  # Tag guardrails keep accidental high-cardinality or sensitive values out of the ledger.
+  # config.max_tag_count = 50
+  # config.max_tag_value_bytesize = 1024
+  # config.redacted_tag_keys = %w[api_key access_token authorization credential password refresh_token secret]
+
   # Optional SDK integrations. Provider SDK gems are not installed by LLM Cost Tracker.
-  # Enable only the SDKs your app already uses.
+  # Enabled integrations are checked at boot, so enable only clients your app loads.
   # config.instrument :openai
   # config.instrument :anthropic
+  # config.instrument :ruby_llm
 
   # Budget behavior: :notify calls on_budget_exceeded, :raise raises after recording,
   # :block_requests preflights monthly/daily budgets before supported requests.

data/lib/llm_cost_tracker/integrations/anthropic.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative "base"
+require_relative "stream_tracker"
 
 module LlmCostTracker
   module Integrations
@@ -10,10 +11,19 @@ module LlmCostTracker
       class << self
         def integration_name = :anthropic
 
-        def target_patches
+        def minimum_version = "1.36.0"
+
+        def version_constant = "Anthropic::VERSION"
+
+        def patch_targets
           [
-            [constant("Anthropic::Resources::Messages"), MessagesPatch],
-            [constant("Anthropic::Resources::Beta::Messages"), MessagesPatch]
+            patch_target("Anthropic::Resources::Messages", with: MessagesPatch, methods: %i[create stream stream_raw]),
+            patch_target(
+              "Anthropic::Resources::Beta::Messages",
+              with: MessagesPatch,
+              methods: %i[create stream stream_raw],
+              optional: true
+            )
           ]
         end
 
@@ -55,6 +65,28 @@ module LlmCostTracker
             ObjectReader.nested(usage, :output_tokens_details, :reasoning_tokens)
           )
         end
+
+        def track_stream(stream, collector:)
+          return stream unless active?
+
+          StreamTracker.wrap(
+            stream,
+            collector: collector,
+            active: -> { active? },
+            finish: ->(errored:) { finish_stream(collector, errored: errored) }
+          )
+        end
+
+        def stream_collector(request)
+          LlmCostTracker::StreamCollector.new(
+            provider: "anthropic",
+            model: request[:model] || request["model"]
+          )
+        end
+
+        def finish_stream(collector, errored:)
+          record_safely { collector.finish!(errored: errored) }
+        end
       end
 
       module MessagesPatch
@@ -69,6 +101,22 @@ module LlmCostTracker
           )
           message
         end
+
+        def stream(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
+          LlmCostTracker::Integrations::Anthropic.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+        end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
+          LlmCostTracker::Integrations::Anthropic.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+        end
       end
     end
   end

data/lib/llm_cost_tracker/integrations/base.rb

@@ -6,6 +6,7 @@ require_relative "object_reader"
 module LlmCostTracker
   module Integrations
     module Base
+      PatchTarget = Data.define(:constant_name, :patch, :method_names, :optional)
       Result = Data.define(:name, :status, :message)
 
       def active?
@@ -13,15 +14,23 @@ module LlmCostTracker
       end
 
       def install
-        target_patches.each { |target, patch| install_patch(target, patch) }
+        validate_contract!
+        patch_targets.each do |target|
+          target_class = constant(target.constant_name)
+          install_patch(target_class, target.patch) if target_class
+        end
       end
 
       def status
         name = integration_name
-        installed = target_patches.count { |target, patch| patch_installed?(target, patch) }
-        available = target_patches.count { |target, _patch| target }
-        return Result.new(name, :ok, "#{name} integration installed") if installed.positive?
-        return Result.new(name, :warn, "#{name} SDK classes are not loaded") if available.zero?
+        problems = contract_problems
+        if problems.any?
+          return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
+        end
+
+        required_targets = patch_targets.reject(&:optional)
+        installed = required_targets.count { |target| patch_installed?(constant(target.constant_name), target.patch) }
+        return Result.new(name, :ok, "#{name} integration installed") if installed == required_targets.count
 
         Result.new(name, :warn, "#{name} integration is enabled but not installed")
       end
@@ -55,10 +64,72 @@ module LlmCostTracker
        end
      end
 
+      def minimum_version = nil
+
+      def version_constant = nil
+
+      def patch_targets = []
+
+      def patch_target(constant_name, with:, methods:, optional: false)
+        PatchTarget.new(constant_name, with, Array(methods), optional)
+      end
+
       private
 
+      def validate_contract!
+        problems = contract_problems
+        return if problems.empty?
+
+        raise Error, "#{integration_name} integration cannot be installed: #{problems.join('; ')}"
+      end
+
+      def contract_problems
+        version_problems + target_problems
+      end
+
+      def version_problems
+        return [] unless minimum_version
+
+        name = integration_name.to_s
+        version = installed_version
+        return ["#{name} >= #{minimum_version} is required, but #{name} is not loaded"] unless version
+        return [] if version >= Gem::Version.new(minimum_version)
+
+        ["#{name} >= #{minimum_version} is required, detected #{version}"]
+      end
+
+      def installed_version
+        Gem.loaded_specs[integration_name.to_s]&.version || constant_version
+      end
+
+      def constant_version
+        return nil unless version_constant
+
+        value = constant(version_constant)
+        value ? Gem::Version.new(value.to_s) : nil
+      rescue ArgumentError
+        nil
+      end
+
+      def target_problems
+        patch_targets.flat_map do |target|
+          target_class = constant(target.constant_name)
+          next [] if target_class.nil? && target.optional
+          next ["#{target.constant_name} is not loaded"] unless target_class
+
+          missing_methods(target_class, target)
+        end
+      end
+
+      def missing_methods(target_class, target)
+        target.method_names.filter_map do |method_name|
+          next if target_class.method_defined?(method_name) || target_class.private_method_defined?(method_name)
+
+          "#{target.constant_name}##{method_name} is not available"
+        end
+      end
+
       def install_patch(target, patch)
-        return unless target
         return if patch_installed?(target, patch)
 
         target.prepend(patch)
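
Base now expresses an integration as a small declarative contract (patch_targets, minimum_version, version_constant) instead of the old target_patches array of [constant, patch] pairs, and it refuses to install when the contract is unmet. A minimal sketch of a third-party integration written against that contract; MySdk, ClientPatch, and the chat method are hypothetical stand-ins, not a real SDK:

    module MySdkIntegration
      extend LlmCostTracker::Integrations::Base

      def self.integration_name = :my_sdk
      def self.minimum_version = "2.0.0"            # checked via Gem.loaded_specs["my_sdk"] or the version constant
      def self.version_constant = "MySdk::VERSION"

      def self.patch_targets
        [patch_target("MySdk::Client", with: ClientPatch, methods: %i[chat])]
      end

      module ClientPatch
        def chat(*args, **kwargs)
          # record usage around the SDK call, then return the SDK's response
          super
        end
      end
    end

install prepends ClientPatch onto MySdk::Client only when the constant is loaded, the version is new enough, and every declared method exists; otherwise install raises and status surfaces the problems as a :warn check.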

data/lib/llm_cost_tracker/integrations/object_reader.rb

@@ -48,7 +48,7 @@ module LlmCostTracker
        return unless object.respond_to?(:[])
 
        object[key]
-     rescue IndexError, TypeError, NoMethodError
+     rescue IndexError, NameError, TypeError
        nil
      end
    end

data/lib/llm_cost_tracker/integrations/openai.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative "base"
+require_relative "stream_tracker"
 
 module LlmCostTracker
   module Integrations
@@ -10,10 +11,22 @@ module LlmCostTracker
       class << self
         def integration_name = :openai
 
-        def target_patches
+        def minimum_version = "0.59.0"
+
+        def version_constant = "OpenAI::VERSION"
+
+        def patch_targets
           [
-            [constant("OpenAI::Resources::Responses"), ResponsesPatch],
-            [constant("OpenAI::Resources::Chat::Completions"), ChatCompletionsPatch]
+            patch_target(
+              "OpenAI::Resources::Responses",
+              with: ResponsesPatch,
+              methods: %i[create stream stream_raw retrieve_streaming]
+            ),
+            patch_target(
+              "OpenAI::Resources::Chat::Completions",
+              with: ChatCompletionsPatch,
+              methods: %i[create stream_raw]
+            )
           ]
         end
 
@@ -28,15 +41,16 @@ module LlmCostTracker
             output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
             next if input_tokens.nil? && output_tokens.nil?
 
+            metadata = usage_metadata(usage)
             LlmCostTracker::Tracker.record(
               provider: "openai",
               model: ObjectReader.first(response, :model) || request[:model],
-              input_tokens: ObjectReader.integer(input_tokens),
+              input_tokens: regular_input_tokens(input_tokens, metadata[:cache_read_input_tokens]),
               output_tokens: ObjectReader.integer(output_tokens),
               latency_ms: latency_ms,
               usage_source: :sdk_response,
               provider_response_id: ObjectReader.first(response, :id),
-              metadata: usage_metadata(usage)
+              metadata: metadata
             )
           end
         end
@@ -61,6 +75,32 @@ module LlmCostTracker
             ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
           )
         end
+
+        def regular_input_tokens(input_tokens, cache_read)
+          [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
+        end
+
+        def track_stream(stream, collector:)
+          return stream unless active?
+
+          StreamTracker.wrap(
+            stream,
+            collector: collector,
+            active: -> { active? },
+            finish: ->(errored:) { finish_stream(collector, errored: errored) }
+          )
+        end
+
+        def stream_collector(request)
+          LlmCostTracker::StreamCollector.new(
+            provider: "openai",
+            model: request[:model] || request["model"]
+          )
+        end
+
+        def finish_stream(collector, errored:)
+          record_safely { collector.finish!(errored: errored) }
+        end
       end
 
       module ResponsesPatch
@@ -75,6 +115,31 @@ module LlmCostTracker
           )
           response
         end
+
+        def stream(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
+
+        def retrieve_streaming(response_id, *args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          collector.provider_response_id = response_id
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
       end
 
       module ChatCompletionsPatch
@@ -89,6 +154,14 @@ module LlmCostTracker
           )
           response
         end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
       end
     end
   end
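
The OpenAI recorder now splits cached prompt tokens out of the regular input count before pricing. A worked example with made-up numbers (not taken from the diff, and assuming ObjectReader.integer passes plain integers through unchanged):

    # usage reports 1_200 prompt tokens, 1_000 of which were served from the prompt cache
    metadata[:cache_read_input_tokens]   #=> 1_000
    regular_input_tokens(1_200, 1_000)   #=> 200    ([1_200 - 1_000, 0].max)
    regular_input_tokens(1_200, nil)     #=> 1_200  (no cached tokens reported)

So the cached portion travels in metadata and only the uncached 200 tokens are recorded as input_tokens.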

data/lib/llm_cost_tracker/integrations/registry.rb

@@ -1,18 +1,31 @@
 # frozen_string_literal: true
 
+require "monitor"
+
+require_relative "../errors"
 require_relative "openai"
 require_relative "anthropic"
+require_relative "ruby_llm"
 
 module LlmCostTracker
   module Integrations
     module Registry
-      INTEGRATIONS = {
+      DEFAULT_INTEGRATIONS = {
         openai: Openai,
-        anthropic: Anthropic
+        anthropic: Anthropic,
+        ruby_llm: RubyLlm
       }.freeze
+      MUTEX = Monitor.new
 
       module_function
 
+      def register(name, integration)
+        key = name.to_sym
+        validate_integration!(integration)
+        MUTEX.synchronize { @integrations = integrations.merge(key => integration).freeze }
+        integration
+      end
+
       def install!(names = LlmCostTracker.configuration.instrumented_integrations)
         normalize(names).each { |name| fetch(name).install }
       end
@@ -28,13 +41,32 @@ module LlmCostTracker
       end
 
       def fetch(name)
-        INTEGRATIONS.fetch(name.to_sym) do
-          message = "Unknown integration: #{name.inspect}. Use one of: #{INTEGRATIONS.keys.join(', ')}"
+        integrations.fetch(name.to_sym) do
+          message = "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"
           raise LlmCostTracker::Error, message
         end
       end
+
+      def names
+        integrations.keys
+      end
+
+      def reset!
+        MUTEX.synchronize { @integrations = DEFAULT_INTEGRATIONS.dup.freeze }
+      end
+
+      def integrations
+        @integrations || MUTEX.synchronize { @integrations ||= DEFAULT_INTEGRATIONS.dup.freeze }
+      end
+
+      def validate_integration!(integration)
+        return if integration.respond_to?(:install) && integration.respond_to?(:status)
+
+        raise ArgumentError, "integration must respond to install and status"
+      end
     end
 
+    def self.register(name, integration) = Registry.register(name, integration)
     def self.install! = Registry.install!
     def self.checks = Registry.checks
   end
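
The integration registry is no longer a frozen constant, so a host app can add its own integrations at boot. A brief sketch, reusing the hypothetical MySdkIntegration from the Base example above:

    # config/initializers/llm_cost_tracker.rb
    LlmCostTracker::Integrations.register(:my_sdk, MySdkIntegration)

    LlmCostTracker.configure do |config|
      config.instrument :my_sdk   # install! resolves the name through the merged registry
    end

register raises ArgumentError unless the object responds to install and status, and Registry.reset! restores the default openai/anthropic/ruby_llm set, which is handy in tests.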

data/lib/llm_cost_tracker/integrations/ruby_llm.rb

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+require_relative "base"
+
+module LlmCostTracker
+  module Integrations
+    module RubyLlm
+      extend Base
+
+      class << self
+        def integration_name = :ruby_llm
+
+        def minimum_version = "1.14.1"
+
+        def version_constant = "RubyLLM::VERSION"
+
+        def patch_targets
+          [
+            patch_target(
+              "RubyLLM::Provider",
+              with: ProviderPatch,
+              methods: %i[slug complete embed transcribe]
+            )
+          ]
+        end
+
+        def record_completion(provider, response, request:, latency_ms:, stream:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: stream
+          )
+        end
+
+        def streaming_request?(request, has_block:)
+          has_block || request[:stream] == true
+        end
+
+        def record_embedding(provider, response, request:, latency_ms:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: false,
+            output_tokens: 0
+          )
+        end
+
+        def record_transcription(provider, response, request:, latency_ms:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: false
+          )
+        end
+
+        def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
+          return unless active?
+
+          record_safely do
+            input_tokens = ObjectReader.first(response, :input_tokens)
+            output_tokens = ObjectReader.first(response, :output_tokens) if output_tokens.nil?
+            next if input_tokens.nil? && output_tokens.nil?
+
+            cache_read = ObjectReader.integer(ObjectReader.first(response, :cached_tokens))
+
+            LlmCostTracker::Tracker.record(
+              provider: provider,
+              model: model,
+              input_tokens: regular_input_tokens(input_tokens, cache_read),
+              output_tokens: ObjectReader.integer(output_tokens),
+              latency_ms: latency_ms,
+              stream: stream,
+              usage_source: :ruby_llm,
+              provider_response_id: provider_response_id(response),
+              metadata: usage_metadata(response, cache_read)
+            )
+          end
+        end
+
+        def usage_metadata(response, cache_read)
+          {
+            cache_read_input_tokens: cache_read,
+            cache_write_input_tokens: ObjectReader.integer(ObjectReader.first(response, :cache_creation_tokens)),
+            hidden_output_tokens: ObjectReader.integer(
+              ObjectReader.first(response, :thinking_tokens, :reasoning_tokens)
+            )
+          }
+        end
+
+        def regular_input_tokens(input_tokens, cache_read)
+          [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
+        end
+
+        def provider_slug(provider)
+          ObjectReader.first(provider, :slug).to_s
+        end
+
+        def model_id(object)
+          return nil if object.nil?
+
+          value = ObjectReader.first(object, :id, :model_id, :model)
+          value ||= object if object.is_a?(String) || object.is_a?(Symbol)
+          value&.to_s
+        end
+
+        def response_model_id(object)
+          value = ObjectReader.first(object, :model_id, :model)
+          value&.to_s
+        end
+
+        def provider_response_id(response)
+          ObjectReader.first(response, :id, :provider_response_id) || ObjectReader.nested(response, :raw, :id)
+        end
+      end
+
+      module ProviderPatch
+        def complete(*args, **kwargs, &)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_completion(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at),
+            stream: integration.streaming_request?(request, has_block: block_given?)
+          )
+          response
+        end
+
+        def embed(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_embedding(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
+
+        def transcribe(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_transcription(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
+      end
+    end
+  end
+end
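
With config.instrument :ruby_llm enabled, no per-call changes are needed: ProviderPatch wraps RubyLLM::Provider#complete, #embed, and #transcribe, so token counts, cached tokens, latency, and the provider slug are recorded for any call that reaches a provider. A rough usage sketch, assuming RubyLLM's documented top-level chat API (the model name is illustrative):

    chat = RubyLLM.chat(model: "gpt-4.1-mini")
    chat.ask("Summarize this invoice")   # recorded through ProviderPatch#complete with usage_source: :ruby_llm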