llm_cost_tracker 0.5.1 → 0.5.3
This diff covers publicly available package versions released to a supported registry. It reflects the changes between those versions as published and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/README.md +18 -9
- data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
- data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
- data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
- data/docs/architecture.md +28 -0
- data/docs/budgets.md +45 -0
- data/docs/configuration.md +65 -0
- data/docs/cookbook.md +185 -0
- data/docs/dashboard-overview.png +0 -0
- data/docs/dashboard.md +38 -0
- data/docs/extending.md +32 -0
- data/docs/operations.md +44 -0
- data/docs/pricing.md +94 -0
- data/docs/querying.md +36 -0
- data/docs/streaming.md +70 -0
- data/docs/technical/README.md +10 -0
- data/docs/technical/data-flow.md +67 -0
- data/docs/technical/extension-points.md +111 -0
- data/docs/technical/module-map.md +197 -0
- data/docs/technical/operational-notes.md +77 -0
- data/docs/upgrading.md +46 -0
- data/lib/llm_cost_tracker/capture_verifier.rb +71 -0
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +1 -1
- data/lib/llm_cost_tracker/configuration/storage_backend.rb +26 -0
- data/lib/llm_cost_tracker/configuration.rb +24 -17
- data/lib/llm_cost_tracker/doctor/capture_check.rb +39 -0
- data/lib/llm_cost_tracker/doctor.rb +6 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +51 -3
- data/lib/llm_cost_tracker/integrations/base.rb +77 -6
- data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
- data/lib/llm_cost_tracker/integrations/openai.rb +78 -5
- data/lib/llm_cost_tracker/integrations/registry.rb +36 -4
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
- data/lib/llm_cost_tracker/integrations/stream_tracker.rb +166 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +2 -77
- data/lib/llm_cost_tracker/llm_api_call_metrics.rb +63 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
- data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +12 -3
- data/lib/llm_cost_tracker/price_registry.rb +3 -0
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +75 -0
- data/lib/llm_cost_tracker/pricing/explainer.rb +77 -0
- data/lib/llm_cost_tracker/pricing/lookup.rb +110 -0
- data/lib/llm_cost_tracker/pricing.rb +25 -108
- data/lib/llm_cost_tracker/report.rb +8 -1
- data/lib/llm_cost_tracker/report_data.rb +25 -9
- data/lib/llm_cost_tracker/retention.rb +33 -16
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +115 -0
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +42 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +26 -0
- data/lib/llm_cost_tracker/storage/custom_backend.rb +32 -0
- data/lib/llm_cost_tracker/storage/dispatcher.rb +11 -34
- data/lib/llm_cost_tracker/storage/log_backend.rb +38 -0
- data/lib/llm_cost_tracker/storage/registry.rb +63 -0
- data/lib/llm_cost_tracker/stream_capture.rb +7 -0
- data/lib/llm_cost_tracker/stream_collector.rb +25 -1
- data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
- data/lib/llm_cost_tracker/tag_sql.rb +34 -0
- data/lib/llm_cost_tracker/tracker.rb +6 -2
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +4 -0
- data/lib/tasks/llm_cost_tracker.rake +49 -0
- metadata +40 -6

data/lib/llm_cost_tracker/doctor.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require_relative "price_freshness"
+require_relative "doctor/capture_check"

 module LlmCostTracker
   class Doctor
@@ -38,6 +39,7 @@ module LlmCostTracker
     def checks
       [
         configuration_check,
+        capture_check,
         *integration_checks,
         active_record_check,
         table_check,
@@ -51,9 +53,12 @@ module LlmCostTracker
     private

     def configuration_check
-
+      config = LlmCostTracker.configuration
+      Check.new(:ok, "configuration", "storage_backend=#{config.storage_backend.inspect}, enabled=#{config.enabled}")
     end

+    def capture_check = CaptureCheck.call(Check)
+
     def integration_checks
       LlmCostTracker::Integrations.checks.map do |check|
         Check.new(check.status, check.name.to_s, check.message)
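The doctor gains a capture check and a configuration check that reports the configured storage backend and enabled flag. A hypothetical console sketch of reading the checks; the entry point (`Doctor.new.checks`) and the sample output are assumptions, since the diff only shows the check list itself:

```ruby
# Hypothetical sketch: inspect doctor checks from a Rails console.
# Doctor.new and the printed values are assumptions, not documented API.
LlmCostTracker::Doctor.new.checks.each do |check|
  puts format("%-5s %-15s %s", check.status, check.name, check.message)
end
# ok    configuration   storage_backend=:active_record, enabled=true
# ok    capture         ...
```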

data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb

@@ -11,10 +11,16 @@ LlmCostTracker.configure do |config|
   # Tags are merged into every event. Use a callable for request/job-time context.
   config.default_tags = -> { { environment: Rails.env } }

+  # Tag guardrails keep accidental high-cardinality or sensitive values out of the ledger.
+  # config.max_tag_count = 50
+  # config.max_tag_value_bytesize = 1024
+  # config.redacted_tag_keys = %w[api_key access_token authorization credential password refresh_token secret]
+
   # Optional SDK integrations. Provider SDK gems are not installed by LLM Cost Tracker.
-  #
+  # Enabled integrations are checked at boot, so enable only clients your app loads.
   # config.instrument :openai
   # config.instrument :anthropic
+  # config.instrument :ruby_llm

   # Budget behavior: :notify calls on_budget_exceeded, :raise raises after recording,
   # :block_requests preflights monthly/daily budgets before supported requests.
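Uncommenting the new guardrails yields an initializer along these lines; the numeric limits below are illustrative values, not recommended defaults:

```ruby
# config/initializers/llm_cost_tracker.rb -- illustrative values only.
LlmCostTracker.configure do |config|
  config.default_tags = -> { { environment: Rails.env } }

  # Reject runaway tag sets, cap value sizes, and redact sensitive keys.
  config.max_tag_count = 25
  config.max_tag_value_bytesize = 512
  config.redacted_tag_keys = %w[api_key authorization password secret]

  # Enable only SDKs this app actually loads; they are checked at boot.
  config.instrument :openai
  config.instrument :ruby_llm
end
```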

data/lib/llm_cost_tracker/integrations/anthropic.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require_relative "base"
+require_relative "stream_tracker"

 module LlmCostTracker
   module Integrations
@@ -10,10 +11,19 @@ module LlmCostTracker
       class << self
         def integration_name = :anthropic

-        def
+        def minimum_version = "1.36.0"
+
+        def version_constant = "Anthropic::VERSION"
+
+        def patch_targets
           [
-
-
+            patch_target("Anthropic::Resources::Messages", with: MessagesPatch, methods: %i[create stream stream_raw]),
+            patch_target(
+              "Anthropic::Resources::Beta::Messages",
+              with: MessagesPatch,
+              methods: %i[create stream stream_raw],
+              optional: true
+            )
           ]
         end

@@ -55,6 +65,28 @@ module LlmCostTracker
            ObjectReader.nested(usage, :output_tokens_details, :reasoning_tokens)
          )
        end
+
+        def track_stream(stream, collector:)
+          return stream unless active?
+
+          StreamTracker.wrap(
+            stream,
+            collector: collector,
+            active: -> { active? },
+            finish: ->(errored:) { finish_stream(collector, errored: errored) }
+          )
+        end
+
+        def stream_collector(request)
+          LlmCostTracker::StreamCollector.new(
+            provider: "anthropic",
+            model: request[:model] || request["model"]
+          )
+        end
+
+        def finish_stream(collector, errored:)
+          record_safely { collector.finish!(errored: errored) }
+        end
       end

       module MessagesPatch
@@ -69,6 +101,22 @@ module LlmCostTracker
           )
           message
         end
+
+        def stream(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
+          LlmCostTracker::Integrations::Anthropic.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+        end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Anthropic.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Anthropic.stream_collector(request)
+          LlmCostTracker::Integrations::Anthropic.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Anthropic.track_stream(stream, collector: collector)
+        end
       end
     end
   end
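With the :anthropic integration enabled, streamed message calls are now tracked as well as plain creates. A hedged usage sketch; the client call shape and the model name are assumptions about the Anthropic SDK rather than part of this gem:

```ruby
LlmCostTracker.configure { |config| config.instrument :anthropic }

client = Anthropic::Client.new
stream = client.messages.stream(
  model: "claude-sonnet-4-5",   # illustrative model name
  max_tokens: 512,
  messages: [{ role: "user", content: "Summarize this ticket" }]
)
stream.each { |event| handle_event(event) }  # handle_event is app-defined

# Once the stream is consumed (or errors), the wrapped StreamCollector
# finishes and records provider, model, token usage, and latency.
```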

data/lib/llm_cost_tracker/integrations/base.rb

@@ -6,6 +6,7 @@ require_relative "object_reader"
 module LlmCostTracker
   module Integrations
     module Base
+      PatchTarget = Data.define(:constant_name, :patch, :method_names, :optional)
       Result = Data.define(:name, :status, :message)

       def active?
@@ -13,15 +14,23 @@ module LlmCostTracker
       end

       def install
-
+        validate_contract!
+        patch_targets.each do |target|
+          target_class = constant(target.constant_name)
+          install_patch(target_class, target.patch) if target_class
+        end
       end

       def status
         name = integration_name
-
-
-
-
+        problems = contract_problems
+        if problems.any?
+          return Result.new(name, :warn, "#{name} integration cannot be installed: #{problems.join('; ')}")
+        end
+
+        required_targets = patch_targets.reject(&:optional)
+        installed = required_targets.count { |target| patch_installed?(constant(target.constant_name), target.patch) }
+        return Result.new(name, :ok, "#{name} integration installed") if installed == required_targets.count

         Result.new(name, :warn, "#{name} integration is enabled but not installed")
       end
@@ -55,10 +64,72 @@ module LlmCostTracker
        end
      end

+      def minimum_version = nil
+
+      def version_constant = nil
+
+      def patch_targets = []
+
+      def patch_target(constant_name, with:, methods:, optional: false)
+        PatchTarget.new(constant_name, with, Array(methods), optional)
+      end
+
       private

+      def validate_contract!
+        problems = contract_problems
+        return if problems.empty?
+
+        raise Error, "#{integration_name} integration cannot be installed: #{problems.join('; ')}"
+      end
+
+      def contract_problems
+        version_problems + target_problems
+      end
+
+      def version_problems
+        return [] unless minimum_version
+
+        name = integration_name.to_s
+        version = installed_version
+        return ["#{name} >= #{minimum_version} is required, but #{name} is not loaded"] unless version
+        return [] if version >= Gem::Version.new(minimum_version)
+
+        ["#{name} >= #{minimum_version} is required, detected #{version}"]
+      end
+
+      def installed_version
+        Gem.loaded_specs[integration_name.to_s]&.version || constant_version
+      end
+
+      def constant_version
+        return nil unless version_constant
+
+        value = constant(version_constant)
+        value ? Gem::Version.new(value.to_s) : nil
+      rescue ArgumentError
+        nil
+      end
+
+      def target_problems
+        patch_targets.flat_map do |target|
+          target_class = constant(target.constant_name)
+          next [] if target_class.nil? && target.optional
+          next ["#{target.constant_name} is not loaded"] unless target_class
+
+          missing_methods(target_class, target)
+        end
+      end
+
+      def missing_methods(target_class, target)
+        target.method_names.filter_map do |method_name|
+          next if target_class.method_defined?(method_name) || target_class.private_method_defined?(method_name)
+
+          "#{target.constant_name}##{method_name} is not available"
+        end
+      end
+
       def install_patch(target, patch)
-        return unless target
         return if patch_installed?(target, patch)

         target.prepend(patch)
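The Base contract now lets an integration declare a minimum SDK version, a version constant, and its patch targets, and install/status validate that contract before patching. A minimal sketch of a third-party integration built on it; Acme and all of its constants are hypothetical:

```ruby
module LlmCostTracker
  module Integrations
    module Acme                      # hypothetical provider SDK
      extend Base

      class << self
        def integration_name = :acme

        def minimum_version = "2.0.0"

        def version_constant = "Acme::VERSION"

        def patch_targets
          [
            patch_target("Acme::Client", with: ClientPatch, methods: %i[complete]),
            patch_target("Acme::Beta::Client", with: ClientPatch, methods: %i[complete], optional: true)
          ]
        end
      end

      module ClientPatch
        def complete(*args, **kwargs)
          LlmCostTracker::Integrations::Acme.enforce_budget!
          super
          # A real patch would also record usage from the response here.
        end
      end
    end
  end
end
```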

data/lib/llm_cost_tracker/integrations/openai.rb

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require_relative "base"
+require_relative "stream_tracker"

 module LlmCostTracker
   module Integrations
@@ -10,10 +11,22 @@ module LlmCostTracker
       class << self
         def integration_name = :openai

-        def
+        def minimum_version = "0.59.0"
+
+        def version_constant = "OpenAI::VERSION"
+
+        def patch_targets
           [
-
-
+            patch_target(
+              "OpenAI::Resources::Responses",
+              with: ResponsesPatch,
+              methods: %i[create stream stream_raw retrieve_streaming]
+            ),
+            patch_target(
+              "OpenAI::Resources::Chat::Completions",
+              with: ChatCompletionsPatch,
+              methods: %i[create stream_raw]
+            )
           ]
         end

@@ -28,15 +41,16 @@ module LlmCostTracker
             output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
             next if input_tokens.nil? && output_tokens.nil?

+            metadata = usage_metadata(usage)
             LlmCostTracker::Tracker.record(
               provider: "openai",
               model: ObjectReader.first(response, :model) || request[:model],
-              input_tokens:
+              input_tokens: regular_input_tokens(input_tokens, metadata[:cache_read_input_tokens]),
               output_tokens: ObjectReader.integer(output_tokens),
               latency_ms: latency_ms,
               usage_source: :sdk_response,
               provider_response_id: ObjectReader.first(response, :id),
-              metadata:
+              metadata: metadata
             )
           end
         end
@@ -61,6 +75,32 @@ module LlmCostTracker
            ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
          )
        end
+
+        def regular_input_tokens(input_tokens, cache_read)
+          [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
+        end
+
+        def track_stream(stream, collector:)
+          return stream unless active?
+
+          StreamTracker.wrap(
+            stream,
+            collector: collector,
+            active: -> { active? },
+            finish: ->(errored:) { finish_stream(collector, errored: errored) }
+          )
+        end
+
+        def stream_collector(request)
+          LlmCostTracker::StreamCollector.new(
+            provider: "openai",
+            model: request[:model] || request["model"]
+          )
+        end
+
+        def finish_stream(collector, errored:)
+          record_safely { collector.finish!(errored: errored) }
+        end
       end

       module ResponsesPatch
@@ -75,6 +115,31 @@ module LlmCostTracker
           )
           response
         end
+
+        def stream(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
+
+        def retrieve_streaming(response_id, *args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          collector.provider_response_id = response_id
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
       end

       module ChatCompletionsPatch
@@ -89,6 +154,14 @@ module LlmCostTracker
           )
           response
         end
+
+        def stream_raw(*args, **kwargs)
+          request = LlmCostTracker::Integrations::Openai.request_params(args, kwargs)
+          collector = LlmCostTracker::Integrations::Openai.stream_collector(request)
+          LlmCostTracker::Integrations::Openai.enforce_budget!
+          stream = super
+          LlmCostTracker::Integrations::Openai.track_stream(stream, collector: collector)
+        end
       end
     end
   end
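With the :openai integration enabled, the Responses and Chat Completions streaming paths are now tracked, and cached input tokens reported by the API are split out so only non-cached input is counted at the regular input rate. A hedged usage sketch; the client call shape and model name are assumptions about the openai SDK:

```ruby
LlmCostTracker.configure { |config| config.instrument :openai }

client = OpenAI::Client.new
stream = client.responses.stream(
  model: "gpt-4.1-mini",              # illustrative model name
  input: "Draft a release note for 0.5.3"
)
stream.each { |event| handle_event(event) }  # handle_event is app-defined

# The wrapped stream records usage when it finishes; cache_read_input_tokens
# is stored in metadata and subtracted from the regular input token count.
```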

data/lib/llm_cost_tracker/integrations/registry.rb

@@ -1,18 +1,31 @@
 # frozen_string_literal: true

+require "monitor"
+
+require_relative "../errors"
 require_relative "openai"
 require_relative "anthropic"
+require_relative "ruby_llm"

 module LlmCostTracker
   module Integrations
     module Registry
-
+      DEFAULT_INTEGRATIONS = {
         openai: Openai,
-        anthropic: Anthropic
+        anthropic: Anthropic,
+        ruby_llm: RubyLlm
       }.freeze
+      MUTEX = Monitor.new

       module_function

+      def register(name, integration)
+        key = name.to_sym
+        validate_integration!(integration)
+        MUTEX.synchronize { @integrations = integrations.merge(key => integration).freeze }
+        integration
+      end
+
       def install!(names = LlmCostTracker.configuration.instrumented_integrations)
         normalize(names).each { |name| fetch(name).install }
       end
@@ -28,13 +41,32 @@ module LlmCostTracker
       end

       def fetch(name)
-
-        message = "Unknown integration: #{name.inspect}. Use one of: #{
+        integrations.fetch(name.to_sym) do
+          message = "Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"
           raise LlmCostTracker::Error, message
         end
       end
+
+      def names
+        integrations.keys
+      end
+
+      def reset!
+        MUTEX.synchronize { @integrations = DEFAULT_INTEGRATIONS.dup.freeze }
+      end
+
+      def integrations
+        @integrations || MUTEX.synchronize { @integrations ||= DEFAULT_INTEGRATIONS.dup.freeze }
+      end
+
+      def validate_integration!(integration)
+        return if integration.respond_to?(:install) && integration.respond_to?(:status)
+
+        raise ArgumentError, "integration must respond to install and status"
+      end
     end

+    def self.register(name, integration) = Registry.register(name, integration)
     def self.install! = Registry.install!
     def self.checks = Registry.checks
   end
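Third-party integrations can now be added at runtime through the registry; the registered object must respond to install and status, which extending Base provides. The Acme module below refers to the hypothetical sketch shown earlier:

```ruby
# Register a custom integration and enable it like the built-in ones.
LlmCostTracker::Integrations.register(:acme, LlmCostTracker::Integrations::Acme)

LlmCostTracker.configure do |config|
  config.instrument :acme
end
```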

data/lib/llm_cost_tracker/integrations/ruby_llm.rb

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+require_relative "base"
+
+module LlmCostTracker
+  module Integrations
+    module RubyLlm
+      extend Base
+
+      class << self
+        def integration_name = :ruby_llm
+
+        def minimum_version = "1.14.1"
+
+        def version_constant = "RubyLLM::VERSION"
+
+        def patch_targets
+          [
+            patch_target(
+              "RubyLLM::Provider",
+              with: ProviderPatch,
+              methods: %i[slug complete embed transcribe]
+            )
+          ]
+        end
+
+        def record_completion(provider, response, request:, latency_ms:, stream:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: stream
+          )
+        end
+
+        def streaming_request?(request, has_block:)
+          has_block || request[:stream] == true
+        end
+
+        def record_embedding(provider, response, request:, latency_ms:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: false,
+            output_tokens: 0
+          )
+        end
+
+        def record_transcription(provider, response, request:, latency_ms:)
+          record_usage(
+            provider: provider_slug(provider),
+            model: response_model_id(response) || model_id(request[:model]),
+            response: response,
+            latency_ms: latency_ms,
+            stream: false
+          )
+        end
+
+        def record_usage(provider:, model:, response:, latency_ms:, stream:, output_tokens: nil)
+          return unless active?
+
+          record_safely do
+            input_tokens = ObjectReader.first(response, :input_tokens)
+            output_tokens = ObjectReader.first(response, :output_tokens) if output_tokens.nil?
+            next if input_tokens.nil? && output_tokens.nil?
+
+            cache_read = ObjectReader.integer(ObjectReader.first(response, :cached_tokens))
+
+            LlmCostTracker::Tracker.record(
+              provider: provider,
+              model: model,
+              input_tokens: regular_input_tokens(input_tokens, cache_read),
+              output_tokens: ObjectReader.integer(output_tokens),
+              latency_ms: latency_ms,
+              stream: stream,
+              usage_source: :ruby_llm,
+              provider_response_id: provider_response_id(response),
+              metadata: usage_metadata(response, cache_read)
+            )
+          end
+        end
+
+        def usage_metadata(response, cache_read)
+          {
+            cache_read_input_tokens: cache_read,
+            cache_write_input_tokens: ObjectReader.integer(ObjectReader.first(response, :cache_creation_tokens)),
+            hidden_output_tokens: ObjectReader.integer(
+              ObjectReader.first(response, :thinking_tokens, :reasoning_tokens)
+            )
+          }
+        end
+
+        def regular_input_tokens(input_tokens, cache_read)
+          [ObjectReader.integer(input_tokens) - cache_read.to_i, 0].max
+        end
+
+        def provider_slug(provider)
+          ObjectReader.first(provider, :slug).to_s
+        end
+
+        def model_id(object)
+          return nil if object.nil?
+
+          value = ObjectReader.first(object, :id, :model_id, :model)
+          value ||= object if object.is_a?(String) || object.is_a?(Symbol)
+          value&.to_s
+        end
+
+        def response_model_id(object)
+          value = ObjectReader.first(object, :model_id, :model)
+          value&.to_s
+        end
+
+        def provider_response_id(response)
+          ObjectReader.first(response, :id, :provider_response_id) || ObjectReader.nested(response, :raw, :id)
+        end
+      end
+
+      module ProviderPatch
+        def complete(*args, **kwargs, &)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_completion(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at),
+            stream: integration.streaming_request?(request, has_block: block_given?)
+          )
+          response
+        end
+
+        def embed(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_embedding(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
+
+        def transcribe(*args, **kwargs)
+          integration = LlmCostTracker::Integrations::RubyLlm
+          request = integration.request_params(args, kwargs)
+          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+          integration.enforce_budget!
+          response = super
+          integration.record_transcription(
+            self,
+            response,
+            request: request,
+            latency_ms: integration.elapsed_ms(started_at)
+          )
+          response
+        end
+      end
+    end
+  end
+end
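With the :ruby_llm integration enabled, completions, embeddings, and transcriptions made through RubyLLM are recorded at the provider layer, including streamed chats. A hedged usage sketch; the RubyLLM call shapes and the model name are assumptions about that gem:

```ruby
LlmCostTracker.configure { |config| config.instrument :ruby_llm }

chat = RubyLLM.chat(model: "claude-sonnet-4-5")  # illustrative model name
chat.ask("Classify this support ticket")
# Provider slug, model id, token counts (cached tokens split out), and
# latency are recorded through LlmCostTracker::Tracker.

RubyLLM.embed("support ticket text")             # recorded with output_tokens: 0
```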