llm_cost_tracker 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/README.md +195 -109
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +46 -55
- data/app/services/llm_cost_tracker/dashboard/data_quality_aggregate.rb +81 -0
- data/lib/llm_cost_tracker/budget.rb +34 -37
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +37 -0
- data/lib/llm_cost_tracker/configuration.rb +10 -5
- data/lib/llm_cost_tracker/doctor.rb +166 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +33 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +12 -6
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_period_totals_to_llm_cost_tracker.rb.erb +38 -8
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +1 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +53 -21
- data/lib/llm_cost_tracker/integrations/anthropic.rb +75 -0
- data/lib/llm_cost_tracker/integrations/base.rb +72 -0
- data/lib/llm_cost_tracker/integrations/object_reader.rb +56 -0
- data/lib/llm_cost_tracker/integrations/openai.rb +95 -0
- data/lib/llm_cost_tracker/integrations/registry.rb +41 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +4 -3
- data/lib/llm_cost_tracker/parsed_usage.rb +8 -1
- data/lib/llm_cost_tracker/parsers/anthropic.rb +17 -49
- data/lib/llm_cost_tracker/parsers/base.rb +80 -0
- data/lib/llm_cost_tracker/parsers/gemini.rb +12 -35
- data/lib/llm_cost_tracker/parsers/openai.rb +1 -6
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +6 -15
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +8 -30
- data/lib/llm_cost_tracker/parsers/registry.rb +17 -2
- data/lib/llm_cost_tracker/price_freshness.rb +38 -0
- data/lib/llm_cost_tracker/price_registry.rb +14 -0
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +2 -1
- data/lib/llm_cost_tracker/price_sync/refresh_plan_builder.rb +4 -2
- data/lib/llm_cost_tracker/price_sync.rb +10 -0
- data/lib/llm_cost_tracker/prices.json +394 -41
- data/lib/llm_cost_tracker/pricing.rb +8 -1
- data/lib/llm_cost_tracker/request_url.rb +20 -0
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +47 -27
- data/lib/llm_cost_tracker/storage/active_record_store.rb +4 -0
- data/lib/llm_cost_tracker/stream_collector.rb +3 -3
- data/lib/llm_cost_tracker/tag_context.rb +52 -0
- data/lib/llm_cost_tracker/tags_column.rb +62 -24
- data/lib/llm_cost_tracker/tracker.rb +5 -2
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +14 -4
- data/lib/tasks/llm_cost_tracker.rake +21 -3
- metadata +13 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +0 -51
|
@@ -2,17 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
require "rails/generators"
|
|
4
4
|
|
|
5
|
+
require_relative "../../price_registry"
|
|
6
|
+
require_relative "../../price_sync/registry_loader"
|
|
7
|
+
require_relative "../../price_sync/registry_writer"
|
|
8
|
+
|
|
5
9
|
module LlmCostTracker
|
|
6
10
|
module Generators
|
|
7
11
|
class PricesGenerator < Rails::Generators::Base
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
desc "Creates a local LlmCostTracker price override file"
|
|
12
|
+
desc "Creates a local LLM Cost Tracker price snapshot"
|
|
11
13
|
|
|
12
14
|
def create_prices_file
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
registry = LlmCostTracker::PriceSync::RegistryLoader.new.call(
|
|
16
|
+
path: LlmCostTracker::PriceRegistry::DEFAULT_PRICES_PATH,
|
|
17
|
+
seed_path: LlmCostTracker::PriceRegistry::DEFAULT_PRICES_PATH
|
|
18
|
+
)
|
|
19
|
+
LlmCostTracker::PriceSync::RegistryWriter.new.call(
|
|
20
|
+
path: File.join(destination_root, "config/llm_cost_tracker_prices.yml"),
|
|
21
|
+
registry: registry
|
|
16
22
|
)
|
|
17
23
|
end
|
|
18
24
|
end
|
|
@@ -8,10 +8,10 @@ class AddPeriodTotalsToLlmCostTracker < ActiveRecord::Migration<%= migration_ver
|
|
|
8
8
|
t.timestamps
|
|
9
9
|
end unless table_exists?(:llm_cost_tracker_period_totals)
|
|
10
10
|
|
|
11
|
+
backfill_period_totals
|
|
12
|
+
|
|
11
13
|
add_index :llm_cost_tracker_period_totals, [:period, :period_start],
|
|
12
14
|
unique: true unless index_exists?(:llm_cost_tracker_period_totals, [:period, :period_start])
|
|
13
|
-
|
|
14
|
-
backfill_period_totals
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def down
|
|
@@ -22,23 +22,53 @@ class AddPeriodTotalsToLlmCostTracker < ActiveRecord::Migration<%= migration_ver
|
|
|
22
22
|
private
|
|
23
23
|
|
|
24
24
|
def backfill_period_totals
|
|
25
|
+
backfill_legacy_monthly_totals if table_exists?(:llm_cost_tracker_monthly_totals)
|
|
25
26
|
return unless table_exists?(:llm_api_calls)
|
|
26
27
|
|
|
27
28
|
backfill_period_total("day", day_bucket_sql)
|
|
28
29
|
backfill_period_total("month", month_bucket_sql)
|
|
29
30
|
end
|
|
30
31
|
|
|
32
|
+
# Copies rows from the legacy llm_cost_tracker_monthly_totals table into
# llm_cost_tracker_period_totals as "month" periods. Months that already
# have a "month" row in the new table are left untouched (NOT EXISTS guard),
# so running the statement more than once does not duplicate rows.
def backfill_legacy_monthly_totals
  # Quote the period label once; it is used both for the inserted value and
  # for the duplicate check.
  month_literal = connection.quote("month")

  execute <<~SQL
    INSERT INTO llm_cost_tracker_period_totals (period, period_start, total_cost, created_at, updated_at)
    SELECT #{month_literal} AS period,
           month AS period_start,
           total_cost,
           CURRENT_TIMESTAMP,
           CURRENT_TIMESTAMP
    FROM llm_cost_tracker_monthly_totals legacy
    WHERE NOT EXISTS (
      SELECT 1
      FROM llm_cost_tracker_period_totals existing
      WHERE existing.period = #{month_literal}
        AND existing.period_start = legacy.month
    )
  SQL
end
|
|
49
|
+
|
|
31
50
|
def backfill_period_total(period, bucket_sql)
|
|
32
51
|
execute <<~SQL
|
|
33
52
|
INSERT INTO llm_cost_tracker_period_totals (period, period_start, total_cost, created_at, updated_at)
|
|
34
|
-
SELECT
|
|
35
|
-
|
|
36
|
-
|
|
53
|
+
SELECT aggregated.period,
|
|
54
|
+
aggregated.period_start,
|
|
55
|
+
aggregated.total_cost,
|
|
37
56
|
CURRENT_TIMESTAMP,
|
|
38
57
|
CURRENT_TIMESTAMP
|
|
39
|
-
FROM
|
|
40
|
-
|
|
41
|
-
|
|
58
|
+
FROM (
|
|
59
|
+
SELECT #{connection.quote(period)} AS period,
|
|
60
|
+
#{bucket_sql} AS period_start,
|
|
61
|
+
SUM(total_cost) AS total_cost
|
|
62
|
+
FROM llm_api_calls
|
|
63
|
+
WHERE total_cost IS NOT NULL
|
|
64
|
+
GROUP BY #{bucket_sql}
|
|
65
|
+
) aggregated
|
|
66
|
+
WHERE NOT EXISTS (
|
|
67
|
+
SELECT 1
|
|
68
|
+
FROM llm_cost_tracker_period_totals existing
|
|
69
|
+
WHERE existing.period = aggregated.period
|
|
70
|
+
AND existing.period_start = aggregated.period_start
|
|
71
|
+
)
|
|
42
72
|
SQL
|
|
43
73
|
end
|
|
44
74
|
|
|
@@ -37,10 +37,9 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
|
|
|
37
37
|
t.timestamps
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
add_index :llm_api_calls, :provider
|
|
41
|
-
add_index :llm_api_calls, :model
|
|
42
40
|
add_index :llm_api_calls, :tracked_at
|
|
43
41
|
add_index :llm_api_calls, [:provider, :tracked_at]
|
|
42
|
+
add_index :llm_api_calls, [:model, :tracked_at]
|
|
44
43
|
add_index :llm_api_calls, :stream
|
|
45
44
|
add_index :llm_api_calls, :usage_source
|
|
46
45
|
add_index :llm_api_calls, :provider_response_id
|
|
@@ -1,42 +1,74 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
LlmCostTracker.configure do |config|
|
|
4
|
-
#
|
|
4
|
+
# Set to false to temporarily disable tracking without removing middleware.
|
|
5
5
|
config.enabled = true
|
|
6
6
|
|
|
7
|
-
#
|
|
7
|
+
# :active_record stores events in llm_api_calls for dashboards, reports, and shared budgets.
|
|
8
|
+
# Other options: :log for local logging, :custom for your own storage callable.
|
|
8
9
|
config.storage_backend = :active_record
|
|
9
10
|
|
|
10
|
-
#
|
|
11
|
-
|
|
11
|
+
# Tags are merged into every event. Use a callable for request/job-time context.
|
|
12
|
+
config.default_tags = -> { { environment: Rails.env } }
|
|
12
13
|
|
|
13
|
-
#
|
|
14
|
+
# Optional SDK integrations. Provider SDK gems are not installed by LLM Cost Tracker.
|
|
15
|
+
# Enable only the SDKs your app already uses.
|
|
16
|
+
# config.instrument :openai
|
|
17
|
+
# config.instrument :anthropic
|
|
18
|
+
|
|
19
|
+
# Budget behavior: :notify calls on_budget_exceeded, :raise raises after recording,
|
|
20
|
+
# :block_requests preflights monthly/daily budgets before supported requests.
|
|
21
|
+
config.budget_exceeded_behavior = :notify
|
|
22
|
+
|
|
23
|
+
# Storage failures are non-fatal by default so LLM responses can still return.
|
|
24
|
+
# Use :raise if failed ledger writes should fail the request/job.
|
|
25
|
+
config.storage_error_behavior = :warn
|
|
26
|
+
|
|
27
|
+
# Unknown pricing records token usage with nil cost by default. Use :raise if
|
|
28
|
+
# every model must have known pricing before it can be used.
|
|
29
|
+
config.unknown_pricing_behavior = :warn
|
|
30
|
+
|
|
31
|
+
# Used only by the :log storage backend.
|
|
32
|
+
config.log_level = :info
|
|
33
|
+
<% if options[:prices] -%>
|
|
34
|
+
|
|
35
|
+
# Local JSON/YAML pricing file generated by --prices. Keep it in source control
|
|
36
|
+
# and refresh it with bin/rails llm_cost_tracker:prices:sync.
|
|
37
|
+
config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
|
|
38
|
+
<% end -%>
|
|
39
|
+
|
|
40
|
+
# Cumulative monthly/daily budgets and a single-call ceiling, in USD.
|
|
14
41
|
# config.monthly_budget = 100.00
|
|
15
42
|
# config.daily_budget = 10.00
|
|
16
43
|
# config.per_call_budget = 1.00
|
|
17
|
-
# config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
|
|
18
44
|
|
|
19
|
-
#
|
|
20
|
-
# config.storage_error_behavior = :warn # :ignore, :warn, or :raise
|
|
21
|
-
|
|
22
|
-
# What to do when a model has no built-in price and no pricing_overrides entry.
|
|
23
|
-
# config.unknown_pricing_behavior = :warn # :ignore, :warn, or :raise
|
|
24
|
-
|
|
25
|
-
# Callback when monthly budget is exceeded.
|
|
45
|
+
# Called when :notify is selected and a monthly, daily, or per-call budget is exceeded.
|
|
26
46
|
# config.on_budget_exceeded = ->(data) {
|
|
27
|
-
# Rails.logger.warn
|
|
28
|
-
# "#{data[:budget_type]}
|
|
29
|
-
#
|
|
47
|
+
# Rails.logger.warn(
|
|
48
|
+
# "LLM #{data[:budget_type]} budget exceeded: $#{data[:total]} / $#{data[:budget]}"
|
|
49
|
+
# )
|
|
30
50
|
# }
|
|
31
51
|
|
|
32
|
-
#
|
|
33
|
-
# config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.
|
|
34
|
-
|
|
35
|
-
# Override pricing for specific models in Ruby (per 1M tokens, USD).
|
|
52
|
+
# Local pricing table and small Ruby-side overrides. Prices are USD per 1M tokens.
|
|
53
|
+
# config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.yml")
|
|
36
54
|
# config.pricing_overrides = {
|
|
37
55
|
# "my-custom-model" => { input: 1.00, output: 2.00 }
|
|
38
56
|
# }
|
|
39
57
|
|
|
40
|
-
# OpenAI-compatible
|
|
58
|
+
# Register OpenAI-compatible gateway hosts and choose extra tag breakdowns
|
|
59
|
+
# for bin/rails llm_cost_tracker:report.
|
|
41
60
|
# config.openai_compatible_providers["llm.my-company.com"] = "internal_gateway"
|
|
61
|
+
# config.report_tag_breakdowns = %w[feature user_id]
|
|
62
|
+
|
|
63
|
+
# Use :custom when you want to send events to your own sink instead of ActiveRecord.
|
|
64
|
+
# Return false from custom_storage to skip budget checks for that event.
|
|
65
|
+
# config.storage_backend = :custom
|
|
66
|
+
# config.custom_storage = ->(event) {
|
|
67
|
+
# Rails.logger.info(
|
|
68
|
+
# provider: event.provider,
|
|
69
|
+
# model: event.model,
|
|
70
|
+
# total_cost: event.cost&.total_cost,
|
|
71
|
+
# tags: event.tags
|
|
72
|
+
# )
|
|
73
|
+
# }
|
|
42
74
|
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  module Integrations
    # SDK integration for Anthropic message resources.
    #
    # Wraps `create` on `Anthropic::Resources::Messages` and
    # `Anthropic::Resources::Beta::Messages` (when those constants are loaded)
    # so that each returned message's usage is forwarded to
    # LlmCostTracker::Tracker.record with provider "anthropic".
    module Anthropic
      extend Base

      class << self
        # Name used to look up whether this integration is enabled.
        def integration_name = :anthropic

        # @return [Array<Array>] pairs of [target class or nil, patch module];
        #   the target is nil when the SDK constant is not defined.
        def target_patches
          %w[
            Anthropic::Resources::Messages
            Anthropic::Resources::Beta::Messages
          ].map { |path| [constant(path), MessagesPatch] }
        end

        # Records token usage read from an SDK message.
        #
        # Nothing is recorded when the integration is inactive, when the
        # message exposes no usage, or when neither input nor output token
        # counts are present.
        #
        # @param message the object returned by the SDK `create` call
        # @param request [Hash] request parameters; `:model` is the fallback model
        # @param latency_ms [Integer] elapsed time of the SDK call
        def record_message(message, request:, latency_ms:)
          return unless active?

          record_safely do
            usage = ObjectReader.first(message, :usage)
            next unless usage

            input_tokens = ObjectReader.first(usage, :input_tokens)
            output_tokens = ObjectReader.first(usage, :output_tokens)
            next if [input_tokens, output_tokens].all?(&:nil?)

            event = {
              provider: "anthropic",
              model: ObjectReader.first(message, :model) || request[:model],
              input_tokens: ObjectReader.integer(input_tokens),
              output_tokens: ObjectReader.integer(output_tokens),
              latency_ms: latency_ms,
              usage_source: :sdk_response,
              provider_response_id: ObjectReader.first(message, :id),
              metadata: usage_metadata(usage)
            }
            LlmCostTracker::Tracker.record(**event)
          end
        end

        # Extracts cache and hidden-output counters from a usage object.
        # Missing counters are reported as 0.
        def usage_metadata(usage)
          cache_read = ObjectReader.first(usage, :cache_read_input_tokens)
          # The SDK field cache_creation_input_tokens is stored as cache_write_input_tokens.
          cache_write = ObjectReader.first(usage, :cache_creation_input_tokens)

          {
            cache_read_input_tokens: ObjectReader.integer(cache_read),
            cache_write_input_tokens: ObjectReader.integer(cache_write),
            hidden_output_tokens: hidden_output_tokens(usage)
          }
        end

        # Reads the first available of thinking_tokens, thinking_output_tokens
        # or output_tokens_details.reasoning_tokens; 0 when none is present.
        def hidden_output_tokens(usage)
          hidden = ObjectReader.first(usage, :thinking_tokens, :thinking_output_tokens)
          hidden ||= ObjectReader.nested(usage, :output_tokens_details, :reasoning_tokens)
          ObjectReader.integer(hidden)
        end
      end

      # Prepended onto the SDK message resources; wraps `create` with a budget
      # check before the call and usage recording after it.
      module MessagesPatch
        def create(*args, **kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration = LlmCostTracker::Integrations::Anthropic
          integration.enforce_budget!

          message = super
          integration.record_message(
            message,
            request: integration.request_params(args, kwargs),
            latency_ms: integration.elapsed_ms(started_at)
          )
          message
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../logging"
|
|
4
|
+
require_relative "object_reader"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Integrations
    # Behaviour shared by SDK integrations. A module that extends Base must
    # provide `integration_name` and `target_patches` (an array of
    # [target class or nil, patch module] pairs).
    module Base
      # Outcome of a status check: integration name, :ok/:warn, and a message.
      Result = Data.define(:name, :status, :message)

      # Whether the configuration has this integration instrumented.
      def active?
        LlmCostTracker.configuration.instrumented?(integration_name)
      end

      # Prepends each patch onto its target; missing targets and already
      # patched targets are skipped.
      def install
        target_patches.each do |target, patch|
          install_patch(target, patch)
        end
      end

      # Reports whether at least one patch is installed, or why none is.
      #
      # @return [Result]
      def status
        label = integration_name

        if target_patches.any? { |target, patch| patch_installed?(target, patch) }
          Result.new(label, :ok, "#{label} integration installed")
        elsif target_patches.none? { |target, _patch| target }
          Result.new(label, :warn, "#{label} SDK classes are not loaded")
        else
          Result.new(label, :warn, "#{label} integration is enabled but not installed")
        end
      end

      # Milliseconds elapsed since a CLOCK_MONOTONIC reading, rounded.
      def elapsed_ms(started_at)
        finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        ((finished_at - started_at) * 1000).round
      end

      # Delegates to Tracker.enforce_budget! only while the integration is active.
      def enforce_budget!
        return unless active?

        LlmCostTracker::Tracker.enforce_budget!
      end

      # Runs the block; LlmCostTracker::Error propagates, any other
      # StandardError is logged as a warning and swallowed.
      def record_safely
        yield
      rescue StandardError => error
        raise if error.is_a?(LlmCostTracker::Error)

        Logging.warn("#{integration_name} integration failed to record usage: #{error.class}: #{error.message}")
      end

      # Merges a leading positional Hash (if any) with keyword arguments.
      #
      # @return [Hash]
      def request_params(args, kwargs)
        positional = args.first
        base = positional.is_a?(Hash) ? positional : {}
        base.merge(kwargs)
      end

      # Resolves a "A::B::C" path without triggering inherited lookup.
      #
      # @return [Object, nil] the constant, or nil when any segment is undefined
      def constant(path)
        path.to_s.split("::").inject(Object) do |scope, segment|
          break unless scope.const_defined?(segment, false)

          scope.const_get(segment, false)
        end
      end

      private

      def install_patch(target, patch)
        target.prepend(patch) if target && !patch_installed?(target, patch)
      end

      def patch_installed?(target, patch)
        return if target.nil?

        target.ancestors&.include?(patch)
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Integrations
    # Duck-typed reader for SDK response objects. A value is looked up by
    # hash key (symbol, then string), then by public method, then by `[]`.
    module ObjectReader
      module_function

      # Returns the first non-nil value found under any of +keys+.
      #
      # @param object the object to read from (may be nil)
      # @param keys [Array<Symbol>] candidate keys, tried in order
      # @return [Object, nil]
      def first(object, *keys)
        keys.each do |key|
          value = read(object, key)
          return value unless value.nil?
        end
        nil
      end

      # Follows +path+ one key at a time, stopping at the first nil.
      #
      # @return [Object, nil]
      def nested(object, *path)
        path.reduce(object) do |current, key|
          return nil if current.nil?

          read(current, key)
        end
      end

      # Reads a single key using hash, method, then index access.
      def read(object, key)
        return nil if object.nil?

        read_hash(object, key) || read_method(object, key) || read_index(object, key)
      end

      # Coerces a value with #to_i; nil becomes 0.
      def integer(value)
        value.nil? ? 0 : value.to_i
      end

      # Hash-like access: tries the key as given, then its string form.
      def read_hash(object, key)
        return unless object.respond_to?(:key?)

        return object[key] if object.key?(key)

        string_key = key.to_s
        object[string_key] if object.key?(string_key)
      end

      # Method access for objects exposing the key as a public method.
      def read_method(object, key)
        object.public_send(key) if object.respond_to?(key)
      end

      # Index access for objects that respond to `[]`.
      #
      # Struct#[] raises NameError (not NoMethodError) for an unknown member,
      # so NameError is rescued here; it is the superclass of NoMethodError,
      # which therefore stays covered. Without this, a missing first key on a
      # Struct-based object would abort the lookup instead of letting the
      # caller try the next key.
      def read_index(object, key)
        return unless object.respond_to?(:[])

        object[key]
      rescue IndexError, TypeError, NameError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  module Integrations
    # SDK integration for OpenAI resources.
    #
    # Wraps `create` on `OpenAI::Resources::Responses` and
    # `OpenAI::Resources::Chat::Completions` (when those constants are loaded)
    # so that each returned response's usage is forwarded to
    # LlmCostTracker::Tracker.record with provider "openai".
    module Openai
      extend Base

      class << self
        # Name used to look up whether this integration is enabled.
        def integration_name = :openai

        # @return [Array<Array>] pairs of [target class or nil, patch module];
        #   the target is nil when the SDK constant is not defined.
        def target_patches
          [
            ["OpenAI::Resources::Responses", ResponsesPatch],
            ["OpenAI::Resources::Chat::Completions", ChatCompletionsPatch]
          ].map { |path, patch| [constant(path), patch] }
        end

        # Records token usage read from an SDK response.
        #
        # Token counts are read from input_tokens/output_tokens, falling back
        # to prompt_tokens/completion_tokens. Nothing is recorded when the
        # integration is inactive, when the response exposes no usage, or when
        # neither count is present.
        #
        # @param response the object returned by the SDK `create` call
        # @param request [Hash] request parameters; `:model` is the fallback model
        # @param latency_ms [Integer] elapsed time of the SDK call
        def record_response(response, request:, latency_ms:)
          return unless active?

          record_safely do
            usage = ObjectReader.first(response, :usage)
            next unless usage

            input_tokens = ObjectReader.first(usage, :input_tokens, :prompt_tokens)
            output_tokens = ObjectReader.first(usage, :output_tokens, :completion_tokens)
            next if [input_tokens, output_tokens].all?(&:nil?)

            event = {
              provider: "openai",
              model: ObjectReader.first(response, :model) || request[:model],
              input_tokens: ObjectReader.integer(input_tokens),
              output_tokens: ObjectReader.integer(output_tokens),
              latency_ms: latency_ms,
              usage_source: :sdk_response,
              provider_response_id: ObjectReader.first(response, :id),
              metadata: usage_metadata(usage)
            }
            LlmCostTracker::Tracker.record(**event)
          end
        end

        # Extracts cached-input and hidden-output counters from a usage object.
        def usage_metadata(usage)
          {
            cache_read_input_tokens: cache_read_input_tokens(usage),
            hidden_output_tokens: hidden_output_tokens(usage)
          }
        end

        # cached_tokens from input_tokens_details, else prompt_tokens_details; 0 when absent.
        def cache_read_input_tokens(usage)
          cached = ObjectReader.nested(usage, :input_tokens_details, :cached_tokens)
          cached ||= ObjectReader.nested(usage, :prompt_tokens_details, :cached_tokens)
          ObjectReader.integer(cached)
        end

        # reasoning_tokens from output_tokens_details, else completion_tokens_details; 0 when absent.
        def hidden_output_tokens(usage)
          reasoning = ObjectReader.nested(usage, :output_tokens_details, :reasoning_tokens)
          reasoning ||= ObjectReader.nested(usage, :completion_tokens_details, :reasoning_tokens)
          ObjectReader.integer(reasoning)
        end
      end

      # Prepended onto OpenAI::Resources::Responses; wraps `create` with a
      # budget check before the call and usage recording after it.
      module ResponsesPatch
        def create(*args, **kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration = LlmCostTracker::Integrations::Openai
          integration.enforce_budget!

          response = super
          integration.record_response(
            response,
            request: integration.request_params(args, kwargs),
            latency_ms: integration.elapsed_ms(started_at)
          )
          response
        end
      end

      # Prepended onto OpenAI::Resources::Chat::Completions; same wrapping as
      # ResponsesPatch.
      module ChatCompletionsPatch
        def create(*args, **kwargs)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          integration = LlmCostTracker::Integrations::Openai
          integration.enforce_budget!

          response = super
          integration.record_response(
            response,
            request: integration.request_params(args, kwargs),
            latency_ms: integration.elapsed_ms(started_at)
          )
          response
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "openai"
|
|
4
|
+
require_relative "anthropic"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Integrations
    # Lookup table and entry points for the available SDK integrations.
    module Registry
      INTEGRATIONS = {
        openai: Openai,
        anthropic: Anthropic
      }.freeze

      module_function

      # Installs the patches of every named integration.
      #
      # @param names integration names; defaults to the configured
      #   instrumented integrations
      # @raise [LlmCostTracker::Error] when a name is not registered
      def install!(names = LlmCostTracker.configuration.instrumented_integrations)
        normalize(names).each { |name| fetch(name).install }
      end

      # Returns one status result per named integration, or a single :ok
      # result when no integration is enabled.
      #
      # The names are normalized before the emptiness check so that this
      # method accepts the same inputs as install! (nil, a single name, or a
      # nested list) instead of calling #empty? on the raw argument.
      #
      # @return [Array<Base::Result>]
      # @raise [LlmCostTracker::Error] when a name is not registered
      def checks(names = LlmCostTracker.configuration.instrumented_integrations)
        enabled = normalize(names)
        return [Base::Result.new(:integrations, :ok, "no SDK integrations enabled")] if enabled.empty?

        enabled.map { |name| fetch(name).status }
      end

      # Coerces the input to a flat, de-duplicated list of symbols.
      def normalize(names)
        Array(names).flatten.map(&:to_sym).uniq
      end

      # Looks up an integration module by name.
      #
      # @raise [LlmCostTracker::Error] listing the valid names when unknown
      def fetch(name)
        INTEGRATIONS.fetch(name.to_sym) do
          message = "Unknown integration: #{name.inspect}. Use one of: #{INTEGRATIONS.keys.join(', ')}"
          raise LlmCostTracker::Error, message
        end
      end
    end

    # Convenience delegators using the configured integration names.
    def self.install! = Registry.install!
    def self.checks = Registry.checks
  end
end
|
|
@@ -4,6 +4,7 @@ require "faraday"
|
|
|
4
4
|
require "json"
|
|
5
5
|
|
|
6
6
|
require_relative "../logging"
|
|
7
|
+
require_relative "../request_url"
|
|
7
8
|
|
|
8
9
|
module LlmCostTracker
|
|
9
10
|
module Middleware
|
|
@@ -76,7 +77,7 @@ module LlmCostTracker
|
|
|
76
77
|
response_body = read_body(response_env.body)
|
|
77
78
|
unless response_body
|
|
78
79
|
Logging.warn(
|
|
79
|
-
"Unable to read response body for #{request_url}; " \
|
|
80
|
+
"Unable to read response body for #{RequestUrl.label(request_url)}; " \
|
|
80
81
|
"streaming responses are captured automatically for OpenAI/Anthropic/Gemini " \
|
|
81
82
|
"or via LlmCostTracker.track_stream for custom clients."
|
|
82
83
|
)
|
|
@@ -156,11 +157,11 @@ module LlmCostTracker
|
|
|
156
157
|
|
|
157
158
|
def capture_warning(request_url, stream_buffer)
|
|
158
159
|
unless stream_buffer&.dig(:overflowed)
|
|
159
|
-
return "Unable to capture streaming response for #{request_url}; " \
|
|
160
|
+
return "Unable to capture streaming response for #{RequestUrl.label(request_url)}; " \
|
|
160
161
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
161
162
|
end
|
|
162
163
|
|
|
163
|
-
"Streaming response for #{request_url} exceeded #{STREAM_CAPTURE_LIMIT_BYTES} bytes; " \
|
|
164
|
+
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{STREAM_CAPTURE_LIMIT_BYTES} bytes; " \
|
|
164
165
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
165
166
|
end
|
|
166
167
|
end
|
|
@@ -16,6 +16,7 @@ module LlmCostTracker
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
class ParsedUsage
|
|
19
|
+
UNKNOWN_MODEL = "unknown"
|
|
19
20
|
TRACKING_KEYS = %i[
|
|
20
21
|
provider
|
|
21
22
|
model
|
|
@@ -30,7 +31,7 @@ module LlmCostTracker
|
|
|
30
31
|
def self.build(**attributes)
|
|
31
32
|
new(
|
|
32
33
|
provider: attributes.fetch(:provider),
|
|
33
|
-
model: attributes.fetch(:model),
|
|
34
|
+
model: normalize_model(attributes.fetch(:model)),
|
|
34
35
|
input_tokens: attributes.fetch(:input_tokens).to_i,
|
|
35
36
|
output_tokens: attributes.fetch(:output_tokens).to_i,
|
|
36
37
|
total_tokens: attributes.fetch(:total_tokens, usage_breakdown(attributes).total_tokens).to_i,
|
|
@@ -61,5 +62,11 @@ module LlmCostTracker
|
|
|
61
62
|
)
|
|
62
63
|
end
|
|
63
64
|
private_class_method :usage_breakdown
|
|
65
|
+
|
|
66
|
+
# Converts the given model to a stripped string; a blank result is replaced
# by UNKNOWN_MODEL.
def self.normalize_model(value)
  stripped = value.to_s.strip
  return UNKNOWN_MODEL if stripped.empty?

  stripped
end
|
|
70
|
+
private_class_method :normalize_model
|
|
64
71
|
end
|
|
65
72
|
end
|