llm_cost_tracker 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +44 -0
- data/CHANGELOG.md +62 -0
- data/README.md +243 -26
- data/Rakefile +3 -1
- data/lib/llm_cost_tracker/budget.rb +97 -0
- data/lib/llm_cost_tracker/configuration.rb +37 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +69 -1
- data/lib/llm_cost_tracker/middleware/faraday.rb +51 -14
- data/lib/llm_cost_tracker/parsers/anthropic.rb +10 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +13 -5
- data/lib/llm_cost_tracker/parsers/openai.rb +22 -7
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +44 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +69 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +76 -41
- data/lib/llm_cost_tracker/railtie.rb +3 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +24 -3
- data/lib/llm_cost_tracker/tracker.rb +65 -33
- data/lib/llm_cost_tracker/unknown_pricing.rb +47 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +33 -5
- data/llm_cost_tracker.gemspec +9 -7
- metadata +38 -23
|
@@ -2,15 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
class Configuration
|
|
5
|
+
# Hostname => provider name for OpenAI-compatible APIs.
|
|
6
|
+
OPENAI_COMPATIBLE_PROVIDERS = {
|
|
7
|
+
"openrouter.ai" => "openrouter",
|
|
8
|
+
"api.deepseek.com" => "deepseek"
|
|
9
|
+
}.freeze
|
|
10
|
+
|
|
11
|
+
BUDGET_EXCEEDED_BEHAVIORS = %i[notify raise block_requests].freeze
|
|
12
|
+
STORAGE_ERROR_BEHAVIORS = %i[ignore warn raise].freeze
|
|
13
|
+
UNKNOWN_PRICING_BEHAVIORS = %i[ignore warn raise].freeze
|
|
14
|
+
|
|
5
15
|
attr_accessor :enabled,
|
|
6
16
|
:storage_backend, # :log, :active_record, :custom
|
|
7
17
|
:custom_storage, # callable object for :custom backend
|
|
8
18
|
:default_tags, # Hash of default tags added to every event
|
|
9
19
|
:on_budget_exceeded, # callable, receives event hash
|
|
10
20
|
:monthly_budget, # Float, in USD — nil means no limit
|
|
21
|
+
:budget_exceeded_behavior, # :notify, :raise, :block_requests
|
|
22
|
+
:storage_error_behavior, # :ignore, :warn, :raise
|
|
23
|
+
:unknown_pricing_behavior, # :ignore, :warn, :raise
|
|
11
24
|
:log_level, # :debug, :info, :warn
|
|
25
|
+
:prices_file, # JSON/YAML file that overrides built-in prices
|
|
12
26
|
:pricing_overrides # Hash to override built-in pricing
|
|
13
27
|
|
|
28
|
+
attr_reader :openai_compatible_providers
|
|
29
|
+
|
|
14
30
|
def initialize
|
|
15
31
|
@enabled = true
|
|
16
32
|
@storage_backend = :log
|
|
@@ -18,8 +34,21 @@ module LlmCostTracker
|
|
|
18
34
|
@default_tags = {}
|
|
19
35
|
@on_budget_exceeded = nil
|
|
20
36
|
@monthly_budget = nil
|
|
37
|
+
@budget_exceeded_behavior = :notify
|
|
38
|
+
@storage_error_behavior = :warn
|
|
39
|
+
@unknown_pricing_behavior = :warn
|
|
21
40
|
@log_level = :info
|
|
41
|
+
@prices_file = nil
|
|
22
42
|
@pricing_overrides = {}
|
|
43
|
+
self.openai_compatible_providers = OPENAI_COMPATIBLE_PROVIDERS
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Writer for the OpenAI-compatible provider map. The value handed in is run
# through normalize_openai_compatible_providers and the result is what gets
# stored, not the caller's object.
def openai_compatible_providers=(providers)
  normalized = normalize_openai_compatible_providers(providers)
  @openai_compatible_providers = normalized
end
|
|
49
|
+
|
|
50
|
+
# Re-normalizes the provider map currently held by this object by passing the
# reader's value back through the writer.
def normalize_openai_compatible_providers!
  current = openai_compatible_providers
  self.openai_compatible_providers = current
end
|
|
24
53
|
|
|
25
54
|
def active_record?
|
|
@@ -29,5 +58,13 @@ module LlmCostTracker
|
|
|
29
58
|
# True when storage_backend is the :log symbol.
def log?
  storage_backend == :log
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Returns a new Hash built from +providers+: each host key is stringified and
# lower-cased, each provider value is stringified. A nil/false argument is
# treated as an empty mapping.
def normalize_openai_compatible_providers(providers)
  entries = providers || {}
  entries.to_h { |host, provider| [host.to_s.downcase, provider.to_s] }
end
|
|
32
69
|
end
|
|
33
70
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Common ancestor of the library's exception classes.
  class Error < StandardError; end

  # Carries the monthly total, the budget and (optionally) the last event;
  # the message shows both amounts with six decimal places.
  class BudgetExceededError < Error
    attr_reader :monthly_total, :budget, :last_event

    def initialize(monthly_total:, budget:, last_event: nil)
      @monthly_total = monthly_total
      @budget = budget
      @last_event = last_event

      spent = format("%.6f", monthly_total)
      limit = format("%.6f", budget)
      super("LLM monthly budget exceeded: $#{spent} / $#{limit}")
    end
  end

  # Carries the model name for which no pricing was configured.
  class UnknownPricingError < Error
    attr_reader :model

    def initialize(model:)
      @model = model
      shown = model.inspect

      super("No pricing configured for LLM model: #{shown}")
    end
  end

  # Wraps another exception; the message embeds its class and message.
  class StorageError < Error
    attr_reader :original_error

    def initialize(original_error)
      @original_error = original_error
      detail = "#{original_error.class}: #{original_error.message}"

      super("Failed to store LLM cost event: #{detail}")
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Helpers that split event metadata into token-usage numbers and
  # user-facing tags.
  module EventMetadata
    # Metadata keys that hold usage numbers; +tags+ filters these out.
    INTERNAL_TAG_KEYS = %w[
      cache_creation_input_tokens
      cache_creation_tokens
      cache_read_input_tokens
      cache_read_tokens
      cached_input_tokens
      input_tokens
      output_tokens
      reasoning_tokens
      total_tokens
    ].freeze

    class << self
      # Builds the usage Hash for an event. Token counts are coerced with
      # to_i; cache counts are read from +metadata+ (symbol or string keys).
      # total_tokens is input + output + cache read + cache creation.
      def usage_data(input_tokens, output_tokens, metadata)
        cache_read = integer_metadata(metadata, :cache_read_input_tokens, :cache_read_tokens)
        cache_creation = integer_metadata(metadata, :cache_creation_input_tokens, :cache_creation_tokens)
        cached_input = integer_metadata(metadata, :cached_input_tokens)
        input = input_tokens.to_i
        output = output_tokens.to_i

        {
          input_tokens: input,
          output_tokens: output,
          cached_input_tokens: cached_input,
          cache_read_input_tokens: cache_read,
          cache_creation_input_tokens: cache_creation,
          total_tokens: input + output + cache_read + cache_creation
        }
      end

      # Returns +metadata+ without the entries whose stringified key is listed
      # in INTERNAL_TAG_KEYS.
      def tags(metadata)
        metadata.reject { |name, _| INTERNAL_TAG_KEYS.include?(name.to_s) }
      end

      private

      # First non-nil value found under +keys+ (each tried as given, then as a
      # String), converted with to_i; 0 when none is present.
      def integer_metadata(metadata, *keys)
        found = keys.lazy
                    .map { |key| metadata[key] || metadata[key.to_s] }
                    .find { |candidate| !candidate.nil? }

        found.nil? ? 0 : found.to_i
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Generators
    # Generator that renders the add_latency_ms_to_llm_api_calls migration
    # template to db/migrate.
    class AddLatencyMsGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates a migration to add llm_api_calls.latency_ms"

      def create_migration_file
        template_name = "add_latency_ms_to_llm_api_calls.rb.erb"
        destination = "db/migrate/add_latency_ms_to_llm_api_calls.rb"

        migration_template(template_name, destination)
      end

      private

      # "[MAJOR.MINOR]" built from the loaded ActiveRecord version; the
      # template appends it to ActiveRecord::Migration.
      def migration_version
        format("[%s.%s]", ActiveRecord::VERSION::MAJOR, ActiveRecord::VERSION::MINOR)
      end
    end
  end
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Adds the integer column llm_api_calls.latency_ms. Both directions check
# column_exists? first, so each is a no-op when the column is already in the
# target state.
class AddLatencyMsToLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
  def up
    add_column :llm_api_calls, :latency_ms, :integer unless column_exists?(:llm_api_calls, :latency_ms)
  end

  def down
    remove_column :llm_api_calls, :latency_ms if column_exists?(:llm_api_calls, :latency_ms)
  end
end
|
|
@@ -6,10 +6,15 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
|
|
|
6
6
|
t.integer :input_tokens, null: false, default: 0
|
|
7
7
|
t.integer :output_tokens, null: false, default: 0
|
|
8
8
|
t.integer :total_tokens, null: false, default: 0
|
|
9
|
-
t.decimal :input_cost, precision:
|
|
10
|
-
t.decimal :output_cost, precision:
|
|
11
|
-
t.decimal :total_cost, precision:
|
|
12
|
-
t.
|
|
9
|
+
t.decimal :input_cost, precision: 20, scale: 8
|
|
10
|
+
t.decimal :output_cost, precision: 20, scale: 8
|
|
11
|
+
t.decimal :total_cost, precision: 20, scale: 8
|
|
12
|
+
t.integer :latency_ms
|
|
13
|
+
if postgresql?
|
|
14
|
+
t.jsonb :tags, null: false, default: {}
|
|
15
|
+
else
|
|
16
|
+
t.text :tags
|
|
17
|
+
end
|
|
13
18
|
t.datetime :tracked_at, null: false
|
|
14
19
|
|
|
15
20
|
t.timestamps
|
|
@@ -19,5 +24,12 @@ class CreateLlmApiCalls < ActiveRecord::Migration<%= migration_version %>
|
|
|
19
24
|
add_index :llm_api_calls, :model
|
|
20
25
|
add_index :llm_api_calls, :tracked_at
|
|
21
26
|
add_index :llm_api_calls, [:provider, :tracked_at]
|
|
27
|
+
add_index :llm_api_calls, :tags, using: :gin if postgresql?
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
# True when the connection's adapter name contains "postgres"
# (compared in lower case).
def postgresql?
  connection.adapter_name.downcase.include?("postgres")
end
|
|
23
35
|
end
|
|
@@ -12,6 +12,13 @@ LlmCostTracker.configure do |config|
|
|
|
12
12
|
|
|
13
13
|
# Monthly budget in USD. Set to nil to disable budget alerts.
|
|
14
14
|
# config.monthly_budget = 100.00
|
|
15
|
+
# config.budget_exceeded_behavior = :notify # :notify, :raise, or :block_requests
|
|
16
|
+
|
|
17
|
+
# What to do when storage fails.
|
|
18
|
+
# config.storage_error_behavior = :warn # :ignore, :warn, or :raise
|
|
19
|
+
|
|
20
|
+
# What to do when a model has no built-in price and no pricing_overrides entry.
|
|
21
|
+
# config.unknown_pricing_behavior = :warn # :ignore, :warn, or :raise
|
|
15
22
|
|
|
16
23
|
# Callback when monthly budget is exceeded.
|
|
17
24
|
# config.on_budget_exceeded = ->(data) {
|
|
@@ -20,8 +27,14 @@ LlmCostTracker.configure do |config|
|
|
|
20
27
|
# # Or send a Slack notification, email, etc.
|
|
21
28
|
# }
|
|
22
29
|
|
|
23
|
-
#
|
|
30
|
+
# Load a local JSON/YAML price table that overrides built-in pricing.
|
|
31
|
+
# config.prices_file = Rails.root.join("config/llm_cost_tracker_prices.json")
|
|
32
|
+
|
|
33
|
+
# Override pricing for specific models in Ruby (per 1M tokens, USD).
|
|
24
34
|
# config.pricing_overrides = {
|
|
25
35
|
# "my-custom-model" => { input: 1.00, output: 2.00 }
|
|
26
36
|
# }
|
|
37
|
+
|
|
38
|
+
# OpenAI-compatible APIs. OpenRouter and DeepSeek are included by default.
|
|
39
|
+
# config.openai_compatible_providers["llm.my-company.com"] = "internal_gateway"
|
|
27
40
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Changes the three cost columns of llm_api_calls to decimal(20, 8) on the way
# up and to decimal(12, 8) on the way down.
class UpgradeLlmApiCallCostPrecision < ActiveRecord::Migration<%= migration_version %>
  COST_COLUMNS = %i[input_cost output_cost total_cost].freeze

  def up
    resize_cost_columns(20)
  end

  def down
    resize_cost_columns(12)
  end

  private

  # Applies the given decimal precision (scale stays 8) to every cost column.
  def resize_cost_columns(precision)
    COST_COLUMNS.each do |column|
      change_column :llm_api_calls, column, :decimal, precision: precision, scale: 8
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Converts llm_api_calls.tags to a JSONB column on PostgreSQL. On any other
# adapter +up+ only prints a notice and +down+ does nothing.
class UpgradeLlmApiCallTagsToJsonb < ActiveRecord::Migration<%= migration_version %>
  def up
    unless postgresql?
      say "Skipping llm_api_calls.tags JSONB upgrade: database adapter is #{connection.adapter_name}."
      return
    end

    # Nothing to do when the column already reports the jsonb SQL type.
    return if tags_jsonb?

    # Remove any existing index on tags before the type change; a GIN index
    # is added again at the end.
    remove_index :llm_api_calls, :tags if index_exists?(:llm_api_calls, :tags)

    # NULL and empty-string values become '{}'; every other value is cast
    # with tags::jsonb. The column also gets a {} default and NOT NULL.
    change_column(
      :llm_api_calls,
      :tags,
      :jsonb,
      using: "CASE WHEN tags IS NULL OR tags = '' THEN '{}'::jsonb ELSE tags::jsonb END",
      default: {},
      null: false
    )

    add_index :llm_api_calls, :tags, using: :gin unless index_exists?(:llm_api_calls, :tags)
  end

  # On PostgreSQL, removes the tags index (if any) and casts the column back
  # to text.
  def down
    return unless postgresql?

    remove_index :llm_api_calls, :tags if index_exists?(:llm_api_calls, :tags)
    change_column :llm_api_calls, :tags, :text, using: "tags::text"
  end

  private

  # True when the connection's adapter name contains "postgres".
  def postgresql?
    connection.adapter_name.downcase.include?("postgres")
  end

  # True when a tags column exists and its sql_type is "jsonb" (any case).
  def tags_jsonb?
    column = connection.columns(:llm_api_calls).find { |candidate| candidate.name == "tags" }
    column&.sql_type.to_s.downcase == "jsonb"
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Generators
    # Generator that renders the upgrade_llm_api_call_cost_precision migration
    # template to db/migrate.
    class UpgradeCostPrecisionGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates a migration to widen llm_api_calls cost decimal precision"

      def create_migration_file
        template_name = "upgrade_llm_api_call_cost_precision.rb.erb"
        destination = "db/migrate/upgrade_llm_api_call_cost_precision.rb"

        migration_template(template_name, destination)
      end

      private

      # "[MAJOR.MINOR]" built from the loaded ActiveRecord version; the
      # template appends it to ActiveRecord::Migration.
      def migration_version
        format("[%s.%s]", ActiveRecord::VERSION::MAJOR, ActiveRecord::VERSION::MINOR)
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  module Generators
    # Generator that renders the upgrade_llm_api_call_tags_to_jsonb migration
    # template to db/migrate.
    class UpgradeTagsToJsonbGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates a migration to upgrade llm_api_calls.tags to PostgreSQL JSONB"

      def create_migration_file
        template_name = "upgrade_llm_api_call_tags_to_jsonb.rb.erb"
        destination = "db/migrate/upgrade_llm_api_call_tags_to_jsonb.rb"

        migration_template(template_name, destination)
      end

      private

      # "[MAJOR.MINOR]" built from the loaded ActiveRecord version; the
      # template appends it to ActiveRecord::Migration.
      def migration_version
        format("[%s.%s]", ActiveRecord::VERSION::MAJOR, ActiveRecord::VERSION::MINOR)
      end
    end
  end
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "active_record"
|
|
4
|
+
require "json"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
6
7
|
class LlmApiCall < ActiveRecord::Base
|
|
@@ -9,7 +10,34 @@ module LlmCostTracker
|
|
|
9
10
|
# Scopes for querying
|
|
10
11
|
scope :by_provider, ->(provider) { where(provider: provider) }
|
|
11
12
|
scope :by_model, ->(model) { where(model: model) }
|
|
12
|
-
scope :by_tag,
|
|
13
|
+
# Filters by a single tag; delegates to by_tags.
scope :by_tag, ->(key, value) { by_tags(key => value) }

# Filters by every key/value pair in +tags+ (keys and values are stringified
# by normalize_tags). An empty filter returns the unfiltered relation.
#
# * JSON/JSONB tags column: one containment query (`tags @> ...::jsonb`).
# * Any other column type: one LIKE clause per pair, matching the serialized
#   `"key":"value"` fragment inside the stored text.
scope :by_tags, lambda { |tags|
  normalized_tags = normalize_tags(tags)

  if normalized_tags.empty?
    all
  elsif tags_json_column?
    where("tags @> ?::jsonb", normalized_tags.to_json)
  else
    normalized_tags.reduce(all) do |relation, (key, value)|
      # "!" is used as the LIKE escape character instead of a backslash:
      # `ESCAPE '\'` is an unterminated string literal on MySQL (backslash
      # escapes the closing quote), while `ESCAPE '!'` is valid on MySQL,
      # PostgreSQL and SQLite alike.
      fragment = sanitize_sql_like(json_tag_fragment(key, value), "!")
      relation.where("tags LIKE ? ESCAPE '!'", "%#{fragment}%")
    end
  end
}
|
|
27
|
+
# Tag shortcuts: both delegate to by_tag with a fixed key.
scope :by_user, ->(user_id) { by_tag("user_id", user_id) }
scope :by_feature, ->(feature) { by_tag("feature", feature) }

# Rows with / without a stored total_cost; unknown_pricing is an alias of
# without_cost.
scope :with_cost, -> { where.not(total_cost: nil) }
scope :without_cost, -> { where(total_cost: nil) }
scope :unknown_pricing, -> { without_cost }

# Rows with a latency value; an empty relation when the table has no
# latency_ms column.
scope :with_latency, lambda {
  if latency_column?
    where.not(latency_ms: nil)
  else
    none
  end
}

# Rows whose tags are not empty: not {} on a JSON column, otherwise not
# NULL, "" or "{}".
scope :with_json_tags, -> { tags_json_column? ? where.not(tags: {}) : where.not(tags: [nil, "", "{}"]) }
|
|
13
41
|
|
|
14
42
|
scope :today, -> { where(tracked_at: Time.now.utc.beginning_of_day..) }
|
|
15
43
|
scope :this_week, -> { where(tracked_at: Time.now.utc.beginning_of_week..) }
|
|
@@ -33,13 +61,53 @@ module LlmCostTracker
|
|
|
33
61
|
group(:provider).sum(:total_cost)
|
|
34
62
|
end
|
|
35
63
|
|
|
64
|
+
# Average of latency_ms as a Float. Returns nil when the table has no
# latency_ms column or when the average itself is nil.
def self.average_latency_ms
  return unless latency_column?

  mean = average(:latency_ms)
  mean.nil? ? nil : mean.to_f
end
|
|
69
|
+
|
|
70
|
+
# Hash of model => average latency_ms (values converted with to_f); an empty
# Hash when the table has no latency_ms column.
def self.latency_by_model
  return {} unless latency_column?

  averages = group(:model).average(:latency_ms)
  averages.transform_values { |mean| mean.to_f }
end
|
|
75
|
+
|
|
76
|
+
# Hash of provider => average latency_ms (values converted with to_f); an
# empty Hash when the table has no latency_ms column.
def self.latency_by_provider
  return {} unless latency_column?

  averages = group(:provider).average(:latency_ms)
  averages.transform_values { |mean| mean.to_f }
end
|
|
81
|
+
|
|
36
82
|
# Sums total_cost per DATE(tracked_at) for rows tracked within the last
# +days+ days. Keys of the returned Hash are converted to Strings.
def self.daily_costs(days: 30)
  since = days.days.ago
  totals = where(tracked_at: (since..)).group("DATE(tracked_at)").sum(:total_cost)

  totals.transform_keys { |day| day.to_s }
end
|
|
88
|
+
|
|
89
|
+
# True when the tags column exists and is JSON-typed: its type is :json or
# :jsonb, or its sql_type reads "jsonb" (any case).
def self.tags_json_column?
  tags_column = columns_hash["tags"]
  return false unless tags_column
  return true if %i[json jsonb].include?(tags_column.type)

  tags_column.sql_type.to_s.downcase == "jsonb"
end
|
|
95
|
+
|
|
96
|
+
# True when columns_hash has a "latency_ms" entry.
def self.latency_column?
  columns_hash.key?("latency_ms")
end
|
|
99
|
+
|
|
100
|
+
# Converts +tags+ (nil allowed) into a Hash whose keys and values are all
# Strings.
def self.normalize_tags(tags)
  pairs = (tags || {}).to_h

  pairs.each_with_object({}) do |(key, value), stringified|
    stringified[key.to_s] = value.to_s
  end
end
|
|
103
|
+
|
|
104
|
+
# Serializes the single pair as a JSON object and returns it without the
# surrounding braces, e.g. "user_id":"42".
def self.json_tag_fragment(key, value)
  serialized = JSON.generate(key => value)
  serialized[1...-1]
end
|
|
41
107
|
|
|
42
108
|
def parsed_tags
|
|
109
|
+
return tags.transform_keys(&:to_s) if tags.is_a?(Hash)
|
|
110
|
+
|
|
43
111
|
JSON.parse(tags || "{}")
|
|
44
112
|
rescue JSON::ParserError
|
|
45
113
|
{}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "faraday"
|
|
4
|
+
require "json"
|
|
4
5
|
|
|
5
6
|
module LlmCostTracker
|
|
6
7
|
module Middleware
|
|
@@ -14,28 +15,23 @@ module LlmCostTracker
|
|
|
14
15
|
return @app.call(request_env) unless LlmCostTracker.configuration.enabled
|
|
15
16
|
|
|
16
17
|
request_url = request_env.url.to_s
|
|
17
|
-
request_body = read_body(request_env.body)
|
|
18
|
+
request_body = read_body(request_env.body) || ""
|
|
19
|
+
|
|
20
|
+
enforce_budget_before_request(request_url)
|
|
21
|
+
started_at = monotonic_time
|
|
18
22
|
|
|
19
23
|
@app.call(request_env).on_complete do |response_env|
|
|
20
|
-
process(request_url, request_body, response_env)
|
|
24
|
+
process(request_url, request_body, response_env, elapsed_ms(started_at))
|
|
21
25
|
end
|
|
22
|
-
rescue StandardError => e
|
|
23
|
-
# Never break the actual request — log and re-raise
|
|
24
|
-
raise e
|
|
25
26
|
end
|
|
26
27
|
|
|
27
28
|
private
|
|
28
29
|
|
|
29
|
-
def process(request_url, request_body, response_env)
|
|
30
|
+
def process(request_url, request_body, response_env, latency_ms)
|
|
30
31
|
parser = Parsers::Registry.find_for(request_url)
|
|
31
32
|
return unless parser
|
|
32
33
|
|
|
33
|
-
parsed = parser
|
|
34
|
-
request_url,
|
|
35
|
-
request_body,
|
|
36
|
-
response_env.status,
|
|
37
|
-
read_body(response_env.body)
|
|
38
|
-
)
|
|
34
|
+
parsed = parse_response(parser, request_url, request_body, response_env)
|
|
39
35
|
return unless parsed
|
|
40
36
|
|
|
41
37
|
Tracker.record(
|
|
@@ -43,19 +39,60 @@ module LlmCostTracker
|
|
|
43
39
|
model: parsed[:model],
|
|
44
40
|
input_tokens: parsed[:input_tokens],
|
|
45
41
|
output_tokens: parsed[:output_tokens],
|
|
42
|
+
latency_ms: latency_ms,
|
|
46
43
|
metadata: @tags.merge(parsed.except(:provider, :model, :input_tokens, :output_tokens, :total_tokens))
|
|
47
44
|
)
|
|
45
|
+
rescue LlmCostTracker::Error
|
|
46
|
+
raise
|
|
48
47
|
rescue StandardError => e
|
|
49
|
-
|
|
48
|
+
log_warning("Error processing response: #{e.class}: #{e.message}")
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Reads the response body and hands it to +parser+. When read_body returns
# nil, a warning is logged and nil is returned instead of parsing.
def parse_response(parser, request_url, request_body, response_env)
  response_body = read_body(response_env.body)

  if response_body
    parser.parse(request_url, request_body, response_env.status, response_body)
  else
    log_warning(
      "Unable to read response body for #{request_url}; streaming/SSE responses require manual tracking."
    )
    nil
  end
end
|
|
62
|
+
|
|
63
|
+
# Calls Tracker.enforce_budget! only when the parser registry has a parser
# for +request_url+; otherwise returns nil.
def enforce_budget_before_request(request_url)
  Tracker.enforce_budget! if Parsers::Registry.find_for(request_url)
end
|
|
51
68
|
|
|
52
69
|
# Coerces a request/response body to a String:
# a String is returned as-is, nil becomes "", a Hash or Array is serialized
# with to_json, and anything else is converted with to_str when it responds
# to it. Returns nil for every other object.
def read_body(body)
  return body if body.is_a?(String)
  return "" if body.nil?
  return body.to_json if body.is_a?(Hash) || body.is_a?(Array)

  body.to_str if body.respond_to?(:to_str)
end
|
|
78
|
+
|
|
79
|
+
# Emits +message+ prefixed with "[LlmCostTracker] " at warn level through
# Rails.logger when one is available, otherwise through Kernel#warn.
def log_warning(message)
  line = "[LlmCostTracker] #{message}"
  rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

  if rails_logger
    rails_logger.warn(line)
  else
    warn line
  end
end
|
|
88
|
+
|
|
89
|
+
# Current reading of the process's monotonic clock
# (Process::CLOCK_MONOTONIC), used for measuring elapsed time.
def monotonic_time
  Process.clock_gettime(Process::CLOCK_MONOTONIC)
end
|
|
92
|
+
|
|
93
|
+
# Difference between the current monotonic_time and +started_at+, multiplied
# by 1000 and rounded to the nearest integer.
def elapsed_ms(started_at)
  ((monotonic_time - started_at) * 1000).round
end
|
|
59
96
|
end
|
|
60
97
|
end
|
|
61
98
|
end
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
require_relative "base"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
|
|
6
9
|
module Parsers
|
|
@@ -9,12 +12,12 @@ module LlmCostTracker
|
|
|
9
12
|
|
|
10
13
|
# True when the URL's host (lower-cased) is listed in HOSTS and its path
# contains "/v1/messages". URLs that fail to parse do not match.
def match?(url)
  parsed = URI.parse(url.to_s)
  return false unless HOSTS.include?(parsed.host.to_s.downcase)

  parsed.path.include?("/v1/messages")
rescue URI::InvalidURIError
  false
end
|
|
16
19
|
|
|
17
|
-
def parse(
|
|
20
|
+
def parse(_request_url, request_body, response_status, response_body)
|
|
18
21
|
return nil unless response_status == 200
|
|
19
22
|
|
|
20
23
|
response = safe_json_parse(response_body)
|
|
@@ -28,9 +31,11 @@ module LlmCostTracker
|
|
|
28
31
|
model: response["model"] || request["model"],
|
|
29
32
|
input_tokens: usage["input_tokens"] || 0,
|
|
30
33
|
output_tokens: usage["output_tokens"] || 0,
|
|
31
|
-
total_tokens: (usage["input_tokens"] || 0) + (usage["output_tokens"] || 0)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
total_tokens: (usage["input_tokens"] || 0) + (usage["output_tokens"] || 0) +
|
|
35
|
+
(usage["cache_read_input_tokens"] || 0) +
|
|
36
|
+
(usage["cache_creation_input_tokens"] || 0),
|
|
37
|
+
cache_read_input_tokens: usage["cache_read_input_tokens"],
|
|
38
|
+
cache_creation_input_tokens: usage["cache_creation_input_tokens"]
|
|
34
39
|
}.compact
|
|
35
40
|
end
|
|
36
41
|
end
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
require_relative "base"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
|
|
6
9
|
module Parsers
|
|
@@ -9,12 +12,12 @@ module LlmCostTracker
|
|
|
9
12
|
|
|
10
13
|
# True when the URL's host (lower-cased) is listed in HOSTS. URLs that fail
# to parse do not match.
def match?(url)
  host = URI.parse(url.to_s).host.to_s.downcase
  HOSTS.include?(host)
rescue URI::InvalidURIError
  false
end
|
|
16
19
|
|
|
17
|
-
def parse(request_url,
|
|
20
|
+
def parse(request_url, _request_body, response_status, response_body)
|
|
18
21
|
return nil unless response_status == 200
|
|
19
22
|
|
|
20
23
|
response = safe_json_parse(response_body)
|
|
@@ -28,13 +31,18 @@ module LlmCostTracker
|
|
|
28
31
|
provider: "gemini",
|
|
29
32
|
model: model,
|
|
30
33
|
input_tokens: usage["promptTokenCount"] || 0,
|
|
31
|
-
output_tokens: usage
|
|
32
|
-
total_tokens: usage["totalTokenCount"] || 0
|
|
33
|
-
|
|
34
|
+
output_tokens: output_tokens(usage),
|
|
35
|
+
total_tokens: usage["totalTokenCount"] || 0,
|
|
36
|
+
cached_input_tokens: usage["cachedContentTokenCount"]
|
|
37
|
+
}.compact
|
|
34
38
|
end
|
|
35
39
|
|
|
36
40
|
private
|
|
37
41
|
|
|
42
|
+
# Output token count: candidatesTokenCount plus thoughtsTokenCount, each
# counted as 0 when absent from +usage+.
def output_tokens(usage)
  %w[candidatesTokenCount thoughtsTokenCount].sum { |field| usage[field] || 0 }
end
|
|
45
|
+
|
|
38
46
|
def extract_model_from_url(url)
|
|
39
47
|
uri = URI.parse(url.to_s)
|
|
40
48
|
match = uri.path.match(%r{/models/([^/:]+)})
|