llm_cost_tracker 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -0
- data/README.md +136 -24
- data/lib/llm_cost_tracker/budget.rb +7 -19
- data/lib/llm_cost_tracker/configuration.rb +52 -10
- data/lib/llm_cost_tracker/cost.rb +15 -0
- data/lib/llm_cost_tracker/event.rb +24 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +36 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +15 -50
- data/lib/llm_cost_tracker/logging.rb +44 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +15 -12
- data/lib/llm_cost_tracker/parsed_usage.rb +45 -0
- data/lib/llm_cost_tracker/parsers/anthropic.rb +2 -3
- data/lib/llm_cost_tracker/parsers/base.rb +2 -0
- data/lib/llm_cost_tracker/parsers/gemini.rb +4 -4
- data/lib/llm_cost_tracker/parsers/openai.rb +4 -22
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +12 -8
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +33 -0
- data/lib/llm_cost_tracker/price_registry.rb +36 -6
- data/lib/llm_cost_tracker/pricing.rb +36 -10
- data/lib/llm_cost_tracker/railtie.rb +5 -0
- data/lib/llm_cost_tracker/report.rb +29 -0
- data/lib/llm_cost_tracker/report_data.rb +84 -0
- data/lib/llm_cost_tracker/report_formatter.rb +59 -0
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +19 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +11 -11
- data/lib/llm_cost_tracker/storage/backends.rb +26 -0
- data/lib/llm_cost_tracker/storage/custom_backend.rb +16 -0
- data/lib/llm_cost_tracker/storage/log_backend.rb +28 -0
- data/lib/llm_cost_tracker/tag_accessors.rb +23 -0
- data/lib/llm_cost_tracker/tag_query.rb +38 -0
- data/lib/llm_cost_tracker/tags_column.rb +16 -0
- data/lib/llm_cost_tracker/tracker.rb +18 -67
- data/lib/llm_cost_tracker/unknown_pricing.rb +8 -15
- data/lib/llm_cost_tracker/value_object.rb +45 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +28 -13
- data/lib/tasks/llm_cost_tracker.rake +9 -0
- metadata +20 -1
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
require "faraday"
|
|
4
4
|
require "json"
|
|
5
5
|
|
|
6
|
+
require_relative "../logging"
|
|
7
|
+
|
|
6
8
|
module LlmCostTracker
|
|
7
9
|
module Middleware
|
|
8
10
|
class Faraday < ::Faraday::Middleware
|
|
@@ -21,13 +23,13 @@ module LlmCostTracker
|
|
|
21
23
|
started_at = monotonic_time
|
|
22
24
|
|
|
23
25
|
@app.call(request_env).on_complete do |response_env|
|
|
24
|
-
process(request_url, request_body, response_env, elapsed_ms(started_at))
|
|
26
|
+
process(request_env, request_url, request_body, response_env, elapsed_ms(started_at))
|
|
25
27
|
end
|
|
26
28
|
end
|
|
27
29
|
|
|
28
30
|
private
|
|
29
31
|
|
|
30
|
-
def process(request_url, request_body, response_env, latency_ms)
|
|
32
|
+
def process(request_env, request_url, request_body, response_env, latency_ms)
|
|
31
33
|
parser = Parsers::Registry.find_for(request_url)
|
|
32
34
|
return unless parser
|
|
33
35
|
|
|
@@ -40,18 +42,18 @@ module LlmCostTracker
|
|
|
40
42
|
input_tokens: parsed[:input_tokens],
|
|
41
43
|
output_tokens: parsed[:output_tokens],
|
|
42
44
|
latency_ms: latency_ms,
|
|
43
|
-
metadata:
|
|
45
|
+
metadata: resolved_tags(request_env).merge(parsed.metadata)
|
|
44
46
|
)
|
|
45
47
|
rescue LlmCostTracker::Error
|
|
46
48
|
raise
|
|
47
49
|
rescue StandardError => e
|
|
48
|
-
|
|
50
|
+
Logging.warn("Error processing response: #{e.class}: #{e.message}")
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
def parse_response(parser, request_url, request_body, response_env)
|
|
52
54
|
response_body = read_body(response_env.body)
|
|
53
55
|
unless response_body
|
|
54
|
-
|
|
56
|
+
Logging.warn(
|
|
55
57
|
"Unable to read response body for #{request_url}; streaming/SSE responses require manual tracking."
|
|
56
58
|
)
|
|
57
59
|
return nil
|
|
@@ -76,14 +78,15 @@ module LlmCostTracker
|
|
|
76
78
|
end
|
|
77
79
|
end
|
|
78
80
|
|
|
79
|
-
def
|
|
80
|
-
|
|
81
|
+
# Resolve the tags configured on this middleware into a plain Hash.
#
# `@tags` may be a static value or a callable. Callables are invoked through
# `call_tags` so they can optionally receive the request env.
#
# @param request_env [Object] request env passed to a callable `@tags`.
# @return [Hash] `{}` when the resolved tags are nil, otherwise `tags.to_h`.
def resolved_tags(request_env)
  tags = @tags.respond_to?(:call) ? call_tags(request_env) : @tags
  return {} if tags.nil?

  tags.to_h
end

# Invoke the callable `@tags`, passing the request env only when it takes arguments.
#
# Procs, lambdas and Method objects expose `arity` themselves. Any other object
# that merely responds to `call` is inspected through its `call` method, so a
# plain callable object no longer raises NoMethodError here.
#
# @param request_env [Object] request env forwarded to callables that accept arguments.
# @return [Object] whatever the callable returns.
def call_tags(request_env)
  arity = @tags.respond_to?(:arity) ? @tags.arity : @tags.method(:call).arity
  arity.zero? ? @tags.call : @tags.call(request_env)
end
|
|
88
91
|
|
|
89
92
|
def monotonic_time
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "value_object"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # Value object describing the usage figures a parser extracted from one
  # provider response.
  ParsedUsage = ValueObject.define(
    :provider,
    :model,
    :input_tokens,
    :output_tokens,
    :total_tokens,
    :cached_input_tokens,
    :cache_read_input_tokens,
    :cache_creation_input_tokens,
    :reasoning_tokens
  )

  class ParsedUsage
    # Attributes that `metadata` leaves out.
    TRACKING_KEYS = %i[provider model input_tokens output_tokens total_tokens].freeze

    # Build an instance from loosely typed parser output.
    #
    # `provider`, `model`, `input_tokens` and `output_tokens` are required
    # (a missing key raises KeyError via `fetch`). Token counts are coerced with
    # `to_i`; `total_tokens` falls back to 0. The remaining attributes are copied
    # as given and are nil when absent.
    #
    # @param attributes [Hash] keyword attributes; unknown keys are ignored.
    # @return [ParsedUsage]
    def self.build(**attributes)
      fields = {
        provider: attributes.fetch(:provider),
        model: attributes.fetch(:model),
        input_tokens: attributes.fetch(:input_tokens).to_i,
        output_tokens: attributes.fetch(:output_tokens).to_i,
        total_tokens: attributes.fetch(:total_tokens, 0).to_i
      }

      %i[cached_input_tokens cache_read_input_tokens cache_creation_input_tokens reasoning_tokens].each do |key|
        fields[key] = attributes[key]
      end

      new(**fields)
    end

    # Everything except the TRACKING_KEYS attributes.
    # NOTE(review): `except` is not defined in this file; presumably supplied by
    # ValueObject. Confirm its return type there.
    def metadata
      except(*TRACKING_KEYS)
    end

    # Hash form of the value object with nil-valued attributes dropped.
    def to_h
      attributes = super
      attributes.compact
    end
  end
end
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "json"
|
|
4
3
|
require "uri"
|
|
5
4
|
|
|
6
5
|
require_relative "base"
|
|
@@ -26,7 +25,7 @@ module LlmCostTracker
|
|
|
26
25
|
|
|
27
26
|
request = safe_json_parse(request_body)
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
ParsedUsage.build(
|
|
30
29
|
provider: "anthropic",
|
|
31
30
|
model: response["model"] || request["model"],
|
|
32
31
|
input_tokens: usage["input_tokens"] || 0,
|
|
@@ -36,7 +35,7 @@ module LlmCostTracker
|
|
|
36
35
|
(usage["cache_creation_input_tokens"] || 0),
|
|
37
36
|
cache_read_input_tokens: usage["cache_read_input_tokens"],
|
|
38
37
|
cache_creation_input_tokens: usage["cache_creation_input_tokens"]
|
|
39
|
-
|
|
38
|
+
)
|
|
40
39
|
end
|
|
41
40
|
end
|
|
42
41
|
end
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "json"
|
|
4
3
|
require "uri"
|
|
5
4
|
|
|
6
5
|
require_relative "base"
|
|
@@ -9,10 +8,11 @@ module LlmCostTracker
|
|
|
9
8
|
module Parsers
|
|
10
9
|
class Gemini < Base
|
|
11
10
|
HOSTS = %w[generativelanguage.googleapis.com].freeze
|
|
11
|
+
TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
|
|
12
12
|
|
|
13
13
|
# Whether +url+ points at a tracked Gemini endpoint.
#
# The host must be listed in HOSTS (compared case-insensitively) and the path
# must match TRACKED_PATH_PATTERN. URLs that cannot be parsed never match.
#
# @param url [#to_s] request URL.
# @return [Boolean]
def match?(url)
  parsed = URI.parse(url.to_s)
  return false unless HOSTS.include?(parsed.host.to_s.downcase)

  parsed.path.match?(TRACKED_PATH_PATTERN)
rescue URI::InvalidURIError
  false
end
|
|
@@ -27,14 +27,14 @@ module LlmCostTracker
|
|
|
27
27
|
# Extract model from URL: /v1beta/models/gemini-2.5-flash:generateContent
|
|
28
28
|
model = extract_model_from_url(request_url)
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
ParsedUsage.build(
|
|
31
31
|
provider: "gemini",
|
|
32
32
|
model: model,
|
|
33
33
|
input_tokens: usage["promptTokenCount"] || 0,
|
|
34
34
|
output_tokens: output_tokens(usage),
|
|
35
35
|
total_tokens: usage["totalTokenCount"] || 0,
|
|
36
36
|
cached_input_tokens: usage["cachedContentTokenCount"]
|
|
37
|
-
|
|
37
|
+
)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
private
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "json"
|
|
4
3
|
require "uri"
|
|
5
4
|
|
|
6
5
|
require_relative "base"
|
|
6
|
+
require_relative "openai_usage"
|
|
7
7
|
|
|
8
8
|
module LlmCostTracker
|
|
9
9
|
module Parsers
|
|
10
10
|
class Openai < Base
|
|
11
|
+
include OpenaiUsage
|
|
12
|
+
|
|
11
13
|
HOSTS = %w[api.openai.com].freeze
|
|
12
14
|
TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze
|
|
13
15
|
|
|
@@ -19,22 +21,7 @@ module LlmCostTracker
|
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
# Parse one OpenAI request/response exchange.
#
# All work is delegated to `parse_openai_usage`; the arguments are passed
# through unchanged and its result is returned as is.
def parse(request_url, request_body, response_status, response_body)
  parse_openai_usage(request_url, request_body, response_status, response_body)
end
|
|
39
26
|
|
|
40
27
|
private
|
|
@@ -42,11 +29,6 @@ module LlmCostTracker
|
|
|
42
29
|
# Provider name reported for every request handled by this parser.
# The request URL is accepted for signature compatibility but not consulted.
#
# @return [String] always "openai".
def provider_for(_request_url)
  "openai"
end
|
|
45
|
-
|
|
46
|
-
def cached_input_tokens(usage)
|
|
47
|
-
details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
|
|
48
|
-
details["cached_tokens"]
|
|
49
|
-
end
|
|
50
32
|
end
|
|
51
33
|
end
|
|
52
34
|
end
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "base"
|
|
6
|
+
require_relative "openai_usage"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
|
|
6
9
|
module Parsers
|
|
7
|
-
class OpenaiCompatible <
|
|
10
|
+
class OpenaiCompatible < Base
|
|
11
|
+
include OpenaiUsage
|
|
12
|
+
|
|
8
13
|
TRACKED_PATH_SUFFIXES = %w[/chat/completions /completions /embeddings /responses].freeze
|
|
9
14
|
|
|
10
15
|
def match?(url)
|
|
@@ -14,6 +19,10 @@ module LlmCostTracker
|
|
|
14
19
|
false
|
|
15
20
|
end
|
|
16
21
|
|
|
22
|
+
# Parse one exchange with an OpenAI-compatible endpoint.
#
# All work is delegated to `parse_openai_usage`; the arguments are passed
# through unchanged and its result is returned as is.
def parse(request_url, request_body, response_status, response_body)
  parse_openai_usage(request_url, request_body, response_status, response_body)
end
|
|
25
|
+
|
|
17
26
|
private
|
|
18
27
|
|
|
19
28
|
def provider_for(request_url)
|
|
@@ -24,12 +33,7 @@ module LlmCostTracker
|
|
|
24
33
|
end
|
|
25
34
|
|
|
26
35
|
# Look up the provider configured for +host+.
#
# The host is stringified and downcased before the lookup in
# `configured_providers`.
#
# @param host [#to_s] host name taken from the request URL.
# @return [String, nil] the provider as a String, or nil when the lookup finds nothing.
def provider_for_host(host)
  normalized_host = host.to_s.downcase
  provider = configured_providers[normalized_host]
  provider&.to_s
end
|
|
34
38
|
|
|
35
39
|
def configured_providers
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Parsers
    # Shared parsing of OpenAI-style `usage` payloads.
    #
    # The including class must provide `safe_json_parse` and `provider_for`,
    # both of which are called here.
    module OpenaiUsage
      private

      # Build a ParsedUsage from an OpenAI-style response.
      #
      # @return [ParsedUsage, nil] nil unless the status is exactly 200 and the
      #   parsed response carries a truthy "usage" entry.
      def parse_openai_usage(request_url, request_body, response_status, response_body)
        return unless response_status == 200

        response_json = safe_json_parse(response_body)
        usage = response_json["usage"]
        return unless usage

        # The request body is only parsed once usage is known to be present;
        # its "model" is the fallback when the response does not name one.
        request_json = safe_json_parse(request_body)

        ParsedUsage.build(
          provider: provider_for(request_url),
          model: response_json["model"] || request_json["model"],
          input_tokens: usage["prompt_tokens"] || usage["input_tokens"] || 0,
          output_tokens: usage["completion_tokens"] || usage["output_tokens"] || 0,
          total_tokens: usage["total_tokens"] || 0,
          cached_input_tokens: cached_input_tokens(usage)
        )
      end

      # Cached token count from whichever details key the usage payload carries
      # ("prompt_tokens_details" first, then "input_tokens_details").
      #
      # @return [Object, nil] the "cached_tokens" value, or nil when absent.
      def cached_input_tokens(usage)
        token_details = usage["prompt_tokens_details"] || usage["input_tokens_details"] || {}
        token_details["cached_tokens"]
      end
    end
  end
end
|
|
@@ -3,10 +3,15 @@
|
|
|
3
3
|
require "json"
|
|
4
4
|
require "yaml"
|
|
5
5
|
|
|
6
|
+
require_relative "logging"
|
|
7
|
+
|
|
6
8
|
module LlmCostTracker
|
|
7
9
|
module PriceRegistry
|
|
8
10
|
DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
|
|
11
|
+
EMPTY_PRICES = {}.freeze
|
|
9
12
|
PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
|
|
13
|
+
METADATA_KEYS = %w[_source _updated _notes].freeze
|
|
14
|
+
FILE_PRICES_MUTEX = Mutex.new
|
|
10
15
|
NORMALIZE_PRICE_ENTRY = lambda do |price|
|
|
11
16
|
(price || {}).each_with_object({}) do |(key, value), normalized|
|
|
12
17
|
key = key.to_s
|
|
@@ -22,6 +27,8 @@ module LlmCostTracker
|
|
|
22
27
|
PRICE_METADATA = RAW_REGISTRY.fetch("metadata", {}).freeze
|
|
23
28
|
BUILTIN_PRICES = NORMALIZE_PRICE_TABLE.call(RAW_REGISTRY.fetch("models", {})).freeze
|
|
24
29
|
|
|
30
|
+
private_constant :FILE_PRICES_MUTEX
|
|
31
|
+
|
|
25
32
|
class << self
|
|
26
33
|
def builtin_prices
|
|
27
34
|
BUILTIN_PRICES
|
|
@@ -36,20 +43,43 @@ module LlmCostTracker
|
|
|
36
43
|
end
|
|
37
44
|
|
|
38
45
|
# Load, normalize and cache the price table stored at +path+.
#
# The result is cached per [path, mtime] pair under FILE_PRICES_MUTEX. The
# cache is only updated after the file has been loaded and normalized
# successfully, so a failed load cannot poison the cache: the next call with
# the same path and mtime retries instead of returning stale or nil prices.
#
# @param path [#to_s, nil] location of the prices file; nil or false means none.
# @return [Hash] frozen normalized price table, or EMPTY_PRICES when +path+ is falsy.
# @raise [Error] when the file is missing or cannot be parsed or normalized.
def file_prices(path)
  return EMPTY_PRICES unless path

  path = path.to_s
  FILE_PRICES_MUTEX.synchronize do
    cache_key = [path, File.mtime(path).to_f]
    next @file_prices if @file_prices_cache_key == cache_key

    # Build the table first; publish key and value only once it exists.
    prices = normalize_file_prices(price_file_models(load_price_file(path)), path: path).freeze
    @file_prices_cache_key = cache_key
    @file_prices = prices
  end
rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError, NoMethodError => e
  raise Error, "Unable to load prices_file #{path.inspect}: #{e.message}"
end
|
|
50
59
|
|
|
51
60
|
private
|
|
52
61
|
|
|
62
|
+
# Normalize every entry of a price table read from +path+.
#
# Each entry is first checked for unknown keys (which only produces a warning)
# and then normalized; model names are stringified. A nil table yields {}.
#
# @param table [Hash, nil] model => price entry.
# @param path [String] file the table came from, used in warnings.
# @return [Hash] stringified model name => normalized price entry.
def normalize_file_prices(table, path:)
  normalized = {}
  (table || {}).each do |model, price|
    warn_unknown_keys(model, price, path)
    normalized[model.to_s] = normalize_price_entry(price)
  end
  normalized
end
|
|
68
|
+
|
|
69
|
+
# Normalize a single price entry by passing it to the NORMALIZE_PRICE_ENTRY lambda.
def normalize_price_entry(price)
  NORMALIZE_PRICE_ENTRY.(price)
end
|
|
72
|
+
|
|
73
|
+
# Log a warning when a price entry contains keys that are neither price keys
# nor metadata keys. Nothing is logged when every key is known.
#
# @param model [Object] model name the entry belongs to (shown via `inspect`).
# @param price [#keys] price entry as read from the file.
# @param path [String] file the entry came from.
def warn_unknown_keys(model, price, path)
  known_keys = PRICE_KEYS + METADATA_KEYS
  unknown_keys = price.keys.map(&:to_s) - known_keys
  return if unknown_keys.empty?

  message = "Unknown price keys #{unknown_keys.inspect} for #{model.inspect} in #{path}; " \
            "ignored. Known keys: #{known_keys.inspect}"
  Logging.warn(message)
end
|
|
82
|
+
|
|
53
83
|
def load_price_file(path)
|
|
54
84
|
contents = File.read(path)
|
|
55
85
|
return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
|
|
@@ -1,11 +1,25 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module LlmCostTracker
|
|
4
|
-
#
|
|
4
|
+
# Calculates costs from price entries expressed in USD per 1M tokens.
|
|
5
5
|
module Pricing
|
|
6
6
|
PRICES = PriceRegistry.builtin_prices
|
|
7
|
+
PRICES_MUTEX = Mutex.new
|
|
8
|
+
SORTED_PRICE_KEYS_MUTEX = Mutex.new
|
|
9
|
+
|
|
10
|
+
private_constant :PRICES_MUTEX
|
|
11
|
+
private_constant :SORTED_PRICE_KEYS_MUTEX
|
|
7
12
|
|
|
8
13
|
class << self
|
|
14
|
+
# Estimate model cost from token counts.
|
|
15
|
+
#
|
|
16
|
+
# @param model [String] Provider model identifier.
|
|
17
|
+
# @param input_tokens [Integer] Input token count, including cached tokens if reported that way.
|
|
18
|
+
# @param output_tokens [Integer] Output token count.
|
|
19
|
+
# @param cached_input_tokens [Integer] OpenAI-style cached input tokens.
|
|
20
|
+
# @param cache_read_input_tokens [Integer] Anthropic-style cache read tokens.
|
|
21
|
+
# @param cache_creation_input_tokens [Integer] Anthropic-style cache creation tokens.
|
|
22
|
+
# @return [LlmCostTracker::Cost, nil] nil when no price is configured for the model.
|
|
9
23
|
def cost_for(model:, input_tokens:, output_tokens:, cached_input_tokens: 0,
|
|
10
24
|
cache_read_input_tokens: 0, cache_creation_input_tokens: 0)
|
|
11
25
|
prices = lookup(model)
|
|
@@ -15,7 +29,7 @@ module LlmCostTracker
|
|
|
15
29
|
cache_read_input_tokens, cache_creation_input_tokens)
|
|
16
30
|
costs = calculate_costs(token_counts, prices)
|
|
17
31
|
|
|
18
|
-
|
|
32
|
+
Cost.new(
|
|
19
33
|
input_cost: costs[:input].round(8),
|
|
20
34
|
cached_input_cost: costs[:cached_input].round(8),
|
|
21
35
|
cache_read_input_cost: costs[:cache_read_input].round(8),
|
|
@@ -23,7 +37,7 @@ module LlmCostTracker
|
|
|
23
37
|
output_cost: costs[:output].round(8),
|
|
24
38
|
total_cost: costs.values.sum.round(8),
|
|
25
39
|
currency: "USD"
|
|
26
|
-
|
|
40
|
+
)
|
|
27
41
|
end
|
|
28
42
|
|
|
29
43
|
def lookup(model)
|
|
@@ -43,9 +57,18 @@ module LlmCostTracker
|
|
|
43
57
|
end
|
|
44
58
|
|
|
45
59
|
# Effective price table: built-in PRICES overlaid with the configured prices
# file and then with `pricing_overrides` (later sources win).
#
# The merged table is cached and rebuilt only when the file price table object
# or the overrides hash changes. Overrides are normalized only on a cache miss.
# The merged table is stored before its cache key, so the lock-free fast path
# cannot see the new key paired with the previous (or missing) table.
#
# @return [Hash] frozen merged price table.
def prices
  configuration = LlmCostTracker.configuration
  file_prices = PriceRegistry.file_prices(configuration.prices_file)
  raw_overrides = configuration.pricing_overrides
  cache_key = [file_prices.object_id, raw_overrides.hash]

  return @prices if @prices_cache_key == cache_key

  PRICES_MUTEX.synchronize do
    # Re-check under the lock: another thread may have rebuilt the table already.
    next @prices if @prices_cache_key == cache_key

    overrides = PriceRegistry.normalize_price_table(raw_overrides)
    merged = PRICES.merge(file_prices).merge(overrides).freeze

    # Value first, key second.
    @prices = merged
    @prices_cache_key = cache_key
    merged
  end
end
|
|
50
73
|
|
|
51
74
|
private
|
|
@@ -97,11 +120,14 @@ module LlmCostTracker
|
|
|
97
120
|
end
|
|
98
121
|
|
|
99
122
|
# Keys of +table+ ordered from longest to shortest, cached per table object.
#
# The cache is keyed on object identity (`equal?`). The sorted keys are stored
# before the table marker, so the lock-free fast path cannot pair the new
# table with the previous (or missing) key list, and a sort that raises leaves
# the cache untouched.
#
# @param table [Hash] price table whose keys respond to `length`.
# @return [Array] keys sorted by descending length.
def sorted_price_keys(table)
  return @sorted_price_keys if @sorted_price_keys_table.equal?(table)

  SORTED_PRICE_KEYS_MUTEX.synchronize do
    # Re-check under the lock: another thread may have sorted this table already.
    next @sorted_price_keys if @sorted_price_keys_table.equal?(table)

    sorted = table.keys.sort_by { |key| -key.length }

    # Value first, marker second.
    @sorted_price_keys = sorted
    @sorted_price_keys_table = table
    sorted
  end
end
|
|
106
132
|
end
|
|
107
133
|
end
|
|
@@ -5,10 +5,15 @@ module LlmCostTracker
|
|
|
5
5
|
generators do
|
|
6
6
|
require_relative "generators/llm_cost_tracker/add_latency_ms_generator"
|
|
7
7
|
require_relative "generators/llm_cost_tracker/install_generator"
|
|
8
|
+
require_relative "generators/llm_cost_tracker/prices_generator"
|
|
8
9
|
require_relative "generators/llm_cost_tracker/upgrade_cost_precision_generator"
|
|
9
10
|
require_relative "generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator"
|
|
10
11
|
end
|
|
11
12
|
|
|
13
|
+
rake_tasks do
|
|
14
|
+
load File.expand_path("../tasks/llm_cost_tracker.rake", __dir__)
|
|
15
|
+
end
|
|
16
|
+
|
|
12
17
|
initializer "llm_cost_tracker.configure" do
|
|
13
18
|
# Auto-require ActiveRecord storage if configured
|
|
14
19
|
ActiveSupport.on_load(:active_record) do
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "report_data"
|
|
4
|
+
require_relative "report_formatter"
|
|
5
|
+
|
|
6
|
+
module LlmCostTracker
  # Builds the cost report data and renders it as text.
  class Report
    DEFAULT_DAYS = ReportData::DEFAULT_DAYS

    # Render the cost report as a String.
    #
    # Failures are not raised: a LoadError or any StandardError raised while
    # building or formatting is turned into an explanatory message instead.
    #
    # @param days [Integer] Number of trailing days to include.
    # @param now [Time] Report end time.
    # @return [String]
    def self.generate(days: DEFAULT_DAYS, now: Time.now.utc)
      report_data = data(days: days, now: now)
      ReportFormatter.new(report_data).to_s
    rescue LoadError => e
      "Unable to build LLM cost report: ActiveRecord storage is unavailable (#{e.message})"
    rescue StandardError => e
      "Unable to build LLM cost report: #{e.class}: #{e.message}"
    end

    # Build the report data without formatting it.
    #
    # @param days [Integer] Number of trailing days to include.
    # @param now [Time] Report end time.
    # @return [ReportData]
    def self.data(days: DEFAULT_DAYS, now: Time.now.utc)
      ReportData.build(days: days, now: now)
    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "value_object"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # One row of the "most expensive calls" list.
  TopCall = ValueObject.define(:provider, :model, :total_cost)

  # Aggregated figures for a reporting window.
  ReportData = ValueObject.define(
    :days,
    :from_time,
    :to_time,
    :total_cost,
    :requests_count,
    :average_latency_ms,
    :unknown_pricing_count,
    :cost_by_provider,
    :cost_by_model,
    :cost_by_feature,
    :top_calls
  )

  ReportData.const_set(:DEFAULT_DAYS, 30)
  ReportData.const_set(:TOP_LIMIT, 5)

  class << ReportData
    # Query LlmApiCall for the window ending at +now+ and aggregate the results.
    #
    # @param days [#to_i] window length in days; non-positive values fall back
    #   to DEFAULT_DAYS.
    # @param now [Time] end of the window.
    # @return [ReportData]
    def build(days: ReportData::DEFAULT_DAYS, now: Time.now.utc)
      require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)

      window_days = normalized_days(days)
      window_start = from_time(window_days, now)
      calls = LlmApiCall.where(tracked_at: window_start..now)

      new(
        days: window_days,
        from_time: window_start,
        to_time: now,
        total_cost: calls.sum(:total_cost).to_f,
        requests_count: calls.count,
        average_latency_ms: average_latency_ms(calls),
        unknown_pricing_count: calls.where(total_cost: nil).count,
        cost_by_provider: cost_by(calls, :provider),
        cost_by_model: cost_by(calls, :model),
        cost_by_feature: cost_by_feature(calls),
        top_calls: top_calls(calls)
      )
    end

    private

    # Coerce +days+ to an Integer, falling back to DEFAULT_DAYS unless positive.
    def normalized_days(days)
      count = days.to_i
      return ReportData::DEFAULT_DAYS unless count.positive?

      count
    end

    # Start of the window: +days+ * 86,400 seconds before +now+.
    def from_time(days, now)
      seconds = days * 86_400
      now - seconds
    end

    # Average latency over +scope+; nil when LlmApiCall reports no latency
    # column or the average itself is nil.
    def average_latency_ms(scope)
      return nil unless LlmApiCall.latency_column?

      average = scope.average(:latency_ms)
      average&.to_f
    end

    # [name, cost] pairs for +column+, most expensive first.
    def cost_by(scope, column)
      totals = scope.group(column).sum(:total_cost)
      totals.map { |name, cost| [name, cost.to_f] }.sort_by { |pair| -pair.last }
    end

    # [feature, cost] pairs, most expensive first. Calls whose `feature` is
    # nil or false are summed under "(untagged)".
    def cost_by_feature(scope)
      totals = Hash.new(0.0)
      scope.select(:id, :tags, :total_cost).find_each do |record|
        label = record.feature || "(untagged)"
        totals[label] += record.total_cost.to_f
      end
      totals.sort_by { |_label, cost| -cost }
    end

    # The TOP_LIMIT most expensive calls that have a non-nil total cost.
    def top_calls(scope)
      priced = scope.where.not(total_cost: nil)
      ranked = priced.order(total_cost: :desc).limit(ReportData::TOP_LIMIT)
      ranked.map do |record|
        TopCall.new(provider: record.provider, model: record.model, total_cost: record.total_cost.to_f)
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Renders report data as plain text, one entry per line.
  class ReportFormatter
    # Maximum number of rows printed per section.
    TOP_LIMIT = 5

    # @param data [Object] report data responding to the readers used below
    #   (days, total_cost, requests_count, average_latency_ms,
    #   unknown_pricing_count, cost_by_provider, cost_by_model,
    #   cost_by_feature, top_calls).
    def initialize(data)
      @data = data
    end

    # @return [String] the full report, lines joined with "\n".
    def to_s
      report = ["LLM Cost Report (last #{@data.days} days)", ""]
      append_summary(report)

      [
        ["By provider", :cost_by_provider],
        ["By model", :cost_by_model],
        ["By feature", :cost_by_feature]
      ].each do |title, reader|
        append_cost_section(report, title, @data.public_send(reader))
      end

      append_top_calls(report)
      report.join("\n")
    end

    private

    # Headline figures.
    def append_summary(lines)
      lines.push(
        "Total cost: #{money(@data.total_cost)}",
        "Requests: #{@data.requests_count}",
        "Avg latency: #{average_latency}",
        "Unknown pricing: #{@data.unknown_pricing_count}"
      )
    end

    # A titled section of [name, cost] rows, limited to the first TOP_LIMIT rows.
    def append_cost_section(lines, title, rows)
      lines.push("", "#{title}:")
      return lines << " none" if rows.empty?

      rows.first(TOP_LIMIT).each do |name, cost|
        lines << " #{name.to_s.ljust(28)} #{money(cost)}"
      end
    end

    # The most expensive individual calls, labelled "provider/model".
    def append_top_calls(lines)
      lines.push("", "Top expensive calls:")
      return lines << " none" if @data.top_calls.empty?

      @data.top_calls.first(TOP_LIMIT).each do |call|
        label = "#{call.provider}/#{call.model}"
        lines << " #{label.ljust(32)} #{money(call.total_cost)}"
      end
    end

    # Rounded latency with an "ms" suffix, or "n/a" when no average is available.
    def average_latency
      latency = @data.average_latency_ms
      return "n/a" unless latency

      "#{latency.round}ms"
    end

    # Dollar amount with six decimal places.
    def money(value)
      format("$%.6f", value.to_f)
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Storage
    # Storage backend that persists events through ActiveRecordStore.
    module ActiveRecordBackend
      # Persist +event+ and hand it back to the caller.
      #
      # LlmApiCall and ActiveRecordStore are required on first use, only when
      # their constants are not yet defined.
      #
      # @param event [Object] event passed to ActiveRecordStore.save.
      # @param _options [Hash] accepted and ignored.
      # @return [Object] the same +event+.
      # @raise [Error] when a LoadError occurs while loading or saving.
      def self.save(event, **_options)
        begin
          require_relative "../llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)
          require_relative "active_record_store" unless defined?(LlmCostTracker::Storage::ActiveRecordStore)

          ActiveRecordStore.save(event)
        rescue LoadError => e
          raise Error, "ActiveRecord storage requires the active_record gem: #{e.message}"
        end

        event
      end
    end
  end
end
|