llm_cost_tracker 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +69 -0
- data/README.md +333 -30
- data/lib/llm_cost_tracker/budget.rb +85 -0
- data/lib/llm_cost_tracker/configuration.rb +82 -3
- data/lib/llm_cost_tracker/cost.rb +15 -0
- data/lib/llm_cost_tracker/errors.rb +37 -0
- data/lib/llm_cost_tracker/event.rb +24 -0
- data/lib/llm_cost_tracker/event_metadata.rb +54 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_latency_ms_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +20 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_latency_ms_to_llm_api_calls.rb.erb +9 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +16 -4
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +14 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +36 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_cost_precision.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_llm_api_call_tags_to_jsonb.rb.erb +41 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_cost_precision_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_tags_to_jsonb_generator.rb +29 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +45 -14
- data/lib/llm_cost_tracker/logging.rb +44 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +54 -13
- data/lib/llm_cost_tracker/parsed_usage.rb +45 -0
- data/lib/llm_cost_tracker/parsers/anthropic.rb +6 -4
- data/lib/llm_cost_tracker/parsers/base.rb +2 -0
- data/lib/llm_cost_tracker/parsers/gemini.rb +12 -5
- data/lib/llm_cost_tracker/parsers/openai.rb +11 -22
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +48 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +33 -0
- data/lib/llm_cost_tracker/parsers/registry.rb +16 -7
- data/lib/llm_cost_tracker/price_registry.rb +99 -0
- data/lib/llm_cost_tracker/prices.json +51 -0
- data/lib/llm_cost_tracker/pricing.rb +103 -77
- data/lib/llm_cost_tracker/railtie.rb +8 -0
- data/lib/llm_cost_tracker/report.rb +29 -0
- data/lib/llm_cost_tracker/report_data.rb +84 -0
- data/lib/llm_cost_tracker/report_formatter.rb +59 -0
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +19 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +21 -12
- data/lib/llm_cost_tracker/storage/backends.rb +26 -0
- data/lib/llm_cost_tracker/storage/custom_backend.rb +16 -0
- data/lib/llm_cost_tracker/storage/log_backend.rb +28 -0
- data/lib/llm_cost_tracker/tag_accessors.rb +23 -0
- data/lib/llm_cost_tracker/tag_query.rb +38 -0
- data/lib/llm_cost_tracker/tags_column.rb +16 -0
- data/lib/llm_cost_tracker/tracker.rb +43 -97
- data/lib/llm_cost_tracker/unknown_pricing.rb +40 -0
- data/lib/llm_cost_tracker/value_object.rb +45 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +49 -6
- data/lib/tasks/llm_cost_tracker.rake +9 -0
- data/llm_cost_tracker.gemspec +4 -3
- metadata +39 -6
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
require_relative "../logging"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
|
|
6
9
|
module Middleware
|
|
@@ -14,25 +17,23 @@ module LlmCostTracker
|
|
|
14
17
|
return @app.call(request_env) unless LlmCostTracker.configuration.enabled
|
|
15
18
|
|
|
16
19
|
request_url = request_env.url.to_s
|
|
17
|
-
request_body = read_body(request_env.body)
|
|
20
|
+
request_body = read_body(request_env.body) || ""
|
|
21
|
+
|
|
22
|
+
enforce_budget_before_request(request_url)
|
|
23
|
+
started_at = monotonic_time
|
|
18
24
|
|
|
19
25
|
@app.call(request_env).on_complete do |response_env|
|
|
20
|
-
process(request_url, request_body, response_env)
|
|
26
|
+
process(request_env, request_url, request_body, response_env, elapsed_ms(started_at))
|
|
21
27
|
end
|
|
22
28
|
end
|
|
23
29
|
|
|
24
30
|
private
|
|
25
31
|
|
|
26
|
-
def process(request_url, request_body, response_env)
|
|
32
|
+
def process(request_env, request_url, request_body, response_env, latency_ms)
|
|
27
33
|
parser = Parsers::Registry.find_for(request_url)
|
|
28
34
|
return unless parser
|
|
29
35
|
|
|
30
|
-
parsed = parser
|
|
31
|
-
request_url,
|
|
32
|
-
request_body,
|
|
33
|
-
response_env.status,
|
|
34
|
-
read_body(response_env.body)
|
|
35
|
-
)
|
|
36
|
+
parsed = parse_response(parser, request_url, request_body, response_env)
|
|
36
37
|
return unless parsed
|
|
37
38
|
|
|
38
39
|
Tracker.record(
|
|
@@ -40,21 +41,61 @@ module LlmCostTracker
|
|
|
40
41
|
model: parsed[:model],
|
|
41
42
|
input_tokens: parsed[:input_tokens],
|
|
42
43
|
output_tokens: parsed[:output_tokens],
|
|
43
|
-
|
|
44
|
+
latency_ms: latency_ms,
|
|
45
|
+
metadata: resolved_tags(request_env).merge(parsed.metadata)
|
|
44
46
|
)
|
|
47
|
+
rescue LlmCostTracker::Error
|
|
48
|
+
raise
|
|
45
49
|
rescue StandardError => e
|
|
46
|
-
|
|
50
|
+
Logging.warn("Error processing response: #{e.class}: #{e.message}")
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Reads the response body and, when it is readable, delegates to +parser+.
#
# When +read_body+ cannot produce a body (it returns nil), a warning is
# logged and nil is returned so the caller skips tracking.
#
# @param parser [#parse] parser selected for the request URL
# @param request_url [String]
# @param request_body [String]
# @param response_env [#body, #status] completed response environment
# @return [Object, nil] whatever the parser returns, or nil when the body is unreadable
def parse_response(parser, request_url, request_body, response_env)
  payload = read_body(response_env.body)
  return parser.parse(request_url, request_body, response_env.status, payload) if payload

  Logging.warn(
    "Unable to read response body for #{request_url}; streaming/SSE responses require manual tracking."
  )
  nil
end
|
|
64
|
+
|
|
65
|
+
# Runs the budget check before the request is sent, but only for URLs that
# some registered parser recognises.
#
# @param request_url [String]
# @return [Object, nil] result of Tracker.enforce_budget!, or nil when no parser matches
def enforce_budget_before_request(request_url)
  Tracker.enforce_budget! if Parsers::Registry.find_for(request_url)
end
|
|
50
70
|
|
|
51
71
|
# Coerces a request/response body into a String.
#
# * String      -> returned as-is
# * nil         -> ""
# * Hash/Array  -> JSON-encoded
# * anything else -> +to_str+ when available, otherwise nil
#
# @param body [Object]
# @return [String, nil] nil signals a body that cannot be read as text
def read_body(body)
  return body if body.is_a?(String)
  return "" if body.nil?
  return body.to_json if body.is_a?(Hash) || body.is_a?(Array)

  body.to_str if body.respond_to?(:to_str)
end
|
|
80
|
+
|
|
81
|
+
# Resolves the configured tags for this request into a Hash.
#
# When @tags responds to +call+ it is invoked through +call_tags+;
# otherwise it is used directly. A nil result becomes an empty Hash.
#
# @param request_env [Object] request environment handed to callable tags
# @return [Hash]
def resolved_tags(request_env)
  source = @tags
  source = call_tags(request_env) if source.respond_to?(:call)
  source.nil? ? {} : source.to_h
end
|
|
87
|
+
|
|
88
|
+
# Invokes the callable stored in @tags.
#
# A callable that takes no arguments is called bare; any other callable
# receives +request_env+. Procs and Methods expose +arity+ directly. Other
# objects that merely respond to +call+ (which +resolved_tags+ accepts) do
# not, so their arity is read from the bound +call+ method instead of
# raising NoMethodError.
#
# @param request_env [Object] request environment passed to the callable
# @return [Object] whatever the callable returns
def call_tags(request_env)
  arity = @tags.respond_to?(:arity) ? @tags.arity : @tags.method(:call).arity
  arity.zero? ? @tags.call : @tags.call(request_env)
end
|
|
91
|
+
|
|
92
|
+
# Current reading of the monotonic clock, used for measuring durations.
#
# @return [Float] seconds (the default unit of Process.clock_gettime)
def monotonic_time
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
|
|
95
|
+
|
|
96
|
+
# Milliseconds elapsed since +started_at+, rounded to the nearest integer.
#
# @param started_at [Float] an earlier +monotonic_time+ reading
# @return [Integer]
def elapsed_ms(started_at)
  elapsed_seconds = monotonic_time - started_at
  (elapsed_seconds * 1000).round
end
|
|
58
99
|
end
|
|
59
100
|
end
|
|
60
101
|
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "value_object"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
  # Value object carrying the usage figures a parser extracted from a response.
  ParsedUsage = ValueObject.define(
    :provider,
    :model,
    :input_tokens,
    :output_tokens,
    :total_tokens,
    :cached_input_tokens,
    :cache_read_input_tokens,
    :cache_creation_input_tokens,
    :reasoning_tokens
  )

  class ParsedUsage
    # Attributes excluded from #metadata.
    TRACKING_KEYS = %i[provider model input_tokens output_tokens total_tokens].freeze

    # Builds an instance from loose keyword attributes.
    #
    # :provider, :model, :input_tokens and :output_tokens are required
    # (KeyError when missing). Token counts are coerced with +to_i+;
    # :total_tokens defaults to 0. The remaining attributes are passed
    # through unchanged and may be nil.
    #
    # @return [ParsedUsage]
    def self.build(**attributes)
      new(
        provider: attributes.fetch(:provider),
        model: attributes.fetch(:model),
        input_tokens: attributes.fetch(:input_tokens).to_i,
        output_tokens: attributes.fetch(:output_tokens).to_i,
        total_tokens: attributes.fetch(:total_tokens, 0).to_i,
        cached_input_tokens: attributes[:cached_input_tokens],
        cache_read_input_tokens: attributes[:cache_read_input_tokens],
        cache_creation_input_tokens: attributes[:cache_creation_input_tokens],
        reasoning_tokens: attributes[:reasoning_tokens]
      )
    end

    # Everything except the TRACKING_KEYS attributes.
    # NOTE(review): relies on the +except+ provided by ValueObject - confirm its return type.
    def metadata
      except(*TRACKING_KEYS)
    end

    # Hash form of the value object with nil-valued attributes removed.
    def to_h
      super.compact
    end
  end
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "base"
|
|
4
6
|
|
|
5
7
|
module LlmCostTracker
|
|
6
8
|
module Parsers
|
|
@@ -9,7 +11,7 @@ module LlmCostTracker
|
|
|
9
11
|
|
|
10
12
|
# Whether +url+ targets a messages endpoint on one of HOSTS.
#
# The host comparison is case-insensitive. URLs that cannot be parsed
# never match.
#
# @param url [#to_s]
# @return [Boolean]
def match?(url)
  parsed = URI.parse(url.to_s)
  return false unless HOSTS.include?(parsed.host.to_s.downcase)

  parsed.path.include?("/v1/messages")
rescue URI::InvalidURIError
  false
end
|
|
@@ -23,7 +25,7 @@ module LlmCostTracker
|
|
|
23
25
|
|
|
24
26
|
request = safe_json_parse(request_body)
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
ParsedUsage.build(
|
|
27
29
|
provider: "anthropic",
|
|
28
30
|
model: response["model"] || request["model"],
|
|
29
31
|
input_tokens: usage["input_tokens"] || 0,
|
|
@@ -33,7 +35,7 @@ module LlmCostTracker
|
|
|
33
35
|
(usage["cache_creation_input_tokens"] || 0),
|
|
34
36
|
cache_read_input_tokens: usage["cache_read_input_tokens"],
|
|
35
37
|
cache_creation_input_tokens: usage["cache_creation_input_tokens"]
|
|
36
|
-
|
|
38
|
+
)
|
|
37
39
|
end
|
|
38
40
|
end
|
|
39
41
|
end
|
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "base"
|
|
4
6
|
|
|
5
7
|
module LlmCostTracker
|
|
6
8
|
module Parsers
|
|
7
9
|
class Gemini < Base
|
|
8
10
|
HOSTS = %w[generativelanguage.googleapis.com].freeze
|
|
11
|
+
TRACKED_PATH_PATTERN = %r{/models/[^/:]+:(?:generateContent|streamGenerateContent)\z}
|
|
9
12
|
|
|
10
13
|
# Whether +url+ is a tracked content-generation endpoint on one of HOSTS.
#
# The host comparison is case-insensitive and the path must match
# TRACKED_PATH_PATTERN. URLs that cannot be parsed never match.
#
# @param url [#to_s]
# @return [Boolean]
def match?(url)
  parsed = URI.parse(url.to_s)
  return false unless HOSTS.include?(parsed.host.to_s.downcase)

  parsed.path.match?(TRACKED_PATH_PATTERN)
rescue URI::InvalidURIError
  false
end
|
|
@@ -24,18 +27,22 @@ module LlmCostTracker
|
|
|
24
27
|
# Extract model from URL: /v1beta/models/gemini-2.5-flash:generateContent
|
|
25
28
|
model = extract_model_from_url(request_url)
|
|
26
29
|
|
|
27
|
-
|
|
30
|
+
ParsedUsage.build(
|
|
28
31
|
provider: "gemini",
|
|
29
32
|
model: model,
|
|
30
33
|
input_tokens: usage["promptTokenCount"] || 0,
|
|
31
|
-
output_tokens: usage
|
|
34
|
+
output_tokens: output_tokens(usage),
|
|
32
35
|
total_tokens: usage["totalTokenCount"] || 0,
|
|
33
36
|
cached_input_tokens: usage["cachedContentTokenCount"]
|
|
34
|
-
|
|
37
|
+
)
|
|
35
38
|
end
|
|
36
39
|
|
|
37
40
|
private
|
|
38
41
|
|
|
42
|
+
# Output token count: candidate tokens plus thought tokens, each treated
# as 0 when absent from +usage+.
#
# @param usage [Hash] usage section of the response, keyed by string
# @return [Integer]
def output_tokens(usage)
  candidate_count = usage["candidatesTokenCount"] || 0
  thought_count = usage["thoughtsTokenCount"] || 0
  candidate_count + thought_count
end
|
|
45
|
+
|
|
39
46
|
def extract_model_from_url(url)
|
|
40
47
|
uri = URI.parse(url.to_s)
|
|
41
48
|
match = uri.path.match(%r{/models/([^/:]+)})
|
|
@@ -1,44 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "base"
|
|
6
|
+
require_relative "openai_usage"
|
|
4
7
|
|
|
5
8
|
module LlmCostTracker
  module Parsers
    # Parser for requests sent to the hosts in HOSTS on one of TRACKED_PATHS.
    # Usage extraction itself is delegated to OpenaiUsage.
    class Openai < Base
      include OpenaiUsage

      HOSTS = %w[api.openai.com].freeze
      TRACKED_PATHS = %w[/v1/chat/completions /v1/completions /v1/embeddings /v1/responses].freeze

      # Whether +url+ points at a tracked endpoint (case-insensitive host,
      # exact path). URLs that cannot be parsed never match.
      #
      # @param url [#to_s]
      # @return [Boolean]
      def match?(url)
        endpoint = URI.parse(url.to_s)
        return false unless HOSTS.include?(endpoint.host.to_s.downcase)

        TRACKED_PATHS.include?(endpoint.path)
      rescue URI::InvalidURIError
        false
      end

      # Extracts usage from a response via the shared OpenaiUsage logic.
      def parse(request_url, request_body, response_status, response_body)
        parse_openai_usage(request_url, request_body, response_status, response_body)
      end

      private

      # Provider label reported for every request handled by this parser.
      def provider_for(_request_url)
        "openai"
      end
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "base"
|
|
6
|
+
require_relative "openai_usage"
|
|
7
|
+
|
|
8
|
+
module LlmCostTracker
  module Parsers
    # Parser for hosts listed in the +openai_compatible_providers+
    # configuration. Usage extraction is delegated to OpenaiUsage.
    class OpenaiCompatible < Base
      include OpenaiUsage

      TRACKED_PATH_SUFFIXES = %w[/chat/completions /completions /embeddings /responses].freeze

      # Whether +url+ targets a configured host and ends in a tracked path
      # suffix. URLs that cannot be parsed never match.
      #
      # @param url [#to_s]
      # @return [Boolean]
      def match?(url)
        endpoint = URI.parse(url.to_s)
        return false if provider_for_host(endpoint.host).nil?

        tracked_path?(endpoint.path)
      rescue URI::InvalidURIError
        false
      end

      # Extracts usage from a response via the shared OpenaiUsage logic.
      def parse(request_url, request_body, response_status, response_body)
        parse_openai_usage(request_url, request_body, response_status, response_body)
      end

      private

      # Provider label for +request_url+; "openai_compatible" when the host
      # is not configured or the URL cannot be parsed.
      def provider_for(request_url)
        host = URI.parse(request_url.to_s).host
        provider_for_host(host) || "openai_compatible"
      rescue URI::InvalidURIError
        "openai_compatible"
      end

      # Looks the lower-cased host up in the configured providers.
      #
      # @return [String, nil] provider name as a String, or nil when unknown
      def provider_for_host(host)
        provider = configured_providers[host.to_s.downcase]
        provider&.to_s
      end

      def configured_providers
        LlmCostTracker.configuration.openai_compatible_providers
      end

      # True when +path+ ends with any of TRACKED_PATH_SUFFIXES.
      def tracked_path?(path)
        path.end_with?(*TRACKED_PATH_SUFFIXES)
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  module Parsers
    # Shared usage extraction for parsers that read an OpenAI-style "usage"
    # object. The including class must provide +safe_json_parse+ and
    # +provider_for+.
    module OpenaiUsage
      private

      # Builds a ParsedUsage from a response.
      #
      # Returns nil unless the status is exactly 200 and the response carries
      # a "usage" object. Token counts are read from either the
      # prompt/completion keys or the input/output keys, defaulting to 0; the
      # model falls back to the one named in the request body.
      #
      # @return [ParsedUsage, nil]
      def parse_openai_usage(request_url, request_body, response_status, response_body)
        return unless response_status == 200

        payload = safe_json_parse(response_body)
        token_usage = payload["usage"]
        return unless token_usage

        request_payload = safe_json_parse(request_body)

        attributes = {
          provider: provider_for(request_url),
          model: payload["model"] || request_payload["model"],
          input_tokens: token_usage["prompt_tokens"] || token_usage["input_tokens"] || 0,
          output_tokens: token_usage["completion_tokens"] || token_usage["output_tokens"] || 0,
          total_tokens: token_usage["total_tokens"] || 0,
          cached_input_tokens: cached_input_tokens(token_usage)
        }
        ParsedUsage.build(**attributes)
      end

      # Cached token count from whichever "*_tokens_details" object is
      # present, or nil when neither is.
      def cached_input_tokens(usage)
        (usage["prompt_tokens_details"] || usage["input_tokens_details"] || {})["cached_tokens"]
      end
    end
  end
end
|
|
@@ -4,16 +4,14 @@ module LlmCostTracker
|
|
|
4
4
|
module Parsers
|
|
5
5
|
class Registry
|
|
6
6
|
class << self
|
|
7
|
+
PARSERS_MUTEX = Mutex.new
|
|
8
|
+
|
|
7
9
|
# The registered parsers, building the default list on first use.
#
# An already-built list is returned without taking the lock; otherwise the
# list is created under PARSERS_MUTEX.
#
# @return [Array]
def parsers
  return @parsers if @parsers

  PARSERS_MUTEX.synchronize { @parsers ||= default_parsers }
end
|
|
14
12
|
|
|
15
13
|
# Adds +parser+ to the front of the parser list.
#
# The list is initialised here, inside the lock, rather than through
# +parsers+: that method takes PARSERS_MUTEX itself when the list has not
# been built yet, and Ruby's Mutex is not reentrant, so calling it from
# inside this synchronize block raised ThreadError on a fresh or reset
# registry.
#
# @param parser [Object] parser instance to register
# @return [Array] the parser list after insertion
def register(parser)
  PARSERS_MUTEX.synchronize { (@parsers ||= default_parsers).unshift(parser) }
end
|
|
18
16
|
|
|
19
17
|
def find_for(url)
|
|
@@ -21,7 +19,18 @@ module LlmCostTracker
|
|
|
21
19
|
end
|
|
22
20
|
|
|
23
21
|
# Discards the current parser list under PARSERS_MUTEX so that the next
# +parsers+ call rebuilds it.
#
# @return [nil]
def reset!
  PARSERS_MUTEX.synchronize do
    @parsers = nil
  end
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
# Fresh instances of the built-in parsers, in lookup order.
#
# @return [Array]
def default_parsers
  [Openai, OpenaiCompatible, Anthropic, Gemini].map(&:new)
end
|
|
26
35
|
end
|
|
27
36
|
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
require_relative "logging"
|
|
7
|
+
|
|
8
|
+
module LlmCostTracker
|
|
9
|
+
module PriceRegistry
|
|
10
|
+
# Path of the prices.json file that sits next to this source file.
DEFAULT_PRICES_PATH = File.expand_path("prices.json", __dir__)
# Shared empty result returned when no prices file is configured.
EMPTY_PRICES = {}.freeze
# Keys of a price entry that carry numeric prices.
PRICE_KEYS = %w[input cached_input output cache_read_input cache_creation_input].freeze
# Keys of a price entry that are accepted but not treated as prices.
METADATA_KEYS = %w[_source _updated _notes].freeze
# Guards the cached result of file_prices.
FILE_PRICES_MUTEX = Mutex.new

# Turns one raw price entry into { price_key_symbol => Float }.
# Keys outside PRICE_KEYS are dropped; nil is treated as an empty entry.
# Float() raises for values that are not numeric.
NORMALIZE_PRICE_ENTRY = lambda do |price|
  entry = {}
  (price || {}).each do |raw_key, raw_value|
    name = raw_key.to_s
    next unless PRICE_KEYS.include?(name)

    entry[name.to_sym] = Float(raw_value)
  end
  entry
end

# Turns a raw { model => entry } table into { model_string => normalized entry }.
# nil is treated as an empty table.
NORMALIZE_PRICE_TABLE = lambda do |table|
  prices = {}
  (table || {}).each do |model, price|
    prices[model.to_s] = NORMALIZE_PRICE_ENTRY.call(price)
  end
  prices
end
|
|
26
|
+
# The bundled registry is read and parsed once, when this file is loaded.
bundled_registry_json = File.read(DEFAULT_PRICES_PATH)
RAW_REGISTRY = JSON.parse(bundled_registry_json).freeze
# Top-level "metadata" object of the bundled registry (empty when absent).
PRICE_METADATA = RAW_REGISTRY.fetch("metadata") { {} }.freeze
# Normalized form of the bundled "models" table (empty when absent).
BUILTIN_PRICES = NORMALIZE_PRICE_TABLE.call(RAW_REGISTRY.fetch("models") { {} }).freeze

private_constant :FILE_PRICES_MUTEX
|
|
31
|
+
|
|
32
|
+
class << self
  # Normalized price table bundled with the library.
  def builtin_prices
    BUILTIN_PRICES
  end

  # The "metadata" object of the bundled registry.
  def metadata
    PRICE_METADATA
  end

  # Normalizes an arbitrary { model => price entry } table.
  def normalize_price_table(table)
    NORMALIZE_PRICE_TABLE.call(table)
  end

  # Loads, normalizes and caches the price table stored at +path+.
  #
  # The result is cached per [path, mtime]; the file is re-read when either
  # changes. The cache key is stored only after the table was built
  # successfully: recording it first meant that a failed load left the key
  # pointing at a stale table, so the next call for the same unchanged file
  # returned that stale value (nil or another file's prices) instead of
  # raising again.
  #
  # @param path [#to_s, nil] JSON or YAML prices file; nil/false disables it
  # @return [Hash] frozen { model => { price_key => Float } } table
  # @raise [Error] when the file is missing or cannot be parsed/normalized
  def file_prices(path)
    return EMPTY_PRICES unless path

    path = path.to_s
    FILE_PRICES_MUTEX.synchronize do
      cache_key = [path, File.mtime(path).to_f]
      return @file_prices if @file_prices_cache_key == cache_key

      prices = normalize_file_prices(price_file_models(load_price_file(path)), path: path).freeze
      @file_prices_cache_key = cache_key
      @file_prices = prices
    end
  rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError, NoMethodError => e
    raise Error, "Unable to load prices_file #{path.inspect}: #{e.message}"
  end

  private

  # Normalizes every entry of +table+, warning about unknown keys on the way.
  def normalize_file_prices(table, path:)
    (table || {}).each_with_object({}) do |(model, price), normalized|
      warn_unknown_keys(model, price, path)
      normalized[model.to_s] = normalize_price_entry(price)
    end
  end

  def normalize_price_entry(price)
    NORMALIZE_PRICE_ENTRY.call(price)
  end

  # Logs a warning listing keys that are neither price keys nor metadata keys.
  def warn_unknown_keys(model, price, path)
    unknown_keys = price.keys.map(&:to_s) - PRICE_KEYS - METADATA_KEYS
    return if unknown_keys.empty?

    Logging.warn(
      "Unknown price keys #{unknown_keys.inspect} for #{model.inspect} in #{path}; " \
      "ignored. Known keys: #{(PRICE_KEYS + METADATA_KEYS).inspect}"
    )
  end

  # Parses the file as YAML (aliases disabled) for .yaml/.yml, as JSON otherwise.
  # An empty YAML document yields an empty Hash.
  def load_price_file(path)
    contents = File.read(path)
    return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)

    JSON.parse(contents)
  end

  def yaml_file?(path)
    %w[.yaml .yml].include?(File.extname(path).downcase)
  end

  # The "models" section when present, otherwise the whole document.
  def price_file_models(registry)
    registry.fetch("models", registry)
  end
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"updated_at": "2026-04-18",
|
|
4
|
+
"currency": "USD",
|
|
5
|
+
"unit": "1M tokens",
|
|
6
|
+
"source_urls": [
|
|
7
|
+
"https://openai.com/api/pricing",
|
|
8
|
+
"https://www.anthropic.com/pricing",
|
|
9
|
+
"https://ai.google.dev/gemini-api/docs/pricing"
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
"models": {
|
|
13
|
+
"gpt-5.2": { "input": 1.75, "cached_input": 0.175, "output": 14.0 },
|
|
14
|
+
"gpt-5.1": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
15
|
+
"gpt-5": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
16
|
+
"gpt-5-mini": { "input": 0.25, "cached_input": 0.025, "output": 2.0 },
|
|
17
|
+
"gpt-5-nano": { "input": 0.05, "cached_input": 0.005, "output": 0.4 },
|
|
18
|
+
"gpt-4.1": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
19
|
+
"gpt-4.1-mini": { "input": 0.4, "cached_input": 0.1, "output": 1.6 },
|
|
20
|
+
"gpt-4.1-nano": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
21
|
+
"gpt-4o-2024-05-13": { "input": 5.0, "output": 15.0 },
|
|
22
|
+
"gpt-4o": { "input": 2.5, "cached_input": 1.25, "output": 10.0 },
|
|
23
|
+
"gpt-4o-mini": { "input": 0.15, "cached_input": 0.075, "output": 0.6 },
|
|
24
|
+
"gpt-4-turbo": { "input": 10.0, "output": 30.0 },
|
|
25
|
+
"gpt-4": { "input": 30.0, "output": 60.0 },
|
|
26
|
+
"gpt-3.5-turbo": { "input": 0.5, "output": 1.5 },
|
|
27
|
+
"o1": { "input": 15.0, "cached_input": 7.5, "output": 60.0 },
|
|
28
|
+
"o1-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
29
|
+
"o3": { "input": 2.0, "cached_input": 0.5, "output": 8.0 },
|
|
30
|
+
"o3-mini": { "input": 1.1, "cached_input": 0.55, "output": 4.4 },
|
|
31
|
+
"o4-mini": { "input": 1.1, "cached_input": 0.275, "output": 4.4 },
|
|
32
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
33
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0, "cache_read_input": 0.5, "cache_creation_input": 6.25 },
|
|
34
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
35
|
+
"claude-opus-4": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
36
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
37
|
+
"claude-sonnet-4": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
38
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cache_read_input": 0.1, "cache_creation_input": 1.25 },
|
|
39
|
+
"claude-3-7-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
40
|
+
"claude-3-5-sonnet": { "input": 3.0, "output": 15.0, "cache_read_input": 0.3, "cache_creation_input": 3.75 },
|
|
41
|
+
"claude-3-5-haiku": { "input": 0.8, "output": 4.0, "cache_read_input": 0.08, "cache_creation_input": 1.0 },
|
|
42
|
+
"claude-3-opus": { "input": 15.0, "output": 75.0, "cache_read_input": 1.5, "cache_creation_input": 18.75 },
|
|
43
|
+
"gemini-2.5-pro": { "input": 1.25, "cached_input": 0.125, "output": 10.0 },
|
|
44
|
+
"gemini-2.5-flash": { "input": 0.3, "cached_input": 0.03, "output": 2.5 },
|
|
45
|
+
"gemini-2.5-flash-lite": { "input": 0.1, "cached_input": 0.01, "output": 0.4 },
|
|
46
|
+
"gemini-2.0-flash": { "input": 0.1, "cached_input": 0.025, "output": 0.4 },
|
|
47
|
+
"gemini-2.0-flash-lite": { "input": 0.075, "output": 0.3 },
|
|
48
|
+
"gemini-1.5-pro": { "input": 1.25, "output": 5.0 },
|
|
49
|
+
"gemini-1.5-flash": { "input": 0.075, "output": 0.3 }
|
|
50
|
+
}
|
|
51
|
+
}
|