llm_cost_tracker 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/README.md +18 -9
- data/app/controllers/llm_cost_tracker/calls_controller.rb +2 -1
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +3 -15
- data/app/controllers/llm_cost_tracker/tags_controller.rb +7 -6
- data/app/helpers/llm_cost_tracker/application_helper.rb +21 -6
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +3 -1
- data/app/services/llm_cost_tracker/dashboard/date_range.rb +42 -0
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -8
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +6 -5
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +74 -18
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +15 -4
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +4 -0
- data/docs/architecture.md +28 -0
- data/docs/budgets.md +45 -0
- data/docs/configuration.md +65 -0
- data/docs/cookbook.md +185 -0
- data/docs/dashboard-overview.png +0 -0
- data/docs/dashboard.md +38 -0
- data/docs/extending.md +32 -0
- data/docs/operations.md +44 -0
- data/docs/pricing.md +94 -0
- data/docs/querying.md +36 -0
- data/docs/streaming.md +70 -0
- data/docs/technical/README.md +10 -0
- data/docs/technical/data-flow.md +67 -0
- data/docs/technical/extension-points.md +111 -0
- data/docs/technical/module-map.md +197 -0
- data/docs/technical/operational-notes.md +77 -0
- data/docs/upgrading.md +46 -0
- data/lib/llm_cost_tracker/capture_verifier.rb +71 -0
- data/lib/llm_cost_tracker/configuration/instrumentation.rb +1 -1
- data/lib/llm_cost_tracker/configuration/storage_backend.rb +26 -0
- data/lib/llm_cost_tracker/configuration.rb +24 -17
- data/lib/llm_cost_tracker/doctor/capture_check.rb +39 -0
- data/lib/llm_cost_tracker/doctor.rb +6 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +1 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +7 -1
- data/lib/llm_cost_tracker/integrations/anthropic.rb +51 -3
- data/lib/llm_cost_tracker/integrations/base.rb +77 -6
- data/lib/llm_cost_tracker/integrations/object_reader.rb +1 -1
- data/lib/llm_cost_tracker/integrations/openai.rb +78 -5
- data/lib/llm_cost_tracker/integrations/registry.rb +36 -4
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +171 -0
- data/lib/llm_cost_tracker/integrations/stream_tracker.rb +166 -0
- data/lib/llm_cost_tracker/llm_api_call.rb +2 -77
- data/lib/llm_cost_tracker/llm_api_call_metrics.rb +63 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +8 -4
- data/lib/llm_cost_tracker/parsers/gemini.rb +8 -1
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +12 -3
- data/lib/llm_cost_tracker/price_registry.rb +3 -0
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +41 -12
- data/lib/llm_cost_tracker/price_sync/registry_loader.rb +6 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +75 -0
- data/lib/llm_cost_tracker/pricing/explainer.rb +77 -0
- data/lib/llm_cost_tracker/pricing/lookup.rb +110 -0
- data/lib/llm_cost_tracker/pricing.rb +25 -108
- data/lib/llm_cost_tracker/report.rb +8 -1
- data/lib/llm_cost_tracker/report_data.rb +25 -9
- data/lib/llm_cost_tracker/retention.rb +33 -16
- data/lib/llm_cost_tracker/storage/active_record_backend.rb +115 -0
- data/lib/llm_cost_tracker/storage/active_record_rollups.rb +42 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +26 -0
- data/lib/llm_cost_tracker/storage/custom_backend.rb +32 -0
- data/lib/llm_cost_tracker/storage/dispatcher.rb +11 -34
- data/lib/llm_cost_tracker/storage/log_backend.rb +38 -0
- data/lib/llm_cost_tracker/storage/registry.rb +63 -0
- data/lib/llm_cost_tracker/stream_capture.rb +7 -0
- data/lib/llm_cost_tracker/stream_collector.rb +25 -1
- data/lib/llm_cost_tracker/tag_sanitizer.rb +81 -0
- data/lib/llm_cost_tracker/tag_sql.rb +34 -0
- data/lib/llm_cost_tracker/tracker.rb +6 -2
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +4 -0
- data/lib/tasks/llm_cost_tracker.rake +49 -0
- metadata +40 -6
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "monitor"
|
|
4
|
+
|
|
5
|
+
require_relative "../logging"
|
|
6
|
+
require_relative "../stream_collector"
|
|
7
|
+
require_relative "../value_helpers"
|
|
8
|
+
require_relative "object_reader"
|
|
9
|
+
|
|
10
|
+
module LlmCostTracker
|
|
11
|
+
module Integrations
|
|
12
|
+
class StreamTracker
  # Mirrors every event yielded by a provider stream into a collector and
  # runs a finish callback at most once, when iteration ends or raises.

  # Builds a tracker and installs it on +stream+.
  #
  # @param stream [Object, nil] stream object to instrument in place
  # @param collector [#event, #finish!] receives each normalized event
  # @param active [#call] consulted at finish time; the finish callback only
  #   runs when it returns a truthy value
  # @param finish [#call, nil] invoked as +finish.call(errored:)+; when nil,
  #   +collector.finish!(errored:)+ is used
  # @return [Object, nil] the same +stream+ object that was passed in
  def self.wrap(stream, collector:, active:, finish: nil) = new(stream, collector, active, finish).wrap

  def initialize(stream, collector, active, finish)
    @stream = stream
    @collector = collector
    @active = active
    @finish = finish || proc { |errored:| @collector.finish!(errored: errored) }
    @finished = false
    @capture_failed = false
    # Guards the two one-shot flags above.
    @monitor = Monitor.new
  end

  # Installs the tracking wrapper on the stream.
  #
  # An +@iterator+ instance variable that responds to +each+ is replaced with
  # a tracked enumerator; otherwise +each+ is overridden on the stream's
  # singleton class. Installation errors are logged and never raised.
  #
  # @return [Object, nil] the stream given to the constructor
  def wrap
    return @stream unless @stream

    iterator_wrapped = @stream.instance_variable_defined?(:@iterator) && wrap_iterator?
    wrap_each if !iterator_wrapped && @stream.respond_to?(:each)

    @stream
  rescue StandardError => e
    Logging.warn("stream integration failed to install wrapper: #{e.class}: #{e.message}")
    @stream
  end

  private

  # Swaps the stream's +@iterator+ for a tracked enumerator.
  #
  # @return [Boolean] true when the iterator was replaced
  def wrap_iterator?
    iterator = @stream.instance_variable_get(:@iterator)
    return false unless iterator.respond_to?(:each)

    @stream.instance_variable_set(:@iterator, tracked_iterator(iterator))
    true
  end

  # Overrides +each+ on the stream's singleton class so block iteration goes
  # through the tracker; without a block it returns an enumerator.
  def wrap_each
    tracker = self
    # Captured before redefinition so the tracker can still reach the
    # stream's own implementation.
    original_each = @stream.method(:each)
    @stream.define_singleton_method(:each) do |&block|
      next enum_for(:each) unless block

      tracker.__send__(:each_from, original_each, &block)
    end
  end

  # @return [Enumerator] yields the iterator's events through +each_from+
  def tracked_iterator(iterator)
    Enumerator.new do |yielder|
      each_from(iterator) { |event| yielder << event }
    end
  end

  # Iterates +iterable+, capturing each event before yielding it onward.
  # A StandardError from iteration is re-raised after marking the run as
  # errored; the finish step runs in every case.
  def each_from(iterable)
    errored = false
    iterate(iterable) do |event|
      capture(event)
      yield event
    end
  rescue StandardError
    errored = true
    raise
  ensure
    finish!(errored: errored)
  end

  # Accepts either something that responds to +each+ or a callable (such as
  # the Method object captured by +wrap_each+).
  def iterate(iterable, &)
    if iterable.respond_to?(:each)
      iterable.each(&)
    else
      iterable.call(&)
    end
  end

  # Sends one normalized event to the collector. Failures are reported via
  # +warn_capture_failure+ and never propagate to the stream consumer.
  def capture(event)
    payload = normalize(event_payload(event))
    @collector.event(payload, type: event_type(event, payload))
  rescue StandardError => e
    warn_capture_failure(e)
  end

  # Picks the richest available conversion: +deep_to_h+, then +to_h+, then a
  # fixed set of attributes read through ObjectReader.
  def event_payload(event)
    if event.respond_to?(:deep_to_h)
      event.deep_to_h
    elsif event.respond_to?(:to_h)
      event.to_h
    else
      event_attributes(event)
    end
  end

  # @return [Hash{Symbol => Object}] the non-nil attributes among
  #   type, id, model, usage, response and message
  def event_attributes(event)
    %i[type id model usage response message].each_with_object({}) do |key, attributes|
      value = ObjectReader.read(event, key)
      attributes[key] = value unless value.nil?
    end
  end

  # @return [String, nil] the event's type, taken from the event itself or,
  #   failing that, from the normalized payload's "type" entry
  def event_type(event, payload)
    value = ObjectReader.first(event, :type) || payload["type"]
    value&.to_s
  end

  # Recursively converts a payload: hash keys become strings, symbols become
  # strings, and hash-convertible objects are expanded.
  def normalize(value)
    case value
    when Hash
      value.each_with_object({}) do |(key, nested), normalized|
        normalized[key.to_s] = normalize(nested)
      end
    when Array
      value.map { |nested| normalize(nested) }
    when Symbol
      value.to_s
    when NilClass
      nil
    else
      converted = object_hash(value)
      # Recurse only into real containers. A conversion that returns some
      # other object (for example itself) would otherwise recurse without
      # bound, and SystemStackError is not a StandardError, so it would
      # escape the rescue in #capture and break the caller's stream.
      case converted
      when Hash, Array
        normalize(converted)
      else
        ValueHelpers.deep_dup(value)
      end
    end
  end

  # @return [Object, nil] result of +deep_to_h+ or +to_h+, or nil when the
  #   value supports neither or the conversion raises
  def object_hash(value)
    if value.respond_to?(:deep_to_h)
      value.deep_to_h
    elsif value.respond_to?(:to_h)
      value.to_h
    end
  rescue StandardError
    nil
  end

  # Logs the first capture failure only; later failures are silent.
  def warn_capture_failure(error)
    should_warn = @monitor.synchronize do
      next false if @capture_failed

      @capture_failed = true
      true
    end
    return unless should_warn

    Logging.warn("stream integration failed to capture event: #{error.class}: #{error.message}")
  end

  # Runs the finish callback at most once. The finished flag is set before
  # +@active+ is consulted, so an inactive first call still uses up the
  # single attempt.
  def finish!(errored:)
    should_finish = @monitor.synchronize do
      next false if @finished

      @finished = true
      true
    end
    return unless should_finish && @active.call

    @finish.call(errored: errored)
  end
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
require "active_record"
|
|
4
4
|
|
|
5
|
+
require_relative "llm_api_call_metrics"
|
|
5
6
|
require_relative "period_grouping"
|
|
6
7
|
require_relative "tag_accessors"
|
|
7
|
-
require_relative "tag_key"
|
|
8
8
|
require_relative "tag_query"
|
|
9
9
|
require_relative "tags_column"
|
|
10
10
|
|
|
@@ -12,6 +12,7 @@ module LlmCostTracker
|
|
|
12
12
|
class LlmApiCall < ActiveRecord::Base
|
|
13
13
|
extend PeriodGrouping
|
|
14
14
|
extend TagsColumn
|
|
15
|
+
extend LlmApiCallMetrics
|
|
15
16
|
include TagAccessors
|
|
16
17
|
|
|
17
18
|
self.table_name = "llm_api_calls"
|
|
@@ -55,81 +56,5 @@ module LlmCostTracker
|
|
|
55
56
|
def self.by_tags(tags)
|
|
56
57
|
TagQuery.apply(self, tags)
|
|
57
58
|
end
|
|
58
|
-
|
|
59
|
-
def self.total_cost
|
|
60
|
-
sum(:total_cost).to_f
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def self.total_tokens
|
|
64
|
-
sum(:total_tokens).to_i
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
def self.cost_by_model
|
|
68
|
-
group(:model).sum(:total_cost)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def self.cost_by_provider
|
|
72
|
-
group(:provider).sum(:total_cost)
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def self.group_by_tag(key)
|
|
76
|
-
group(Arel.sql(tag_group_expression(key)))
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def self.cost_by_tag(key)
|
|
80
|
-
costs = group_by_tag(key).sum(:total_cost).each_with_object(Hash.new(0.0)) do |(tag_value, cost), grouped|
|
|
81
|
-
grouped[tag_label(tag_value)] += cost.to_f
|
|
82
|
-
end
|
|
83
|
-
costs.sort_by { |_label, cost| -cost }.to_h
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def self.average_latency_ms
|
|
87
|
-
return nil unless latency_column?
|
|
88
|
-
|
|
89
|
-
average(:latency_ms)&.to_f
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def self.latency_by_model
|
|
93
|
-
return {} unless latency_column?
|
|
94
|
-
|
|
95
|
-
group(:model).average(:latency_ms).transform_values(&:to_f)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def self.latency_by_provider
|
|
99
|
-
return {} unless latency_column?
|
|
100
|
-
|
|
101
|
-
group(:provider).average(:latency_ms).transform_values(&:to_f)
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def self.tag_label(value)
|
|
105
|
-
value.nil? || value == "" ? "(untagged)" : value.to_s
|
|
106
|
-
end
|
|
107
|
-
private_class_method :tag_label
|
|
108
|
-
|
|
109
|
-
def self.tag_group_expression(key)
|
|
110
|
-
key = validated_tag_key(key)
|
|
111
|
-
column = "#{quoted_table_name}.#{connection.quote_column_name('tags')}"
|
|
112
|
-
|
|
113
|
-
case connection.adapter_name
|
|
114
|
-
when /postgres/i
|
|
115
|
-
json_column = tags_jsonb_column? ? column : "(#{column})::jsonb"
|
|
116
|
-
"#{json_column}->>#{connection.quote(key)}"
|
|
117
|
-
when /mysql/i
|
|
118
|
-
"JSON_UNQUOTE(JSON_EXTRACT(#{column}, #{connection.quote(json_path(key))}))"
|
|
119
|
-
else
|
|
120
|
-
"json_extract(#{column}, #{connection.quote(json_path(key))})"
|
|
121
|
-
end
|
|
122
|
-
end
|
|
123
|
-
private_class_method :tag_group_expression
|
|
124
|
-
|
|
125
|
-
def self.validated_tag_key(key)
|
|
126
|
-
TagKey.validate!(key)
|
|
127
|
-
end
|
|
128
|
-
private_class_method :validated_tag_key
|
|
129
|
-
|
|
130
|
-
def self.json_path(key)
|
|
131
|
-
"$.\"#{key}\""
|
|
132
|
-
end
|
|
133
|
-
private_class_method :json_path
|
|
134
59
|
end
|
|
135
60
|
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "tag_sql"
|
|
4
|
+
|
|
5
|
+
module LlmCostTracker
|
|
6
|
+
module LlmApiCallMetrics
  # Aggregate helpers meant to be extended onto a query host that provides
  # +sum+, +group+, +order+, +limit+, +average+, +latency_column?+ and
  # +quoted_table_name+.

  # @return [Float] summed +total_cost+
  def total_cost = sum(:total_cost).to_f

  # @return [Integer] summed +total_tokens+
  def total_tokens = sum(:total_tokens).to_i

  # @return [Hash] summed +total_cost+ grouped by model
  def cost_by_model = group(:model).sum(:total_cost)

  # @return [Hash] summed +total_cost+ grouped by provider
  def cost_by_provider = group(:provider).sum(:total_cost)

  # Groups by the SQL expression that extracts the value of tag +key+.
  def group_by_tag(key) = group(Arel.sql(tag_value_expression(key)))

  # Cost per tag value, highest first.
  #
  # @param key [Object] tag key passed to +group_by_tag+
  # @param limit [Integer, nil] optional cap applied to the grouped query
  # @return [Hash{String => Float}] label => cost, sorted by descending cost
  def cost_by_tag(key, limit: nil)
    ranked = group_by_tag(key).order(Arel.sql("COALESCE(SUM(total_cost), 0) DESC"))
    ranked = ranked.limit(limit) if limit

    # Several raw values can share one label, so their costs are accumulated.
    totals = Hash.new(0.0)
    ranked.sum(:total_cost).each do |raw_value, amount|
      totals[tag_value_label(raw_value)] += amount.to_f
    end
    totals.sort_by { |_label, amount| -amount }.to_h
  end

  # @return [Float, nil] mean latency, or nil when there is no latency column
  #   or no average is available
  def average_latency_ms
    latency_column? ? average(:latency_ms)&.to_f : nil
  end

  # @return [Hash] mean latency per model; empty without a latency column
  def latency_by_model
    latency_column? ? group(:model).average(:latency_ms).transform_values(&:to_f) : {}
  end

  # @return [Hash] mean latency per provider; empty without a latency column
  def latency_by_provider
    latency_column? ? group(:provider).average(:latency_ms).transform_values(&:to_f) : {}
  end

  # Delegates to TagSql.value_label.
  def tag_value_label(value) = TagSql.value_label(value)

  # Delegates to TagSql.value_expression with this host as the model.
  def tag_value_expression(key, table_name: quoted_table_name)
    TagSql.value_expression(self, key, table_name: table_name)
  end
end
|
|
63
|
+
end
|
|
@@ -5,12 +5,11 @@ require "json"
|
|
|
5
5
|
|
|
6
6
|
require_relative "../logging"
|
|
7
7
|
require_relative "../request_url"
|
|
8
|
+
require_relative "../stream_capture"
|
|
8
9
|
|
|
9
10
|
module LlmCostTracker
|
|
10
11
|
module Middleware
|
|
11
12
|
class Faraday < ::Faraday::Middleware
|
|
12
|
-
STREAM_CAPTURE_LIMIT_BYTES = 1_048_576
|
|
13
|
-
|
|
14
13
|
def initialize(app, **options)
|
|
15
14
|
super(app)
|
|
16
15
|
@tags = options.fetch(:tags, {})
|
|
@@ -88,6 +87,11 @@ module LlmCostTracker
|
|
|
88
87
|
end
|
|
89
88
|
|
|
90
89
|
def parse_stream(parser, request_url, request_body, response_env, stream_buffer)
|
|
90
|
+
if stream_buffer&.dig(:overflowed)
|
|
91
|
+
Logging.warn(capture_warning(request_url, stream_buffer))
|
|
92
|
+
return parser.parse_stream(request_url, request_body, response_env.status, [])
|
|
93
|
+
end
|
|
94
|
+
|
|
91
95
|
body = stream_buffer&.dig(:buffer)&.string
|
|
92
96
|
body = read_body(response_env.body) if body.nil? || body.empty?
|
|
93
97
|
|
|
@@ -110,7 +114,7 @@ module LlmCostTracker
|
|
|
110
114
|
request_env.request.on_data = proc do |chunk, size, env|
|
|
111
115
|
chunk = chunk.to_s
|
|
112
116
|
unless state[:overflowed]
|
|
113
|
-
if state[:bytes] + chunk.bytesize <=
|
|
117
|
+
if state[:bytes] + chunk.bytesize <= StreamCapture::LIMIT_BYTES
|
|
114
118
|
state[:buffer] << chunk
|
|
115
119
|
state[:bytes] += chunk.bytesize
|
|
116
120
|
else
|
|
@@ -161,7 +165,7 @@ module LlmCostTracker
|
|
|
161
165
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
162
166
|
end
|
|
163
167
|
|
|
164
|
-
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{
|
|
168
|
+
"Streaming response for #{RequestUrl.label(request_url)} exceeded #{StreamCapture::LIMIT_BYTES} bytes; " \
|
|
165
169
|
"recording usage_source=unknown. Use LlmCostTracker.track_stream for manual capture."
|
|
166
170
|
end
|
|
167
171
|
end
|
|
@@ -72,7 +72,7 @@ module LlmCostTracker
|
|
|
72
72
|
model: extract_model_from_url(request_url),
|
|
73
73
|
input_tokens: [usage["promptTokenCount"].to_i - cache_read, 0].max,
|
|
74
74
|
output_tokens: output_tokens(usage),
|
|
75
|
-
total_tokens: usage
|
|
75
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
76
76
|
cache_read_input_tokens: usage["cachedContentTokenCount"],
|
|
77
77
|
hidden_output_tokens: usage["thoughtsTokenCount"],
|
|
78
78
|
stream: stream,
|
|
@@ -92,6 +92,13 @@ module LlmCostTracker
|
|
|
92
92
|
usage["candidatesTokenCount"].to_i + usage["thoughtsTokenCount"].to_i
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
# Total token count for a usage hash.
#
# Uses the reported "totalTokenCount" when present; otherwise adds the
# non-cached prompt tokens (floored at zero), the cached tokens and the
# output tokens.
#
# @param usage [Hash] usage metadata keyed by strings
# @param cache_read [Integer] cached prompt token count
# @return [Integer]
def total_tokens(usage, cache_read)
  reported = usage["totalTokenCount"]
  if reported.nil?
    uncached_prompt = [usage["promptTokenCount"].to_i - cache_read, 0].max
    uncached_prompt + cache_read + output_tokens(usage)
  else
    reported.to_i
  end
end
|
|
101
|
+
|
|
95
102
|
def stream_response_id(events)
|
|
96
103
|
find_event_value(events) { |data| data["responseId"] }
|
|
97
104
|
end
|
|
@@ -21,7 +21,7 @@ module LlmCostTracker
|
|
|
21
21
|
model: response["model"] || request["model"],
|
|
22
22
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
23
23
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
24
|
-
total_tokens: usage
|
|
24
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
25
25
|
cache_read_input_tokens: cache_read,
|
|
26
26
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
27
27
|
usage_source: :response
|
|
@@ -44,7 +44,7 @@ module LlmCostTracker
|
|
|
44
44
|
model: model,
|
|
45
45
|
input_tokens: regular_input_tokens(usage, cache_read),
|
|
46
46
|
output_tokens: (usage["completion_tokens"] || usage["output_tokens"]).to_i,
|
|
47
|
-
total_tokens: usage
|
|
47
|
+
total_tokens: total_tokens(usage, cache_read),
|
|
48
48
|
cache_read_input_tokens: cache_read,
|
|
49
49
|
hidden_output_tokens: hidden_output_tokens(usage),
|
|
50
50
|
stream: true,
|
|
@@ -61,7 +61,7 @@ module LlmCostTracker
|
|
|
61
61
|
|
|
62
62
|
def detect_stream_usage(events)
|
|
63
63
|
find_event_value(events, reverse: true) do |data|
|
|
64
|
-
usage = data["usage"]
|
|
64
|
+
usage = data["usage"] || data.dig("response", "usage")
|
|
65
65
|
usage if usage.is_a?(Hash)
|
|
66
66
|
end
|
|
67
67
|
end
|
|
@@ -87,6 +87,15 @@ module LlmCostTracker
|
|
|
87
87
|
details = usage["completion_tokens_details"] || usage["output_tokens_details"] || {}
|
|
88
88
|
details["reasoning_tokens"]
|
|
89
89
|
end
|
|
90
|
+
|
|
91
|
+
# Total token count for a usage hash.
#
# Uses the reported "total_tokens" when present; otherwise adds the regular
# input tokens, the cached input tokens and the output tokens (read from
# "completion_tokens", falling back to "output_tokens").
#
# @param usage [Hash] usage payload keyed by strings
# @param cache_read [Integer, nil] cached input token count
# @return [Integer]
def total_tokens(usage, cache_read)
  reported = usage["total_tokens"]
  if reported.nil?
    regular_input = regular_input_tokens(usage, cache_read)
    cached_input = cache_read.to_i
    output = (usage["completion_tokens"] || usage["output_tokens"]).to_i
    regular_input + cached_input + output
  else
    reported.to_i
  end
end
|
|
90
99
|
end
|
|
91
100
|
end
|
|
92
101
|
end
|
|
@@ -12,6 +12,7 @@ module LlmCostTracker
|
|
|
12
12
|
EMPTY_PRICES = {}.freeze
|
|
13
13
|
PRICE_KEYS = %w[input output cache_read_input cache_write_input].freeze
|
|
14
14
|
METADATA_KEYS = %w[_source _source_version _fetched_at _updated _notes _validator_override].freeze
|
|
15
|
+
MAX_FILE_BYTES = 2_097_152
|
|
15
16
|
MUTEX = Monitor.new
|
|
16
17
|
|
|
17
18
|
class << self
|
|
@@ -114,6 +115,8 @@ module LlmCostTracker
|
|
|
114
115
|
end
|
|
115
116
|
|
|
116
117
|
def load_price_file(path)
|
|
118
|
+
raise ArgumentError, "prices_file exceeds #{MAX_FILE_BYTES} bytes" if File.size(path) > MAX_FILE_BYTES
|
|
119
|
+
|
|
117
120
|
contents = File.read(path)
|
|
118
121
|
return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)
|
|
119
122
|
|
|
@@ -17,6 +17,7 @@ module LlmCostTracker
|
|
|
17
17
|
|
|
18
18
|
USER_AGENT = "llm_cost_tracker price refresh"
|
|
19
19
|
MAX_REDIRECTS = 5
|
|
20
|
+
MAX_BODY_BYTES = 2_097_152
|
|
20
21
|
OPEN_TIMEOUT = 5
|
|
21
22
|
READ_TIMEOUT = 10
|
|
22
23
|
WRITE_TIMEOUT = 10
|
|
@@ -25,26 +26,17 @@ module LlmCostTracker
|
|
|
25
26
|
raise Error, "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS
|
|
26
27
|
|
|
27
28
|
uri = URI.parse(url)
|
|
28
|
-
raise Error, "Pricing snapshot URL must use
|
|
29
|
+
raise Error, "Pricing snapshot URL must use https" unless uri.scheme == "https"
|
|
29
30
|
|
|
30
31
|
request = Net::HTTP::Get.new(uri)
|
|
31
32
|
request["User-Agent"] = USER_AGENT
|
|
32
33
|
request["If-None-Match"] = etag if etag
|
|
33
34
|
|
|
34
|
-
response =
|
|
35
|
-
uri.host,
|
|
36
|
-
uri.port,
|
|
37
|
-
use_ssl: uri.scheme == "https",
|
|
38
|
-
open_timeout: OPEN_TIMEOUT,
|
|
39
|
-
read_timeout: READ_TIMEOUT,
|
|
40
|
-
write_timeout: WRITE_TIMEOUT
|
|
41
|
-
) do |http|
|
|
42
|
-
http.request(request)
|
|
43
|
-
end
|
|
35
|
+
response, body = fetch_response(uri, request)
|
|
44
36
|
|
|
45
37
|
case response
|
|
46
38
|
when Net::HTTPSuccess
|
|
47
|
-
build_response(response, not_modified: false)
|
|
39
|
+
build_response(response, body: body || limited_body(response), not_modified: false)
|
|
48
40
|
when Net::HTTPNotModified
|
|
49
41
|
build_response(response, body: nil, not_modified: true)
|
|
50
42
|
when Net::HTTPRedirection
|
|
@@ -61,6 +53,43 @@ module LlmCostTracker
|
|
|
61
53
|
|
|
62
54
|
private
|
|
63
55
|
|
|
56
|
+
# Performs the HTTP request and reads a successful body while the
# connection is still open, so the size limit applies during download.
#
# @param uri [URI] target whose host, port and scheme configure the connection
# @param request [Net::HTTPRequest] request to send
# @return [Array] two elements: the response, and the size-limited body
#   (nil when the response was not a Net::HTTPSuccess)
def fetch_response(uri, request)
  captured_body = nil
  connection_options = {
    use_ssl: uri.scheme == "https",
    open_timeout: OPEN_TIMEOUT,
    read_timeout: READ_TIMEOUT,
    write_timeout: WRITE_TIMEOUT
  }

  response = Net::HTTP.start(uri.host, uri.port, **connection_options) do |http|
    http.request(request) do |streamed_response|
      next unless streamed_response.is_a?(Net::HTTPSuccess)

      captured_body = limited_body(streamed_response)
    end
  end

  [response, captured_body]
end
|
|
73
|
+
|
|
74
|
+
# Reads a response body while enforcing MAX_BODY_BYTES.
#
# Responses that support +read_body+ are consumed chunk by chunk and the
# read is aborted as soon as the limit would be exceeded; anything else has
# its +body+ converted to a string and checked afterwards.
#
# @param response [#read_body, #body]
# @return [String] the body
# @raise [Error] when the body is larger than MAX_BODY_BYTES
def limited_body(response)
  unless response.respond_to?(:read_body)
    buffered = response.body.to_s
    raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes" if buffered.bytesize > MAX_BODY_BYTES

    return buffered
  end

  collected = +""
  response.read_body do |chunk|
    piece = chunk.to_s
    # Checked before appending, so the buffer never grows past the limit.
    if collected.bytesize + piece.bytesize > MAX_BODY_BYTES
      raise Error, "Pricing snapshot response exceeds #{MAX_BODY_BYTES} bytes"
    end

    collected << piece
  end
  collected
end
|
|
92
|
+
|
|
64
93
|
def build_response(response, not_modified:, body: response.body)
|
|
65
94
|
Response.new(
|
|
66
95
|
body: body,
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
require "json"
|
|
4
4
|
require "yaml"
|
|
5
5
|
|
|
6
|
+
require_relative "../price_registry"
|
|
7
|
+
|
|
6
8
|
module LlmCostTracker
|
|
7
9
|
module PriceSync
|
|
8
10
|
class RegistryLoader
|
|
@@ -18,6 +20,10 @@ module LlmCostTracker
|
|
|
18
20
|
private
|
|
19
21
|
|
|
20
22
|
def load_registry_file(path)
|
|
23
|
+
if File.size(path) > PriceRegistry::MAX_FILE_BYTES
|
|
24
|
+
raise ArgumentError, "pricing registry exceeds #{PriceRegistry::MAX_FILE_BYTES} bytes"
|
|
25
|
+
end
|
|
26
|
+
|
|
21
27
|
contents = File.read(path)
|
|
22
28
|
registry = yaml_file?(path) ? (YAML.safe_load(contents, aliases: false) || {}) : JSON.parse(contents)
|
|
23
29
|
raise ArgumentError, "pricing registry must be a hash" unless registry.is_a?(Hash)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module Pricing
|
|
5
|
+
# Immutable set of the four per-category prices resolved for one usage record.
EffectivePriceSet = Data.define(:input, :cache_read_input, :cache_write_input, :output) do
  # @return [Hash{Symbol => Object}] the four prices keyed by category
  def to_h
    { input:, cache_read_input:, cache_write_input:, output: }
  end

  # @return [Boolean] true when no category price is nil
  def complete? = missing_keys.empty?

  # @return [Array<Symbol>] categories whose price is nil
  def missing_keys
    to_h.select { |_category, price| price.nil? }.keys
  end
end
|
|
23
|
+
|
|
24
|
+
module EffectivePrices
  class << self
    # Resolves the price that applies to each token category of +usage+.
    #
    # A category with no positive token count is priced at 0.0. Cache
    # categories fall back to the input price when they have no price of
    # their own.
    #
    # @param usage [#input_tokens, #cache_read_input_tokens, #cache_write_input_tokens, #output_tokens]
    # @param prices [Hash{Symbol => Object}] price table
    # @param pricing_mode [Object, nil] optional mode used as a key prefix
    # @return [EffectivePriceSet]
    def call(usage:, prices:, pricing_mode:)
      input_price = price_for_usage(usage.input_tokens, prices, :input, pricing_mode)
      cache_read_price =
        price_for_cache_usage(usage.cache_read_input_tokens, prices, :cache_read_input, pricing_mode)
      cache_write_price =
        price_for_cache_usage(usage.cache_write_input_tokens, prices, :cache_write_input, pricing_mode)
      output_price = price_for_usage(usage.output_tokens, prices, :output, pricing_mode)

      EffectivePriceSet.new(
        input: input_price,
        cache_read_input: cache_read_price,
        cache_write_input: cache_write_price,
        output: output_price
      )
    end

    private

    # Cache price for +key+, falling back to the input price.
    def price_for_cache_usage(tokens, prices, key, pricing_mode)
      if tokens.positive?
        dedicated = price_for(prices, key, pricing_mode)
        dedicated || price_for(prices, :input, pricing_mode)
      else
        0.0
      end
    end

    # Price for +key+, or 0.0 when there are no tokens to price.
    def price_for_usage(tokens, prices, key, pricing_mode)
      return 0.0 unless tokens.positive?

      price_for(prices, key, pricing_mode)
    end

    # Looks up the mode-prefixed entry first, then the plain +key+ entry.
    def price_for(prices, key, pricing_mode)
      mode = normalized_pricing_mode(pricing_mode)
      mode_price = prices[:"#{mode}_#{key}"] if mode
      mode_price || prices[key]
    end

    # @return [String, nil] the trimmed mode, or nil for nil, blank and "standard"
    def normalized_pricing_mode(value)
      return if value.nil?

      cleaned = value.to_s.strip
      cleaned unless cleaned.empty? || cleaned == "standard"
    end
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|