llm_cost_tracker 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +111 -68
- data/Rakefile +2 -0
- data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -2
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +6 -1
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +22 -0
- data/app/views/llm_cost_tracker/calls/index.html.erb +10 -0
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +10 -0
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +46 -0
- data/lib/llm_cost_tracker/assets.rb +6 -11
- data/lib/llm_cost_tracker/configuration.rb +78 -42
- data/lib/llm_cost_tracker/event.rb +2 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +4 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +8 -1
- data/lib/llm_cost_tracker/llm_api_call.rb +8 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +57 -9
- data/lib/llm_cost_tracker/parsed_usage.rb +7 -3
- data/lib/llm_cost_tracker/parsers/anthropic.rb +79 -1
- data/lib/llm_cost_tracker/parsers/base.rb +17 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +59 -6
- data/lib/llm_cost_tracker/parsers/openai.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +55 -1
- data/lib/llm_cost_tracker/parsers/registry.rb +15 -3
- data/lib/llm_cost_tracker/parsers/sse.rb +81 -0
- data/lib/llm_cost_tracker/price_registry.rb +1 -1
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/merger.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/model_catalog.rb +77 -0
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +35 -0
- data/lib/llm_cost_tracker/price_sync/source.rb +29 -0
- data/lib/llm_cost_tracker/price_sync/source_result.rb +7 -0
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +91 -0
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +94 -0
- data/lib/llm_cost_tracker/price_sync/validator.rb +66 -0
- data/lib/llm_cost_tracker/price_sync.rb +310 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +3 -1
- data/lib/llm_cost_tracker/stream_collector.rb +158 -0
- data/lib/llm_cost_tracker/tags_column.rb +8 -0
- data/lib/llm_cost_tracker/tracker.rb +15 -12
- data/lib/llm_cost_tracker/value_helpers.rb +40 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +50 -29
- data/lib/tasks/llm_cost_tracker.rake +116 -0
- data/llm_cost_tracker.gemspec +8 -6
- metadata +24 -8
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
module PriceSync
|
|
5
|
+
# Sanity-checks freshly fetched prices before they are merged into the
# pricing registry.
#
# Each price is classified as:
# * rejected - not a usable price (negative, NaN, or above the hard caps);
# * flagged  - accepted, but input or output moved by more than
#   MAX_RELATIVE_CHANGE relative to the existing registry entry;
# * ok       - accepted silently.
class Validator
  # Outcome of a batch: accepted is a Hash of model => price, rejected and
  # flagged are Arrays of Issue.
  Result = Data.define(:accepted, :rejected, :flagged)
  # One rejected or flagged model together with the reason and both prices.
  Issue = Data.define(:model, :reason, :old_price, :new_price)

  # Hard caps; the rejection messages report them as "$<cap>/1M".
  MAX_INPUT_PER_MILLION = 100.0
  MAX_OUTPUT_PER_MILLION = 500.0
  # Largest tolerated old/new (or new/old) ratio before a price is flagged.
  MAX_RELATIVE_CHANGE = 3.0

  # Validates every merged price against the existing registry.
  #
  # @param merged_prices [Hash] model => price object responding to
  #   #input and #output
  # @param existing_registry [Hash] model => existing registry entry
  #   (string or symbol keys)
  # @return [Result]
  def validate_batch(merged_prices, existing_registry:)
    merged_prices.each_with_object(Result.new(accepted: {}, rejected: [], flagged: [])) do |(model, price), result|
      old_price = normalize_entry(existing_registry[model])
      status, reason = validate(new_price: price, old_price: old_price)

      if status == :rejected
        result.rejected << Issue.new(model: model, reason: reason, old_price: old_price, new_price: price)
        next
      end

      # Flagged prices are still accepted; the issue is reported for review.
      if status == :flagged
        result.flagged << Issue.new(model: model, reason: reason, old_price: old_price, new_price: price)
      end
      result.accepted[model] = price
    end
  end

  private

  # Classifies a single price.
  #
  # @param new_price [#input, #output] candidate price
  # @param old_price [Hash] existing entry with string keys (may be empty)
  # @return [Array(Symbol, String|nil)] status (:rejected, :flagged, :ok)
  #   and a human-readable reason (nil for :ok)
  def validate(new_price:, old_price:)
    return [:rejected, "input must be a non-negative number"] if invalid_amount?(new_price.input)
    return [:rejected, "output must be a non-negative number"] if invalid_amount?(new_price.output)
    return [:rejected, "input > $#{MAX_INPUT_PER_MILLION}/1M"] if new_price.input > MAX_INPUT_PER_MILLION
    return [:rejected, "output > $#{MAX_OUTPUT_PER_MILLION}/1M"] if new_price.output > MAX_OUTPUT_PER_MILLION

    # An existing entry can opt out of the relative-change check only; the
    # hard limits above always apply.
    overrides = Array(old_price["_validator_override"])
    return [:ok, nil] if overrides.include?("skip_relative_change")

    if old_price.any? && changed_too_much?(old_price, new_price)
      return [:flagged, "price changed >#{MAX_RELATIVE_CHANGE}x"]
    end

    [:ok, nil]
  end

  # True for amounts that can never be a real price: negative numbers and
  # NaN. NaN compares false against every limit, so without this check it
  # would be accepted.
  def invalid_amount?(value)
    value.negative? || (value.respond_to?(:nan?) && value.nan?)
  end

  # True when input or output moved by more than MAX_RELATIVE_CHANGE in
  # either direction. A field that is zero on either side is skipped
  # because no ratio can be computed for it.
  def changed_too_much?(old_price, new_price)
    %i[input output].any? do |field|
      old_value = old_price[field.to_s].to_f
      next false if old_value.zero?

      new_value = new_price.public_send(field).to_f
      next false if new_value.zero?

      ratio = [new_value / old_value, old_value / new_value].max
      ratio > MAX_RELATIVE_CHANGE
    end
  end

  # Returns a copy of entry with every key converted to a String; nil
  # becomes an empty Hash.
  def normalize_entry(entry)
    (entry || {}).each_with_object({}) do |(key, value), normalized|
      normalized[key.to_s] = value
    end
  end
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require "json"
|
|
6
|
+
require "yaml"
|
|
7
|
+
|
|
8
|
+
require_relative "price_sync/fetcher"
|
|
9
|
+
require_relative "price_sync/raw_price"
|
|
10
|
+
require_relative "price_sync/source"
|
|
11
|
+
require_relative "price_sync/source_result"
|
|
12
|
+
require_relative "price_sync/model_catalog"
|
|
13
|
+
require_relative "price_sync/merger"
|
|
14
|
+
require_relative "price_sync/validator"
|
|
15
|
+
require_relative "price_sync/sources/litellm"
|
|
16
|
+
require_relative "price_sync/sources/open_router"
|
|
17
|
+
|
|
18
|
+
module LlmCostTracker
|
|
19
|
+
# rubocop:disable Metrics/ModuleLength, Metrics/ClassLength
|
|
20
|
+
# Refreshes the local pricing registry from the configured price sources.
#
# Public entry points are PriceSync.sync (fetch, validate and optionally
# write) and PriceSync.check (same plan, never writes).
module PriceSync
  # Registry file used when no explicit path is given.
  DEFAULT_OUTPUT_PATH = PriceRegistry::DEFAULT_PRICES_PATH
  # Extensions read/written as YAML; every other extension is treated as JSON.
  YAML_EXTENSIONS = %w[.yml .yaml].freeze

  # Per-source summary: how many prices a source returned and its source_version.
  SourceUsage = Data.define(:prices_count, :source_version)

  # Value returned by PriceSync.sync; `written` tells whether the file was updated.
  SyncResult = Data.define(
    :path,
    :updated_models,
    :changes,
    :orphaned_models,
    :failed_sources,
    :discrepancies,
    :rejected,
    :flagged,
    :sources_used,
    :written
  )

  # Value returned by PriceSync.check.
  CheckResult = Data.define(
    :path,
    :changes,
    :orphaned_models,
    :failed_sources,
    :discrepancies,
    :rejected,
    :flagged,
    :sources_used,
    :up_to_date
  )

  # Everything computed for one refresh, shared by sync and check.
  RefreshPlan = Data.define(
    :path,
    :registry,
    :updated_registry,
    :accepted,
    :changes,
    :orphaned_models,
    :failed_sources,
    :discrepancies,
    :rejected,
    :flagged,
    :sources_used,
    :source_results
  ) do
    # True when at least one source returned at least one price.
    def refresh_succeeded?
      source_results.any? { |_source, result| result.prices.any? }
    end

    # True when nothing changed, no source failed and nothing was rejected.
    def up_to_date?
      changes.empty? && failed_sources.empty? && rejected.empty?
    end
  end

  class << self
    # Fetches prices, validates them and writes the updated registry.
    #
    # @param path [String, #to_s] registry file to update
    # @param seed_path [String, #to_s] registry to start from when path does not exist
    # @param preview [Boolean] when true, compute the result but do not write
    # @param strict [Boolean] when true, raise if any source failed or any price was rejected
    # @param fetcher [Fetcher] passed through to every source
    # @param today [Date] date recorded in the registry
    # @return [SyncResult]
    # @raise [Error] in strict mode on failures, or when the registry cannot be loaded
    def sync(path: DEFAULT_OUTPUT_PATH, seed_path: DEFAULT_OUTPUT_PATH, preview: false, strict: false,
             fetcher: Fetcher.new, today: Date.today)
      plan = build_refresh_plan(path: path, seed_path: seed_path, fetcher: fetcher, today: today)
      raise Error, strict_failure_message(plan) if strict_sync_failure?(plan, strict: strict)

      # Only write when at least one source produced prices.
      written = !preview && plan.refresh_succeeded?
      write_registry(plan.path, plan.updated_registry) if written

      SyncResult.new(
        path: plan.path,
        updated_models: plan.changes.keys.sort,
        changes: plan.changes,
        orphaned_models: plan.orphaned_models,
        failed_sources: plan.failed_sources,
        discrepancies: plan.discrepancies,
        rejected: plan.rejected,
        flagged: plan.flagged,
        sources_used: plan.sources_used,
        written: written
      )
    end

    # Computes the same plan as sync but never writes anything.
    #
    # @param path [String, #to_s] registry file to compare against
    # @param seed_path [String, #to_s] registry to start from when path does not exist
    # @param fetcher [Fetcher] passed through to every source
    # @param today [Date] date used when building the updated entries
    # @return [CheckResult]
    # @raise [Error] when the registry cannot be loaded
    def check(path: DEFAULT_OUTPUT_PATH, seed_path: DEFAULT_OUTPUT_PATH, fetcher: Fetcher.new, today: Date.today)
      plan = build_refresh_plan(path: path, seed_path: seed_path, fetcher: fetcher, today: today)

      CheckResult.new(
        path: plan.path,
        changes: plan.changes,
        orphaned_models: plan.orphaned_models,
        failed_sources: plan.failed_sources,
        discrepancies: plan.discrepancies,
        rejected: plan.rejected,
        flagged: plan.flagged,
        sources_used: plan.sources_used,
        up_to_date: plan.up_to_date?
      )
    end

    private

    # Price sources queried on every refresh; a fresh instance per call.
    def sources
      [Sources::Litellm.new, Sources::OpenRouter.new]
    end

    # Loads the registry, fetches and merges all sources, validates the
    # merged prices and assembles the resulting RefreshPlan.
    def build_refresh_plan(path:, seed_path:, fetcher:, today:)
      path = path.to_s
      registry = load_registry(path, seed_path: seed_path)
      current_models = registry.fetch("models", {})
      source_results, failed_sources = fetch_all(current_models, fetcher)
      merged, discrepancies = Merger.new.merge(source_results)
      validated = Validator.new.validate_batch(merged, existing_registry: current_models)
      updated_models = apply_changes(current_models, validated.accepted, today)
      refresh_succeeded = source_results.any? { |_source, result| result.prices.any? }

      RefreshPlan.new(
        path: path,
        registry: registry,
        updated_registry: registry.merge(
          "metadata" => updated_metadata(
            registry["metadata"],
            today,
            refresh_succeeded: refresh_succeeded,
            source_results: source_results
          ),
          "models" => updated_models
        ),
        accepted: validated.accepted,
        changes: price_changes(current_models, updated_models),
        orphaned_models: compute_orphaned(current_models, merged.keys),
        failed_sources: failed_sources,
        discrepancies: discrepancies,
        rejected: validated.rejected,
        flagged: validated.flagged,
        sources_used: source_usage(source_results),
        source_results: source_results
      )
    end

    # Queries every source. A source raising Error is recorded as a failure
    # (name => message) instead of aborting the whole refresh.
    #
    # @return [Array(Hash, Hash)] results and failures, both keyed by source name Symbol
    def fetch_all(current_models, fetcher)
      results = {}
      failures = {}

      sources.each do |source|
        results[source.name.to_sym] = source.fetch(current_models: current_models, fetcher: fetcher)
      rescue Error => e
        failures[source.name.to_sym] = e.message
      end

      [results, failures]
    end

    # Returns the model table with every accepted price applied, sorted by
    # model name. Entries whose "_source" is "manual" are left untouched.
    def apply_changes(current_models, accepted, today)
      merged = seed_models(current_models)

      accepted.each do |model, price|
        next if manual_model?(merged[model])

        merged[model] = registry_entry_for(merged[model], price, today)
      end

      merged.sort.to_h
    end

    # Sorted names of non-manual registry models absent from merged_models.
    #
    # @param current_models [Hash] model => registry entry
    # @param merged_models [Enumerable<String>] model names present in the merged prices
    # @return [Array<String>]
    def compute_orphaned(current_models, merged_models)
      # Index the fetched names once so each lookup is O(1) rather than a
      # linear scan per registry model.
      fetched = merged_models.to_h { |model| [model, true] }

      seed_models(current_models).keys.reject do |model|
        manual_model?(current_models[model]) || fetched.key?(model)
      end.sort
    end

    # Reads and normalizes the registry at path, falling back to seed_path
    # when path does not exist.
    #
    # @raise [Error] when the file is missing, unparsable or malformed
    def load_registry(path, seed_path:)
      source_path = File.exist?(path) ? path : seed_path.to_s
      normalize_registry(load_registry_file(source_path))
    # NoMethodError stays in the list because malformed nested entries
    # (e.g. a scalar where a mapping is expected) can still surface that way.
    rescue Errno::ENOENT, JSON::ParserError, Psych::Exception, ArgumentError, TypeError, NoMethodError => e
      raise Error, "Unable to load pricing registry #{source_path.inspect}: #{e.message}"
    end

    # Parses the file as YAML (empty document => {}) or JSON, chosen by extension.
    def load_registry_file(path)
      contents = File.read(path)
      return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)

      JSON.parse(contents)
    end

    # Reduces a parsed registry to its "metadata" and "models" sections with
    # string keys.
    #
    # @raise [TypeError] when the parsed document is not a mapping
    def normalize_registry(registry)
      # Fail with a clear message instead of whatever error `fetch` would
      # raise on an Array, String or nil document.
      unless registry.is_a?(Hash)
        raise TypeError, "expected a mapping at the top level, got #{registry.class}"
      end

      {
        "metadata" => normalize_hash(registry.fetch("metadata", {})),
        "models" => normalize_models(registry.fetch("models", {}))
      }
    end

    # Stringifies model names and the keys of each model entry.
    def normalize_models(models)
      (models || {}).each_with_object({}) do |(model, entry), normalized|
        normalized[model.to_s] = normalize_hash(entry)
      end
    end

    # Returns a copy of hash with String keys; nil becomes {}.
    def normalize_hash(hash)
      (hash || {}).each_with_object({}) do |(key, value), normalized|
        normalized[key.to_s] = value
      end
    end

    # Normalized models where entries lacking "_source" are tagged "seed".
    def seed_models(current_models)
      normalize_models(current_models).transform_values do |entry|
        next entry if entry.key?("_source")

        entry.merge("_source" => "seed")
      end
    end

    # True when the entry's "_source" is "manual".
    def manual_model?(entry)
      normalize_hash(entry)["_source"] == "manual"
    end

    # Builds the new registry entry: existing non-price keys are kept and
    # the price keys are replaced by price.to_registry_entry.
    def registry_entry_for(existing_entry, price, today)
      normalize_hash(existing_entry)
        .except(*PriceRegistry::PRICE_KEYS)
        .merge(price.to_registry_entry(today: today))
    end

    # Fills in default currency/unit and, only when the refresh succeeded,
    # stamps "updated_at" and "source_urls".
    def updated_metadata(existing, today, refresh_succeeded:, source_results:)
      metadata = normalize_hash(existing)
      metadata["currency"] ||= "USD"
      metadata["unit"] ||= "1M tokens"
      return metadata unless refresh_succeeded

      metadata["updated_at"] = today.iso8601
      metadata["source_urls"] = source_urls(source_results)
      metadata
    end

    # Maps each source name to a SourceUsage summary.
    def source_usage(source_results)
      source_results.transform_values do |result|
        SourceUsage.new(prices_count: result.prices.size, source_version: result.source_version)
      end
    end

    # Per-model price differences between the two model tables.
    #
    # @return [Hash] model => { field => { "from" => old, "to" => new } },
    #   containing only models with at least one changed price field
    def price_changes(current_models, updated_models)
      current_models = normalize_models(current_models)
      updated_models = normalize_models(updated_models)

      (current_models.keys | updated_models.keys).sort.each_with_object({}) do |model, changes|
        fields = price_field_changes(current_models[model], updated_models[model])
        changes[model] = fields if fields.any?
      end
    end

    # Field-level differences between two entries, restricted to PRICE_KEYS.
    def price_field_changes(current_entry, updated_entry)
      current_price = comparable_price(current_entry)
      updated_price = comparable_price(updated_entry)

      (current_price.keys | updated_price.keys).sort.each_with_object({}) do |field, changes|
        from = current_price[field]
        to = updated_price[field]
        next if from == to

        changes[field] = { "from" => from, "to" => to }
      end
    end

    # The entry reduced to its price keys.
    def comparable_price(entry)
      normalize_hash(entry).slice(*PriceRegistry::PRICE_KEYS)
    end

    # True when strict mode is on and any source failed or any price was rejected.
    def strict_sync_failure?(plan, strict:)
      strict && (plan.failed_sources.any? || plan.rejected.any?)
    end

    # Builds the strict-mode error message from source failures and rejections.
    def strict_failure_message(plan)
      messages = []
      if plan.failed_sources.any?
        details = plan.failed_sources.map { |source, message| "#{source}: #{message}" }.join(", ")
        messages << "source failures: #{details}"
      end
      if plan.rejected.any?
        details = plan.rejected.map { |issue| "#{issue.model} (#{issue.reason})" }.join(", ")
        messages << "validator rejections: #{details}"
      end
      "Price sync failed in strict mode: #{messages.join('; ')}"
    end

    # URLs of the sources that produced a result, in `sources` order.
    def source_urls(source_results)
      names = source_results.keys.map(&:to_sym)
      sources.select { |source| names.include?(source.name.to_sym) }.map(&:url)
    end

    # Serializes the registry as YAML or pretty JSON (by extension) and
    # writes it, creating the parent directory if needed.
    def write_registry(path, registry)
      FileUtils.mkdir_p(File.dirname(path))
      payload = yaml_file?(path) ? YAML.dump(registry) : "#{JSON.pretty_generate(registry)}\n"
      File.write(path, payload)
    end

    # True when the path has a YAML extension (case-insensitive).
    def yaml_file?(path)
      YAML_EXTENSIONS.include?(File.extname(path).downcase)
    end
  end
end
|
|
309
|
+
# rubocop:enable Metrics/ModuleLength, Metrics/ClassLength
|
|
310
|
+
end
|
|
@@ -19,7 +19,9 @@ module LlmCostTracker
|
|
|
19
19
|
tags: tags_for_storage(tags),
|
|
20
20
|
tracked_at: event.tracked_at
|
|
21
21
|
}
|
|
22
|
-
attributes[:latency_ms]
|
|
22
|
+
attributes[:latency_ms] = event.latency_ms if model_class.latency_column?
|
|
23
|
+
attributes[:stream] = event.stream if model_class.stream_column?
|
|
24
|
+
attributes[:usage_source] = event.usage_source if model_class.usage_source_column?
|
|
23
25
|
|
|
24
26
|
model_class.create!(attributes)
|
|
25
27
|
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "monitor"
|
|
4
|
+
|
|
5
|
+
require_relative "value_helpers"
|
|
6
|
+
|
|
7
|
+
module LlmCostTracker
|
|
8
|
+
# Accumulates the events of one streamed LLM response and records a single
# tracked call when the stream is finished.
#
# Mutable state is guarded by a Monitor. After finish! has run, any further
# model=/event/usage call raises FrozenError and a second finish! is a no-op.
class StreamCollector
  attr_reader :provider

  # @param provider [#to_s] provider name, stored as a String
  # @param model [Object] model identifier; may be changed later via #model=
  # @param latency_ms [Integer, nil] explicit latency; when nil the time
  #   elapsed since construction is used at finish!
  # @param metadata [Hash, nil] extra metadata, deep-copied on the way in
  def initialize(provider:, model:, latency_ms: nil, metadata: {})
    @monitor = Monitor.new
    @provider = provider.to_s
    @model = model
    @latency_ms = latency_ms
    @metadata = ValueHelpers.deep_dup(metadata || {})
    @events = []
    @explicit_usage = nil
    @finished = false
    @started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Current model identifier.
  def model
    @monitor.synchronize { @model }
  end

  # Deep copy of the metadata, so callers cannot mutate the collector's state.
  def metadata
    @monitor.synchronize { ValueHelpers.deep_dup(@metadata) }
  end

  # Replaces the model identifier.
  #
  # @raise [FrozenError] when the collector is already finished
  def model=(value)
    @monitor.synchronize do
      ensure_open!
      @model = value
    end
  end

  # Appends one stream event (a deep copy of data); nil data is ignored.
  #
  # @param data [Object, nil] event payload
  # @param type [Object, nil] event type stored alongside the payload
  # @return [self]
  # @raise [FrozenError] when the collector is already finished
  def event(data, type: nil)
    @monitor.synchronize do
      ensure_open!
      next if data.nil?

      @events << { event: type, data: ValueHelpers.deep_dup(data) }
    end
    self
  end
  alias chunk event

  # Supplies token counts explicitly; when set, they are used at finish!
  # instead of parsing the collected events.
  #
  # @param input_tokens [#to_i]
  # @param output_tokens [#to_i]
  # @param extra [Hash] additional keywords forwarded to ParsedUsage.build
  # @return [self]
  # @raise [FrozenError] when the collector is already finished
  def usage(input_tokens:, output_tokens:, **extra)
    @monitor.synchronize do
      ensure_open!
      counts = { input_tokens: input_tokens.to_i, output_tokens: output_tokens.to_i }
      @explicit_usage = ValueHelpers.deep_dup(extra.merge(counts))
    end
    self
  end

  # Closes the collector and records the call through Tracker.record.
  #
  # @param errored [Boolean] adds stream_errored: true to the metadata
  # @return [Object, nil] Tracker.record's result, or nil when already finished
  def finish!(errored: false)
    snapshot = close_and_snapshot
    return if snapshot.nil?

    parsed = build_parsed_usage(snapshot)
    Tracker.record(
      provider: parsed.provider,
      model: parsed.model,
      input_tokens: parsed.input_tokens,
      output_tokens: parsed.output_tokens,
      latency_ms: snapshot[:latency_ms] || elapsed_ms,
      stream: true,
      usage_source: parsed.usage_source,
      metadata: error_metadata(errored).merge(snapshot[:metadata]).merge(parsed.metadata)
    )
  end

  private

  # Marks the collector finished and returns a deep-copied snapshot of its
  # state, or nil when it was already finished.
  def close_and_snapshot
    @monitor.synchronize do
      next nil if @finished

      @finished = true
      {
        events: ValueHelpers.deep_dup(@events),
        explicit_usage: ValueHelpers.deep_dup(@explicit_usage),
        model: @model,
        latency_ms: @latency_ms,
        metadata: ValueHelpers.deep_dup(@metadata)
      }
    end
  end

  # Guard used by every mutator.
  def ensure_open!
    raise FrozenError, "can't modify finished LlmCostTracker::StreamCollector" if @finished
  end

  # Picks the usage source: explicit usage first, then the provider's stream
  # parser, then a zero-token "unknown" placeholder.
  def build_parsed_usage(snapshot)
    return build_from_explicit_usage(snapshot) if snapshot[:explicit_usage]

    parser = Parsers::Registry.find_for_provider(@provider)
    parsed = parser&.parse_stream(nil, nil, 200, snapshot[:events])
    parsed ? finalize(parsed, snapshot) : build_unknown_usage(snapshot)
  end

  # Forces the collector's provider onto the parsed usage and falls back to
  # the collector's model when the parser did not yield a usable one.
  def finalize(parsed, snapshot)
    resolved_model = present_model(parsed.model) || snapshot[:model]
    parsed.with(provider: @provider, model: resolved_model)
  end

  # The value as a String, or nil when it is nil, empty or "unknown".
  def present_model(value)
    return nil if value.nil?

    name = value.to_s
    name.empty? || name == "unknown" ? nil : name
  end

  # Builds usage from the counts given to #usage; remaining keywords are
  # forwarded as-is.
  def build_from_explicit_usage(snapshot)
    explicit = snapshot[:explicit_usage]
    input, output = explicit.values_at(:input_tokens, :output_tokens)
    extras = explicit.reject { |key, _value| %i[input_tokens output_tokens].include?(key) }

    ParsedUsage.build(
      provider: @provider,
      model: snapshot[:model],
      input_tokens: input,
      output_tokens: output,
      total_tokens: input + output,
      stream: true,
      usage_source: :manual,
      **extras
    )
  end

  # Zero-token usage recorded when no usage information is available.
  def build_unknown_usage(snapshot)
    ParsedUsage.build(
      provider: @provider,
      model: snapshot[:model],
      input_tokens: 0,
      output_tokens: 0,
      total_tokens: 0,
      stream: true,
      usage_source: :unknown
    )
  end

  # Metadata fragment marking an errored stream; empty otherwise.
  def error_metadata(errored)
    return {} unless errored

    { stream_errored: true }
  end

  # Milliseconds since the collector was created, from the monotonic clock.
  def elapsed_ms
    seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @started_at
    (seconds * 1000).round
  end
end
|
|
158
|
+
end
|
|
@@ -6,21 +6,15 @@ module LlmCostTracker
|
|
|
6
6
|
class Tracker
|
|
7
7
|
EVENT_NAME = "llm_request.llm_cost_tracker"
|
|
8
8
|
|
|
9
|
+
USAGE_SOURCES = %i[response stream_final manual unknown].freeze
|
|
10
|
+
|
|
9
11
|
class << self
|
|
10
12
|
def enforce_budget!
|
|
11
13
|
Budget.enforce!
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# @param provider [String] Provider name.
|
|
17
|
-
# @param model [String] Model identifier.
|
|
18
|
-
# @param input_tokens [Integer] Input token count.
|
|
19
|
-
# @param output_tokens [Integer] Output token count.
|
|
20
|
-
# @param metadata [Hash] Attribution tags plus provider-specific usage metadata.
|
|
21
|
-
# @param latency_ms [Integer, nil] Optional latency in milliseconds.
|
|
22
|
-
# @return [LlmCostTracker::Event]
|
|
23
|
-
def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
|
|
16
|
+
def record(provider:, model:, input_tokens:, output_tokens:,
|
|
17
|
+
metadata: {}, latency_ms: nil, stream: false, usage_source: nil)
|
|
24
18
|
usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
|
|
25
19
|
|
|
26
20
|
cost_data = Pricing.cost_for(
|
|
@@ -43,13 +37,13 @@ module LlmCostTracker
|
|
|
43
37
|
cost: cost_data,
|
|
44
38
|
tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)).freeze,
|
|
45
39
|
latency_ms: normalized_latency_ms(latency_ms),
|
|
40
|
+
stream: stream ? true : false,
|
|
41
|
+
usage_source: normalized_usage_source(usage_source),
|
|
46
42
|
tracked_at: Time.now.utc
|
|
47
43
|
)
|
|
48
44
|
|
|
49
|
-
# Emit ActiveSupport::Notifications event
|
|
50
45
|
ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
|
|
51
46
|
|
|
52
|
-
# Store based on backend
|
|
53
47
|
stored = store(event)
|
|
54
48
|
Budget.check!(event) unless stored == false
|
|
55
49
|
|
|
@@ -77,6 +71,8 @@ module LlmCostTracker
|
|
|
77
71
|
"tokens=#{event.input_tokens}+#{event.output_tokens} " \
|
|
78
72
|
"cost=#{log_cost_label(event)}"
|
|
79
73
|
message += " latency=#{event.latency_ms}ms" if event.latency_ms
|
|
74
|
+
message += " stream=#{event.stream}" if event.stream
|
|
75
|
+
message += " source=#{event.usage_source}" if event.usage_source
|
|
80
76
|
message += " tags=#{event.tags}" unless event.tags.empty?
|
|
81
77
|
|
|
82
78
|
Logging.log(config.log_level, message)
|
|
@@ -119,6 +115,13 @@ module LlmCostTracker
|
|
|
119
115
|
|
|
120
116
|
[latency_ms.to_i, 0].max
|
|
121
117
|
end
|
|
118
|
+
|
|
119
|
+
# Normalizes a usage-source value to the String name of one of USAGE_SOURCES.
#
# @param value [#to_sym, nil] candidate usage source
# @return [String, nil] the source name, or nil when value is nil, cannot
#   be converted to a Symbol, or is not listed in USAGE_SOURCES
def normalized_usage_source(value)
  # nil and other objects without #to_sym (e.g. Integers) are treated like
  # any other unrecognized source instead of raising NoMethodError.
  return nil unless value.respond_to?(:to_sym)

  symbol = value.to_sym
  USAGE_SOURCES.include?(symbol) ? symbol.to_s : nil
end
|
|
122
125
|
end
|
|
123
126
|
end
|
|
124
127
|
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
|
|
4
|
+
# Recursive copy/freeze helpers for plain Ruby data (Hash, Array, String).
module ValueHelpers
  class << self
    # Returns a recursive copy of value.
    #
    # Hashes (keys and values), Arrays and Strings are duplicated; every
    # other object is returned as-is and therefore shared with the original.
    #
    # @param value [Object]
    # @return [Object] the copy, or value itself for other types
    def deep_dup(value)
      case value
      when Hash
        value.each_with_object({}) do |(key, nested_value), duplicated|
          duplicated[deep_dup(key)] = deep_dup(nested_value)
        end
      when Array
        value.map { |nested_value| deep_dup(nested_value) }
      when String
        value.dup
      else
        value
      end
    end

    # Freezes value in place, recursing into Hash keys/values and Array
    # elements. Objects other than Hash, Array and String are left untouched.
    #
    # @param value [Object]
    # @return [Object] the same object that was passed in
    def deep_freeze(value)
      case value
      when Hash
        value.each do |key, nested_value|
          deep_freeze(key)
          deep_freeze(nested_value)
        end
        # freeze is idempotent and returns the receiver, so no frozen? guard
        # is needed.
        value.freeze
      when Array
        value.each { |nested_value| deep_freeze(nested_value) }
        value.freeze
      when String
        value.freeze
      else
        value
      end
    end
  end
end
|
|
40
|
+
end
|