llm_cost_tracker 0.2.0.alpha2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/README.md +114 -70
- data/Rakefile +2 -0
- data/app/assets/llm_cost_tracker/application.css +760 -0
- data/app/controllers/llm_cost_tracker/application_controller.rb +1 -7
- data/app/controllers/llm_cost_tracker/assets_controller.rb +12 -0
- data/app/controllers/llm_cost_tracker/calls_controller.rb +29 -12
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +5 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +46 -5
- data/app/helpers/llm_cost_tracker/chart_helper.rb +133 -0
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +47 -0
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +34 -0
- data/app/helpers/llm_cost_tracker/dashboard_query_helper.rb +58 -0
- data/app/helpers/llm_cost_tracker/pagination_helper.rb +18 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/filter.rb +22 -3
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +16 -1
- data/app/services/llm_cost_tracker/dashboard/spend_anomaly.rb +79 -0
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +19 -46
- data/app/services/llm_cost_tracker/dashboard/top_models.rb +17 -8
- data/app/services/llm_cost_tracker/pagination.rb +6 -0
- data/app/views/layouts/llm_cost_tracker/application.html.erb +35 -333
- data/app/views/llm_cost_tracker/calls/index.html.erb +116 -74
- data/app/views/llm_cost_tracker/calls/show.html.erb +58 -1
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +211 -111
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +224 -78
- data/app/views/llm_cost_tracker/errors/database.html.erb +3 -3
- data/app/views/llm_cost_tracker/errors/invalid_filter.html.erb +3 -3
- data/app/views/llm_cost_tracker/errors/not_found.html.erb +3 -3
- data/app/views/llm_cost_tracker/models/index.html.erb +66 -58
- data/app/views/llm_cost_tracker/shared/_active_filters.html.erb +16 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +23 -0
- data/app/views/llm_cost_tracker/shared/_spend_chart.html.erb +18 -0
- data/app/views/llm_cost_tracker/shared/_tag_chips.html.erb +15 -0
- data/app/views/llm_cost_tracker/shared/setup_required.html.erb +3 -2
- data/app/views/llm_cost_tracker/tags/index.html.erb +55 -12
- data/app/views/llm_cost_tracker/tags/show.html.erb +88 -39
- data/config/routes.rb +3 -0
- data/lib/llm_cost_tracker/assets.rb +19 -0
- data/lib/llm_cost_tracker/configuration.rb +78 -42
- data/lib/llm_cost_tracker/engine.rb +2 -0
- data/lib/llm_cost_tracker/event.rb +2 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/add_streaming_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/add_streaming_to_llm_api_calls.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_api_calls.rb.erb +4 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/llm_cost_tracker_prices.yml.erb +8 -1
- data/lib/llm_cost_tracker/llm_api_call.rb +9 -1
- data/lib/llm_cost_tracker/middleware/faraday.rb +57 -9
- data/lib/llm_cost_tracker/parsed_usage.rb +7 -3
- data/lib/llm_cost_tracker/parsers/anthropic.rb +79 -1
- data/lib/llm_cost_tracker/parsers/base.rb +17 -5
- data/lib/llm_cost_tracker/parsers/gemini.rb +59 -6
- data/lib/llm_cost_tracker/parsers/openai.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +8 -0
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +55 -1
- data/lib/llm_cost_tracker/parsers/registry.rb +15 -3
- data/lib/llm_cost_tracker/parsers/sse.rb +81 -0
- data/lib/llm_cost_tracker/price_registry.rb +18 -7
- data/lib/llm_cost_tracker/price_sync/fetcher.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/merger.rb +72 -0
- data/lib/llm_cost_tracker/price_sync/model_catalog.rb +77 -0
- data/lib/llm_cost_tracker/price_sync/raw_price.rb +35 -0
- data/lib/llm_cost_tracker/price_sync/source.rb +29 -0
- data/lib/llm_cost_tracker/price_sync/source_result.rb +7 -0
- data/lib/llm_cost_tracker/price_sync/sources/litellm.rb +91 -0
- data/lib/llm_cost_tracker/price_sync/sources/open_router.rb +94 -0
- data/lib/llm_cost_tracker/price_sync/validator.rb +66 -0
- data/lib/llm_cost_tracker/price_sync.rb +310 -0
- data/lib/llm_cost_tracker/pricing.rb +19 -6
- data/lib/llm_cost_tracker/retention.rb +34 -0
- data/lib/llm_cost_tracker/storage/active_record_store.rb +3 -1
- data/lib/llm_cost_tracker/stream_collector.rb +158 -0
- data/lib/llm_cost_tracker/tag_query.rb +7 -2
- data/lib/llm_cost_tracker/tags_column.rb +21 -1
- data/lib/llm_cost_tracker/tracker.rb +15 -12
- data/lib/llm_cost_tracker/value_helpers.rb +40 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +51 -29
- data/lib/tasks/llm_cost_tracker.rake +124 -0
- data/llm_cost_tracker.gemspec +9 -8
- metadata +40 -12
- data/PLAN_0.2.md +0 -488
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require "json"
|
|
6
|
+
require "yaml"
|
|
7
|
+
|
|
8
|
+
require_relative "price_sync/fetcher"
|
|
9
|
+
require_relative "price_sync/raw_price"
|
|
10
|
+
require_relative "price_sync/source"
|
|
11
|
+
require_relative "price_sync/source_result"
|
|
12
|
+
require_relative "price_sync/model_catalog"
|
|
13
|
+
require_relative "price_sync/merger"
|
|
14
|
+
require_relative "price_sync/validator"
|
|
15
|
+
require_relative "price_sync/sources/litellm"
|
|
16
|
+
require_relative "price_sync/sources/open_router"
|
|
17
|
+
|
|
18
|
+
module LlmCostTracker
  # rubocop:disable Metrics/ModuleLength, Metrics/ClassLength
  # Refreshes a pricing registry file (YAML or JSON) from the price sources
  # listed in {sources} and reports which model prices changed.
  #
  # The two entry points are {sync} (may rewrite the file) and {check}
  # (read-only comparison). Both are built on the same RefreshPlan.
  module PriceSync
    # Registry file that is read and, unless previewing, rewritten by default.
    DEFAULT_OUTPUT_PATH = PriceRegistry::DEFAULT_PRICES_PATH
    # Paths with these extensions are read/written as YAML; anything else as JSON.
    YAML_EXTENSIONS = %w[.yml .yaml].freeze

    # Per-source summary: number of prices returned and the source's reported version.
    SourceUsage = Data.define(:prices_count, :source_version)

    # Result of {sync}. +written+ is true only when the registry file was rewritten.
    SyncResult = Data.define(
      :path,
      :updated_models,
      :changes,
      :orphaned_models,
      :failed_sources,
      :discrepancies,
      :rejected,
      :flagged,
      :sources_used,
      :written
    )

    # Result of {check}. +up_to_date+ mirrors RefreshPlan#up_to_date?.
    CheckResult = Data.define(
      :path,
      :changes,
      :orphaned_models,
      :failed_sources,
      :discrepancies,
      :rejected,
      :flagged,
      :sources_used,
      :up_to_date
    )

    # Everything computed for one refresh run, shared by {sync} and {check}.
    RefreshPlan = Data.define(
      :path,
      :registry,
      :updated_registry,
      :accepted,
      :changes,
      :orphaned_models,
      :failed_sources,
      :discrepancies,
      :rejected,
      :flagged,
      :sources_used,
      :source_results
    ) do
      # @return [Boolean] true when at least one source returned one or more prices.
      def refresh_succeeded?
        source_results.any? { |_source, result| result.prices.any? }
      end

      # @return [Boolean] true when nothing changed, no source failed and nothing was rejected.
      def up_to_date?
        changes.empty? && failed_sources.empty? && rejected.empty?
      end
    end

    class << self
      # Fetches current prices and, unless previewing, rewrites the registry file.
      #
      # The file is written only when +preview+ is false and at least one
      # source returned prices.
      #
      # @param path [#to_s] registry file to update.
      # @param seed_path [#to_s] registry to read when +path+ does not exist yet.
      # @param preview [Boolean] when true, compute the result without writing.
      # @param strict [Boolean] when true, raise if any source failed or any price was rejected.
      # @param fetcher [Object] handed to every source's +fetch+ call.
      # @param today [Date] date stamped into refreshed entries and metadata.
      # @return [SyncResult]
      # @raise [Error] in strict mode on source failures or validator rejections,
      #   or when the registry cannot be loaded.
      def sync(path: DEFAULT_OUTPUT_PATH, seed_path: DEFAULT_OUTPUT_PATH, preview: false, strict: false,
               fetcher: Fetcher.new, today: Date.today)
        plan = build_refresh_plan(path: path, seed_path: seed_path, fetcher: fetcher, today: today)
        raise Error, strict_failure_message(plan) if strict_sync_failure?(plan, strict: strict)

        written = !preview && plan.refresh_succeeded?
        write_registry(plan.path, plan.updated_registry) if written

        SyncResult.new(
          path: plan.path,
          updated_models: plan.changes.keys.sort,
          changes: plan.changes,
          orphaned_models: plan.orphaned_models,
          failed_sources: plan.failed_sources,
          discrepancies: plan.discrepancies,
          rejected: plan.rejected,
          flagged: plan.flagged,
          sources_used: plan.sources_used,
          written: written
        )
      end

      # Compares the registry with freshly fetched prices without writing anything.
      #
      # @param path [#to_s] registry file to compare.
      # @param seed_path [#to_s] registry to read when +path+ does not exist.
      # @param fetcher [Object] handed to every source's +fetch+ call.
      # @param today [Date] date used when computing the would-be registry.
      # @return [CheckResult]
      # @raise [Error] when the registry cannot be loaded.
      def check(path: DEFAULT_OUTPUT_PATH, seed_path: DEFAULT_OUTPUT_PATH, fetcher: Fetcher.new, today: Date.today)
        plan = build_refresh_plan(path: path, seed_path: seed_path, fetcher: fetcher, today: today)

        CheckResult.new(
          path: plan.path,
          changes: plan.changes,
          orphaned_models: plan.orphaned_models,
          failed_sources: plan.failed_sources,
          discrepancies: plan.discrepancies,
          rejected: plan.rejected,
          flagged: plan.flagged,
          sources_used: plan.sources_used,
          up_to_date: plan.up_to_date?
        )
      end

      private

      # Price sources consulted on every refresh; fresh instances per call.
      def sources
        [Sources::Litellm.new, Sources::OpenRouter.new]
      end

      # Loads the registry, fetches and merges source prices, validates them and
      # assembles the RefreshPlan describing the would-be registry.
      def build_refresh_plan(path:, seed_path:, fetcher:, today:)
        path = path.to_s
        registry = load_registry(path, seed_path: seed_path)
        current_models = registry.fetch("models", {})
        source_results, failed_sources = fetch_all(current_models, fetcher)
        merged, discrepancies = Merger.new.merge(source_results)
        validated = Validator.new.validate_batch(merged, existing_registry: current_models)
        updated_models = apply_changes(current_models, validated.accepted, today)
        # Same condition as RefreshPlan#refresh_succeeded?; needed here before the plan exists.
        refresh_succeeded = source_results.any? { |_source, result| result.prices.any? }

        RefreshPlan.new(
          path: path,
          registry: registry,
          updated_registry: registry.merge(
            "metadata" => updated_metadata(
              registry["metadata"],
              today,
              refresh_succeeded: refresh_succeeded,
              source_results: source_results
            ),
            "models" => updated_models
          ),
          accepted: validated.accepted,
          changes: price_changes(current_models, updated_models),
          orphaned_models: compute_orphaned(current_models, merged.keys),
          failed_sources: failed_sources,
          discrepancies: discrepancies,
          rejected: validated.rejected,
          flagged: validated.flagged,
          sources_used: source_usage(source_results),
          source_results: source_results
        )
      end

      # Fetches from every source, keeping going when one raises Error.
      #
      # @return [Array(Hash, Hash)] results keyed by source name, and
      #   failure messages keyed by source name.
      def fetch_all(current_models, fetcher)
        results = {}
        failures = {}

        sources.each do |source|
          results[source.name.to_sym] = source.fetch(current_models: current_models, fetcher: fetcher)
        rescue Error => e
          failures[source.name.to_sym] = e.message
        end

        [results, failures]
      end

      # Overlays accepted prices on the current models, leaving entries whose
      # "_source" is "manual" untouched. Returns the models sorted by name.
      def apply_changes(current_models, accepted, today)
        merged = seed_models(current_models)

        accepted.each do |model, price|
          next if manual_model?(merged[model])

          merged[model] = registry_entry_for(merged[model], price, today)
        end

        merged.sort.to_h
      end

      # Non-manual models present in the registry but absent from the merged source data.
      def compute_orphaned(current_models, merged_models)
        seed_models(current_models).keys.reject do |model|
          manual_model?(current_models[model]) || merged_models.include?(model)
        end.sort
      end

      # Reads +path+ if it exists, otherwise +seed_path+, and normalizes the result.
      #
      # Any filesystem error (not just a missing file) and any parse or shape
      # error is re-raised as Error so callers see a single failure type.
      def load_registry(path, seed_path:)
        source_path = File.exist?(path) ? path : seed_path.to_s
        normalize_registry(load_registry_file(source_path))
      rescue SystemCallError, JSON::ParserError, Psych::Exception, ArgumentError, TypeError, NoMethodError => e
        # source_path is still nil if File.exist? itself raised; fall back to the requested path.
        raise Error, "Unable to load pricing registry #{(source_path || path).inspect}: #{e.message}"
      end

      # Parses the file as YAML (aliases disabled) or JSON depending on its extension.
      def load_registry_file(path)
        contents = File.read(path)
        return YAML.safe_load(contents, aliases: false) || {} if yaml_file?(path)

        JSON.parse(contents)
      end

      # Reduces a parsed registry to its "metadata" and "models" sections with string keys.
      def normalize_registry(registry)
        {
          "metadata" => normalize_hash(registry.fetch("metadata", {})),
          "models" => normalize_models(registry.fetch("models", {}))
        }
      end

      # String-keys the model names and each model's entry; nil becomes {}.
      def normalize_models(models)
        (models || {}).each_with_object({}) do |(model, entry), normalized|
          normalized[model.to_s] = normalize_hash(entry)
        end
      end

      # Shallow copy of +hash+ with every key converted to a String; nil becomes {}.
      def normalize_hash(hash)
        (hash || {}).each_with_object({}) do |(key, value), normalized|
          normalized[key.to_s] = value
        end
      end

      # Marks every entry that has no "_source" yet as coming from the seed file.
      def seed_models(current_models)
        normalize_models(current_models).transform_values do |entry|
          next entry if entry.key?("_source")

          entry.merge("_source" => "seed")
        end
      end

      # @return [Boolean] true when the entry's "_source" is "manual".
      def manual_model?(entry)
        normalize_hash(entry)["_source"] == "manual"
      end

      # Replaces the price fields of +existing_entry+ with those of +price+,
      # keeping every non-price key of the existing entry.
      def registry_entry_for(existing_entry, price, today)
        normalize_hash(existing_entry)
          .except(*PriceRegistry::PRICE_KEYS)
          .merge(price.to_registry_entry(today: today))
      end

      # Fills in default currency/unit and, only after a successful refresh,
      # stamps the update date and the URLs of the sources that responded.
      def updated_metadata(existing, today, refresh_succeeded:, source_results:)
        metadata = normalize_hash(existing)
        metadata["currency"] ||= "USD"
        metadata["unit"] ||= "1M tokens"
        return metadata unless refresh_succeeded

        metadata["updated_at"] = today.iso8601
        metadata["source_urls"] = source_urls(source_results)
        metadata
      end

      # Summarizes each source result as a SourceUsage.
      def source_usage(source_results)
        source_results.transform_values do |result|
          SourceUsage.new(prices_count: result.prices.size, source_version: result.source_version)
        end
      end

      # Per-model price differences between two model tables.
      #
      # @return [Hash{String => Hash}] only models with at least one changed price field.
      def price_changes(current_models, updated_models)
        current_models = normalize_models(current_models)
        updated_models = normalize_models(updated_models)

        (current_models.keys | updated_models.keys).sort.each_with_object({}) do |model, changes|
          fields = price_field_changes(current_models[model], updated_models[model])
          changes[model] = fields if fields.any?
        end
      end

      # Field-level { "from" => old, "to" => new } pairs for price keys whose value differs.
      def price_field_changes(current_entry, updated_entry)
        current_price = comparable_price(current_entry)
        updated_price = comparable_price(updated_entry)

        (current_price.keys | updated_price.keys).sort.each_with_object({}) do |field, changes|
          from = current_price[field]
          to = updated_price[field]
          next if from == to

          changes[field] = { "from" => from, "to" => to }
        end
      end

      # The subset of an entry made of PriceRegistry::PRICE_KEYS.
      def comparable_price(entry)
        normalize_hash(entry).slice(*PriceRegistry::PRICE_KEYS)
      end

      # @return [Boolean] true when strict mode is on and the plan has failures or rejections.
      def strict_sync_failure?(plan, strict:)
        strict && (plan.failed_sources.any? || plan.rejected.any?)
      end

      # Human-readable explanation of why a strict sync was aborted.
      def strict_failure_message(plan)
        messages = []
        if plan.failed_sources.any?
          details = plan.failed_sources.map { |source, message| "#{source}: #{message}" }.join(", ")
          messages << "source failures: #{details}"
        end
        if plan.rejected.any?
          details = plan.rejected.map { |issue| "#{issue.model} (#{issue.reason})" }.join(", ")
          messages << "validator rejections: #{details}"
        end
        "Price sync failed in strict mode: #{messages.join('; ')}"
      end

      # URLs of the sources that produced a result in this run.
      def source_urls(source_results)
        names = source_results.keys.map(&:to_sym)
        sources.select { |source| names.include?(source.name.to_sym) }.map(&:url)
      end

      # Serializes the registry and replaces the file at +path+ atomically.
      #
      # The payload is written to a sibling temp file and renamed over the
      # target, so a crash mid-write or a concurrent reader never observes a
      # truncated registry.
      def write_registry(path, registry)
        FileUtils.mkdir_p(File.dirname(path))
        payload = yaml_file?(path) ? YAML.dump(registry) : "#{JSON.pretty_generate(registry)}\n"
        # Resolve symlinks so the rename replaces the real file rather than the link.
        target = File.exist?(path) ? File.realpath(path) : path
        temp_path = "#{target}.#{Process.pid}.#{Thread.current.object_id}.tmp"

        File.write(temp_path, payload)
        # Carry over the existing file's permission bits to the replacement.
        File.chmod(File.stat(target).mode & 0o7777, temp_path) if File.exist?(target)
        File.rename(temp_path, target)
      ensure
        # No-op after a successful rename; removes the leftover temp file on failure.
        FileUtils.rm_f(temp_path) if temp_path
      end

      # @return [Boolean] true when the path's extension is .yml or .yaml (case-insensitive).
      def yaml_file?(path)
        YAML_EXTENSIONS.include?(File.extname(path).downcase)
      end
    end
  end
  # rubocop:enable Metrics/ModuleLength, Metrics/ClassLength
end
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "monitor"
|
|
4
|
+
|
|
3
5
|
module LlmCostTracker
|
|
4
6
|
# Calculates costs from price entries expressed in USD per 1M tokens.
|
|
5
7
|
module Pricing
|
|
6
8
|
PRICES = PriceRegistry.builtin_prices
|
|
9
|
+
MUTEX = Monitor.new
|
|
7
10
|
|
|
8
11
|
class << self
|
|
9
12
|
# Estimate model cost from token counts.
|
|
@@ -59,9 +62,14 @@ module LlmCostTracker
|
|
|
59
62
|
cached = @prices_cache
|
|
60
63
|
return cached[:value] if cached && cached[:key] == cache_key
|
|
61
64
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
+
MUTEX.synchronize do
|
|
66
|
+
cached = @prices_cache
|
|
67
|
+
return cached[:value] if cached && cached[:key] == cache_key
|
|
68
|
+
|
|
69
|
+
value = PRICES.merge(file_prices).merge(overrides).freeze
|
|
70
|
+
@prices_cache = { key: cache_key, value: value }.freeze
|
|
71
|
+
value
|
|
72
|
+
end
|
|
65
73
|
end
|
|
66
74
|
|
|
67
75
|
private
|
|
@@ -116,9 +124,14 @@ module LlmCostTracker
|
|
|
116
124
|
cached = @sorted_price_keys_cache
|
|
117
125
|
return cached[:keys] if cached && cached[:table].equal?(table)
|
|
118
126
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
127
|
+
MUTEX.synchronize do
|
|
128
|
+
cached = @sorted_price_keys_cache
|
|
129
|
+
return cached[:keys] if cached && cached[:table].equal?(table)
|
|
130
|
+
|
|
131
|
+
keys = table.keys.sort_by { |key| -key.length }
|
|
132
|
+
@sorted_price_keys_cache = { table: table, keys: keys }.freeze
|
|
133
|
+
keys
|
|
134
|
+
end
|
|
122
135
|
end
|
|
123
136
|
end
|
|
124
137
|
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmCostTracker
  # Deletes tracked LLM API calls older than a cutoff, in bounded batches.
  module Retention
    # Number of rows removed per DELETE when the caller does not specify one.
    DEFAULT_BATCH_SIZE = 5_000

    class << self
      # Deletes every LlmApiCall whose +tracked_at+ is strictly before the cutoff.
      #
      # Rows are removed +batch_size+ at a time until a batch comes back short.
      #
      # @param older_than [Time, DateTime, ActiveSupport::Duration, Integer]
      #   absolute cutoff, an age relative to +now+, or an age in whole days.
      # @param batch_size [Integer] maximum rows deleted per statement; must be positive.
      # @param now [Time] reference time for relative cutoffs.
      # @return [Integer] total number of rows deleted.
      # @raise [ArgumentError] when +batch_size+ is not positive or +older_than+
      #   has an unsupported type.
      def prune(older_than:, batch_size: DEFAULT_BATCH_SIZE, now: Time.now.utc)
        per_batch = Integer(batch_size)
        # A zero or negative batch size would never satisfy the loop's exit condition.
        raise ArgumentError, "batch_size must be positive: #{batch_size.inspect}" unless per_batch.positive?

        cutoff = resolve_cutoff(older_than, now)
        # Loaded lazily so this module can be required without the model being loaded.
        require_relative "llm_api_call" unless defined?(LlmCostTracker::LlmApiCall)

        deleted = 0
        loop do
          batch = LlmCostTracker::LlmApiCall.where(tracked_at: ...cutoff).limit(per_batch).delete_all
          deleted += batch
          break if batch < per_batch
        end
        deleted
      end

      private

      # Turns the +older_than+ argument into an absolute cutoff time.
      #
      # Absolute times are converted with +getutc+, which returns a new object;
      # +Time#utc+ would convert the caller's object in place.
      def resolve_cutoff(older_than, now)
        case older_than
        when Time, DateTime then older_than.to_time.getutc
        when ActiveSupport::Duration then now - older_than
        when Integer then now - (older_than * 86_400)
        else
          raise ArgumentError, "older_than must be a Duration, Time, or Integer days: #{older_than.inspect}"
        end
      end
    end
  end
end
|
|
@@ -19,7 +19,9 @@ module LlmCostTracker
|
|
|
19
19
|
tags: tags_for_storage(tags),
|
|
20
20
|
tracked_at: event.tracked_at
|
|
21
21
|
}
|
|
22
|
-
attributes[:latency_ms]
|
|
22
|
+
attributes[:latency_ms] = event.latency_ms if model_class.latency_column?
|
|
23
|
+
attributes[:stream] = event.stream if model_class.stream_column?
|
|
24
|
+
attributes[:usage_source] = event.usage_source if model_class.usage_source_column?
|
|
23
25
|
|
|
24
26
|
model_class.create!(attributes)
|
|
25
27
|
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "monitor"
|
|
4
|
+
|
|
5
|
+
require_relative "value_helpers"
|
|
6
|
+
|
|
7
|
+
module LlmCostTracker
  # Gathers the events (or explicitly reported usage) of one streamed LLM
  # response and records a single tracking entry when the stream is finished.
  #
  # All mutable state is guarded by a Monitor; once {finish!} has run, further
  # mutation attempts raise FrozenError.
  class StreamCollector
    attr_reader :provider

    # @param provider [#to_s] provider name, stored as a String.
    # @param model [Object] initial model identifier (may be replaced via {model=}).
    # @param latency_ms [Integer, nil] explicit latency; when nil the elapsed
    #   time since construction is used at finish.
    # @param metadata [Hash, nil] copied on the way in.
    def initialize(provider:, model:, latency_ms: nil, metadata: {})
      @provider = provider.to_s
      @model = model
      @latency_ms = latency_ms
      @metadata = ValueHelpers.deep_dup(metadata || {})
      @started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      @monitor = Monitor.new
      @finished = false
      @explicit_usage = nil
      @events = []
    end

    # @return [Object] the current model identifier.
    def model
      @monitor.synchronize { @model }
    end

    # @return [Hash] a copy of the collector's metadata.
    def metadata
      @monitor.synchronize { ValueHelpers.deep_dup(@metadata) }
    end

    # Replaces the model identifier.
    #
    # @raise [FrozenError] when the collector is already finished.
    def model=(value)
      @monitor.synchronize do
        guard_open!
        @model = value
      end
    end

    # Appends one stream event; nil data is ignored.
    #
    # @param data [Object, nil] event payload, copied before being stored.
    # @param type [Object, nil] optional event type.
    # @return [self]
    # @raise [FrozenError] when the collector is already finished.
    def event(data, type: nil)
      @monitor.synchronize do
        guard_open!
        next if data.nil?

        @events.push({ event: type, data: ValueHelpers.deep_dup(data) })
      end
      self
    end
    alias_method :chunk, :event

    # Supplies token counts directly instead of having them parsed from events.
    #
    # @param input_tokens [#to_i]
    # @param output_tokens [#to_i]
    # @param extra [Hash] additional attributes forwarded to ParsedUsage.build.
    # @return [self]
    # @raise [FrozenError] when the collector is already finished.
    def usage(input_tokens:, output_tokens:, **extra)
      @monitor.synchronize do
        guard_open!
        reported = extra.merge(
          input_tokens: input_tokens.to_i,
          output_tokens: output_tokens.to_i
        )
        @explicit_usage = ValueHelpers.deep_dup(reported)
      end
      self
    end

    # Closes the collector and records the stream through Tracker.record.
    #
    # Only the first call records anything; later calls return nil.
    #
    # @param errored [Boolean] adds +stream_errored: true+ to the metadata when truthy.
    # @return [Object, nil] whatever Tracker.record returns, or nil if already finished.
    def finish!(errored: false)
      state = @monitor.synchronize { close_and_snapshot }
      return if state.nil?

      usage = resolve_usage(state)
      base_metadata = errored ? { stream_errored: true } : {}

      Tracker.record(
        provider: usage.provider,
        model: usage.model,
        input_tokens: usage.input_tokens,
        output_tokens: usage.output_tokens,
        latency_ms: state[:latency_ms] || millis_since_start,
        stream: true,
        usage_source: usage.usage_source,
        metadata: base_metadata.merge(state[:metadata]).merge(usage.metadata)
      )
    end

    private

    # Rejects mutation once the collector has been finished.
    def guard_open!
      raise FrozenError, "can't modify finished LlmCostTracker::StreamCollector" if @finished
    end

    # Marks the collector finished and returns a copy of its state, or nil
    # when it was finished before. Must be called while holding the monitor.
    def close_and_snapshot
      return nil if @finished

      @finished = true
      {
        events: ValueHelpers.deep_dup(@events),
        explicit_usage: ValueHelpers.deep_dup(@explicit_usage),
        model: @model,
        latency_ms: @latency_ms,
        metadata: ValueHelpers.deep_dup(@metadata)
      }
    end

    # Picks the usage to record: explicit usage first, then the provider's
    # stream parser, then an all-zero "unknown" usage.
    def resolve_usage(state)
      if state[:explicit_usage]
        manual_usage(state)
      else
        parser = Parsers::Registry.find_for_provider(@provider)
        streamed = parser&.parse_stream(nil, nil, 200, state[:events])
        streamed ? with_collector_identity(streamed, state) : unknown_usage(state)
      end
    end

    # Stamps the collector's provider on parsed usage and falls back to the
    # collector's model when the parser did not produce a usable one.
    def with_collector_identity(parsed, state)
      parsed.with(
        provider: @provider,
        model: known_model(parsed.model) || state[:model]
      )
    end

    # @return [String, nil] the model as a String, or nil when it is nil,
    #   empty or the literal "unknown".
    def known_model(value)
      text = value&.to_s
      text unless text.nil? || text.empty? || text == "unknown"
    end

    # Builds usage from the token counts given to {usage}.
    def manual_usage(state)
      reported = state[:explicit_usage]
      prompt_tokens, completion_tokens = reported.values_at(:input_tokens, :output_tokens)

      ParsedUsage.build(
        provider: @provider,
        model: state[:model],
        input_tokens: prompt_tokens,
        output_tokens: completion_tokens,
        total_tokens: prompt_tokens + completion_tokens,
        stream: true,
        usage_source: :manual,
        **reported.except(:input_tokens, :output_tokens)
      )
    end

    # Builds a zero-token usage for streams nothing could be parsed from.
    def unknown_usage(state)
      ParsedUsage.build(
        provider: @provider,
        model: state[:model],
        input_tokens: 0,
        output_tokens: 0,
        total_tokens: 0,
        stream: true,
        usage_source: :unknown
      )
    end

    # Milliseconds elapsed on the monotonic clock since construction.
    def millis_since_start
      seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @started_at
      (seconds * 1000).round
    end
  end
end
|
|
@@ -9,7 +9,8 @@ module LlmCostTracker
|
|
|
9
9
|
normalized_tags = normalize_tags(tags)
|
|
10
10
|
return model.all if normalized_tags.empty?
|
|
11
11
|
|
|
12
|
-
return
|
|
12
|
+
return postgres_json_query(model, normalized_tags) if model.tags_jsonb_column?
|
|
13
|
+
return mysql_json_query(model, normalized_tags) if model.tags_mysql_json_column?
|
|
13
14
|
|
|
14
15
|
text_query(model, normalized_tags)
|
|
15
16
|
end
|
|
@@ -20,10 +21,14 @@ module LlmCostTracker
|
|
|
20
21
|
|
|
21
22
|
private
|
|
22
23
|
|
|
23
|
-
def
|
|
24
|
+
def postgres_json_query(model, tags)
|
|
24
25
|
model.where("tags @> ?::jsonb", tags.to_json)
|
|
25
26
|
end
|
|
26
27
|
|
|
28
|
+
def mysql_json_query(model, tags)
|
|
29
|
+
model.where("JSON_CONTAINS(tags, ?)", tags.to_json)
|
|
30
|
+
end
|
|
31
|
+
|
|
27
32
|
def text_query(model, tags)
|
|
28
33
|
tags.reduce(model.all) do |relation, (key, value)|
|
|
29
34
|
relation.where("tags LIKE ? ESCAPE '\\'", "%#{model.sanitize_sql_like(json_tag_fragment(key, value))}%")
|
|
@@ -3,14 +3,34 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
module TagsColumn
|
|
5
5
|
def tags_json_column?
|
|
6
|
+
tags_jsonb_column? || tags_mysql_json_column?
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def tags_jsonb_column?
|
|
6
10
|
column = columns_hash["tags"]
|
|
7
11
|
return false unless column
|
|
8
12
|
|
|
9
|
-
|
|
13
|
+
column.type == :jsonb || column.sql_type.to_s.downcase == "jsonb"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def tags_mysql_json_column?
|
|
17
|
+
column = columns_hash["tags"]
|
|
18
|
+
return false unless column
|
|
19
|
+
return false if tags_jsonb_column?
|
|
20
|
+
|
|
21
|
+
column.type == :json && connection.adapter_name.match?(/mysql/i)
|
|
10
22
|
end
|
|
11
23
|
|
|
12
24
|
def latency_column?
|
|
13
25
|
columns_hash.key?("latency_ms")
|
|
14
26
|
end
|
|
27
|
+
|
|
28
|
+
def stream_column?
|
|
29
|
+
columns_hash.key?("stream")
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def usage_source_column?
|
|
33
|
+
columns_hash.key?("usage_source")
|
|
34
|
+
end
|
|
15
35
|
end
|
|
16
36
|
end
|
|
@@ -6,21 +6,15 @@ module LlmCostTracker
|
|
|
6
6
|
class Tracker
|
|
7
7
|
EVENT_NAME = "llm_request.llm_cost_tracker"
|
|
8
8
|
|
|
9
|
+
USAGE_SOURCES = %i[response stream_final manual unknown].freeze
|
|
10
|
+
|
|
9
11
|
class << self
|
|
10
12
|
def enforce_budget!
|
|
11
13
|
Budget.enforce!
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# @param provider [String] Provider name.
|
|
17
|
-
# @param model [String] Model identifier.
|
|
18
|
-
# @param input_tokens [Integer] Input token count.
|
|
19
|
-
# @param output_tokens [Integer] Output token count.
|
|
20
|
-
# @param metadata [Hash] Attribution tags plus provider-specific usage metadata.
|
|
21
|
-
# @param latency_ms [Integer, nil] Optional latency in milliseconds.
|
|
22
|
-
# @return [LlmCostTracker::Event]
|
|
23
|
-
def record(provider:, model:, input_tokens:, output_tokens:, metadata: {}, latency_ms: nil)
|
|
16
|
+
def record(provider:, model:, input_tokens:, output_tokens:,
|
|
17
|
+
metadata: {}, latency_ms: nil, stream: false, usage_source: nil)
|
|
24
18
|
usage = EventMetadata.usage_data(input_tokens, output_tokens, metadata)
|
|
25
19
|
|
|
26
20
|
cost_data = Pricing.cost_for(
|
|
@@ -43,13 +37,13 @@ module LlmCostTracker
|
|
|
43
37
|
cost: cost_data,
|
|
44
38
|
tags: LlmCostTracker.configuration.default_tags.merge(EventMetadata.tags(metadata)).freeze,
|
|
45
39
|
latency_ms: normalized_latency_ms(latency_ms),
|
|
40
|
+
stream: stream ? true : false,
|
|
41
|
+
usage_source: normalized_usage_source(usage_source),
|
|
46
42
|
tracked_at: Time.now.utc
|
|
47
43
|
)
|
|
48
44
|
|
|
49
|
-
# Emit ActiveSupport::Notifications event
|
|
50
45
|
ActiveSupport::Notifications.instrument(EVENT_NAME, event.to_h)
|
|
51
46
|
|
|
52
|
-
# Store based on backend
|
|
53
47
|
stored = store(event)
|
|
54
48
|
Budget.check!(event) unless stored == false
|
|
55
49
|
|
|
@@ -77,6 +71,8 @@ module LlmCostTracker
|
|
|
77
71
|
"tokens=#{event.input_tokens}+#{event.output_tokens} " \
|
|
78
72
|
"cost=#{log_cost_label(event)}"
|
|
79
73
|
message += " latency=#{event.latency_ms}ms" if event.latency_ms
|
|
74
|
+
message += " stream=#{event.stream}" if event.stream
|
|
75
|
+
message += " source=#{event.usage_source}" if event.usage_source
|
|
80
76
|
message += " tags=#{event.tags}" unless event.tags.empty?
|
|
81
77
|
|
|
82
78
|
Logging.log(config.log_level, message)
|
|
@@ -119,6 +115,13 @@ module LlmCostTracker
|
|
|
119
115
|
|
|
120
116
|
[latency_ms.to_i, 0].max
|
|
121
117
|
end
|
|
118
|
+
|
|
119
|
+
# Normalizes a caller-supplied usage source to the String name of one of
# USAGE_SOURCES.
#
# The comparison is done on strings, so arbitrary caller input is never
# interned as a Symbol and values without +to_sym+ (e.g. Integers) yield
# nil instead of raising NoMethodError.
#
# @param value [Symbol, String, Object, nil]
# @return [String, nil] the source name, or nil when +value+ is nil or not a known source.
def normalized_usage_source(value)
  return nil if value.nil?

  name = value.to_s
  USAGE_SOURCES.any? { |source| source.to_s == name } ? name : nil
end
|
|
122
125
|
end
|
|
123
126
|
end
|
|
124
127
|
end
|