ruby_llm-agents 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +49 -1
- data/app/controllers/ruby_llm/agents/agents_controller.rb +27 -4
- data/app/services/ruby_llm/agents/agent_registry.rb +3 -1
- data/app/views/ruby_llm/agents/agents/_config_router.html.erb +110 -0
- data/app/views/ruby_llm/agents/agents/index.html.erb +6 -0
- data/app/views/ruby_llm/agents/executions/show.html.erb +10 -0
- data/app/views/ruby_llm/agents/shared/_agent_type_badge.html.erb +8 -0
- data/lib/ruby_llm/agents/audio/elevenlabs/model_registry.rb +187 -0
- data/lib/ruby_llm/agents/audio/speaker.rb +38 -0
- data/lib/ruby_llm/agents/audio/speech_client.rb +26 -2
- data/lib/ruby_llm/agents/audio/speech_pricing.rb +44 -3
- data/lib/ruby_llm/agents/audio/transcriber.rb +26 -15
- data/lib/ruby_llm/agents/audio/transcription_pricing.rb +226 -0
- data/lib/ruby_llm/agents/core/configuration.rb +32 -1
- data/lib/ruby_llm/agents/core/version.rb +1 -1
- data/lib/ruby_llm/agents/pricing/data_store.rb +339 -0
- data/lib/ruby_llm/agents/pricing/helicone_adapter.rb +88 -0
- data/lib/ruby_llm/agents/pricing/litellm_adapter.rb +105 -0
- data/lib/ruby_llm/agents/pricing/llmpricing_adapter.rb +73 -0
- data/lib/ruby_llm/agents/pricing/openrouter_adapter.rb +90 -0
- data/lib/ruby_llm/agents/pricing/portkey_adapter.rb +94 -0
- data/lib/ruby_llm/agents/pricing/ruby_llm_adapter.rb +94 -0
- data/lib/ruby_llm/agents/results/speech_result.rb +19 -16
- data/lib/ruby_llm/agents/routing/class_methods.rb +92 -0
- data/lib/ruby_llm/agents/routing/result.rb +74 -0
- data/lib/ruby_llm/agents/routing.rb +140 -0
- data/lib/ruby_llm/agents.rb +3 -0
- metadata +14 -1
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "net/http"
require "json"

module RubyLLM
  module Agents
    module Pricing
      # Centralized HTTP fetch + two-layer cache for all pricing sources.
      #
      # Replaces the duplicated fetch_from_url / litellm_data / cache_expired?
      # code previously copy-pasted across TranscriptionPricing, SpeechPricing,
      # and ImageGenerator::Pricing.
      #
      # Two-layer cache:
      #   Layer 1: In-memory (per-process, instant)
      #   Layer 2: Rails.cache (cross-process, survives restarts)
      #
      # Thread-safety: all in-memory cache writes are protected by a Mutex.
      # Reads are unsynchronized; worst case two threads fetch concurrently
      # and the second write wins, which is harmless for idempotent pricing data.
      #
      # @example Fetch LiteLLM data
      #   DataStore.litellm_data # => Hash of all models
      #
      # @example Fetch Portkey data for a specific model
      #   DataStore.portkey_data("openai", "gpt-4o") # => Hash
      #
      # @example Refresh all caches
      #   DataStore.refresh!
      #
      module DataStore
        extend self

        DEFAULT_CACHE_TTL = 24 * 60 * 60 # 24 hours

        LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
        OPENROUTER_URL = "https://openrouter.ai/api/v1/models"
        HELICONE_URL = "https://www.helicone.ai/api/llm-costs"
        PORTKEY_BASE_URL = "https://api.portkey.ai/model-configs/pricing"
        LLMPRICING_BASE_URL = "https://llmpricing.ai/api"

        # ============================================================
        # Bulk fetchers (one HTTP call gets all models)
        # ============================================================

        # @return [Hash, nil] model_id => { pricing fields }, or nil on failure
        def litellm_data
          fetch_bulk(:litellm, litellm_url) { |body| JSON.parse(body) }
        end

        # @return [Array<Hash>, nil] Array of model entries with pricing, or nil
        def openrouter_data
          return nil unless source_enabled?(:openrouter)

          fetch_bulk(:openrouter, openrouter_url) do |body|
            parsed = JSON.parse(body)
            parsed.is_a?(Hash) ? (parsed["data"] || []) : parsed
          end
        end

        # @return [Array<Hash>, nil] Array of cost entries, or nil
        def helicone_data
          return nil unless source_enabled?(:helicone)

          fetch_bulk(:helicone, helicone_url) do |body|
            parsed = JSON.parse(body)
            parsed.is_a?(Array) ? parsed : (parsed["data"] || parsed["costs"] || [])
          end
        end

        # ============================================================
        # Per-model fetchers (one HTTP call per model)
        # ============================================================

        # @param provider [String] e.g., "openai"
        # @param model [String] e.g., "gpt-4o"
        # @return [Hash, nil] Pricing data for this model
        def portkey_data(provider, model)
          return nil unless source_enabled?(:portkey)

          cache_key = "portkey:#{provider}/#{model}"
          fetch_per_model(cache_key, "#{portkey_base_url}/#{provider}/#{model}")
        end

        # @param provider [String] e.g., "OpenAI"
        # @param model [String] e.g., "gpt-4o"
        # @param input_tokens [Integer] Token count for cost calculation
        # @param output_tokens [Integer] Token count for cost calculation
        # @return [Hash, nil] Pricing data
        def llmpricing_data(provider, model, input_tokens, output_tokens)
          return nil unless source_enabled?(:llmpricing)

          cache_key = "llmpricing:#{provider}/#{model}"
          url = "#{llmpricing_base_url}/prices?provider=#{uri_encode(provider)}&model=#{uri_encode(model)}&input_tokens=#{input_tokens}&output_tokens=#{output_tokens}"
          fetch_per_model(cache_key, url)
        end

        # ============================================================
        # Cache management
        # ============================================================

        # Clear caches so the next access re-fetches.
        #
        # Clears BOTH layers: the in-memory hashes and the corresponding
        # Rails.cache entries. (Clearing only layer 1 would be a no-op in
        # practice, because the next fetch would simply repopulate memory
        # from the still-warm Rails.cache.)
        #
        # @param source [Symbol] :all, :litellm, :openrouter, :helicone, :portkey, :llmpricing
        def refresh!(source = :all)
          mutex.synchronize do
            case source
            when :all
              purge_rails_cache(bulk_rails_keys + per_model_rails_keys)
              @bulk_cache = {}
              @bulk_fetched_at = {}
              @per_model_cache = {}
              @per_model_fetched_at = {}
            when :litellm, :openrouter, :helicone
              purge_rails_cache(["ruby_llm_agents:pricing:#{source}"])
              @bulk_cache&.delete(source)
              @bulk_fetched_at&.delete(source)
            when :portkey, :llmpricing
              prefix = "#{source}:"
              keys = (@per_model_cache || {}).keys.select { |k| k.start_with?(prefix) }
              purge_rails_cache(keys.map { |k| "ruby_llm_agents:pricing:#{k}" })
              @per_model_cache&.reject! { |k, _| k.start_with?(prefix) }
              @per_model_fetched_at&.reject! { |k, _| k.start_with?(prefix) }
            end
          end
        end

        # @return [Hash] Cache statistics for each source
        def cache_stats
          {
            litellm: bulk_stats(:litellm),
            openrouter: bulk_stats(:openrouter),
            helicone: bulk_stats(:helicone),
            portkey: per_model_stats("portkey:"),
            llmpricing: per_model_stats("llmpricing:")
          }
        end

        private

        def mutex
          @mutex ||= Mutex.new
        end

        # ============================================================
        # Bulk fetch with two-layer cache
        # ============================================================

        # Fetch-or-cache for one bulk source. Returns nil (and caches nothing
        # durable) when the fetch fails, so the next call retries.
        def fetch_bulk(source, url, &parser)
          @bulk_cache ||= {}
          @bulk_fetched_at ||= {}

          # Layer 1: In-memory
          if @bulk_cache[source] && !bulk_cache_expired?(source)
            return @bulk_cache[source]
          end

          # Layer 2: Rails.cache
          data = from_rails_cache("ruby_llm_agents:pricing:#{source}") do
            raw_fetch(url, &parser)
          end

          mutex.synchronize do
            @bulk_cache[source] = data
            @bulk_fetched_at[source] = Time.now
          end

          data
        rescue => e
          warn "[RubyLLM::Agents::Pricing] Failed to fetch #{source}: #{e.message}"
          mutex.synchronize { @bulk_cache[source] = nil }
          nil
        end

        # ============================================================
        # Per-model fetch with two-layer cache
        # ============================================================

        def fetch_per_model(cache_key, url)
          @per_model_cache ||= {}
          @per_model_fetched_at ||= {}

          # Layer 1: In-memory
          if @per_model_cache.key?(cache_key) && !per_model_cache_expired?(cache_key)
            return @per_model_cache[cache_key]
          end

          # Layer 2: Rails.cache
          data = from_rails_cache("ruby_llm_agents:pricing:#{cache_key}") do
            raw_fetch(url) { |body| JSON.parse(body) }
          end

          mutex.synchronize do
            @per_model_cache[cache_key] = data
            @per_model_fetched_at[cache_key] = Time.now
          end

          data
        rescue => e
          warn "[RubyLLM::Agents::Pricing] Failed to fetch #{cache_key}: #{e.message}"
          nil
        end

        # ============================================================
        # HTTP fetch
        # ============================================================

        # GET the URL and parse the body. Returns nil on any HTTP/parse error
        # (errors are logged, never raised, so pricing lookups degrade softly).
        def raw_fetch(url)
          uri = URI(url)
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = uri.scheme == "https"
          http.open_timeout = 5
          http.read_timeout = 15

          request = Net::HTTP::Get.new(uri)
          request["Accept"] = "application/json"
          response = http.request(request)

          return nil unless response.is_a?(Net::HTTPSuccess)

          if block_given?
            yield response.body
          else
            JSON.parse(response.body)
          end
        rescue => e
          warn "[RubyLLM::Agents::Pricing] HTTP error: #{e.message}"
          nil
        end

        # ============================================================
        # Rails.cache layer
        # ============================================================

        # Read-through cache around Rails.cache. Unlike Rails.cache.fetch,
        # this deliberately does NOT persist nil: a failed HTTP fetch must
        # not pin "no data" into the cross-process cache for the full TTL.
        def from_rails_cache(key)
          return yield unless rails_cache_available?

          cached = Rails.cache.read(key)
          return cached unless cached.nil?

          value = yield
          Rails.cache.write(key, value, expires_in: cache_ttl) unless value.nil?
          value
        end

        def rails_cache_available?
          defined?(Rails) && Rails.respond_to?(:cache) && Rails.cache
        end

        def purge_rails_cache(keys)
          return unless rails_cache_available?

          keys.each { |key| Rails.cache.delete(key) }
        end

        def bulk_rails_keys
          %i[litellm openrouter helicone].map { |s| "ruby_llm_agents:pricing:#{s}" }
        end

        # Only keys this process has seen; entries written by other processes
        # under keys we never touched cannot be enumerated and simply expire.
        def per_model_rails_keys
          (@per_model_cache || {}).keys.map { |k| "ruby_llm_agents:pricing:#{k}" }
        end

        # ============================================================
        # Cache expiration
        # ============================================================

        def bulk_cache_expired?(source)
          fetched_at = @bulk_fetched_at&.dig(source)
          return true unless fetched_at
          Time.now - fetched_at > cache_ttl
        end

        def per_model_cache_expired?(cache_key)
          fetched_at = @per_model_fetched_at&.dig(cache_key)
          return true unless fetched_at
          Time.now - fetched_at > cache_ttl
        end

        # TTL in seconds, from configuration when present, else 24h.
        # Falls back through the legacy litellm-specific setting for
        # backward compatibility.
        def cache_ttl
          cfg = config
          ttl = cfg.respond_to?(:pricing_cache_ttl) && cfg.pricing_cache_ttl
          ttl ||= cfg.respond_to?(:litellm_pricing_cache_ttl) && cfg.litellm_pricing_cache_ttl
          return DEFAULT_CACHE_TTL unless ttl
          ttl.respond_to?(:to_i) ? ttl.to_i : DEFAULT_CACHE_TTL
        end

        # ============================================================
        # URL helpers (configuration override, else constant default)
        # ============================================================

        def litellm_url
          cfg = config
          (cfg.respond_to?(:litellm_pricing_url) && cfg.litellm_pricing_url) || LITELLM_URL
        end

        def openrouter_url
          cfg = config
          (cfg.respond_to?(:openrouter_pricing_url) && cfg.openrouter_pricing_url) || OPENROUTER_URL
        end

        def helicone_url
          cfg = config
          (cfg.respond_to?(:helicone_pricing_url) && cfg.helicone_pricing_url) || HELICONE_URL
        end

        def portkey_base_url
          cfg = config
          (cfg.respond_to?(:portkey_pricing_url) && cfg.portkey_pricing_url) || PORTKEY_BASE_URL
        end

        def llmpricing_base_url
          cfg = config
          (cfg.respond_to?(:llmpricing_url) && cfg.llmpricing_url) || LLMPRICING_BASE_URL
        end

        # A source is enabled unless configuration explicitly sets
        # "<source>_pricing_enabled" to false. Unknown sources default to on.
        def source_enabled?(source)
          cfg = config
          method_name = :"#{source}_pricing_enabled"
          return true unless cfg.respond_to?(method_name)
          cfg.send(method_name) != false
        end

        def uri_encode(str)
          URI.encode_www_form_component(str.to_s)
        end

        # ============================================================
        # Stats helpers
        # ============================================================

        def bulk_stats(source)
          data = @bulk_cache&.dig(source)
          {
            fetched_at: @bulk_fetched_at&.dig(source),
            size: data.respond_to?(:size) ? data.size : 0,
            cached: !data.nil?
          }
        end

        def per_model_stats(prefix)
          entries = (@per_model_cache || {}).select { |k, _| k.start_with?(prefix) }
          {
            cached_models: entries.size,
            keys: entries.keys
          }
        end

        def config
          RubyLLM::Agents.configuration
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Agents
    module Pricing
      # Normalizes Helicone bulk cost list into the common pricing format.
      #
      # Helicone prices are **per 1M tokens**. This adapter converts to
      # per-token for consistency.
      #
      # Coverage: 172 text LLM models, some realtime audio models.
      # No transcription, TTS, image, or embedding models.
      #
      # @example
      #   HeliconeAdapter.find_model("gpt-4o")
      #   # => { input_cost_per_token: 0.0000025, output_cost_per_token: 0.00001, source: :helicone }
      #
      module HeliconeAdapter
        extend self

        # Find and normalize pricing for a model
        #
        # @param model_id [String] The model identifier
        # @return [Hash, nil] Normalized pricing hash or nil
        def find_model(model_id)
          rows = DataStore.helicone_data
          return nil unless rows.is_a?(Array) && rows.any?

          match = find_matching(rows, model_id)
          match && normalize(match)
        end

        private

        # Three matching passes, strictest first: exact "model" field,
        # then provider-prefix-stripped, then substring containment.
        def find_matching(rows, model_id)
          wanted = model_id.to_s.downcase

          exact = rows.find { |row| row["model"]&.downcase == wanted }
          return exact if exact

          suffix = rows.find do |row|
            candidate = row["model"].to_s.downcase
            candidate == wanted || candidate.end_with?("/#{wanted}")
          end
          return suffix if suffix

          rows.find { |row| row["model"].to_s.downcase.include?(wanted) }
        end

        # Convert per-1M-token rates to per-token rates.
        # Returns nil when the entry carries no usable price at all.
        def normalize(entry)
          conversions = {
            "input_cost_per_1m" => :input_cost_per_token,
            "output_cost_per_1m" => :output_cost_per_token,
            "prompt_audio_per_1m" => :input_cost_per_audio_token,     # realtime audio in
            "completion_audio_per_1m" => :output_cost_per_audio_token # realtime audio out
          }

          priced = conversions.each_with_object({source: :helicone}) do |(raw_key, out_key), acc|
            rate = safe_number(entry[raw_key])
            acc[out_key] = rate / 1_000_000.0 if rate
          end

          priced.size > 1 ? priced : nil
        end

        # Accept only positive numerics; anything else is treated as absent.
        def safe_number(value)
          value if value.is_a?(Numeric) && value.positive?
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Agents
    module Pricing
      # Normalizes LiteLLM bulk JSON into the common pricing format.
      #
      # Supports all model types:
      # - Text LLM: input_cost_per_token, output_cost_per_token
      # - Transcription: input_cost_per_second, input_cost_per_audio_token
      # - TTS/Speech: input_cost_per_character, output_cost_per_character
      # - Image: input_cost_per_image, input_cost_per_pixel
      # - Embedding: input_cost_per_token (with mode: "embedding")
      #
      # @example
      #   LiteLLMAdapter.find_model("whisper-1")
      #   # => { input_cost_per_second: 0.0001, mode: "audio_transcription", source: :litellm }
      #
      module LiteLLMAdapter
        extend self

        # Find and normalize pricing for a model
        #
        # @param model_id [String] The model identifier
        # @return [Hash, nil] Normalized pricing hash or nil
        def find_model(model_id)
          table = DataStore.litellm_data
          return nil unless table.is_a?(Hash) && table.any?

          entry = find_by_candidates(table, model_id)
          entry && normalize(entry)
        end

        private

        # Look up by exact/prefixed key first, then fall back to a fuzzy
        # substring scan over all keys.
        def find_by_candidates(table, model_id)
          normalized = normalize_model_id(model_id)

          lookup_keys = [model_id, normalized] +
            %w[audio_transcription tts openai elevenlabs whisper].map { |prefix| "#{prefix}/#{model_id}" }

          direct = lookup_keys.find { |key| table[key].is_a?(Hash) }
          return table[direct] if direct

          # Fuzzy: key contains the normalized ID, or the normalized ID
          # contains the key's last path segment.
          needle = normalized.downcase
          hit = table.find do |key, value|
            next false unless value.is_a?(Hash)

            haystack = key.to_s.downcase
            haystack.include?(needle) || needle.include?(haystack.split("/").last.to_s)
          end
          hit && hit[1]
        end

        # Copy every recognized pricing field straight through (LiteLLM keys
        # already match our symbol names), tagging the source.
        def normalize(raw)
          passthrough = %w[
            input_cost_per_token output_cost_per_token
            input_cost_per_second input_cost_per_audio_token
            input_cost_per_character output_cost_per_character output_cost_per_audio_token
            input_cost_per_image input_cost_per_pixel input_cost_per_image_hd
            mode
          ]

          passthrough.each_with_object({source: :litellm}) do |field, acc|
            acc[field.to_sym] = raw[field] if raw[field]
          end
        end

        # Lowercase and collapse anything outside [a-z0-9._/-] into single
        # dashes, trimming leading/trailing dashes.
        def normalize_model_id(model_id)
          cleaned = model_id.to_s.downcase.gsub(%r{[^a-z0-9._/-]}, "-")
          cleaned.squeeze("-").gsub(/^-|-$/, "")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Agents
    module Pricing
      # Normalizes LLM Pricing AI per-model data into the common pricing format.
      #
      # This API returns **calculated costs** for a given token count, not raw rates.
      # We query with 1M tokens to derive per-token rates.
      #
      # Coverage: ~79 models across 4 providers (OpenAI, Anthropic, Groq, Mistral).
      # Text LLM only — no transcription, TTS, image, or embedding.
      #
      # @example
      #   LLMPricingAdapter.find_model("gpt-4o")
      #   # => { input_cost_per_token: 0.0000025, output_cost_per_token: 0.00001, source: :llmpricing }
      #
      module LLMPricingAdapter
        extend self

        # Maps model-id prefixes to the provider name the API expects.
        PROVIDER_MAP = [
          [/^(gpt-|o1|o3|o4|whisper|dall-e|tts-|chatgpt)/, "OpenAI"],
          [/^claude/, "Anthropic"],
          [/^(mixtral|mistral|codestral|pixtral|ministral)/, "Mistral"],
          [/^(gemma|llama)/, "Groq"]
        ].freeze

        # Query with 1M tokens so total cost / tokens = per-token rate.
        QUERY_TOKENS = 1_000_000

        # Find and normalize pricing for a model
        #
        # @param model_id [String] The model identifier
        # @return [Hash, nil] Normalized pricing hash or nil
        def find_model(model_id)
          provider = resolve_provider(model_id)
          return nil unless provider

          raw = DataStore.llmpricing_data(provider, model_id, QUERY_TOKENS, QUERY_TOKENS)
          return nil unless raw.is_a?(Hash)

          input_total = raw["input_cost"]
          return nil unless input_total.is_a?(Numeric) && input_total.positive?

          normalize(raw)
        end

        private

        # Map a model ID to its provider via prefix patterns; nil when the
        # model matches no known provider (the API cannot be queried then).
        def resolve_provider(model_id)
          id = model_id.to_s.downcase
          hit = PROVIDER_MAP.find { |pattern, _| id.match?(pattern) }
          hit && hit[1]
        end

        # Divide total cost for QUERY_TOKENS tokens back down to per-token.
        def normalize(raw)
          result = {source: :llmpricing}

          input_total = raw["input_cost"]
          output_total = raw["output_cost"]

          result[:input_cost_per_token] = input_total / QUERY_TOKENS.to_f if input_total.is_a?(Numeric)

          if output_total.is_a?(Numeric) && output_total.positive?
            result[:output_cost_per_token] = output_total / QUERY_TOKENS.to_f
          end

          result
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Agents
    module Pricing
      # Normalizes OpenRouter bulk model list into the common pricing format.
      #
      # OpenRouter prices are **strings** representing USD per token.
      # This adapter converts them to Float.
      #
      # Coverage: 400+ text LLM models, some audio-capable chat models.
      # No transcription, image generation, or embedding models.
      #
      # @example
      #   OpenRouterAdapter.find_model("openai/gpt-4o")
      #   # => { input_cost_per_token: 0.0000025, output_cost_per_token: 0.00001, source: :openrouter }
      #
      module OpenRouterAdapter
        extend self

        # Find and normalize pricing for a model
        #
        # @param model_id [String] The model identifier
        # @return [Hash, nil] Normalized pricing hash or nil
        def find_model(model_id)
          catalog = DataStore.openrouter_data
          return nil unless catalog.is_a?(Array) && catalog.any?

          entry = find_by_id(catalog, model_id)
          entry && normalize(entry)
        end

        private

        # Match in order of strictness: exact id, id with provider prefix
        # stripped, then each common provider prefix prepended.
        def find_by_id(catalog, model_id)
          wanted = model_id.to_s.downcase

          exact = catalog.find { |entry| entry["id"]&.downcase == wanted }
          return exact if exact

          suffix = catalog.find do |entry|
            candidate = entry["id"].to_s.downcase
            candidate == wanted || candidate.end_with?("/#{wanted}")
          end
          return suffix if suffix

          %w[openai anthropic google meta-llama mistralai cohere deepseek].each do |vendor|
            prefixed = "#{vendor}/#{wanted}"
            hit = catalog.find { |entry| entry["id"]&.downcase == prefixed }
            return hit if hit
          end

          nil
        end

        # Convert string rates to Floats; returns nil when the entry
        # carries no positive price at all.
        def normalize(entry)
          rates = entry["pricing"]
          return nil unless rates.is_a?(Hash)

          normalized = {source: :openrouter}

          input_rate = safe_float(rates["prompt"])
          output_rate = safe_float(rates["completion"])
          normalized[:input_cost_per_token] = input_rate if input_rate&.positive?
          normalized[:output_cost_per_token] = output_rate if output_rate&.positive?

          if rates["image"]
            per_image = safe_float(rates["image"])
            normalized[:image_cost_raw] = per_image if per_image&.positive?
          end

          normalized.size > 1 ? normalized : nil
        end

        # Strict Float coercion; nil for nil input or unparseable strings.
        def safe_float(value)
          Float(value) unless value.nil?
        rescue ArgumentError, TypeError
          nil
        end
      end
    end
  end
end
|