open_router_enhanced 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.example +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +13 -0
- data/.rubocop_todo.yml +130 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +41 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/CONTRIBUTING.md +384 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +138 -0
- data/LICENSE.txt +21 -0
- data/MIGRATION.md +556 -0
- data/README.md +1660 -0
- data/Rakefile +334 -0
- data/SECURITY.md +150 -0
- data/VCR_CONFIGURATION.md +80 -0
- data/docs/model_selection.md +637 -0
- data/docs/observability.md +430 -0
- data/docs/prompt_templates.md +422 -0
- data/docs/streaming.md +467 -0
- data/docs/structured_outputs.md +466 -0
- data/docs/tools.md +1016 -0
- data/examples/basic_completion.rb +122 -0
- data/examples/model_selection_example.rb +141 -0
- data/examples/observability_example.rb +199 -0
- data/examples/prompt_template_example.rb +184 -0
- data/examples/smart_completion_example.rb +89 -0
- data/examples/streaming_example.rb +176 -0
- data/examples/structured_outputs_example.rb +191 -0
- data/examples/tool_calling_example.rb +149 -0
- data/lib/open_router/client.rb +552 -0
- data/lib/open_router/http.rb +118 -0
- data/lib/open_router/json_healer.rb +263 -0
- data/lib/open_router/model_registry.rb +378 -0
- data/lib/open_router/model_selector.rb +462 -0
- data/lib/open_router/prompt_template.rb +290 -0
- data/lib/open_router/response.rb +371 -0
- data/lib/open_router/schema.rb +288 -0
- data/lib/open_router/streaming_client.rb +210 -0
- data/lib/open_router/tool.rb +221 -0
- data/lib/open_router/tool_call.rb +180 -0
- data/lib/open_router/usage_tracker.rb +277 -0
- data/lib/open_router/version.rb +5 -0
- data/lib/open_router.rb +123 -0
- data/sig/open_router.rbs +20 -0
- metadata +186 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "json"

module OpenRouter
  # A dedicated class for extracting, cleaning, and healing malformed JSON
  # responses from language models.
  class JsonHealer
    # Regex to find a JSON object or array within a markdown code block.
    # It handles an optional "json" language identifier. Non-greedy `.*?` is key.
    CODE_BLOCK_JSON_REGEX = /```(?:json)?\s*\n?(.*?)\n?```/m

    # Regex to find JSON that isn't in a code block. Looks for the first
    # `{` or `[` and captures until the matching last `}` or `]`. This is
    # a heuristic and might not be perfect for all cases.
    LOOSE_JSON_REGEX = /(\{.*\}|\[.*\])/m

    # @param client [OpenRouter::Client] client used to issue LLM-based healing
    #   requests; must expose #configuration (and optionally #trigger_callbacks)
    def initialize(client)
      @client = client
      @configuration = client.configuration
    end

    # Extract, parse, and (if necessary) heal JSON from a raw model response.
    #
    # @param raw_text [String] raw response content from the model
    # @param schema [#validation_available?, #validate, #validation_errors]
    #   schema object used to validate the parsed JSON when validation is available
    # @param context [Symbol] healing context (e.g. :generic or :forced_extraction)
    # @return [Hash, Array] the parsed (and, when possible, schema-valid) JSON
    # @raise [StructuredOutputError] when no JSON-like content is found, or when
    #   healing still fails after the configured number of attempts
    def heal(raw_text, schema, context: :generic)
      candidate_json = extract_json_candidate(raw_text)
      raise StructuredOutputError, "No JSON-like content found in the response." if candidate_json.nil?

      attempts = 0
      max_attempts = @configuration.max_heal_attempts
      original_content = raw_text # Keep track of original for forced extraction context

      loop do
        # Attempt to parse after simple cleanup
        parsed_json = JSON.parse(cleanup_syntax(candidate_json))

        # If parsing succeeds, validate against the schema
        if schema.validation_available? && !schema.validate(parsed_json)
          errors = schema.validation_errors(parsed_json)
          raise SchemaValidationError, "Schema validation failed: #{errors.join(", ")}"
        end

        return parsed_json # Success!
      rescue JSON::ParserError, SchemaValidationError => e
        attempts += 1

        if attempts > max_attempts
          final_error_message = build_final_error_message(e, schema, candidate_json, max_attempts)
          raise StructuredOutputError, final_error_message
        end

        # Escalate to LLM-based healing with proper context
        candidate_json = fix_with_healer_model(candidate_json, schema, e.message, e.class, original_content, context)
      end
    end

    private

    # Stage 1: Intelligently extract the JSON string from raw text.
    # Returns the best JSON candidate substring, or nil if nothing JSON-like exists.
    def extract_json_candidate(text)
      # 1. Prioritize markdown code blocks, as they are the most explicit.
      match = text.match(CODE_BLOCK_JSON_REGEX)
      return match[1].strip if match

      # 2. If no code block, look for the text after a "JSON:" label.
      # This handles "Here is the JSON: {...}"
      text_after_colon = text.split(/json:/i).last
      return text_after_colon.strip if text_after_colon && text_after_colon.length < text.length

      # 3. As a fallback, try to find the first balanced JSON-like structure.
      match = text.match(LOOSE_JSON_REGEX)
      return match[1].strip if match

      # 4. If nothing else works, check if the whole text looks like JSON before using it.
      trimmed = text.strip
      return trimmed if trimmed.start_with?("{", "[")

      # 5. No JSON-like content found
      nil
    end

    # Stage 2: Perform simple, deterministic syntax cleanup.
    def cleanup_syntax(json_string)
      # Remove trailing commas from objects and arrays, a very common LLM error.
      json_string
        .gsub(/,\s*(\}|\])/, '\1') # Remove trailing commas: ",}" -> "}" and ",]" -> "]"
    end

    # Stage 3: Use an LLM to fix the broken JSON.
    # Returns the healer's best candidate; on any failure, returns the original
    # broken content so the heal loop can fail with its designed error.
    def fix_with_healer_model(broken_json, schema, error_reason, error_class, original_content, context)
      healer_model = @configuration.healer_model
      prompt = build_healing_prompt(broken_json, schema, error_reason, error_class, original_content, context)

      # Trigger on_healing callback with healing context
      if @client.respond_to?(:trigger_callbacks)
        @client.trigger_callbacks(:on_healing, {
                                    broken_json: broken_json,
                                    error: error_reason,
                                    schema: schema,
                                    healer_model: healer_model,
                                    context: context
                                  })
      end

      healing_response = @client.complete(
        [{ role: "user", content: prompt }],
        model: healer_model,
        extras: { temperature: 0.0, max_tokens: 4000 }
      )

      # The healer's response is now our new best candidate.
      # We extract it again in case the healer also added fluff.
      healed_json = extract_json_candidate(healing_response.content)

      # Trigger callback with healing result
      if @client.respond_to?(:trigger_callbacks)
        @client.trigger_callbacks(:on_healing, {
                                    healed: true,
                                    original: broken_json,
                                    result: healed_json
                                  })
      end

      # BUGFIX: if the healer returned no JSON-like content at all, extraction
      # yields nil; fall back to the previous candidate so the next loop
      # iteration raises a JSON::ParserError instead of crashing on nil.
      healed_json || broken_json
    rescue StandardError => e
      # If the healing call itself fails, we can't proceed.
      # Return the original broken content to let the loop fail naturally.
      warn "[OpenRouter Warning] JSON healing request failed: #{e.message}"

      # Trigger callback for failed healing
      if @client.respond_to?(:trigger_callbacks)
        @client.trigger_callbacks(:on_healing, {
                                    healed: false,
                                    error: e.message,
                                    original: broken_json
                                  })
      end

      broken_json
    end

    # Select the healing prompt best suited to the error class and context.
    def build_healing_prompt(content, schema, error_reason, error_class, original_content, context)
      # Use schema.to_h instead of pure_schema for consistency with existing tests
      schema_json = schema.respond_to?(:to_h) ? schema.to_h.to_json : schema.to_json

      case error_class.name
      when "JSON::ParserError"
        build_json_parsing_prompt(content, error_reason)
      when "OpenRouter::SchemaValidationError"
        if forced_extraction_context?(context, original_content, content)
          build_forced_extraction_prompt(original_content, schema_json, error_reason)
        else
          build_schema_validation_prompt(content, schema_json, error_reason)
        end
      else
        build_generic_prompt(content, schema_json, error_reason)
      end
    end

    # Prompt for plain JSON syntax errors (no schema involved).
    def build_json_parsing_prompt(content, error_reason)
      <<~PROMPT
        Invalid JSON: #{error_reason}

        Content to fix:
        #{content}

        Please fix this content to be valid JSON. Return ONLY the fixed JSON, no explanations or additional text.
      PROMPT
    end

    # Prompt for syntactically valid JSON that fails schema validation.
    def build_schema_validation_prompt(content, schema_json, error_reason)
      <<~PROMPT
        The following JSON content is invalid because it failed to validate against the provided JSON Schema.

        Validation Errors:
        #{error_reason}

        Original Content to Fix:
        ```json
        #{content}
        ```

        Required JSON Schema:
        ```json
        #{schema_json}
        ```

        Please correct the content to produce a valid JSON object that strictly conforms to the schema.
        Return ONLY the fixed, raw JSON object, without any surrounding text or explanations.
      PROMPT
    end

    # Prompt used when the JSON was extracted from a response that mixed
    # explanatory prose with JSON (forced-extraction context).
    def build_forced_extraction_prompt(original_content, schema_json, error_reason)
      <<~PROMPT
        The following response contains explanatory text and JSON that needs to be extracted and fixed to conform to the provided schema.

        Validation Errors:
        #{error_reason}

        Original Response Content:
        #{original_content}

        Required JSON Schema:
        ```json
        #{schema_json}
        ```

        Please extract and correct the JSON from the response above to produce a valid JSON object that strictly conforms to the schema.
        Return ONLY the fixed, raw JSON object, without any surrounding text or explanations.
      PROMPT
    end

    # Catch-all prompt for any other error class.
    def build_generic_prompt(content, schema_json, error_reason)
      <<~PROMPT
        You are an expert JSON fixing bot. Your task is to correct a malformed JSON string so that it becomes syntactically valid AND conforms to a given JSON Schema.

        The user's JSON is invalid for the following reason:
        #{error_reason}

        Here is the malformed JSON content to fix:
        ```
        #{content}
        ```

        It MUST be corrected to strictly conform to the following JSON Schema:
        ```json
        #{schema_json}
        ```

        CRITICAL INSTRUCTIONS:
        1. Analyze the error, the broken JSON, and the schema.
        2. Correct the JSON so it is syntactically perfect and valid against the schema.
        3. Return ONLY the raw, corrected JSON object. Do not include any text, explanations, or markdown fences.
      PROMPT
    end

    # Heuristic: was the candidate carved out of a larger prose-y response?
    # True either when the caller declared :forced_extraction, or when the
    # original content differs from the candidate and looks like mixed prose.
    def forced_extraction_context?(context, original_content, content)
      context == :forced_extraction ||
        (original_content != content && (original_content.include?("```") || original_content.length > 200 || original_content.include?("\n")))
    end

    # Build the terminal error message raised after healing attempts are
    # exhausted, enriching schema-validation failures with specific errors.
    def build_final_error_message(error, schema, candidate_json, max_attempts)
      base_message = "Failed to heal JSON after #{max_attempts} healing attempts. Last error: #{error.message}"

      return base_message unless error.is_a?(SchemaValidationError) && schema.validation_available?

      # For schema validation errors, include specific validation details
      parsed_json_for_errors = safely_parse_json(candidate_json)
      return base_message unless parsed_json_for_errors

      validation_errors = schema.validation_errors(parsed_json_for_errors)
      error_details = validation_errors.any? ? ". Last errors: #{validation_errors.join(", ")}" : ""
      "#{base_message}#{error_details}"
    end

    # Parse leniently for error reporting; nil on any failure.
    def safely_parse_json(candidate_json)
      JSON.parse(cleanup_syntax(candidate_json))
    rescue StandardError
      nil
    end
  end
end
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "json"
require "net/http"
require "uri"
require "tmpdir"
require "fileutils"
require "openssl"
require "date" # BUGFIX: meets_requirements? matches on Date; it was never required

module OpenRouter
  class ModelRegistryError < Error; end

  # Registry of OpenRouter models, backed by the public /models API with a
  # size-limited on-disk cache (under Dir.tmpdir) and in-memory memoization.
  class ModelRegistry
    API_BASE = "https://openrouter.ai/api/v1"
    CACHE_DIR = File.join(Dir.tmpdir, "openrouter_cache")
    CACHE_DATA_FILE = File.join(CACHE_DIR, "models_data.json")
    CACHE_METADATA_FILE = File.join(CACHE_DIR, "cache_metadata.json")
    MAX_CACHE_SIZE_MB = 50 # Maximum cache size in megabytes

    class << self
      # Fetch models from OpenRouter API.
      #
      # @return [Hash] parsed API payload (expects a "data" array of models)
      # @raise [ModelRegistryError] on non-200 responses, parse failures, or network errors
      def fetch_models_from_api
        uri = URI("#{API_BASE}/models")

        # Use configurable timeout and SSL settings
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        http.verify_mode = OpenSSL::SSL::VERIFY_PEER
        http.read_timeout = OpenRouter.configuration.model_registry_timeout
        http.open_timeout = OpenRouter.configuration.model_registry_timeout

        request = Net::HTTP::Get.new(uri)
        response = http.request(request)

        unless response.code == "200"
          raise ModelRegistryError,
                "Failed to fetch models from OpenRouter API: #{response.message}"
        end

        JSON.parse(response.body)
      rescue JSON::ParserError => e
        raise ModelRegistryError, "Failed to parse OpenRouter API response: #{e.message}"
      rescue StandardError => e
        raise ModelRegistryError, "Network error fetching models: #{e.message}"
      end

      # Ensure cache directory exists and set up cleanup
      def ensure_cache_dir
        FileUtils.mkdir_p(CACHE_DIR) unless Dir.exist?(CACHE_DIR)
        setup_cleanup_hook
      end

      # Check if cache is stale based on the configured TTL.
      # Any unreadable/corrupt metadata is treated as stale.
      def cache_stale?
        return true unless File.exist?(CACHE_METADATA_FILE)

        begin
          metadata = JSON.parse(File.read(CACHE_METADATA_FILE))
          cache_time = metadata["cached_at"]
          ttl = OpenRouter.configuration.cache_ttl

          return true unless cache_time

          Time.now.to_i - cache_time.to_i > ttl
        rescue JSON::ParserError, StandardError
          true # If we can't read metadata, consider cache stale
        end
      end

      # Write cache with timestamp metadata
      def write_cache_with_timestamp(models_data)
        ensure_cache_dir

        # Write the actual models data
        File.write(CACHE_DATA_FILE, JSON.pretty_generate(models_data))

        # Write metadata with timestamp
        metadata = {
          "cached_at" => Time.now.to_i,
          "version" => "1.0",
          "source" => "openrouter_api"
        }
        File.write(CACHE_METADATA_FILE, JSON.pretty_generate(metadata))
      end

      # Read cache only if it's fresh; nil when stale, missing, or corrupt.
      def read_cache_if_fresh
        return nil if cache_stale?
        return nil unless File.exist?(CACHE_DATA_FILE)

        JSON.parse(File.read(CACHE_DATA_FILE))
      rescue JSON::ParserError
        nil
      end

      # Clear local cache (both files and memory)
      def clear_cache!
        FileUtils.rm_rf(CACHE_DIR) if Dir.exist?(CACHE_DIR)
        @processed_models = nil
        @all_models = nil
      end

      # Refresh models data from API
      def refresh!
        clear_cache!
        fetch_and_cache_models
      end

      # Get processed models (fetch if needed)
      def fetch_and_cache_models
        api_data = load_api_data
        # Guard against a payload with no "data" key (consistent with
        # find_original_model_data) so processing never hits nil.each.
        @processed_models = process_api_models(api_data["data"] || [])
      end

      # Find original API model data by model ID.
      #
      # @return [Hash, nil] the raw (unprocessed) model entry, if present
      def find_original_model_data(model_id)
        raw_models = load_api_data["data"] || []
        raw_models.find { |model| model["id"] == model_id }
      end

      # Convert API model data to our internal format.
      #
      # @param api_models [Array<Hash>] raw model entries from the API
      # @return [Hash{String => Hash}] model id => normalized spec
      def process_api_models(api_models)
        models = {}

        api_models.each do |model_data|
          model_id = model_data["id"]

          models[model_id] = {
            name: model_data["name"],
            cost_per_1k_tokens: {
              input: model_data.dig("pricing", "prompt").to_f,
              output: model_data.dig("pricing", "completion").to_f
            },
            context_length: model_data["context_length"],
            capabilities: extract_capabilities(model_data),
            description: model_data["description"],
            supported_parameters: model_data["supported_parameters"] || [],
            architecture: model_data["architecture"],
            performance_tier: determine_performance_tier(model_data),
            fallbacks: determine_fallbacks(model_id, model_data),
            created_at: model_data["created"]
          }
        end

        models
      end

      # Extract capabilities (as symbols) from raw model data.
      def extract_capabilities(model_data)
        capabilities = [:chat] # All models support basic chat

        # Check for function calling support
        supported_params = model_data["supported_parameters"] || []
        if supported_params.include?("tools") && supported_params.include?("tool_choice")
          capabilities << :function_calling
        end

        # Check for structured output support
        if supported_params.include?("structured_outputs") || supported_params.include?("response_format")
          capabilities << :structured_outputs
        end

        # Check for vision support
        architecture = model_data["architecture"] || {}
        input_modalities = architecture["input_modalities"] || []
        capabilities << :vision if input_modalities.include?("image")

        # Check for large context support
        context_length = model_data["context_length"] || 0
        capabilities << :long_context if context_length > 100_000

        capabilities
      end

      # Determine performance tier (:premium or :standard) from pricing.
      def determine_performance_tier(model_data)
        input_cost = model_data.dig("pricing", "prompt").to_f

        # Higher cost generally indicates premium models
        # Note: pricing is per token, not per 1k tokens
        if input_cost > 0.000001 # > $0.001 per 1k tokens (converted from per-token)
          :premium
        else
          :standard
        end
      end

      # Determine fallback models (simplified logic)
      def determine_fallbacks(_model_id, _model_data)
        # For now, return empty array - could be enhanced with smart fallback logic
        []
      end

      # Find the best model matching given requirements.
      #
      # @return [Array(String, Hash), nil] [model_id, specs] pair or nil
      def find_best_model(requirements = {})
        candidates = models_meeting_requirements(requirements)
        return nil if candidates.empty?

        # If pick_newer is true, prefer newer models over cost
        if requirements[:pick_newer]
          # Treat a missing creation timestamp as the epoch so nil never
          # poisons the comparison.
          candidates.max_by { |_, specs| specs[:created_at] || 0 }
        else
          # Sort by cost (cheapest first) as default strategy
          candidates.min_by { |_, specs| calculate_model_cost(specs, requirements) }
        end
      end

      # Get all models that meet requirements (without sorting)
      def models_meeting_requirements(requirements = {})
        all_models.select do |_model, specs|
          meets_requirements?(specs, requirements)
        end
      end

      # Get fallback models for a given model
      def get_fallbacks(model)
        model_info = get_model_info(model)
        model_info ? model_info[:fallbacks] || [] : []
      end

      # Check if a model exists in the registry
      def model_exists?(model)
        all_models.key?(model)
      end

      # Check if a model has a specific capability
      def has_capability?(model, capability)
        model_info = get_model_info(model)
        return false unless model_info

        model_info[:capabilities].include?(capability)
      end

      # Get detailed information about a model
      def get_model_info(model)
        all_models[model]
      end

      # Get all registered models (fetch from API if needed)
      def all_models
        @all_models ||= fetch_and_cache_models
      end

      # Calculate estimated cost for a request (returns 0 for unknown models).
      def calculate_estimated_cost(model, input_tokens: 0, output_tokens: 0)
        model_info = get_model_info(model)
        return 0 unless model_info

        input_cost = (input_tokens / 1000.0) * model_info[:cost_per_1k_tokens][:input]
        output_cost = (output_tokens / 1000.0) * model_info[:cost_per_1k_tokens][:output]

        input_cost + output_cost
      end

      private

      # Return the raw models API payload, preferring a fresh cache and
      # falling back to a live fetch (which repopulates the cache).
      # Shared by fetch_and_cache_models and find_original_model_data.
      def load_api_data
        cached_data = read_cache_if_fresh
        return cached_data if cached_data

        api_data = fetch_models_from_api
        write_cache_with_timestamp(api_data)
        api_data
      end

      # Check if model specs meet the given requirements
      def meets_requirements?(specs, requirements)
        # Check capability requirements
        if requirements[:capabilities]
          required_caps = Array(requirements[:capabilities])
          return false unless required_caps.all? { |cap| specs[:capabilities].include?(cap) }
        end

        # Check cost requirements
        if requirements[:max_input_cost] && (specs[:cost_per_1k_tokens][:input] > requirements[:max_input_cost])
          return false
        end

        if requirements[:max_output_cost] && (specs[:cost_per_1k_tokens][:output] > requirements[:max_output_cost])
          return false
        end

        # Check context length requirements. API data may omit context_length;
        # treat missing as 0 so the comparison never raises on nil.
        if requirements[:min_context_length] && ((specs[:context_length] || 0) < requirements[:min_context_length])
          return false
        end

        # Check performance tier requirements
        if requirements[:performance_tier]
          required_tier = requirements[:performance_tier]
          model_tier = specs[:performance_tier]

          # Premium tier can satisfy premium or standard requirements
          # Standard tier can only satisfy standard requirements
          case required_tier
          when :premium
            return false unless model_tier == :premium
          when :standard
            return false unless %i[standard premium].include?(model_tier)
          end
        end

        # Check released after date requirement
        if requirements[:released_after_date]
          required_date = requirements[:released_after_date]
          model_timestamp = specs[:created_at]

          # Models without a creation timestamp cannot satisfy a date filter.
          return false unless model_timestamp

          # Convert date to timestamp if needed
          required_timestamp = case required_date
                               when Date
                                 required_date.to_time.to_i
                               when Time
                                 required_date.to_i
                               when Integer
                                 required_date
                               else
                                 return false
                               end

          return false if model_timestamp < required_timestamp
        end

        true
      end

      # Calculate the cost metric for sorting models
      def calculate_model_cost(specs, _requirements)
        # Simple cost calculation for sorting - could be made more sophisticated
        # For now, just use input token cost as the primary metric
        specs[:cost_per_1k_tokens][:input]
      end

      # Set up cleanup hook to manage cache size (installed at most once).
      def setup_cleanup_hook
        return if @cleanup_hook_set

        at_exit { cleanup_oversized_cache }
        @cleanup_hook_set = true
      end

      # Clean up cache if it exceeds size limits
      def cleanup_oversized_cache
        return unless Dir.exist?(CACHE_DIR)

        cache_size_mb = calculate_cache_size_mb
        return unless cache_size_mb > MAX_CACHE_SIZE_MB

        # Remove cache files if oversized
        FileUtils.rm_rf(CACHE_DIR)
      rescue StandardError
        # Silently ignore cleanup errors - don't break the application
      end

      # Calculate current cache size in megabytes
      def calculate_cache_size_mb
        total_size = Dir.glob(File.join(CACHE_DIR, "**/*"))
                        .select { |f| File.file?(f) }
                        .sum do |f|
          File.size(f)
        rescue StandardError
          0
        end
        total_size / (1024.0 * 1024.0)
      end
    end
  end
end
|