ruby_llm 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ruby_llm/model/info.rb +2 -2
- data/lib/ruby_llm/models.json +428 -433
- data/lib/ruby_llm/models_schema.json +168 -0
- data/lib/ruby_llm/providers/gemini/capabilities.rb +3 -0
- data/lib/ruby_llm/providers/mistral/capabilities.rb +30 -18
- data/lib/ruby_llm/utils.rb +12 -0
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models_update.rake +26 -0
- metadata +2 -1
data/lib/ruby_llm/models_schema.json
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
{
|
2
|
+
"title": "RubyLLM Models Schema",
|
3
|
+
"description": "Schema for validating the structure of models.json",
|
4
|
+
"type": "array",
|
5
|
+
"items": {
|
6
|
+
"type": "object",
|
7
|
+
"required": ["id", "name", "provider", "context_window", "max_output_tokens"],
|
8
|
+
"properties": {
|
9
|
+
"id": {
|
10
|
+
"type": "string",
|
11
|
+
"description": "Unique identifier for the model"
|
12
|
+
},
|
13
|
+
"name": {
|
14
|
+
"type": "string",
|
15
|
+
"description": "Display name of the model"
|
16
|
+
},
|
17
|
+
"provider": {
|
18
|
+
"type": "string",
|
19
|
+
"description": "Provider of the model (e.g., openai, anthropic, mistral)"
|
20
|
+
},
|
21
|
+
"family": {
|
22
|
+
"type": ["string", "null"],
|
23
|
+
"description": "Model family (e.g., gpt-4, claude-3)"
|
24
|
+
},
|
25
|
+
"created_at": {
|
26
|
+
"type": ["null", {"type": "string", "format": "date-time"}],
|
27
|
+
"description": "Creation date of the model"
|
28
|
+
},
|
29
|
+
"context_window": {
|
30
|
+
"type": ["null", {"type": "integer", "minimum": 0}],
|
31
|
+
"description": "Maximum context window size"
|
32
|
+
},
|
33
|
+
"max_output_tokens": {
|
34
|
+
"type": ["null", {"type": "integer", "minimum": 0}],
|
35
|
+
"description": "Maximum output tokens"
|
36
|
+
},
|
37
|
+
"knowledge_cutoff": {
|
38
|
+
"type": ["null", {"type": "string", "format": "date"}],
|
39
|
+
"description": "Knowledge cutoff date"
|
40
|
+
},
|
41
|
+
"modalities": {
|
42
|
+
"type": "object",
|
43
|
+
"required": ["input", "output"],
|
44
|
+
"properties": {
|
45
|
+
"input": {
|
46
|
+
"type": "array",
|
47
|
+
"items": {
|
48
|
+
"type": "string",
|
49
|
+
"enum": ["text", "image", "audio", "pdf", "video", "file"]
|
50
|
+
},
|
51
|
+
"uniqueItems": true,
|
52
|
+
"description": "Supported input modalities"
|
53
|
+
},
|
54
|
+
"output": {
|
55
|
+
"type": "array",
|
56
|
+
"items": {
|
57
|
+
"type": "string",
|
58
|
+
"enum": ["text", "image", "audio", "embeddings", "moderation"]
|
59
|
+
},
|
60
|
+
"uniqueItems": true,
|
61
|
+
"description": "Supported output modalities"
|
62
|
+
}
|
63
|
+
}
|
64
|
+
},
|
65
|
+
"capabilities": {
|
66
|
+
"type": "array",
|
67
|
+
"items": {
|
68
|
+
"type": "string",
|
69
|
+
"enum": [
|
70
|
+
"streaming", "function_calling", "structured_output", "predicted_outputs",
|
71
|
+
"distillation", "fine_tuning", "batch", "realtime", "image_generation",
|
72
|
+
"speech_generation", "transcription", "translation", "citations", "reasoning",
|
73
|
+
"caching", "moderation", "json_mode", "vision"
|
74
|
+
]
|
75
|
+
},
|
76
|
+
"uniqueItems": true,
|
77
|
+
"description": "Model capabilities"
|
78
|
+
},
|
79
|
+
"pricing": {
|
80
|
+
"type": "object",
|
81
|
+
"properties": {
|
82
|
+
"text_tokens": {
|
83
|
+
"type": "object",
|
84
|
+
"required": ["standard"],
|
85
|
+
"properties": {
|
86
|
+
"standard": {
|
87
|
+
"type": "object",
|
88
|
+
"properties": {
|
89
|
+
"input_per_million": {"type": "number", "minimum": 0},
|
90
|
+
"cached_input_per_million": {"type": "number", "minimum": 0},
|
91
|
+
"output_per_million": {"type": "number", "minimum": 0},
|
92
|
+
"reasoning_output_per_million": {"type": "number", "minimum": 0}
|
93
|
+
}
|
94
|
+
},
|
95
|
+
"batch": {
|
96
|
+
"type": "object",
|
97
|
+
"properties": {
|
98
|
+
"input_per_million": {"type": "number", "minimum": 0},
|
99
|
+
"output_per_million": {"type": "number", "minimum": 0}
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
},
|
104
|
+
"images": {
|
105
|
+
"type": "object",
|
106
|
+
"properties": {
|
107
|
+
"standard": {
|
108
|
+
"type": "object",
|
109
|
+
"properties": {
|
110
|
+
"input": {"type": "number", "minimum": 0},
|
111
|
+
"output": {"type": "number", "minimum": 0}
|
112
|
+
}
|
113
|
+
},
|
114
|
+
"batch": {
|
115
|
+
"type": "object",
|
116
|
+
"properties": {
|
117
|
+
"input": {"type": "number", "minimum": 0},
|
118
|
+
"output": {"type": "number", "minimum": 0}
|
119
|
+
}
|
120
|
+
}
|
121
|
+
}
|
122
|
+
},
|
123
|
+
"audio_tokens": {
|
124
|
+
"type": "object",
|
125
|
+
"properties": {
|
126
|
+
"standard": {
|
127
|
+
"type": "object",
|
128
|
+
"properties": {
|
129
|
+
"input_per_million": {"type": "number", "minimum": 0},
|
130
|
+
"output_per_million": {"type": "number", "minimum": 0}
|
131
|
+
}
|
132
|
+
},
|
133
|
+
"batch": {
|
134
|
+
"type": "object",
|
135
|
+
"properties": {
|
136
|
+
"input_per_million": {"type": "number", "minimum": 0},
|
137
|
+
"output_per_million": {"type": "number", "minimum": 0}
|
138
|
+
}
|
139
|
+
}
|
140
|
+
}
|
141
|
+
},
|
142
|
+
"embeddings": {
|
143
|
+
"type": "object",
|
144
|
+
"properties": {
|
145
|
+
"standard": {
|
146
|
+
"type": "object",
|
147
|
+
"properties": {
|
148
|
+
"input_per_million": {"type": "number", "minimum": 0}
|
149
|
+
}
|
150
|
+
},
|
151
|
+
"batch": {
|
152
|
+
"type": "object",
|
153
|
+
"properties": {
|
154
|
+
"input_per_million": {"type": "number", "minimum": 0}
|
155
|
+
}
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
},
|
160
|
+
"description": "Pricing information for the model"
|
161
|
+
},
|
162
|
+
"metadata": {
|
163
|
+
"type": "object",
|
164
|
+
"description": "Additional metadata about the model"
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
}
|
data/lib/ruby_llm/providers/gemini/capabilities.rb
CHANGED
@@ -280,6 +280,9 @@ module RubyLLM
|
|
280
280
|
# Embedding output
|
281
281
|
modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
|
282
282
|
|
283
|
+
# Image output for imagen models
|
284
|
+
modalities[:output] = ['image'] if model_id.match?(/imagen/)
|
285
|
+
|
283
286
|
modalities
|
284
287
|
end
|
285
288
|
|
data/lib/ruby_llm/providers/mistral/capabilities.rb
CHANGED
@@ -7,20 +7,24 @@ module RubyLLM
|
|
7
7
|
module Capabilities
|
8
8
|
module_function
|
9
9
|
|
10
|
-
def supports_streaming?(
|
11
|
-
|
10
|
+
def supports_streaming?(model_id)
|
11
|
+
# All chat models support streaming, but not embedding/moderation/OCR/transcription
|
12
|
+
!model_id.match?(/embed|moderation|ocr|transcriptions/)
|
12
13
|
end
|
13
14
|
|
14
|
-
def supports_tools?(
|
15
|
-
|
15
|
+
def supports_tools?(model_id)
|
16
|
+
# Most chat models support tools except embedding/moderation/OCR/voxtral/transcription
|
17
|
+
!model_id.match?(/embed|moderation|ocr|voxtral|transcriptions|mistral-(tiny|small)-(2312|2402)/)
|
16
18
|
end
|
17
19
|
|
18
20
|
def supports_vision?(model_id)
|
19
|
-
|
21
|
+
# Models with vision capabilities
|
22
|
+
model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
|
20
23
|
end
|
21
24
|
|
22
|
-
def supports_json_mode?(
|
23
|
-
|
25
|
+
def supports_json_mode?(model_id)
|
26
|
+
# Most chat models support JSON mode (structured output)
|
27
|
+
!model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
|
24
28
|
end
|
25
29
|
|
26
30
|
def format_display_name(model_id)
|
@@ -71,7 +75,7 @@ module RubyLLM
|
|
71
75
|
when /embed/
|
72
76
|
{
|
73
77
|
input: ['text'],
|
74
|
-
output: ['
|
78
|
+
output: ['embeddings']
|
75
79
|
}
|
76
80
|
else
|
77
81
|
{
|
@@ -81,18 +85,26 @@ module RubyLLM
|
|
81
85
|
end
|
82
86
|
end
|
83
87
|
|
84
|
-
def capabilities_for(model_id)
|
88
|
+
def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
|
85
89
|
case model_id
|
86
|
-
when /
|
87
|
-
when /
|
90
|
+
when /moderation/ then ['moderation']
|
91
|
+
when /voxtral.*transcribe/ then ['transcription']
|
92
|
+
when /ocr/ then ['vision']
|
88
93
|
else
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
94
|
+
capabilities = []
|
95
|
+
capabilities << 'streaming' if supports_streaming?(model_id)
|
96
|
+
capabilities << 'function_calling' if supports_tools?(model_id)
|
97
|
+
capabilities << 'structured_output' if supports_json_mode?(model_id)
|
98
|
+
capabilities << 'vision' if supports_vision?(model_id)
|
99
|
+
|
100
|
+
# Model-specific capabilities
|
101
|
+
capabilities << 'reasoning' if model_id.match?(/magistral/)
|
102
|
+
capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
|
103
|
+
capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
|
104
|
+
capabilities << 'distillation' if model_id.match?(/ministral/)
|
105
|
+
capabilities << 'predicted_outputs' if model_id.match?(/codestral/)
|
106
|
+
|
107
|
+
capabilities.uniq
|
96
108
|
end
|
97
109
|
end
|
98
110
|
|
data/lib/ruby_llm/utils.rb
CHANGED
@@ -24,6 +24,18 @@ module RubyLLM
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
+
def to_time(value)
|
28
|
+
return unless value
|
29
|
+
|
30
|
+
value.is_a?(Time) ? value : Time.parse(value.to_s)
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_date(value)
|
34
|
+
return unless value
|
35
|
+
|
36
|
+
value.is_a?(Date) ? value : Date.parse(value.to_s)
|
37
|
+
end
|
38
|
+
|
27
39
|
def deep_merge(params, payload)
|
28
40
|
params.merge(payload) do |_key, params_value, payload_value|
|
29
41
|
if params_value.is_a?(Hash) && payload_value.is_a?(Hash)
|
data/lib/ruby_llm/version.rb
CHANGED
(hunk not shown: +1 -1, version bump 1.5.0 → 1.5.1)
data/lib/tasks/models_update.rake
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'dotenv/load'
|
4
4
|
require 'ruby_llm'
|
5
|
+
require 'json-schema'
|
5
6
|
|
6
7
|
task default: ['models:update']
|
7
8
|
|
@@ -49,6 +50,9 @@ def refresh_models
|
|
49
50
|
elsif models.all.size == initial_count && initial_count.positive?
|
50
51
|
puts 'Warning: Model list unchanged.'
|
51
52
|
else
|
53
|
+
puts 'Validating models...'
|
54
|
+
validate_models!(models)
|
55
|
+
|
52
56
|
puts "Saving models.json (#{models.all.size} models)"
|
53
57
|
models.save_models
|
54
58
|
end
|
@@ -56,6 +60,28 @@ def refresh_models
|
|
56
60
|
@models = models
|
57
61
|
end
|
58
62
|
|
63
|
+
def validate_models!(models)
|
64
|
+
schema_path = File.expand_path('../ruby_llm/models_schema.json', __dir__)
|
65
|
+
models_data = models.all.map(&:to_h)
|
66
|
+
|
67
|
+
validation_errors = JSON::Validator.fully_validate(schema_path, models_data)
|
68
|
+
|
69
|
+
unless validation_errors.empty?
|
70
|
+
# Save failed models for inspection
|
71
|
+
failed_path = File.expand_path('../ruby_llm/models.failed.json', __dir__)
|
72
|
+
File.write(failed_path, JSON.pretty_generate(models_data))
|
73
|
+
|
74
|
+
puts 'ERROR: Models validation failed:'
|
75
|
+
puts "\nValidation errors:"
|
76
|
+
validation_errors.first(10).each { |error| puts " - #{error}" }
|
77
|
+
puts " ... and #{validation_errors.size - 10} more errors" if validation_errors.size > 10
|
78
|
+
puts "-> Failed models saved to: #{failed_path}"
|
79
|
+
exit(1)
|
80
|
+
end
|
81
|
+
|
82
|
+
puts '✓ Models validation passed'
|
83
|
+
end
|
84
|
+
|
59
85
|
def display_model_stats
|
60
86
|
puts "\nModel count:"
|
61
87
|
provider_counts = @models.all.group_by(&:provider).transform_values(&:count)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_llm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carmine Paolino
|
@@ -168,6 +168,7 @@ files:
|
|
168
168
|
- lib/ruby_llm/model/pricing_tier.rb
|
169
169
|
- lib/ruby_llm/models.json
|
170
170
|
- lib/ruby_llm/models.rb
|
171
|
+
- lib/ruby_llm/models_schema.json
|
171
172
|
- lib/ruby_llm/provider.rb
|
172
173
|
- lib/ruby_llm/providers/anthropic.rb
|
173
174
|
- lib/ruby_llm/providers/anthropic/capabilities.rb
|