ruby_llm 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ {
2
+ "title": "RubyLLM Models Schema",
3
+ "description": "Schema for validating the structure of models.json",
4
+ "type": "array",
5
+ "items": {
6
+ "type": "object",
7
+ "required": ["id", "name", "provider", "context_window", "max_output_tokens"],
8
+ "properties": {
9
+ "id": {
10
+ "type": "string",
11
+ "description": "Unique identifier for the model"
12
+ },
13
+ "name": {
14
+ "type": "string",
15
+ "description": "Display name of the model"
16
+ },
17
+ "provider": {
18
+ "type": "string",
19
+ "description": "Provider of the model (e.g., openai, anthropic, mistral)"
20
+ },
21
+ "family": {
22
+ "type": ["string", "null"],
23
+ "description": "Model family (e.g., gpt-4, claude-3)"
24
+ },
25
+ "created_at": {
26
+ "type": ["null", {"type": "string", "format": "date-time"}],
27
+ "description": "Creation date of the model"
28
+ },
29
+ "context_window": {
30
+ "type": ["null", {"type": "integer", "minimum": 0}],
31
+ "description": "Maximum context window size"
32
+ },
33
+ "max_output_tokens": {
34
+ "type": ["null", {"type": "integer", "minimum": 0}],
35
+ "description": "Maximum output tokens"
36
+ },
37
+ "knowledge_cutoff": {
38
+ "type": ["null", {"type": "string", "format": "date"}],
39
+ "description": "Knowledge cutoff date"
40
+ },
41
+ "modalities": {
42
+ "type": "object",
43
+ "required": ["input", "output"],
44
+ "properties": {
45
+ "input": {
46
+ "type": "array",
47
+ "items": {
48
+ "type": "string",
49
+ "enum": ["text", "image", "audio", "pdf", "video", "file"]
50
+ },
51
+ "uniqueItems": true,
52
+ "description": "Supported input modalities"
53
+ },
54
+ "output": {
55
+ "type": "array",
56
+ "items": {
57
+ "type": "string",
58
+ "enum": ["text", "image", "audio", "embeddings", "moderation"]
59
+ },
60
+ "uniqueItems": true,
61
+ "description": "Supported output modalities"
62
+ }
63
+ }
64
+ },
65
+ "capabilities": {
66
+ "type": "array",
67
+ "items": {
68
+ "type": "string",
69
+ "enum": [
70
+ "streaming", "function_calling", "structured_output", "predicted_outputs",
71
+ "distillation", "fine_tuning", "batch", "realtime", "image_generation",
72
+ "speech_generation", "transcription", "translation", "citations", "reasoning",
73
+ "caching", "moderation", "json_mode", "vision"
74
+ ]
75
+ },
76
+ "uniqueItems": true,
77
+ "description": "Model capabilities"
78
+ },
79
+ "pricing": {
80
+ "type": "object",
81
+ "properties": {
82
+ "text_tokens": {
83
+ "type": "object",
84
+ "required": ["standard"],
85
+ "properties": {
86
+ "standard": {
87
+ "type": "object",
88
+ "properties": {
89
+ "input_per_million": {"type": "number", "minimum": 0},
90
+ "cached_input_per_million": {"type": "number", "minimum": 0},
91
+ "output_per_million": {"type": "number", "minimum": 0},
92
+ "reasoning_output_per_million": {"type": "number", "minimum": 0}
93
+ }
94
+ },
95
+ "batch": {
96
+ "type": "object",
97
+ "properties": {
98
+ "input_per_million": {"type": "number", "minimum": 0},
99
+ "output_per_million": {"type": "number", "minimum": 0}
100
+ }
101
+ }
102
+ }
103
+ },
104
+ "images": {
105
+ "type": "object",
106
+ "properties": {
107
+ "standard": {
108
+ "type": "object",
109
+ "properties": {
110
+ "input": {"type": "number", "minimum": 0},
111
+ "output": {"type": "number", "minimum": 0}
112
+ }
113
+ },
114
+ "batch": {
115
+ "type": "object",
116
+ "properties": {
117
+ "input": {"type": "number", "minimum": 0},
118
+ "output": {"type": "number", "minimum": 0}
119
+ }
120
+ }
121
+ }
122
+ },
123
+ "audio_tokens": {
124
+ "type": "object",
125
+ "properties": {
126
+ "standard": {
127
+ "type": "object",
128
+ "properties": {
129
+ "input_per_million": {"type": "number", "minimum": 0},
130
+ "output_per_million": {"type": "number", "minimum": 0}
131
+ }
132
+ },
133
+ "batch": {
134
+ "type": "object",
135
+ "properties": {
136
+ "input_per_million": {"type": "number", "minimum": 0},
137
+ "output_per_million": {"type": "number", "minimum": 0}
138
+ }
139
+ }
140
+ }
141
+ },
142
+ "embeddings": {
143
+ "type": "object",
144
+ "properties": {
145
+ "standard": {
146
+ "type": "object",
147
+ "properties": {
148
+ "input_per_million": {"type": "number", "minimum": 0}
149
+ }
150
+ },
151
+ "batch": {
152
+ "type": "object",
153
+ "properties": {
154
+ "input_per_million": {"type": "number", "minimum": 0}
155
+ }
156
+ }
157
+ }
158
+ }
159
+ },
160
+ "description": "Pricing information for the model"
161
+ },
162
+ "metadata": {
163
+ "type": "object",
164
+ "description": "Additional metadata about the model"
165
+ }
166
+ }
167
+ }
168
+ }
@@ -280,6 +280,9 @@ module RubyLLM
280
280
  # Embedding output
281
281
  modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
282
282
 
283
+ # Image output for imagen models
284
+ modalities[:output] = ['image'] if model_id.match?(/imagen/)
285
+
283
286
  modalities
284
287
  end
285
288
 
@@ -7,20 +7,24 @@ module RubyLLM
7
7
  module Capabilities
8
8
  module_function
9
9
 
10
- def supports_streaming?(_model_id)
11
- true
10
+ def supports_streaming?(model_id)
11
+ # All chat models support streaming, but not embedding/moderation/OCR/transcription
12
+ !model_id.match?(/embed|moderation|ocr|transcriptions/)
12
13
  end
13
14
 
14
- def supports_tools?(_model_id)
15
- true
15
+ def supports_tools?(model_id)
16
+ # Most chat models support tools except embedding/moderation/OCR/voxtral/transcription
17
+ !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions|mistral-(tiny|small)-(2312|2402)/)
16
18
  end
17
19
 
18
20
  def supports_vision?(model_id)
19
- model_id.include?('pixtral')
21
+ # Models with vision capabilities
22
+ model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
20
23
  end
21
24
 
22
- def supports_json_mode?(_model_id)
23
- true
25
+ def supports_json_mode?(model_id)
26
+ # Most chat models support JSON mode (structured output)
27
+ !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
24
28
  end
25
29
 
26
30
  def format_display_name(model_id)
@@ -71,7 +75,7 @@ module RubyLLM
71
75
  when /embed/
72
76
  {
73
77
  input: ['text'],
74
- output: ['embedding']
78
+ output: ['embeddings']
75
79
  }
76
80
  else
77
81
  {
@@ -81,18 +85,26 @@ module RubyLLM
81
85
  end
82
86
  end
83
87
 
84
- def capabilities_for(model_id)
88
+ def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
85
89
  case model_id
86
- when /embed/ then { embeddings: true }
87
- when /moderation/ then { moderation: true }
90
+ when /moderation/ then ['moderation']
91
+ when /voxtral.*transcribe/ then ['transcription']
92
+ when /ocr/ then ['vision']
88
93
  else
89
- {
90
- chat: true,
91
- streaming: supports_streaming?(model_id),
92
- tools: supports_tools?(model_id),
93
- vision: supports_vision?(model_id),
94
- json_mode: supports_json_mode?(model_id)
95
- }
94
+ capabilities = []
95
+ capabilities << 'streaming' if supports_streaming?(model_id)
96
+ capabilities << 'function_calling' if supports_tools?(model_id)
97
+ capabilities << 'structured_output' if supports_json_mode?(model_id)
98
+ capabilities << 'vision' if supports_vision?(model_id)
99
+
100
+ # Model-specific capabilities
101
+ capabilities << 'reasoning' if model_id.match?(/magistral/)
102
+ capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
103
+ capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
104
+ capabilities << 'distillation' if model_id.match?(/ministral/)
105
+ capabilities << 'predicted_outputs' if model_id.match?(/codestral/)
106
+
107
+ capabilities.uniq
96
108
  end
97
109
  end
98
110
 
@@ -24,6 +24,18 @@ module RubyLLM
24
24
  end
25
25
  end
26
26
 
27
+ def to_time(value)
28
+ return unless value
29
+
30
+ value.is_a?(Time) ? value : Time.parse(value.to_s)
31
+ end
32
+
33
+ def to_date(value)
34
+ return unless value
35
+
36
+ value.is_a?(Date) ? value : Date.parse(value.to_s)
37
+ end
38
+
27
39
  def deep_merge(params, payload)
28
40
  params.merge(payload) do |_key, params_value, payload_value|
29
41
  if params_value.is_a?(Hash) && payload_value.is_a?(Hash)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RubyLLM
4
- VERSION = '1.5.0'
4
+ VERSION = '1.5.1'
5
5
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'dotenv/load'
4
4
  require 'ruby_llm'
5
+ require 'json-schema'
5
6
 
6
7
  task default: ['models:update']
7
8
 
@@ -49,6 +50,9 @@ def refresh_models
49
50
  elsif models.all.size == initial_count && initial_count.positive?
50
51
  puts 'Warning: Model list unchanged.'
51
52
  else
53
+ puts 'Validating models...'
54
+ validate_models!(models)
55
+
52
56
  puts "Saving models.json (#{models.all.size} models)"
53
57
  models.save_models
54
58
  end
@@ -56,6 +60,28 @@ def refresh_models
56
60
  @models = models
57
61
  end
58
62
 
63
+ def validate_models!(models)
64
+ schema_path = File.expand_path('../ruby_llm/models_schema.json', __dir__)
65
+ models_data = models.all.map(&:to_h)
66
+
67
+ validation_errors = JSON::Validator.fully_validate(schema_path, models_data)
68
+
69
+ unless validation_errors.empty?
70
+ # Save failed models for inspection
71
+ failed_path = File.expand_path('../ruby_llm/models.failed.json', __dir__)
72
+ File.write(failed_path, JSON.pretty_generate(models_data))
73
+
74
+ puts 'ERROR: Models validation failed:'
75
+ puts "\nValidation errors:"
76
+ validation_errors.first(10).each { |error| puts " - #{error}" }
77
+ puts " ... and #{validation_errors.size - 10} more errors" if validation_errors.size > 10
78
+ puts "-> Failed models saved to: #{failed_path}"
79
+ exit(1)
80
+ end
81
+
82
+ puts '✓ Models validation passed'
83
+ end
84
+
59
85
  def display_model_stats
60
86
  puts "\nModel count:"
61
87
  provider_counts = @models.all.group_by(&:provider).transform_values(&:count)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carmine Paolino
@@ -168,6 +168,7 @@ files:
168
168
  - lib/ruby_llm/model/pricing_tier.rb
169
169
  - lib/ruby_llm/models.json
170
170
  - lib/ruby_llm/models.rb
171
+ - lib/ruby_llm/models_schema.json
171
172
  - lib/ruby_llm/provider.rb
172
173
  - lib/ruby_llm/providers/anthropic.rb
173
174
  - lib/ruby_llm/providers/anthropic/capabilities.rb