ruby_llm_community 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +34 -0
  4. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +5 -0
  5. data/lib/generators/ruby_llm/install/templates/model_model.rb.tt +6 -0
  6. data/lib/generators/ruby_llm/install_generator.rb +27 -2
  7. data/lib/ruby_llm/active_record/acts_as.rb +163 -24
  8. data/lib/ruby_llm/aliases.json +58 -5
  9. data/lib/ruby_llm/aliases.rb +7 -25
  10. data/lib/ruby_llm/chat.rb +10 -17
  11. data/lib/ruby_llm/configuration.rb +5 -12
  12. data/lib/ruby_llm/connection.rb +4 -4
  13. data/lib/ruby_llm/connection_multipart.rb +19 -0
  14. data/lib/ruby_llm/content.rb +5 -2
  15. data/lib/ruby_llm/embedding.rb +1 -2
  16. data/lib/ruby_llm/error.rb +0 -8
  17. data/lib/ruby_llm/image.rb +23 -8
  18. data/lib/ruby_llm/image_attachment.rb +21 -0
  19. data/lib/ruby_llm/message.rb +6 -6
  20. data/lib/ruby_llm/model/info.rb +12 -10
  21. data/lib/ruby_llm/model/pricing.rb +0 -3
  22. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  23. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  24. data/lib/ruby_llm/models.json +2147 -470
  25. data/lib/ruby_llm/models.rb +65 -34
  26. data/lib/ruby_llm/provider.rb +8 -8
  27. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  28. data/lib/ruby_llm/providers/anthropic/chat.rb +2 -2
  29. data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
  30. data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
  31. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  32. data/lib/ruby_llm/providers/bedrock/chat.rb +2 -4
  33. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  34. data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
  35. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
  36. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  37. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  38. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  39. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  40. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  41. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  42. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  43. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  44. data/lib/ruby_llm/providers/gemini/capabilities.rb +28 -100
  45. data/lib/ruby_llm/providers/gemini/chat.rb +57 -29
  46. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  47. data/lib/ruby_llm/providers/gemini/images.rb +1 -2
  48. data/lib/ruby_llm/providers/gemini/media.rb +0 -1
  49. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  50. data/lib/ruby_llm/providers/gemini/streaming.rb +15 -1
  51. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  52. data/lib/ruby_llm/providers/gpustack/chat.rb +11 -1
  53. data/lib/ruby_llm/providers/gpustack/media.rb +45 -0
  54. data/lib/ruby_llm/providers/gpustack/models.rb +44 -9
  55. data/lib/ruby_llm/providers/gpustack.rb +1 -0
  56. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  57. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  58. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  59. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  60. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  61. data/lib/ruby_llm/providers/ollama/media.rb +1 -6
  62. data/lib/ruby_llm/providers/ollama/models.rb +36 -0
  63. data/lib/ruby_llm/providers/ollama.rb +1 -0
  64. data/lib/ruby_llm/providers/openai/capabilities.rb +3 -16
  65. data/lib/ruby_llm/providers/openai/chat.rb +1 -3
  66. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  67. data/lib/ruby_llm/providers/openai/images.rb +73 -3
  68. data/lib/ruby_llm/providers/openai/media.rb +0 -1
  69. data/lib/ruby_llm/providers/openai/response.rb +120 -29
  70. data/lib/ruby_llm/providers/openai/response_media.rb +2 -2
  71. data/lib/ruby_llm/providers/openai/streaming.rb +107 -47
  72. data/lib/ruby_llm/providers/openai/tools.rb +1 -1
  73. data/lib/ruby_llm/providers/openai.rb +1 -3
  74. data/lib/ruby_llm/providers/openai_base.rb +2 -2
  75. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  76. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  77. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  78. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  79. data/lib/ruby_llm/providers/vertexai/chat.rb +14 -0
  80. data/lib/ruby_llm/providers/vertexai/embeddings.rb +32 -0
  81. data/lib/ruby_llm/providers/vertexai/models.rb +130 -0
  82. data/lib/ruby_llm/providers/vertexai/streaming.rb +14 -0
  83. data/lib/ruby_llm/providers/vertexai.rb +55 -0
  84. data/lib/ruby_llm/railtie.rb +0 -1
  85. data/lib/ruby_llm/stream_accumulator.rb +72 -10
  86. data/lib/ruby_llm/streaming.rb +16 -25
  87. data/lib/ruby_llm/tool.rb +2 -19
  88. data/lib/ruby_llm/tool_call.rb +0 -9
  89. data/lib/ruby_llm/version.rb +1 -1
  90. data/lib/ruby_llm_community.rb +5 -3
  91. data/lib/tasks/models.rake +525 -0
  92. data/lib/tasks/release.rake +37 -2
  93. data/lib/tasks/vcr.rake +0 -7
  94. metadata +13 -4
  95. data/lib/tasks/aliases.rake +0 -235
  96. data/lib/tasks/models_docs.rake +0 -224
  97. data/lib/tasks/models_update.rake +0 -108
@@ -7,9 +7,6 @@ module RubyLLM
7
7
  module Capabilities
8
8
  module_function
9
9
 
10
- # Returns the context window size (input token limit) for the given model
11
- # @param model_id [String] the model identifier
12
- # @return [Integer] the context window size in tokens
13
10
  def context_window_for(model_id)
14
11
  case model_id
15
12
  when /gemini-2\.5-pro-exp-03-25/, /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/ # rubocop:disable Layout/LineLength
@@ -18,70 +15,49 @@ module RubyLLM
18
15
  when /gemini-embedding-exp/ then 8_192
19
16
  when /text-embedding-004/, /embedding-001/ then 2_048
20
17
  when /aqa/ then 7_168
21
- when /imagen-3/ then nil # No token limit for image generation
22
- else 32_768 # Sensible default for unknown models
18
+ when /imagen-3/ then nil
19
+ else 32_768
23
20
  end
24
21
  end
25
22
 
26
- # Returns the maximum output tokens for the given model
27
- # @param model_id [String] the model identifier
28
- # @return [Integer] the maximum output tokens
29
23
  def max_tokens_for(model_id)
30
24
  case model_id
31
25
  when /gemini-2\.5-pro-exp-03-25/ then 64_000
32
26
  when /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/, /gemini-1\.5-pro/ # rubocop:disable Layout/LineLength
33
27
  8_192
34
- when /gemini-embedding-exp/ then nil # Elastic, supports 3072, 1536, or 768
35
- when /text-embedding-004/, /embedding-001/ then 768 # Output dimension size for embeddings
36
- when /aqa/ then 1_024
37
- when /imagen-3/ then 4 # Output images
38
- else 4_096 # Sensible default
28
+ when /gemini-embedding-exp/ then nil
29
+ when /text-embedding-004/, /embedding-001/ then 768
30
+ when /imagen-3/ then 4
31
+ else 4_096
39
32
  end
40
33
  end
41
34
 
42
- # Returns the input price per million tokens for the given model
43
- # @param model_id [String] the model identifier
44
- # @return [Float] the price per million tokens in USD
45
35
  def input_price_for(model_id)
46
36
  base_price = PRICES.dig(pricing_family(model_id), :input) || default_input_price
47
37
  return base_price unless long_context_model?(model_id)
48
38
 
49
- # Apply different pricing for prompts longer than 128k tokens
50
39
  context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
51
40
  end
52
41
 
53
- # Returns the output price per million tokens for the given model
54
- # @param model_id [String] the model identifier
55
- # @return [Float] the price per million tokens in USD
56
42
  def output_price_for(model_id)
57
43
  base_price = PRICES.dig(pricing_family(model_id), :output) || default_output_price
58
44
  return base_price unless long_context_model?(model_id)
59
45
 
60
- # Apply different pricing for prompts longer than 128k tokens
61
46
  context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
62
47
  end
63
48
 
64
- # Determines if the model supports vision (image/video) inputs
65
- # @param model_id [String] the model identifier
66
- # @return [Boolean] true if the model supports vision inputs
67
49
  def supports_vision?(model_id)
68
50
  return false if model_id.match?(/text-embedding|embedding-001|aqa/)
69
51
 
70
52
  model_id.match?(/gemini|flash|pro|imagen/)
71
53
  end
72
54
 
73
- # Determines if the model supports function calling
74
- # @param model_id [String] the model identifier
75
- # @return [Boolean] true if the model supports function calling
76
55
  def supports_functions?(model_id)
77
56
  return false if model_id.match?(/text-embedding|embedding-001|aqa|flash-lite|imagen|gemini-2\.0-flash-lite/)
78
57
 
79
58
  model_id.match?(/gemini|pro|flash/)
80
59
  end
81
60
 
82
- # Determines if the model supports JSON mode
83
- # @param model_id [String] the model identifier
84
- # @return [Boolean] true if the model supports JSON mode
85
61
  def supports_json_mode?(model_id)
86
62
  if model_id.match?(/text-embedding|embedding-001|aqa|imagen|gemini-2\.0-flash-lite|gemini-2\.5-pro-exp-03-25/)
87
63
  return false
@@ -90,24 +66,18 @@ module RubyLLM
90
66
  model_id.match?(/gemini|pro|flash/)
91
67
  end
92
68
 
93
- # Formats the model ID into a human-readable display name
94
- # @param model_id [String] the model identifier
95
- # @return [String] the formatted display name
96
69
  def format_display_name(model_id)
97
70
  model_id
98
71
  .delete_prefix('models/')
99
72
  .split('-')
100
73
  .map(&:capitalize)
101
74
  .join(' ')
102
- .gsub(/(\d+\.\d+)/, ' \1') # Add space before version numbers
103
- .gsub(/\s+/, ' ') # Clean up multiple spaces
104
- .gsub('Aqa', 'AQA') # Special case for AQA
75
+ .gsub(/(\d+\.\d+)/, ' \1')
76
+ .gsub(/\s+/, ' ')
77
+ .gsub('Aqa', 'AQA')
105
78
  .strip
106
79
  end
107
80
 
108
- # Determines if the model supports context caching
109
- # @param model_id [String] the model identifier
110
- # @return [Boolean] true if the model supports caching
111
81
  def supports_caching?(model_id)
112
82
  if model_id.match?(/flash-lite|gemini-2\.5-pro-exp-03-25|aqa|imagen|text-embedding|embedding-001/)
113
83
  return false
@@ -116,23 +86,14 @@ module RubyLLM
116
86
  model_id.match?(/gemini|pro|flash/)
117
87
  end
118
88
 
119
- # Determines if the model supports tuning
120
- # @param model_id [String] the model identifier
121
- # @return [Boolean] true if the model supports tuning
122
89
  def supports_tuning?(model_id)
123
90
  model_id.match?(/gemini-1\.5-flash|gemini-1\.5-flash-8b/)
124
91
  end
125
92
 
126
- # Determines if the model supports audio inputs
127
- # @param model_id [String] the model identifier
128
- # @return [Boolean] true if the model supports audio inputs
129
93
  def supports_audio?(model_id)
130
94
  model_id.match?(/gemini|pro|flash/)
131
95
  end
132
96
 
133
- # Returns the type of model (chat, embedding, image)
134
- # @param model_id [String] the model identifier
135
- # @return [String] the model type
136
97
  def model_type(model_id)
137
98
  case model_id
138
99
  when /text-embedding|embedding|gemini-embedding/ then 'embedding'
@@ -141,9 +102,6 @@ module RubyLLM
141
102
  end
142
103
  end
143
104
 
144
- # Returns the model family identifier
145
- # @param model_id [String] the model identifier
146
- # @return [String] the model family identifier
147
105
  def model_family(model_id)
148
106
  case model_id
149
107
  when /gemini-2\.5-pro-exp-03-25/ then 'gemini25_pro_exp'
@@ -161,9 +119,6 @@ module RubyLLM
161
119
  end
162
120
  end
163
121
 
164
- # Returns the pricing family identifier for the model
165
- # @param model_id [String] the model identifier
166
- # @return [Symbol] the pricing family identifier
167
122
  def pricing_family(model_id)
168
123
  case model_id
169
124
  when /gemini-2\.5-pro-exp-03-25/ then :pro_2_5 # rubocop:disable Naming/VariableNumber
@@ -180,86 +135,75 @@ module RubyLLM
180
135
  end
181
136
  end
182
137
 
183
- # Determines if the model supports long context
184
- # @param model_id [String] the model identifier
185
- # @return [Boolean] true if the model supports long context
186
138
  def long_context_model?(model_id)
187
139
  model_id.match?(/gemini-1\.5-(?:pro|flash)|gemini-1\.5-flash-8b/)
188
140
  end
189
141
 
190
- # Returns the context length for the model
191
- # @param model_id [String] the model identifier
192
- # @return [Integer] the context length in tokens
193
142
  def context_length(model_id)
194
143
  context_window_for(model_id)
195
144
  end
196
145
 
197
- # Pricing information for Gemini models (per 1M tokens in USD)
198
146
  PRICES = {
199
- flash_2: { # Gemini 2.0 Flash # rubocop:disable Naming/VariableNumber
147
+ flash_2: { # rubocop:disable Naming/VariableNumber
200
148
  input: 0.10,
201
149
  output: 0.40,
202
150
  audio_input: 0.70,
203
151
  cache: 0.025,
204
152
  cache_storage: 1.00,
205
- grounding_search: 35.00 # per 1K requests after 1.5K free
153
+ grounding_search: 35.00
206
154
  },
207
- flash_lite_2: { # Gemini 2.0 Flash Lite # rubocop:disable Naming/VariableNumber
155
+ flash_lite_2: { # rubocop:disable Naming/VariableNumber
208
156
  input: 0.075,
209
157
  output: 0.30
210
158
  },
211
- flash: { # Gemini 1.5 Flash
159
+ flash: {
212
160
  input: 0.075,
213
161
  output: 0.30,
214
162
  cache: 0.01875,
215
163
  cache_storage: 1.00,
216
- grounding_search: 35.00 # per 1K requests up to 5K per day
164
+ grounding_search: 35.00
217
165
  },
218
- flash_8b: { # Gemini 1.5 Flash 8B
166
+ flash_8b: {
219
167
  input: 0.0375,
220
168
  output: 0.15,
221
169
  cache: 0.01,
222
170
  cache_storage: 0.25,
223
- grounding_search: 35.00 # per 1K requests up to 5K per day
171
+ grounding_search: 35.00
224
172
  },
225
- pro: { # Gemini 1.5 Pro
173
+ pro: {
226
174
  input: 1.25,
227
175
  output: 5.0,
228
176
  cache: 0.3125,
229
177
  cache_storage: 4.50,
230
- grounding_search: 35.00 # per 1K requests up to 5K per day
178
+ grounding_search: 35.00
231
179
  },
232
- pro_2_5: { # Gemini 2.5 Pro Experimental # rubocop:disable Naming/VariableNumber
180
+ pro_2_5: { # rubocop:disable Naming/VariableNumber
233
181
  input: 0.12,
234
182
  output: 0.50
235
183
  },
236
- gemini_embedding: { # Gemini Embedding Experimental
184
+ gemini_embedding: {
237
185
  input: 0.002,
238
186
  output: 0.004
239
187
  },
240
- embedding: { # Text Embedding models
188
+ embedding: {
241
189
  input: 0.00,
242
190
  output: 0.00
243
191
  },
244
- imagen: { # Imagen 3
245
- price: 0.03 # per image
192
+ imagen: {
193
+ price: 0.03
246
194
  },
247
- aqa: { # AQA model
195
+ aqa: {
248
196
  input: 0.00,
249
197
  output: 0.00
250
198
  }
251
199
  }.freeze
252
200
 
253
- # Default input price for unknown models
254
- # @return [Float] the default input price per million tokens
255
201
  def default_input_price
256
- 0.075 # Default to Flash pricing
202
+ 0.075
257
203
  end
258
204
 
259
- # Default output price for unknown models
260
- # @return [Float] the default output price per million tokens
261
205
  def default_output_price
262
- 0.30 # Default to Flash pricing
206
+ 0.30
263
207
  end
264
208
 
265
209
  def modalities_for(model_id)
@@ -268,19 +212,16 @@ module RubyLLM
268
212
  output: ['text']
269
213
  }
270
214
 
271
- # Vision support
272
215
  if supports_vision?(model_id)
273
216
  modalities[:input] << 'image'
274
217
  modalities[:input] << 'pdf'
275
218
  end
276
219
 
277
- # Audio support
278
220
  modalities[:input] << 'audio' if model_id.match?(/audio/)
279
-
280
- # Embedding output
281
221
  modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
282
222
 
283
- # Image output for imagen models
223
+ modalities[:output] << 'image' if model_id.match?(/image-generation/)
224
+
284
225
  modalities[:output] = ['image'] if model_id.match?(/imagen/)
285
226
 
286
227
  modalities
@@ -289,21 +230,11 @@ module RubyLLM
289
230
  def capabilities_for(model_id)
290
231
  capabilities = ['streaming']
291
232
 
292
- # Function calling
293
233
  capabilities << 'function_calling' if supports_functions?(model_id)
294
-
295
- # JSON mode
296
234
  capabilities << 'structured_output' if supports_json_mode?(model_id)
297
-
298
- # Batch processing
299
235
  capabilities << 'batch' if model_id.match?(/embedding|flash/)
300
-
301
- # Caching
302
236
  capabilities << 'caching' if supports_caching?(model_id)
303
-
304
- # Tuning
305
237
  capabilities << 'fine_tuning' if supports_tuning?(model_id)
306
-
307
238
  capabilities
308
239
  end
309
240
 
@@ -316,10 +247,8 @@ module RubyLLM
316
247
  output_per_million: prices[:output]
317
248
  }
318
249
 
319
- # Add cached pricing if available
320
250
  standard_pricing[:cached_input_per_million] = prices[:input_hit] if prices[:input_hit]
321
251
 
322
- # Batch pricing (typically 50% discount)
323
252
  batch_pricing = {
324
253
  input_per_million: (standard_pricing[:input_per_million] || 0) * 0.5,
325
254
  output_per_million: (standard_pricing[:output_per_million] || 0) * 0.5
@@ -336,7 +265,6 @@ module RubyLLM
336
265
  }
337
266
  }
338
267
 
339
- # Add embedding pricing if applicable
340
268
  if model_id.match?(/embedding|gemini-embedding/)
341
269
  pricing[:embeddings] = {
342
270
  standard: { input_per_million: prices[:price] || 0.002 }
@@ -12,14 +12,16 @@ module RubyLLM
12
12
  end
13
13
 
14
14
  def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
15
- @model = model # Store model for completion_url/stream_url
15
+ @model = model.id
16
16
  payload = {
17
17
  contents: format_messages(messages),
18
18
  generationConfig: {
19
- temperature: temperature
19
+ responseModalities: capabilities.modalities_for(model.id)[:output]
20
20
  }
21
21
  }
22
22
 
23
+ payload[:generationConfig][:temperature] = temperature unless temperature.nil?
24
+
23
25
  if schema
24
26
  payload[:generationConfig][:responseMimeType] = 'application/json'
25
27
  payload[:generationConfig][:responseSchema] = convert_schema_to_gemini(schema)
@@ -43,7 +45,7 @@ module RubyLLM
43
45
  def format_role(role)
44
46
  case role
45
47
  when :assistant then 'model'
46
- when :system, :tool then 'user' # Gemini doesn't have system, use user role, function responses use user role
48
+ when :system, :tool then 'user'
47
49
  else role.to_s
48
50
  end
49
51
  end
@@ -62,7 +64,7 @@ module RubyLLM
62
64
  name: msg.tool_call_id,
63
65
  response: {
64
66
  name: msg.tool_call_id,
65
- content: msg.content
67
+ content: Media.format_content(msg.content)
66
68
  }
67
69
  }
68
70
  }]
@@ -87,31 +89,12 @@ module RubyLLM
87
89
  )
88
90
  end
89
91
 
90
- def convert_schema_to_gemini(schema) # rubocop:disable Metrics/PerceivedComplexity
92
+ def convert_schema_to_gemini(schema)
91
93
  return nil unless schema
92
94
 
93
- case schema[:type]
94
- when 'object'
95
- {
96
- type: 'OBJECT',
97
- properties: schema[:properties]&.transform_values { |prop| convert_schema_to_gemini(prop) } || {},
98
- required: schema[:required] || []
99
- }
100
- when 'array'
101
- {
102
- type: 'ARRAY',
103
- items: schema[:items] ? convert_schema_to_gemini(schema[:items]) : { type: 'STRING' }
104
- }
105
- when 'string'
106
- result = { type: 'STRING' }
107
- result[:enum] = schema[:enum] if schema[:enum]
108
- result
109
- when 'number', 'integer'
110
- { type: 'NUMBER' }
111
- when 'boolean'
112
- { type: 'BOOLEAN' }
113
- else
114
- { type: 'STRING' }
95
+ build_base_schema(schema).tap do |result|
96
+ result[:description] = schema[:description] if schema[:description]
97
+ apply_type_specific_attributes(result, schema)
115
98
  end
116
99
  end
117
100
 
@@ -119,10 +102,8 @@ module RubyLLM
119
102
  candidate = data.dig('candidates', 0)
120
103
  return '' unless candidate
121
104
 
122
- # Content will be empty for function calls
123
105
  return '' if function_call?(candidate)
124
106
 
125
- # Extract text content
126
107
  parts = candidate.dig('content', 'parts')
127
108
  text_parts = parts&.select { |p| p['text'] }
128
109
  return '' unless text_parts&.any?
@@ -140,6 +121,53 @@ module RubyLLM
140
121
  thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
141
122
  candidates + thoughts
142
123
  end
124
+
125
+ def build_base_schema(schema)
126
+ case schema[:type]
127
+ when 'object'
128
+ build_object_schema(schema)
129
+ when 'array'
130
+ { type: 'ARRAY', items: schema[:items] ? convert_schema_to_gemini(schema[:items]) : { type: 'STRING' } }
131
+ when 'number'
132
+ { type: 'NUMBER' }
133
+ when 'integer'
134
+ { type: 'INTEGER' }
135
+ when 'boolean'
136
+ { type: 'BOOLEAN' }
137
+ else
138
+ { type: 'STRING' }
139
+ end
140
+ end
141
+
142
+ def build_object_schema(schema)
143
+ {
144
+ type: 'OBJECT',
145
+ properties: (schema[:properties] || {}).transform_values { |prop| convert_schema_to_gemini(prop) },
146
+ required: schema[:required] || []
147
+ }.tap do |object|
148
+ object[:propertyOrdering] = schema[:propertyOrdering] if schema[:propertyOrdering]
149
+ object[:nullable] = schema[:nullable] if schema.key?(:nullable)
150
+ end
151
+ end
152
+
153
+ def apply_type_specific_attributes(result, schema)
154
+ case schema[:type]
155
+ when 'string'
156
+ copy_attributes(result, schema, :enum, :format, :nullable)
157
+ when 'number', 'integer'
158
+ copy_attributes(result, schema, :format, :minimum, :maximum, :enum, :nullable)
159
+ when 'array'
160
+ copy_attributes(result, schema, :minItems, :maxItems, :nullable)
161
+ when 'boolean'
162
+ copy_attributes(result, schema, :nullable)
163
+ end
164
+ end
165
+
166
+ def copy_attributes(target, source, *attributes)
167
+ attributes.each do |attr|
168
+ target[attr] = source[attr] if attr == :nullable ? source.key?(attr) : source[attr]
169
+ end
170
+ end
143
171
  end
144
172
  end
145
173
  end
@@ -17,8 +17,6 @@ module RubyLLM
17
17
 
18
18
  def parse_embedding_response(response, model:, text:)
19
19
  vectors = response.body['embeddings']&.map { |e| e['values'] }
20
- # If we only got one embedding AND the input was a single string (not an array),
21
- # return it as a single vector
22
20
  vectors = vectors.first if vectors&.length == 1 && !text.is_a?(Array)
23
21
 
24
22
  Embedding.new(vectors:, model:, input_tokens: 0)
@@ -9,7 +9,7 @@ module RubyLLM
9
9
  "models/#{@model}:predict"
10
10
  end
11
11
 
12
- def render_image_payload(prompt, model:, size:)
12
+ def render_image_payload(prompt, model:, size:, with:, params:) # rubocop:disable Lint/UnusedMethodArgument
13
13
  RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
14
14
  @model = model
15
15
  {
@@ -32,7 +32,6 @@ module RubyLLM
32
32
  raise Error, 'Unexpected response format from Gemini image generation API'
33
33
  end
34
34
 
35
- # Extract mime type and base64 data
36
35
  mime_type = image_data['mimeType'] || 'image/png'
37
36
  base64_data = image_data['bytesBase64Encoded']
38
37
 
@@ -8,7 +8,6 @@ module RubyLLM
8
8
  module_function
9
9
 
10
10
  def format_content(content)
11
- # Convert Hash/Array back to JSON string for API
12
11
  return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
13
12
  return [format_text(content)] unless content.is_a?(Content)
14
13
 
@@ -13,7 +13,6 @@ module RubyLLM
13
13
 
14
14
  def parse_list_models_response(response, slug, capabilities)
15
15
  Array(response.body['models']).map do |model_data|
16
- # Extract model ID without "models/" prefix
17
16
  model_id = model_data['name'].gsub('models/', '')
18
17
 
19
18
  Model::Info.new(
@@ -21,7 +20,7 @@ module RubyLLM
21
20
  name: model_data['displayName'],
22
21
  provider: slug,
23
22
  family: capabilities.model_family(model_id),
24
- created_at: nil, # Gemini API doesn't provide creation date
23
+ created_at: nil,
25
24
  context_window: model_data['inputTokenLimit'] || capabilities.context_window_for(model_id),
26
25
  max_output_tokens: model_data['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
27
26
  modalities: capabilities.modalities_for(model_id),
@@ -35,7 +35,21 @@ module RubyLLM
35
35
  return nil unless parts
36
36
 
37
37
  text_parts = parts.select { |p| p['text'] }
38
- text_parts.map { |p| p['text'] }.join if text_parts.any?
38
+ image_parts = parts.select { |p| p['inlineData'] }
39
+
40
+ content = RubyLLM::Content.new(text_parts.map { |p| p['text'] }.join)
41
+
42
+ image_parts.map do |p|
43
+ content.attach(
44
+ ImageAttachment.new(
45
+ data: p['inlineData']['data'],
46
+ mime_type: p['inlineData']['mimeType'],
47
+ model_id: data['modelVersion']
48
+ )
49
+ )
50
+ end
51
+
52
+ content
39
53
  end
40
54
 
41
55
  def extract_input_tokens(data)
@@ -5,7 +5,6 @@ module RubyLLM
5
5
  class Gemini
6
6
  # Tools methods for the Gemini API implementation
7
7
  module Tools
8
- # Format tools for Gemini API
9
8
  def format_tools(tools)
10
9
  return [] if tools.empty?
11
10
 
@@ -14,7 +13,6 @@ module RubyLLM
14
13
  }]
15
14
  end
16
15
 
17
- # Extract tool calls from response data
18
16
  def extract_tool_calls(data)
19
17
  return nil unless data
20
18
 
@@ -43,7 +41,6 @@ module RubyLLM
43
41
 
44
42
  private
45
43
 
46
- # Format a single tool for Gemini API
47
44
  def function_declaration_for(tool)
48
45
  {
49
46
  name: tool.name,
@@ -52,7 +49,6 @@ module RubyLLM
52
49
  }.compact
53
50
  end
54
51
 
55
- # Format tool parameters for Gemini API
56
52
  def format_parameters(parameters)
57
53
  {
58
54
  type: 'OBJECT',
@@ -66,7 +62,6 @@ module RubyLLM
66
62
  }
67
63
  end
68
64
 
69
- # Convert RubyLLM param types to Gemini API types
70
65
  def param_type_for_gemini(type)
71
66
  case type.to_s.downcase
72
67
  when 'integer', 'number', 'float' then 'NUMBER'
@@ -7,8 +7,18 @@ module RubyLLM
7
7
  module Chat
8
8
  module_function
9
9
 
10
+ def format_messages(messages)
11
+ messages.map do |msg|
12
+ {
13
+ role: format_role(msg.role),
14
+ content: GPUStack::Media.format_content(msg.content),
15
+ tool_calls: format_tool_calls(msg.tool_calls),
16
+ tool_call_id: msg.tool_call_id
17
+ }.compact
18
+ end
19
+ end
20
+
10
21
  def format_role(role)
11
- # GPUStack doesn't use the new OpenAI convention for system prompts
12
22
  role.to_s
13
23
  end
14
24
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ class GPUStack
6
+ # Handles formatting of media content (images, audio) for GPUStack APIs
7
+ module Media
8
+ extend OpenAI::Media
9
+
10
+ module_function
11
+
12
+ def format_content(content)
13
+ return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
14
+ return content unless content.is_a?(Content)
15
+
16
+ parts = []
17
+ parts << format_text(content.text) if content.text
18
+
19
+ content.attachments.each do |attachment|
20
+ case attachment.type
21
+ when :image
22
+ parts << GPUStack::Media.format_image(attachment)
23
+ when :text
24
+ parts << format_text_file(attachment)
25
+ else
26
+ raise UnsupportedAttachmentError, attachment.mime_type
27
+ end
28
+ end
29
+
30
+ parts
31
+ end
32
+
33
+ def format_image(image)
34
+ {
35
+ type: 'image_url',
36
+ image_url: {
37
+ url: "data:#{image.mime_type};base64,#{image.encoded}",
38
+ detail: 'auto'
39
+ }
40
+ }
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end