ruby_llm_community 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +34 -0
  4. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +5 -0
  5. data/lib/generators/ruby_llm/install/templates/model_model.rb.tt +6 -0
  6. data/lib/generators/ruby_llm/install_generator.rb +27 -2
  7. data/lib/ruby_llm/active_record/acts_as.rb +163 -24
  8. data/lib/ruby_llm/aliases.json +58 -5
  9. data/lib/ruby_llm/aliases.rb +7 -25
  10. data/lib/ruby_llm/chat.rb +10 -17
  11. data/lib/ruby_llm/configuration.rb +5 -12
  12. data/lib/ruby_llm/connection.rb +4 -4
  13. data/lib/ruby_llm/connection_multipart.rb +19 -0
  14. data/lib/ruby_llm/content.rb +5 -2
  15. data/lib/ruby_llm/embedding.rb +1 -2
  16. data/lib/ruby_llm/error.rb +0 -8
  17. data/lib/ruby_llm/image.rb +23 -8
  18. data/lib/ruby_llm/image_attachment.rb +21 -0
  19. data/lib/ruby_llm/message.rb +6 -6
  20. data/lib/ruby_llm/model/info.rb +12 -10
  21. data/lib/ruby_llm/model/pricing.rb +0 -3
  22. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  23. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  24. data/lib/ruby_llm/models.json +2147 -470
  25. data/lib/ruby_llm/models.rb +65 -34
  26. data/lib/ruby_llm/provider.rb +8 -8
  27. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  28. data/lib/ruby_llm/providers/anthropic/chat.rb +2 -2
  29. data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
  30. data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
  31. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  32. data/lib/ruby_llm/providers/bedrock/chat.rb +2 -4
  33. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  34. data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
  35. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
  36. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  37. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  38. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  39. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  40. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  41. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  42. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  43. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  44. data/lib/ruby_llm/providers/gemini/capabilities.rb +28 -100
  45. data/lib/ruby_llm/providers/gemini/chat.rb +57 -29
  46. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  47. data/lib/ruby_llm/providers/gemini/images.rb +1 -2
  48. data/lib/ruby_llm/providers/gemini/media.rb +0 -1
  49. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  50. data/lib/ruby_llm/providers/gemini/streaming.rb +15 -1
  51. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  52. data/lib/ruby_llm/providers/gpustack/chat.rb +11 -1
  53. data/lib/ruby_llm/providers/gpustack/media.rb +45 -0
  54. data/lib/ruby_llm/providers/gpustack/models.rb +44 -9
  55. data/lib/ruby_llm/providers/gpustack.rb +1 -0
  56. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  57. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  58. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  59. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  60. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  61. data/lib/ruby_llm/providers/ollama/media.rb +1 -6
  62. data/lib/ruby_llm/providers/ollama/models.rb +36 -0
  63. data/lib/ruby_llm/providers/ollama.rb +1 -0
  64. data/lib/ruby_llm/providers/openai/capabilities.rb +3 -16
  65. data/lib/ruby_llm/providers/openai/chat.rb +1 -3
  66. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  67. data/lib/ruby_llm/providers/openai/images.rb +73 -3
  68. data/lib/ruby_llm/providers/openai/media.rb +0 -1
  69. data/lib/ruby_llm/providers/openai/response.rb +120 -29
  70. data/lib/ruby_llm/providers/openai/response_media.rb +2 -2
  71. data/lib/ruby_llm/providers/openai/streaming.rb +107 -47
  72. data/lib/ruby_llm/providers/openai/tools.rb +1 -1
  73. data/lib/ruby_llm/providers/openai.rb +1 -3
  74. data/lib/ruby_llm/providers/openai_base.rb +2 -2
  75. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  76. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  77. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  78. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  79. data/lib/ruby_llm/providers/vertexai/chat.rb +14 -0
  80. data/lib/ruby_llm/providers/vertexai/embeddings.rb +32 -0
  81. data/lib/ruby_llm/providers/vertexai/models.rb +130 -0
  82. data/lib/ruby_llm/providers/vertexai/streaming.rb +14 -0
  83. data/lib/ruby_llm/providers/vertexai.rb +55 -0
  84. data/lib/ruby_llm/railtie.rb +0 -1
  85. data/lib/ruby_llm/stream_accumulator.rb +72 -10
  86. data/lib/ruby_llm/streaming.rb +16 -25
  87. data/lib/ruby_llm/tool.rb +2 -19
  88. data/lib/ruby_llm/tool_call.rb +0 -9
  89. data/lib/ruby_llm/version.rb +1 -1
  90. data/lib/ruby_llm_community.rb +5 -3
  91. data/lib/tasks/models.rake +525 -0
  92. data/lib/tasks/release.rake +37 -2
  93. data/lib/tasks/vcr.rake +0 -7
  94. metadata +13 -4
  95. data/lib/tasks/aliases.rake +0 -235
  96. data/lib/tasks/models_docs.rake +0 -224
  97. data/lib/tasks/models_update.rake +0 -108
data/lib/ruby_llm/providers/gpustack/models.rb
@@ -16,10 +16,10 @@ module RubyLLM
           items.map do |model|
             Model::Info.new(
               id: model['name'],
+              name: model['name'],
               created_at: model['created_at'] ? Time.parse(model['created_at']) : nil,
-              display_name: "#{model['source']}/#{model['name']}",
               provider: slug,
-              type: determine_model_type(model),
+              family: 'gpustack',
               metadata: {
                 description: model['description'],
                 source: model['source'],
@@ -30,13 +30,10 @@ module RubyLLM
                 categories: model['categories']
               },
               context_window: model.dig('meta', 'n_ctx'),
-              # Using context window as max tokens since it's not explicitly provided
-              max_tokens: model.dig('meta', 'n_ctx'),
-              supports_vision: model.dig('meta', 'support_vision') || false,
-              supports_functions: model.dig('meta', 'support_tool_calls') || false,
-              supports_json_mode: true, # Assuming all models support JSON mode
-              input_price_per_million: 0.0, # Price information not available in new format
-              output_price_per_million: 0.0 # Price information not available in new format
+              max_output_tokens: model.dig('meta', 'n_ctx'),
+              capabilities: build_capabilities(model),
+              modalities: build_modalities(model),
+              pricing: {}
             )
           end
         end
@@ -49,6 +46,44 @@ module RubyLLM

           'other'
         end
+
+        def build_capabilities(model) # rubocop:disable Metrics/PerceivedComplexity
+          capabilities = []
+
+          # Add streaming by default for LLM models
+          capabilities << 'streaming' if model['categories']&.include?('llm')
+
+          # Map GPUStack metadata to standard capabilities
+          capabilities << 'function_calling' if model.dig('meta', 'support_tool_calls')
+          capabilities << 'vision' if model.dig('meta', 'support_vision')
+          capabilities << 'reasoning' if model.dig('meta', 'support_reasoning')
+
+          # GPUStack models generally support structured output and json mode
+          capabilities << 'structured_output' if model['categories']&.include?('llm')
+          capabilities << 'json_mode' if model['categories']&.include?('llm')
+
+          capabilities
+        end
+
+        def build_modalities(model)
+          input_modalities = []
+          output_modalities = []
+
+          if model['categories']&.include?('llm')
+            input_modalities << 'text'
+            input_modalities << 'image' if model.dig('meta', 'support_vision')
+            input_modalities << 'audio' if model.dig('meta', 'support_audio')
+            output_modalities << 'text'
+          elsif model['categories']&.include?('embedding')
+            input_modalities << 'text'
+            output_modalities << 'embeddings'
+          end
+
+          {
+            input: input_modalities,
+            output: output_modalities
+          }
+        end
       end
     end
   end
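To make the new metadata shape concrete, here is a minimal standalone Ruby sketch of the mapping build_capabilities performs, fed a hypothetical GPUStack model hash. The 'categories' and 'meta' keys mirror what the diff reads from the GPUStack API; the model name and meta values are invented for illustration.

# Hypothetical GPUStack model record; key names mirror the ones read above.
model = {
  'name' => 'qwen2.5-vl-7b',
  'categories' => ['llm'],
  'meta' => { 'n_ctx' => 32_768, 'support_vision' => true, 'support_tool_calls' => true }
}

capabilities = []
capabilities << 'streaming' if model['categories']&.include?('llm')
capabilities << 'function_calling' if model.dig('meta', 'support_tool_calls')
capabilities << 'vision' if model.dig('meta', 'support_vision')
capabilities << 'reasoning' if model.dig('meta', 'support_reasoning')
capabilities << 'structured_output' if model['categories']&.include?('llm')
capabilities << 'json_mode' if model['categories']&.include?('llm')

p capabilities
# => ["streaming", "function_calling", "vision", "structured_output", "json_mode"]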
data/lib/ruby_llm/providers/gpustack.rb
@@ -6,6 +6,7 @@ module RubyLLM
     class GPUStack < OpenAIBase
       include GPUStack::Chat
       include GPUStack::Models
+      include GPUStack::Media

       def api_base
         @config.gpustack_api_base
data/lib/ruby_llm/providers/mistral/capabilities.rb
@@ -8,22 +8,18 @@ module RubyLLM
       module_function

       def supports_streaming?(model_id)
-        # All chat models support streaming, but not embedding/moderation/OCR/transcription
         !model_id.match?(/embed|moderation|ocr|transcriptions/)
       end

       def supports_tools?(model_id)
-        # Most chat models support tools except embedding/moderation/OCR/voxtral/transcription
         !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions|mistral-(tiny|small)-(2312|2402)/)
       end

       def supports_vision?(model_id)
-        # Models with vision capabilities
         model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
       end

       def supports_json_mode?(model_id)
-        # Most chat models support JSON mode (structured output)
         !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
       end

@@ -58,11 +54,11 @@ module RubyLLM
       end

       def context_window_for(_model_id)
-        32_768 # Default for most Mistral models
+        32_768
       end

       def max_tokens_for(_model_id)
-        8192 # Default for most Mistral models
+        8192
       end

       def modalities_for(model_id)
@@ -97,7 +93,6 @@ module RubyLLM
         capabilities << 'structured_output' if supports_json_mode?(model_id)
         capabilities << 'vision' if supports_vision?(model_id)

-        # Model-specific capabilities
         capabilities << 'reasoning' if model_id.match?(/magistral/)
         capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
         capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
@@ -117,12 +112,10 @@ module RubyLLM

       def release_date_for(model_id)
         case model_id
-        # 2023 releases
         when 'open-mistral-7b', 'mistral-tiny' then '2023-09-27'
         when 'mistral-medium-2312', 'mistral-small-2312', 'mistral-small',
              'open-mixtral-8x7b', 'mistral-tiny-2312' then '2023-12-11'

-        # 2024 releases
         when 'mistral-embed' then '2024-01-11'
         when 'mistral-large-2402', 'mistral-small-2402' then '2024-02-26'
         when 'open-mixtral-8x22b', 'open-mixtral-8x22b-2404' then '2024-04-17'
@@ -140,7 +133,6 @@ module RubyLLM
         when 'codestral-2411-rc5', 'mistral-moderation-2411', 'mistral-moderation-latest' then '2024-11-26'
         when 'codestral-2412' then '2024-12-17'

-        # 2025 releases
         when 'mistral-small-2501' then '2025-01-13'
         when 'codestral-2501' then '2025-01-14'
         when 'mistral-saba-2502', 'mistral-saba-latest' then '2025-02-18'
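Since every Mistral capability here is inferred from the model ID alone, the predicates can be exercised standalone. A small sketch, with the regexes copied from the diff and real Mistral-style model IDs used purely as test inputs:

# Regex predicates copied from the diff; they classify by model ID only.
def supports_vision?(model_id)
  model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
end

def supports_streaming?(model_id)
  !model_id.match?(/embed|moderation|ocr|transcriptions/)
end

p supports_vision?('pixtral-12b-2409')  # => true
p supports_vision?('mistral-embed')     # => false
p supports_streaming?('mistral-embed')  # => false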
data/lib/ruby_llm/providers/mistral/chat.rb
@@ -8,14 +8,12 @@ module RubyLLM
       module_function

       def format_role(role)
-        # Mistral doesn't use the new OpenAI convention for system prompts
         role.to_s
       end

       # rubocop:disable Metrics/ParameterLists
       def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists
         payload = super
-        # Mistral doesn't support stream_options
         payload.delete(:stream_options)
         payload
       end
data/lib/ruby_llm/providers/mistral/embeddings.rb
@@ -12,7 +12,6 @@ module RubyLLM
       end

       def render_embedding_payload(text, model:, dimensions:) # rubocop:disable Lint/UnusedMethodArgument
-        # Mistral doesn't support dimensions parameter
         {
           model: model,
           input: text
@@ -24,8 +23,6 @@ module RubyLLM
         input_tokens = data.dig('usage', 'prompt_tokens') || 0
         vectors = data['data'].map { |d| d['embedding'] }

-        # If we only got one embedding AND the input was a single string (not an array),
-        # return it as a single vector
         vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)

         Embedding.new(vectors:, model:, input_tokens:)
data/lib/ruby_llm/providers/mistral/models.rb
@@ -21,7 +21,6 @@ module RubyLLM
         Array(response.body['data']).map do |model_data|
           model_id = model_data['id']

-          # Use fixed release date for Mistral models
           release_date = capabilities.release_date_for(model_id)
           created_at = release_date ? Time.parse(release_date) : nil

data/lib/ruby_llm/providers/ollama/chat.rb
@@ -19,7 +19,6 @@ module RubyLLM
       end

       def format_role(role)
-        # Ollama doesn't use the new OpenAI convention for system prompts
         role.to_s
       end
     end
data/lib/ruby_llm/providers/ollama/media.rb
@@ -3,14 +3,13 @@
 module RubyLLM
   module Providers
     class Ollama
-      # Handles formatting of media content (images, audio) for OpenAI APIs
+      # Handles formatting of media content (images, audio) for Ollama APIs
       module Media
         extend OpenAI::Media

         module_function

         def format_content(content)
-          # Convert Hash/Array back to JSON string for API
           return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)

@@ -21,10 +20,6 @@ module RubyLLM
           case attachment.type
           when :image
             parts << Ollama::Media.format_image(attachment)
-          when :pdf
-            parts << format_pdf(attachment)
-          when :audio
-            parts << format_audio(attachment)
           when :text
             parts << format_text_file(attachment)
           else
data/lib/ruby_llm/providers/ollama/models.rb (new file)
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Ollama
+      # Models methods for the Ollama API integration
+      module Models
+        def models_url
+          'models'
+        end
+
+        def parse_list_models_response(response, slug, _capabilities)
+          data = response.body['data'] || []
+          data.map do |model|
+            Model::Info.new(
+              id: model['id'],
+              name: model['id'],
+              provider: slug,
+              family: 'ollama',
+              created_at: model['created'] ? Time.at(model['created']) : nil,
+              modalities: {
+                input: %w[text image],
+                output: %w[text]
+              },
+              capabilities: %w[streaming function_calling structured_output vision],
+              pricing: {},
+              metadata: {
+                owned_by: model['owned_by']
+              }
+            )
+          end
+        end
+      end
+    end
+  end
+end
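For reference, a standalone sketch of what the new parse_list_models_response does with a /v1/models-style body (Ollama exposes the OpenAI-compatible shape read above); the model id, epoch timestamp, and owner below are hypothetical, and a plain hash stands in for Model::Info:

# Hypothetical response body in the OpenAI-compatible shape parsed above.
body = { 'data' => [{ 'id' => 'llama3.2', 'created' => 1_727_000_000, 'owned_by' => 'library' }] }

models = (body['data'] || []).map do |model|
  {
    id: model['id'],
    name: model['id'], # no separate display name, so the id is reused
    family: 'ollama',
    created_at: model['created'] ? Time.at(model['created']) : nil,
    metadata: { owned_by: model['owned_by'] }
  }
end

p models.first[:id] # => "llama3.2"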
data/lib/ruby_llm/providers/ollama.rb
@@ -6,6 +6,7 @@ module RubyLLM
     class Ollama < OpenAIBase
       include Ollama::Chat
       include Ollama::Media
+      include Ollama::Models

       def api_base
         @config.ollama_api_base
data/lib/ruby_llm/providers/openai/capabilities.rb
@@ -10,6 +10,7 @@ module RubyLLM
       MODEL_PATTERNS = {
         dall_e: /^dall-e/,
         chatgpt4o: /^chatgpt-4o/,
+        gpt_image: /^gpt-image/,
         gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
         gpt41_mini: /^gpt-4\.1-mini/,
         gpt41_nano: /^gpt-4\.1-nano/,
@@ -105,6 +106,7 @@ module RubyLLM
       end

       PRICES = {
+        gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 }, # rubocop:disable Naming/VariableNumber
         gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
         gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
         gpt41_nano: { input: 0.1, output: 0.4 },
@@ -168,7 +170,7 @@ module RubyLLM
         when /embedding/ then 'embedding'
         when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
         when 'moderation' then 'moderation'
-        when /dall/ then 'image'
+        when /dall-e|gpt-image/ then 'image'
         else 'chat'
         end
       end
@@ -235,20 +237,11 @@ module RubyLLM

         # Vision support
         modalities[:input] << 'image' if supports_vision?(model_id)
-
-        # Audio support
         modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
-
-        # PDF support
         modalities[:input] << 'pdf' if supports_vision?(model_id)
-
-        # Output modalities
         modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
-
         modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
-
         modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
-
         modalities[:output] << 'moderation' if model_id.match?(/moderation/)

         modalities
@@ -257,13 +250,10 @@ module RubyLLM
       def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
         capabilities = []

-        # Common capabilities
         capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
         capabilities << 'function_calling' if supports_functions?(model_id)
         capabilities << 'structured_output' if supports_json_mode?(model_id)
         capabilities << 'batch' if model_id.match?(/embedding|batch/)
-
-        # Advanced capabilities
         capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)

         if model_id.match?(/gpt-4-turbo|gpt-4o/)
@@ -281,16 +271,13 @@ module RubyLLM
           output_per_million: output_price_for(model_id)
         }

-        # Add cached pricing if available
         if respond_to?(:cached_input_price_for)
           cached_price = cached_input_price_for(model_id)
           standard_pricing[:cached_input_per_million] = cached_price if cached_price
         end

-        # Pricing structure
         pricing = { text_tokens: { standard: standard_pricing } }

-        # Add batch pricing if applicable
         if model_id.match?(/embedding|batch/)
           pricing[:text_tokens][:batch] = {
             input_per_million: standard_pricing[:input_per_million] * 0.5,
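The hunk above is cut off inside the batch branch; a minimal sketch of the pricing hash it assembles, with invented per-million rates, assuming the truncated branch also halves the output price (only the input line is visible in the diff):

# Invented rates; only the structure mirrors the diff above.
standard_pricing = { input_per_million: 0.1, output_per_million: 0.4 }
pricing = { text_tokens: { standard: standard_pricing } }

model_id = 'text-embedding-3-small'
if model_id.match?(/embedding|batch/)
  pricing[:text_tokens][:batch] = {
    input_per_million: standard_pricing[:input_per_million] * 0.5,
    output_per_million: standard_pricing[:output_per_million] * 0.5 # assumed: hunk truncates here
  }
end

p pricing[:text_tokens][:batch]
# => {:input_per_million=>0.05, :output_per_million=>0.2}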
data/lib/ruby_llm/providers/openai/chat.rb
@@ -13,18 +13,16 @@ module RubyLLM

       def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists
         payload = {
-          model: model,
+          model: model.id,
           messages: format_messages(messages),
           stream: stream
         }

-        # Only include temperature if it's not nil (some models don't accept it)
         payload[:temperature] = temperature unless temperature.nil?

         payload[:tools] = tools.map { |_, tool| chat_tool_for(tool) } if tools.any?

         if schema
-          # Use strict mode from schema if specified, default to true
           strict = schema[:strict] != false

           payload[:response_format] = {
data/lib/ruby_llm/providers/openai/embeddings.rb
@@ -23,9 +23,6 @@ module RubyLLM
         data = response.body
         input_tokens = data.dig('usage', 'prompt_tokens') || 0
         vectors = data['data'].map { |d| d['embedding'] }
-
-        # If we only got one embedding AND the input was a single string (not an array),
-        # return it as a single vector
         vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)

         Embedding.new(vectors:, model:, input_tokens:)
data/lib/ruby_llm/providers/openai/images.rb
@@ -5,13 +5,49 @@ module RubyLLM
     class OpenAI
       # Image generation methods for the OpenAI API integration
       module Images
+        def paint(prompt, model:, size:, with:, params:)
+          @operation = with.nil? ? :generation : :editing
+          @connection = connection_multipart(@connection.config) if editing? && !multipart_middleware?(@connection)
+          super
+        end
+
+        private
+
+        def editing?
+          @operation == :editing
+        end
+
+        def generating?
+          @operation == :generation
+        end
+
+        def multipart_middleware?(connection)
+          connection.connection.builder.handlers.include?(Faraday::Multipart::Middleware)
+        end
+
         module_function

         def images_url
+          generating? ? generation_url : edits_url
+        end
+
+        def generation_url
           'images/generations'
         end

-        def render_image_payload(prompt, model:, size:)
+        def edits_url
+          'images/edits'
+        end
+
+        def render_image_payload(prompt, model:, size:, with:, params:)
+          if generating?
+            render_generation_payload(prompt, model:, size:)
+          else
+            render_edit_payload(prompt, model:, with:, params:)
+          end
+        end
+
+        def render_generation_payload(prompt, model:, size:)
           {
             model: model,
             prompt: prompt,
@@ -20,16 +56,50 @@ module RubyLLM
           }
         end

+        def render_edit_payload(prompt, model:, with:, params:)
+          content = Content.new(prompt, with)
+          params[:image] = []
+          content.attachments.each do |attachment|
+            params[:image] << Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type,
+                                                    attachment.filename)
+          end
+          params.merge({
+                         model:,
+                         prompt: content.text,
+                         n: 1
+                       })
+        end
+
         def parse_image_response(response, model:)
+          if generating?
+            parse_generation_response(response, model:)
+          else
+            parse_edit_response(response, model:)
+          end
+        end
+
+        def parse_generation_response(response, model:)
           data = response.body
           image_data = data['data'].first

           Image.new(
             url: image_data['url'],
-            mime_type: 'image/png', # DALL-E typically returns PNGs
+            mime_type: 'image/png',
             revised_prompt: image_data['revised_prompt'],
             model_id: model,
-            data: image_data['b64_json']
+            data: image_data['b64_json'],
+            usage: data['usage']
+          )
+        end
+
+        def parse_edit_response(response, model:)
+          data = response.body
+          image_data = data['data'].first
+          Image.new(
+            mime_type: 'image/png',
+            model_id: model,
+            data: image_data['b64_json'],
+            usage: data['usage']
           )
         end
       end
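The override above routes a single paint call to either the generations or the edits endpoint, keyed on whether attachments were passed via with:. A standalone sketch of just that dispatch rule (Faraday and multipart wiring omitted):

# Mirrors the branching added above: attachments select the edits endpoint.
def operation_for(with)
  with.nil? ? :generation : :editing
end

def images_url_for(operation)
  operation == :generation ? 'images/generations' : 'images/edits'
end

p images_url_for(operation_for(nil))         # => "images/generations"
p images_url_for(operation_for(['fox.png'])) # => "images/edits"

Presumably this surfaces through the gem's RubyLLM.paint helper gaining the with:/params: options seen in the provider signature, though that entry point is not shown in this diff.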
data/lib/ruby_llm/providers/openai/media.rb
@@ -8,7 +8,6 @@ module RubyLLM
       module_function

       def format_content(content)
-        # Convert Hash/Array back to JSON string for API
         return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
         return content unless content.is_a?(Content)
