ruby_llm_community 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +25 -7
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +127 -67
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -0
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
  19. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
  20. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
  21. data/lib/generators/ruby_llm/generator_helpers.rb +131 -87
  22. data/lib/generators/ruby_llm/install/install_generator.rb +75 -73
  23. data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
  24. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +0 -1
  25. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -3
  26. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +0 -1
  27. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  28. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
  29. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  30. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  31. data/lib/ruby_llm/active_record/acts_as.rb +17 -8
  32. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  33. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  34. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  35. data/lib/ruby_llm/aliases.json +46 -20
  36. data/lib/ruby_llm/attachment.rb +8 -0
  37. data/lib/ruby_llm/chat.rb +13 -2
  38. data/lib/ruby_llm/configuration.rb +10 -1
  39. data/lib/ruby_llm/connection.rb +4 -4
  40. data/lib/ruby_llm/content.rb +23 -0
  41. data/lib/ruby_llm/message.rb +17 -9
  42. data/lib/ruby_llm/model/info.rb +4 -0
  43. data/lib/ruby_llm/models.json +12050 -9940
  44. data/lib/ruby_llm/models.rb +21 -25
  45. data/lib/ruby_llm/moderation.rb +56 -0
  46. data/lib/ruby_llm/provider.rb +29 -1
  47. data/lib/ruby_llm/providers/anthropic/chat.rb +18 -5
  48. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  49. data/lib/ruby_llm/providers/anthropic/media.rb +5 -4
  50. data/lib/ruby_llm/providers/anthropic/models.rb +9 -2
  51. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  52. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  53. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +9 -2
  54. data/lib/ruby_llm/providers/gemini/chat.rb +353 -72
  55. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  56. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  57. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  58. data/lib/ruby_llm/providers/gemini.rb +2 -1
  59. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  60. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  61. data/lib/ruby_llm/providers/openai/capabilities.rb +15 -7
  62. data/lib/ruby_llm/providers/openai/chat.rb +7 -3
  63. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  64. data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
  65. data/lib/ruby_llm/providers/openai/streaming.rb +7 -3
  66. data/lib/ruby_llm/providers/openai/tools.rb +34 -12
  67. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  68. data/lib/ruby_llm/providers/openai_base.rb +2 -0
  69. data/lib/ruby_llm/providers/red_candle/capabilities.rb +124 -0
  70. data/lib/ruby_llm/providers/red_candle/chat.rb +317 -0
  71. data/lib/ruby_llm/providers/red_candle/models.rb +121 -0
  72. data/lib/ruby_llm/providers/red_candle/streaming.rb +40 -0
  73. data/lib/ruby_llm/providers/red_candle.rb +90 -0
  74. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  75. data/lib/ruby_llm/providers/vertexai.rb +3 -0
  76. data/lib/ruby_llm/railtie.rb +1 -1
  77. data/lib/ruby_llm/stream_accumulator.rb +8 -12
  78. data/lib/ruby_llm/tool.rb +126 -0
  79. data/lib/ruby_llm/transcription.rb +35 -0
  80. data/lib/ruby_llm/utils.rb +46 -0
  81. data/lib/ruby_llm/version.rb +1 -1
  82. data/lib/ruby_llm_community.rb +38 -1
  83. metadata +35 -3
data/lib/ruby_llm/providers/red_candle/capabilities.rb (new file)
@@ -0,0 +1,124 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class RedCandle
+       # Determines capabilities and pricing for RedCandle models
+       module Capabilities
+         module_function
+
+         def supports_vision?
+           false
+         end
+
+         def supports_functions?(_model_id = nil)
+           false
+         end
+
+         def supports_streaming?
+           true
+         end
+
+         def supports_structured_output?
+           true
+         end
+
+         def supports_regex_constraints?
+           true
+         end
+
+         def supports_embeddings?
+           false # Future enhancement - Red Candle does support embedding models
+         end
+
+         def supports_audio?
+           false
+         end
+
+         def supports_pdf?
+           false
+         end
+
+         def normalize_temperature(temperature, _model_id)
+           # Red Candle uses standard 0-2 range
+           return 0.7 if temperature.nil?
+
+           temperature = temperature.to_f
+           temperature.clamp(0.0, 2.0)
+         end
+
+         def model_context_window(model_id)
+           case model_id
+           when /gemma-3-4b/i
+             8192
+           when /qwen2\.5-1\.5b/i, /mistral-7b/i
+             32_768
+           when /tinyllama/i
+             2048
+           else
+             4096 # Conservative default
+           end
+         end
+
+         def pricing
+           # Local execution - no API costs
+           {
+             input_tokens_per_dollar: Float::INFINITY,
+             output_tokens_per_dollar: Float::INFINITY,
+             input_price_per_million_tokens: 0.0,
+             output_price_per_million_tokens: 0.0
+           }
+         end
+
+         def default_max_tokens
+           512
+         end
+
+         def max_temperature
+           2.0
+         end
+
+         def min_temperature
+           0.0
+         end
+
+         def supports_temperature?
+           true
+         end
+
+         def supports_top_p?
+           true
+         end
+
+         def supports_top_k?
+           true
+         end
+
+         def supports_repetition_penalty?
+           true
+         end
+
+         def supports_seed?
+           true
+         end
+
+         def supports_stop_sequences?
+           true
+         end
+
+         def model_families
+           %w[gemma llama qwen2 mistral phi]
+         end
+
+         def available_on_platform?
+           # Check if Candle can be loaded
+
+           require 'candle'
+           true
+         rescue LoadError
+           false
+         end
+       end
+     end
+   end
+ end
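Since Capabilities is declared with module_function, the flags above can be queried directly on the module. A small sketch for orientation (method names come from the diff; the return values follow from the definitions above):

    caps = RubyLLM::Providers::RedCandle::Capabilities

    caps.supports_streaming?   # => true
    caps.supports_functions?   # => false (tool calling is rejected in Chat#render_payload)
    caps.model_context_window('TheBloke/Mistral-7B-Instruct-v0.2-GGUF') # => 32_768
    caps.normalize_temperature(nil, 'any-model')                        # => 0.7 (default)
    caps.normalize_temperature(3.5, 'any-model')                        # => 2.0 (clamped to 0-2)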
data/lib/ruby_llm/providers/red_candle/chat.rb (new file)
@@ -0,0 +1,317 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class RedCandle
+       # Chat implementation for Red Candle provider
+       module Chat
+         # Override the base complete method to handle local execution
+         def complete(messages, tools:, temperature:, cache_prompts:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+           _ = headers # Interface compatibility
+           _ = cache_prompts # Interface compatibility
+           payload = Utils.deep_merge(
+             render_payload(
+               messages,
+               tools: tools,
+               temperature: temperature,
+               model: model,
+               stream: block_given?,
+               schema: schema
+             ),
+             params
+           )
+
+           if block_given?
+             perform_streaming_completion!(payload, &)
+           else
+             result = perform_completion!(payload)
+             # Convert to Message object for compatibility
+             # Red Candle doesn't provide token counts by default, but we can estimate them
+             content = result[:content]
+             # Rough estimation: ~4 characters per token
+             estimated_output_tokens = (content.length / 4.0).round
+             estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+             Message.new(
+               role: result[:role].to_sym,
+               content: content,
+               model_id: model.id,
+               input_tokens: estimated_input_tokens,
+               output_tokens: estimated_output_tokens
+             )
+           end
+         end
+
+         def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists
+           # Red Candle doesn't support tools
+           raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty?
+
+           {
+             messages: messages,
+             temperature: temperature,
+             model: model.id,
+             stream: stream,
+             schema: schema
+           }
+         end
+
+         def perform_completion!(payload)
+           model = ensure_model_loaded!(payload[:model])
+           messages = format_messages(payload[:messages])
+
+           # Apply chat template if available
+           prompt = if model.respond_to?(:apply_chat_template)
+                      model.apply_chat_template(messages)
+                    else
+                      # Fallback to simple formatting
+                      "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                    end
+
+           # Check context length
+           validate_context_length!(prompt, payload[:model])
+
+           # Configure generation
+           config_opts = {
+             temperature: payload[:temperature] || 0.7,
+             max_length: payload[:max_tokens] || 512
+           }
+
+           # Handle structured generation if schema provided
+           response = if payload[:schema]
+                        generate_with_schema(model, prompt, payload[:schema], config_opts)
+                      else
+                        model.generate(
+                          prompt,
+                          config: ::Candle::GenerationConfig.balanced(**config_opts)
+                        )
+                      end
+
+           format_response(response, payload[:schema])
+         end
+
+         def perform_streaming_completion!(payload, &block)
+           model = ensure_model_loaded!(payload[:model])
+           messages = format_messages(payload[:messages])
+
+           # Apply chat template if available
+           prompt = if model.respond_to?(:apply_chat_template)
+                      model.apply_chat_template(messages)
+                    else
+                      "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                    end
+
+           # Check context length
+           validate_context_length!(prompt, payload[:model])
+
+           # Configure generation
+           config = ::Candle::GenerationConfig.balanced(
+             temperature: payload[:temperature] || 0.7,
+             max_length: payload[:max_tokens] || 512
+           )
+
+           # Collect all streamed content
+           full_content = ''
+
+           # Stream tokens
+           model.generate_stream(prompt, config: config) do |token|
+             full_content += token
+             chunk = format_stream_chunk(token)
+             block.call(chunk)
+           end
+
+           # Send final chunk with empty content (indicates completion)
+           final_chunk = format_stream_chunk('')
+           block.call(final_chunk)
+
+           # Return a Message object with the complete response
+           estimated_output_tokens = (full_content.length / 4.0).round
+           estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+           Message.new(
+             role: :assistant,
+             content: full_content,
+             model_id: payload[:model],
+             input_tokens: estimated_input_tokens,
+             output_tokens: estimated_output_tokens
+           )
+         end
+
+         private
+
+         def ensure_model_loaded!(model_id)
+           @loaded_models[model_id] ||= load_model(model_id)
+         end
+
+         def model_options(model_id)
+           # Get GGUF file and tokenizer if this is a GGUF model
+           # Access the methods from the Models module which is included in the provider
+           options = { device: @device }
+           options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+           options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+           options
+         end
+
+         def load_model(model_id)
+           options = model_options(model_id)
+           ::Candle::LLM.from_pretrained(model_id, **options)
+         rescue StandardError => e
+           if e.message.include?('Failed to find tokenizer')
+             raise Error.new(nil, token_error_message(e, options[:tokenizer]))
+           elsif e.message.include?('Failed to find model')
+             raise Error.new(nil, model_error_message(e, model_id))
+           else
+             raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
+           end
+         end
+
+         def token_error_message(exception, tokenizer)
+           <<~ERROR_MESSAGE
+             Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+             Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+             And that you have accepted the terms of service for the tokenizer.
+             If it requires authentication, login with: huggingface-cli login
+             See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+             Original error: #{exception.message}"
+           ERROR_MESSAGE
+         end
+
+         def model_error_message(exception, model_id)
+           <<~ERROR_MESSAGE
+             Failed to load model #{model_id}: #{exception.message}
+             Please verify the model exists at: https://huggingface.co/#{model_id}
+             And that you have accepted the terms of service for the model.
+             If it requires authentication, login with: huggingface-cli login
+             See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+             Original error: #{exception.message}"
+           ERROR_MESSAGE
+         end
+
+         def format_messages(messages)
+           messages.map do |msg|
+             # Handle both hash and Message objects
+             if msg.is_a?(Message)
+               {
+                 role: msg.role.to_s,
+                 content: extract_message_content_from_object(msg)
+               }
+             else
+               {
+                 role: msg[:role].to_s,
+                 content: extract_message_content(msg)
+               }
+             end
+           end
+         end
+
+         def extract_message_content_from_object(message)
+           content = message.content
+
+           # Handle Content objects
+           if content.is_a?(Content)
+             # Extract text from Content object, including attachment text
+             handle_content_object(content)
+           elsif content.is_a?(String)
+             content
+           else
+             content.to_s
+           end
+         end
+
+         def extract_message_content(message)
+           content = message[:content]
+
+           # Handle Content objects
+           case content
+           when Content
+             # Extract text from Content object
+             handle_content_object(content)
+           when String
+             content
+           when Array
+             # Handle array content (e.g., with images)
+             content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ')
+           else
+             content.to_s
+           end
+         end
+
+         def handle_content_object(content)
+           text_parts = []
+           text_parts << content.text if content.text
+
+           # Add any text from attachments
+           content.attachments&.each do |attachment|
+             text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String)
+           end
+
+           text_parts.join(' ')
+         end
+
+         def generate_with_schema(model, prompt, schema, config_opts)
+           model.generate_structured(
+             prompt,
+             schema: schema,
+             **config_opts
+           )
+         rescue StandardError => e
+           RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation."
+           model.generate(
+             prompt,
+             config: ::Candle::GenerationConfig.balanced(**config_opts)
+           )
+         end
+
+         def format_response(response, schema)
+           content = if schema && !response.is_a?(String)
+                       # Structured response
+                       JSON.generate(response)
+                     else
+                       response
+                     end
+
+           {
+             content: content,
+             role: 'assistant'
+           }
+         end
+
+         def format_stream_chunk(token)
+           # Return a Chunk object for streaming compatibility
+           Chunk.new(
+             role: :assistant,
+             content: token
+           )
+         end
+
+         def estimate_input_tokens(messages)
+           # Rough estimation: ~4 characters per token
+           formatted = format_messages(messages)
+           total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length }
+           (total_chars / 4.0).round
+         end
+
+         def validate_context_length!(prompt, model_id)
+           # Get the context window for this model
+           context_window = if respond_to?(:model_context_window)
+                              model_context_window(model_id)
+                            else
+                              4096 # Conservative default
+                            end
+
+           # Estimate tokens in prompt (~4 characters per token)
+           estimated_tokens = (prompt.length / 4.0).round
+
+           # Check if prompt exceeds context window (leave some room for response)
+           max_input_tokens = context_window - 512 # Reserve 512 tokens for response
+           return unless estimated_tokens > max_input_tokens
+
+           raise Error.new(
+             nil,
+             "Context length exceeded. Estimated #{estimated_tokens} tokens, " \
+             "but model #{model_id} has a context window of #{context_window} tokens."
+           )
+         end
+       end
+     end
+   end
+ end
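Worth noting: token counts from this provider are estimates, not tokenizer output. Both estimate_input_tokens and the output count divide character length by four. A rough illustration of that arithmetic, using a hypothetical one-message conversation (not from the diff):

    messages = [{ role: 'user', content: 'Explain GGUF quantization in one sentence.' }]
    chars = messages.sum { |m| "#{m[:role]}: #{m[:content]}".length }  # => 48
    (chars / 4.0).round                                                # => 12 estimated input tokens

The same ~4-characters-per-token heuristic drives validate_context_length!, which additionally reserves 512 tokens of the model's window for the response.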
data/lib/ruby_llm/providers/red_candle/models.rb (new file)
@@ -0,0 +1,121 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class RedCandle
+       # Models methods of the RedCandle integration
+       module Models
+         # TODO: red-candle supports more models, but let's start with some well tested ones.
+         SUPPORTED_MODELS = [
+           {
+             id: 'google/gemma-3-4b-it-qat-q4_0-gguf',
+             name: 'Gemma 3 4B Instruct (Quantized)',
+             gguf_file: 'gemma-3-4b-it-q4_0.gguf',
+             tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model
+             context_window: 8192,
+             family: 'gemma',
+             architecture: 'gemma2',
+             supports_chat: true,
+             supports_structured: true
+           },
+           {
+             id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
+             name: 'TinyLlama 1.1B Chat (Quantized)',
+             gguf_file: 'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
+             context_window: 2048,
+             family: 'llama',
+             architecture: 'llama',
+             supports_chat: true,
+             supports_structured: true
+           },
+           {
+             id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
+             name: 'Mistral 7B Instruct v0.2 (Quantized)',
+             gguf_file: 'mistral-7b-instruct-v0.2.Q4_K_M.gguf',
+             tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2',
+             context_window: 32_768,
+             family: 'mistral',
+             architecture: 'mistral',
+             supports_chat: true,
+             supports_structured: true
+           },
+           {
+             id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF',
+             name: 'Qwen 2.1.5B Instruct (Quantized)',
+             gguf_file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf',
+             context_window: 32_768,
+             family: 'qwen2',
+             architecture: 'qwen2',
+             supports_chat: true,
+             supports_structured: true
+           },
+           {
+             id: 'microsoft/Phi-3-mini-4k-instruct',
+             name: 'Phi 3',
+             context_window: 4096,
+             family: 'phi',
+             architecture: 'phi',
+             supports_chat: true,
+             supports_structured: true
+           }
+         ].freeze
+
+         def list_models
+           SUPPORTED_MODELS.map do |model_data|
+             Model::Info.new(
+               id: model_data[:id],
+               name: model_data[:name],
+               provider: slug,
+               family: model_data[:family],
+               context_window: model_data[:context_window],
+               capabilities: %w[streaming structured_output],
+               modalities: { input: %w[text], output: %w[text] }
+             )
+           end
+         end
+
+         def models
+           @models ||= list_models
+         end
+
+         def model(id)
+           models.find { |m| m.id == id } ||
+             raise(Error.new(nil,
+                             "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}"))
+         end
+
+         def model_available?(id)
+           SUPPORTED_MODELS.any? { |m| m[:id] == id }
+         end
+
+         def model_ids
+           SUPPORTED_MODELS.map { |m| m[:id] }
+         end
+
+         def model_info(id)
+           SUPPORTED_MODELS.find { |m| m[:id] == id }
+         end
+
+         def supports_chat?(model_id)
+           info = model_info(model_id)
+           info ? info[:supports_chat] : false
+         end
+
+         def supports_structured?(model_id)
+           info = model_info(model_id)
+           info ? info[:supports_structured] : false
+         end
+
+         def gguf_file_for(model_id)
+           info = model_info(model_id)
+           info ? info[:gguf_file] : nil
+         end
+
+         def tokenizer_for(model_id)
+           info = model_info(model_id)
+           info ? info[:tokenizer] : nil
+         end
+       end
+     end
+   end
+ end
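The registry methods above are plain lookups over SUPPORTED_MODELS. Assuming `provider` is an instance of RubyLLM::Providers::RedCandle (which includes this module), a sketch of the expected behavior, with values copied from the table above:

    provider.model_ids.first
    # => "google/gemma-3-4b-it-qat-q4_0-gguf"
    provider.gguf_file_for('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')
    # => "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
    provider.tokenizer_for('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')
    # => nil (this entry has no separate tokenizer, unlike the Gemma and Mistral entries)
    provider.supports_chat?('microsoft/Phi-3-mini-4k-instruct')
    # => true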
data/lib/ruby_llm/providers/red_candle/streaming.rb (new file)
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class RedCandle
+       # Streaming methods of the RedCandle integration
+       module Streaming
+         def stream(payload, &block)
+           if payload[:stream]
+             perform_streaming_completion!(payload, &block)
+           else
+             # Non-streaming fallback
+             result = perform_completion!(payload)
+             # Yield the complete result as a single chunk
+             chunk = {
+               content: result[:content],
+               role: result[:role],
+               finish_reason: result[:finish_reason]
+             }
+             block.call(chunk)
+           end
+         end
+
+         private
+
+         def stream_processor
+           # Red Candle handles streaming internally through blocks
+           # This method is here for compatibility with the base streaming interface
+           nil
+         end
+
+         def process_stream_response(response)
+           # Red Candle doesn't use HTTP responses
+           # Streaming is handled directly in perform_streaming_completion!
+           response
+         end
+       end
+     end
+   end
+ end
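Everything flows through the caller's block: with stream: true the method yields Chunk objects token by token (ending with an empty-content chunk), otherwise it yields a single hash built from perform_completion!. A minimal consumption sketch, assuming `provider` is a configured RedCandle instance and the payload shape used in Chat above:

    payload = { model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
                messages: [{ role: 'user', content: 'Say hi.' }],
                temperature: 0.7, stream: true }

    provider.stream(payload) do |chunk|
      print chunk.content # tokens arrive as they are generated
    end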
data/lib/ruby_llm/providers/red_candle.rb (new file)
@@ -0,0 +1,90 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     # Red Candle provider for local LLM execution using the Candle Rust crate.
+     class RedCandle < Provider
+       include RedCandle::Chat
+       include RedCandle::Models
+       include RedCandle::Capabilities
+       include RedCandle::Streaming
+
+       def initialize(config)
+         ensure_red_candle_available!
+         super
+         @loaded_models = {} # Cache for loaded models
+         @device = determine_device(config)
+       end
+
+       def api_base
+         nil # Local execution, no API base needed
+       end
+
+       def headers
+         {} # No HTTP headers needed
+       end
+
+       class << self
+         def capabilities
+           RedCandle::Capabilities
+         end
+
+         def configuration_requirements
+           [] # No required config, device is optional
+         end
+
+         def local?
+           true
+         end
+
+         def supports_functions?(model_id = nil)
+           RedCandle::Capabilities.supports_functions?(model_id)
+         end
+
+         def models
+           # Return Red Candle models for registration
+           RedCandle::Models::SUPPORTED_MODELS.map do |model_data|
+             Model::Info.new(
+               id: model_data[:id],
+               name: model_data[:name],
+               provider: 'red_candle',
+               type: 'chat',
+               family: model_data[:family],
+               context_window: model_data[:context_window],
+               capabilities: %w[streaming structured_output],
+               modalities: { input: %w[text], output: %w[text] }
+             )
+           end
+         end
+       end
+
+       private
+
+       def ensure_red_candle_available!
+         require 'candle'
+       rescue LoadError
+         raise Error.new(nil, "Red Candle gem is not installed. Add 'gem \"red-candle\", \"~> 1.2.3\"' to your Gemfile.")
+       end
+
+       def determine_device(config)
+         if config.red_candle_device
+           case config.red_candle_device.to_s.downcase
+           when 'cpu'
+             ::Candle::Device.cpu
+           when 'cuda', 'gpu'
+             ::Candle::Device.cuda
+           when 'metal'
+             ::Candle::Device.metal
+           else
+             ::Candle::Device.best
+           end
+         else
+           ::Candle::Device.best
+         end
+       rescue StandardError => e
+         RubyLLM.logger.warn "Failed to initialize device: #{e.message}. Falling back to CPU."
+         ::Candle::Device.cpu
+       end
+     end
+   end
+ end
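Putting the pieces together, a hedged end-to-end sketch. The red_candle_device setting is implied by determine_device above (the configuration.rb change in this release presumably adds the accessor), and the provider:/streaming chat calls follow the gem's existing conventions rather than anything shown in this diff:

    require 'ruby_llm_community'

    RubyLLM.configure do |config|
      config.red_candle_device = 'metal' # or 'cpu', 'cuda'/'gpu'; omit to let Candle pick the best device
    end

    chat = RubyLLM.chat(model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', provider: :red_candle)
    chat.ask('Summarize what a GGUF file is.') do |chunk|
      print chunk.content # streamed locally; token counts are ~4-chars-per-token estimates
    end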
data/lib/ruby_llm/providers/vertexai/transcription.rb (new file)
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class VertexAI
+       # Vertex AI specific helpers for audio transcription
+       module Transcription
+         private
+
+         def transcription_url(model)
+           "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+         end
+       end
+     end
+   end
+ end
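For illustration only, expanding the interpolation above with placeholder configuration values and a hypothetical model id:

    project  = 'my-project'       # stands in for @config.vertexai_project_id
    location = 'us-central1'      # stands in for @config.vertexai_location
    model    = 'gemini-2.0-flash' # hypothetical model id
    "projects/#{project}/locations/#{location}/publishers/google/models/#{model}:generateContent"
    # => "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash:generateContent"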