ruby_llm-red_candle 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module RedCandle
+    # Chat implementation for Red Candle provider
+    module Chat
+      # Override the base complete method to handle local execution
+      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &block)
+        _ = headers # Interface compatibility
+        payload = RubyLLM::Utils.deep_merge(
+          render_payload(
+            messages,
+            tools: tools,
+            temperature: temperature,
+            model: model,
+            stream: block_given?,
+            schema: schema
+          ),
+          params
+        )
+
+        if block_given?
+          perform_streaming_completion!(payload, &block)
+        else
+          result = perform_completion!(payload)
+          # Convert to Message object for compatibility
+          # Red Candle doesn't provide token counts by default, but we can estimate them
+          content = result[:content]
+          # Rough estimation: ~4 characters per token
+          estimated_output_tokens = (content.length / 4.0).round
+          estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+          RubyLLM::Message.new(
+            role: result[:role].to_sym,
+            content: content,
+            model_id: model.id,
+            input_tokens: estimated_input_tokens,
+            output_tokens: estimated_output_tokens
+          )
+        end
+      end
+
+      def render_payload(messages, tools:, temperature:, model:, stream:, schema:)
+        # Red Candle doesn't support tools
+        if tools && !tools.empty?
+          raise RubyLLM::Error.new(nil, "Red Candle provider does not support tool calling")
+        end
+
+        {
+          messages: messages,
+          temperature: temperature,
+          model: model.id,
+          stream: stream,
+          schema: schema
+        }
+      end
+
+      def perform_completion!(payload)
+        model = ensure_model_loaded!(payload[:model])
+        messages = format_messages(payload[:messages])
+
+        # Handle structured generation differently - we need to build the prompt
+        # with JSON instructions BEFORE applying the chat template
+        response = if payload[:schema]
+                     generate_with_schema(model, messages, payload[:schema], payload)
+                   else
+                     prompt = build_prompt(model, messages)
+                     validate_context_length!(prompt, payload[:model])
+                     config = build_generation_config(payload)
+                     generate_with_error_handling(model, prompt, config, payload[:model])
+                   end
+
+        format_response(response, payload[:schema])
+      end
+
+      def perform_streaming_completion!(payload, &block)
+        model = ensure_model_loaded!(payload[:model])
+        messages = format_messages(payload[:messages])
+
+        prompt = build_prompt(model, messages)
+        validate_context_length!(prompt, payload[:model])
+        config = build_generation_config(payload)
+
+        # Collect all streamed content
+        full_content = ""
+
+        # Stream tokens with error handling
+        stream_with_error_handling(model, prompt, config, payload[:model]) do |token|
+          full_content += token
+          chunk = format_stream_chunk(token)
+          block.call(chunk)
+        end
+
+        # Send final chunk with empty content (indicates completion)
+        final_chunk = format_stream_chunk("")
+        block.call(final_chunk)
+
+        # Return a Message object with the complete response
+        estimated_output_tokens = (full_content.length / 4.0).round
+        estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+        RubyLLM::Message.new(
+          role: :assistant,
+          content: full_content,
+          model_id: payload[:model],
+          input_tokens: estimated_input_tokens,
+          output_tokens: estimated_output_tokens
+        )
+      end
+
+      private
+
+      # Build the prompt string from messages using the model's chat template
+      def build_prompt(model, messages)
+        if model.respond_to?(:apply_chat_template)
+          model.apply_chat_template(messages)
+        else
+          # Fallback to simple formatting
+          "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+        end
+      end
+
+      # Get generation parameters with consistent defaults
+      # @param payload [Hash] The request payload
+      # @param structured [Boolean] Whether this is for structured generation (uses different defaults)
+      # @return [Array<Float, Integer>] temperature and max_length values
+      def generation_params(payload, structured: false)
+        temperature = payload[:temperature] || (structured ? 0.3 : 0.7)
+        max_length = payload[:max_tokens] || (structured ? 1024 : 512)
+        [temperature, max_length]
+      end
+
+      # Build generation config with consistent defaults
+      # @param payload [Hash] The request payload
+      # @param structured [Boolean] Whether this is for structured generation (uses different defaults)
+      def build_generation_config(payload, structured: false)
+        temperature, max_length = generation_params(payload, structured: structured)
+        ::Candle::GenerationConfig.balanced(
+          temperature: temperature,
+          max_length: max_length
+        )
+      end
+
+      def ensure_model_loaded!(model_id)
+        @loaded_models[model_id] ||= load_model(model_id)
+      end
+
+      def model_options(model_id)
+        # Get GGUF file and tokenizer if this is a GGUF model
+        # Access the methods from the Models module which is included in the provider
+        options = { device: @device }
+        options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+        options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+        options
+      end
+
+      def load_model(model_id)
+        options = model_options(model_id)
+        ::Candle::LLM.from_pretrained(model_id, **options)
+      rescue StandardError => e
+        if e.message.include?("Failed to find tokenizer")
+          raise RubyLLM::Error.new(nil, token_error_message(e, options[:tokenizer]))
+        elsif e.message.include?("Failed to find model")
+          raise RubyLLM::Error.new(nil, model_error_message(e, model_id))
+        else
+          raise RubyLLM::Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
+        end
+      end
+
+      def token_error_message(exception, tokenizer)
+        <<~ERROR_MESSAGE
+          Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+          Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+          And that you have accepted the terms of service for the tokenizer.
+          If it requires authentication, login with: huggingface-cli login
+          See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+          Original error: #{exception.message}
+        ERROR_MESSAGE
+      end
+
+      def model_error_message(exception, model_id)
+        <<~ERROR_MESSAGE
+          Failed to load model #{model_id}: #{exception.message}
+          Please verify the model exists at: https://huggingface.co/#{model_id}
+          And that you have accepted the terms of service for the model.
+          If it requires authentication, login with: huggingface-cli login
+          See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+          Original error: #{exception.message}
+        ERROR_MESSAGE
+      end
+
+      def generate_with_error_handling(model, prompt, config, model_id)
+        model.generate(prompt, config: config)
+      rescue StandardError => e
+        raise RubyLLM::Error.new(nil, generation_error_message(e, model_id))
+      end
+
+      def stream_with_error_handling(model, prompt, config, model_id, &block)
+        model.generate_stream(prompt, config: config, &block)
+      rescue StandardError => e
+        raise RubyLLM::Error.new(nil, generation_error_message(e, model_id))
+      end
+
+      def generation_error_message(exception, model_id)
+        message = exception.message.to_s
+
+        if message.include?("out of memory") || message.include?("OOM")
+          <<~ERROR_MESSAGE.strip
+            Out of memory while generating with #{model_id}.
+            Try using a smaller model or reducing the context length.
+            Original error: #{message}
+          ERROR_MESSAGE
+        elsif message.include?("context") || message.include?("sequence")
+          <<~ERROR_MESSAGE.strip
+            Context length exceeded for #{model_id}.
+            The input is too long for this model's context window.
+            Original error: #{message}
+          ERROR_MESSAGE
+        elsif message.include?("tensor") || message.include?("shape")
+          <<~ERROR_MESSAGE.strip
+            Model execution error for #{model_id}.
+            This may indicate an incompatible model format or corrupted weights.
+            Original error: #{message}
+          ERROR_MESSAGE
+        else
+          "Generation failed for #{model_id}: #{message}"
+        end
+      end
+
+      def format_messages(messages)
+        messages.map do |msg|
+          # Handle both hash and Message objects
+          if msg.is_a?(RubyLLM::Message)
+            {
+              role: msg.role.to_s,
+              content: extract_message_content_from_object(msg)
+            }
+          else
+            {
+              role: msg[:role].to_s,
+              content: extract_message_content(msg)
+            }
+          end
+        end
+      end
+
+      def extract_message_content_from_object(message)
+        content = message.content
+
+        # Handle Content objects
+        if content.is_a?(RubyLLM::Content)
+          # Extract text from Content object, including attachment text
+          handle_content_object(content)
+        elsif content.is_a?(String)
+          content
+        else
+          content.to_s
+        end
+      end
+
+      def extract_message_content(message)
+        content = message[:content]
+
+        # Handle Content objects
+        case content
+        when RubyLLM::Content
+          # Extract text from Content object
+          handle_content_object(content)
+        when String
+          content
+        when Array
+          # Handle array content (e.g., with images)
+          content.filter_map { |part| part[:text] if part[:type] == "text" }.join(" ")
+        else
+          content.to_s
+        end
+      end
+
+      def handle_content_object(content)
+        text_parts = []
+        text_parts << content.text if content.text
+
+        # Add any text from attachments
+        content.attachments&.each do |attachment|
+          text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String)
+        end
+
+        text_parts.join(" ")
+      end
+
+      def generate_with_schema(model, messages, schema, payload)
+        # Use Red Candle's native structured generation which uses the Rust outlines crate
+        # for grammar-constrained generation. This ensures valid JSON output.
+
+        # Normalize schema to ensure consistent symbol keys
+        normalized_schema = deep_symbolize_keys(schema)
+
+        # Validate schema before attempting generation
+        SchemaValidator.validate!(normalized_schema)
+
+        # Debug logging to help diagnose issues
+        RubyLLM.logger.debug "=== STRUCTURED GENERATION DEBUG ==="
+        RubyLLM.logger.debug "Original schema: #{schema.inspect}"
+        RubyLLM.logger.debug "Normalized schema: #{normalized_schema.inspect}"
+        RubyLLM.logger.debug "Messages: #{messages.inspect}"
+
+        # For structured generation, we modify the last user message to include
+        # JSON output instructions, then apply the chat template
+        structured_messages = build_structured_messages(messages, normalized_schema)
+        RubyLLM.logger.debug "Structured messages: #{structured_messages.inspect}"
+
+        prompt = build_prompt(model, structured_messages)
+        RubyLLM.logger.debug "Final prompt:\n#{prompt}"
+        RubyLLM.logger.debug "=== END DEBUG ==="
+
+        validate_context_length!(prompt, payload[:model])
+
+        # Get generation parameters (structured generation uses different defaults)
+        temperature, max_length = generation_params(payload, structured: true)
+
+        result = model.generate_structured(
+          prompt,
+          schema: normalized_schema,
+          temperature: temperature,
+          max_length: max_length,
+          warn_on_parse_error: true,
+          reset_cache: true
+        )
+
+        RubyLLM.logger.debug "Structured generation result: #{result.inspect}"
+
+        # generate_structured returns a Hash on success, or raw String on parse failure
+        result
+      rescue StandardError => e
+        # Log at debug level - the raised exception will inform the caller
+        RubyLLM.logger.debug "Structured generation failed: #{e.class}: #{e.message}"
+        RubyLLM.logger.debug e.backtrace.first(5).join("\n") if e.backtrace
+        raise RubyLLM::Error.new(nil, "Structured generation failed: #{e.message}")
+      end
+
+      # Recursively convert all hash keys to symbols
+      def deep_symbolize_keys(obj)
+        case obj
+        when Hash
+          obj.each_with_object({}) do |(key, value), result|
+            result[key.to_sym] = deep_symbolize_keys(value)
+          end
+        when Array
+          obj.map { |item| deep_symbolize_keys(item) }
+        else
+          obj
+        end
+      end
+
+      def build_structured_messages(messages, schema)
+        # Clone messages to avoid modifying the original
+        modified_messages = messages.map(&:dup)
+
+        # Find the last user message and append JSON instructions
+        last_user_idx = modified_messages.rindex { |m| m[:role] == "user" }
+        return modified_messages unless last_user_idx
+
+        schema_description = describe_schema(schema)
+        json_instruction = Configuration.build_json_instruction(schema_description)
+
+        modified_messages[last_user_idx][:content] += json_instruction
+        modified_messages
+      end
+
+      def describe_schema(schema)
+        return "the requested data" unless schema.is_a?(Hash)
+
+        # Support both symbol and string keys for robustness
+        properties = schema[:properties] || schema["properties"]
+        return "the requested data" unless properties
+
+        properties.map do |key, value|
+          type = value[:type] || value["type"] || "any"
+          enum = value[:enum] || value["enum"]
+          if enum
+            "#{key} (#{type}, one of: #{enum.join(', ')})"
+          else
+            "#{key} (#{type})"
+          end
+        end.join(", ")
+      end
+
+      def format_response(response, schema)
+        content = if schema && !response.is_a?(String)
+                    # Structured response
+                    JSON.generate(response)
+                  else
+                    response
+                  end
+
+        {
+          content: content,
+          role: "assistant"
+        }
+      end
+
+      def format_stream_chunk(token)
+        # Return a Chunk object for streaming compatibility
+        RubyLLM::Chunk.new(
+          role: :assistant,
+          content: token
+        )
+      end
+
+      def estimate_input_tokens(messages)
+        # Rough estimation: ~4 characters per token
+        formatted = format_messages(messages)
+        total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length }
+        (total_chars / 4.0).round
+      end
+
+      def validate_context_length!(prompt, model_id)
+        # Get the context window for this model
+        context_window = if respond_to?(:model_context_window)
+                           model_context_window(model_id)
+                         else
+                           4096 # Conservative default
+                         end
+
+        # Estimate tokens in prompt (~4 characters per token)
+        estimated_tokens = (prompt.length / 4.0).round
+
+        # Check if prompt exceeds context window (leave some room for response)
+        max_input_tokens = context_window - 512 # Reserve 512 tokens for response
+        return unless estimated_tokens > max_input_tokens
+
+        raise RubyLLM::Error.new(
+          nil,
+          "Context length exceeded. Estimated #{estimated_tokens} tokens, " \
+          "but model #{model_id} has a context window of #{context_window} tokens."
+        )
+      end
+
+      # Delegate to Capabilities module for context window lookup
+      def model_context_window(model_id)
+        Capabilities.model_context_window(model_id)
+      end
+    end
+  end
+end
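
Usage note (editor's illustration, not part of the packaged files): the completion path above plugs into RubyLLM's normal chat flow once the provider is registered. A minimal sketch, assuming the gem's entry point registers the provider under :red_candle (registration code is not shown in this diff) and using RubyLLM's top-level chat API:

require "ruby_llm"
require "ruby_llm/red_candle" # assumed require path; the gem's entry point is not in this diff

# Non-streaming: Chat#complete runs the model locally and returns a RubyLLM::Message
# whose token counts are estimated at roughly 4 characters per token.
chat = RubyLLM.chat(model: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", provider: :red_candle)
reply = chat.ask("Summarize the plot of Hamlet in one sentence.")
puts reply.content
puts "estimated output tokens: #{reply.output_tokens}"

# Streaming: passing a block routes through perform_streaming_completion!, which yields
# RubyLLM::Chunk objects and finishes with an empty chunk before returning the full Message.
chat.ask("Now do Macbeth.") { |chunk| print chunk.content }
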
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module RedCandle
+    # Configuration options for Red Candle provider
+    module Configuration
+      # Default JSON instruction template for structured generation
+      # Use {schema_description} as a placeholder for the schema description
+      DEFAULT_JSON_INSTRUCTION = "\n\nRespond with ONLY a valid JSON object containing: {schema_description}"
+
+      class << self
+        # Get the JSON instruction template
+        # @return [String] the template with {schema_description} placeholder
+        def json_instruction_template
+          @json_instruction_template || DEFAULT_JSON_INSTRUCTION
+        end
+
+        # Set a custom JSON instruction template
+        # @param template [String] the template with {schema_description} placeholder
+        def json_instruction_template=(template)
+          @json_instruction_template = template
+        end
+
+        # Reset configuration to defaults
+        def reset!
+          @json_instruction_template = nil
+        end
+
+        # Build the JSON instruction by substituting the schema description
+        # @param schema_description [String] the human-readable schema description
+        # @return [String] the formatted instruction
+        def build_json_instruction(schema_description)
+          json_instruction_template.gsub("{schema_description}", schema_description)
+        end
+      end
+    end
+  end
+end
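
Usage note (editor's illustration, not part of the packaged files): the template above is substituted verbatim by build_json_instruction, and reset! restores the default, so a custom instruction can be swapped in at runtime:

# Tighten the instruction appended to the last user message during structured generation.
RubyLLM::RedCandle::Configuration.json_instruction_template =
  "\n\nReturn ONLY valid JSON with these fields: {schema_description}. No prose."

RubyLLM::RedCandle::Configuration.build_json_instruction("name (string), age (integer)")
# => "\n\nReturn ONLY valid JSON with these fields: name (string), age (integer). No prose."

# Restore DEFAULT_JSON_INSTRUCTION.
RubyLLM::RedCandle::Configuration.reset!
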
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module RedCandle
+    # Model registry methods for the Red Candle integration
+    module Models
+      # TODO: red-candle supports more models, but let's start with some well-tested ones.
+      SUPPORTED_MODELS = [
+        {
+          id: "google/gemma-3-4b-it-qat-q4_0-gguf",
+          name: "Gemma 3 4B Instruct (Quantized)",
+          gguf_file: "gemma-3-4b-it-q4_0.gguf",
+          tokenizer: "google/gemma-3-4b-it", # Tokenizer from base model
+          context_window: 8192,
+          family: "gemma",
+          architecture: "gemma2",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+          name: "TinyLlama 1.1B Chat (Quantized)",
+          gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+          context_window: 2048,
+          family: "llama",
+          architecture: "llama",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+          name: "Mistral 7B Instruct v0.2 (Quantized)",
+          gguf_file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+          tokenizer: "mistralai/Mistral-7B-Instruct-v0.2",
+          context_window: 32_768,
+          family: "mistral",
+          architecture: "mistral",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
+          name: "Qwen 2.5 1.5B Instruct (Quantized)",
+          gguf_file: "qwen2.5-1.5b-instruct-q4_k_m.gguf",
+          context_window: 32_768,
+          family: "qwen2",
+          architecture: "qwen2",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "microsoft/Phi-3-mini-4k-instruct",
+          name: "Phi 3 Mini 4K Instruct",
+          context_window: 4096,
+          family: "phi",
+          architecture: "phi",
+          supports_chat: true,
+          supports_structured: true
+        }
+      ].freeze
+
+      def list_models
+        SUPPORTED_MODELS.map do |model_data|
+          RubyLLM::Model::Info.new(
+            id: model_data[:id],
+            name: model_data[:name],
+            provider: slug,
+            family: model_data[:family],
+            context_window: model_data[:context_window],
+            capabilities: %w[streaming structured_output],
+            modalities: { input: %w[text], output: %w[text] }
+          )
+        end
+      end
+
+      def models
+        @models ||= list_models
+      end
+
+      def model(id)
+        models.find { |m| m.id == id } ||
+          raise(RubyLLM::Error.new(nil,
+                                   "Model #{id} not found in Red Candle provider. " \
+                                   "Available models: #{model_ids.join(', ')}"))
+      end
+
+      def model_available?(id)
+        SUPPORTED_MODELS.any? { |m| m[:id] == id }
+      end
+
+      def model_ids
+        SUPPORTED_MODELS.map { |m| m[:id] }
+      end
+
+      def model_info(id)
+        SUPPORTED_MODELS.find { |m| m[:id] == id }
+      end
+
+      def supports_chat?(model_id)
+        info = model_info(model_id)
+        info ? info[:supports_chat] : false
+      end
+
+      def supports_structured?(model_id)
+        info = model_info(model_id)
+        info ? info[:supports_structured] : false
+      end
+
+      def gguf_file_for(model_id)
+        info = model_info(model_id)
+        info ? info[:gguf_file] : nil
+      end
+
+      def tokenizer_for(model_id)
+        info = model_info(model_id)
+        info ? info[:tokenizer] : nil
+      end
+    end
+  end
+end
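
Usage note (editor's illustration, not part of the packaged files): the lookup helpers behave as sketched below, assuming `provider` is an instance of a class that includes this module (such as the Provider class later in this diff):

provider.model_ids.first(2)
# => ["google/gemma-3-4b-it-qat-q4_0-gguf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"]

provider.model_available?("microsoft/Phi-3-mini-4k-instruct")    # => true
provider.supports_structured?("Qwen/Qwen2.5-1.5B-Instruct-GGUF") # => true

# GGUF entries may name a separate tokenizer repo; entries without one return nil.
provider.gguf_file_for("TheBloke/Mistral-7B-Instruct-v0.2-GGUF") # => "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
provider.tokenizer_for("TheBloke/Mistral-7B-Instruct-v0.2-GGUF") # => "mistralai/Mistral-7B-Instruct-v0.2"
provider.tokenizer_for("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF") # => nil
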
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+require "candle"
+
+module RubyLLM
+  module RedCandle
+    # Red Candle provider for local LLM execution using the Candle Rust crate.
+    class Provider < RubyLLM::Provider
+      include Chat
+      include Models
+      include Capabilities
+      include Streaming
+
+      def initialize(config)
+        ensure_red_candle_available!
+        super
+        @loaded_models = {} # Cache for loaded models
+        @device = determine_device(config)
+      end
+
+      def api_base
+        nil # Local execution, no API base needed
+      end
+
+      def headers
+        {} # No HTTP headers needed
+      end
+
+      class << self
+        def capabilities
+          Capabilities
+        end
+
+        def configuration_requirements
+          [] # No required config, device is optional
+        end
+
+        def local?
+          true
+        end
+
+        def supports_functions?(model_id = nil)
+          Capabilities.supports_functions?(model_id)
+        end
+
+        def models
+          # Return Red Candle models for registration
+          Models::SUPPORTED_MODELS.map do |model_data|
+            RubyLLM::Model::Info.new(
+              id: model_data[:id],
+              name: model_data[:name],
+              provider: "red_candle",
+              type: "chat",
+              family: model_data[:family],
+              context_window: model_data[:context_window],
+              capabilities: %w[streaming structured_output],
+              modalities: { input: %w[text], output: %w[text] }
+            )
+          end
+        end
+      end
+
+      private
+
+      def ensure_red_candle_available!
+        require "candle"
+      rescue LoadError
+        raise RubyLLM::Error.new(nil, "Red Candle gem is not installed. Add 'gem \"red-candle\"' to your Gemfile.")
+      end
+
+      def determine_device(config)
+        if config.respond_to?(:red_candle_device) && config.red_candle_device
+          case config.red_candle_device.to_s.downcase
+          when "cpu"
+            ::Candle::Device.cpu
+          when "cuda", "gpu"
+            ::Candle::Device.cuda
+          when "metal"
+            ::Candle::Device.metal
+          else
+            ::Candle::Device.best
+          end
+        else
+          ::Candle::Device.best
+        end
+      rescue StandardError => e
+        RubyLLM.logger.warn "Failed to initialize device: #{e.message}. Falling back to CPU."
+        ::Candle::Device.cpu
+      end
+    end
+  end
+end
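
Usage note (editor's illustration, not part of the packaged files): determine_device only requires that the config object respond to #red_candle_device, so any configuration exposing that attribute works. The RubyLLM.configure hook below is an assumption about how the gem wires the setting up, and the direct Provider.new call is shown only for illustration:

RubyLLM.configure do |config|
  # Hypothetical setting; unrecognized values fall back to ::Candle::Device.best.
  config.red_candle_device = "metal" # or "cpu", "cuda"/"gpu"
end

provider = RubyLLM::RedCandle::Provider.new(RubyLLM.config)
RubyLLM::RedCandle::Provider.local?              # => true
RubyLLM::RedCandle::Provider.supports_functions? # delegates to Capabilities; tool calls are rejected in render_payload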