ruby_llm_community 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/README.md +20 -5
  3. data/lib/generators/ruby_llm/generator_helpers.rb +129 -0
  4. data/lib/generators/ruby_llm/install/install_generator.rb +12 -129
  5. data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
  6. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +0 -1
  7. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +0 -3
  8. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +1 -4
  9. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +0 -1
  10. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +8 -0
  11. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +47 -96
  12. data/lib/ruby_llm/attachment.rb +5 -0
  13. data/lib/ruby_llm/configuration.rb +4 -0
  14. data/lib/ruby_llm/mime_type.rb +4 -0
  15. data/lib/ruby_llm/model/info.rb +4 -0
  16. data/lib/ruby_llm/models.json +780 -511
  17. data/lib/ruby_llm/models.rb +7 -3
  18. data/lib/ruby_llm/moderation.rb +56 -0
  19. data/lib/ruby_llm/provider.rb +6 -0
  20. data/lib/ruby_llm/providers/gemini/capabilities.rb +5 -0
  21. data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
  22. data/lib/ruby_llm/providers/openai_base.rb +1 -0
  23. data/lib/ruby_llm/providers/red_candle/capabilities.rb +124 -0
  24. data/lib/ruby_llm/providers/red_candle/chat.rb +317 -0
  25. data/lib/ruby_llm/providers/red_candle/models.rb +121 -0
  26. data/lib/ruby_llm/providers/red_candle/streaming.rb +40 -0
  27. data/lib/ruby_llm/providers/red_candle.rb +90 -0
  28. data/lib/ruby_llm/railtie.rb +1 -1
  29. data/lib/ruby_llm/version.rb +1 -1
  30. data/lib/ruby_llm_community.rb +32 -0
  31. metadata +10 -1
data/lib/ruby_llm/models.rb
@@ -194,15 +194,15 @@ module RubyLLM
     end
 
     def embedding_models
-      self.class.new(all.select { |m| m.type == 'embedding' })
+      self.class.new(all.select { |m| m.type == 'embedding' || m.modalities.output.include?('embeddings') })
     end
 
     def audio_models
-      self.class.new(all.select { |m| m.type == 'audio' })
+      self.class.new(all.select { |m| m.type == 'audio' || m.modalities.output.include?('audio') })
     end
 
     def image_models
-      self.class.new(all.select { |m| m.type == 'image' })
+      self.class.new(all.select { |m| m.type == 'image' || m.modalities.output.include?('image') })
     end
 
     def by_family(family)
@@ -217,6 +217,10 @@ module RubyLLM
       self.class.refresh!(remote_only: remote_only)
     end
 
+    def resolve(model_id, provider: nil, assume_exists: false, config: nil)
+      self.class.resolve(model_id, provider: provider, assume_exists: assume_exists, config: config)
+    end
+
    private
 
     def find_with_provider(model_id, provider)
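With this change the type-based filters also match on output modalities, and Models instances gain a resolve shortcut that delegates to the class method. A minimal usage sketch (the model ID and provider below are illustrative, not taken from this diff):

models = RubyLLM.models

# Now includes models whose type is 'embedding' OR whose output modalities list 'embeddings'.
models.embedding_models.each { |m| puts m.id }

# Instance-level resolve delegates to Models.resolve and returns [model, provider].
model, provider = models.resolve('text-embedding-3-small', provider: :openai)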
data/lib/ruby_llm/moderation.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  # Identify potentially harmful content in text.
+  # https://platform.openai.com/docs/guides/moderation
+  class Moderation
+    attr_reader :id, :model, :results
+
+    def initialize(id:, model:, results:)
+      @id = id
+      @model = model
+      @results = results
+    end
+
+    def self.moderate(input,
+                      model: nil,
+                      provider: nil,
+                      assume_model_exists: false,
+                      context: nil)
+      config = context&.config || RubyLLM.config
+      model ||= config.default_moderation_model || 'omni-moderation-latest'
+      model, provider_instance = Models.resolve(model, provider: provider, assume_exists: assume_model_exists,
+                                                config: config)
+      model_id = model.id
+
+      provider_instance.moderate(input, model: model_id)
+    end
+
+    # Convenience method to get content from moderation result
+    def content
+      results
+    end
+
+    # Check if any content was flagged
+    def flagged?
+      results.any? { |result| result['flagged'] }
+    end
+
+    # Get all flagged categories across all results
+    def flagged_categories
+      results.flat_map do |result|
+        result['categories']&.select { |_category, flagged| flagged }&.keys || []
+      end.uniq
+    end
+
+    # Get category scores for the first result (most common case)
+    def category_scores
+      results.first&.dig('category_scores') || {}
+    end
+
+    # Get categories for the first result (most common case)
+    def categories
+      results.first&.dig('categories') || {}
+    end
+  end
+end
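A sketch of how the new moderation API can be called once an OpenAI key is configured; the input text and the sample return values are illustrative:

result = RubyLLM::Moderation.moderate('some user-supplied text')

result.flagged?            # true if any result was flagged
result.flagged_categories  # e.g. ['harassment', 'violence']
result.category_scores     # scores hash for the first result
result.categories          # boolean categories hash for the first result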
data/lib/ruby_llm/provider.rb
@@ -80,6 +80,12 @@ module RubyLLM
       parse_image_response(response, model:)
     end
 
+    def moderate(input, model:)
+      payload = render_moderation_payload(input, model:)
+      response = @connection.post moderation_url, payload
+      parse_moderation_response(response, model:)
+    end
+
     def configured?
       configuration_requirements.all? { |req| @config.send(req) }
     end
data/lib/ruby_llm/providers/gemini/capabilities.rb
@@ -52,6 +52,10 @@ module RubyLLM
           model_id.match?(/gemini|flash|pro|imagen/)
         end
 
+        def supports_video?(model_id)
+          model_id.match?(/gemini/)
+        end
+
         def supports_functions?(model_id)
           return false if model_id.match?(/text-embedding|embedding-001|aqa|flash-lite|imagen|gemini-2\.0-flash-lite/)
 
@@ -217,6 +221,7 @@ module RubyLLM
             modalities[:input] << 'pdf'
           end
 
+          modalities[:input] << 'video' if supports_video?(model_id)
           modalities[:input] << 'audio' if model_id.match?(/audio/)
           modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
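With supports_video? in place, Gemini models whose metadata is built from these capability rules now list 'video' among their input modalities, so video attachments can be routed like images or PDFs. A hedged sketch (the model ID and file name are illustrative):

info = RubyLLM.models.find('gemini-2.5-flash')
info.modalities.input.include?('video') # => true for IDs matching /gemini/

chat = RubyLLM.chat(model: 'gemini-2.5-flash')
chat.ask('Describe this clip', with: 'clip.mp4')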
 
data/lib/ruby_llm/providers/openai/moderation.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Moderation methods of the OpenAI API integration
+      module Moderation
+        module_function
+
+        def moderation_url
+          'moderations'
+        end
+
+        def render_moderation_payload(input, model:)
+          {
+            model: model,
+            input: input
+          }
+        end
+
+        def parse_moderation_response(response, model:)
+          data = response.body
+          raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+          RubyLLM::Moderation.new(
+            id: data['id'],
+            model: model,
+            results: data['results'] || []
+          )
+        end
+      end
+    end
+  end
+end
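For reference, the OpenAI moderations endpoint returns an id, a model, and a results array of flagged/categories/category_scores entries; parse_moderation_response passes that array straight into RubyLLM::Moderation. A sketch of the shape being parsed (all values are illustrative):

data = {
  'id' => 'modr-abc123',
  'model' => 'omni-moderation-latest',
  'results' => [
    { 'flagged' => true,
      'categories' => { 'violence' => true, 'harassment' => false },
      'category_scores' => { 'violence' => 0.91, 'harassment' => 0.02 } }
  ]
}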
data/lib/ruby_llm/providers/openai_base.rb
@@ -9,6 +9,7 @@ module RubyLLM
       include OpenAI::Chat
       include OpenAI::Embeddings
       include OpenAI::Models
+      include OpenAI::Moderation
       include OpenAI::Streaming
       include OpenAI::Tools
       include OpenAI::Images
data/lib/ruby_llm/providers/red_candle/capabilities.rb
@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Determines capabilities and pricing for RedCandle models
+      module Capabilities
+        module_function
+
+        def supports_vision?
+          false
+        end
+
+        def supports_functions?(_model_id = nil)
+          false
+        end
+
+        def supports_streaming?
+          true
+        end
+
+        def supports_structured_output?
+          true
+        end
+
+        def supports_regex_constraints?
+          true
+        end
+
+        def supports_embeddings?
+          false # Future enhancement - Red Candle does support embedding models
+        end
+
+        def supports_audio?
+          false
+        end
+
+        def supports_pdf?
+          false
+        end
+
+        def normalize_temperature(temperature, _model_id)
+          # Red Candle uses standard 0-2 range
+          return 0.7 if temperature.nil?
+
+          temperature = temperature.to_f
+          temperature.clamp(0.0, 2.0)
+        end
+
+        def model_context_window(model_id)
+          case model_id
+          when /gemma-3-4b/i
+            8192
+          when /qwen2\.5-1\.5b/i, /mistral-7b/i
+            32_768
+          when /tinyllama/i
+            2048
+          else
+            4096 # Conservative default
+          end
+        end
+
+        def pricing
+          # Local execution - no API costs
+          {
+            input_tokens_per_dollar: Float::INFINITY,
+            output_tokens_per_dollar: Float::INFINITY,
+            input_price_per_million_tokens: 0.0,
+            output_price_per_million_tokens: 0.0
+          }
+        end
+
+        def default_max_tokens
+          512
+        end
+
+        def max_temperature
+          2.0
+        end
+
+        def min_temperature
+          0.0
+        end
+
+        def supports_temperature?
+          true
+        end
+
+        def supports_top_p?
+          true
+        end
+
+        def supports_top_k?
+          true
+        end
+
+        def supports_repetition_penalty?
+          true
+        end
+
+        def supports_seed?
+          true
+        end
+
+        def supports_stop_sequences?
+          true
+        end
+
+        def model_families
+          %w[gemma llama qwen2 mistral phi]
+        end
+
+        def available_on_platform?
+          # Check if Candle can be loaded
+
+          require 'candle'
+          true
+        rescue LoadError
+          false
+        end
+      end
+    end
+  end
+end
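The Capabilities module is pure metadata, so it can be exercised without loading any model weights. A small sketch (the model ID is illustrative; it only needs to match one of the regexes above):

caps = RubyLLM::Providers::RedCandle::Capabilities

caps.available_on_platform?                                      # false unless the candle gem can be required
caps.model_context_window('TinyLlama/TinyLlama-1.1B-Chat-v1.0')  # => 2048 via /tinyllama/i
caps.normalize_temperature(nil, 'any-model')                     # => 0.7, clamped to 0.0..2.0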
data/lib/ruby_llm/providers/red_candle/chat.rb
@@ -0,0 +1,317 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Chat implementation for Red Candle provider
+      module Chat
+        # Override the base complete method to handle local execution
+        def complete(messages, tools:, temperature:, cache_prompts:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+          _ = headers # Interface compatibility
+          _ = cache_prompts # Interface compatibility
+          payload = Utils.deep_merge(
+            render_payload(
+              messages,
+              tools: tools,
+              temperature: temperature,
+              model: model,
+              stream: block_given?,
+              schema: schema
+            ),
+            params
+          )
+
+          if block_given?
+            perform_streaming_completion!(payload, &)
+          else
+            result = perform_completion!(payload)
+            # Convert to Message object for compatibility
+            # Red Candle doesn't provide token counts by default, but we can estimate them
+            content = result[:content]
+            # Rough estimation: ~4 characters per token
+            estimated_output_tokens = (content.length / 4.0).round
+            estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+            Message.new(
+              role: result[:role].to_sym,
+              content: content,
+              model_id: model.id,
+              input_tokens: estimated_input_tokens,
+              output_tokens: estimated_output_tokens
+            )
+          end
+        end
+
+        def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists
+          # Red Candle doesn't support tools
+          raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty?
+
+          {
+            messages: messages,
+            temperature: temperature,
+            model: model.id,
+            stream: stream,
+            schema: schema
+          }
+        end
+
+        def perform_completion!(payload)
+          model = ensure_model_loaded!(payload[:model])
+          messages = format_messages(payload[:messages])
+
+          # Apply chat template if available
+          prompt = if model.respond_to?(:apply_chat_template)
+                     model.apply_chat_template(messages)
+                   else
+                     # Fallback to simple formatting
+                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                   end
+
+          # Check context length
+          validate_context_length!(prompt, payload[:model])
+
+          # Configure generation
+          config_opts = {
+            temperature: payload[:temperature] || 0.7,
+            max_length: payload[:max_tokens] || 512
+          }
+
+          # Handle structured generation if schema provided
+          response = if payload[:schema]
+                       generate_with_schema(model, prompt, payload[:schema], config_opts)
+                     else
+                       model.generate(
+                         prompt,
+                         config: ::Candle::GenerationConfig.balanced(**config_opts)
+                       )
+                     end
+
+          format_response(response, payload[:schema])
+        end
+
+        def perform_streaming_completion!(payload, &block)
+          model = ensure_model_loaded!(payload[:model])
+          messages = format_messages(payload[:messages])
+
+          # Apply chat template if available
+          prompt = if model.respond_to?(:apply_chat_template)
+                     model.apply_chat_template(messages)
+                   else
+                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                   end
+
+          # Check context length
+          validate_context_length!(prompt, payload[:model])
+
+          # Configure generation
+          config = ::Candle::GenerationConfig.balanced(
+            temperature: payload[:temperature] || 0.7,
+            max_length: payload[:max_tokens] || 512
+          )
+
+          # Collect all streamed content
+          full_content = ''
+
+          # Stream tokens
+          model.generate_stream(prompt, config: config) do |token|
+            full_content += token
+            chunk = format_stream_chunk(token)
+            block.call(chunk)
+          end
+
+          # Send final chunk with empty content (indicates completion)
+          final_chunk = format_stream_chunk('')
+          block.call(final_chunk)
+
+          # Return a Message object with the complete response
+          estimated_output_tokens = (full_content.length / 4.0).round
+          estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+          Message.new(
+            role: :assistant,
+            content: full_content,
+            model_id: payload[:model],
+            input_tokens: estimated_input_tokens,
+            output_tokens: estimated_output_tokens
+          )
+        end
+
+        private
+
+        def ensure_model_loaded!(model_id)
+          @loaded_models[model_id] ||= load_model(model_id)
+        end
+
+        def model_options(model_id)
+          # Get GGUF file and tokenizer if this is a GGUF model
+          # Access the methods from the Models module which is included in the provider
+          options = { device: @device }
+          options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+          options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+          options
+        end
+
+        def load_model(model_id)
+          options = model_options(model_id)
+          ::Candle::LLM.from_pretrained(model_id, **options)
+        rescue StandardError => e
+          if e.message.include?('Failed to find tokenizer')
+            raise Error.new(nil, token_error_message(e, options[:tokenizer]))
+          elsif e.message.include?('Failed to find model')
+            raise Error.new(nil, model_error_message(e, model_id))
+          else
+            raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
+          end
+        end
+
+        def token_error_message(exception, tokenizer)
+          <<~ERROR_MESSAGE
+            Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+            Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+            And that you have accepted the terms of service for the tokenizer.
+            If it requires authentication, login with: huggingface-cli login
+            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+            Original error: #{exception.message}"
+          ERROR_MESSAGE
+        end
+
+        def model_error_message(exception, model_id)
+          <<~ERROR_MESSAGE
+            Failed to load model #{model_id}: #{exception.message}
+            Please verify the model exists at: https://huggingface.co/#{model_id}
+            And that you have accepted the terms of service for the model.
+            If it requires authentication, login with: huggingface-cli login
+            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+            Original error: #{exception.message}"
+          ERROR_MESSAGE
+        end
+
+        def format_messages(messages)
+          messages.map do |msg|
+            # Handle both hash and Message objects
+            if msg.is_a?(Message)
+              {
+                role: msg.role.to_s,
+                content: extract_message_content_from_object(msg)
+              }
+            else
+              {
+                role: msg[:role].to_s,
+                content: extract_message_content(msg)
+              }
+            end
+          end
+        end
+
+        def extract_message_content_from_object(message)
+          content = message.content
+
+          # Handle Content objects
+          if content.is_a?(Content)
+            # Extract text from Content object, including attachment text
+            handle_content_object(content)
+          elsif content.is_a?(String)
+            content
+          else
+            content.to_s
+          end
+        end
+
+        def extract_message_content(message)
+          content = message[:content]
+
+          # Handle Content objects
+          case content
+          when Content
+            # Extract text from Content object
+            handle_content_object(content)
+          when String
+            content
+          when Array
+            # Handle array content (e.g., with images)
+            content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ')
+          else
+            content.to_s
+          end
+        end
+
+        def handle_content_object(content)
+          text_parts = []
+          text_parts << content.text if content.text
+
+          # Add any text from attachments
+          content.attachments&.each do |attachment|
+            text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String)
+          end
+
+          text_parts.join(' ')
+        end
+
+        def generate_with_schema(model, prompt, schema, config_opts)
+          model.generate_structured(
+            prompt,
+            schema: schema,
+            **config_opts
+          )
+        rescue StandardError => e
+          RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation."
+          model.generate(
+            prompt,
+            config: ::Candle::GenerationConfig.balanced(**config_opts)
+          )
+        end
+
+        def format_response(response, schema)
+          content = if schema && !response.is_a?(String)
+                      # Structured response
+                      JSON.generate(response)
+                    else
+                      response
+                    end
+
+          {
+            content: content,
+            role: 'assistant'
+          }
+        end
+
+        def format_stream_chunk(token)
+          # Return a Chunk object for streaming compatibility
+          Chunk.new(
+            role: :assistant,
+            content: token
+          )
+        end
+
+        def estimate_input_tokens(messages)
+          # Rough estimation: ~4 characters per token
+          formatted = format_messages(messages)
+          total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length }
+          (total_chars / 4.0).round
+        end
+
+        def validate_context_length!(prompt, model_id)
+          # Get the context window for this model
+          context_window = if respond_to?(:model_context_window)
+                             model_context_window(model_id)
+                           else
+                             4096 # Conservative default
+                           end
+
+          # Estimate tokens in prompt (~4 characters per token)
+          estimated_tokens = (prompt.length / 4.0).round
+
+          # Check if prompt exceeds context window (leave some room for response)
+          max_input_tokens = context_window - 512 # Reserve 512 tokens for response
+          return unless estimated_tokens > max_input_tokens
+
+          raise Error.new(
+            nil,
+            "Context length exceeded. Estimated #{estimated_tokens} tokens, " \
+            "but model #{model_id} has a context window of #{context_window} tokens."
+          )
+        end
+      end
+    end
+  end
+end
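End-to-end, the Red Candle provider runs generation locally through the candle gem rather than over HTTP. A hedged usage sketch, assuming the provider registers under the :red_candle key and the model below can be loaded by Candle (neither detail is shown in this diff):

chat = RubyLLM.chat(
  model: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', # assumption: any model Candle can load locally
  provider: :red_candle,
  assume_model_exists: true
)

# Streaming goes through perform_streaming_completion!; token counts are estimated (~4 chars/token).
chat.ask('Write a haiku about Ruby') do |chunk|
  print chunk.content
end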