ruby_llm_community 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -3
- data/lib/generators/ruby_llm/install/install_generator.rb +8 -2
- data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
- data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +0 -1
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +0 -3
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +0 -1
- data/lib/ruby_llm/configuration.rb +4 -0
- data/lib/ruby_llm/models.json +780 -511
- data/lib/ruby_llm/models.rb +7 -3
- data/lib/ruby_llm/moderation.rb +56 -0
- data/lib/ruby_llm/provider.rb +6 -0
- data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
- data/lib/ruby_llm/providers/openai_base.rb +1 -0
- data/lib/ruby_llm/providers/red_candle/capabilities.rb +124 -0
- data/lib/ruby_llm/providers/red_candle/chat.rb +317 -0
- data/lib/ruby_llm/providers/red_candle/models.rb +121 -0
- data/lib/ruby_llm/providers/red_candle/streaming.rb +40 -0
- data/lib/ruby_llm/providers/red_candle.rb +90 -0
- data/lib/ruby_llm/railtie.rb +1 -1
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm_community.rb +32 -0
- metadata +9 -1
data/lib/ruby_llm/models.rb
CHANGED
@@ -194,15 +194,15 @@ module RubyLLM
     end

     def embedding_models
-      self.class.new(all.select { |m| m.type == 'embedding' })
+      self.class.new(all.select { |m| m.type == 'embedding' || m.modalities.output.include?('embeddings') })
     end

     def audio_models
-      self.class.new(all.select { |m| m.type == 'audio' })
+      self.class.new(all.select { |m| m.type == 'audio' || m.modalities.output.include?('audio') })
     end

     def image_models
-      self.class.new(all.select { |m| m.type == 'image' })
+      self.class.new(all.select { |m| m.type == 'image' || m.modalities.output.include?('image') })
     end

     def by_family(family)
@@ -217,6 +217,10 @@ module RubyLLM
       self.class.refresh!(remote_only: remote_only)
     end

+    def resolve(model_id, provider: nil, assume_exists: false, config: nil)
+      self.class.resolve(model_id, provider: provider, assume_exists: assume_exists, config: config)
+    end
+
     private

     def find_with_provider(model_id, provider)
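For orientation, a minimal usage sketch of the changed methods above. It assumes the RubyLLM.models registry accessor from upstream RubyLLM; the model id and provider are purely illustrative and not part of the diff.

# Usage sketch (illustrative, not part of the diff).
models = RubyLLM.models

# Filtering now also honors declared output modalities, not only the legacy
# type field, so a model exposing an 'embeddings' output modality is included.
embedding_ids = models.embedding_models.map(&:id)

# The new instance-level resolve delegates to Models.resolve and returns the
# resolved model info plus a provider instance.
model, provider_instance = models.resolve('gpt-4o-mini', provider: :openai)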
data/lib/ruby_llm/moderation.rb
ADDED
@@ -0,0 +1,56 @@
# frozen_string_literal: true

module RubyLLM
  # Identify potentially harmful content in text.
  # https://platform.openai.com/docs/guides/moderation
  class Moderation
    attr_reader :id, :model, :results

    def initialize(id:, model:, results:)
      @id = id
      @model = model
      @results = results
    end

    def self.moderate(input,
                      model: nil,
                      provider: nil,
                      assume_model_exists: false,
                      context: nil)
      config = context&.config || RubyLLM.config
      model ||= config.default_moderation_model || 'omni-moderation-latest'
      model, provider_instance = Models.resolve(model, provider: provider, assume_exists: assume_model_exists,
                                                config: config)
      model_id = model.id

      provider_instance.moderate(input, model: model_id)
    end

    # Convenience method to get content from moderation result
    def content
      results
    end

    # Check if any content was flagged
    def flagged?
      results.any? { |result| result['flagged'] }
    end

    # Get all flagged categories across all results
    def flagged_categories
      results.flat_map do |result|
        result['categories']&.select { |_category, flagged| flagged }&.keys || []
      end.uniq
    end

    # Get category scores for the first result (most common case)
    def category_scores
      results.first&.dig('category_scores') || {}
    end

    # Get categories for the first result (most common case)
    def categories
      results.first&.dig('categories') || {}
    end
  end
end
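A hedged usage sketch of the new moderation entry point. It assumes an OpenAI API key is configured, since OpenAI is the only provider wiring up the moderation hooks in this release.

# Usage sketch (illustrative, not part of the diff).
moderation = RubyLLM::Moderation.moderate('user-submitted text to screen')

moderation.model               # => "omni-moderation-latest" unless overridden
moderation.flagged?            # => true if any result was flagged
moderation.flagged_categories  # => e.g. ["harassment"], across all results
moderation.category_scores     # => category scores for the first result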
data/lib/ruby_llm/provider.rb
CHANGED
@@ -80,6 +80,12 @@ module RubyLLM
       parse_image_response(response, model:)
     end

+    def moderate(input, model:)
+      payload = render_moderation_payload(input, model:)
+      response = @connection.post moderation_url, payload
+      parse_moderation_response(response, model:)
+    end
+
     def configured?
       configuration_requirements.all? { |req| @config.send(req) }
     end
data/lib/ruby_llm/providers/openai/moderation.rb
ADDED
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    class OpenAI
      # Moderation methods of the OpenAI API integration
      module Moderation
        module_function

        def moderation_url
          'moderations'
        end

        def render_moderation_payload(input, model:)
          {
            model: model,
            input: input
          }
        end

        def parse_moderation_response(response, model:)
          data = response.body
          raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')

          RubyLLM::Moderation.new(
            id: data['id'],
            model: model,
            results: data['results'] || []
          )
        end
      end
    end
  end
end
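For reference, the approximate response body that parse_moderation_response consumes, based on OpenAI's public moderation documentation; field values here are illustrative, not taken from the diff.

# Approximate OpenAI moderation response shape (illustrative):
# {
#   "id"      => "modr-0123",
#   "model"   => "omni-moderation-latest",
#   "results" => [
#     {
#       "flagged"         => true,
#       "categories"      => { "harassment" => true,  "violence" => false },
#       "category_scores" => { "harassment" => 0.91,  "violence" => 0.01 }
#     }
#   ]
# }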
data/lib/ruby_llm/providers/red_candle/capabilities.rb
ADDED
@@ -0,0 +1,124 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    class RedCandle
      # Determines capabilities and pricing for RedCandle models
      module Capabilities
        module_function

        def supports_vision?
          false
        end

        def supports_functions?(_model_id = nil)
          false
        end

        def supports_streaming?
          true
        end

        def supports_structured_output?
          true
        end

        def supports_regex_constraints?
          true
        end

        def supports_embeddings?
          false # Future enhancement - Red Candle does support embedding models
        end

        def supports_audio?
          false
        end

        def supports_pdf?
          false
        end

        def normalize_temperature(temperature, _model_id)
          # Red Candle uses standard 0-2 range
          return 0.7 if temperature.nil?

          temperature = temperature.to_f
          temperature.clamp(0.0, 2.0)
        end

        def model_context_window(model_id)
          case model_id
          when /gemma-3-4b/i
            8192
          when /qwen2\.5-1\.5b/i, /mistral-7b/i
            32_768
          when /tinyllama/i
            2048
          else
            4096 # Conservative default
          end
        end

        def pricing
          # Local execution - no API costs
          {
            input_tokens_per_dollar: Float::INFINITY,
            output_tokens_per_dollar: Float::INFINITY,
            input_price_per_million_tokens: 0.0,
            output_price_per_million_tokens: 0.0
          }
        end

        def default_max_tokens
          512
        end

        def max_temperature
          2.0
        end

        def min_temperature
          0.0
        end

        def supports_temperature?
          true
        end

        def supports_top_p?
          true
        end

        def supports_top_k?
          true
        end

        def supports_repetition_penalty?
          true
        end

        def supports_seed?
          true
        end

        def supports_stop_sequences?
          true
        end

        def model_families
          %w[gemma llama qwen2 mistral phi]
        end

        def available_on_platform?
          # Check if Candle can be loaded
          require 'candle'
          true
        rescue LoadError
          false
        end
      end
    end
  end
end
data/lib/ruby_llm/providers/red_candle/chat.rb
ADDED
@@ -0,0 +1,317 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    class RedCandle
      # Chat implementation for Red Candle provider
      module Chat
        # Override the base complete method to handle local execution
        def complete(messages, tools:, temperature:, cache_prompts:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
          _ = headers # Interface compatibility
          _ = cache_prompts # Interface compatibility
          payload = Utils.deep_merge(
            render_payload(
              messages,
              tools: tools,
              temperature: temperature,
              model: model,
              stream: block_given?,
              schema: schema
            ),
            params
          )

          if block_given?
            perform_streaming_completion!(payload, &)
          else
            result = perform_completion!(payload)
            # Convert to Message object for compatibility
            # Red Candle doesn't provide token counts by default, but we can estimate them
            content = result[:content]
            # Rough estimation: ~4 characters per token
            estimated_output_tokens = (content.length / 4.0).round
            estimated_input_tokens = estimate_input_tokens(payload[:messages])

            Message.new(
              role: result[:role].to_sym,
              content: content,
              model_id: model.id,
              input_tokens: estimated_input_tokens,
              output_tokens: estimated_output_tokens
            )
          end
        end

        def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists
          # Red Candle doesn't support tools
          raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty?

          {
            messages: messages,
            temperature: temperature,
            model: model.id,
            stream: stream,
            schema: schema
          }
        end

        def perform_completion!(payload)
          model = ensure_model_loaded!(payload[:model])
          messages = format_messages(payload[:messages])

          # Apply chat template if available
          prompt = if model.respond_to?(:apply_chat_template)
                     model.apply_chat_template(messages)
                   else
                     # Fallback to simple formatting
                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
                   end

          # Check context length
          validate_context_length!(prompt, payload[:model])

          # Configure generation
          config_opts = {
            temperature: payload[:temperature] || 0.7,
            max_length: payload[:max_tokens] || 512
          }

          # Handle structured generation if schema provided
          response = if payload[:schema]
                       generate_with_schema(model, prompt, payload[:schema], config_opts)
                     else
                       model.generate(
                         prompt,
                         config: ::Candle::GenerationConfig.balanced(**config_opts)
                       )
                     end

          format_response(response, payload[:schema])
        end

        def perform_streaming_completion!(payload, &block)
          model = ensure_model_loaded!(payload[:model])
          messages = format_messages(payload[:messages])

          # Apply chat template if available
          prompt = if model.respond_to?(:apply_chat_template)
                     model.apply_chat_template(messages)
                   else
                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
                   end

          # Check context length
          validate_context_length!(prompt, payload[:model])

          # Configure generation
          config = ::Candle::GenerationConfig.balanced(
            temperature: payload[:temperature] || 0.7,
            max_length: payload[:max_tokens] || 512
          )

          # Collect all streamed content
          full_content = ''

          # Stream tokens
          model.generate_stream(prompt, config: config) do |token|
            full_content += token
            chunk = format_stream_chunk(token)
            block.call(chunk)
          end

          # Send final chunk with empty content (indicates completion)
          final_chunk = format_stream_chunk('')
          block.call(final_chunk)

          # Return a Message object with the complete response
          estimated_output_tokens = (full_content.length / 4.0).round
          estimated_input_tokens = estimate_input_tokens(payload[:messages])

          Message.new(
            role: :assistant,
            content: full_content,
            model_id: payload[:model],
            input_tokens: estimated_input_tokens,
            output_tokens: estimated_output_tokens
          )
        end

        private

        def ensure_model_loaded!(model_id)
          @loaded_models[model_id] ||= load_model(model_id)
        end

        def model_options(model_id)
          # Get GGUF file and tokenizer if this is a GGUF model
          # Access the methods from the Models module which is included in the provider
          options = { device: @device }
          options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
          options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
          options
        end

        def load_model(model_id)
          options = model_options(model_id)
          ::Candle::LLM.from_pretrained(model_id, **options)
        rescue StandardError => e
          if e.message.include?('Failed to find tokenizer')
            raise Error.new(nil, token_error_message(e, options[:tokenizer]))
          elsif e.message.include?('Failed to find model')
            raise Error.new(nil, model_error_message(e, model_id))
          else
            raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
          end
        end

        def token_error_message(exception, tokenizer)
          <<~ERROR_MESSAGE
            Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
            Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
            And that you have accepted the terms of service for the tokenizer.
            If it requires authentication, login with: huggingface-cli login
            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
            Original error: #{exception.message}"
          ERROR_MESSAGE
        end

        def model_error_message(exception, model_id)
          <<~ERROR_MESSAGE
            Failed to load model #{model_id}: #{exception.message}
            Please verify the model exists at: https://huggingface.co/#{model_id}
            And that you have accepted the terms of service for the model.
            If it requires authentication, login with: huggingface-cli login
            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
            Original error: #{exception.message}"
          ERROR_MESSAGE
        end

        def format_messages(messages)
          messages.map do |msg|
            # Handle both hash and Message objects
            if msg.is_a?(Message)
              {
                role: msg.role.to_s,
                content: extract_message_content_from_object(msg)
              }
            else
              {
                role: msg[:role].to_s,
                content: extract_message_content(msg)
              }
            end
          end
        end

        def extract_message_content_from_object(message)
          content = message.content

          # Handle Content objects
          if content.is_a?(Content)
            # Extract text from Content object, including attachment text
            handle_content_object(content)
          elsif content.is_a?(String)
            content
          else
            content.to_s
          end
        end

        def extract_message_content(message)
          content = message[:content]

          # Handle Content objects
          case content
          when Content
            # Extract text from Content object
            handle_content_object(content)
          when String
            content
          when Array
            # Handle array content (e.g., with images)
            content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ')
          else
            content.to_s
          end
        end

        def handle_content_object(content)
          text_parts = []
          text_parts << content.text if content.text

          # Add any text from attachments
          content.attachments&.each do |attachment|
            text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String)
          end

          text_parts.join(' ')
        end

        def generate_with_schema(model, prompt, schema, config_opts)
          model.generate_structured(
            prompt,
            schema: schema,
            **config_opts
          )
        rescue StandardError => e
          RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation."
          model.generate(
            prompt,
            config: ::Candle::GenerationConfig.balanced(**config_opts)
          )
        end

        def format_response(response, schema)
          content = if schema && !response.is_a?(String)
                      # Structured response
                      JSON.generate(response)
                    else
                      response
                    end

          {
            content: content,
            role: 'assistant'
          }
        end

        def format_stream_chunk(token)
          # Return a Chunk object for streaming compatibility
          Chunk.new(
            role: :assistant,
            content: token
          )
        end

        def estimate_input_tokens(messages)
          # Rough estimation: ~4 characters per token
          formatted = format_messages(messages)
          total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length }
          (total_chars / 4.0).round
        end

        def validate_context_length!(prompt, model_id)
          # Get the context window for this model
          context_window = if respond_to?(:model_context_window)
                             model_context_window(model_id)
                           else
                             4096 # Conservative default
                           end

          # Estimate tokens in prompt (~4 characters per token)
          estimated_tokens = (prompt.length / 4.0).round

          # Check if prompt exceeds context window (leave some room for response)
          max_input_tokens = context_window - 512 # Reserve 512 tokens for response
          return unless estimated_tokens > max_input_tokens

          raise Error.new(
            nil,
            "Context length exceeded. Estimated #{estimated_tokens} tokens, " \
            "but model #{model_id} has a context window of #{context_window} tokens."
          )
        end
      end
    end
  end
end
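A hedged end-to-end sketch of chatting through the Red Candle provider. It assumes the upstream RubyLLM.chat API, that the provider is registered under the :red_candle key, and that the red-candle gem is installed and can download and load the model locally; none of this registration code appears in the excerpts above.

# Usage sketch (illustrative, not part of the diff).
chat = RubyLLM.chat(
  model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
  provider: :red_candle
)

# Blocking call: returns a Message whose token counts are estimates (~4 chars/token).
response = chat.ask('Name three uses for a quantized local model.')
puts response.content

# Streaming call: each generated token is yielded as a Chunk.
chat.ask('Summarize GGUF quantization in one sentence.') do |chunk|
  print chunk.content
end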
data/lib/ruby_llm/providers/red_candle/models.rb
ADDED
@@ -0,0 +1,121 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    class RedCandle
      # Models methods of the RedCandle integration
      module Models
        # TODO: red-candle supports more models, but let's start with some well tested ones.
        SUPPORTED_MODELS = [
          {
            id: 'google/gemma-3-4b-it-qat-q4_0-gguf',
            name: 'Gemma 3 4B Instruct (Quantized)',
            gguf_file: 'gemma-3-4b-it-q4_0.gguf',
            tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model
            context_window: 8192,
            family: 'gemma',
            architecture: 'gemma2',
            supports_chat: true,
            supports_structured: true
          },
          {
            id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
            name: 'TinyLlama 1.1B Chat (Quantized)',
            gguf_file: 'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
            context_window: 2048,
            family: 'llama',
            architecture: 'llama',
            supports_chat: true,
            supports_structured: true
          },
          {
            id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
            name: 'Mistral 7B Instruct v0.2 (Quantized)',
            gguf_file: 'mistral-7b-instruct-v0.2.Q4_K_M.gguf',
            tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2',
            context_window: 32_768,
            family: 'mistral',
            architecture: 'mistral',
            supports_chat: true,
            supports_structured: true
          },
          {
            id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF',
            name: 'Qwen 2.1.5B Instruct (Quantized)',
            gguf_file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf',
            context_window: 32_768,
            family: 'qwen2',
            architecture: 'qwen2',
            supports_chat: true,
            supports_structured: true
          },
          {
            id: 'microsoft/Phi-3-mini-4k-instruct',
            name: 'Phi 3',
            context_window: 4096,
            family: 'phi',
            architecture: 'phi',
            supports_chat: true,
            supports_structured: true
          }
        ].freeze

        def list_models
          SUPPORTED_MODELS.map do |model_data|
            Model::Info.new(
              id: model_data[:id],
              name: model_data[:name],
              provider: slug,
              family: model_data[:family],
              context_window: model_data[:context_window],
              capabilities: %w[streaming structured_output],
              modalities: { input: %w[text], output: %w[text] }
            )
          end
        end

        def models
          @models ||= list_models
        end

        def model(id)
          models.find { |m| m.id == id } ||
            raise(Error.new(nil,
                            "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}"))
        end

        def model_available?(id)
          SUPPORTED_MODELS.any? { |m| m[:id] == id }
        end

        def model_ids
          SUPPORTED_MODELS.map { |m| m[:id] }
        end

        def model_info(id)
          SUPPORTED_MODELS.find { |m| m[:id] == id }
        end

        def supports_chat?(model_id)
          info = model_info(model_id)
          info ? info[:supports_chat] : false
        end

        def supports_structured?(model_id)
          info = model_info(model_id)
          info ? info[:supports_structured] : false
        end

        def gguf_file_for(model_id)
          info = model_info(model_id)
          info ? info[:gguf_file] : nil
        end

        def tokenizer_for(model_id)
          info = model_info(model_id)
          info ? info[:tokenizer] : nil
        end
      end
    end
  end
end
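The registry above is a plain frozen array of hashes, so its entries can be inspected directly; the lookup helpers (gguf_file_for, tokenizer_for, and friends) are instance methods mixed into the provider rather than module-level functions. An illustrative lookup:

# Lookup sketch (illustrative, not part of the diff).
registry = RubyLLM::Providers::RedCandle::Models::SUPPORTED_MODELS

registry.map { |m| m[:id] }.first(2)
# => ["google/gemma-3-4b-it-qat-q4_0-gguf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"]

mistral = registry.find { |m| m[:id] == 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' }
mistral[:gguf_file]  # => "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
mistral[:tokenizer]  # => "mistralai/Mistral-7B-Instruct-v0.2"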