red-candle 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +244 -6
- data/README.md +38 -3
- data/Rakefile +46 -1
- data/ext/candle/Cargo.toml +2 -0
- data/ext/candle/src/lib.rs +2 -0
- data/ext/candle/src/llm/constrained_generation_test.rs +316 -0
- data/ext/candle/src/llm/gemma.rs +21 -5
- data/ext/candle/src/llm/generation_config.rs +11 -0
- data/ext/candle/src/llm/llama.rs +21 -5
- data/ext/candle/src/llm/mistral.rs +21 -5
- data/ext/candle/src/llm/mod.rs +5 -0
- data/ext/candle/src/llm/phi.rs +301 -0
- data/ext/candle/src/llm/quantized_gguf.rs +173 -9
- data/ext/candle/src/llm/qwen.rs +245 -0
- data/ext/candle/src/llm/text_generation.rs +183 -26
- data/ext/candle/src/ner.rs +25 -51
- data/ext/candle/src/reranker.rs +41 -68
- data/ext/candle/src/ruby/device.rs +5 -0
- data/ext/candle/src/ruby/llm.rs +119 -55
- data/ext/candle/src/ruby/mod.rs +1 -0
- data/ext/candle/src/ruby/structured.rs +47 -0
- data/ext/candle/src/structured/integration_test.rs +130 -0
- data/ext/candle/src/structured/mod.rs +31 -0
- data/ext/candle/src/structured/schema_processor.rs +215 -0
- data/ext/candle/src/structured/vocabulary_adapter.rs +152 -0
- data/ext/candle/src/structured/vocabulary_adapter_real_test.rs +66 -0
- data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs +70 -0
- data/lib/candle/llm.rb +203 -2
- data/lib/candle/version.rb +1 -1
- metadata +14 -4
data/lib/candle/llm.rb
CHANGED
@@ -1,5 +1,87 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
1
3
|
module Candle
|
2
4
|
class LLM
|
5
|
+
# Memoized lookup of the model's EOS token.
#
# The result is cached even when it is nil, so a model that exposes no EOS
# token (or whose lookup fails) is only queried once.
#
# @return [Object, nil] whatever +eos_token+ returns, or nil when the receiver
#   does not respond to +eos_token+ or the lookup raises a StandardError
def cached_eos_token
  # `||=` would re-run the lookup every time the cached value is nil.
  return @cached_eos_token if defined?(@cached_eos_token)

  @cached_eos_token =
    begin
      eos_token if respond_to?(:eos_token)
    rescue StandardError
      # Best effort: a failing lookup is treated the same as "no EOS token".
      nil
    end
end
|
13
|
+
|
14
|
+
# Memoized list of strings treated as end-of-sequence markers for this model.
#
# When the model reports an EOS token (via +cached_eos_token+) the list starts
# with it; otherwise a fixed list of common markers is used.
#
# @return [Array] de-duplicated EOS markers
def model_eos_tokens
  @model_eos_tokens ||= begin
    model_eos = cached_eos_token

    if model_eos
      tokens = [model_eos]
      # For Gemma, also include end_of_turn for chat scenarios and </s>.
      # Even though </s> is technically an HTML tag in Gemma's vocabulary,
      # it seems to use it as a generation boundary in practice.
      # to_s keeps a nil model_name from raising NoMethodError here.
      tokens.push("<end_of_turn>", "</s>") if model_name.to_s.downcase.include?("gemma")
      tokens.uniq
    else
      # Fallback to common tokens only if the model doesn't provide one.
      ["</s>", "<|endoftext|>", "<|im_end|>", "<end>"]
    end
  end
end
|
34
|
+
# Build a StructuredConstraint from a JSON schema.
#
# @param schema [String, Object] a schema already serialized as a JSON string,
#   or any object that JSON.generate can serialize
# @return the value of StructuredConstraint.from_schema for this model's tokenizer
def constraint_from_schema(schema)
  serialized =
    case schema
    when String then schema
    else JSON.generate(schema)
    end

  StructuredConstraint.from_schema(serialized, tokenizer)
end
|
39
|
+
|
40
|
+
# Build a StructuredConstraint from a regular expression.
#
# A Regexp contributes only its +source+ text; any other value is converted
# with +to_s+.
#
# @param pattern [Regexp, #to_s] the pattern the output must match
# @return the value of StructuredConstraint.from_regex for this model's tokenizer
def constraint_from_regex(pattern)
  source =
    case pattern
    when Regexp then pattern.source
    else pattern.to_s
    end

  StructuredConstraint.from_regex(source, tokenizer)
end
|
45
|
+
|
46
|
+
# Generate text constrained to match a regex pattern.
#
# NOTE: when +config:+ is supplied it is used as-is, so the constraint built
# from +pattern+ is not attached to it.
#
# @param prompt [String] prompt handed to +generate+
# @param pattern [Regexp, #to_s] pattern passed to +constraint_from_regex+
# @param stop_on_match [Boolean] forwarded as +stop_on_match+ and used as the
#   default for +stop_on_constraint_satisfaction+
# @param options [Hash] extra GenerationConfig.balanced options, plus
#   +:config+ (a ready-made config) and +:reset_cache+ (defaults to true)
# @return whatever +generate+ returns
def generate_regex(prompt, pattern:, stop_on_match: true, **options)
  constraint = constraint_from_regex(pattern)

  config = options[:config] || begin
    # :config and :reset_cache steer this call, not the sampler, so they are
    # kept out of the generation config.
    config_opts = options.reject { |key, _| %i[config reset_cache].include?(key) }

    # Configure generation with early stopping by default.
    GenerationConfig.balanced(
      **config_opts.merge(
        constraint: constraint,
        stop_on_constraint_satisfaction: options.fetch(:stop_on_constraint_satisfaction, stop_on_match),
        stop_on_match: stop_on_match
      )
    )
  end

  generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))
end
|
60
|
+
|
61
|
+
# Generate output constrained by a JSON schema and parse it.
#
# NOTE: when +config:+ is supplied it is used as-is, so the constraint built
# from +schema+ is not attached to it.
#
# @param prompt [String] prompt handed to +generate+
# @param schema [String, Object] schema passed to +constraint_from_schema+
# @param options [Hash] extra GenerationConfig.balanced options, plus
#   +:config+ (a ready-made config), +:reset_cache+ (defaults to true) and
#   +:warn_on_parse_error+ (emit a warning when the output is not valid JSON)
# @return [Object] the parsed JSON value, or the raw generated text when it
#   cannot be parsed
def generate_structured(prompt, schema:, **options)
  constraint = constraint_from_schema(schema)

  config = options[:config] || begin
    # These keys steer this call, not the sampler, so they are kept out of
    # the generation config.
    config_opts = options.reject do |key, _|
      %i[config reset_cache warn_on_parse_error].include?(key)
    end

    # Configure generation with early stopping by default.
    GenerationConfig.balanced(
      **config_opts.merge(
        constraint: constraint,
        stop_on_constraint_satisfaction: options.fetch(:stop_on_constraint_satisfaction, true)
      )
    )
  end

  result = generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))

  begin
    # Strip stop tokens / trailing text before handing the payload to the parser.
    JSON.parse(extract_json_content(result))
  rescue JSON::ParserError => e
    # Return the raw string if parsing fails.
    warn "Warning: Generated output is not valid JSON: #{e.message}" if options[:warn_on_parse_error]
    result
  end
end
|
3
85
|
# Tokenizer registry for automatic detection
|
4
86
|
TOKENIZER_REGISTRY = {
|
5
87
|
# Exact model matches
|
@@ -8,6 +90,18 @@ module Candle
|
|
8
90
|
"TheBloke/Llama-2-7B-Chat-GGUF" => "meta-llama/Llama-2-7b-chat-hf",
|
9
91
|
"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" => "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
10
92
|
|
93
|
+
# Qwen official GGUF models
|
94
|
+
"Qwen/Qwen3-8B-GGUF" => "Qwen/Qwen3-8B",
|
95
|
+
"Qwen/Qwen3-4B-GGUF" => "Qwen/Qwen3-4B",
|
96
|
+
"Qwen/Qwen3-14B-GGUF" => "Qwen/Qwen3-14B",
|
97
|
+
"Qwen/Qwen3-32B-GGUF" => "Qwen/Qwen3-32B",
|
98
|
+
"Qwen/Qwen3-72B-GGUF" => "Qwen/Qwen3-72B",
|
99
|
+
|
100
|
+
# Phi GGUF models
|
101
|
+
"TheBloke/phi-2-GGUF" => "microsoft/phi-2",
|
102
|
+
"microsoft/phi-4-gguf" => "microsoft/phi-4",
|
103
|
+
"bartowski/Phi-3.5-mini-instruct-GGUF" => "microsoft/Phi-3.5-mini-instruct",
|
104
|
+
|
11
105
|
# Pattern-based fallbacks (evaluated in order)
|
12
106
|
:patterns => [
|
13
107
|
# Mistral models
|
@@ -27,7 +121,31 @@ module Candle
|
|
27
121
|
[/gemma.*?2.*?9b/i, "google/gemma-2-9b"],
|
28
122
|
[/gemma.*?2.*?2b/i, "google/gemma-2-2b"],
|
29
123
|
[/gemma.*?7b/i, "google/gemma-7b"],
|
30
|
-
[/gemma.*?2b/i, "google/gemma-2b"]
|
124
|
+
[/gemma.*?2b/i, "google/gemma-2b"],
|
125
|
+
|
126
|
+
# Qwen models
|
127
|
+
[/qwen.*?3.*?72b/i, "Qwen/Qwen3-72B"],
|
128
|
+
[/qwen.*?3.*?32b/i, "Qwen/Qwen3-32B"],
|
129
|
+
[/qwen.*?3.*?14b/i, "Qwen/Qwen3-14B"],
|
130
|
+
[/qwen.*?3.*?8b/i, "Qwen/Qwen3-8B"],
|
131
|
+
[/qwen.*?3.*?4b/i, "Qwen/Qwen3-4B"],
|
132
|
+
[/qwen.*?3.*?1\.8b/i, "Qwen/Qwen3-1.8B"],
|
133
|
+
[/qwen.*?3.*?0\.5b/i, "Qwen/Qwen3-0.5B"],
|
134
|
+
[/qwen.*?2\.5/i, "Qwen/Qwen2.5-0.5B"],
|
135
|
+
[/qwen.*?2/i, "Qwen/Qwen2-1.5B"],
|
136
|
+
[/qwen/i, "Qwen/Qwen-1_8B"],
|
137
|
+
|
138
|
+
# Phi models (order matters - more specific patterns first)
|
139
|
+
[/phi.*?3\.5.*?mini/i, "microsoft/Phi-3.5-mini-instruct"],
|
140
|
+
[/phi.*?3.*?mini.*?4k/i, "microsoft/Phi-3-mini-4k-instruct"],
|
141
|
+
[/phi.*?3.*?medium/i, "microsoft/Phi-3-medium-4k-instruct"],
|
142
|
+
[/phi.*?3.*?small/i, "microsoft/Phi-3-small-8k-instruct"],
|
143
|
+
[/phi.*?3.*?mini/i, "microsoft/Phi-3-mini-4k-instruct"],
|
144
|
+
[/phi.*?3/i, "microsoft/Phi-3-mini-4k-instruct"],
|
145
|
+
[/phi-4/i, "microsoft/phi-4"],
|
146
|
+
[/phi.*?2/i, "microsoft/phi-2"],
|
147
|
+
[/phi.*?1\.5/i, "microsoft/phi-1_5"],
|
148
|
+
[/phi/i, "microsoft/phi-2"]
|
31
149
|
]
|
32
150
|
}
|
33
151
|
|
@@ -123,6 +241,88 @@ module Candle
|
|
123
241
|
|
124
242
|
private
|
125
243
|
|
244
|
+
# Extract the JSON payload from generated text.
#
# The text is first cut at the earliest occurrence of any token from
# +model_eos_tokens+. Then, starting at the first "{" or "[", the matching
# closing bracket is located (brackets inside JSON string literals are
# ignored) and that balanced slice is returned.
#
# @param text [String] raw generated text
# @return [String] the balanced JSON slice, or the stripped, EOS-truncated
#   text when there is no opening bracket or the structure never closes
def extract_json_content(text)
  cleaned = text

  # Cutting once per token leaves the text truncated at the earliest marker.
  model_eos_tokens.each do |token|
    next if token.to_s.empty? # index("") is always 0 and would wipe the text

    cut = cleaned.index(token)
    cleaned = cleaned[0...cut] if cut
  end

  start_idx = cleaned.index(/[\{\[]/)
  return cleaned.strip unless start_idx

  candidate = cleaned[start_idx..-1]
  closer = candidate.start_with?('{') ? '}' : ']'

  # If no balanced structure is found, fall back to the cleaned string.
  balanced_json_prefix(candidate, candidate[0], closer) || cleaned.strip
end

# Return the prefix of +candidate+ that ends at the bracket balancing its
# first character, or nil when that bracket is never closed. Only the given
# opener/closer pair is counted; characters inside double-quoted strings
# (honouring backslash escapes) are skipped.
def balanced_json_prefix(candidate, opener, closer)
  depth = 0
  in_string = false
  escape_next = false

  candidate.each_char.with_index do |char, idx|
    if in_string
      in_string = false if char == '"' && !escape_next
    elsif char == opener
      depth += 1
    elsif char == closer
      depth -= 1
      return candidate[0..idx] if depth.zero?
    elsif char == '"'
      in_string = true unless escape_next
    end

    # A backslash escapes the next character unless it was itself escaped.
    escape_next = (!escape_next && char == '\\')
  end

  nil
end
|
325
|
+
|
126
326
|
# Legacy format messages method - kept for backward compatibility
|
127
327
|
# Use apply_chat_template for proper model-specific formatting
|
128
328
|
def format_messages(messages)
|
@@ -155,7 +355,8 @@ module Candle
|
|
155
355
|
repetition_penalty: repetition_penalty,
|
156
356
|
seed: seed,
|
157
357
|
stop_sequences: stop_sequences,
|
158
|
-
include_prompt: include_prompt
|
358
|
+
include_prompt: include_prompt,
|
359
|
+
constraint: defined?(@constraint) ? @constraint : nil
|
159
360
|
}.compact
|
160
361
|
|
161
362
|
self.class.new(current_config.merge(overrides))
|
data/lib/candle/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-candle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Petersen
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2025-07-
|
12
|
+
date: 2025-07-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rb_sys
|
@@ -159,12 +159,15 @@ files:
|
|
159
159
|
- ext/candle/extconf.rb
|
160
160
|
- ext/candle/rustfmt.toml
|
161
161
|
- ext/candle/src/lib.rs
|
162
|
+
- ext/candle/src/llm/constrained_generation_test.rs
|
162
163
|
- ext/candle/src/llm/gemma.rs
|
163
164
|
- ext/candle/src/llm/generation_config.rs
|
164
165
|
- ext/candle/src/llm/llama.rs
|
165
166
|
- ext/candle/src/llm/mistral.rs
|
166
167
|
- ext/candle/src/llm/mod.rs
|
168
|
+
- ext/candle/src/llm/phi.rs
|
167
169
|
- ext/candle/src/llm/quantized_gguf.rs
|
170
|
+
- ext/candle/src/llm/qwen.rs
|
168
171
|
- ext/candle/src/llm/text_generation.rs
|
169
172
|
- ext/candle/src/ner.rs
|
170
173
|
- ext/candle/src/reranker.rs
|
@@ -175,9 +178,16 @@ files:
|
|
175
178
|
- ext/candle/src/ruby/llm.rs
|
176
179
|
- ext/candle/src/ruby/mod.rs
|
177
180
|
- ext/candle/src/ruby/result.rs
|
181
|
+
- ext/candle/src/ruby/structured.rs
|
178
182
|
- ext/candle/src/ruby/tensor.rs
|
179
183
|
- ext/candle/src/ruby/tokenizer.rs
|
180
184
|
- ext/candle/src/ruby/utils.rs
|
185
|
+
- ext/candle/src/structured/integration_test.rs
|
186
|
+
- ext/candle/src/structured/mod.rs
|
187
|
+
- ext/candle/src/structured/schema_processor.rs
|
188
|
+
- ext/candle/src/structured/vocabulary_adapter.rs
|
189
|
+
- ext/candle/src/structured/vocabulary_adapter_real_test.rs
|
190
|
+
- ext/candle/src/structured/vocabulary_adapter_simple_test.rs
|
181
191
|
- ext/candle/src/tokenizer/loader.rs
|
182
192
|
- ext/candle/src/tokenizer/mod.rs
|
183
193
|
- ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt
|
@@ -210,14 +220,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
210
220
|
requirements:
|
211
221
|
- - ">="
|
212
222
|
- !ruby/object:Gem::Version
|
213
|
-
version: 2.
|
223
|
+
version: 3.2.0
|
214
224
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
215
225
|
requirements:
|
216
226
|
- - ">="
|
217
227
|
- !ruby/object:Gem::Version
|
218
228
|
version: 3.3.26
|
219
229
|
requirements:
|
220
|
-
- Rust >= 1.
|
230
|
+
- Rust >= 1.85
|
221
231
|
rubygems_version: 3.5.3
|
222
232
|
signing_key:
|
223
233
|
specification_version: 4
|