red-candle 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/candle/llm.rb CHANGED
@@ -1,5 +1,87 @@
1
+ require 'json'
2
+
1
3
  module Candle
2
4
  class LLM
5
# Returns the model's EOS token, memoizing the lookup.
#
# The outcome is cached even when it is nil (the receiver does not respond
# to +eos_token+, or the call raised a StandardError), so the underlying
# lookup runs at most once per instance.
#
# @return [Object, nil] whatever +eos_token+ returned, or nil when unavailable
def cached_eos_token
  # `||=` would repeat the lookup on every call whenever the answer is nil,
  # so "already computed" is tracked by the ivar's existence instead.
  return @cached_eos_token if defined?(@cached_eos_token)

  @cached_eos_token =
    if respond_to?(:eos_token)
      begin
        eos_token
      rescue StandardError
        # Best effort: a failing lookup is treated as "no EOS token".
        nil
      end
    end
end
13
+
14
# Returns the list of end-of-sequence markers to use for this model (memoized).
#
# When cached_eos_token yields a token, that token is used; for models whose
# name contains "gemma" (case-insensitive), "<end_of_turn>" and "</s>" are
# appended as well. When no token is available, a fixed list of common
# markers is used instead. Duplicates are removed.
#
# @return [Array] unique EOS markers
def model_eos_tokens
  return @model_eos_tokens if @model_eos_tokens

  native_eos = cached_eos_token
  candidates =
    if native_eos
      # Gemma: also treat end_of_turn (chat) and </s> as generation boundaries.
      gemma = model_name.downcase.include?("gemma")
      gemma ? [native_eos, "<end_of_turn>", "</s>"] : [native_eos]
    else
      # Fallback to common tokens only if the model doesn't provide one.
      ["</s>", "<|endoftext|>", "<|im_end|>", "<end>"]
    end

  @model_eos_tokens = candidates.uniq
end
34
# Builds a structured constraint from a JSON schema.
#
# A String schema is handed over unchanged; any other value is serialized
# with JSON.generate first.
#
# @param schema [String, Object] JSON schema text or a JSON-serializable object
# @return the result of StructuredConstraint.from_schema
def constraint_from_schema(schema)
  serialized = schema
  serialized = JSON.generate(schema) unless schema.is_a?(String)
  StructuredConstraint.from_schema(serialized, tokenizer)
end
39
+
40
# Builds a structured constraint from a regex pattern.
#
# A Regexp contributes only its +source+ (Regexp#source does not include
# flags such as /i); any other value is converted with +to_s+.
#
# @param pattern [Regexp, #to_s] the pattern to constrain generation to
# @return the result of StructuredConstraint.from_regex
def constraint_from_regex(pattern)
  source =
    if pattern.is_a?(Regexp)
      pattern.source
    else
      pattern.to_s
    end
  StructuredConstraint.from_regex(source, tokenizer)
end
45
+
46
# Generates text constrained by a regex pattern.
#
# Unless a ready-made configuration is passed as options[:config], a
# GenerationConfig.balanced is built from the remaining options plus the
# constraint and the early-stopping flags.
#
# NOTE: when options[:config] is given it is used as-is; the constraint
# built from +pattern+ is not added to it by this method.
#
# @param prompt the prompt forwarded to +generate+
# @param pattern [Regexp, #to_s] pattern passed to constraint_from_regex
# @param stop_on_match [Boolean] default for early stopping (true)
# @param options [Hash] extra generation options; :reset_cache defaults to true
# @return the result of +generate+
def generate_regex(prompt, pattern:, stop_on_match: true, **options)
  regex_constraint = constraint_from_regex(pattern)

  generation_config = options[:config]
  unless generation_config
    # An explicit :stop_on_constraint_satisfaction wins over stop_on_match.
    early_stop = options.fetch(:stop_on_constraint_satisfaction, stop_on_match)
    settings = options.merge(
      constraint: regex_constraint,
      stop_on_constraint_satisfaction: early_stop,
      stop_on_match: stop_on_match
    )
    generation_config = GenerationConfig.balanced(**settings)
  end

  generate(prompt, config: generation_config, reset_cache: options.fetch(:reset_cache, true))
end
60
+
61
# Generates output constrained by a JSON schema and tries to parse it.
#
# Unless options[:config] is supplied, a GenerationConfig.balanced is built
# from the options plus the schema constraint, with
# :stop_on_constraint_satisfaction defaulting to true.
#
# NOTE: when options[:config] is given it is used as-is; the schema
# constraint is not added to it by this method.
#
# @param prompt the prompt forwarded to +generate+
# @param schema [String, Object] schema passed to constraint_from_schema
# @param options [Hash] extra generation options; :reset_cache defaults to
#   true, :warn_on_parse_error enables a warning on invalid JSON
# @return the parsed JSON value, or the raw generated output when it is not
#   valid JSON
def generate_structured(prompt, schema:, **options)
  schema_constraint = constraint_from_schema(schema)

  generation_config = options[:config]
  generation_config ||= GenerationConfig.balanced(
    **options.merge(
      constraint: schema_constraint,
      stop_on_constraint_satisfaction: options.fetch(:stop_on_constraint_satisfaction, true)
    )
  )

  raw_output = generate(prompt, config: generation_config, reset_cache: options.fetch(:reset_cache, true))

  begin
    # Strip stop tokens / surrounding text before parsing.
    JSON.parse(extract_json_content(raw_output))
  rescue JSON::ParserError => parse_error
    # Fall back to the raw output when it cannot be parsed.
    warn "Warning: Generated output is not valid JSON: #{parse_error.message}" if options[:warn_on_parse_error]
    raw_output
  end
end
3
85
  # Tokenizer registry for automatic detection
4
86
  TOKENIZER_REGISTRY = {
5
87
  # Exact model matches
@@ -8,6 +90,18 @@ module Candle
8
90
  "TheBloke/Llama-2-7B-Chat-GGUF" => "meta-llama/Llama-2-7b-chat-hf",
9
91
  "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" => "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
10
92
 
93
+ # Qwen official GGUF models
94
+ "Qwen/Qwen3-8B-GGUF" => "Qwen/Qwen3-8B",
95
+ "Qwen/Qwen3-4B-GGUF" => "Qwen/Qwen3-4B",
96
+ "Qwen/Qwen3-14B-GGUF" => "Qwen/Qwen3-14B",
97
+ "Qwen/Qwen3-32B-GGUF" => "Qwen/Qwen3-32B",
98
+ "Qwen/Qwen3-72B-GGUF" => "Qwen/Qwen3-72B",
99
+
100
+ # Phi GGUF models
101
+ "TheBloke/phi-2-GGUF" => "microsoft/phi-2",
102
+ "microsoft/phi-4-gguf" => "microsoft/phi-4",
103
+ "bartowski/Phi-3.5-mini-instruct-GGUF" => "microsoft/Phi-3.5-mini-instruct",
104
+
11
105
  # Pattern-based fallbacks (evaluated in order)
12
106
  :patterns => [
13
107
  # Mistral models
@@ -27,7 +121,31 @@ module Candle
27
121
  [/gemma.*?2.*?9b/i, "google/gemma-2-9b"],
28
122
  [/gemma.*?2.*?2b/i, "google/gemma-2-2b"],
29
123
  [/gemma.*?7b/i, "google/gemma-7b"],
30
- [/gemma.*?2b/i, "google/gemma-2b"]
124
+ [/gemma.*?2b/i, "google/gemma-2b"],
125
+
126
+ # Qwen models
127
+ [/qwen.*?3.*?72b/i, "Qwen/Qwen3-72B"],
128
+ [/qwen.*?3.*?32b/i, "Qwen/Qwen3-32B"],
129
+ [/qwen.*?3.*?14b/i, "Qwen/Qwen3-14B"],
130
+ [/qwen.*?3.*?8b/i, "Qwen/Qwen3-8B"],
131
+ [/qwen.*?3.*?4b/i, "Qwen/Qwen3-4B"],
132
+ [/qwen.*?3.*?1\.8b/i, "Qwen/Qwen3-1.8B"],
133
+ [/qwen.*?3.*?0\.5b/i, "Qwen/Qwen3-0.5B"],
134
+ [/qwen.*?2\.5/i, "Qwen/Qwen2.5-0.5B"],
135
+ [/qwen.*?2/i, "Qwen/Qwen2-1.5B"],
136
+ [/qwen/i, "Qwen/Qwen-1_8B"],
137
+
138
+ # Phi models (order matters - more specific patterns first)
139
+ [/phi.*?3\.5.*?mini/i, "microsoft/Phi-3.5-mini-instruct"],
140
+ [/phi.*?3.*?mini.*?4k/i, "microsoft/Phi-3-mini-4k-instruct"],
141
+ [/phi.*?3.*?medium/i, "microsoft/Phi-3-medium-4k-instruct"],
142
+ [/phi.*?3.*?small/i, "microsoft/Phi-3-small-8k-instruct"],
143
+ [/phi.*?3.*?mini/i, "microsoft/Phi-3-mini-4k-instruct"],
144
+ [/phi.*?3/i, "microsoft/Phi-3-mini-4k-instruct"],
145
+ [/phi-4/i, "microsoft/phi-4"],
146
+ [/phi.*?2/i, "microsoft/phi-2"],
147
+ [/phi.*?1\.5/i, "microsoft/phi-1_5"],
148
+ [/phi/i, "microsoft/phi-2"]
31
149
  ]
32
150
  }
33
151
 
@@ -123,6 +241,88 @@ module Candle
123
241
 
124
242
  private
125
243
 
244
# Extracts the JSON payload from generated text.
#
# Steps:
#   1. For each token in model_eos_tokens, in turn, cut the text at that
#      token's first occurrence (nil/empty tokens are skipped).
#   2. Find the first "{" or "[" and scan forward to the matching closer,
#      ignoring brackets that appear inside double-quoted strings.
#
# @param text [String] raw generated text
# @return [String] the balanced object/array substring when one is found;
#   otherwise the truncated text with surrounding whitespace stripped
def extract_json_content(text)
  cleaned = model_eos_tokens.reduce(text) do |remaining, token|
    # An empty token matches at index 0 and would discard the whole output.
    next remaining if token.to_s.empty?

    cut = remaining.index(token)
    cut ? remaining[0...cut] : remaining
  end

  start_idx = cleaned.index(/[\{\[]/)
  return cleaned.strip unless start_idx

  # If the structure never closes, fall back to the cleaned string.
  balanced_json_prefix(cleaned[start_idx..-1]) || cleaned.strip
end

# Returns the shortest prefix of +candidate+ in which the bracket opened by
# its first character ("{" or "[") is closed again, or nil if it never is.
# Only the bracket kind of the first character is counted; a double quote
# toggles string mode unless preceded by an unescaped backslash.
def balanced_json_prefix(candidate)
  opener = candidate[0]
  closer = opener == '{' ? '}' : ']'
  depth = 0
  in_string = false
  escape_next = false

  candidate.each_char.with_index do |char, idx|
    if in_string
      in_string = false if char == '"' && !escape_next
    elsif char == opener
      depth += 1
    elsif char == closer
      depth -= 1
      return candidate[0..idx] if depth.zero?
    elsif char == '"'
      in_string = true unless escape_next
    end

    # A backslash escapes the next character unless it is itself escaped.
    escape_next = (!escape_next && char == '\\')
  end

  nil
end
325
+
126
326
  # Legacy format messages method - kept for backward compatibility
127
327
  # Use apply_chat_template for proper model-specific formatting
128
328
  def format_messages(messages)
@@ -155,7 +355,8 @@ module Candle
155
355
  repetition_penalty: repetition_penalty,
156
356
  seed: seed,
157
357
  stop_sequences: stop_sequences,
158
- include_prompt: include_prompt
358
+ include_prompt: include_prompt,
359
+ constraint: defined?(@constraint) ? @constraint : nil
159
360
  }.compact
160
361
 
161
362
  self.class.new(current_config.merge(overrides))
@@ -1,5 +1,5 @@
1
1
  # :nocov:
2
2
  module Candle
3
- VERSION = "1.0.2"
3
+ VERSION = "1.1.1"
4
4
  end
5
5
  # :nocov:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-candle
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Petersen
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2025-07-22 00:00:00.000000000 Z
12
+ date: 2025-07-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rb_sys
@@ -159,12 +159,15 @@ files:
159
159
  - ext/candle/extconf.rb
160
160
  - ext/candle/rustfmt.toml
161
161
  - ext/candle/src/lib.rs
162
+ - ext/candle/src/llm/constrained_generation_test.rs
162
163
  - ext/candle/src/llm/gemma.rs
163
164
  - ext/candle/src/llm/generation_config.rs
164
165
  - ext/candle/src/llm/llama.rs
165
166
  - ext/candle/src/llm/mistral.rs
166
167
  - ext/candle/src/llm/mod.rs
168
+ - ext/candle/src/llm/phi.rs
167
169
  - ext/candle/src/llm/quantized_gguf.rs
170
+ - ext/candle/src/llm/qwen.rs
168
171
  - ext/candle/src/llm/text_generation.rs
169
172
  - ext/candle/src/ner.rs
170
173
  - ext/candle/src/reranker.rs
@@ -175,9 +178,16 @@ files:
175
178
  - ext/candle/src/ruby/llm.rs
176
179
  - ext/candle/src/ruby/mod.rs
177
180
  - ext/candle/src/ruby/result.rs
181
+ - ext/candle/src/ruby/structured.rs
178
182
  - ext/candle/src/ruby/tensor.rs
179
183
  - ext/candle/src/ruby/tokenizer.rs
180
184
  - ext/candle/src/ruby/utils.rs
185
+ - ext/candle/src/structured/integration_test.rs
186
+ - ext/candle/src/structured/mod.rs
187
+ - ext/candle/src/structured/schema_processor.rs
188
+ - ext/candle/src/structured/vocabulary_adapter.rs
189
+ - ext/candle/src/structured/vocabulary_adapter_real_test.rs
190
+ - ext/candle/src/structured/vocabulary_adapter_simple_test.rs
181
191
  - ext/candle/src/tokenizer/loader.rs
182
192
  - ext/candle/src/tokenizer/mod.rs
183
193
  - ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt
@@ -210,14 +220,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
210
220
  requirements:
211
221
  - - ">="
212
222
  - !ruby/object:Gem::Version
213
- version: 2.7.0
223
+ version: 3.2.0
214
224
  required_rubygems_version: !ruby/object:Gem::Requirement
215
225
  requirements:
216
226
  - - ">="
217
227
  - !ruby/object:Gem::Version
218
228
  version: 3.3.26
219
229
  requirements:
220
- - Rust >= 1.65
230
+ - Rust >= 1.85
221
231
  rubygems_version: 3.5.3
222
232
  signing_key:
223
233
  specification_version: 4