ragnar-cli 0.1.0.pre.4 → 0.1.0.pre.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +99 -42
- data/lib/ragnar/cli.rb +94 -105
- data/lib/ragnar/cli_umap.rb +86 -0
- data/lib/ragnar/config.rb +101 -7
- data/lib/ragnar/embedder.rb +1 -1
- data/lib/ragnar/indexer.rb +4 -2
- data/lib/ragnar/llm_manager.rb +31 -30
- data/lib/ragnar/query_processor.rb +87 -52
- data/lib/ragnar/query_rewriter.rb +21 -18
- data/lib/ragnar/umap_processor.rb +54 -30
- data/lib/ragnar/umap_transform_service.rb +1 -1
- data/lib/ragnar/version.rb +1 -1
- data/lib/ragnar.rb +3 -1
- metadata +36 -16
data/lib/ragnar/config.rb
CHANGED
|
@@ -57,12 +57,74 @@ module Ragnar
|
|
|
57
57
|
get('embeddings.chunk_overlap', Ragnar::DEFAULT_CHUNK_OVERLAP)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
# LLM Profile support
|
|
61
|
+
# Profiles allow switching between LLM providers/models via --profile flag
|
|
62
|
+
# Backwards compatible: flat llm.provider/llm.default_model still work if no profiles defined
|
|
63
|
+
|
|
64
|
+
def set_active_profile(name)
|
|
65
|
+
name = name.to_s
|
|
66
|
+
profiles = llm_profiles
|
|
67
|
+
unless profiles.key?(name)
|
|
68
|
+
available = profiles.keys.join(', ')
|
|
69
|
+
raise ArgumentError, "Unknown profile '#{name}'. Available profiles: #{available}"
|
|
70
|
+
end
|
|
71
|
+
@active_profile = name
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def llm_profile_name
|
|
75
|
+
@active_profile || get('llm.default_profile', nil) || llm_profiles.keys.first || 'default'
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def llm_profiles
|
|
79
|
+
configured = get('llm.profiles', nil)
|
|
80
|
+
if configured.is_a?(Hash) && !configured.empty?
|
|
81
|
+
configured
|
|
82
|
+
else
|
|
83
|
+
# Backwards compat: synthesize a profile from flat keys
|
|
84
|
+
{
|
|
85
|
+
'default' => {
|
|
86
|
+
'provider' => get('llm.provider', 'red_candle'),
|
|
87
|
+
'model' => get('llm.default_model', 'MaziyarPanahi/Qwen3-4B-GGUF')
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def llm_profile
|
|
94
|
+
llm_profiles[llm_profile_name] || llm_profiles.values.first
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def available_profiles
|
|
98
|
+
llm_profiles.keys
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Create a new RubyLLM chat instance with the active profile's settings
|
|
102
|
+
def create_chat
|
|
103
|
+
api_key = llm_api_key
|
|
104
|
+
provider = llm_provider.to_sym
|
|
105
|
+
|
|
106
|
+
# Configure RubyLLM with the API key if present
|
|
107
|
+
if api_key
|
|
108
|
+
configure_provider_api_key(provider, api_key)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
RubyLLM.chat(provider: provider, model: llm_model)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def llm_provider
|
|
115
|
+
llm_profile&.dig('provider') || get('llm.provider', 'red_candle')
|
|
116
|
+
end
|
|
117
|
+
|
|
60
118
|
def llm_model
|
|
61
|
-
get('llm.default_model',
|
|
119
|
+
llm_profile&.dig('model') || get('llm.default_model', 'MaziyarPanahi/Qwen3-4B-GGUF')
|
|
62
120
|
end
|
|
63
|
-
|
|
121
|
+
|
|
64
122
|
def llm_gguf_file
|
|
65
|
-
get('llm.default_gguf_file', "
|
|
123
|
+
get('llm.default_gguf_file', "Qwen3-4B.Q4_K_M.gguf")
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def llm_api_key
|
|
127
|
+
llm_profile&.dig('api_key') || get('llm.api_key', nil)
|
|
66
128
|
end
|
|
67
129
|
|
|
68
130
|
def interactive_prompt
|
|
@@ -84,6 +146,14 @@ module Ragnar
|
|
|
84
146
|
def enable_query_rewriting?
|
|
85
147
|
get('query.enable_query_rewriting', true)
|
|
86
148
|
end
|
|
149
|
+
|
|
150
|
+
def enable_reranking?
|
|
151
|
+
get('query.enable_reranking', true)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def reranker_model
|
|
155
|
+
get('query.reranker_model', 'BAAI/bge-reranker-base')
|
|
156
|
+
end
|
|
87
157
|
|
|
88
158
|
# Config file management
|
|
89
159
|
def config_file_path
|
|
@@ -121,12 +191,27 @@ module Ragnar
|
|
|
121
191
|
'model_filename' => 'umap_model.bin'
|
|
122
192
|
},
|
|
123
193
|
'llm' => {
|
|
124
|
-
'
|
|
125
|
-
'
|
|
194
|
+
'default_profile' => 'red_candle',
|
|
195
|
+
'profiles' => {
|
|
196
|
+
'red_candle' => {
|
|
197
|
+
'provider' => 'red_candle',
|
|
198
|
+
'model' => 'MaziyarPanahi/Qwen3-4B-GGUF'
|
|
199
|
+
},
|
|
200
|
+
'opus' => {
|
|
201
|
+
'provider' => 'anthropic',
|
|
202
|
+
'model' => 'claude-opus-4-6'
|
|
203
|
+
},
|
|
204
|
+
'sonnet' => {
|
|
205
|
+
'provider' => 'anthropic',
|
|
206
|
+
'model' => 'claude-sonnet-4-6'
|
|
207
|
+
}
|
|
208
|
+
}
|
|
126
209
|
},
|
|
127
210
|
'query' => {
|
|
128
211
|
'top_k' => 3,
|
|
129
|
-
'enable_query_rewriting' => true
|
|
212
|
+
'enable_query_rewriting' => true,
|
|
213
|
+
'enable_reranking' => true,
|
|
214
|
+
'reranker_model' => 'BAAI/bge-reranker-base'
|
|
130
215
|
},
|
|
131
216
|
'interactive' => {
|
|
132
217
|
'prompt' => 'ragnar> ',
|
|
@@ -146,7 +231,16 @@ module Ragnar
|
|
|
146
231
|
end
|
|
147
232
|
|
|
148
233
|
private
|
|
149
|
-
|
|
234
|
+
|
|
235
|
+
def configure_provider_api_key(provider, api_key)
|
|
236
|
+
case provider
|
|
237
|
+
when :anthropic
|
|
238
|
+
RubyLLM.configure { |c| c.anthropic_api_key = api_key }
|
|
239
|
+
when :openai
|
|
240
|
+
RubyLLM.configure { |c| c.openai_api_key = api_key }
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
|
|
150
244
|
def load_config
|
|
151
245
|
@config_file_path = find_config_file
|
|
152
246
|
|
data/lib/ragnar/embedder.rb
CHANGED
|
@@ -34,7 +34,7 @@ module Ragnar
|
|
|
34
34
|
def embed_batch(texts, show_progress: true)
|
|
35
35
|
embeddings = []
|
|
36
36
|
|
|
37
|
-
if show_progress
|
|
37
|
+
if show_progress && $stdout.respond_to?(:ioctl)
|
|
38
38
|
progressbar = TTY::ProgressBar.new(
|
|
39
39
|
"Generating embeddings [:bar] :percent :current/:total",
|
|
40
40
|
total: texts.size,
|
data/lib/ragnar/indexer.rb
CHANGED
|
@@ -31,7 +31,7 @@ module Ragnar
|
|
|
31
31
|
|
|
32
32
|
puts "Found #{files.size} file(s) to process" if @show_progress
|
|
33
33
|
|
|
34
|
-
file_progress = if @show_progress
|
|
34
|
+
file_progress = if @show_progress && $stdout.respond_to?(:ioctl)
|
|
35
35
|
TTY::ProgressBar.new(
|
|
36
36
|
"Processing [:bar] :percent :current/:total - :filename",
|
|
37
37
|
total: files.size,
|
|
@@ -43,13 +43,15 @@ module Ragnar
|
|
|
43
43
|
nil
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
-
files.
|
|
46
|
+
files.each_with_index do |file_path, idx|
|
|
47
47
|
begin
|
|
48
48
|
if file_progress
|
|
49
49
|
# Update the progress bar with current filename
|
|
50
50
|
filename = File.basename(file_path)
|
|
51
51
|
filename = filename[0..27] + "..." if filename.length > 30
|
|
52
52
|
file_progress.advance(0, filename: filename)
|
|
53
|
+
elsif @show_progress
|
|
54
|
+
puts "Processing (#{idx + 1}/#{files.size}): #{File.basename(file_path)}"
|
|
53
55
|
end
|
|
54
56
|
|
|
55
57
|
process_file(file_path, stats, file_progress)
|
data/lib/ragnar/llm_manager.rb
CHANGED
|
@@ -1,46 +1,47 @@
|
|
|
1
1
|
module Ragnar
|
|
2
|
-
# Singleton manager for
|
|
2
|
+
# Singleton manager for RubyLLM chat instances to avoid reloading models.
|
|
3
|
+
# Supports any RubyLLM provider (red_candle for local, openai, anthropic, etc.)
|
|
3
4
|
class LLMManager
|
|
4
5
|
include Singleton
|
|
5
|
-
|
|
6
|
+
|
|
6
7
|
def initialize
|
|
7
|
-
@
|
|
8
|
+
@chats = {}
|
|
8
9
|
@mutex = Mutex.new
|
|
9
10
|
end
|
|
10
|
-
|
|
11
|
-
# Get or create
|
|
12
|
-
# @param
|
|
13
|
-
# @param
|
|
14
|
-
# @return [
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
11
|
+
|
|
12
|
+
# Get or create a RubyLLM chat instance
|
|
13
|
+
# @param provider [String, Symbol] The RubyLLM provider (default from config)
|
|
14
|
+
# @param model [String] The model identifier (default from config)
|
|
15
|
+
# @return [RubyLLM::Chat] A cached chat instance
|
|
16
|
+
def get_chat(provider: nil, model: nil)
|
|
17
|
+
config = Config.instance
|
|
18
|
+
provider ||= config.llm_provider
|
|
19
|
+
model ||= config.llm_model
|
|
20
|
+
|
|
21
|
+
cache_key = "#{provider}:#{model}"
|
|
22
|
+
|
|
19
23
|
@mutex.synchronize do
|
|
20
|
-
@
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
puts "Loading LLM: #{model_id}..." if show_loading && !@llms.key?(cache_key)
|
|
24
|
-
|
|
25
|
-
if gguf_file
|
|
26
|
-
Candle::LLM.from_pretrained(model_id, gguf_file: gguf_file)
|
|
27
|
-
else
|
|
28
|
-
Candle::LLM.from_pretrained(model_id)
|
|
29
|
-
end
|
|
24
|
+
@chats[cache_key] ||= begin
|
|
25
|
+
puts "Loading LLM: #{model} (#{provider})..." if ENV['DEBUG']
|
|
26
|
+
Config.instance.create_chat
|
|
30
27
|
end
|
|
31
28
|
end
|
|
32
29
|
end
|
|
33
|
-
|
|
34
|
-
# Clear all cached
|
|
30
|
+
|
|
31
|
+
# Clear all cached chat instances (useful for memory management)
|
|
35
32
|
def clear_cache
|
|
36
33
|
@mutex.synchronize do
|
|
37
|
-
@
|
|
34
|
+
@chats.clear
|
|
38
35
|
end
|
|
39
36
|
end
|
|
40
|
-
|
|
41
|
-
# Get the default
|
|
42
|
-
def
|
|
43
|
-
|
|
37
|
+
|
|
38
|
+
# Get the default chat instance for the application
|
|
39
|
+
def default_chat
|
|
40
|
+
get_chat
|
|
44
41
|
end
|
|
42
|
+
|
|
43
|
+
# Backwards compatibility aliases
|
|
44
|
+
alias_method :get_llm, :get_chat
|
|
45
|
+
alias_method :default_llm, :default_chat
|
|
45
46
|
end
|
|
46
|
-
end
|
|
47
|
+
end
|
data/lib/ragnar/query_processor.rb
CHANGED
|
@@ -16,7 +16,7 @@ module Ragnar
|
|
|
16
16
|
@reranker = nil # Will initialize when needed
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
def query(user_query, top_k: 3, verbose: false, enable_rewriting: true)
|
|
19
|
+
def query(user_query, top_k: 3, verbose: false, enable_rewriting: true, enable_reranking: false)
|
|
20
20
|
puts "Processing query: #{user_query}" if verbose
|
|
21
21
|
|
|
22
22
|
# Step 1: Rewrite and analyze the query (if enabled)
|
|
@@ -26,7 +26,15 @@ module Ragnar
|
|
|
26
26
|
puts "-"*60 if verbose
|
|
27
27
|
|
|
28
28
|
rewritten = @rewriter.rewrite(user_query)
|
|
29
|
-
|
|
29
|
+
|
|
30
|
+
# Always include the original query in sub-queries to ensure direct matches
|
|
31
|
+
# are found regardless of how the rewriter reformulates
|
|
32
|
+
sub_queries = rewritten['sub_queries'] || []
|
|
33
|
+
unless sub_queries.include?(user_query)
|
|
34
|
+
sub_queries.unshift(user_query)
|
|
35
|
+
end
|
|
36
|
+
rewritten['sub_queries'] = sub_queries
|
|
37
|
+
|
|
30
38
|
if verbose
|
|
31
39
|
puts "\nOriginal Query: #{user_query}"
|
|
32
40
|
puts "\nRewritten Query Analysis:"
|
|
@@ -95,18 +103,25 @@ module Ragnar
|
|
|
95
103
|
puts "-"*60
|
|
96
104
|
end
|
|
97
105
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
106
|
+
if enable_reranking
|
|
107
|
+
reranked = rerank_documents(
|
|
108
|
+
query: user_query,
|
|
109
|
+
documents: candidates,
|
|
110
|
+
top_k: top_k * 2
|
|
111
|
+
)
|
|
112
|
+
else
|
|
113
|
+
# Use retrieval order (RRF scores) directly — often more reliable than
|
|
114
|
+
# small cross-encoder rerankers on domain-specific corpora
|
|
115
|
+
reranked = candidates
|
|
116
|
+
end
|
|
117
|
+
|
|
104
118
|
if verbose && reranked.any?
|
|
105
|
-
puts "\nTop Reranked Documents:"
|
|
119
|
+
puts "\nTop #{enable_reranking ? 'Reranked' : 'Retrieved'} Documents:"
|
|
106
120
|
reranked[0..2].each_with_index do |doc, idx|
|
|
107
121
|
full_text = (doc[:chunk_text] || doc[:text] || "").gsub(/\s+/, ' ')
|
|
108
122
|
puts " #{idx + 1}. [#{File.basename(doc[:file_path] || 'unknown')}]"
|
|
109
123
|
puts " Score: #{doc[:score]&.round(4) if doc[:score]}"
|
|
124
|
+
puts " Distance: #{doc[:distance]&.round(4) if doc[:distance]}"
|
|
110
125
|
puts " Full chunk (#{full_text.length} chars):"
|
|
111
126
|
puts " \"#{full_text}\""
|
|
112
127
|
puts ""
|
|
@@ -174,12 +189,12 @@ module Ragnar
|
|
|
174
189
|
query: user_query,
|
|
175
190
|
clarified: rewritten['clarified_intent'],
|
|
176
191
|
answer: response,
|
|
177
|
-
sources: context_docs.map { |d|
|
|
192
|
+
sources: context_docs.map { |d|
|
|
178
193
|
{
|
|
179
|
-
source_file: d[:file_path] || d[:source_file],
|
|
180
|
-
chunk_index: d[:chunk_index]
|
|
194
|
+
source_file: d[:file_path] || d[:source_file] || d["file_path"],
|
|
195
|
+
chunk_index: d[:chunk_index] || d["chunk_index"]
|
|
181
196
|
}
|
|
182
|
-
},
|
|
197
|
+
}.reject { |s| s[:source_file].nil? },
|
|
183
198
|
sub_queries: rewritten['sub_queries'],
|
|
184
199
|
confidence: calculate_confidence(reranked[0...top_k])
|
|
185
200
|
}
|
|
@@ -260,22 +275,43 @@ module Ragnar
|
|
|
260
275
|
k: k,
|
|
261
276
|
use_reduced: use_reduced
|
|
262
277
|
)
|
|
263
|
-
|
|
278
|
+
|
|
264
279
|
if verbose
|
|
265
|
-
puts "
|
|
280
|
+
puts " Vector search: #{vector_results.length} matches"
|
|
266
281
|
if vector_results.any?
|
|
267
282
|
best = vector_results.first
|
|
268
|
-
puts " Best match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
|
|
283
|
+
puts " Best vector match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
|
|
269
284
|
end
|
|
270
285
|
end
|
|
271
|
-
|
|
286
|
+
|
|
272
287
|
# Add query index for RRF
|
|
273
288
|
vector_results.each do |result|
|
|
274
289
|
result[:query_idx] = idx
|
|
275
290
|
result[:retrieval_method] = :vector
|
|
276
291
|
end
|
|
277
|
-
|
|
292
|
+
|
|
278
293
|
all_results.concat(vector_results)
|
|
294
|
+
|
|
295
|
+
# Full-text search for keyword matching (hybrid search)
|
|
296
|
+
begin
|
|
297
|
+
fts_results = @database.full_text_search(query, limit: k)
|
|
298
|
+
if verbose && fts_results.any?
|
|
299
|
+
puts " FTS: #{fts_results.length} matches"
|
|
300
|
+
best_fts = fts_results.first
|
|
301
|
+
puts " Best FTS match: [#{File.basename(best_fts[:file_path] || 'unknown')}]"
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
fts_results.each_with_index do |result, rank|
|
|
305
|
+
# Synthesize a distance from FTS rank (lower rank = better match)
|
|
306
|
+
result[:distance] = 0.1 + (rank * 0.05)
|
|
307
|
+
result[:query_idx] = idx
|
|
308
|
+
result[:retrieval_method] = :fts
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
all_results.concat(fts_results)
|
|
312
|
+
rescue => e
|
|
313
|
+
puts " FTS unavailable: #{e.message}" if verbose
|
|
314
|
+
end
|
|
279
315
|
end
|
|
280
316
|
|
|
281
317
|
if verbose
|
|
@@ -299,10 +335,18 @@ module Ragnar
|
|
|
299
335
|
|
|
300
336
|
results.each do |result|
|
|
301
337
|
doc_id = result[:id]
|
|
302
|
-
doc_scores[doc_id]
|
|
303
|
-
|
|
304
|
-
document
|
|
305
|
-
|
|
338
|
+
if doc_scores[doc_id]
|
|
339
|
+
# Prefer the document with more complete metadata
|
|
340
|
+
existing = doc_scores[doc_id][:document]
|
|
341
|
+
if result[:file_path] && !existing[:file_path]
|
|
342
|
+
doc_scores[doc_id][:document] = result
|
|
343
|
+
end
|
|
344
|
+
else
|
|
345
|
+
doc_scores[doc_id] = {
|
|
346
|
+
score: 0.0,
|
|
347
|
+
document: result
|
|
348
|
+
}
|
|
349
|
+
end
|
|
306
350
|
|
|
307
351
|
# RRF formula: 1 / (k + rank)
|
|
308
352
|
# Using distance as a proxy for rank (lower distance = better rank)
|
|
@@ -337,14 +381,14 @@ module Ragnar
|
|
|
337
381
|
|
|
338
382
|
# Initialize reranker if not already done
|
|
339
383
|
@reranker ||= Candle::Reranker.from_pretrained(
|
|
340
|
-
|
|
384
|
+
Config.instance.reranker_model
|
|
341
385
|
)
|
|
342
386
|
|
|
343
387
|
# Prepare document texts - use chunk_text field
|
|
344
388
|
texts = unique_docs.map { |doc| doc[:chunk_text] || doc[:text] || "" }
|
|
345
389
|
|
|
346
|
-
# Rerank -
|
|
347
|
-
reranked = @reranker.rerank(query, texts)
|
|
390
|
+
# Rerank - use raw logits (no sigmoid) for better score separation
|
|
391
|
+
reranked = @reranker.rerank(query, texts, apply_sigmoid: false)
|
|
348
392
|
|
|
349
393
|
# Map back to original documents with scores
|
|
350
394
|
reranked.map do |result|
|
|
@@ -361,46 +405,37 @@ module Ragnar
|
|
|
361
405
|
# In the future, we could fetch neighboring chunks for more context
|
|
362
406
|
context_size = case context_needed
|
|
363
407
|
when "extensive" then 5
|
|
364
|
-
when "moderate" then
|
|
365
|
-
else
|
|
408
|
+
when "moderate" then 4
|
|
409
|
+
else 3
|
|
366
410
|
end
|
|
367
411
|
|
|
368
412
|
documents.first(context_size)
|
|
369
413
|
end
|
|
370
414
|
|
|
371
415
|
def generate_response(query:, repacked_context:, query_type:)
|
|
372
|
-
#
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
416
|
+
# Create a fresh chat for each query to avoid conversation history bleed
|
|
417
|
+
chat = Config.instance.create_chat
|
|
418
|
+
chat.with_instructions(
|
|
419
|
+
"You are a helpful assistant. Answer questions based ONLY on the provided context. " \
|
|
420
|
+
"If the answer is not in the context, say \"I don't have enough information to answer that question.\" " \
|
|
421
|
+
"Be concise and direct. /no_think"
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
prompt = "Context:\n#{repacked_context}\n\nQuestion: #{query}"
|
|
425
|
+
response = chat.ask(prompt).content
|
|
426
|
+
# Strip <think>...</think> blocks that some models (e.g. Qwen3) include
|
|
427
|
+
strip_think_tags(response)
|
|
380
428
|
rescue => e
|
|
381
429
|
# Fallback to returning the repacked context
|
|
382
430
|
puts "Warning: LLM generation failed (#{e.message})"
|
|
383
431
|
"Based on the retrieved information:\n\n#{repacked_context[0..500]}..."
|
|
384
432
|
end
|
|
385
433
|
|
|
386
|
-
def
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
You are a helpful assistant. Answer questions based ONLY on the provided context.
|
|
390
|
-
If the answer is not in the context, say "I don't have enough information to answer that question."
|
|
391
|
-
</s>
|
|
392
|
-
<|user|>
|
|
393
|
-
Context:
|
|
394
|
-
#{context}
|
|
395
|
-
|
|
396
|
-
Question: #{query}
|
|
397
|
-
</s>
|
|
398
|
-
<|assistant|>
|
|
399
|
-
PROMPT
|
|
400
|
-
|
|
401
|
-
base_prompt
|
|
434
|
+
def strip_think_tags(text)
|
|
435
|
+
return text unless text
|
|
436
|
+
text.gsub(/<think>.*?<\/think>/m, '').strip
|
|
402
437
|
end
|
|
403
|
-
|
|
438
|
+
|
|
404
439
|
def calculate_confidence(documents)
|
|
405
440
|
return 0.0 if documents.empty?
|
|
406
441
|
|
data/lib/ragnar/query_rewriter.rb
CHANGED
|
@@ -3,11 +3,11 @@ module Ragnar
|
|
|
3
3
|
def initialize(llm_manager: nil)
|
|
4
4
|
@llm_manager = llm_manager || LLMManager.instance
|
|
5
5
|
end
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
def rewrite(query)
|
|
8
|
-
#
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
# Create a fresh chat for each rewrite to avoid conversation history bleed
|
|
9
|
+
chat = Config.instance.create_chat
|
|
10
|
+
|
|
11
11
|
# Define the JSON schema for structured output
|
|
12
12
|
schema = {
|
|
13
13
|
type: "object",
|
|
@@ -41,25 +41,28 @@ module Ragnar
|
|
|
41
41
|
},
|
|
42
42
|
required: ["clarified_intent", "query_type", "sub_queries", "key_terms", "context_needed"]
|
|
43
43
|
}
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
prompt = <<~PROMPT
|
|
46
46
|
Analyze the following user query and break it down for retrieval-augmented generation.
|
|
47
47
|
Focus on understanding the user's intent and creating effective sub-queries for searching.
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
User Query: #{query}
|
|
50
|
-
|
|
51
|
-
Provide a structured analysis that will help retrieve the most relevant documents.
|
|
50
|
+
|
|
51
|
+
Provide a structured analysis that will help retrieve the most relevant documents. /no_think
|
|
52
52
|
PROMPT
|
|
53
|
-
|
|
53
|
+
|
|
54
54
|
begin
|
|
55
|
-
|
|
56
|
-
result =
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
55
|
+
response = chat.with_schema(schema).ask(prompt)
|
|
56
|
+
result = response.content
|
|
57
|
+
|
|
58
|
+
# RubyLLM with_schema returns parsed content; handle both String and Hash
|
|
59
|
+
if result.is_a?(String)
|
|
60
|
+
JSON.parse(result)
|
|
61
|
+
elsif result.is_a?(Hash)
|
|
62
|
+
result.transform_keys(&:to_s)
|
|
63
|
+
else
|
|
64
|
+
result
|
|
65
|
+
end
|
|
63
66
|
rescue => e
|
|
64
67
|
# Fallback to simple rewriting if structured generation fails
|
|
65
68
|
{
|
|
@@ -72,4 +75,4 @@ module Ragnar
|
|
|
72
75
|
end
|
|
73
76
|
end
|
|
74
77
|
end
|
|
75
|
-
end
|
|
78
|
+
end
|
data/lib/ragnar/umap_processor.rb
CHANGED
|
@@ -138,45 +138,69 @@ module Ragnar
|
|
|
138
138
|
|
|
139
139
|
# Perform the actual training using the class-based API
|
|
140
140
|
puts " Training UMAP model (this may take a moment)..."
|
|
141
|
-
|
|
141
|
+
|
|
142
|
+
attempts = 0
|
|
143
|
+
max_attempts = 3
|
|
144
|
+
|
|
142
145
|
begin
|
|
146
|
+
attempts += 1
|
|
143
147
|
@umap_instance = ClusterKit::Dimensionality::UMAP.new(
|
|
144
148
|
n_components: n_components,
|
|
145
149
|
n_neighbors: n_neighbors
|
|
146
150
|
)
|
|
147
|
-
|
|
151
|
+
|
|
148
152
|
@reduced_embeddings = @umap_instance.fit_transform(embedding_matrix)
|
|
149
|
-
|
|
153
|
+
|
|
150
154
|
puts " ✓ UMAP training complete"
|
|
151
|
-
rescue => e
|
|
152
|
-
#
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
155
|
+
rescue Exception => e
|
|
156
|
+
# Catch Exception (not just StandardError) because Rust panics from
|
|
157
|
+
# ClusterKit raise fatal errors that bypass the default rescue
|
|
158
|
+
if e.message.include?("LapackInvalidValue") || e.message.include?("SGESDD") || e.message.include?("illegal value")
|
|
159
|
+
if attempts < max_attempts
|
|
160
|
+
# LAPACK SVD can fail with certain dimension combinations — retry with fewer components
|
|
161
|
+
n_components = [n_components / 2, 2].max
|
|
162
|
+
n_neighbors = [n_neighbors, n_components - 1, 3].min
|
|
163
|
+
puts " ⚠️ LAPACK error, retrying with n_components=#{n_components}, n_neighbors=#{n_neighbors} (attempt #{attempts + 1}/#{max_attempts})..."
|
|
164
|
+
retry
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
raise RuntimeError, "\n❌ UMAP training failed due to a LAPACK numerical error.\n\n" \
|
|
168
|
+
"This can happen with certain data/dimension combinations.\n" \
|
|
169
|
+
"Try reducing n_components:\n" \
|
|
170
|
+
" ragnar umap train --n-components 10 --n-neighbors 5\n\n" \
|
|
171
|
+
"Current parameters:\n" \
|
|
172
|
+
" • n_components: #{n_components}\n" \
|
|
173
|
+
" • n_neighbors: #{n_neighbors}\n" \
|
|
174
|
+
" • embeddings: #{embeddings.size} samples\n" \
|
|
175
|
+
" • dimensions: #{original_dims}\n"
|
|
176
|
+
elsif e.message.include?("index out of bounds")
|
|
177
|
+
raise RuntimeError, "\n❌ UMAP training failed\n\n" \
|
|
178
|
+
"The UMAP algorithm encountered an index out of bounds error.\n\n" \
|
|
179
|
+
"This typically happens when:\n" \
|
|
180
|
+
" • The embedding data contains invalid values (NaN, Infinity)\n" \
|
|
181
|
+
" • The parameters are incompatible with your data\n" \
|
|
182
|
+
" • There are duplicate or corrupted embeddings\n\n" \
|
|
183
|
+
"Suggested solutions:\n" \
|
|
184
|
+
" 1. Try with more conservative parameters:\n" \
|
|
185
|
+
" ragnar umap train --n-components 10 --n-neighbors 5\n\n" \
|
|
186
|
+
" 2. Re-index your documents to regenerate embeddings:\n" \
|
|
187
|
+
" ragnar index <path> --force\n\n" \
|
|
188
|
+
" 3. Check your embedding model configuration\n\n" \
|
|
189
|
+
"Current parameters:\n" \
|
|
190
|
+
" • n_components: #{n_components}\n" \
|
|
191
|
+
" • n_neighbors: #{n_neighbors}\n" \
|
|
192
|
+
" • embeddings: #{embeddings.size} samples\n" \
|
|
193
|
+
" • dimensions: #{original_dims}\n"
|
|
194
|
+
elsif e.is_a?(StandardError) || e.message.include?("unwrap")
|
|
195
|
+
raise RuntimeError, "\n❌ UMAP training failed\n\n" \
|
|
196
|
+
"Error: #{e.message}\n\n" \
|
|
197
|
+
"This may be due to incompatible parameters or data issues.\n" \
|
|
198
|
+
"Try using more conservative parameters:\n" \
|
|
199
|
+
" ragnar umap train --n-components 10 --n-neighbors 5\n"
|
|
172
200
|
else
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
error_msg += "Try using more conservative parameters:\n"
|
|
176
|
-
error_msg += " ragnar train-umap --n-components 10 --n-neighbors 5\n"
|
|
201
|
+
# Re-raise non-application exceptions (Interrupt, SignalException, etc.)
|
|
202
|
+
raise
|
|
177
203
|
end
|
|
178
|
-
|
|
179
|
-
raise RuntimeError, error_msg
|
|
180
204
|
end
|
|
181
205
|
|
|
182
206
|
# Store the parameters for saving
|
data/lib/ragnar/umap_transform_service.rb
CHANGED
|
@@ -160,7 +160,7 @@ module Ragnar
|
|
|
160
160
|
return if @umap_model
|
|
161
161
|
|
|
162
162
|
unless File.exist?(@model_path)
|
|
163
|
-
raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar train
|
|
163
|
+
raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar umap train'."
|
|
164
164
|
end
|
|
165
165
|
|
|
166
166
|
@umap_model = ClusterKit::Dimensionality::UMAP.load_model(@model_path)
|
data/lib/ragnar/version.rb
CHANGED