ragnar-cli 0.1.0.pre.2 → 0.1.0.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ragnar/cli.rb CHANGED
@@ -1,27 +1,82 @@
1
+ require_relative "cli_visualization"
2
+ require_relative "config"
3
+ require "thor/interactive"
4
+ require "stringio"
5
+ require "fileutils"
6
+
1
7
  module Ragnar
2
8
  class CLI < Thor
9
+ include CLIVisualization
10
+ include Thor::Interactive::Command
11
+
12
+ # Configure interactive mode
13
+ configure_interactive(
14
+ prompt: Config.instance.interactive_prompt,
15
+ allow_nested: false,
16
+ history_file: Config.instance.history_file,
17
+ default_handler: proc do |input, thor_instance|
18
+ puts "[DEBUG] Default handler called: #{input}" if ENV["DEBUG"]
19
+
20
+ begin
21
+ # IMPORTANT: Use direct method call, NOT invoke(), to avoid Thor's
22
+ # silent deduplication that prevents repeated calls to the same method
23
+ result = thor_instance.query(input.strip)
24
+ puts "[DEBUG] Default handler completed" if ENV["DEBUG"]
25
+ result
26
+ rescue => e
27
+ puts "[DEBUG] Default handler error: #{e.message}" if ENV["DEBUG"]
28
+ puts "[DEBUG] Backtrace: #{e.backtrace.first(3)}" if ENV["DEBUG"]
29
+ raise e
30
+ end
31
+ end
32
+ )
33
+
34
+ # Class variables for caching expensive resources in interactive mode
35
+ class_variable_set(:@@cached_database, nil)
36
+ class_variable_set(:@@cached_embedder, nil)
37
+ class_variable_set(:@@cached_llm_manager, nil)
38
+ class_variable_set(:@@cached_query_processor, nil)
39
+ class_variable_set(:@@cached_db_path, nil)
40
+
3
41
  desc "index PATH", "Index text files from PATH (file or directory)"
4
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
5
- option :chunk_size, type: :numeric, default: Ragnar::DEFAULT_CHUNK_SIZE, desc: "Chunk size in tokens"
6
- option :chunk_overlap, type: :numeric, default: Ragnar::DEFAULT_CHUNK_OVERLAP, desc: "Chunk overlap in tokens"
7
- option :model, type: :string, default: Ragnar::DEFAULT_EMBEDDING_MODEL, desc: "Embedding model to use"
42
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
43
+ option :chunk_size, type: :numeric, desc: "Chunk size in tokens (default from config)"
44
+ option :chunk_overlap, type: :numeric, desc: "Chunk overlap in tokens (default from config)"
45
+ option :model, type: :string, desc: "Embedding model to use (default from config)"
8
46
  def index(path)
9
- unless File.exist?(path)
10
- say "Error: Path does not exist: #{path}", :red
47
+ # Expand user paths (handle ~ in user input)
48
+ expanded_path = File.expand_path(path)
49
+
50
+ unless File.exist?(expanded_path)
51
+ say "Error: Path does not exist: #{expanded_path}", :red
11
52
  exit 1
12
53
  end
13
54
 
14
55
  say "Indexing files from: #{path}", :green
15
56
 
57
+ # Debug options in interactive mode
58
+ puts "Debug - options: #{options.inspect}" if ENV['DEBUG']
59
+
60
+ # Get config instance
61
+ config = Config.instance
62
+
63
+ # Clear database cache when indexing new content
64
+ db_path = options[:db_path] || config.database_path
65
+ if @@cached_db_path == db_path
66
+ @@cached_database = nil
67
+ @@cached_query_processor = nil
68
+ end
69
+
16
70
  indexer = Indexer.new(
17
- db_path: options[:db_path],
18
- chunk_size: options[:chunk_size],
19
- chunk_overlap: options[:chunk_overlap],
20
- embedding_model: options[:model]
71
+ db_path: db_path,
72
+ chunk_size: options[:chunk_size] || config.chunk_size,
73
+ chunk_overlap: options[:chunk_overlap] || config.chunk_overlap,
74
+ embedding_model: options[:model] || config.embedding_model,
75
+ show_progress: config.show_progress?
21
76
  )
22
77
 
23
78
  begin
24
- stats = indexer.index_path(path)
79
+ stats = indexer.index_path(expanded_path)
25
80
  say "\nIndexing complete!", :green
26
81
  say "Files processed: #{stats[:files_processed]}"
27
82
  say "Chunks created: #{stats[:chunks_created]}"
@@ -33,31 +88,39 @@ module Ragnar
33
88
  end
34
89
 
35
90
  desc "train-umap", "Train UMAP model on existing embeddings"
36
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
91
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
37
92
  option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
38
93
  option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
39
94
  option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
40
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to save UMAP model"
95
+ option :model_path, type: :string, desc: "Path to save UMAP model"
41
96
  def train_umap
42
97
  say "Training UMAP model on embeddings...", :green
43
98
 
99
+ config = Config.instance
100
+ # Use model_path from options if provided, otherwise use config models_dir
101
+ model_path = if options[:model_path]
102
+ options[:model_path]
103
+ else
104
+ File.join(config.models_dir, "umap_model.bin")
105
+ end
106
+
44
107
  processor = UmapProcessor.new(
45
- db_path: options[:db_path],
46
- model_path: options[:model_path]
108
+ db_path: options[:db_path] || config.database_path,
109
+ model_path: model_path
47
110
  )
48
111
 
49
112
  begin
50
113
  stats = processor.train(
51
- n_components: options[:n_components],
52
- n_neighbors: options[:n_neighbors],
53
- min_dist: options[:min_dist]
114
+ n_components: options[:n_components] || 50,
115
+ n_neighbors: options[:n_neighbors] || 15,
116
+ min_dist: options[:min_dist] || 0.1
54
117
  )
55
118
 
56
119
  say "\nUMAP training complete!", :green
57
120
  say "Embeddings processed: #{stats[:embeddings_count]}"
58
121
  say "Original dimensions: #{stats[:original_dims]}"
59
122
  say "Reduced dimensions: #{stats[:reduced_dims]}"
60
- say "Model saved to: #{options[:model_path]}"
123
+ say "Model saved to: #{processor.model_path}"
61
124
  rescue => e
62
125
  say "Error during UMAP training: #{e.message}", :red
63
126
  exit 1
@@ -65,12 +128,19 @@ module Ragnar
65
128
  end
66
129
 
67
130
  desc "apply-umap", "Apply trained UMAP model to reduce embedding dimensions"
68
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
69
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to UMAP model"
131
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
132
+ option :model_path, type: :string, desc: "Path to UMAP model"
70
133
  option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
71
134
  def apply_umap
72
- unless File.exist?(options[:model_path])
73
- say "Error: UMAP model not found at: #{options[:model_path]}", :red
135
+ config = Config.instance
136
+ model_path = if options[:model_path]
137
+ options[:model_path]
138
+ else
139
+ File.join(config.models_dir, "umap_model.bin")
140
+ end
141
+
142
+ unless File.exist?(model_path)
143
+ say "Error: UMAP model not found at: #{model_path}", :red
74
144
  say "Please run 'train-umap' first to create a model.", :yellow
75
145
  exit 1
76
146
  end
@@ -78,12 +148,12 @@ module Ragnar
78
148
  say "Applying UMAP model to embeddings...", :green
79
149
 
80
150
  processor = UmapProcessor.new(
81
- db_path: options[:db_path],
82
- model_path: options[:model_path]
151
+ db_path: options[:db_path] || config.database_path,
152
+ model_path: model_path
83
153
  )
84
154
 
85
155
  begin
86
- stats = processor.apply(batch_size: options[:batch_size])
156
+ stats = processor.apply(batch_size: options[:batch_size] || 100)
87
157
 
88
158
  say "\nUMAP application complete!", :green
89
159
  say "Embeddings processed: #{stats[:processed]}"
@@ -96,18 +166,21 @@ module Ragnar
96
166
  end
97
167
 
98
168
  desc "topics", "Extract and display topics from indexed documents"
99
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
169
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
100
170
  option :min_cluster_size, type: :numeric, default: 5, desc: "Minimum documents per topic"
101
171
  option :method, type: :string, default: "hybrid", desc: "Labeling method: fast, quality, or hybrid"
102
172
  option :export, type: :string, desc: "Export topics to file (json or html)"
103
173
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing"
174
+ option :summarize, type: :boolean, default: false, aliases: "-s", desc: "Generate human-readable topic summaries using LLM"
175
+ option :llm_model, type: :string, default: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", desc: "LLM model for summarization"
176
+ option :gguf_file, type: :string, default: "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf", desc: "GGUF file name for LLM model"
104
177
  def topics
105
178
  require_relative 'topic_modeling'
106
179
 
107
180
  say "Extracting topics from indexed documents...", :green
108
181
 
109
- # Load embeddings and documents from database
110
- database = Database.new(options[:db_path])
182
+ # Load embeddings and documents from database - use cache in interactive mode
183
+ database = get_cached_database(options[:db_path] || Config.instance.database_path)
111
184
 
112
185
  begin
113
186
  # Get all documents with embeddings
@@ -127,7 +200,22 @@ module Ragnar
127
200
  exit 1
128
201
  end
129
202
 
130
- embeddings = docs_with_embeddings.map { |d| d[:embedding] }
203
+ # Check if we have reduced embeddings available
204
+ first_doc = docs_with_embeddings.first
205
+ has_reduced = first_doc[:reduced_embedding] && !first_doc[:reduced_embedding].empty?
206
+
207
+ if has_reduced
208
+ embeddings = docs_with_embeddings.map { |d| d[:reduced_embedding] }
209
+ say "Using reduced embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
210
+ # Already reduced, so don't reduce again in the engine
211
+ reduce_dims = false
212
+ else
213
+ embeddings = docs_with_embeddings.map { |d| d[:embedding] }
214
+ say "Using original embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
215
+ # Let the engine handle dimensionality reduction if needed
216
+ reduce_dims = true
217
+ end
218
+
131
219
  documents = docs_with_embeddings.map { |d| d[:chunk_text] }
132
220
  metadata = docs_with_embeddings.map { |d| { file_path: d[:file_path], chunk_index: d[:chunk_index] } }
133
221
 
@@ -137,7 +225,8 @@ module Ragnar
137
225
  engine = Ragnar::TopicModeling::Engine.new(
138
226
  min_cluster_size: options[:min_cluster_size],
139
227
  labeling_method: options[:method].to_sym,
140
- verbose: options[:verbose]
228
+ verbose: options[:verbose],
229
+ reduce_dimensions: reduce_dims
141
230
  )
142
231
 
143
232
  # Extract topics
@@ -148,12 +237,36 @@ module Ragnar
148
237
  metadata: metadata
149
238
  )
150
239
 
240
+ # Generate summaries if requested
241
+ if options[:summarize] && topics.any?
242
+ say "Generating topic summaries with LLM...", :yellow
243
+ begin
244
+ require 'red-candle'
245
+
246
+ # Initialize LLM for summarization once
247
+ say "Loading model: #{options[:llm_model]}", :cyan if options[:verbose]
248
+ llm = Candle::LLM.from_pretrained(options[:llm_model], gguf_file: options[:gguf_file])
249
+
250
+ # Add summaries to topics
251
+ topics.each_with_index do |topic, i|
252
+ say " Summarizing topic #{i+1}/#{topics.length}...", :yellow if options[:verbose]
253
+ topic.instance_variable_set(:@summary, summarize_topic(topic, llm))
254
+ end
255
+
256
+ say "Topic summaries generated!", :green
257
+ rescue => e
258
+ say "Warning: Could not generate topic summaries: #{e.message}", :yellow
259
+ say "Proceeding without summaries...", :yellow
260
+ end
261
+ end
262
+
151
263
  # Display results
152
- display_topics(topics)
264
+ display_topics(topics, show_summaries: options[:summarize])
153
265
 
154
266
  # Export if requested
155
267
  if options[:export]
156
- export_topics(topics, options[:export])
268
+ # Pass embeddings and cluster IDs for visualization
269
+ export_topics(topics, options[:export], embeddings: embeddings, cluster_ids: engine.instance_variable_get(:@cluster_ids))
157
270
  end
158
271
 
159
272
  rescue => e
@@ -168,51 +281,80 @@ module Ragnar
168
281
  option :k, type: :numeric, default: 5, desc: "Number of results to return"
169
282
  option :show_scores, type: :boolean, default: false, desc: "Show similarity scores"
170
283
  def search(query_text)
171
- database = Database.new(options[:database])
172
- embedder = Embedder.new
173
-
284
+ database = get_cached_database(options[:database] || Config.instance.database_path)
285
+ embedder = get_cached_embedder()
286
+
174
287
  # Generate embedding for query
175
288
  query_embedding = embedder.embed_text(query_text)
176
-
289
+
177
290
  # Search for similar documents
178
291
  results = database.search_similar(query_embedding, k: options[:k])
179
-
292
+
180
293
  if results.empty?
181
294
  say "No results found.", :yellow
182
295
  return
183
296
  end
184
-
297
+
185
298
  say "Found #{results.length} results:\n", :green
186
-
299
+
187
300
  results.each_with_index do |result, idx|
188
301
  say "#{idx + 1}. File: #{result[:file_path]}", :cyan
189
302
  say " Chunk: #{result[:chunk_index]}"
190
-
303
+
191
304
  if options[:show_scores]
192
305
  say " Distance: #{result[:distance].round(4)}"
193
306
  end
194
-
307
+
195
308
  # Show preview of content
196
309
  preview = result[:chunk_text][0..200].gsub(/\s+/, ' ')
197
310
  say " Content: #{preview}..."
198
311
  say ""
199
312
  end
200
313
  end
201
-
314
+
202
315
  desc "query QUESTION", "Query the RAG system"
203
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
316
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
204
317
  option :top_k, type: :numeric, default: 3, desc: "Number of top documents to use"
205
318
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing steps"
206
319
  option :json, type: :boolean, default: false, desc: "Output as JSON"
207
320
  def query(question)
208
- processor = QueryProcessor.new(db_path: options[:db_path])
321
+ puts "Debug - Query called with: #{question.inspect}" if ENV['DEBUG']
322
+ puts "Debug - Options: #{options.inspect}" if ENV['DEBUG']
323
+
324
+ processor = get_cached_query_processor(options[:db_path] || Config.instance.database_path)
325
+ puts "Debug - Processor: #{processor.class}" if ENV['DEBUG']
209
326
 
210
327
  begin
211
- result = processor.query(question, top_k: options[:top_k], verbose: options[:verbose])
328
+ config = Config.instance
329
+ result = processor.query(
330
+ question,
331
+ top_k: options[:top_k] || config.query_top_k,
332
+ verbose: options[:verbose] || false,
333
+ enable_rewriting: config.enable_query_rewriting?
334
+ )
335
+ puts "Debug - Result keys: #{result.keys}" if ENV['DEBUG']
212
336
 
213
337
  if options[:json]
214
338
  puts JSON.pretty_generate(result)
339
+ elsif interactive?
340
+ # Clean output for interactive mode - just answer, confidence, and sources
341
+ say "" # Add blank line before answer for spacing
342
+ say result[:answer]
343
+
344
+ if result[:confidence]
345
+ say "\nConfidence: #{result[:confidence]}%", :magenta
346
+ end
347
+
348
+ if result[:sources] && !result[:sources].empty?
349
+ say "\nSources:", :blue
350
+ result[:sources].each_with_index do |source, idx|
351
+ say " #{idx + 1}. #{source[:source_file]}" if source[:source_file]
352
+ end
353
+ end
354
+
355
+ say "" # Add blank line for spacing
215
356
  else
357
+ # Full output for CLI mode
216
358
  say "\n" + "="*60, :green
217
359
  say "Query: #{result[:query]}", :cyan
218
360
 
@@ -234,7 +376,7 @@ module Ragnar
234
376
  end
235
377
  end
236
378
 
237
- if options[:verbose] && result[:sub_queries]
379
+ if (options[:verbose] || false) && result[:sub_queries]
238
380
  say "\nSub-queries used:", :yellow
239
381
  result[:sub_queries].each { |sq| say " - #{sq}" }
240
382
  end
@@ -243,15 +385,15 @@ module Ragnar
243
385
  end
244
386
  rescue => e
245
387
  say "Error processing query: #{e.message}", :red
246
- say e.backtrace.first(5).join("\n") if options[:verbose]
388
+ puts "Debug - Full backtrace: #{e.backtrace.join("\n")}" if ENV['DEBUG']
247
389
  exit 1
248
390
  end
249
391
  end
250
392
 
251
393
  desc "stats", "Show database statistics"
252
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
394
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
253
395
  def stats
254
- db = Database.new(options[:db_path])
396
+ db = get_cached_database(options[:db_path] || Config.instance.database_path)
255
397
  stats = db.get_stats
256
398
 
257
399
  say "\nDatabase Statistics", :green
@@ -277,8 +419,325 @@ module Ragnar
277
419
  say "Ragnar v#{Ragnar::VERSION}"
278
420
  end
279
421
 
422
+ desc "config", "Show current configuration"
423
# Prints the effective configuration, grouped by section.
#
# Every value is read from Config.instance and written with `say`;
# nothing is modified.
def config
  settings = Config.instance

  say "\nConfiguration Settings:", :cyan
  say "-" * 40

  # Report which file, if any, the settings came from.
  if settings.config_exists?
    say "Config file: #{settings.config_file_path}", :green
  else
    say "Config file: None (using defaults)", :yellow
  end

  say "\nPaths:", :cyan
  say " Database: #{settings.database_path}"
  say " Models: #{settings.models_dir}"
  say " History: #{settings.history_file}"

  say "\nEmbeddings:", :cyan
  say " Model: #{settings.embedding_model}"
  say " Chunk size: #{settings.chunk_size}"
  say " Chunk overlap: #{settings.chunk_overlap}"

  say "\nLLM:", :cyan
  say " Model: #{settings.llm_model}"
  say " GGUF file: #{settings.llm_gguf_file}"

  # UMAP values are looked up by key, with the second argument used as default.
  reduced_dims = settings.get('umap.reduced_dimensions', Ragnar::DEFAULT_REDUCED_DIMENSIONS)
  say "\nUMAP:", :cyan
  say " Reduced dimensions: #{reduced_dims}"
  say " N neighbors: #{settings.get('umap.n_neighbors', 15)}"
  say " Min distance: #{settings.get('umap.min_dist', 0.1)}"

  say "\nQuery:", :cyan
  say " Top K: #{settings.query_top_k}"
  say " Query rewriting: #{settings.enable_query_rewriting?}"
end
458
+
459
+ desc "model", "Show current LLM model information"
460
# Shows the configured LLM repository and GGUF file, and whether the GGUF
# file is present under the configured models directory.
def model
  settings = Config.instance

  say "\nLLM Model Configuration:", :cyan
  say "-" * 40

  say "\nModel:", :green
  say " Repository: #{settings.llm_model}"
  say " GGUF file: #{settings.llm_gguf_file}"

  # Look for the GGUF file inside the models directory.
  gguf_path = File.join(settings.models_dir, settings.llm_gguf_file)

  unless File.exist?(gguf_path)
    say "\nModel file not found: #{gguf_path}", :yellow
    return say("Run 'ragnar query' to download automatically", :yellow)
  end

  megabytes = (File.size(gguf_path) / 1024.0 / 1024.0).round(2)
  say "\nModel file exists: #{gguf_path} (#{megabytes} MB)", :green
end
480
+
481
+ desc "clear-cache", "Clear cached instances (useful in interactive mode)"
482
# Command wrapper around the private `clear_cache` helper: drops the cached
# instances and confirms it to the user.
def clear_cache_command
  clear_cache
  say("Cache cleared. Next commands will create fresh instances.", :green)
end
486
+
487
+ desc "reset", "Reset Ragnar data (database, models, cache)"
488
+ option :all, type: :boolean, default: false, aliases: "-a", desc: "Reset everything (database, models, cache)"
489
+ option :database, type: :boolean, default: false, aliases: "-d", desc: "Reset database only"
490
+ option :models, type: :boolean, default: false, aliases: "-m", desc: "Reset UMAP models only"
491
+ option :cache, type: :boolean, default: false, aliases: "-c", desc: "Clear cache only"
492
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Skip confirmation prompt"
493
# Deletes Ragnar's on-disk data and in-memory caches after confirmation.
#
# Options read: :all, :database, :models, :cache (what to reset; when none is
# given everything is reset), :force (skip the confirmation prompt) and
# :db_path (falls back to the configured database path when absent).
#
# Prints a preview of what will be removed, asks for confirmation unless
# --force is set, then removes the selected items. The "history" entry in the
# cache directory is always preserved.
def reset
  wipe_all = options[:all]
  wipe_db = options[:database] || wipe_all
  wipe_models = options[:models] || wipe_all
  wipe_cache = options[:cache] || wipe_all

  # With no explicit selection, reset everything.
  wipe_db = wipe_models = wipe_cache = true unless wipe_db || wipe_models || wipe_cache

  targets = []
  targets << "database" if wipe_db
  targets << "UMAP models" if wipe_models
  targets << "cache" if wipe_cache

  # Resolve every path once so the preview and the deletion agree.
  settings = Config.instance
  db_path = options[:db_path] || settings.database_path
  umap_model = File.join(settings.models_dir, "umap_model.bin")
  model_files = [
    umap_model,
    umap_model.sub(/\.bin$/, '_metadata.json'),
    umap_model.sub(/\.bin$/, '_embeddings.json') # Old format, if exists
  ]
  cache_dir = File.expand_path("~/.cache/ragnar") if wipe_cache

  # Show what will be deleted
  say "\nWARNING: This will delete the following:", :red
  say "-" * 40

  if wipe_db
    say "Database: #{db_path}", :cyan
    if File.exist?(db_path)
      # Statistics are informational only; a failure here must not abort the reset.
      db_stats = begin
        Database.new(db_path).get_stats
      rescue StandardError
        nil
      end
      if db_stats
        say " (#{db_stats[:total_documents]} documents, #{db_stats[:total_chunks]} chunks)", :white
      end
    else
      say " (does not exist)", :white
    end
  end

  if wipe_models
    say "UMAP models:", :cyan
    present = model_files.select { |file| File.exist?(file) }
    present.each do |file|
      say " #{file} (#{(File.size(file) / 1024.0).round(1)} KB)", :white
    end
    say " (no models found)", :white if present.empty?
  end

  if wipe_cache
    say "Cache directory: #{cache_dir}", :cyan
    if Dir.exist?(cache_dir)
      cache_bytes = Dir.glob(File.join(cache_dir, "**/*"))
                       .select { |f| File.file?(f) }
                       .sum { |f| File.size(f) }
      say " (#{(cache_bytes / 1024.0 / 1024.0).round(1)} MB)", :white
    else
      say " (does not exist)", :white
    end
  end

  say "-" * 40

  # Ask for confirmation unless --force
  unless options[:force]
    message = "\nAre you sure you want to reset #{targets.join(', ')}?"

    confirmed =
      if ENV['THOR_INTERACTIVE_SESSION'] == 'true'
        # In interactive mode, require the full word "yes".
        say message, :yellow
        response = ask("Type 'yes' to confirm, anything else to cancel:", :yellow)
        # `ask` can return nil on end-of-input; treat that as "cancel".
        response.to_s.strip.downcase == 'yes'
      else
        # In CLI mode, use Thor's yes? method
        yes?(message + " (y/N)", :yellow)
      end

    unless confirmed
      say "\nReset cancelled.", :cyan
      return
    end
  end

  # Perform the reset
  say "\nResetting...", :green

  if wipe_db
    if File.exist?(db_path)
      say "Removing database: #{db_path}"
      FileUtils.rm_rf(db_path)
      say " ✓ Database removed", :green
    end
    # Cached Database/QueryProcessor objects would still point at the
    # deleted files, so drop them even when only --database was requested.
    clear_cache
  end

  if wipe_models
    model_files.each do |file|
      next unless File.exist?(file)

      say "Removing model file: #{file}"
      FileUtils.rm_f(file)
      say " ✓ Removed", :green
    end
  end

  if wipe_cache
    # Clear in-memory cache
    clear_cache

    # Clear the cache directory contents, leaving the history entry in place.
    if Dir.exist?(cache_dir)
      Dir.glob(File.join(cache_dir, "*")).each do |item|
        next if File.basename(item) == "history"

        if File.directory?(item)
          FileUtils.rm_rf(item)
        else
          FileUtils.rm_f(item)
        end
        say "Removed cache item: #{File.basename(item)}", :green
      end
    end
    say " ✓ Cache cleared", :green
  end

  say "\nReset complete!", :green
  say "You can now start fresh with 'ragnar index <path>'", :cyan
end
638
+
639
+ desc "init-config", "Generate a configuration file with current defaults"
640
+ option :global, type: :boolean, default: false, aliases: "-g", desc: "Create global config in home directory"
641
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Overwrite existing config file"
642
# Writes a configuration file populated by Config#generate_config_file.
#
# The target is ~/.ragnar.yml with --global, otherwise .ragnar.yml in the
# current directory. An existing file is left alone unless --force is given.
def init_config
  settings = Config.instance

  target = if options[:global]
             File.expand_path('~/.ragnar.yml')
           else
             File.join(Dir.pwd, '.ragnar.yml')
           end

  # Refuse to overwrite unless explicitly forced.
  if !options[:force] && File.exist?(target)
    say "Config file already exists at: #{target}", :yellow
    say "Use --force to overwrite, or choose a different location.", :yellow
    return
  end

  written_to = settings.generate_config_file(target)
  say "Config file created at: #{written_to}", :green
  say "Edit this file to customize Ragnar's behavior.", :cyan

  # Remind the user which config file is currently in effect.
  say "\nNote: Currently using config from: #{settings.config_file_path}", :yellow if settings.config_exists?
end
665
+
280
666
  private
281
667
 
668
+ # Cached instance helpers for interactive mode
669
# Returns the Database for db_path, building it on first use.
#
# db_path defaults to the configured database path. One database is cached
# at a time: asking for a different path discards the cached database and the
# cached query processor before a new Database is created.
def get_cached_database(db_path = nil)
  target = db_path || Config.instance.database_path

  unless @@cached_db_path == target
    # Path changed: everything tied to the old path is stale.
    @@cached_db_path = target
    @@cached_query_processor = nil
    @@cached_database = nil
  end

  return @@cached_database if @@cached_database

  @@cached_database = Database.new(target)
end
682
+
683
# Returns an Embedder for model_name, building it on first use.
#
# model_name defaults to the configured embedding model. The cache is keyed
# by model name: asking for a different model replaces the cached embedder
# instead of silently returning one built for another model.
def get_cached_embedder(model_name = nil)
  model_name ||= Config.instance.embedding_model

  # The model the cached embedder was built for is tracked on the class
  # itself, so this helper does not need extra class-body initialisation.
  cached_for = self.class.instance_variable_get(:@cached_embedder_model)

  if @@cached_embedder.nil? || cached_for != model_name
    @@cached_embedder = Embedder.new(model_name: model_name)
    self.class.instance_variable_set(:@cached_embedder_model, model_name)
  end

  @@cached_embedder
end
688
+
689
# Returns the memoized LLMManager, reading LLMManager.instance on first use.
def get_cached_llm_manager
  return @@cached_llm_manager if @@cached_llm_manager

  @@cached_llm_manager = LLMManager.instance
end
692
+
693
# Returns the QueryProcessor for db_path, building it on first use.
#
# db_path defaults to the configured database path. The processor shares its
# cache key (@@cached_db_path) with the cached database, so a path change
# invalidates both and records the new path. Without recording it, the path
# comparison never matches and a new processor is built on every call.
def get_cached_query_processor(db_path = nil)
  db_path ||= Config.instance.database_path

  if @@cached_db_path != db_path
    # Keep the shared key consistent: drop everything built for the old path.
    @@cached_database = nil
    @@cached_query_processor = nil
    @@cached_db_path = db_path
  end

  @@cached_query_processor ||= QueryProcessor.new(db_path: db_path)
end
704
+
705
# Drops every cached instance and the remembered database path, so the next
# helper call builds fresh objects. Returns nil.
def clear_cache
  @@cached_database = @@cached_embedder = @@cached_llm_manager = nil
  @@cached_query_processor = @@cached_db_path = nil
end
712
+
713
+
714
# Asks the LLM for a one-line description of a topic.
#
# topic must respond to `terms` and `representative_docs(k:)`; llm must
# respond to `generate(prompt)` and return a String.
#
# Returns the first non-empty line of the model output with any leading
# "Summary:", "Topic:" or "Documents:" label removed. Falls back to
# "Related documents" when the output is blank, and to
# "Documents about <term> and <term>" when nothing usable remains or the
# LLM call raises.
def summarize_topic(topic, llm)
  fallback = "Documents about #{topic.terms.first(2).join(' and ')}"

  # Use a few representative documents as context.
  sample_docs = topic.representative_docs(k: 3)
  numbered_docs = sample_docs.each_with_index.map { |doc, i| "#{i + 1}. #{doc}" }.join("\n")

  # Simple, clear prompt for summarization
  prompt = <<~PROMPT
    Summarize what connects these documents in 1-2 sentences:

    Key terms: #{topic.terms.first(5).join(', ')}

    Documents:
    #{numbered_docs}

    Summary:
  PROMPT

  begin
    raw = llm.generate(prompt).strip
    return "Related documents" if raw.empty?

    # Models often echo the label on its own line ("Summary:\n<text>"), so
    # strip the label from each line and keep the first line with content.
    label = /\A(Summary:|Topic:|Documents:)/i
    cleaned = raw.each_line
                 .map { |line| line.strip.sub(label, '').strip }
                 .find { |line| !line.empty? }

    cleaned || fallback
  rescue StandardError
    # Summaries are best-effort; a failing model must not abort topic display.
    fallback
  end
end
740
+
282
741
  def fetch_all_documents(database)
283
742
  # Temporary workaround to get all documents
284
743
  # In production, we'd add a proper method to Database class
@@ -305,9 +764,12 @@ module Ragnar
305
764
  []
306
765
  end
307
766
 
308
- def display_topics(topics)
767
+ def display_topics(topics, show_summaries: false)
309
768
  say "\n" + "="*60, :green
310
769
  say "Topic Analysis Results", :cyan
770
+ if show_summaries
771
+ say " (with LLM-generated summaries)", :yellow
772
+ end
311
773
  say "="*60, :green
312
774
 
313
775
  if topics.empty?
@@ -326,21 +788,21 @@ module Ragnar
326
788
  say "\n" + "─" * 40, :blue
327
789
  say "MAJOR TOPICS (≥20 docs)", :blue
328
790
  say "─" * 40, :blue
329
- display_topic_group(large_topics, :cyan)
791
+ display_topic_group(large_topics, :cyan, show_summaries: show_summaries)
330
792
  end
331
793
 
332
794
  if medium_topics.any?
333
795
  say "\n" + "─" * 40, :yellow
334
796
  say "MEDIUM TOPICS (10-19 docs)", :yellow
335
797
  say "─" * 40, :yellow
336
- display_topic_group(medium_topics, :yellow)
798
+ display_topic_group(medium_topics, :yellow, show_summaries: show_summaries)
337
799
  end
338
800
 
339
801
  if small_topics.any?
340
802
  say "\n" + "─" * 40, :white
341
803
  say "MINOR TOPICS (<10 docs)", :white
342
804
  say "─" * 40, :white
343
- display_topic_group(small_topics, :white)
805
+ display_topic_group(small_topics, :white, show_summaries: show_summaries)
344
806
  end
345
807
 
346
808
  # Summary statistics
@@ -364,10 +826,18 @@ module Ragnar
364
826
  say " Small (<10): #{small_topics.length} topics, #{small_topics.sum(&:size)} docs"
365
827
  end
366
828
 
367
- def display_topic_group(topics, color)
829
+ def display_topic_group(topics, color, show_summaries: false)
368
830
  topics.sort_by { |t| -t.size }.each_with_index do |topic, idx|
369
831
  say "\n#{topic.label || 'Unlabeled'} (#{topic.size} docs)", color
370
832
 
833
+ # Show LLM summary if available
834
+ if show_summaries
835
+ summary = topic.instance_variable_get(:@summary)
836
+ if summary
837
+ say " Summary: #{summary}", :green
838
+ end
839
+ end
840
+
371
841
  # Show coherence as a bar
372
842
  if topic.coherence > 0
373
843
  coherence_pct = (topic.coherence * 100).round(0)
@@ -379,8 +849,8 @@ module Ragnar
379
849
  # Compact term display
380
850
  say " Terms: #{topic.terms.first(6).join(' • ')}" if topic.terms.any?
381
851
 
382
- # Short sample
383
- if topic.representative_docs(k: 1).any?
852
+ # Short sample (unless we showed a summary)
853
+ if !show_summaries && topic.representative_docs(k: 1).any?
384
854
  preview = topic.representative_docs(k: 1).first
385
855
  preview = preview[0..100] + "..." if preview.length > 100
386
856
  say " \"#{preview}\"", :white
@@ -388,25 +858,34 @@ module Ragnar
388
858
  end
389
859
  end
390
860
 
391
- def export_topics(topics, format)
861
+ def export_topics(topics, format, embeddings: nil, cluster_ids: nil)
392
862
  case format.downcase
393
863
  when 'json'
394
864
  export_topics_json(topics)
395
865
  when 'html'
396
- export_topics_html(topics)
866
+ export_topics_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
397
867
  else
398
868
  say "Unknown export format: #{format}. Use 'json' or 'html'.", :red
399
869
  end
400
870
  end
401
871
 
402
872
  def export_topics_json(topics)
873
+ topics_data = topics.map do |topic|
874
+ topic_hash = topic.to_h
875
+ # Add summary if it exists
876
+ summary = topic.instance_variable_get(:@summary)
877
+ topic_hash[:summary] = summary if summary
878
+ topic_hash
879
+ end
880
+
403
881
  data = {
404
882
  generated_at: Time.now.iso8601,
405
- topics: topics.map(&:to_h),
883
+ topics: topics_data,
406
884
  summary: {
407
885
  total_topics: topics.length,
408
886
  total_documents: topics.sum(&:size),
409
- average_size: (topics.sum(&:size).to_f / topics.length).round(1)
887
+ average_size: (topics.sum(&:size).to_f / topics.length).round(1),
888
+ has_summaries: topics.any? { |t| t.instance_variable_get(:@summary) }
410
889
  }
411
890
  }
412
891
 
@@ -415,9 +894,9 @@ module Ragnar
415
894
  say "Topics exported to: #{filename}", :green
416
895
  end
417
896
 
418
- def export_topics_html(topics)
897
+ def export_topics_html(topics, embeddings: nil, cluster_ids: nil)
419
898
  # Generate self-contained HTML with D3.js visualization
420
- html = generate_topic_visualization_html(topics)
899
+ html = generate_topic_visualization_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
421
900
 
422
901
  filename = "topics_#{Time.now.strftime('%Y%m%d_%H%M%S')}.html"
423
902
  File.write(filename, html)
@@ -430,113 +909,5 @@ module Ragnar
430
909
  end
431
910
  end
432
911
 
433
- def generate_topic_visualization_html(topics)
434
- # Convert topics to JSON for D3.js
435
- topics_json = topics.map do |topic|
436
- {
437
- id: topic.id,
438
- label: topic.label || "Topic #{topic.id}",
439
- size: topic.size,
440
- terms: topic.terms.first(10),
441
- coherence: topic.coherence,
442
- samples: topic.representative_docs(k: 2).map { |d| d[0..200] }
443
- }
444
- end.to_json
445
-
446
- # HTML template with embedded D3.js
447
- <<~HTML
448
- <!DOCTYPE html>
449
- <html>
450
- <head>
451
- <meta charset="utf-8">
452
- <title>Topic Visualization</title>
453
- <script src="https://d3js.org/d3.v7.min.js"></script>
454
- <style>
455
- body { font-family: -apple-system, sans-serif; margin: 20px; }
456
- #viz { width: 100%; height: 500px; border: 1px solid #ddd; }
457
- .topic { cursor: pointer; }
458
- .topic:hover { opacity: 0.8; }
459
- #details { margin-top: 20px; padding: 15px; background: #f5f5f5; }
460
- .term { display: inline-block; margin: 5px; padding: 5px 10px; background: #e0e0e0; border-radius: 3px; }
461
- </style>
462
- </head>
463
- <body>
464
- <h1>Topic Analysis Results</h1>
465
- <div id="viz"></div>
466
- <div id="details">Click on a topic to see details</div>
467
-
468
- <script>
469
- const data = #{topics_json};
470
-
471
- // Create bubble chart
472
- const width = document.getElementById('viz').clientWidth;
473
- const height = 500;
474
-
475
- const svg = d3.select("#viz")
476
- .append("svg")
477
- .attr("width", width)
478
- .attr("height", height);
479
-
480
- // Create scale for bubble sizes
481
- const sizeScale = d3.scaleSqrt()
482
- .domain([0, d3.max(data, d => d.size)])
483
- .range([10, 50]);
484
-
485
- // Create color scale
486
- const colorScale = d3.scaleSequential(d3.interpolateViridis)
487
- .domain([0, 1]);
488
-
489
- // Create force simulation
490
- const simulation = d3.forceSimulation(data)
491
- .force("x", d3.forceX(width / 2).strength(0.05))
492
- .force("y", d3.forceY(height / 2).strength(0.05))
493
- .force("collide", d3.forceCollide(d => sizeScale(d.size) + 2));
494
-
495
- // Create bubbles
496
- const bubbles = svg.selectAll(".topic")
497
- .data(data)
498
- .enter().append("g")
499
- .attr("class", "topic");
500
-
501
- bubbles.append("circle")
502
- .attr("r", d => sizeScale(d.size))
503
- .attr("fill", d => colorScale(d.coherence))
504
- .attr("stroke", "#fff")
505
- .attr("stroke-width", 2);
506
-
507
- bubbles.append("text")
508
- .text(d => d.label)
509
- .attr("text-anchor", "middle")
510
- .attr("dy", ".3em")
511
- .style("font-size", d => Math.min(sizeScale(d.size) / 3, 14) + "px");
512
-
513
- // Add click handler
514
- bubbles.on("click", function(event, d) {
515
- showDetails(d);
516
- });
517
-
518
- // Update positions
519
- simulation.on("tick", () => {
520
- bubbles.attr("transform", d => `translate(${d.x},${d.y})`);
521
- });
522
-
523
- // Show topic details
524
- function showDetails(topic) {
525
- const details = document.getElementById('details');
526
- details.innerHTML = `
527
- <h2>${topic.label}</h2>
528
- <p><strong>Documents:</strong> ${topic.size}</p>
529
- <p><strong>Coherence:</strong> ${(topic.coherence * 100).toFixed(1)}%</p>
530
- <p><strong>Top Terms:</strong></p>
531
- <div>${topic.terms.map(t => `<span class="term">${t}</span>`).join('')}</div>
532
- <p><strong>Sample Documents:</strong></p>
533
- ${topic.samples.map(s => `<p style="font-size: 0.9em; color: #666;">"${s}..."</p>`).join('')}
534
- `;
535
- }
536
- </script>
537
- </body>
538
- </html>
539
- HTML
540
- end
541
912
  end
542
- end
913
+ end