ragnar-cli 0.1.0.pre.3 → 0.1.0.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ragnar/cli.rb CHANGED
@@ -1,27 +1,82 @@
1
+ require_relative "cli_visualization"
2
+ require_relative "config"
3
+ require "thor/interactive"
4
+ require "stringio"
5
+ require "fileutils"
6
+
1
7
  module Ragnar
2
8
  class CLI < Thor
9
+ include CLIVisualization
10
+ include Thor::Interactive::Command
11
+
12
+ # Configure interactive mode
13
+ configure_interactive(
14
+ prompt: Config.instance.interactive_prompt,
15
+ allow_nested: false,
16
+ history_file: Config.instance.history_file,
17
+ default_handler: proc do |input, thor_instance|
18
+ puts "[DEBUG] Default handler called: #{input}" if ENV["DEBUG"]
19
+
20
+ begin
21
+ # IMPORTANT: Use direct method call, NOT invoke(), to avoid Thor's
22
+ # silent deduplication that prevents repeated calls to the same method
23
+ result = thor_instance.query(input.strip)
24
+ puts "[DEBUG] Default handler completed" if ENV["DEBUG"]
25
+ result
26
+ rescue => e
27
+ puts "[DEBUG] Default handler error: #{e.message}" if ENV["DEBUG"]
28
+ puts "[DEBUG] Backtrace: #{e.backtrace.first(3)}" if ENV["DEBUG"]
29
+ raise e
30
+ end
31
+ end
32
+ )
33
+
34
+ # Class variables for caching expensive resources in interactive mode
35
+ class_variable_set(:@@cached_database, nil)
36
+ class_variable_set(:@@cached_embedder, nil)
37
+ class_variable_set(:@@cached_llm_manager, nil)
38
+ class_variable_set(:@@cached_query_processor, nil)
39
+ class_variable_set(:@@cached_db_path, nil)
40
+
3
41
  desc "index PATH", "Index text files from PATH (file or directory)"
4
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
5
- option :chunk_size, type: :numeric, default: Ragnar::DEFAULT_CHUNK_SIZE, desc: "Chunk size in tokens"
6
- option :chunk_overlap, type: :numeric, default: Ragnar::DEFAULT_CHUNK_OVERLAP, desc: "Chunk overlap in tokens"
7
- option :model, type: :string, default: Ragnar::DEFAULT_EMBEDDING_MODEL, desc: "Embedding model to use"
42
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
43
+ option :chunk_size, type: :numeric, desc: "Chunk size in tokens (default from config)"
44
+ option :chunk_overlap, type: :numeric, desc: "Chunk overlap in tokens (default from config)"
45
+ option :model, type: :string, desc: "Embedding model to use (default from config)"
8
46
  def index(path)
9
- unless File.exist?(path)
10
- say "Error: Path does not exist: #{path}", :red
47
+ # Expand user paths (handle ~ in user input)
48
+ expanded_path = File.expand_path(path)
49
+
50
+ unless File.exist?(expanded_path)
51
+ say "Error: Path does not exist: #{expanded_path}", :red
11
52
  exit 1
12
53
  end
13
54
 
14
55
  say "Indexing files from: #{path}", :green
15
56
 
57
+ # Debug options in interactive mode
58
+ puts "Debug - options: #{options.inspect}" if ENV['DEBUG']
59
+
60
+ # Get config instance
61
+ config = Config.instance
62
+
63
+ # Clear database cache when indexing new content
64
+ db_path = options[:db_path] || config.database_path
65
+ if @@cached_db_path == db_path
66
+ @@cached_database = nil
67
+ @@cached_query_processor = nil
68
+ end
69
+
16
70
  indexer = Indexer.new(
17
- db_path: options[:db_path],
18
- chunk_size: options[:chunk_size],
19
- chunk_overlap: options[:chunk_overlap],
20
- embedding_model: options[:model]
71
+ db_path: db_path,
72
+ chunk_size: options[:chunk_size] || config.chunk_size,
73
+ chunk_overlap: options[:chunk_overlap] || config.chunk_overlap,
74
+ embedding_model: options[:model] || config.embedding_model,
75
+ show_progress: config.show_progress?
21
76
  )
22
77
 
23
78
  begin
24
- stats = indexer.index_path(path)
79
+ stats = indexer.index_path(expanded_path)
25
80
  say "\nIndexing complete!", :green
26
81
  say "Files processed: #{stats[:files_processed]}"
27
82
  say "Chunks created: #{stats[:chunks_created]}"
@@ -33,31 +88,39 @@ module Ragnar
33
88
  end
34
89
 
35
90
  desc "train-umap", "Train UMAP model on existing embeddings"
36
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
91
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
37
92
  option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
38
93
  option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
39
94
  option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
40
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to save UMAP model"
95
+ option :model_path, type: :string, desc: "Path to save UMAP model"
41
96
  def train_umap
42
97
  say "Training UMAP model on embeddings...", :green
43
98
 
99
+ config = Config.instance
100
+ # Use model_path from options if provided, otherwise use config models_dir
101
+ model_path = if options[:model_path]
102
+ options[:model_path]
103
+ else
104
+ File.join(config.models_dir, "umap_model.bin")
105
+ end
106
+
44
107
  processor = UmapProcessor.new(
45
- db_path: options[:db_path],
46
- model_path: options[:model_path]
108
+ db_path: options[:db_path] || config.database_path,
109
+ model_path: model_path
47
110
  )
48
111
 
49
112
  begin
50
113
  stats = processor.train(
51
- n_components: options[:n_components],
52
- n_neighbors: options[:n_neighbors],
53
- min_dist: options[:min_dist]
114
+ n_components: options[:n_components] || 50,
115
+ n_neighbors: options[:n_neighbors] || 15,
116
+ min_dist: options[:min_dist] || 0.1
54
117
  )
55
118
 
56
119
  say "\nUMAP training complete!", :green
57
120
  say "Embeddings processed: #{stats[:embeddings_count]}"
58
121
  say "Original dimensions: #{stats[:original_dims]}"
59
122
  say "Reduced dimensions: #{stats[:reduced_dims]}"
60
- say "Model saved to: #{options[:model_path]}"
123
+ say "Model saved to: #{processor.model_path}"
61
124
  rescue => e
62
125
  say "Error during UMAP training: #{e.message}", :red
63
126
  exit 1
@@ -65,12 +128,19 @@ module Ragnar
65
128
  end
66
129
 
67
130
  desc "apply-umap", "Apply trained UMAP model to reduce embedding dimensions"
68
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
69
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to UMAP model"
131
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
132
+ option :model_path, type: :string, desc: "Path to UMAP model"
70
133
  option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
71
134
  def apply_umap
72
- unless File.exist?(options[:model_path])
73
- say "Error: UMAP model not found at: #{options[:model_path]}", :red
135
+ config = Config.instance
136
+ model_path = if options[:model_path]
137
+ options[:model_path]
138
+ else
139
+ File.join(config.models_dir, "umap_model.bin")
140
+ end
141
+
142
+ unless File.exist?(model_path)
143
+ say "Error: UMAP model not found at: #{model_path}", :red
74
144
  say "Please run 'train-umap' first to create a model.", :yellow
75
145
  exit 1
76
146
  end
@@ -78,12 +148,12 @@ module Ragnar
78
148
  say "Applying UMAP model to embeddings...", :green
79
149
 
80
150
  processor = UmapProcessor.new(
81
- db_path: options[:db_path],
82
- model_path: options[:model_path]
151
+ db_path: options[:db_path] || config.database_path,
152
+ model_path: model_path
83
153
  )
84
154
 
85
155
  begin
86
- stats = processor.apply(batch_size: options[:batch_size])
156
+ stats = processor.apply(batch_size: options[:batch_size] || 100)
87
157
 
88
158
  say "\nUMAP application complete!", :green
89
159
  say "Embeddings processed: #{stats[:processed]}"
@@ -96,18 +166,21 @@ module Ragnar
96
166
  end
97
167
 
98
168
  desc "topics", "Extract and display topics from indexed documents"
99
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
169
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
100
170
  option :min_cluster_size, type: :numeric, default: 5, desc: "Minimum documents per topic"
101
171
  option :method, type: :string, default: "hybrid", desc: "Labeling method: fast, quality, or hybrid"
102
172
  option :export, type: :string, desc: "Export topics to file (json or html)"
103
173
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing"
174
+ option :summarize, type: :boolean, default: false, aliases: "-s", desc: "Generate human-readable topic summaries using LLM"
175
+ option :llm_model, type: :string, default: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", desc: "LLM model for summarization"
176
+ option :gguf_file, type: :string, default: "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf", desc: "GGUF file name for LLM model"
104
177
  def topics
105
178
  require_relative 'topic_modeling'
106
179
 
107
180
  say "Extracting topics from indexed documents...", :green
108
181
 
109
- # Load embeddings and documents from database
110
- database = Database.new(options[:db_path])
182
+ # Load embeddings and documents from database - use cache in interactive mode
183
+ database = get_cached_database(options[:db_path] || Config.instance.database_path)
111
184
 
112
185
  begin
113
186
  # Get all documents with embeddings
@@ -130,7 +203,7 @@ module Ragnar
130
203
  # Check if we have reduced embeddings available
131
204
  first_doc = docs_with_embeddings.first
132
205
  has_reduced = first_doc[:reduced_embedding] && !first_doc[:reduced_embedding].empty?
133
-
206
+
134
207
  if has_reduced
135
208
  embeddings = docs_with_embeddings.map { |d| d[:reduced_embedding] }
136
209
  say "Using reduced embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
@@ -142,7 +215,7 @@ module Ragnar
142
215
  # Let the engine handle dimensionality reduction if needed
143
216
  reduce_dims = true
144
217
  end
145
-
218
+
146
219
  documents = docs_with_embeddings.map { |d| d[:chunk_text] }
147
220
  metadata = docs_with_embeddings.map { |d| { file_path: d[:file_path], chunk_index: d[:chunk_index] } }
148
221
 
@@ -164,12 +237,36 @@ module Ragnar
164
237
  metadata: metadata
165
238
  )
166
239
 
240
+ # Generate summaries if requested
241
+ if options[:summarize] && topics.any?
242
+ say "Generating topic summaries with LLM...", :yellow
243
+ begin
244
+ require 'red-candle'
245
+
246
+ # Initialize LLM for summarization once
247
+ say "Loading model: #{options[:llm_model]}", :cyan if options[:verbose]
248
+ llm = Candle::LLM.from_pretrained(options[:llm_model], gguf_file: options[:gguf_file])
249
+
250
+ # Add summaries to topics
251
+ topics.each_with_index do |topic, i|
252
+ say " Summarizing topic #{i+1}/#{topics.length}...", :yellow if options[:verbose]
253
+ topic.instance_variable_set(:@summary, summarize_topic(topic, llm))
254
+ end
255
+
256
+ say "Topic summaries generated!", :green
257
+ rescue => e
258
+ say "Warning: Could not generate topic summaries: #{e.message}", :yellow
259
+ say "Proceeding without summaries...", :yellow
260
+ end
261
+ end
262
+
167
263
  # Display results
168
- display_topics(topics)
264
+ display_topics(topics, show_summaries: options[:summarize])
169
265
 
170
266
  # Export if requested
171
267
  if options[:export]
172
- export_topics(topics, options[:export])
268
+ # Pass embeddings and cluster IDs for visualization
269
+ export_topics(topics, options[:export], embeddings: embeddings, cluster_ids: engine.instance_variable_get(:@cluster_ids))
173
270
  end
174
271
 
175
272
  rescue => e
@@ -184,51 +281,80 @@ module Ragnar
184
281
  option :k, type: :numeric, default: 5, desc: "Number of results to return"
185
282
  option :show_scores, type: :boolean, default: false, desc: "Show similarity scores"
186
283
  def search(query_text)
187
- database = Database.new(options[:database])
188
- embedder = Embedder.new
189
-
284
+ database = get_cached_database(options[:database] || Config.instance.database_path)
285
+ embedder = get_cached_embedder()
286
+
190
287
  # Generate embedding for query
191
288
  query_embedding = embedder.embed_text(query_text)
192
-
289
+
193
290
  # Search for similar documents
194
291
  results = database.search_similar(query_embedding, k: options[:k])
195
-
292
+
196
293
  if results.empty?
197
294
  say "No results found.", :yellow
198
295
  return
199
296
  end
200
-
297
+
201
298
  say "Found #{results.length} results:\n", :green
202
-
299
+
203
300
  results.each_with_index do |result, idx|
204
301
  say "#{idx + 1}. File: #{result[:file_path]}", :cyan
205
302
  say " Chunk: #{result[:chunk_index]}"
206
-
303
+
207
304
  if options[:show_scores]
208
305
  say " Distance: #{result[:distance].round(4)}"
209
306
  end
210
-
307
+
211
308
  # Show preview of content
212
309
  preview = result[:chunk_text][0..200].gsub(/\s+/, ' ')
213
310
  say " Content: #{preview}..."
214
311
  say ""
215
312
  end
216
313
  end
217
-
314
+
218
315
  desc "query QUESTION", "Query the RAG system"
219
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
316
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
220
317
  option :top_k, type: :numeric, default: 3, desc: "Number of top documents to use"
221
318
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing steps"
222
319
  option :json, type: :boolean, default: false, desc: "Output as JSON"
223
320
  def query(question)
224
- processor = QueryProcessor.new(db_path: options[:db_path])
321
+ puts "Debug - Query called with: #{question.inspect}" if ENV['DEBUG']
322
+ puts "Debug - Options: #{options.inspect}" if ENV['DEBUG']
323
+
324
+ processor = get_cached_query_processor(options[:db_path] || Config.instance.database_path)
325
+ puts "Debug - Processor: #{processor.class}" if ENV['DEBUG']
225
326
 
226
327
  begin
227
- result = processor.query(question, top_k: options[:top_k], verbose: options[:verbose])
328
+ config = Config.instance
329
+ result = processor.query(
330
+ question,
331
+ top_k: options[:top_k] || config.query_top_k,
332
+ verbose: options[:verbose] || false,
333
+ enable_rewriting: config.enable_query_rewriting?
334
+ )
335
+ puts "Debug - Result keys: #{result.keys}" if ENV['DEBUG']
228
336
 
229
337
  if options[:json]
230
338
  puts JSON.pretty_generate(result)
339
+ elsif interactive?
340
+ # Clean output for interactive mode - just answer, confidence, and sources
341
+ say "" # Add blank line before answer for spacing
342
+ say result[:answer]
343
+
344
+ if result[:confidence]
345
+ say "\nConfidence: #{result[:confidence]}%", :magenta
346
+ end
347
+
348
+ if result[:sources] && !result[:sources].empty?
349
+ say "\nSources:", :blue
350
+ result[:sources].each_with_index do |source, idx|
351
+ say " #{idx + 1}. #{source[:source_file]}" if source[:source_file]
352
+ end
353
+ end
354
+
355
+ say "" # Add blank line for spacing
231
356
  else
357
+ # Full output for CLI mode
232
358
  say "\n" + "="*60, :green
233
359
  say "Query: #{result[:query]}", :cyan
234
360
 
@@ -250,7 +376,7 @@ module Ragnar
250
376
  end
251
377
  end
252
378
 
253
- if options[:verbose] && result[:sub_queries]
379
+ if (options[:verbose] || false) && result[:sub_queries]
254
380
  say "\nSub-queries used:", :yellow
255
381
  result[:sub_queries].each { |sq| say " - #{sq}" }
256
382
  end
@@ -259,15 +385,15 @@ module Ragnar
259
385
  end
260
386
  rescue => e
261
387
  say "Error processing query: #{e.message}", :red
262
- say e.backtrace.first(5).join("\n") if options[:verbose]
388
+ puts "Debug - Full backtrace: #{e.backtrace.join("\n")}" if ENV['DEBUG']
263
389
  exit 1
264
390
  end
265
391
  end
266
392
 
267
393
  desc "stats", "Show database statistics"
268
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
394
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
269
395
  def stats
270
- db = Database.new(options[:db_path])
396
+ db = get_cached_database(options[:db_path] || Config.instance.database_path)
271
397
  stats = db.get_stats
272
398
 
273
399
  say "\nDatabase Statistics", :green
@@ -293,8 +419,325 @@ module Ragnar
293
419
  say "Ragnar v#{Ragnar::VERSION}"
294
420
  end
295
421
 
422
+ desc "config", "Show current configuration"
423
+ def config
424
+ config = Config.instance
425
+
426
+ say "\nConfiguration Settings:", :cyan
427
+ say "-" * 40
428
+
429
+ if config.config_exists?
430
+ say "Config file: #{config.config_file_path}", :green
431
+ else
432
+ say "Config file: None (using defaults)", :yellow
433
+ end
434
+
435
+ say "\nPaths:", :cyan
436
+ say " Database: #{config.database_path}"
437
+ say " Models: #{config.models_dir}"
438
+ say " History: #{config.history_file}"
439
+
440
+ say "\nEmbeddings:", :cyan
441
+ say " Model: #{config.embedding_model}"
442
+ say " Chunk size: #{config.chunk_size}"
443
+ say " Chunk overlap: #{config.chunk_overlap}"
444
+
445
+ say "\nLLM:", :cyan
446
+ say " Model: #{config.llm_model}"
447
+ say " GGUF file: #{config.llm_gguf_file}"
448
+
449
+ say "\nUMAP:", :cyan
450
+ say " Reduced dimensions: #{config.get('umap.reduced_dimensions', Ragnar::DEFAULT_REDUCED_DIMENSIONS)}"
451
+ say " N neighbors: #{config.get('umap.n_neighbors', 15)}"
452
+ say " Min distance: #{config.get('umap.min_dist', 0.1)}"
453
+
454
+ say "\nQuery:", :cyan
455
+ say " Top K: #{config.query_top_k}"
456
+ say " Query rewriting: #{config.enable_query_rewriting?}"
457
+ end
458
+
459
+ desc "model", "Show current LLM model information"
460
+ def model
461
+ config = Config.instance
462
+
463
+ say "\nLLM Model Configuration:", :cyan
464
+ say "-" * 40
465
+
466
+ say "\nModel:", :green
467
+ say " Repository: #{config.llm_model}"
468
+ say " GGUF file: #{config.llm_gguf_file}"
469
+
470
+ # Check if model files exist
471
+ model_path = File.join(config.models_dir, config.llm_gguf_file)
472
+ if File.exist?(model_path)
473
+ size_mb = (File.size(model_path) / 1024.0 / 1024.0).round(2)
474
+ say "\nModel file exists: #{model_path} (#{size_mb} MB)", :green
475
+ else
476
+ say "\nModel file not found: #{model_path}", :yellow
477
+ say "Run 'ragnar query' to download automatically", :yellow
478
+ end
479
+ end
480
+
481
+ desc "clear-cache", "Clear cached instances (useful in interactive mode)"
482
+ def clear_cache_command
483
+ clear_cache
484
+ say "Cache cleared. Next commands will create fresh instances.", :green
485
+ end
486
+
487
+ desc "reset", "Reset Ragnar data (database, models, cache)"
488
+ option :all, type: :boolean, default: false, aliases: "-a", desc: "Reset everything (database, models, cache)"
489
+ option :database, type: :boolean, default: false, aliases: "-d", desc: "Reset database only"
490
+ option :models, type: :boolean, default: false, aliases: "-m", desc: "Reset UMAP models only"
491
+ option :cache, type: :boolean, default: false, aliases: "-c", desc: "Clear cache only"
492
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Skip confirmation prompt"
493
+ def reset
494
+ # Determine what to reset
495
+ reset_all = options[:all]
496
+ reset_db = options[:database] || reset_all
497
+ reset_models = options[:models] || reset_all
498
+ reset_cache = options[:cache] || reset_all
499
+
500
+ # If no specific options, default to all
501
+ if !reset_db && !reset_models && !reset_cache
502
+ reset_all = true
503
+ reset_db = reset_models = reset_cache = true
504
+ end
505
+
506
+ # Build confirmation message
507
+ items_to_reset = []
508
+ items_to_reset << "database" if reset_db
509
+ items_to_reset << "UMAP models" if reset_models
510
+ items_to_reset << "cache" if reset_cache
511
+
512
+ # Get paths that will be affected
513
+ config = Config.instance
514
+ db_path = options[:db_path] || config.database_path
515
+ model_path = File.join(config.models_dir, "umap_model.bin")
516
+
517
+ # Show what will be deleted
518
+ say "\nWARNING: This will delete the following:", :red
519
+ say "-" * 40
520
+
521
+ if reset_db
522
+ say "Database: #{db_path}", :cyan
523
+ if File.exist?(db_path)
524
+ stats = Database.new(db_path).get_stats rescue nil
525
+ if stats
526
+ say " (#{stats[:total_documents]} documents, #{stats[:total_chunks]} chunks)", :white
527
+ end
528
+ else
529
+ say " (does not exist)", :white
530
+ end
531
+ end
532
+
533
+ if reset_models
534
+ say "UMAP models:", :cyan
535
+ model_files = [
536
+ model_path,
537
+ model_path.sub(/\.bin$/, '_metadata.json'),
538
+ model_path.sub(/\.bin$/, '_embeddings.json') # Old format, if exists
539
+ ]
540
+ model_files.each do |file|
541
+ if File.exist?(file)
542
+ say " #{file} (#{(File.size(file) / 1024.0).round(1)} KB)", :white
543
+ end
544
+ end
545
+ if model_files.none? { |f| File.exist?(f) }
546
+ say " (no models found)", :white
547
+ end
548
+ end
549
+
550
+ if reset_cache
551
+ cache_dir = File.expand_path("~/.cache/ragnar")
552
+ say "Cache directory: #{cache_dir}", :cyan
553
+ if Dir.exist?(cache_dir)
554
+ cache_size = Dir.glob(File.join(cache_dir, "**/*"))
555
+ .select { |f| File.file?(f) }
556
+ .sum { |f| File.size(f) } / 1024.0 / 1024.0
557
+ say " (#{cache_size.round(1)} MB)", :white
558
+ else
559
+ say " (does not exist)", :white
560
+ end
561
+ end
562
+
563
+ say "-" * 40
564
+
565
+ # Ask for confirmation unless --force
566
+ unless options[:force]
567
+ message = "\nAre you sure you want to reset #{items_to_reset.join(', ')}?"
568
+
569
+ # Check if we're in interactive mode
570
+ if ENV['THOR_INTERACTIVE_SESSION'] == 'true'
571
+ # In interactive mode, use a simple prompt
572
+ say message, :yellow
573
+ response = ask("Type 'yes' to confirm, anything else to cancel:", :yellow)
574
+ confirmed = response.downcase == 'yes'
575
+ else
576
+ # In CLI mode, use Thor's yes? method
577
+ confirmed = yes?(message + " (y/N)", :yellow)
578
+ end
579
+
580
+ unless confirmed
581
+ say "\nReset cancelled.", :cyan
582
+ return
583
+ end
584
+ end
585
+
586
+ # Perform the reset
587
+ say "\nResetting...", :green
588
+
589
+ if reset_db && File.exist?(db_path)
590
+ say "Removing database: #{db_path}"
591
+ FileUtils.rm_rf(db_path)
592
+ say " ✓ Database removed", :green
593
+ end
594
+
595
+ if reset_models
596
+ model_files = [
597
+ model_path,
598
+ model_path.sub(/\.bin$/, '_metadata.json'),
599
+ model_path.sub(/\.bin$/, '_embeddings.json')
600
+ ]
601
+ model_files.each do |file|
602
+ if File.exist?(file)
603
+ say "Removing model file: #{file}"
604
+ FileUtils.rm_f(file)
605
+ say " ✓ Removed", :green
606
+ end
607
+ end
608
+ end
609
+
610
+ if reset_cache
611
+ # Clear in-memory cache
612
+ clear_cache
613
+
614
+ # Optionally clear cache directory (but preserve history)
615
+ cache_dir = File.expand_path("~/.cache/ragnar")
616
+ if Dir.exist?(cache_dir)
617
+ # Preserve history file
618
+ history_file = File.join(cache_dir, "history")
619
+ history_content = File.read(history_file) if File.exist?(history_file)
620
+
621
+ # Remove cache directory contents except history
622
+ Dir.glob(File.join(cache_dir, "*")).each do |item|
623
+ next if File.basename(item) == "history"
624
+ if File.directory?(item)
625
+ FileUtils.rm_rf(item)
626
+ else
627
+ FileUtils.rm_f(item)
628
+ end
629
+ say "Removed cache item: #{File.basename(item)}", :green
630
+ end
631
+ end
632
+ say " ✓ Cache cleared", :green
633
+ end
634
+
635
+ say "\nReset complete!", :green
636
+ say "You can now start fresh with 'ragnar index <path>'", :cyan
637
+ end
638
+
639
+ desc "init-config", "Generate a configuration file with current defaults"
640
+ option :global, type: :boolean, default: false, aliases: "-g", desc: "Create global config in home directory"
641
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Overwrite existing config file"
642
+ def init_config
643
+ config = Config.instance
644
+
645
+ if options[:global]
646
+ config_path = File.expand_path('~/.ragnar.yml')
647
+ else
648
+ config_path = File.join(Dir.pwd, '.ragnar.yml')
649
+ end
650
+
651
+ if File.exist?(config_path) && !options[:force]
652
+ say "Config file already exists at: #{config_path}", :yellow
653
+ say "Use --force to overwrite, or choose a different location.", :yellow
654
+ return
655
+ end
656
+
657
+ generated_path = config.generate_config_file(config_path)
658
+ say "Config file created at: #{generated_path}", :green
659
+ say "Edit this file to customize Ragnar's behavior.", :cyan
660
+
661
+ if config.config_exists?
662
+ say "\nNote: Currently using config from: #{config.config_file_path}", :yellow
663
+ end
664
+ end
665
+
296
666
  private
297
667
 
668
+ # Cached instance helpers for interactive mode
669
+ def get_cached_database(db_path = nil)
670
+ # Use config default if no path provided
671
+ db_path ||= Config.instance.database_path
672
+
673
+ # Cache database per path - clear cache if path changes
674
+ if @@cached_db_path != db_path
675
+ @@cached_database = nil
676
+ @@cached_db_path = db_path
677
+ @@cached_query_processor = nil # Also clear dependent caches
678
+ end
679
+
680
+ @@cached_database ||= Database.new(db_path)
681
+ end
682
+
683
+ def get_cached_embedder(model_name = nil)
684
+ # Use config default if no model specified
685
+ model_name ||= Config.instance.embedding_model
686
+ @@cached_embedder ||= Embedder.new(model_name: model_name)
687
+ end
688
+
689
+ def get_cached_llm_manager
690
+ @@cached_llm_manager ||= LLMManager.instance
691
+ end
692
+
693
+ def get_cached_query_processor(db_path = nil)
694
+ # Use config default if no path provided
695
+ db_path ||= Config.instance.database_path
696
+
697
+ # Cache query processor per database path
698
+ if @@cached_db_path != db_path || @@cached_query_processor.nil?
699
+ @@cached_query_processor = QueryProcessor.new(db_path: db_path)
700
+ end
701
+
702
+ @@cached_query_processor
703
+ end
704
+
705
+ def clear_cache
706
+ @@cached_database = nil
707
+ @@cached_embedder = nil
708
+ @@cached_llm_manager = nil
709
+ @@cached_query_processor = nil
710
+ @@cached_db_path = nil
711
+ end
712
+
713
+
714
+ def summarize_topic(topic, llm)
715
+ # Get representative documents for context
716
+ sample_docs = topic.representative_docs(k: 3)
717
+
718
+ # Simple, clear prompt for summarization
719
+ prompt = <<~PROMPT
720
+ Summarize what connects these documents in 1-2 sentences:
721
+
722
+ Key terms: #{topic.terms.first(5).join(', ')}
723
+
724
+ Documents:
725
+ #{sample_docs.map.with_index { |doc, i| "#{i+1}. #{doc}" }.join("\n")}
726
+
727
+ Summary:
728
+ PROMPT
729
+
730
+ begin
731
+ summary = llm.generate(prompt).strip
732
+ # Clean up common artifacts
733
+ summary = summary.lines.first&.strip || "Related documents"
734
+ summary = summary.gsub(/^(Summary:|Topic:|Documents:)/i, '').strip
735
+ summary.empty? ? "Documents about #{topic.terms.first(2).join(' and ')}" : summary
736
+ rescue => e
737
+ "Documents about #{topic.terms.first(2).join(' and ')}"
738
+ end
739
+ end
740
+
298
741
  def fetch_all_documents(database)
299
742
  # Temporary workaround to get all documents
300
743
  # In production, we'd add a proper method to Database class
@@ -321,9 +764,12 @@ module Ragnar
321
764
  []
322
765
  end
323
766
 
324
- def display_topics(topics)
767
+ def display_topics(topics, show_summaries: false)
325
768
  say "\n" + "="*60, :green
326
769
  say "Topic Analysis Results", :cyan
770
+ if show_summaries
771
+ say " (with LLM-generated summaries)", :yellow
772
+ end
327
773
  say "="*60, :green
328
774
 
329
775
  if topics.empty?
@@ -342,21 +788,21 @@ module Ragnar
342
788
  say "\n" + "─" * 40, :blue
343
789
  say "MAJOR TOPICS (≥20 docs)", :blue
344
790
  say "─" * 40, :blue
345
- display_topic_group(large_topics, :cyan)
791
+ display_topic_group(large_topics, :cyan, show_summaries: show_summaries)
346
792
  end
347
793
 
348
794
  if medium_topics.any?
349
795
  say "\n" + "─" * 40, :yellow
350
796
  say "MEDIUM TOPICS (10-19 docs)", :yellow
351
797
  say "─" * 40, :yellow
352
- display_topic_group(medium_topics, :yellow)
798
+ display_topic_group(medium_topics, :yellow, show_summaries: show_summaries)
353
799
  end
354
800
 
355
801
  if small_topics.any?
356
802
  say "\n" + "─" * 40, :white
357
803
  say "MINOR TOPICS (<10 docs)", :white
358
804
  say "─" * 40, :white
359
- display_topic_group(small_topics, :white)
805
+ display_topic_group(small_topics, :white, show_summaries: show_summaries)
360
806
  end
361
807
 
362
808
  # Summary statistics
@@ -380,10 +826,18 @@ module Ragnar
380
826
  say " Small (<10): #{small_topics.length} topics, #{small_topics.sum(&:size)} docs"
381
827
  end
382
828
 
383
- def display_topic_group(topics, color)
829
+ def display_topic_group(topics, color, show_summaries: false)
384
830
  topics.sort_by { |t| -t.size }.each_with_index do |topic, idx|
385
831
  say "\n#{topic.label || 'Unlabeled'} (#{topic.size} docs)", color
386
832
 
833
+ # Show LLM summary if available
834
+ if show_summaries
835
+ summary = topic.instance_variable_get(:@summary)
836
+ if summary
837
+ say " Summary: #{summary}", :green
838
+ end
839
+ end
840
+
387
841
  # Show coherence as a bar
388
842
  if topic.coherence > 0
389
843
  coherence_pct = (topic.coherence * 100).round(0)
@@ -395,8 +849,8 @@ module Ragnar
395
849
  # Compact term display
396
850
  say " Terms: #{topic.terms.first(6).join(' • ')}" if topic.terms.any?
397
851
 
398
- # Short sample
399
- if topic.representative_docs(k: 1).any?
852
+ # Short sample (unless we showed a summary)
853
+ if !show_summaries && topic.representative_docs(k: 1).any?
400
854
  preview = topic.representative_docs(k: 1).first
401
855
  preview = preview[0..100] + "..." if preview.length > 100
402
856
  say " \"#{preview}\"", :white
@@ -404,25 +858,34 @@ module Ragnar
404
858
  end
405
859
  end
406
860
 
407
- def export_topics(topics, format)
861
+ def export_topics(topics, format, embeddings: nil, cluster_ids: nil)
408
862
  case format.downcase
409
863
  when 'json'
410
864
  export_topics_json(topics)
411
865
  when 'html'
412
- export_topics_html(topics)
866
+ export_topics_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
413
867
  else
414
868
  say "Unknown export format: #{format}. Use 'json' or 'html'.", :red
415
869
  end
416
870
  end
417
871
 
418
872
  def export_topics_json(topics)
873
+ topics_data = topics.map do |topic|
874
+ topic_hash = topic.to_h
875
+ # Add summary if it exists
876
+ summary = topic.instance_variable_get(:@summary)
877
+ topic_hash[:summary] = summary if summary
878
+ topic_hash
879
+ end
880
+
419
881
  data = {
420
882
  generated_at: Time.now.iso8601,
421
- topics: topics.map(&:to_h),
883
+ topics: topics_data,
422
884
  summary: {
423
885
  total_topics: topics.length,
424
886
  total_documents: topics.sum(&:size),
425
- average_size: (topics.sum(&:size).to_f / topics.length).round(1)
887
+ average_size: (topics.sum(&:size).to_f / topics.length).round(1),
888
+ has_summaries: topics.any? { |t| t.instance_variable_get(:@summary) }
426
889
  }
427
890
  }
428
891
 
@@ -431,9 +894,9 @@ module Ragnar
431
894
  say "Topics exported to: #{filename}", :green
432
895
  end
433
896
 
434
- def export_topics_html(topics)
897
+ def export_topics_html(topics, embeddings: nil, cluster_ids: nil)
435
898
  # Generate self-contained HTML with D3.js visualization
436
- html = generate_topic_visualization_html(topics)
899
+ html = generate_topic_visualization_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
437
900
 
438
901
  filename = "topics_#{Time.now.strftime('%Y%m%d_%H%M%S')}.html"
439
902
  File.write(filename, html)
@@ -446,113 +909,5 @@ module Ragnar
446
909
  end
447
910
  end
448
911
 
449
- def generate_topic_visualization_html(topics)
450
- # Convert topics to JSON for D3.js
451
- topics_json = topics.map do |topic|
452
- {
453
- id: topic.id,
454
- label: topic.label || "Topic #{topic.id}",
455
- size: topic.size,
456
- terms: topic.terms.first(10),
457
- coherence: topic.coherence,
458
- samples: topic.representative_docs(k: 2).map { |d| d[0..200] }
459
- }
460
- end.to_json
461
-
462
- # HTML template with embedded D3.js
463
- <<~HTML
464
- <!DOCTYPE html>
465
- <html>
466
- <head>
467
- <meta charset="utf-8">
468
- <title>Topic Visualization</title>
469
- <script src="https://d3js.org/d3.v7.min.js"></script>
470
- <style>
471
- body { font-family: -apple-system, sans-serif; margin: 20px; }
472
- #viz { width: 100%; height: 500px; border: 1px solid #ddd; }
473
- .topic { cursor: pointer; }
474
- .topic:hover { opacity: 0.8; }
475
- #details { margin-top: 20px; padding: 15px; background: #f5f5f5; }
476
- .term { display: inline-block; margin: 5px; padding: 5px 10px; background: #e0e0e0; border-radius: 3px; }
477
- </style>
478
- </head>
479
- <body>
480
- <h1>Topic Analysis Results</h1>
481
- <div id="viz"></div>
482
- <div id="details">Click on a topic to see details</div>
483
-
484
- <script>
485
- const data = #{topics_json};
486
-
487
- // Create bubble chart
488
- const width = document.getElementById('viz').clientWidth;
489
- const height = 500;
490
-
491
- const svg = d3.select("#viz")
492
- .append("svg")
493
- .attr("width", width)
494
- .attr("height", height);
495
-
496
- // Create scale for bubble sizes
497
- const sizeScale = d3.scaleSqrt()
498
- .domain([0, d3.max(data, d => d.size)])
499
- .range([10, 50]);
500
-
501
- // Create color scale
502
- const colorScale = d3.scaleSequential(d3.interpolateViridis)
503
- .domain([0, 1]);
504
-
505
- // Create force simulation
506
- const simulation = d3.forceSimulation(data)
507
- .force("x", d3.forceX(width / 2).strength(0.05))
508
- .force("y", d3.forceY(height / 2).strength(0.05))
509
- .force("collide", d3.forceCollide(d => sizeScale(d.size) + 2));
510
-
511
- // Create bubbles
512
- const bubbles = svg.selectAll(".topic")
513
- .data(data)
514
- .enter().append("g")
515
- .attr("class", "topic");
516
-
517
- bubbles.append("circle")
518
- .attr("r", d => sizeScale(d.size))
519
- .attr("fill", d => colorScale(d.coherence))
520
- .attr("stroke", "#fff")
521
- .attr("stroke-width", 2);
522
-
523
- bubbles.append("text")
524
- .text(d => d.label)
525
- .attr("text-anchor", "middle")
526
- .attr("dy", ".3em")
527
- .style("font-size", d => Math.min(sizeScale(d.size) / 3, 14) + "px");
528
-
529
- // Add click handler
530
- bubbles.on("click", function(event, d) {
531
- showDetails(d);
532
- });
533
-
534
- // Update positions
535
- simulation.on("tick", () => {
536
- bubbles.attr("transform", d => `translate(${d.x},${d.y})`);
537
- });
538
-
539
- // Show topic details
540
- function showDetails(topic) {
541
- const details = document.getElementById('details');
542
- details.innerHTML = `
543
- <h2>${topic.label}</h2>
544
- <p><strong>Documents:</strong> ${topic.size}</p>
545
- <p><strong>Coherence:</strong> ${(topic.coherence * 100).toFixed(1)}%</p>
546
- <p><strong>Top Terms:</strong></p>
547
- <div>${topic.terms.map(t => `<span class="term">${t}</span>`).join('')}</div>
548
- <p><strong>Sample Documents:</strong></p>
549
- ${topic.samples.map(s => `<p style="font-size: 0.9em; color: #666;">"${s}..."</p>`).join('')}
550
- `;
551
- }
552
- </script>
553
- </body>
554
- </html>
555
- HTML
556
- end
557
912
  end
558
- end
913
+ end