ragnar-cli 0.1.0.pre.3 → 0.1.0.pre.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ragnar/cli.rb CHANGED
@@ -1,27 +1,89 @@
1
+ require_relative "cli_visualization"
2
+ require_relative "cli_umap"
3
+ require_relative "config"
4
+ require "thor/interactive"
5
+ require "stringio"
6
+ require "fileutils"
7
+
1
8
  module Ragnar
2
9
  class CLI < Thor
10
+ include CLIVisualization
11
+ include Thor::Interactive::Command
12
+
13
+ default_command :interactive
14
+
15
+ class_option :profile, type: :string, aliases: "-p", desc: "LLM profile to use (e.g., red_candle, opus, sonnet)"
16
+
17
+ # Configure interactive mode
18
+ configure_interactive(
19
+ prompt: Config.instance.interactive_prompt,
20
+ allow_nested: false,
21
+ history_file: Config.instance.history_file,
22
+ ui_mode: :tui,
23
+ default_handler: proc do |input, thor_instance|
24
+ puts "[DEBUG] Default handler called: #{input}" if ENV["DEBUG"]
25
+
26
+ begin
27
+ # IMPORTANT: Use direct method call, NOT invoke(), to avoid Thor's
28
+ # silent deduplication that prevents repeated calls to the same method
29
+ result = thor_instance.query(input.strip)
30
+ puts "[DEBUG] Default handler completed" if ENV["DEBUG"]
31
+ result
32
+ rescue => e
33
+ puts "[DEBUG] Default handler error: #{e.message}" if ENV["DEBUG"]
34
+ puts "[DEBUG] Backtrace: #{e.backtrace.first(3)}" if ENV["DEBUG"]
35
+ raise e
36
+ end
37
+ end
38
+ )
39
+
40
+ # Class variables for caching expensive resources in interactive mode
41
+ class_variable_set(:@@cached_database, nil)
42
+ class_variable_set(:@@cached_embedder, nil)
43
+ class_variable_set(:@@cached_llm_manager, nil)
44
+ class_variable_set(:@@cached_query_processor, nil)
45
+ class_variable_set(:@@cached_db_path, nil)
46
+ class_variable_set(:@@verbose_mode, false)
47
+
3
48
  desc "index PATH", "Index text files from PATH (file or directory)"
4
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
5
- option :chunk_size, type: :numeric, default: Ragnar::DEFAULT_CHUNK_SIZE, desc: "Chunk size in tokens"
6
- option :chunk_overlap, type: :numeric, default: Ragnar::DEFAULT_CHUNK_OVERLAP, desc: "Chunk overlap in tokens"
7
- option :model, type: :string, default: Ragnar::DEFAULT_EMBEDDING_MODEL, desc: "Embedding model to use"
49
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
50
+ option :chunk_size, type: :numeric, desc: "Chunk size in tokens (default from config)"
51
+ option :chunk_overlap, type: :numeric, desc: "Chunk overlap in tokens (default from config)"
52
+ option :model, type: :string, desc: "Embedding model to use (default from config)"
8
53
  def index(path)
9
- unless File.exist?(path)
10
- say "Error: Path does not exist: #{path}", :red
54
+ # Expand user paths (handle ~ in user input)
55
+ expanded_path = File.expand_path(path)
56
+
57
+ unless File.exist?(expanded_path)
58
+ say "Error: Path does not exist: #{expanded_path}", :red
11
59
  exit 1
12
60
  end
13
61
 
14
62
  say "Indexing files from: #{path}", :green
15
63
 
64
+ # Debug options in interactive mode
65
+ puts "Debug - options: #{options.inspect}" if ENV['DEBUG']
66
+
67
+ # Get config instance
68
+ config = Config.instance
69
+
70
+ # Clear database cache when indexing new content
71
+ db_path = options[:db_path] || config.database_path
72
+ if @@cached_db_path == db_path
73
+ @@cached_database = nil
74
+ @@cached_query_processor = nil
75
+ end
76
+
16
77
  indexer = Indexer.new(
17
- db_path: options[:db_path],
18
- chunk_size: options[:chunk_size],
19
- chunk_overlap: options[:chunk_overlap],
20
- embedding_model: options[:model]
78
+ db_path: db_path,
79
+ chunk_size: options[:chunk_size] || config.chunk_size,
80
+ chunk_overlap: options[:chunk_overlap] || config.chunk_overlap,
81
+ embedding_model: options[:model] || config.embedding_model,
82
+ show_progress: config.show_progress?
21
83
  )
22
84
 
23
85
  begin
24
- stats = indexer.index_path(path)
86
+ stats = indexer.index_path(expanded_path)
25
87
  say "\nIndexing complete!", :green
26
88
  say "Files processed: #{stats[:files_processed]}"
27
89
  say "Chunks created: #{stats[:chunks_created]}"
@@ -32,82 +94,26 @@ module Ragnar
32
94
  end
33
95
  end
34
96
 
35
- desc "train-umap", "Train UMAP model on existing embeddings"
36
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
37
- option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
38
- option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
39
- option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
40
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to save UMAP model"
41
- def train_umap
42
- say "Training UMAP model on embeddings...", :green
43
-
44
- processor = UmapProcessor.new(
45
- db_path: options[:db_path],
46
- model_path: options[:model_path]
47
- )
48
-
49
- begin
50
- stats = processor.train(
51
- n_components: options[:n_components],
52
- n_neighbors: options[:n_neighbors],
53
- min_dist: options[:min_dist]
54
- )
55
-
56
- say "\nUMAP training complete!", :green
57
- say "Embeddings processed: #{stats[:embeddings_count]}"
58
- say "Original dimensions: #{stats[:original_dims]}"
59
- say "Reduced dimensions: #{stats[:reduced_dims]}"
60
- say "Model saved to: #{options[:model_path]}"
61
- rescue => e
62
- say "Error during UMAP training: #{e.message}", :red
63
- exit 1
64
- end
65
- end
66
-
67
- desc "apply-umap", "Apply trained UMAP model to reduce embedding dimensions"
68
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
69
- option :model_path, type: :string, default: "umap_model.bin", desc: "Path to UMAP model"
70
- option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
71
- def apply_umap
72
- unless File.exist?(options[:model_path])
73
- say "Error: UMAP model not found at: #{options[:model_path]}", :red
74
- say "Please run 'train-umap' first to create a model.", :yellow
75
- exit 1
76
- end
77
-
78
- say "Applying UMAP model to embeddings...", :green
79
-
80
- processor = UmapProcessor.new(
81
- db_path: options[:db_path],
82
- model_path: options[:model_path]
83
- )
84
-
85
- begin
86
- stats = processor.apply(batch_size: options[:batch_size])
87
-
88
- say "\nUMAP application complete!", :green
89
- say "Embeddings processed: #{stats[:processed]}"
90
- say "Already processed: #{stats[:skipped]}"
91
- say "Errors: #{stats[:errors]}" if stats[:errors] > 0
92
- rescue => e
93
- say "Error applying UMAP: #{e.message}", :red
94
- exit 1
95
- end
96
- end
97
+ desc "umap SUBCOMMAND ...ARGS", "UMAP dimensionality reduction commands"
98
+ subcommand "umap", Umap
97
99
 
98
100
  desc "topics", "Extract and display topics from indexed documents"
99
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
101
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
100
102
  option :min_cluster_size, type: :numeric, default: 5, desc: "Minimum documents per topic"
101
103
  option :method, type: :string, default: "hybrid", desc: "Labeling method: fast, quality, or hybrid"
102
104
  option :export, type: :string, desc: "Export topics to file (json or html)"
103
105
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing"
106
+ option :summarize, type: :boolean, default: false, aliases: "-s", desc: "Generate human-readable topic summaries using LLM"
107
+ option :llm_model, type: :string, default: "MaziyarPanahi/Qwen3-4B-GGUF", desc: "LLM model for summarization"
108
+ option :gguf_file, type: :string, default: "Qwen3-4B.Q4_K_M.gguf", desc: "GGUF file name for LLM model"
104
109
  def topics
110
+ apply_profile!
105
111
  require_relative 'topic_modeling'
106
112
 
107
113
  say "Extracting topics from indexed documents...", :green
108
114
 
109
- # Load embeddings and documents from database
110
- database = Database.new(options[:db_path])
115
+ # Load embeddings and documents from database - use cache in interactive mode
116
+ database = get_cached_database(options[:db_path] || Config.instance.database_path)
111
117
 
112
118
  begin
113
119
  # Get all documents with embeddings
@@ -130,7 +136,7 @@ module Ragnar
130
136
  # Check if we have reduced embeddings available
131
137
  first_doc = docs_with_embeddings.first
132
138
  has_reduced = first_doc[:reduced_embedding] && !first_doc[:reduced_embedding].empty?
133
-
139
+
134
140
  if has_reduced
135
141
  embeddings = docs_with_embeddings.map { |d| d[:reduced_embedding] }
136
142
  say "Using reduced embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
@@ -142,7 +148,7 @@ module Ragnar
142
148
  # Let the engine handle dimensionality reduction if needed
143
149
  reduce_dims = true
144
150
  end
145
-
151
+
146
152
  documents = docs_with_embeddings.map { |d| d[:chunk_text] }
147
153
  metadata = docs_with_embeddings.map { |d| { file_path: d[:file_path], chunk_index: d[:chunk_index] } }
148
154
 
@@ -164,12 +170,32 @@ module Ragnar
164
170
  metadata: metadata
165
171
  )
166
172
 
173
+ # Generate summaries if requested
174
+ if options[:summarize] && topics.any?
175
+ say "Generating topic summaries with LLM...", :yellow
176
+ begin
177
+ chat = LLMManager.instance.default_chat
178
+
179
+ # Add summaries to topics
180
+ topics.each_with_index do |topic, i|
181
+ say " Summarizing topic #{i+1}/#{topics.length}...", :yellow if options[:verbose]
182
+ topic.instance_variable_set(:@summary, summarize_topic(topic, chat))
183
+ end
184
+
185
+ say "Topic summaries generated!", :green
186
+ rescue => e
187
+ say "Warning: Could not generate topic summaries: #{e.message}", :yellow
188
+ say "Proceeding without summaries...", :yellow
189
+ end
190
+ end
191
+
167
192
  # Display results
168
- display_topics(topics)
193
+ display_topics(topics, show_summaries: options[:summarize])
169
194
 
170
195
  # Export if requested
171
196
  if options[:export]
172
- export_topics(topics, options[:export])
197
+ # Pass embeddings and cluster IDs for visualization
198
+ export_topics(topics, options[:export], embeddings: embeddings, cluster_ids: engine.instance_variable_get(:@cluster_ids))
173
199
  end
174
200
 
175
201
  rescue => e
@@ -184,51 +210,83 @@ module Ragnar
184
210
  option :k, type: :numeric, default: 5, desc: "Number of results to return"
185
211
  option :show_scores, type: :boolean, default: false, desc: "Show similarity scores"
186
212
  def search(query_text)
187
- database = Database.new(options[:database])
188
- embedder = Embedder.new
189
-
213
+ database = get_cached_database(options[:database] || Config.instance.database_path)
214
+ embedder = get_cached_embedder()
215
+
190
216
  # Generate embedding for query
191
217
  query_embedding = embedder.embed_text(query_text)
192
-
218
+
193
219
  # Search for similar documents
194
220
  results = database.search_similar(query_embedding, k: options[:k])
195
-
221
+
196
222
  if results.empty?
197
223
  say "No results found.", :yellow
198
224
  return
199
225
  end
200
-
226
+
201
227
  say "Found #{results.length} results:\n", :green
202
-
228
+
203
229
  results.each_with_index do |result, idx|
204
230
  say "#{idx + 1}. File: #{result[:file_path]}", :cyan
205
231
  say " Chunk: #{result[:chunk_index]}"
206
-
232
+
207
233
  if options[:show_scores]
208
234
  say " Distance: #{result[:distance].round(4)}"
209
235
  end
210
-
236
+
211
237
  # Show preview of content
212
238
  preview = result[:chunk_text][0..200].gsub(/\s+/, ' ')
213
239
  say " Content: #{preview}..."
214
240
  say ""
215
241
  end
216
242
  end
217
-
243
+
218
244
  desc "query QUESTION", "Query the RAG system"
219
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
245
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
220
246
  option :top_k, type: :numeric, default: 3, desc: "Number of top documents to use"
221
247
  option :verbose, type: :boolean, default: false, aliases: "-v", desc: "Show detailed processing steps"
248
+ option :rerank, type: :boolean, default: nil, desc: "Enable cross-encoder reranking (default from config)"
222
249
  option :json, type: :boolean, default: false, desc: "Output as JSON"
223
250
  def query(question)
224
- processor = QueryProcessor.new(db_path: options[:db_path])
251
+ apply_profile!
252
+ puts "Debug - Query called with: #{question.inspect}" if ENV['DEBUG']
253
+ puts "Debug - Options: #{options.inspect}" if ENV['DEBUG']
254
+
255
+ processor = get_cached_query_processor(options[:db_path] || Config.instance.database_path)
256
+ puts "Debug - Processor: #{processor.class}" if ENV['DEBUG']
225
257
 
226
258
  begin
227
- result = processor.query(question, top_k: options[:top_k], verbose: options[:verbose])
259
+ config = Config.instance
260
+ result = processor.query(
261
+ question,
262
+ top_k: options[:top_k] || config.query_top_k,
263
+ verbose: options[:verbose] || @@verbose_mode,
264
+ enable_rewriting: config.enable_query_rewriting?,
265
+ enable_reranking: options[:rerank].nil? ? config.enable_reranking? : options[:rerank]
266
+ )
267
+ puts "Debug - Result keys: #{result.keys}" if ENV['DEBUG']
228
268
 
229
269
  if options[:json]
230
270
  puts JSON.pretty_generate(result)
271
+ elsif interactive?
272
+ # Clean output for interactive mode - just answer, confidence, and sources
273
+ say "" # Add blank line before answer for spacing
274
+ say result[:answer]
275
+
276
+ if result[:confidence]
277
+ say "\nConfidence: #{result[:confidence]}%", :magenta
278
+ end
279
+
280
+ if result[:sources] && !result[:sources].empty?
281
+ say "\nSources:", :blue
282
+ result[:sources].each_with_index do |source, idx|
283
+ say " #{idx + 1}. #{source[:source_file]}" if source[:source_file]
284
+ end
285
+ end
286
+
287
+ say "" # Add blank line for spacing
231
288
  else
289
+ # Full output for CLI mode
232
290
  say "\n" + "="*60, :green
233
291
  say "Query: #{result[:query]}", :cyan
234
292
 
@@ -250,7 +308,7 @@ module Ragnar
250
308
  end
251
309
  end
252
310
 
253
- if options[:verbose] && result[:sub_queries]
311
+ if (options[:verbose] || false) && result[:sub_queries]
254
312
  say "\nSub-queries used:", :yellow
255
313
  result[:sub_queries].each { |sq| say " - #{sq}" }
256
314
  end
@@ -259,15 +317,15 @@ module Ragnar
259
317
  end
260
318
  rescue => e
261
319
  say "Error processing query: #{e.message}", :red
262
- say e.backtrace.first(5).join("\n") if options[:verbose]
320
+ puts "Debug - Full backtrace: #{e.backtrace.join("\n")}" if ENV['DEBUG']
263
321
  exit 1
264
322
  end
265
323
  end
266
324
 
267
325
  desc "stats", "Show database statistics"
268
- option :db_path, type: :string, default: Ragnar::DEFAULT_DB_PATH, desc: "Path to Lance database"
326
+ option :db_path, type: :string, desc: "Path to Lance database (default from config)"
269
327
  def stats
270
- db = Database.new(options[:db_path])
328
+ db = get_cached_database(options[:db_path] || Config.instance.database_path)
271
329
  stats = db.get_stats
272
330
 
273
331
  say "\nDatabase Statistics", :green
@@ -293,8 +351,382 @@ module Ragnar
293
351
  say "Ragnar v#{Ragnar::VERSION}"
294
352
  end
295
353
 
354
+ desc "config", "Show current configuration"
355
+ def config
356
+ config = Config.instance
357
+
358
+ say "\nConfiguration Settings:", :cyan
359
+ say "-" * 40
360
+
361
+ if config.config_exists?
362
+ say "Config file: #{config.config_file_path}", :green
363
+ else
364
+ say "Config file: None (using defaults)", :yellow
365
+ end
366
+
367
+ say "\nPaths:", :cyan
368
+ say " Database: #{config.database_path}"
369
+ say " Models: #{config.models_dir}"
370
+ say " History: #{config.history_file}"
371
+
372
+ say "\nEmbeddings:", :cyan
373
+ say " Model: #{config.embedding_model}"
374
+ say " Chunk size: #{config.chunk_size}"
375
+ say " Chunk overlap: #{config.chunk_overlap}"
376
+
377
+ say "\nLLM:", :cyan
378
+ say " Active profile: #{config.llm_profile_name}", :green
379
+ say " Provider: #{config.llm_provider}"
380
+ say " Model: #{config.llm_model}"
381
+ if config.available_profiles.size > 1
382
+ say " Available profiles: #{config.available_profiles.join(', ')}"
383
+ end
384
+
385
+ say "\nUMAP:", :cyan
386
+ say " Reduced dimensions: #{config.get('umap.reduced_dimensions', Ragnar::DEFAULT_REDUCED_DIMENSIONS)}"
387
+ say " N neighbors: #{config.get('umap.n_neighbors', 15)}"
388
+ say " Min distance: #{config.get('umap.min_dist', 0.1)}"
389
+
390
+ say "\nQuery:", :cyan
391
+ say " Top K: #{config.query_top_k}"
392
+ say " Query rewriting: #{config.enable_query_rewriting?}"
393
+ say " Reranking: #{config.enable_reranking?}"
394
+ say " Reranker model: #{config.reranker_model}" if config.enable_reranking?
395
+ end
396
+
397
+ desc "model", "Show current LLM model information"
398
+ def model
399
+ config = Config.instance
400
+
401
+ say "\nLLM Model Configuration:", :cyan
402
+ say "-" * 40
403
+
404
+ say "\nProfile: #{config.llm_profile_name}", :green
405
+ say " Provider: #{config.llm_provider}"
406
+ say " Model: #{config.llm_model}"
407
+
408
+ # Only show GGUF/local file info for local providers
409
+ if config.llm_provider == 'red_candle'
410
+ say "\nEmbedding Model: #{config.embedding_model}"
411
+
412
+ # Check if model files exist in HuggingFace cache
413
+ hf_cache = File.expand_path("~/.cache/huggingface/hub")
414
+ model_dir = config.llm_model.gsub("/", "--")
415
+ model_cache = File.join(hf_cache, "models--#{model_dir}")
416
+ if Dir.exist?(model_cache)
417
+ say "\nModel cached: #{model_cache}", :green
418
+ else
419
+ say "\nModel not yet downloaded (will download on first use)", :yellow
420
+ end
421
+ else
422
+ api_key = config.llm_api_key
423
+ env_key = case config.llm_provider
424
+ when 'anthropic' then ENV['ANTHROPIC_API_KEY']
425
+ when 'openai' then ENV['OPENAI_API_KEY']
426
+ end
427
+ has_key = api_key || env_key
428
+ say "\nAPI key: #{has_key ? 'configured' : 'not set'}", has_key ? :green : :red
429
+ end
430
+ end
431
+
432
+ desc "profile [NAME]", "Show or switch LLM profile"
433
+ def profile(name = nil)
434
+ config = Config.instance
435
+
436
+ if name
437
+ begin
438
+ config.set_active_profile(name)
439
+ LLMManager.instance.clear_cache
440
+ say "Switched to profile: #{name}", :green
441
+ say " Provider: #{config.llm_provider}"
442
+ say " Model: #{config.llm_model}"
443
+ rescue ArgumentError => e
444
+ say e.message, :red
445
+ end
446
+ else
447
+ say "\nLLM Profiles:", :cyan
448
+ say "-" * 40
449
+ config.llm_profiles.each do |pname, pconfig|
450
+ active = pname == config.llm_profile_name ? " (active)" : ""
451
+ say " #{pname}#{active}", active.empty? ? :white : :green
452
+ say " Provider: #{pconfig['provider']}"
453
+ say " Model: #{pconfig['model']}"
454
+ end
455
+ end
456
+ end
457
+
458
+ desc "verbose", "Toggle verbose mode on/off"
459
+ def verbose
460
+ @@verbose_mode = !@@verbose_mode
461
+ say "Verbose mode: #{@@verbose_mode ? 'on' : 'off'}", @@verbose_mode ? :green : :yellow
462
+ end
463
+
464
+ desc "clear-cache", "Clear cached instances (useful in interactive mode)"
465
+ def clear_cache_command
466
+ clear_cache
467
+ say "Cache cleared. Next commands will create fresh instances.", :green
468
+ end
469
+
470
+ desc "reset", "Reset Ragnar data (database, models, cache)"
471
+ option :all, type: :boolean, default: false, aliases: "-a", desc: "Reset everything (database, models, cache)"
472
+ option :database, type: :boolean, default: false, aliases: "-d", desc: "Reset database only"
473
+ option :models, type: :boolean, default: false, aliases: "-m", desc: "Reset UMAP models only"
474
+ option :cache, type: :boolean, default: false, aliases: "-c", desc: "Clear cache only"
475
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Skip confirmation prompt"
476
+ def reset
477
+ # Determine what to reset
478
+ reset_all = options[:all]
479
+ reset_db = options[:database] || reset_all
480
+ reset_models = options[:models] || reset_all
481
+ reset_cache = options[:cache] || reset_all
482
+
483
+ # If no specific options, default to all
484
+ if !reset_db && !reset_models && !reset_cache
485
+ reset_all = true
486
+ reset_db = reset_models = reset_cache = true
487
+ end
488
+
489
+ # Build confirmation message
490
+ items_to_reset = []
491
+ items_to_reset << "database" if reset_db
492
+ items_to_reset << "UMAP models" if reset_models
493
+ items_to_reset << "cache" if reset_cache
494
+
495
+ # Get paths that will be affected
496
+ config = Config.instance
497
+ db_path = options[:db_path] || config.database_path
498
+ model_path = File.join(config.models_dir, "umap_model.bin")
499
+
500
+ # Show what will be deleted
501
+ say "\nWARNING: This will delete the following:", :red
502
+ say "-" * 40
503
+
504
+ if reset_db
505
+ say "Database: #{db_path}", :cyan
506
+ if File.exist?(db_path)
507
+ stats = Database.new(db_path).get_stats rescue nil
508
+ if stats
509
+ say " (#{stats[:total_documents]} documents, #{stats[:total_chunks]} chunks)", :white
510
+ end
511
+ else
512
+ say " (does not exist)", :white
513
+ end
514
+ end
515
+
516
+ if reset_models
517
+ say "UMAP models:", :cyan
518
+ model_files = [
519
+ model_path,
520
+ model_path.sub(/\.bin$/, '_metadata.json'),
521
+ model_path.sub(/\.bin$/, '_embeddings.json') # Old format, if exists
522
+ ]
523
+ model_files.each do |file|
524
+ if File.exist?(file)
525
+ say " #{file} (#{(File.size(file) / 1024.0).round(1)} KB)", :white
526
+ end
527
+ end
528
+ if model_files.none? { |f| File.exist?(f) }
529
+ say " (no models found)", :white
530
+ end
531
+ end
532
+
533
+ if reset_cache
534
+ cache_dir = File.expand_path("~/.cache/ragnar")
535
+ say "Cache directory: #{cache_dir}", :cyan
536
+ if Dir.exist?(cache_dir)
537
+ cache_size = Dir.glob(File.join(cache_dir, "**/*"))
538
+ .select { |f| File.file?(f) }
539
+ .sum { |f| File.size(f) } / 1024.0 / 1024.0
540
+ say " (#{cache_size.round(1)} MB)", :white
541
+ else
542
+ say " (does not exist)", :white
543
+ end
544
+ end
545
+
546
+ say "-" * 40
547
+
548
+ # Ask for confirmation unless --force
549
+ unless options[:force]
550
+ message = "\nAre you sure you want to reset #{items_to_reset.join(', ')}?"
551
+
552
+ # Check if we're in interactive mode
553
+ if ENV['THOR_INTERACTIVE_SESSION'] == 'true'
554
+ # In interactive mode, use a simple prompt
555
+ say message, :yellow
556
+ response = ask("Type 'yes' to confirm, anything else to cancel:", :yellow)
557
+ confirmed = response.downcase == 'yes'
558
+ else
559
+ # In CLI mode, use Thor's yes? method
560
+ confirmed = yes?(message + " (y/N)", :yellow)
561
+ end
562
+
563
+ unless confirmed
564
+ say "\nReset cancelled.", :cyan
565
+ return
566
+ end
567
+ end
568
+
569
+ # Perform the reset
570
+ say "\nResetting...", :green
571
+
572
+ if reset_db && File.exist?(db_path)
573
+ say "Removing database: #{db_path}"
574
+ FileUtils.rm_rf(db_path)
575
+ say " ✓ Database removed", :green
576
+ end
577
+
578
+ if reset_models
579
+ model_files = [
580
+ model_path,
581
+ model_path.sub(/\.bin$/, '_metadata.json'),
582
+ model_path.sub(/\.bin$/, '_embeddings.json')
583
+ ]
584
+ model_files.each do |file|
585
+ if File.exist?(file)
586
+ say "Removing model file: #{file}"
587
+ FileUtils.rm_f(file)
588
+ say " ✓ Removed", :green
589
+ end
590
+ end
591
+ end
592
+
593
+ if reset_cache
594
+ # Clear in-memory cache
595
+ clear_cache
596
+
597
+ # Optionally clear cache directory (but preserve history)
598
+ cache_dir = File.expand_path("~/.cache/ragnar")
599
+ if Dir.exist?(cache_dir)
600
+ # Preserve history file
601
+ history_file = File.join(cache_dir, "history")
602
+ history_content = File.read(history_file) if File.exist?(history_file)
603
+
604
+ # Remove cache directory contents except history
605
+ Dir.glob(File.join(cache_dir, "*")).each do |item|
606
+ next if File.basename(item) == "history"
607
+ if File.directory?(item)
608
+ FileUtils.rm_rf(item)
609
+ else
610
+ FileUtils.rm_f(item)
611
+ end
612
+ say "Removed cache item: #{File.basename(item)}", :green
613
+ end
614
+ end
615
+ say " ✓ Cache cleared", :green
616
+ end
617
+
618
+ say "\nReset complete!", :green
619
+ say "You can now start fresh with 'ragnar index <path>'", :cyan
620
+ end
621
+
622
+ desc "init-config", "Generate a configuration file with current defaults"
623
+ option :global, type: :boolean, default: false, aliases: "-g", desc: "Create global config in home directory"
624
+ option :force, type: :boolean, default: false, aliases: "-f", desc: "Overwrite existing config file"
625
+ def init_config
626
+ config = Config.instance
627
+
628
+ if options[:global]
629
+ config_path = File.expand_path('~/.ragnar.yml')
630
+ else
631
+ config_path = File.join(Dir.pwd, '.ragnar.yml')
632
+ end
633
+
634
+ if File.exist?(config_path) && !options[:force]
635
+ say "Config file already exists at: #{config_path}", :yellow
636
+ say "Use --force to overwrite, or choose a different location.", :yellow
637
+ return
638
+ end
639
+
640
+ generated_path = config.generate_config_file(config_path)
641
+ say "Config file created at: #{generated_path}", :green
642
+ say "Edit this file to customize Ragnar's behavior.", :cyan
643
+
644
+ if config.config_exists?
645
+ say "\nNote: Currently using config from: #{config.config_file_path}", :yellow
646
+ end
647
+ end
648
+
296
649
  private
297
650
 
651
+ def apply_profile!
652
+ return unless options[:profile]
653
+ Config.instance.set_active_profile(options[:profile])
654
+ LLMManager.instance.clear_cache
655
+ end
656
+
657
+ # Cached instance helpers for interactive mode
658
+ def get_cached_database(db_path = nil)
659
+ # Use config default if no path provided
660
+ db_path ||= Config.instance.database_path
661
+
662
+ # Cache database per path - clear cache if path changes
663
+ if @@cached_db_path != db_path
664
+ @@cached_database = nil
665
+ @@cached_db_path = db_path
666
+ @@cached_query_processor = nil # Also clear dependent caches
667
+ end
668
+
669
+ @@cached_database ||= Database.new(db_path)
670
+ end
671
+
672
+ def get_cached_embedder(model_name = nil)
673
+ # Use config default if no model specified
674
+ model_name ||= Config.instance.embedding_model
675
+ @@cached_embedder ||= Embedder.new(model_name: model_name)
676
+ end
677
+
678
+ def get_cached_llm_manager
679
+ @@cached_llm_manager ||= LLMManager.instance
680
+ end
681
+
682
+ def get_cached_query_processor(db_path = nil)
683
+ # Use config default if no path provided
684
+ db_path ||= Config.instance.database_path
685
+
686
+ # Cache query processor per database path
687
+ if @@cached_db_path != db_path || @@cached_query_processor.nil?
688
+ @@cached_query_processor = QueryProcessor.new(db_path: db_path)
689
+ end
690
+
691
+ @@cached_query_processor
692
+ end
693
+
694
+ def clear_cache
695
+ @@cached_database = nil
696
+ @@cached_embedder = nil
697
+ @@cached_llm_manager = nil
698
+ @@cached_query_processor = nil
699
+ @@cached_db_path = nil
700
+ end
701
+
702
+
703
+ def summarize_topic(topic, chat)
704
+ # Get representative documents for context
705
+ sample_docs = topic.representative_docs(k: 3)
706
+
707
+ # Simple, clear prompt for summarization
708
+ prompt = <<~PROMPT
709
+ Summarize what connects these documents in 1-2 sentences:
710
+
711
+ Key terms: #{topic.terms.first(5).join(', ')}
712
+
713
+ Documents:
714
+ #{sample_docs.map.with_index { |doc, i| "#{i+1}. #{doc}" }.join("\n")}
715
+
716
+ Summary:
717
+ PROMPT
718
+
719
+ begin
720
+ summary = chat.ask(prompt).content.strip
721
+ # Clean up common artifacts
722
+ summary = summary.lines.first&.strip || "Related documents"
723
+ summary = summary.gsub(/^(Summary:|Topic:|Documents:)/i, '').strip
724
+ summary.empty? ? "Documents about #{topic.terms.first(2).join(' and ')}" : summary
725
+ rescue => e
726
+ "Documents about #{topic.terms.first(2).join(' and ')}"
727
+ end
728
+ end
729
+
298
730
  def fetch_all_documents(database)
299
731
  # Temporary workaround to get all documents
300
732
  # In production, we'd add a proper method to Database class
@@ -321,9 +753,12 @@ module Ragnar
321
753
  []
322
754
  end
323
755
 
324
- def display_topics(topics)
756
+ def display_topics(topics, show_summaries: false)
325
757
  say "\n" + "="*60, :green
326
758
  say "Topic Analysis Results", :cyan
759
+ if show_summaries
760
+ say " (with LLM-generated summaries)", :yellow
761
+ end
327
762
  say "="*60, :green
328
763
 
329
764
  if topics.empty?
@@ -342,21 +777,21 @@ module Ragnar
342
777
  say "\n" + "─" * 40, :blue
343
778
  say "MAJOR TOPICS (≥20 docs)", :blue
344
779
  say "─" * 40, :blue
345
- display_topic_group(large_topics, :cyan)
780
+ display_topic_group(large_topics, :cyan, show_summaries: show_summaries)
346
781
  end
347
782
 
348
783
  if medium_topics.any?
349
784
  say "\n" + "─" * 40, :yellow
350
785
  say "MEDIUM TOPICS (10-19 docs)", :yellow
351
786
  say "─" * 40, :yellow
352
- display_topic_group(medium_topics, :yellow)
787
+ display_topic_group(medium_topics, :yellow, show_summaries: show_summaries)
353
788
  end
354
789
 
355
790
  if small_topics.any?
356
791
  say "\n" + "─" * 40, :white
357
792
  say "MINOR TOPICS (<10 docs)", :white
358
793
  say "─" * 40, :white
359
- display_topic_group(small_topics, :white)
794
+ display_topic_group(small_topics, :white, show_summaries: show_summaries)
360
795
  end
361
796
 
362
797
  # Summary statistics
@@ -380,10 +815,18 @@ module Ragnar
380
815
  say " Small (<10): #{small_topics.length} topics, #{small_topics.sum(&:size)} docs"
381
816
  end
382
817
 
383
- def display_topic_group(topics, color)
818
+ def display_topic_group(topics, color, show_summaries: false)
384
819
  topics.sort_by { |t| -t.size }.each_with_index do |topic, idx|
385
820
  say "\n#{topic.label || 'Unlabeled'} (#{topic.size} docs)", color
386
821
 
822
+ # Show LLM summary if available
823
+ if show_summaries
824
+ summary = topic.instance_variable_get(:@summary)
825
+ if summary
826
+ say " Summary: #{summary}", :green
827
+ end
828
+ end
829
+
387
830
  # Show coherence as a bar
388
831
  if topic.coherence > 0
389
832
  coherence_pct = (topic.coherence * 100).round(0)
@@ -395,8 +838,8 @@ module Ragnar
395
838
  # Compact term display
396
839
  say " Terms: #{topic.terms.first(6).join(' • ')}" if topic.terms.any?
397
840
 
398
- # Short sample
399
- if topic.representative_docs(k: 1).any?
841
+ # Short sample (unless we showed a summary)
842
+ if !show_summaries && topic.representative_docs(k: 1).any?
400
843
  preview = topic.representative_docs(k: 1).first
401
844
  preview = preview[0..100] + "..." if preview.length > 100
402
845
  say " \"#{preview}\"", :white
@@ -404,25 +847,34 @@ module Ragnar
404
847
  end
405
848
  end
406
849
 
407
- def export_topics(topics, format)
850
+ def export_topics(topics, format, embeddings: nil, cluster_ids: nil)
408
851
  case format.downcase
409
852
  when 'json'
410
853
  export_topics_json(topics)
411
854
  when 'html'
412
- export_topics_html(topics)
855
+ export_topics_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
413
856
  else
414
857
  say "Unknown export format: #{format}. Use 'json' or 'html'.", :red
415
858
  end
416
859
  end
417
860
 
418
861
  def export_topics_json(topics)
862
+ topics_data = topics.map do |topic|
863
+ topic_hash = topic.to_h
864
+ # Add summary if it exists
865
+ summary = topic.instance_variable_get(:@summary)
866
+ topic_hash[:summary] = summary if summary
867
+ topic_hash
868
+ end
869
+
419
870
  data = {
420
871
  generated_at: Time.now.iso8601,
421
- topics: topics.map(&:to_h),
872
+ topics: topics_data,
422
873
  summary: {
423
874
  total_topics: topics.length,
424
875
  total_documents: topics.sum(&:size),
425
- average_size: (topics.sum(&:size).to_f / topics.length).round(1)
876
+ average_size: (topics.sum(&:size).to_f / topics.length).round(1),
877
+ has_summaries: topics.any? { |t| t.instance_variable_get(:@summary) }
426
878
  }
427
879
  }
428
880
 
@@ -431,9 +883,9 @@ module Ragnar
431
883
  say "Topics exported to: #{filename}", :green
432
884
  end
433
885
 
434
- def export_topics_html(topics)
886
+ def export_topics_html(topics, embeddings: nil, cluster_ids: nil)
435
887
  # Generate self-contained HTML with D3.js visualization
436
- html = generate_topic_visualization_html(topics)
888
+ html = generate_topic_visualization_html(topics, embeddings: embeddings, cluster_ids: cluster_ids)
437
889
 
438
890
  filename = "topics_#{Time.now.strftime('%Y%m%d_%H%M%S')}.html"
439
891
  File.write(filename, html)
@@ -446,113 +898,5 @@ module Ragnar
446
898
  end
447
899
  end
448
900
 
449
- def generate_topic_visualization_html(topics)
450
- # Convert topics to JSON for D3.js
451
- topics_json = topics.map do |topic|
452
- {
453
- id: topic.id,
454
- label: topic.label || "Topic #{topic.id}",
455
- size: topic.size,
456
- terms: topic.terms.first(10),
457
- coherence: topic.coherence,
458
- samples: topic.representative_docs(k: 2).map { |d| d[0..200] }
459
- }
460
- end.to_json
461
-
462
- # HTML template with embedded D3.js
463
- <<~HTML
464
- <!DOCTYPE html>
465
- <html>
466
- <head>
467
- <meta charset="utf-8">
468
- <title>Topic Visualization</title>
469
- <script src="https://d3js.org/d3.v7.min.js"></script>
470
- <style>
471
- body { font-family: -apple-system, sans-serif; margin: 20px; }
472
- #viz { width: 100%; height: 500px; border: 1px solid #ddd; }
473
- .topic { cursor: pointer; }
474
- .topic:hover { opacity: 0.8; }
475
- #details { margin-top: 20px; padding: 15px; background: #f5f5f5; }
476
- .term { display: inline-block; margin: 5px; padding: 5px 10px; background: #e0e0e0; border-radius: 3px; }
477
- </style>
478
- </head>
479
- <body>
480
- <h1>Topic Analysis Results</h1>
481
- <div id="viz"></div>
482
- <div id="details">Click on a topic to see details</div>
483
-
484
- <script>
485
- const data = #{topics_json};
486
-
487
- // Create bubble chart
488
- const width = document.getElementById('viz').clientWidth;
489
- const height = 500;
490
-
491
- const svg = d3.select("#viz")
492
- .append("svg")
493
- .attr("width", width)
494
- .attr("height", height);
495
-
496
- // Create scale for bubble sizes
497
- const sizeScale = d3.scaleSqrt()
498
- .domain([0, d3.max(data, d => d.size)])
499
- .range([10, 50]);
500
-
501
- // Create color scale
502
- const colorScale = d3.scaleSequential(d3.interpolateViridis)
503
- .domain([0, 1]);
504
-
505
- // Create force simulation
506
- const simulation = d3.forceSimulation(data)
507
- .force("x", d3.forceX(width / 2).strength(0.05))
508
- .force("y", d3.forceY(height / 2).strength(0.05))
509
- .force("collide", d3.forceCollide(d => sizeScale(d.size) + 2));
510
-
511
- // Create bubbles
512
- const bubbles = svg.selectAll(".topic")
513
- .data(data)
514
- .enter().append("g")
515
- .attr("class", "topic");
516
-
517
- bubbles.append("circle")
518
- .attr("r", d => sizeScale(d.size))
519
- .attr("fill", d => colorScale(d.coherence))
520
- .attr("stroke", "#fff")
521
- .attr("stroke-width", 2);
522
-
523
- bubbles.append("text")
524
- .text(d => d.label)
525
- .attr("text-anchor", "middle")
526
- .attr("dy", ".3em")
527
- .style("font-size", d => Math.min(sizeScale(d.size) / 3, 14) + "px");
528
-
529
- // Add click handler
530
- bubbles.on("click", function(event, d) {
531
- showDetails(d);
532
- });
533
-
534
- // Update positions
535
- simulation.on("tick", () => {
536
- bubbles.attr("transform", d => `translate(${d.x},${d.y})`);
537
- });
538
-
539
- // Show topic details
540
- function showDetails(topic) {
541
- const details = document.getElementById('details');
542
- details.innerHTML = `
543
- <h2>${topic.label}</h2>
544
- <p><strong>Documents:</strong> ${topic.size}</p>
545
- <p><strong>Coherence:</strong> ${(topic.coherence * 100).toFixed(1)}%</p>
546
- <p><strong>Top Terms:</strong></p>
547
- <div>${topic.terms.map(t => `<span class="term">${t}</span>`).join('')}</div>
548
- <p><strong>Sample Documents:</strong></p>
549
- ${topic.samples.map(s => `<p style="font-size: 0.9em; color: #666;">"${s}..."</p>`).join('')}
550
- `;
551
- }
552
- </script>
553
- </body>
554
- </html>
555
- HTML
556
- end
557
901
  end
558
- end
902
+ end