ragnar-cli 0.1.0.pre.3 → 0.1.0.pre.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
module Ragnar
  class CLI < Thor
    # Thor subcommands for the UMAP dimensionality-reduction workflow:
    # `train` fits a model on the stored embeddings and `apply` runs a
    # previously saved model over them.
    class Umap < Thor
      desc "train", "Train UMAP model on existing embeddings"
      option :db_path, type: :string, desc: "Path to Lance database (default from config)"
      option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
      option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
      option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
      option :model_path, type: :string, desc: "Path to save UMAP model"
      # Fits a UMAP model and prints the statistics returned by the processor.
      # Any StandardError raised while training is reported and the process
      # exits with status 1; errors raised while building the processor are
      # not rescued here.
      def train
        say "Training UMAP model on embeddings...", :green

        settings = Config.instance
        # An explicit --model_path wins; otherwise use the configured models dir.
        target_model = options[:model_path] || File.join(settings.models_dir, "umap_model.bin")
        source_db = options[:db_path] || settings.database_path

        reducer = UmapProcessor.new(db_path: source_db, model_path: target_model)

        begin
          summary = reducer.train(
            n_components: options[:n_components] || 50,
            n_neighbors: options[:n_neighbors] || 15,
            min_dist: options[:min_dist] || 0.1
          )

          say "\nUMAP training complete!", :green
          say "Embeddings processed: #{summary[:embeddings_count]}"
          say "Original dimensions: #{summary[:original_dims]}"
          say "Reduced dimensions: #{summary[:reduced_dims]}"
          say "Model saved to: #{reducer.model_path}"
        rescue => failure
          say "Error during UMAP training: #{failure.message}", :red
          exit 1
        end
      end

      desc "apply", "Apply trained UMAP model to reduce embedding dimensions"
      option :db_path, type: :string, desc: "Path to Lance database (default from config)"
      option :model_path, type: :string, desc: "Path to UMAP model"
      option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
      # Applies a saved UMAP model to the stored embeddings. Exits with
      # status 1 when the model file is missing or when applying it raises
      # a StandardError.
      def apply
        settings = Config.instance
        saved_model = options[:model_path] || File.join(settings.models_dir, "umap_model.bin")

        # Bail out before touching the database when there is no model to apply.
        if !File.exist?(saved_model)
          say "Error: UMAP model not found at: #{saved_model}", :red
          say "Please run 'ragnar umap train' first to create a model.", :yellow
          exit 1
        end

        say "Applying UMAP model to embeddings...", :green

        source_db = options[:db_path] || settings.database_path
        reducer = UmapProcessor.new(db_path: source_db, model_path: saved_model)

        begin
          outcome = reducer.apply(batch_size: options[:batch_size] || 100)

          say "\nUMAP application complete!", :green
          say "Embeddings processed: #{outcome[:processed]}"
          say "Already processed: #{outcome[:skipped]}"
          if outcome[:errors] > 0
            say "Errors: #{outcome[:errors]}"
          end
        rescue => failure
          say "Error applying UMAP: #{failure.message}", :red
          exit 1
        end
      end
    end
  end
end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"

module Ragnar
  # Renders topic-model results as a self-contained HTML page (D3 bubble
  # chart plus a details panel).
  module CLIVisualization
    # Characters that must not appear literally inside an inline <script>
    # payload: "<" / ">" could form "</script>" or "<!--", and "&" is escaped
    # alongside them. The \uXXXX forms decode back to the same characters
    # when the browser evaluates the JSON as JavaScript.
    INLINE_JSON_ESCAPES = { "<" => '\u003c', ">" => '\u003e', "&" => '\u0026' }.freeze

    # Builds the HTML document for a list of topics.
    #
    # @param topics [Enumerable] objects responding to +id+, +label+, +size+,
    #   +terms+, +coherence+ and +representative_docs(k:)+
    # @param embeddings [Object, nil] accepted for interface compatibility; unused here
    # @param cluster_ids [Object, nil] accepted for interface compatibility; unused here
    # @return [String] complete HTML page
    def generate_topic_visualization_html(topics, embeddings: nil, cluster_ids: nil)
      topics_json = inline_script_json(topics.map { |topic| topic_payload(topic) })

      # Topic text comes from indexed documents, so the page treats it as
      # untrusted: the JSON above is script-safe and every value written
      # through innerHTML goes through escapeHtml().
      <<~HTML
        <!DOCTYPE html>
        <html>
        <head>
          <meta charset="utf-8">
          <title>Topic Visualization</title>
          <script src="https://d3js.org/d3.v7.min.js"></script>
          <style>
            body {
              font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
              margin: 20px;
              background: #f8f9fa;
            }
            .container {
              max-width: 1400px;
              margin: 0 auto;
            }
            h1 {
              color: #2c3e50;
              margin-bottom: 10px;
            }
            .viz-panel {
              background: white;
              border-radius: 8px;
              box-shadow: 0 2px 4px rgba(0,0,0,0.1);
              padding: 15px;
            }
            #bubble-viz {
              height: 500px;
            }
            .topic { cursor: pointer; }
            .topic:hover { opacity: 0.8; }
            #details {
              background: white;
              padding: 20px;
              border-radius: 8px;
              box-shadow: 0 2px 4px rgba(0,0,0,0.1);
              margin-top: 20px;
            }
            .term {
              display: inline-block;
              margin: 5px;
              padding: 5px 10px;
              background: #e3f2fd;
              border-radius: 3px;
              color: #1976d2;
              font-size: 14px;
            }
          </style>
        </head>
        <body>
          <div class="container">
            <h1>Topic Analysis Results</h1>

            <div class="viz-panel">
              <h3>Topic Overview</h3>
              <div id="bubble-viz"></div>
            </div>

            <div id="details">Click on a topic to see details</div>
          </div>

          <script>
            const topicsData = #{topics_json};

            // Create bubble chart
            function createBubbleChart() {
              const width = document.getElementById('bubble-viz').clientWidth - 30;
              const height = 470;

              const svg = d3.select("#bubble-viz")
                .append("svg")
                .attr("width", width)
                .attr("height", height);

              // Create scale for bubble sizes
              const sizeScale = d3.scaleSqrt()
                .domain([0, d3.max(topicsData, d => d.size)])
                .range([20, 60]);

              // Create color scale
              const colorScale = d3.scaleSequential(d3.interpolateTurbo)
                .domain([0, 1]);

              // Create force simulation
              const simulation = d3.forceSimulation(topicsData)
                .force("x", d3.forceX(width / 2).strength(0.05))
                .force("y", d3.forceY(height / 2).strength(0.05))
                .force("collide", d3.forceCollide(d => sizeScale(d.size) + 3));

              // Create bubbles
              const bubbles = svg.selectAll(".topic")
                .data(topicsData)
                .enter().append("g")
                .attr("class", "topic");

              bubbles.append("circle")
                .attr("r", d => sizeScale(d.size))
                .attr("fill", d => colorScale(d.coherence))
                .attr("stroke", "#fff")
                .attr("stroke-width", 2)
                .style("filter", "drop-shadow(0px 2px 3px rgba(0,0,0,0.2))");

              bubbles.append("text")
                .text(d => d.label)
                .attr("text-anchor", "middle")
                .attr("dy", ".3em")
                .style("font-size", d => Math.min(sizeScale(d.size) / 3, 14) + "px")
                .style("fill", "white")
                .style("font-weight", "500");

              // Add click handler
              bubbles.on("click", function(event, d) {
                showDetails(d);
              });

              // Update positions
              simulation.on("tick", () => {
                bubbles.attr("transform", d => `translate(${d.x},${d.y})`);
              });
            }

            // Escape a value for insertion as HTML element content
            function escapeHtml(value) {
              const holder = document.createElement('div');
              holder.textContent = (value === null || value === undefined) ? '' : String(value);
              return holder.innerHTML;
            }

            // Show topic details
            function showDetails(topic) {
              const details = document.getElementById('details');
              let summaryHtml = '';
              if (topic.summary) {
                summaryHtml = `
                  <p><strong>Summary:</strong></p>
                  <p style="font-size: 1.1em; color: #2c5234; padding: 15px; background: #e8f5e8; border-radius: 6px; border-left: 4px solid #4caf50; margin: 15px 0; line-height: 1.5;">${escapeHtml(topic.summary)}</p>
                `;
              }

              details.innerHTML = `
                <h2>${escapeHtml(topic.label)}</h2>
                <p><strong>Documents:</strong> ${escapeHtml(topic.size)}</p>
                <p><strong>Coherence:</strong> ${(topic.coherence * 100).toFixed(1)}%</p>
                ${summaryHtml}
                <p><strong>Top Terms:</strong></p>
                <div>${topic.terms.map(t => `<span class="term">${escapeHtml(t)}</span>`).join('')}</div>
                <p><strong>Sample Documents:</strong></p>
                ${topic.samples.map(s => `<p style="font-size: 0.9em; color: #666; padding: 10px; background: #f5f5f5; border-radius: 4px; margin: 10px 0;">"${escapeHtml(s)}..."</p>`).join('')}
              `;
            }

            // Initialize visualizations
            createBubbleChart();

            // Show first topic details by default
            if (topicsData.length > 0) {
              showDetails(topicsData[0]);
            }
          </script>
        </body>
        </html>
      HTML
    end

    private

    # Converts one topic into the plain hash consumed by the page's JavaScript.
    def topic_payload(topic)
      payload = {
        id: topic.id,
        label: topic.label || "Topic #{topic.id}",
        size: topic.size,
        terms: topic.terms.first(10),
        coherence: topic.coherence,
        samples: topic.representative_docs(k: 2).map { |doc| doc[0..200] }
      }

      # The summary is read straight from the @summary instance variable, as
      # the original code did; it is included only when set.
      summary = topic.instance_variable_get(:@summary)
      payload[:summary] = summary if summary
      payload
    end

    # Serialises +data+ to JSON that is safe to embed inside a <script> tag.
    def inline_script_json(data)
      JSON.generate(data).gsub(/[<>&]/) { |char| INLINE_JSON_ESCAPES.fetch(char) }
    end
  end
end
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+ require 'pathname'
5
+ require 'singleton'
6
+ require 'fileutils'
7
+
8
module Ragnar
  # Process-wide configuration, loaded once from the first config file found
  # in the current directory or the home directory (see CONFIG_FILENAMES) and
  # falling back to built-in defaults for anything not set.
  class Config
    include Singleton

    CONFIG_FILENAMES = ['.ragnar.yml', '.ragnarrc.yml', 'ragnar.yml'].freeze

    # Fallbacks used when neither a profile nor the flat llm.* keys are set.
    DEFAULT_LLM_PROVIDER = 'red_candle'
    DEFAULT_LLM_MODEL = 'MaziyarPanahi/Qwen3-4B-GGUF'

    # @return [String, nil] path of the config file in use, or nil when none was found
    attr_reader :config_file_path

    def initialize
      @active_profile = nil
      @config = load_config
      ensure_directories_exist
    end

    # Main config access method.
    #
    # @param key_path [String] dot-separated path, e.g. "storage.database_path"
    # @param default [Object] returned when the path is missing or nil
    #   (a configured +false+ is returned as-is)
    # @return [Object] the configured value or +default+; strings starting
    #   with "~" are expanded as home-relative paths when possible
    def get(key_path, default = nil)
      value = key_path.split('.').reduce(@config) do |node, key|
        # A scalar or list part-way down the path means "not configured";
        # do not call into it.
        break nil unless node.is_a?(Hash)

        node[key]
      end

      expand_home(value.nil? ? default : value)
    end

    # Common config accessors
    def database_path
      get('storage.database_path', default_database_path)
    end

    def history_file
      get('storage.history_file', default_history_file)
    end

    def models_dir
      get('storage.models_dir', default_models_dir)
    end

    def embedding_model
      get('embeddings.model', Ragnar::DEFAULT_EMBEDDING_MODEL)
    end

    def chunk_size
      get('embeddings.chunk_size', Ragnar::DEFAULT_CHUNK_SIZE)
    end

    def chunk_overlap
      get('embeddings.chunk_overlap', Ragnar::DEFAULT_CHUNK_OVERLAP)
    end

    # LLM Profile support
    # Profiles allow switching between LLM providers/models via --profile flag
    # Backwards compatible: flat llm.provider/llm.default_model still work if no profiles defined

    # Selects the profile used by the llm_* accessors.
    #
    # @param name [#to_s] profile name
    # @raise [ArgumentError] when no profile with that name exists
    def set_active_profile(name)
      name = name.to_s
      profiles = llm_profiles
      unless profiles.key?(name)
        available = profiles.keys.join(', ')
        raise ArgumentError, "Unknown profile '#{name}'. Available profiles: #{available}"
      end
      @active_profile = name
    end

    # @return [String] explicitly selected profile, else llm.default_profile,
    #   else the first defined profile, else "default"
    def llm_profile_name
      @active_profile || get('llm.default_profile', nil) || llm_profiles.keys.first || 'default'
    end

    # @return [Hash] configured profiles, or a single synthesized "default"
    #   profile built from the flat llm.provider / llm.default_model keys
    def llm_profiles
      configured = get('llm.profiles', nil)
      return configured if configured.is_a?(Hash) && !configured.empty?

      # Backwards compat: synthesize a profile from flat keys
      {
        'default' => {
          'provider' => get('llm.provider', DEFAULT_LLM_PROVIDER),
          'model' => get('llm.default_model', DEFAULT_LLM_MODEL)
        }
      }
    end

    # @return [Hash, nil] the active profile, falling back to the first one
    #   when the active name does not match any profile
    def llm_profile
      profiles = llm_profiles
      profiles[llm_profile_name] || profiles.values.first
    end

    def available_profiles
      llm_profiles.keys
    end

    # Create a new RubyLLM chat instance with the active profile's settings
    def create_chat
      api_key = llm_api_key
      provider = llm_provider.to_sym

      # Configure RubyLLM with the API key if present
      configure_provider_api_key(provider, api_key) if api_key

      RubyLLM.chat(provider: provider, model: llm_model)
    end

    def llm_provider
      llm_profile&.dig('provider') || get('llm.provider', DEFAULT_LLM_PROVIDER)
    end

    def llm_model
      llm_profile&.dig('model') || get('llm.default_model', DEFAULT_LLM_MODEL)
    end

    def llm_gguf_file
      get('llm.default_gguf_file', "Qwen3-4B.Q4_K_M.gguf")
    end

    def llm_api_key
      llm_profile&.dig('api_key') || get('llm.api_key', nil)
    end

    def interactive_prompt
      get('interactive.prompt', "ragnar> ")
    end

    def quiet_mode?
      get('interactive.quiet_mode', true)
    end

    def show_progress?
      get('output.show_progress', true)
    end

    def query_top_k
      get('query.top_k', 3)
    end

    def enable_query_rewriting?
      get('query.enable_query_rewriting', true)
    end

    def enable_reranking?
      get('query.enable_reranking', true)
    end

    def reranker_model
      get('query.reranker_model', 'BAAI/bge-reranker-base')
    end

    # Config file management

    def config_exists?
      !@config_file_path.nil?
    end

    # Re-reads the config file and re-creates the storage directories.
    def reload!
      @config = load_config
      ensure_directories_exist
    end

    # Writes a config file populated with the built-in default settings.
    #
    # @param path [String, nil] destination; defaults to ~/.ragnar.yml
    # @return [String] the path written
    def generate_config_file(path = nil)
      path ||= File.expand_path('~/.ragnar.yml')

      # Ensure parent directory exists
      FileUtils.mkdir_p(File.dirname(path))

      # Write config file with comments
      File.write(path, generate_yaml_with_comments(default_config_template))
      path
    end

    private

    # Default settings written by #generate_config_file.
    def default_config_template
      {
        'storage' => {
          'database_path' => '~/.cache/ragnar/database',
          'models_dir' => '~/.cache/ragnar/models',
          'history_file' => '~/.cache/ragnar/history'
        },
        'embeddings' => {
          'model' => Ragnar::DEFAULT_EMBEDDING_MODEL,
          'chunk_size' => Ragnar::DEFAULT_CHUNK_SIZE,
          'chunk_overlap' => Ragnar::DEFAULT_CHUNK_OVERLAP
        },
        'umap' => {
          'reduced_dimensions' => Ragnar::DEFAULT_REDUCED_DIMENSIONS,
          'n_neighbors' => 15,
          'min_dist' => 0.1,
          'model_filename' => 'umap_model.bin'
        },
        'llm' => {
          'default_profile' => 'red_candle',
          'profiles' => {
            'red_candle' => {
              'provider' => 'red_candle',
              'model' => 'MaziyarPanahi/Qwen3-4B-GGUF'
            },
            'opus' => {
              'provider' => 'anthropic',
              'model' => 'claude-opus-4-6'
            },
            'sonnet' => {
              'provider' => 'anthropic',
              'model' => 'claude-sonnet-4-6'
            }
          }
        },
        'query' => {
          'top_k' => 3,
          'enable_query_rewriting' => true,
          'enable_reranking' => true,
          'reranker_model' => 'BAAI/bge-reranker-base'
        },
        'interactive' => {
          'prompt' => 'ragnar> ',
          'quiet_mode' => true
        },
        'output' => {
          'show_progress' => true
        }
      }
    end

    # Hands the API key to RubyLLM for the providers handled here; other
    # providers are left untouched.
    def configure_provider_api_key(provider, api_key)
      case provider
      when :anthropic
        RubyLLM.configure { |c| c.anthropic_api_key = api_key }
      when :openai
        RubyLLM.configure { |c| c.openai_api_key = api_key }
      end
    end

    # Locates and parses the config file. Never raises for a StandardError:
    # an unreadable or malformed file produces a warning and an empty config.
    def load_config
      @config_file_path = find_config_file
      return {} unless @config_file_path && File.exist?(@config_file_path)

      # The file may come from whatever directory the tool is run in, so it
      # is parsed with safe_load rather than an object-instantiating load.
      # Symbols and aliases stay permitted.
      parsed = File.open(@config_file_path, 'r:bom|utf-8') do |io|
        YAML.safe_load(io, permitted_classes: [Symbol], aliases: true, filename: @config_file_path)
      end
      return parsed if parsed.is_a?(Hash)

      # An empty file parses to nil/false and is simply "no settings"; any
      # other non-mapping document cannot be used as configuration.
      warn "Warning: Config file #{@config_file_path} does not contain a YAML mapping; ignoring it" if parsed
      {}
    rescue => e
      warn "Warning: Error loading config file #{@config_file_path}: #{e.message}"
      {}
    end

    # @return [String, nil] first existing config file, or nil when none exists
    def find_config_file
      # Search order: current directory → home directory
      [Dir.pwd, File.expand_path('~')].each do |dir|
        CONFIG_FILENAMES.each do |filename|
          candidate = File.join(dir, filename)
          return candidate if File.exist?(candidate)
        end
      end

      nil
    end

    # Creates the database dir, the models dir and the history file's parent dir.
    def ensure_directories_exist
      [database_path, models_dir, File.dirname(history_file)].compact.each do |dir|
        FileUtils.mkdir_p(dir)
      end
    end

    def default_database_path
      File.join(cache_home, 'ragnar', 'database')
    end

    def default_history_file
      File.join(cache_home, 'ragnar', 'history')
    end

    def default_models_dir
      File.join(cache_home, 'ragnar', 'models')
    end

    # Base cache directory. An unset *or empty* XDG_CACHE_HOME falls back to
    # ~/.cache, so an empty variable no longer resolves to "/ragnar/...".
    def cache_home
      configured = ENV['XDG_CACHE_HOME']
      return File.expand_path('~/.cache') if configured.nil? || configured.empty?

      configured
    end

    # Expands a leading "~" in string values. Values that merely start with
    # "~" but are not paths (for example a prompt of "~> ") make
    # File.expand_path raise ArgumentError; those are returned unchanged.
    def expand_home(value)
      return value unless value.is_a?(String) && value.start_with?('~')

      File.expand_path(value)
    rescue ArgumentError
      value
    end

    # Prepends the explanatory header comment to the dumped YAML.
    def generate_yaml_with_comments(config_hash)
      header = <<~HEADER
        # Ragnar Configuration File
        #
        # This file configures default settings for Ragnar RAG system.
        # Save as .ragnar.yml in your project directory or ~/.ragnar.yml for global settings.
        #
        # Search order: ./.ragnar.yml → ~/.ragnar.yml → built-in defaults
        #
        # All paths support ~ expansion (e.g., ~/.cache/ragnar/database)

      HEADER

      header + YAML.dump(config_hash)
    end
  end
end