ragnar-cli 0.1.0.pre.2 → 0.1.0.pre.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,24 +1,27 @@
1
- # Main entry point for topic modeling functionality
2
- # Designed for future extraction into a separate gem
1
+ # frozen_string_literal: true
3
2
 
4
- require_relative 'topic_modeling/topic'
5
- require_relative 'topic_modeling/term_extractor'
6
- require_relative 'topic_modeling/metrics'
7
- require_relative 'topic_modeling/topic_labeler'
8
- require_relative 'topic_modeling/engine'
3
+ # Topic modeling wrapper that delegates to the Topical gem
4
+ # This maintains backward compatibility while using the extracted library
5
+
6
+ require 'topical'
9
7
 
10
8
  module Ragnar
11
9
  module TopicModeling
10
+ # Re-export Topical classes for backward compatibility
11
+ Topic = Topical::Topic
12
+ Engine = Topical::Engine
13
+
14
+ # Re-export metrics module
15
+ Metrics = Topical::Metrics
12
16
 
13
17
  # Convenience method to create a new topic modeling engine
14
18
  def self.new(**options)
15
- Engine.new(**options)
19
+ Topical::Engine.new(**options)
16
20
  end
17
21
 
18
22
  # Extract topics from embeddings and documents (simple interface)
19
23
  def self.extract(embeddings:, documents:, **options)
20
- engine = Engine.new(**options)
21
- engine.fit(embeddings: embeddings, documents: documents)
24
+ Topical.extract(embeddings: embeddings, documents: documents, **options)
22
25
  end
23
26
  end
24
27
  end
@@ -1,4 +1,5 @@
1
1
  require 'json'
2
+ require 'time'
2
3
 
3
4
  module Ragnar
4
5
  class UmapProcessor
@@ -28,6 +29,67 @@ module Ragnar
28
29
 
29
30
  puts "Found #{embeddings.size} embeddings"
30
31
 
32
+ # Validate embeddings
33
+ embedding_dims = embeddings.map(&:size).uniq
34
+ if embedding_dims.size > 1
35
+ puts " ⚠️ Warning: Inconsistent embedding dimensions found: #{embedding_dims.inspect}"
36
+ puts " This may cause errors during UMAP training."
37
+ # Filter to only embeddings with the most common dimension
38
+ most_common_dim = embedding_dims.max_by { |dim| embeddings.count { |e| e.size == dim } }
39
+ embeddings = embeddings.select { |e| e.size == most_common_dim }
40
+ puts " Using only embeddings with #{most_common_dim} dimensions (#{embeddings.size} embeddings)"
41
+ end
42
+
43
+ # Check for nil or invalid values
44
+ invalid_count = 0
45
+ nan_count = 0
46
+ inf_count = 0
47
+
48
+ valid_embeddings = embeddings.select do |embedding|
49
+ if !embedding.is_a?(Array)
50
+ invalid_count += 1
51
+ false
52
+ elsif embedding.any? { |v| !v.is_a?(Numeric) }
53
+ invalid_count += 1
54
+ false
55
+ elsif embedding.any?(&:nan?)
56
+ nan_count += 1
57
+ false
58
+ elsif embedding.any? { |v| !v.finite? }
59
+ inf_count += 1
60
+ false
61
+ else
62
+ true
63
+ end
64
+ end
65
+
66
+ if valid_embeddings.size < embeddings.size
67
+ puts "\n ⚠️ Data quality issues detected:"
68
+ puts " • Invalid embeddings: #{invalid_count}" if invalid_count > 0
69
+ puts " • Embeddings with NaN: #{nan_count}" if nan_count > 0
70
+ puts " • Embeddings with Infinity: #{inf_count}" if inf_count > 0
71
+ puts " • Total removed: #{embeddings.size - valid_embeddings.size}"
72
+ puts " • Remaining valid: #{valid_embeddings.size}"
73
+
74
+ embeddings = valid_embeddings
75
+ end
76
+
77
+ if embeddings.empty?
78
+ raise "No valid embeddings found after validation.\n\n" \
79
+ "All embeddings contain invalid values (NaN, Infinity, or non-numeric).\n" \
80
+ "This suggests a problem with the embedding model or indexing process.\n\n" \
81
+ "Please try:\n" \
82
+ " 1. Re-indexing your documents: ragnar index <path> --force\n" \
83
+ " 2. Using a different embedding model\n" \
84
+ " 3. Checking your document content for unusual characters"
85
+ end
86
+
87
+ if embeddings.size < 10
88
+ raise "Too few valid embeddings (#{embeddings.size}) for UMAP training.\n\n" \
89
+ "UMAP requires at least 10 samples to work effectively.\n" \
90
+ "Please index more documents or check for data quality issues."
91
+ end
92
+
31
93
  # Adjust parameters based on the number of samples
32
94
  # UMAP requires n_neighbors < n_samples
33
95
  # Also, n_components should be less than n_samples for stability
@@ -55,6 +117,19 @@ module Ragnar
55
117
  embedding_matrix = embeddings
56
118
  original_dims = embeddings.first.size
57
119
 
120
+ # Ensure n_components is reasonable
121
+ if n_components >= original_dims
122
+ puts " ⚠️ Warning: n_components (#{n_components}) >= original dimensions (#{original_dims})"
123
+ n_components = [original_dims / 2, 50].min
124
+ puts " Reducing n_components to #{n_components}"
125
+ end
126
+
127
+ # For very high dimensional data, be more conservative
128
+ if original_dims > 500 && n_components > 50
129
+ puts " ⚠️ Note: High dimensional data (#{original_dims}D) being reduced to #{n_components}D"
130
+ puts " Consider using n_components <= 50 for stability"
131
+ end
132
+
58
133
  puts "\nTraining UMAP model..."
59
134
  puts " Original dimensions: #{original_dims}"
60
135
  puts " Target dimensions: #{n_components}"
@@ -64,14 +139,45 @@ module Ragnar
64
139
  # Perform the actual training using the class-based API
65
140
  puts " Training UMAP model (this may take a moment)..."
66
141
 
67
- @umap_instance = ClusterKit::Dimensionality::UMAP.new(
68
- n_components: n_components,
69
- n_neighbors: n_neighbors
70
- )
71
-
72
- @reduced_embeddings = @umap_instance.fit_transform(embedding_matrix)
73
-
74
- puts " ✓ UMAP training complete"
142
+ begin
143
+ @umap_instance = ClusterKit::Dimensionality::UMAP.new(
144
+ n_components: n_components,
145
+ n_neighbors: n_neighbors
146
+ )
147
+
148
+ @reduced_embeddings = @umap_instance.fit_transform(embedding_matrix)
149
+
150
+ puts " ✓ UMAP training complete"
151
+ rescue => e
152
+ # Provide helpful error message without exposing internal stack trace
153
+ error_msg = "\n❌ UMAP training failed\n\n"
154
+
155
+ if e.message.include?("index out of bounds")
156
+ error_msg += "The UMAP algorithm encountered an index out of bounds error.\n\n"
157
+ error_msg += "This typically happens when:\n"
158
+ error_msg += " • The embedding data contains invalid values (NaN, Infinity)\n"
159
+ error_msg += " • The parameters are incompatible with your data\n"
160
+ error_msg += " • There are duplicate or corrupted embeddings\n\n"
161
+ error_msg += "Suggested solutions:\n"
162
+ error_msg += " 1. Try with more conservative parameters:\n"
163
+ error_msg += " ragnar train-umap --n-components 10 --n-neighbors 5\n\n"
164
+ error_msg += " 2. Re-index your documents to regenerate embeddings:\n"
165
+ error_msg += " ragnar index <path> --force\n\n"
166
+ error_msg += " 3. Check your embedding model configuration\n\n"
167
+ error_msg += "Current parameters:\n"
168
+ error_msg += " • n_components: #{n_components}\n"
169
+ error_msg += " • n_neighbors: #{n_neighbors}\n"
170
+ error_msg += " • embeddings: #{embeddings.size} samples\n"
171
+ error_msg += " • dimensions: #{original_dims}\n"
172
+ else
173
+ error_msg += "Error: #{e.message}\n\n"
174
+ error_msg += "This may be due to incompatible parameters or data issues.\n"
175
+ error_msg += "Try using more conservative parameters:\n"
176
+ error_msg += " ragnar train-umap --n-components 10 --n-neighbors 5\n"
177
+ end
178
+
179
+ raise RuntimeError, error_msg
180
+ end
75
181
 
76
182
  # Store the parameters for saving
77
183
  @model_params = {
@@ -91,10 +197,10 @@ module Ragnar
91
197
  end
92
198
 
93
199
  def apply(batch_size: 100)
94
- # Load the trained UMAP model (reduced embeddings)
95
- reduced_embeddings = load_model
200
+ # Load the trained UMAP model
201
+ umap_model = load_umap_model
96
202
 
97
- puts "Applying saved UMAP embeddings to database..."
203
+ puts "Applying UMAP transformation to database documents..."
98
204
 
99
205
  # Get all embeddings from database
100
206
  all_docs = @database.get_embeddings
@@ -109,84 +215,95 @@ module Ragnar
109
215
  end
110
216
 
111
217
  puts "Found #{all_docs.size} documents in database"
112
- puts "Loaded #{reduced_embeddings.size} reduced embeddings from model"
113
218
 
114
- if all_docs.size != reduced_embeddings.size
115
- puts "⚠️ Warning: Mismatch between database documents (#{all_docs.size}) and model embeddings (#{reduced_embeddings.size})"
116
- puts " This suggests the model was trained on a different dataset."
117
- puts " Please retrain the UMAP model after indexing all your documents."
118
- return {
119
- processed: 0,
120
- skipped: 0,
121
- errors: 1
122
- }
219
+ # Process in batches for memory efficiency
220
+ processed_count = 0
221
+ error_count = 0
222
+ skipped_count = 0
223
+
224
+ all_docs.each_slice(batch_size) do |batch|
225
+ begin
226
+ # Extract embeddings
227
+ embeddings = batch.map { |d| d[:embedding] }
228
+
229
+ # Validate embeddings
230
+ valid_indices = []
231
+ embeddings_to_transform = []
232
+
233
+ embeddings.each_with_index do |emb, idx|
234
+ if emb.nil? || !emb.is_a?(Array) || emb.empty?
235
+ skipped_count += 1
236
+ next
237
+ end
238
+
239
+ if emb.any? { |v| !v.is_a?(Numeric) || v.nan? || !v.finite? }
240
+ skipped_count += 1
241
+ next
242
+ end
243
+
244
+ valid_indices << idx
245
+ embeddings_to_transform << emb
246
+ end
247
+
248
+ next if embeddings_to_transform.empty?
249
+
250
+ # Transform using the loaded UMAP model
251
+ reduced_embeddings = umap_model.transform(embeddings_to_transform)
252
+
253
+ # Prepare updates for valid documents
254
+ updates = valid_indices.map.with_index do |batch_idx, transform_idx|
255
+ {
256
+ id: batch[batch_idx][:id],
257
+ reduced_embedding: reduced_embeddings[transform_idx]
258
+ }
259
+ end
260
+
261
+ # Update database
262
+ @database.update_reduced_embeddings(updates)
263
+ processed_count += updates.size
264
+
265
+ puts " Processed batch: #{updates.size} documents transformed"
266
+ rescue => e
267
+ puts " ⚠️ Error processing batch: #{e.message}"
268
+ error_count += batch.size
269
+ end
123
270
  end
124
271
 
125
- # Prepare updates - match document IDs to reduced embeddings
126
- updates = all_docs.each_with_index.map do |doc, idx|
127
- {
128
- id: doc[:id],
129
- reduced_embedding: reduced_embeddings[idx]
130
- }
131
- end
132
-
133
- puts "Updating database with reduced embeddings..."
134
- @database.update_reduced_embeddings(updates)
272
+ puts "\nUMAP application complete:"
273
+ puts " ✓ Processed: #{processed_count} documents"
274
+ puts " ⚠️ Skipped: #{skipped_count} documents (invalid embeddings)" if skipped_count > 0
275
+ puts " ❌ Errors: #{error_count} documents" if error_count > 0
135
276
 
136
277
  {
137
- processed: updates.size,
138
- skipped: 0,
139
- errors: 0
278
+ processed: processed_count,
279
+ skipped: skipped_count,
280
+ errors: error_count
140
281
  }
141
282
  end
142
283
 
143
284
  private
144
285
 
145
- def process_batch(docs)
146
- # Extract embeddings
147
- embeddings = docs.map { |d| d[:embedding] }
148
-
149
- # Transform using UMAP
150
- # The transform method returns a 2D array where each row is a reduced embedding
151
- reduced = @umap_model.transform(embeddings)
152
-
153
- # Prepare updates
154
- updates = docs.each_with_index.map do |doc, idx|
155
- {
156
- id: doc[:id],
157
- reduced_embedding: reduced[idx]
158
- }
159
- end
160
-
161
- # Update database
162
- @database.update_reduced_embeddings(updates)
163
- end
164
-
165
286
  def save_model
166
- return unless @umap_instance && @reduced_embeddings
287
+ return unless @umap_instance
167
288
 
168
- # Save the trained UMAP model for transforming new queries
289
+ # Save the trained UMAP model for transforming new data
169
290
  @umap_instance.save_model(@model_path)
170
291
  puts "UMAP model saved to: #{@model_path}"
171
292
 
172
- # Also cache the reduced embeddings separately for the apply method
173
- embeddings_path = @model_path.sub(/\.bin$/, '_embeddings.json')
174
- ClusterKit::Dimensionality::UMAP.save_data(@reduced_embeddings, embeddings_path)
175
- puts "Reduced embeddings cached to: #{embeddings_path}"
176
- end
177
-
178
- def load_model
179
- return @reduced_embeddings if @reduced_embeddings
180
-
181
- # For the apply method, we need the pre-computed embeddings
182
- embeddings_path = @model_path.sub(/\.bin$/, '_embeddings.json')
183
- unless File.exist?(embeddings_path)
184
- raise "Cached embeddings not found at #{embeddings_path}. Please train a model first."
293
+ # Save metadata about the training if we have params
294
+ if @model_params
295
+ metadata_path = @model_path.sub(/\.bin$/, '_metadata.json')
296
+ metadata = {
297
+ trained_at: Time.now.iso8601,
298
+ n_components: @model_params[:n_components],
299
+ n_neighbors: @model_params[:n_neighbors],
300
+ min_dist: @model_params[:min_dist],
301
+ document_count: @database.get_embeddings.size,
302
+ model_version: 2 # Version 2: proper transform-based approach
303
+ }
304
+ File.write(metadata_path, JSON.pretty_generate(metadata))
305
+ puts "Model metadata saved to: #{metadata_path}"
185
306
  end
186
-
187
- @reduced_embeddings = ClusterKit::Dimensionality::UMAP.load_data(embeddings_path)
188
- puts "Cached embeddings loaded from: #{embeddings_path}"
189
- @reduced_embeddings
190
307
  end
191
308
 
192
309
  def load_umap_model
@@ -1,124 +1,205 @@
1
+ require 'json'
1
2
  require 'clusterkit'
2
3
 
3
4
  module Ragnar
5
+ # Service for applying UMAP transformations to embeddings
6
+ # Separates transformation logic from training (UmapProcessor)
4
7
  class UmapTransformService
5
- include Singleton
8
+ attr_reader :model_path, :database
6
9
 
7
- def initialize
10
+ def initialize(model_path: "umap_model.bin", database:)
11
+ @model_path = model_path
12
+ @database = database
8
13
  @umap_model = nil
9
- @model_path = "umap_model.bin"
14
+ @model_metadata = nil
10
15
  end
11
16
 
12
- # Transform a query embedding to reduced space using saved UMAP model
13
- def transform_query(query_embedding, model_path = nil)
14
- # Use the real UMAP model's transform capability
15
- model_path ||= @model_path
17
+ # Transform embeddings for specific documents
18
+ # @param document_ids [Array<Integer>] IDs of documents to transform
19
+ # @return [Hash] Results with :processed, :skipped, :errors counts
20
+ def transform_documents(document_ids)
21
+ return { processed: 0, skipped: 0, errors: 0 } if document_ids.empty?
16
22
 
17
- # Load the model if not already loaded
18
- load_model(model_path) unless @umap_model
23
+ load_model!
19
24
 
20
- # Transform the query embedding using the trained UMAP model
21
- # The transform method expects a 2D array (even for a single embedding)
22
- result = @umap_model.transform([query_embedding])
25
+ # Fetch documents
26
+ documents = @database.get_documents_by_ids(document_ids)
23
27
 
24
- # Return the first (and only) transformed embedding
25
- result.first
26
- rescue => e
27
- # Fall back to k-NN approximation if model loading fails
28
- puts "Warning: Could not use UMAP model for transform: #{e.message}"
29
- puts "Falling back to k-NN approximation..."
30
- knn_approximate_transform(query_embedding)
31
- end
32
-
33
- # Check if we can do transforms
34
- def model_available?(model_path = nil)
35
- model_path ||= @model_path
28
+ if documents.empty?
29
+ return { processed: 0, skipped: 0, errors: 0 }
30
+ end
36
31
 
37
- # First check if the actual UMAP model file exists
38
- if File.exist?(model_path)
39
- return true
32
+ # Extract and validate embeddings
33
+ valid_docs = []
34
+ embeddings_to_transform = []
35
+ skipped_count = 0
36
+
37
+ documents.each do |doc|
38
+ emb = doc[:embedding]
39
+
40
+ if emb.nil? || !emb.is_a?(Array) || emb.empty?
41
+ skipped_count += 1
42
+ next
43
+ end
44
+
45
+ if emb.any? { |v| !v.is_a?(Numeric) || v.nan? || !v.finite? }
46
+ skipped_count += 1
47
+ next
48
+ end
49
+
50
+ valid_docs << doc
51
+ embeddings_to_transform << emb
40
52
  end
41
53
 
42
- # Fallback: check if the database has reduced embeddings for k-NN approximation
43
- database = Database.new("./rag_database")
44
- stats = database.get_stats
45
- stats[:with_reduced_embeddings] > 0
54
+ return { processed: 0, skipped: skipped_count, errors: 0 } if embeddings_to_transform.empty?
55
+
56
+ # Transform using UMAP
57
+ begin
58
+ reduced_embeddings = @umap_model.transform(embeddings_to_transform)
59
+
60
+ # Prepare updates
61
+ updates = valid_docs.zip(reduced_embeddings).map do |doc, reduced_emb|
62
+ {
63
+ id: doc[:id],
64
+ reduced_embedding: reduced_emb,
65
+ umap_version: model_version
66
+ }
67
+ end
68
+
69
+ # Update database
70
+ @database.update_reduced_embeddings(updates)
71
+
72
+ { processed: updates.size, skipped: skipped_count, errors: 0 }
73
+ rescue => e
74
+ puts "Error transforming documents: #{e.message}"
75
+ { processed: 0, skipped: skipped_count, errors: valid_docs.size }
76
+ end
46
77
  end
47
78
 
48
- private
49
-
50
- def load_model(model_path)
51
- unless File.exist?(model_path)
52
- raise "UMAP model not found at #{model_path}. Please train a model first."
79
+ # Transform a single query embedding
80
+ # @param embedding [Array<Numeric>] Query embedding to transform
81
+ # @return [Array<Float>, nil] Reduced embedding or nil if error
82
+ def transform_query(embedding)
83
+ return nil if embedding.nil? || !embedding.is_a?(Array) || embedding.empty?
84
+
85
+ # Validate embedding
86
+ if embedding.any? { |v| !v.is_a?(Numeric) || v.nan? || !v.finite? }
87
+ puts "Warning: Invalid query embedding (contains NaN or Infinity)"
88
+ return nil
53
89
  end
54
90
 
55
- @umap_model = ClusterKit::Dimensionality::UMAP.load_model(model_path)
56
- puts "UMAP model loaded for query transformation"
91
+ load_model!
92
+
93
+ begin
94
+ # Transform returns array of arrays, get first (and only) result
95
+ @umap_model.transform([embedding]).first
96
+ rescue => e
97
+ puts "Error transforming query: #{e.message}"
98
+ nil
99
+ end
57
100
  end
58
101
 
59
- def knn_approximate_transform(query_embedding)
60
- # Fallback k-NN approximation method
61
- # Get database stats to know dimensions
62
- database = Database.new("./rag_database")
63
- stats = database.get_stats
64
-
65
- # If we don't have reduced embeddings, we can't transform
66
- if stats[:with_reduced_embeddings] == 0
67
- raise "No reduced embeddings available in database"
68
- end
102
+ # Check if a UMAP model exists
103
+ # @return [Boolean] true if model file exists
104
+ def model_exists?
105
+ File.exist?(@model_path)
106
+ end
107
+
108
+ # Get metadata about the trained model
109
+ # @return [Hash, nil] Model metadata or nil if not found
110
+ def model_metadata
111
+ return @model_metadata if @model_metadata
69
112
 
70
- # Get all documents with their embeddings
71
- all_docs = database.get_embeddings
113
+ metadata_path = @model_path.sub(/\.bin$/, '_metadata.json')
114
+ return nil unless File.exist?(metadata_path)
72
115
 
73
- # Find k nearest neighbors in full embedding space
74
- k = 5
75
- neighbors = []
116
+ @model_metadata = JSON.parse(File.read(metadata_path), symbolize_names: true)
117
+ rescue => e
118
+ puts "Error loading model metadata: #{e.message}"
119
+ nil
120
+ end
121
+
122
+ # Get the version of the current model
123
+ # @return [Integer] Model version (timestamp of file modification)
124
+ def model_version
125
+ return 0 unless File.exist?(@model_path)
126
+ File.mtime(@model_path).to_i
127
+ end
128
+
129
+ # Check if model needs retraining based on staleness
130
+ # @return [Hash] Staleness info with :needs_retraining, :coverage_percentage
131
+ def check_model_staleness
132
+ return { needs_retraining: true, coverage_percentage: 0, reason: "No model exists" } unless model_exists?
76
133
 
77
- all_docs.each_with_index do |doc, idx|
78
- next unless doc[:embedding] && doc[:reduced_embedding]
79
-
80
- distance = euclidean_distance(query_embedding, doc[:embedding])
81
- neighbors << { idx: idx, distance: distance, reduced: doc[:reduced_embedding] }
82
- end
134
+ metadata = model_metadata
135
+ return { needs_retraining: true, coverage_percentage: 0, reason: "No metadata found" } unless metadata
83
136
 
84
- # Sort by distance and take k nearest
85
- neighbors.sort_by! { |n| n[:distance] }
86
- k_nearest = neighbors.first(k)
137
+ trained_count = metadata[:document_count] || 0
138
+ current_count = @database.document_count
87
139
 
88
- # Average the reduced embeddings of k nearest neighbors
89
- # This is a simple approximation of the transform
90
- if k_nearest.empty?
91
- raise "No neighbors found for transform"
140
+ if current_count == 0
141
+ return { needs_retraining: false, coverage_percentage: 100, reason: "No documents" }
92
142
  end
93
143
 
94
- reduced_dims = k_nearest.first[:reduced].size
95
- averaged = Array.new(reduced_dims, 0.0)
144
+ coverage = (trained_count.to_f / current_count * 100).round(1)
145
+ staleness = 100 - coverage
146
+
147
+ {
148
+ needs_retraining: staleness > 30,
149
+ coverage_percentage: coverage,
150
+ trained_documents: trained_count,
151
+ current_documents: current_count,
152
+ staleness_percentage: staleness,
153
+ reason: staleness > 30 ? "Model covers only #{coverage}% of documents" : "Model is up to date"
154
+ }
155
+ end
156
+
157
+ private
158
+
159
+ def load_model!
160
+ return if @umap_model
96
161
 
97
- # Weighted average based on inverse distance
98
- total_weight = 0.0
99
- k_nearest.each do |neighbor|
100
- # Use inverse distance as weight (closer = higher weight)
101
- weight = 1.0 / (neighbor[:distance] + 0.001) # Add small epsilon to avoid division by zero
102
- total_weight += weight
103
-
104
- neighbor[:reduced].each_with_index do |val, idx|
105
- averaged[idx] += val * weight
106
- end
162
+ unless File.exist?(@model_path)
163
+ raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar train-umap'."
107
164
  end
108
165
 
109
- # Normalize by total weight
110
- averaged.map { |val| val / total_weight }
166
+ @umap_model = ClusterKit::Dimensionality::UMAP.load_model(@model_path)
111
167
  end
168
+ end
169
+
170
+ # Singleton service for backwards compatibility
171
+ # This allows the old UmapTransformService.instance pattern to work
172
+ class UmapTransformServiceSingleton
173
+ include Singleton
112
174
 
113
- def euclidean_distance(vec1, vec2)
114
- return Float::INFINITY if vec1.size != vec2.size
115
-
116
- sum = 0.0
117
- vec1.each_with_index do |val, idx|
118
- diff = val - vec2[idx]
119
- sum += diff * diff
175
+ def initialize
176
+ @database = Database.new(Config.instance.database_path)
177
+ @service = UmapTransformService.new(database: @database)
178
+ end
179
+
180
+ def transform_query(embedding, model_path = nil)
181
+ if model_path && model_path != @service.model_path
182
+ # Create a new service with different model path
183
+ service = UmapTransformService.new(model_path: model_path, database: @database)
184
+ service.transform_query(embedding)
185
+ else
186
+ @service.transform_query(embedding)
120
187
  end
121
- Math.sqrt(sum)
188
+ end
189
+
190
+ def model_available?(model_path = nil)
191
+ if model_path
192
+ File.exist?(model_path)
193
+ else
194
+ @service.model_exists?
195
+ end
196
+ end
197
+ end
198
+
199
+ # For backwards compatibility - old code uses UmapTransformService.instance
200
+ class << UmapTransformService
201
+ def instance
202
+ UmapTransformServiceSingleton.instance
122
203
  end
123
204
  end
124
205
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ragnar
4
- VERSION = "0.1.0.pre.2"
4
+ VERSION = "0.1.0.pre.4"
5
5
  end