ragnar-cli 0.1.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
1
+ require 'json'
2
+ require 'singleton'
3
+ require 'set'
4
+ require 'digest'
5
+
6
module Ragnar
  # Answers a user question with retrieval-augmented generation.
  #
  # The pipeline implemented by #query is: rewrite the query into sub-queries,
  # run one vector search per sub-query, fuse the ranked lists, rerank the fused
  # candidates with a cross-encoder, select the context documents, repack them
  # and finally ask the LLM for an answer.
  class QueryProcessor
    # UMAP model location used when none is supplied (relative to the CWD).
    DEFAULT_UMAP_MODEL_PATH = "./umap_model.bin"

    # Cross-encoder loaded lazily by #rerank_documents.
    RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"

    # Smoothing constant of Reciprocal Rank Fusion: score = sum(1 / (k + rank)).
    DEFAULT_RRF_K = 60

    # Width of the horizontal rules printed between verbose pipeline steps.
    RULE_WIDTH = 60

    attr_reader :database, :embedder, :rewriter, :reranker

    # @param db_path [String] location handed to Database.new
    # @param umap_model_path [String] UMAP model used to project query
    #   embeddings when the database holds reduced embeddings
    def initialize(db_path: Ragnar::DEFAULT_DB_PATH, umap_model_path: DEFAULT_UMAP_MODEL_PATH)
      @database = Database.new(db_path)
      @embedder = Embedder.new
      @llm_manager = LLMManager.instance
      @umap_service = UmapTransformService.instance
      @rewriter = QueryRewriter.new(llm_manager: @llm_manager)
      @reranker = nil # Loaded lazily on the first rerank
      @umap_model_path = umap_model_path
    end

    # Runs the whole pipeline for one query.
    #
    # @param user_query [String] the question as typed by the user
    # @param top_k [Integer] number of reranked documents considered for context
    # @param verbose [Boolean] print a trace of every step to stdout
    # @return [Hash] always contains :query, :clarified, :answer, :sources,
    #   :sub_queries and :confidence (a percentage, 0.0 when nothing was found)
    def query(user_query, top_k: 3, verbose: false)
      puts "Processing query: #{user_query}" if verbose

      # Step 1: Rewrite and analyze the query
      print_section("STEP 1: Query Analysis & Rewriting") if verbose

      rewritten = @rewriter.rewrite(user_query)
      clarified = rewritten['clarified_intent']
      context_needed = rewritten['context_needed']

      # A rewriter that yields no sub-queries must not silently turn into
      # "no documents found": search with the raw query instead.
      sub_queries = Array(rewritten['sub_queries'])
      sub_queries = [user_query] if sub_queries.empty?

      if verbose
        puts "\nOriginal Query: #{user_query}"
        puts "\nRewritten Query Analysis:"
        puts "  Clarified Intent: #{clarified}"
        puts "  Query Type: #{rewritten['query_type']}"
        puts "  Context Needed: #{context_needed}"
        puts "\nGenerated Sub-queries (#{sub_queries.length}):"
        sub_queries.each_with_index { |sq, idx| puts "  #{idx + 1}. #{sq}" }
        key_terms = rewritten['key_terms']
        if key_terms && !key_terms.empty?
          puts "\nKey Terms Identified:"
          puts "  #{key_terms.join(', ')}"
        end
      end

      # Step 2: Retrieve candidates using RRF
      print_section("STEP 2: Document Retrieval with RRF") if verbose

      candidates = retrieve_with_rrf(sub_queries, k: 20, verbose: verbose)

      if candidates.empty?
        # Same keys as the regular result so callers can read them unconditionally.
        return {
          query: user_query,
          clarified: clarified,
          answer: "No relevant documents found in the database.",
          sources: [],
          sub_queries: sub_queries,
          confidence: 0.0
        }
      end

      if verbose
        puts "\nRetrieval Summary:"
        puts "  Total candidates found: #{candidates.size}"
        puts "  Unique sources: #{candidates.map { |c| c[:file_path] }.uniq.size}"
      end

      # Step 3: Rerank candidates
      print_section("STEP 3: Document Reranking") if verbose

      reranked = rerank_documents(
        query: clarified,
        documents: candidates,
        top_k: top_k * 2, # Get more than we need for context
        verbose: verbose
      )

      if verbose && reranked.any?
        puts "\nTop Reranked Documents:"
        reranked[0..2].each_with_index do |doc, idx|
          full_text = (doc[:chunk_text] || doc[:text] || "").gsub(/\s+/, ' ')
          puts "  #{idx + 1}. [#{File.basename(doc[:file_path] || 'unknown')}]"
          puts "     Score: #{doc[:score]&.round(4)}"
          puts "     Full chunk (#{full_text.length} chars):"
          puts "     \"#{full_text}\""
          puts ""
        end
      end

      # Step 4: Prepare context
      print_section("STEP 4: Context Preparation") if verbose

      context_docs = prepare_context(reranked[0...top_k], context_needed)

      if verbose
        puts "\nContext Documents Selected: #{context_docs.length}"
        puts "Context strategy: #{context_needed}"
      end

      # Step 5: Repack context for optimal LLM consumption
      print_section("STEP 5: Context Repacking") if verbose

      repacked_context = ContextRepacker.repack(context_docs, clarified)

      if verbose
        original_size = context_docs.sum { |d| (d[:chunk_text] || d[:text] || "").length }
        # Guard the ratio: an all-empty context would otherwise divide by zero.
        ratio = original_size.zero? ? "n/a" : "#{(100.0 * repacked_context.length / original_size).round(1)}%"
        puts "\nContext Optimization:"
        puts "  Original size: #{original_size} chars"
        puts "  Repacked size: #{repacked_context.length} chars"
        puts "  Compression ratio: #{ratio}"
        puts "\nFull Repacked Context:"
        puts "-" * 40
        puts repacked_context
        puts "-" * 40
      end

      # Step 6: Generate response
      print_section("STEP 6: Response Generation") if verbose

      response = generate_response(
        query: clarified,
        repacked_context: repacked_context,
        query_type: rewritten['query_type']
      )

      if verbose
        puts "\nGenerated Response:"
        puts "-" * 40
        puts response
        puts "-" * 40
      end

      result = {
        query: user_query,
        clarified: clarified,
        answer: response,
        sources: context_docs.map do |d|
          { source_file: d[:file_path] || d[:source_file], chunk_index: d[:chunk_index] }
        end,
        sub_queries: sub_queries,
        confidence: calculate_confidence(reranked[0...top_k])
      }

      if verbose
        print_section("FINAL RESULTS")
        puts "\nConfidence Score: #{result[:confidence]}%"
        puts "\nSources Used:"
        result[:sources].each_with_index do |source, idx|
          puts "  #{idx + 1}. #{source[:source_file]} (chunk #{source[:chunk_index]})"
        end
      end

      result
    end

    private

    # Prints the banner that separates two pipeline steps in verbose mode.
    def print_section(title)
      puts "\n#{'-' * RULE_WIDTH}"
      puts title
      puts "-" * RULE_WIDTH
    end

    # Runs one vector search per sub-query and fuses the ranked lists.
    #
    # @param queries [Array<String>] sub-queries to search for
    # @param k [Integer] number of neighbours requested per sub-query
    # @param verbose [Boolean] print a trace to stdout
    # @param rrf_k [Numeric] RRF smoothing constant; kept separate from +k+ so
    #   that asking for more neighbours does not also flatten the fusion
    # @return [Array<Hash>] fused documents, best first; each hash is tagged
    #   with :query_idx and :retrieval_method
    def retrieve_with_rrf(queries, k: 20, verbose: false, rrf_k: DEFAULT_RRF_K)
      all_results = []
      stats = nil # fetched once, on the first sub-query

      queries.each_with_index do |query, idx|
        if verbose
          puts "\nSub-query #{idx + 1}: \"#{query}\""
          puts "  Generating embedding..."
        end

        query_embedding = @embedder.embed_text(query)

        if verbose
          puts "  Embedding dimensions: #{query_embedding.length}"
          puts "  Searching vector database..."
        end

        # Database statistics do not depend on the sub-query: look them up once.
        stats ||= @database.get_stats
        search_embedding, use_reduced = select_search_embedding(query_embedding, stats, verbose)

        vector_results = @database.search_similar(
          search_embedding,
          k: k,
          use_reduced: use_reduced
        )

        if verbose
          puts "  Found #{vector_results.length} matches"
          if vector_results.any?
            best = vector_results.first
            puts "  Best match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
          end
        end

        # Remember which ranked list each hit came from; apply_rrf needs it.
        vector_results.each do |result|
          result[:query_idx] = idx
          result[:retrieval_method] = :vector
        end

        all_results.concat(vector_results)
      end

      if verbose
        puts "\nApplying Reciprocal Rank Fusion..."
        puts "  Total results before fusion: #{all_results.length}"
      end

      fused = apply_rrf(all_results, k: rrf_k)

      puts "  Results after RRF: #{fused.length}" if verbose

      fused
    end

    # Chooses the embedding to search with: the UMAP-reduced one when the
    # database reports reduced embeddings and the model can be used, otherwise
    # the full embedding.
    #
    # @return [Array(Array, Boolean)] the embedding and the matching
    #   +use_reduced+ flag for Database#search_similar
    def select_search_embedding(query_embedding, stats, verbose)
      return [query_embedding, false] unless stats[:with_reduced_embeddings].to_i > 0

      model_path = @umap_model_path || DEFAULT_UMAP_MODEL_PATH

      unless @umap_service.model_available?(model_path)
        if verbose
          puts "  Note: Reduced embeddings available but UMAP model not found"
          puts "  Falling back to full embeddings"
        end
        return [query_embedding, false]
      end

      puts "  Transforming query to reduced space (#{stats[:reduced_dims]}D)" if verbose

      begin
        reduced = @umap_service.transform_query(query_embedding, model_path)

        if verbose
          puts "  ✓ Query transformed to #{reduced.size}D"
          puts "  Searching with reduced embeddings..."
        end

        [reduced, true]
      rescue StandardError => e
        puts "  ⚠️  Failed to transform query: #{e.message}" if verbose
        puts "  Falling back to full embeddings" if verbose
        # Always hand back the full embedding here, never a half-made reduced one.
        [query_embedding, false]
      end
    end

    # Fuses several ranked result lists with Reciprocal Rank Fusion.
    #
    # Every result contributes 1 / (k + rank) to its document, where rank is
    # the 1-based position of the result inside the list it came from
    # (identified by :query_idx). Positions are taken in arrival order.
    # NOTE(review): this assumes Database#search_similar returns nearest-first;
    # the verbose trace already labels `.first` as the best match.
    # Using positions rather than raw distances keeps the fusion independent of
    # the distance metric and safe for nil or negative distances.
    #
    # @param results [Array<Hash>] hits carrying :id and, optionally, :query_idx
    # @param k [Numeric] RRF smoothing constant
    # @return [Array<Hash>] one hash per distinct :id (the first hit seen for
    #   it), highest fused score first; ties keep first-seen order
    def apply_rrf(results, k: 60)
      fused = {}
      positions = Hash.new(0) # query_idx => rank of the last hit seen in that list

      results.each do |result|
        rank = (positions[result[:query_idx]] += 1)
        entry = (fused[result[:id]] ||= { score: 0.0, document: result, order: fused.size })
        entry[:score] += 1.0 / (k + rank)
      end

      fused.values
           .sort_by { |entry| [-entry[:score], entry[:order]] }
           .map { |entry| entry[:document] }
    end

    # Removes documents with identical text, then reranks the rest with the
    # cross-encoder.
    #
    # @param query [String] text the documents are scored against
    # @param documents [Array<Hash>] candidates carrying :chunk_text or :text
    # @param top_k [Integer] maximum number of documents returned
    # @param verbose [Boolean] report how many duplicates were dropped
    # @return [Array<Hash>] up to +top_k+ documents, best first, each with a
    #   :score; when reranking fails the deduplicated documents are returned in
    #   their incoming order and without :score
    def rerank_documents(query:, documents:, top_k:, verbose: false)
      # First occurrence wins, exactly like a seen-set over the text would.
      unique_docs = documents.uniq { |doc| doc[:chunk_text] || doc[:text] || "" }

      if verbose && documents.length > unique_docs.length
        puts "  Deduplicated: #{documents.length} -> #{unique_docs.length} documents"
      end

      # Nothing to score: do not load the model for an empty list.
      return [] if unique_docs.empty?

      begin
        @reranker ||= Candle::Reranker.from_pretrained(RERANKER_MODEL)

        texts = unique_docs.map { |doc| doc[:chunk_text] || doc[:text] || "" }

        # The reranker answers with hashes of {doc_id:, score:, text:} where
        # doc_id indexes into +texts+.
        @reranker.rerank(query, texts)
                 .map { |scored| unique_docs[scored[:doc_id]].merge(score: scored[:score]) }
                 .sort_by { |doc| -doc[:score] }
                 .first(top_k)
      rescue StandardError => e
        puts "Warning: Reranking failed (#{e.message}), using original order"
        unique_docs.first(top_k)
      end
    end

    # Caps the number of context documents according to the rewriter's
    # "context_needed" hint: 5 for "extensive", 3 for "moderate", 2 otherwise.
    #
    # @param documents [Array<Hash>] reranked documents, best first
    # @param context_needed [String, nil]
    # @return [Array<Hash>]
    def prepare_context(documents, context_needed)
      context_size = case context_needed
                     when "extensive" then 5
                     when "moderate" then 3
                     else 2
                     end

      documents.first(context_size)
    end

    # Asks the default LLM for an answer. If generation raises, a warning is
    # printed and the beginning of the repacked context is returned instead.
    #
    # @return [String]
    def generate_response(query:, repacked_context:, query_type:)
      llm = @llm_manager.default_llm
      prompt = build_prompt(query, repacked_context, query_type)

      llm.generate(prompt)
    rescue StandardError => e
      puts "Warning: LLM generation failed (#{e.message})"
      "Based on the retrieved information:\n\n#{repacked_context[0..500]}..."
    end

    # Builds the chat-formatted prompt. +query_type+ is accepted for interface
    # stability but does not influence the prompt.
    #
    # @return [String]
    def build_prompt(query, context, query_type)
      <<~PROMPT
        <|system|>
        You are a helpful assistant. Answer questions based ONLY on the provided context.
        If the answer is not in the context, say "I don't have enough information to answer that question."
        </s>
        <|user|>
        Context:
        #{context}

        Question: #{query}
        </s>
        <|assistant|>
      PROMPT
    end

    # Turns the average retrieval distance into a percentage.
    #
    # Distances are assumed to lie in 0..2, so 0 maps to 100% and 2 to 0%;
    # values outside that range are clamped. Documents without a distance are
    # ignored.
    #
    # @param documents [Array<Hash>, nil] documents carrying :distance
    # @return [Float] confidence in 0.0..100.0, rounded to one decimal
    def calculate_confidence(documents)
      distances = Array(documents).map { |doc| doc[:distance] }.compact
      return 0.0 if distances.empty?

      # to_f: integer distances must not be averaged with integer division.
      avg_distance = distances.sum.to_f / distances.size
      confidence = (1.0 - (avg_distance / 2.0)).clamp(0.0, 1.0)
      (confidence * 100).round(1)
    end
  end
end
@@ -0,0 +1,75 @@
1
module Ragnar
  # Asks the LLM to analyse a user query and split it into sub-queries that
  # are easier to retrieve documents for.
  class QueryRewriter
    # @param llm_manager [#default_llm, nil] source of the model; defaults to
    #   the shared LLMManager instance
    def initialize(llm_manager: nil)
      @llm_manager = llm_manager || LLMManager.instance
    end

    # Analyses +query+ with structured generation.
    #
    # The model may answer with a JSON string or with an already-parsed Hash.
    # Keys the model left out (or set to null) are filled from the fallback
    # analysis, and an empty "sub_queries" list is replaced by the query itself,
    # so callers can rely on every key being present.
    #
    # @param query [String]
    # @return [Hash{String=>Object}] with the keys "clarified_intent",
    #   "query_type", "sub_queries", "key_terms" and "context_needed"
    def rewrite(query)
      model = @llm_manager.default_llm
      fallback = fallback_analysis(query)

      begin
        raw = model.generate_structured(build_prompt(query), schema: analysis_schema)

        # Only strings need parsing; JSON.parse raises TypeError on a Hash.
        parsed = raw.is_a?(String) ? JSON.parse(raw) : raw
        normalize(parsed, fallback)
      rescue StandardError => e
        # Best effort: keep the pipeline running, but do not hide the failure.
        warn "Warning: query rewriting failed (#{e.message}), using the original query"
        fallback
      end
    end

    private

    # JSON schema describing the structured answer expected from the model.
    # Built per call so the model layer never sees a shared mutable object.
    def analysis_schema
      {
        type: "object",
        properties: {
          clarified_intent: {
            type: "string",
            description: "A clear, specific statement of what the user is looking for"
          },
          query_type: {
            type: "string",
            enum: ["factual", "conceptual", "procedural", "comparative", "analytical"],
            description: "The type of query"
          },
          sub_queries: {
            type: "array",
            items: { type: "string" },
            minItems: 2,
            maxItems: 5,
            description: "Simpler, focused queries that together answer the main query"
          },
          key_terms: {
            type: "array",
            items: { type: "string" },
            description: "Important terms and their synonyms for searching"
          },
          context_needed: {
            type: "string",
            enum: ["minimal", "moderate", "extensive"],
            description: "How much context is likely needed to answer this query"
          }
        },
        required: ["clarified_intent", "query_type", "sub_queries", "key_terms", "context_needed"]
      }
    end

    # Instruction sent to the model together with the schema.
    def build_prompt(query)
      <<~PROMPT
        Analyze the following user query and break it down for retrieval-augmented generation.
        Focus on understanding the user's intent and creating effective sub-queries for searching.

        User Query: #{query}

        Provide a structured analysis that will help retrieve the most relevant documents.
      PROMPT
    end

    # Analysis used when the model cannot be consulted: the query is its own
    # only sub-query and every word longer than three characters is a key term.
    def fallback_analysis(query)
      {
        "clarified_intent" => query,
        "query_type" => "general",
        "sub_queries" => [query],
        "key_terms" => query.split(/\s+/).select { |w| w.length > 3 },
        "context_needed" => "moderate"
      }
    end

    # Merges the model's answer over the fallback so no expected key is missing.
    #
    # @param parsed [Object] decoded model output
    # @param fallback [Hash{String=>Object}]
    # @return [Hash{String=>Object}] +fallback+ itself when +parsed+ is not a Hash
    def normalize(parsed, fallback)
      return fallback unless parsed.is_a?(Hash)

      # Callers index with string keys; accept symbol keys from the model layer.
      answer = parsed.each_with_object({}) { |(key, value), acc| acc[key.to_s] = value }
      merged = fallback.merge(answer) { |_key, default, value| value.nil? ? default : value }

      sub_queries = Array(merged["sub_queries"]).map(&:to_s).reject { |sq| sq.strip.empty? }
      merged["sub_queries"] = sub_queries.empty? ? fallback["sub_queries"] : sub_queries
      merged["key_terms"] = Array(merged["key_terms"])
      merged
    end
  end
end
@@ -0,0 +1,221 @@
1
+ require 'json'
2
+
3
module Ragnar
  module TopicModeling
    # Discovers topics in a document collection: optionally reduces the
    # embeddings, clusters them, groups the documents of every cluster into a
    # Topic, extracts distinctive terms and generates a label per topic.
    # Cluster id -1 marks outliers, which belong to no topic.
    class Engine
      # Configuration keys written by #save and accepted back by .load.
      CONFIG_KEYS = %i[min_cluster_size min_samples reduce_dimensions n_components labeling_method].freeze

      attr_reader :topics, :clusterer, :term_extractor

      # @param min_cluster_size [Integer] passed to the default clusterer
      # @param min_samples [Integer] passed to the default clusterer
      # @param clustering_backend [#fit_predict, nil] custom clusterer; the
      #   ClusterKit HDBSCAN clusterer is built when nil
      # @param reduce_dimensions [Boolean] reduce embeddings before clustering
      # @param n_components [Integer] target dimensionality of the reduction
      # @param labeling_method [Symbol] forwarded to TopicLabeler
      # @param llm_client [Object, nil] forwarded to TopicLabeler
      # @param verbose [Boolean] print progress to stdout
      def initialize(
        min_cluster_size: 5,
        min_samples: 3,
        clustering_backend: nil,
        reduce_dimensions: true,
        n_components: 50,
        labeling_method: :hybrid,
        llm_client: nil,
        verbose: false
      )
        @min_cluster_size = min_cluster_size
        @min_samples = min_samples
        @reduce_dimensions = reduce_dimensions
        @n_components = n_components
        @labeling_method = labeling_method
        @verbose = verbose

        @clusterer = clustering_backend || build_default_clusterer
        @term_extractor = TermExtractor.new
        @labeler = TopicLabeler.new(method: labeling_method, llm_client: llm_client)
        @topics = []
      end

      # Extracts topics from the given documents.
      #
      # @param embeddings [Array<Array<Numeric>>] one vector per document
      # @param documents [Array<String>]
      # @param metadata [Array<Hash>, nil] one entry per document; defaults to
      #   empty hashes
      # @return [Array<Topic>] topics sorted by id (empty for empty input)
      # @raise [ArgumentError] when embeddings and documents differ in length
      def fit(embeddings:, documents:, metadata: nil)
        raise ArgumentError, "Embeddings and documents must have same length" unless embeddings.length == documents.length

        @embeddings = embeddings
        @documents = documents
        @metadata = metadata || Array.new(documents.length) { {} }
        @outliers = nil # results of a previous fit no longer apply

        # Nothing to cluster: skip the backends instead of failing on
        # `@embeddings.first.length`.
        if documents.empty?
          @cluster_ids = []
          @topics = []
          return @topics
        end

        puts "Starting topic extraction..." if @verbose

        # Step 1: Optionally reduce dimensions for better clustering
        working_embeddings = @embeddings
        dimensions = @embeddings.first.length
        if @reduce_dimensions && dimensions > @n_components
          puts "  Reducing dimensions from #{dimensions} to #{@n_components}..." if @verbose
          working_embeddings = reduce_dimensions(@embeddings)
        end

        # Step 2: Cluster embeddings
        puts "  Clustering #{working_embeddings.length} documents..." if @verbose
        cluster_ids = @clusterer.fit_predict(working_embeddings)

        # Step 3: Build topics from clusters
        puts "  Building topics..." if @verbose
        @topics = build_topics(cluster_ids)

        # Step 4: Extract terms for each topic
        puts "  Extracting distinctive terms..." if @verbose
        extract_topic_terms

        # Step 5: Generate labels
        puts "  Generating topic labels..." if @verbose
        generate_topic_labels

        puts "Found #{@topics.length} topics (plus #{count_outliers(cluster_ids)} outliers)" if @verbose

        @topics
      end

      # Assigns new embeddings to the existing topics, using the clusterer's
      # approximate prediction when it offers one and the nearest topic
      # centroid otherwise.
      #
      # @param embeddings [Array<Array<Numeric>>]
      # @param documents [Array<String>, nil] accepted for symmetry with #fit;
      #   not used
      # @return [Array] one topic/cluster id per embedding
      # @raise [RuntimeError] when no topics are available
      def transform(embeddings:, documents: nil)
        raise "Must call fit before transform" if @topics.empty?

        if @clusterer.respond_to?(:approximate_predict)
          @clusterer.approximate_predict(embeddings)
        else
          assign_to_nearest_topic(embeddings)
        end
      end

      # @return [Topic, nil] the topic with the given id
      def get_topic(topic_id)
        @topics.find { |t| t.id == topic_id }
      end

      # @return [Array] documents of the latest #fit that were labelled -1;
      #   empty before the first fit
      def outliers
        return [] unless @documents && @cluster_ids

        @outliers ||= @documents.each_index
                                .select { |idx| @cluster_ids[idx] == -1 }
                                .map { |idx| @documents[idx] }
      end

      # Writes the topics and the engine configuration to +path+ as JSON.
      def save(path)
        data = {
          topics: @topics.map(&:to_h),
          config: {
            min_cluster_size: @min_cluster_size,
            min_samples: @min_samples,
            reduce_dimensions: @reduce_dimensions,
            n_components: @n_components,
            labeling_method: @labeling_method
          }
        }
        File.write(path, JSON.pretty_generate(data))
      end

      # Restores an engine written by #save.
      #
      # @param path [String]
      # @param clustering_backend [#fit_predict, nil] clusterer for the
      #   restored engine; the default clusterer is built when nil
      # @return [Engine]
      def self.load(path, clustering_backend: nil)
        data = JSON.parse(File.read(path), symbolize_names: true)

        # Ignore keys this version does not know rather than failing in `new`.
        config = (data[:config] || {}).select { |key, _| CONFIG_KEYS.include?(key) }
        # JSON has no symbols: the labeling method comes back as a String.
        config[:labeling_method] = config[:labeling_method].to_sym if config[:labeling_method]

        engine = new(**config, clustering_backend: clustering_backend)
        engine.instance_variable_set(:@topics, (data[:topics] || []).map { |t| Topic.from_h(t) })
        engine
      end

      private

      # Builds the HDBSCAN clusterer from ClusterKit.
      #
      # @raise [RuntimeError] when the clusterkit gem cannot be loaded
      def build_default_clusterer
        require 'clusterkit'
        ClusterKit::Clustering::HDBSCAN.new(
          min_cluster_size: @min_cluster_size,
          min_samples: @min_samples,
          metric: 'euclidean'
        )
      rescue LoadError
        raise "ClusterKit required for topic modeling. Add 'gem \"clusterkit\"' to your Gemfile."
      end

      # Projects the embeddings to @n_components dimensions with UMAP. Returns
      # the embeddings unchanged when ClusterKit cannot be loaded.
      def reduce_dimensions(embeddings)
        require 'clusterkit'

        umap = ClusterKit::Dimensionality::UMAP.new(
          n_components: @n_components,
          n_neighbors: 15,
          random_seed: 42 # For reproducibility
        )

        umap.fit_transform(embeddings)
      rescue LoadError
        puts "Warning: Dimensionality reduction requires ClusterKit. Using original embeddings." if @verbose
        embeddings
      end

      # Groups document indices by cluster id and wraps every group in a Topic.
      #
      # @param cluster_ids [Array<Integer>] one id per document, -1 for outliers
      # @return [Array<Topic>] sorted by id
      def build_topics(cluster_ids)
        @cluster_ids = cluster_ids
        @outliers = nil # memo depends on @cluster_ids

        members = Hash.new { |hash, cluster_id| hash[cluster_id] = [] }
        cluster_ids.each_with_index do |cluster_id, doc_idx|
          members[cluster_id] << doc_idx unless cluster_id == -1
        end

        topics = members.map do |cluster_id, doc_indices|
          Topic.new(
            id: cluster_id,
            document_indices: doc_indices,
            documents: doc_indices.map { |i| @documents[i] },
            embeddings: doc_indices.map { |i| @embeddings[i] },
            metadata: doc_indices.map { |i| @metadata[i] }
          )
        end
        topics.sort_by(&:id)
      end

      # Stores the 20 most distinctive terms on every topic, as computed by the
      # term extractor from the topic's documents against all documents.
      def extract_topic_terms
        @topics.each do |topic|
          terms = @term_extractor.extract_distinctive_terms(
            topic_docs: topic.documents,
            all_docs: @documents,
            top_n: 20
          )

          topic.set_terms(terms)
        end
      end

      # Asks the labeler for a label per topic, based on the topic's terms and
      # its first three documents, and stores the answer on the topic.
      def generate_topic_labels
        @topics.each do |topic|
          result = @labeler.generate_label(
            topic: topic,
            terms: topic.terms,
            documents: topic.documents.first(3)
          )

          topic.set_label(result[:label])
          # NOTE(review): these fields are written straight into the Topic's
          # instance variables; confirm Topic exposes no setter for them.
          topic.instance_variable_set(:@description, result[:description]) if result[:description]
          topic.instance_variable_set(:@label_confidence, result[:confidence])
          topic.instance_variable_set(:@themes, result[:themes]) if result[:themes]
        end
      end

      # @return [Integer] number of documents labelled -1
      def count_outliers(cluster_ids)
        cluster_ids.count { |id| id == -1 }
      end

      # Assigns every embedding to the topic whose centroid is closest in
      # Euclidean distance; the first topic wins a tie.
      #
      # @return [Array] one topic id per embedding
      def assign_to_nearest_topic(embeddings)
        centroids = @topics.map(&:centroid)

        embeddings.map do |embedding|
          nearest = centroids.each_index.min_by do |idx|
            Math.sqrt(embedding.zip(centroids[idx]).sum { |a, b| (a - b)**2 })
          end

          @topics[nearest].id
        end
      end
    end
  end
end