ragnar-cli 0.1.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ require 'clusterkit'
2
+
3
module Ragnar
  # Singleton service that projects a full-size query embedding into the
  # reduced embedding space.
  #
  # The primary path loads a saved UMAP model through
  # ClusterKit::Dimensionality::UMAP.load_model and calls its +transform+.
  # When that fails for any reason (missing file, load error, transform
  # error) the service falls back to a k-nearest-neighbour approximation
  # built from the reduced embeddings already stored in the database.
  class UmapTransformService
    include Singleton

    # Database location used by the k-NN fallback and by #model_available?.
    # NOTE(review): Ragnar::DEFAULT_DB_PATH is "ragnar_database"; confirm
    # which of the two paths is intended before unifying them.
    FALLBACK_DB_PATH = "./rag_database".freeze

    # Number of nearest neighbours averaged by the k-NN fallback.
    KNN_NEIGHBORS = 5

    # Added to every distance before inverting it, so an exact match
    # (distance 0) does not divide by zero.
    DISTANCE_EPSILON = 0.001

    def initialize
      @umap_model = nil
      @loaded_model_path = nil
      @model_path = "umap_model.bin"
    end

    # Transform a query embedding to reduced space using a saved UMAP model.
    #
    # @param query_embedding [Array<Numeric>] the full-size query embedding
    # @param model_path [String, nil] model file to use; defaults to
    #   "umap_model.bin"
    # @return [Array<Numeric>] the reduced embedding (from the model, or from
    #   the k-NN approximation when the model cannot be used)
    # @raise [RuntimeError] when the model is unusable and the k-NN fallback
    #   has no usable neighbours either
    def transform_query(query_embedding, model_path = nil)
      model_path ||= @model_path

      # Reload when nothing is cached yet or when the caller asks for a
      # different model file than the one currently cached.
      load_model(model_path) unless @umap_model && @loaded_model_path == model_path

      # transform expects a 2D array, even for a single embedding, and
      # returns one row per input row.
      @umap_model.transform([query_embedding]).first
    rescue => e
      # Deliberate best-effort: any failure on the model path degrades to the
      # k-NN approximation instead of aborting the query.
      puts "Warning: Could not use UMAP model for transform: #{e.message}"
      puts "Falling back to k-NN approximation..."
      knn_approximate_transform(query_embedding)
    end

    # Check whether a transform can be attempted at all.
    #
    # @param model_path [String, nil] model file to look for; defaults to
    #   "umap_model.bin"
    # @return [Boolean] true when the model file exists, or otherwise when the
    #   database reports at least one reduced embedding for the k-NN fallback
    def model_available?(model_path = nil)
      model_path ||= @model_path
      return true if File.exist?(model_path)

      stats = Database.new(FALLBACK_DB_PATH).get_stats
      stats[:with_reduced_embeddings] > 0
    end

    private

    # Load the UMAP model stored at +model_path+ and remember which path it
    # came from so a later call with another path triggers a reload.
    #
    # @raise [RuntimeError] when no file exists at +model_path+
    def load_model(model_path)
      unless File.exist?(model_path)
        raise "UMAP model not found at #{model_path}. Please train a model first."
      end

      @umap_model = ClusterKit::Dimensionality::UMAP.load_model(model_path)
      @loaded_model_path = model_path
      puts "UMAP model loaded for query transformation"
    end

    # Approximate the transform as the inverse-distance-weighted average of
    # the reduced embeddings of the nearest stored documents, where nearness
    # is measured in the full embedding space.
    #
    # @raise [RuntimeError] when the database has no reduced embeddings or no
    #   stored embedding is comparable with the query
    def knn_approximate_transform(query_embedding)
      database = Database.new(FALLBACK_DB_PATH)
      if database.get_stats[:with_reduced_embeddings] == 0
        raise "No reduced embeddings available in database"
      end

      candidates = database.get_embeddings.filter_map do |doc|
        next unless doc[:embedding] && doc[:reduced_embedding]

        distance = euclidean_distance(query_embedding, doc[:embedding])
        # A dimension mismatch yields an infinite distance. Such documents
        # would get weight 0, and if they were the only candidates the result
        # would be 0.0 / 0.0 (NaN), so they are dropped here instead.
        next unless distance.finite?

        { distance: distance, reduced: doc[:reduced_embedding] }
      end

      # min_by(k) returns the k closest candidates in ascending order.
      nearest = candidates.min_by(KNN_NEIGHBORS) { |candidate| candidate[:distance] }
      raise "No neighbors found for transform" if nearest.empty?

      weighted_sum = Array.new(nearest.first[:reduced].size, 0.0)
      total_weight = 0.0

      nearest.each do |neighbor|
        # Closer neighbours contribute more.
        weight = 1.0 / (neighbor[:distance] + DISTANCE_EPSILON)
        total_weight += weight
        neighbor[:reduced].each_with_index do |value, dim|
          weighted_sum[dim] += value * weight
        end
      end

      # Normalise so the weights sum to one.
      weighted_sum.map { |value| value / total_weight }
    end

    # Euclidean distance between two vectors.
    #
    # @return [Float] the distance, or Float::INFINITY when the vectors have
    #   different sizes and therefore cannot be compared
    def euclidean_distance(vec1, vec2)
      return Float::INFINITY if vec1.size != vec2.size

      squared = vec1.zip(vec2).sum(0.0) do |a, b|
        diff = a - b
        diff * diff
      end
      Math.sqrt(squared)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragnar # Top-level namespace of the gem; this file only defines its version.
4
+ VERSION = "0.1.0.pre.1" # Version string of this release.
5
+ end
data/lib/ragnar.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require "red-candle"
5
+ require "lancelot"
6
+ require "clusterkit"
7
+ require "baran"
8
+ require "tty-progressbar"
9
+ require "securerandom"
10
+ require "json"
11
+ require "fileutils"
12
+ require "singleton"
13
+
14
+ module Ragnar # Top-level namespace: error class and default settings.
15
+ class Error < StandardError; end # Error class defined under the Ragnar namespace.
16
+
17
+ DEFAULT_DB_PATH = "ragnar_database" # Default database location (relative path).
18
+ DEFAULT_CHUNK_SIZE = 512 # Default chunk size; unit is not visible here (presumably characters or tokens - TODO confirm in the chunker).
19
+ DEFAULT_CHUNK_OVERLAP = 50 # Default overlap between consecutive chunks, presumably in the same unit as DEFAULT_CHUNK_SIZE - TODO confirm.
20
+ DEFAULT_EMBEDDING_MODEL = "jinaai/jina-embeddings-v2-base-en" # Default embedding model identifier.
21
+ DEFAULT_REDUCED_DIMENSIONS = 64 # Reduce embeddings from 768D to 64D for faster search
22
+ end
23
+
24
+ require_relative "ragnar/version"
25
+ require_relative "ragnar/database"
26
+ require_relative "ragnar/chunker"
27
+ require_relative "ragnar/embedder"
28
+ require_relative "ragnar/indexer"
29
+ require_relative "ragnar/umap_processor"
30
+ require_relative "ragnar/llm_manager"
31
+ require_relative "ragnar/context_repacker"
32
+ require_relative "ragnar/query_rewriter"
33
+ require_relative "ragnar/umap_transform_service"
34
+ require_relative "ragnar/query_processor"
35
+ require_relative "ragnar/topic_modeling"
36
+ require_relative "ragnar/cli"
data/lib/ragnar_cli.rb ADDED
@@ -0,0 +1,2 @@
1
+ # frozen_string_literal: true
2
+ require 'ragnar'
metadata ADDED
@@ -0,0 +1,234 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ragnar-cli
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Petersen
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-08-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: red-candle
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: lancelot
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.3'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 0.3.2
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '0.3'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 0.3.2
61
+ - !ruby/object:Gem::Dependency
62
+ name: clusterkit
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: 0.1.0.pre.2
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: 0.1.0.pre.2
75
+ - !ruby/object:Gem::Dependency
76
+ name: baran
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '0.2'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '0.2'
89
+ - !ruby/object:Gem::Dependency
90
+ name: parsekit
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: 0.1.0.pre.1
96
+ type: :runtime
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: 0.1.0.pre.1
103
+ - !ruby/object:Gem::Dependency
104
+ name: tty-progressbar
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '0.18'
110
+ type: :runtime
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '0.18'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rake
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '13.0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '13.0'
131
+ - !ruby/object:Gem::Dependency
132
+ name: rspec
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '3.0'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '3.0'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rubocop
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '1.21'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '1.21'
159
+ - !ruby/object:Gem::Dependency
160
+ name: simplecov
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '0.22'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '0.22'
173
+ description: Ragnar is a high-performance RAG system that leverages Rust libraries
174
+ through Ruby bindings for embeddings, vector search, and topic modeling. It provides
175
+ a complete CLI for indexing documents and querying with LLMs.
176
+ email:
177
+ - chris@example.com
178
+ executables:
179
+ - ragnar
180
+ extensions: []
181
+ extra_rdoc_files: []
182
+ files:
183
+ - LICENSE.txt
184
+ - README.md
185
+ - exe/ragnar
186
+ - lib/ragnar.rb
187
+ - lib/ragnar/chunker.rb
188
+ - lib/ragnar/cli.rb
189
+ - lib/ragnar/context_repacker.rb
190
+ - lib/ragnar/database.rb
191
+ - lib/ragnar/embedder.rb
192
+ - lib/ragnar/indexer.rb
193
+ - lib/ragnar/llm_manager.rb
194
+ - lib/ragnar/query_processor.rb
195
+ - lib/ragnar/query_rewriter.rb
196
+ - lib/ragnar/topic_modeling.rb
197
+ - lib/ragnar/topic_modeling/engine.rb
198
+ - lib/ragnar/topic_modeling/labeling_strategies.rb
199
+ - lib/ragnar/topic_modeling/llm_adapter.rb
200
+ - lib/ragnar/topic_modeling/metrics.rb
201
+ - lib/ragnar/topic_modeling/term_extractor.rb
202
+ - lib/ragnar/topic_modeling/topic.rb
203
+ - lib/ragnar/topic_modeling/topic_labeler.rb
204
+ - lib/ragnar/umap_processor.rb
205
+ - lib/ragnar/umap_transform_service.rb
206
+ - lib/ragnar/version.rb
207
+ - lib/ragnar_cli.rb
208
+ homepage: https://github.com/cpetersen/ragnar
209
+ licenses:
210
+ - MIT
211
+ metadata:
212
+ homepage_uri: https://github.com/cpetersen/ragnar
213
+ source_code_uri: https://github.com/cpetersen/ragnar
214
+ changelog_uri: https://github.com/cpetersen/ragnar/blob/main/CHANGELOG.md
215
+ post_install_message:
216
+ rdoc_options: []
217
+ require_paths:
218
+ - lib
219
+ required_ruby_version: !ruby/object:Gem::Requirement
220
+ requirements:
221
+ - - ">="
222
+ - !ruby/object:Gem::Version
223
+ version: 3.0.0
224
+ required_rubygems_version: !ruby/object:Gem::Requirement
225
+ requirements:
226
+ - - ">="
227
+ - !ruby/object:Gem::Version
228
+ version: '0'
229
+ requirements: []
230
+ rubygems_version: 3.5.3
231
+ signing_key:
232
+ specification_version: 4
233
+ summary: A Ruby + Rust powered RAG (Retrieval-Augmented Generation) system
234
+ test_files: []