ragnar-cli 0.1.0.pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +439 -0
- data/exe/ragnar +6 -0
- data/lib/ragnar/chunker.rb +97 -0
- data/lib/ragnar/cli.rb +542 -0
- data/lib/ragnar/context_repacker.rb +121 -0
- data/lib/ragnar/database.rb +267 -0
- data/lib/ragnar/embedder.rb +137 -0
- data/lib/ragnar/indexer.rb +234 -0
- data/lib/ragnar/llm_manager.rb +43 -0
- data/lib/ragnar/query_processor.rb +398 -0
- data/lib/ragnar/query_rewriter.rb +75 -0
- data/lib/ragnar/topic_modeling/engine.rb +221 -0
- data/lib/ragnar/topic_modeling/labeling_strategies.rb +300 -0
- data/lib/ragnar/topic_modeling/llm_adapter.rb +131 -0
- data/lib/ragnar/topic_modeling/metrics.rb +186 -0
- data/lib/ragnar/topic_modeling/term_extractor.rb +170 -0
- data/lib/ragnar/topic_modeling/topic.rb +117 -0
- data/lib/ragnar/topic_modeling/topic_labeler.rb +61 -0
- data/lib/ragnar/topic_modeling.rb +24 -0
- data/lib/ragnar/umap_processor.rb +228 -0
- data/lib/ragnar/umap_transform_service.rb +124 -0
- data/lib/ragnar/version.rb +5 -0
- data/lib/ragnar.rb +36 -0
- data/lib/ragnar_cli.rb +2 -0
- metadata +234 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'clusterkit'
|
2
|
+
|
3
|
+
module Ragnar
  # Projects a query embedding into the reduced (UMAP) space.
  #
  # The service first tries the serialized UMAP model via ClusterKit. If
  # that fails for any reason (missing file, load error, transform error) it
  # falls back to a k-nearest-neighbour approximation built from documents in
  # the database that carry both a full and a reduced embedding.
  #
  # The class is a Singleton so a loaded model is reused between calls.
  class UmapTransformService
    include Singleton

    # Model file used when no explicit path is passed.
    DEFAULT_MODEL_PATH = "umap_model.bin"

    # Database consulted by the k-NN fallback.
    # NOTE(review): the gem's Ragnar::DEFAULT_DB_PATH is "ragnar_database",
    # which does not match this value. The historical default is preserved
    # here; callers can override it through #database_path=.
    DEFAULT_DATABASE_PATH = "./rag_database"

    # Number of neighbours averaged by the k-NN fallback.
    KNN_NEIGHBORS = 5

    # Added to every distance so an exact match does not divide by zero.
    DISTANCE_EPSILON = 0.001

    # @return [String] database path used by the k-NN fallback
    attr_accessor :database_path

    def initialize
      @umap_model = nil
      @loaded_model_path = nil
      @model_path = DEFAULT_MODEL_PATH
      @database_path = DEFAULT_DATABASE_PATH
    end

    # Transform a query embedding to reduced space using the saved UMAP model.
    #
    # @param query_embedding [Array<Numeric>] full-dimensional query vector
    # @param model_path [String, nil] model file to use. When nil, the model
    #   that is already loaded is reused, or the default path if none is loaded.
    #   When given and different from the loaded model's path, the model is
    #   reloaded from that path instead of silently reusing the cached one.
    # @return [Array<Float>] the reduced embedding
    # @raise [RuntimeError] if the UMAP transform and the k-NN fallback both fail
    def transform_query(query_embedding, model_path = nil)
      requested_path = model_path || @loaded_model_path || @model_path
      load_model(requested_path) unless @umap_model && @loaded_model_path == requested_path

      # The transform method expects a 2D array (even for a single embedding),
      # so wrap the query and unwrap the single result row.
      @umap_model.transform([query_embedding]).first
    rescue => e
      # Best-effort: any failure above degrades to the k-NN approximation.
      puts "Warning: Could not use UMAP model for transform: #{e.message}"
      puts "Falling back to k-NN approximation..."
      knn_approximate_transform(query_embedding)
    end

    # Check whether a transform can be performed at all.
    #
    # @param model_path [String, nil] model file to look for (default path when nil)
    # @return [Boolean] true if the model file exists, or if the database has
    #   reduced embeddings usable by the k-NN fallback
    def model_available?(model_path = nil)
      return true if File.exist?(model_path || @model_path)

      # `to_i` keeps a missing stats entry from raising NoMethodError on `>`.
      Database.new(@database_path).get_stats[:with_reduced_embeddings].to_i.positive?
    end

    private

    # Load the UMAP model from disk and remember which path it came from.
    #
    # @param model_path [String]
    # @raise [RuntimeError] if the file does not exist
    def load_model(model_path)
      unless File.exist?(model_path)
        raise "UMAP model not found at #{model_path}. Please train a model first."
      end

      @umap_model = ClusterKit::Dimensionality::UMAP.load_model(model_path)
      @loaded_model_path = model_path
      puts "UMAP model loaded for query transformation"
    end

    # Approximate the transform as the inverse-distance weighted mean of the
    # reduced embeddings of the nearest documents in full embedding space.
    #
    # @param query_embedding [Array<Numeric>]
    # @return [Array<Float>]
    # @raise [RuntimeError] if the database has no reduced embeddings, or no
    #   stored embedding is comparable to the query
    def knn_approximate_transform(query_embedding)
      database = Database.new(@database_path)
      if database.get_stats[:with_reduced_embeddings] == 0
        raise "No reduced embeddings available in database"
      end

      candidates = database.get_embeddings.filter_map do |doc|
        next unless doc[:embedding] && doc[:reduced_embedding]

        distance = euclidean_distance(query_embedding, doc[:embedding])
        # A non-finite distance (dimension mismatch, NaN component) would get
        # weight 0 or NaN; if all neighbours were like that the result would
        # be a vector of NaN, so such documents are skipped.
        next unless distance.finite?

        { distance: distance, reduced: doc[:reduced_embedding] }
      end

      k_nearest = candidates.min_by(KNN_NEIGHBORS) { |candidate| candidate[:distance] }
      raise "No neighbors found for transform" if k_nearest.empty?

      weighted_sum = Array.new(k_nearest.first[:reduced].size, 0.0)
      total_weight = 0.0

      k_nearest.each do |neighbor|
        # Inverse distance as weight (closer = higher weight).
        weight = 1.0 / (neighbor[:distance] + DISTANCE_EPSILON)
        total_weight += weight
        neighbor[:reduced].each_with_index { |val, dim| weighted_sum[dim] += val * weight }
      end

      weighted_sum.map { |val| val / total_weight }
    end

    # Euclidean distance between two vectors.
    #
    # @return [Float] the distance, or Float::INFINITY when the sizes differ
    def euclidean_distance(vec1, vec2)
      return Float::INFINITY if vec1.size != vec2.size

      Math.sqrt(vec1.zip(vec2).sum(0.0) { |a, b| (a - b)**2 })
    end
  end
end
|
data/lib/ragnar.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "thor"
|
4
|
+
require "red-candle"
|
5
|
+
require "lancelot"
|
6
|
+
require "clusterkit"
|
7
|
+
require "baran"
|
8
|
+
require "tty-progressbar"
|
9
|
+
require "securerandom"
|
10
|
+
require "json"
|
11
|
+
require "fileutils"
|
12
|
+
require "singleton"
|
13
|
+
|
14
|
+
# Top-level namespace of the gem; defines the shared error class and the
# default settings referenced as Ragnar::DEFAULT_*.
module Ragnar
|
15
|
+
# Error class namespaced under Ragnar; subclasses StandardError.
class Error < StandardError; end
|
16
|
+
|
17
|
+
# Default database location (a relative path).
# NOTE(review): presumably used when no path is supplied — verify against callers.
DEFAULT_DB_PATH = "ragnar_database"
|
18
|
+
# Default chunk size; the unit (characters vs. tokens) is not visible here — TODO confirm.
DEFAULT_CHUNK_SIZE = 512
|
19
|
+
# Default overlap between chunks, presumably in the same unit as DEFAULT_CHUNK_SIZE — TODO confirm.
DEFAULT_CHUNK_OVERLAP = 50
|
20
|
+
# Default embedding model identifier (looks like a Hugging Face repo id — confirm).
DEFAULT_EMBEDDING_MODEL = "jinaai/jina-embeddings-v2-base-en"
|
21
|
+
DEFAULT_REDUCED_DIMENSIONS = 64 # Reduce embeddings from 768D to 64D for faster search
|
22
|
+
end
|
23
|
+
|
24
|
+
require_relative "ragnar/version"
|
25
|
+
require_relative "ragnar/database"
|
26
|
+
require_relative "ragnar/chunker"
|
27
|
+
require_relative "ragnar/embedder"
|
28
|
+
require_relative "ragnar/indexer"
|
29
|
+
require_relative "ragnar/umap_processor"
|
30
|
+
require_relative "ragnar/llm_manager"
|
31
|
+
require_relative "ragnar/context_repacker"
|
32
|
+
require_relative "ragnar/query_rewriter"
|
33
|
+
require_relative "ragnar/umap_transform_service"
|
34
|
+
require_relative "ragnar/query_processor"
|
35
|
+
require_relative "ragnar/topic_modeling"
|
36
|
+
require_relative "ragnar/cli"
|
data/lib/ragnar_cli.rb
ADDED
metadata
ADDED
@@ -0,0 +1,234 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ragnar-cli
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0.pre.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Petersen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-08-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: thor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: red-candle
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: lancelot
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.3'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 0.3.2
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0.3'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 0.3.2
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: clusterkit
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: 0.1.0.pre.2
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 0.1.0.pre.2
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: baran
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0.2'
|
82
|
+
type: :runtime
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0.2'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: parsekit
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 0.1.0.pre.1
|
96
|
+
type: :runtime
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 0.1.0.pre.1
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: tty-progressbar
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.18'
|
110
|
+
type: :runtime
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0.18'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: rake
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '13.0'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '13.0'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: rspec
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '3.0'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - "~>"
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '3.0'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: rubocop
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - "~>"
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '1.21'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - "~>"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '1.21'
|
159
|
+
- !ruby/object:Gem::Dependency
|
160
|
+
name: simplecov
|
161
|
+
requirement: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - "~>"
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0.22'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - "~>"
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '0.22'
|
173
|
+
description: Ragnar is a high-performance RAG system that leverages Rust libraries
|
174
|
+
through Ruby bindings for embeddings, vector search, and topic modeling. It provides
|
175
|
+
a complete CLI for indexing documents and querying with LLMs.
|
176
|
+
email:
|
177
|
+
- chris@example.com
|
178
|
+
executables:
|
179
|
+
- ragnar
|
180
|
+
extensions: []
|
181
|
+
extra_rdoc_files: []
|
182
|
+
files:
|
183
|
+
- LICENSE.txt
|
184
|
+
- README.md
|
185
|
+
- exe/ragnar
|
186
|
+
- lib/ragnar.rb
|
187
|
+
- lib/ragnar/chunker.rb
|
188
|
+
- lib/ragnar/cli.rb
|
189
|
+
- lib/ragnar/context_repacker.rb
|
190
|
+
- lib/ragnar/database.rb
|
191
|
+
- lib/ragnar/embedder.rb
|
192
|
+
- lib/ragnar/indexer.rb
|
193
|
+
- lib/ragnar/llm_manager.rb
|
194
|
+
- lib/ragnar/query_processor.rb
|
195
|
+
- lib/ragnar/query_rewriter.rb
|
196
|
+
- lib/ragnar/topic_modeling.rb
|
197
|
+
- lib/ragnar/topic_modeling/engine.rb
|
198
|
+
- lib/ragnar/topic_modeling/labeling_strategies.rb
|
199
|
+
- lib/ragnar/topic_modeling/llm_adapter.rb
|
200
|
+
- lib/ragnar/topic_modeling/metrics.rb
|
201
|
+
- lib/ragnar/topic_modeling/term_extractor.rb
|
202
|
+
- lib/ragnar/topic_modeling/topic.rb
|
203
|
+
- lib/ragnar/topic_modeling/topic_labeler.rb
|
204
|
+
- lib/ragnar/umap_processor.rb
|
205
|
+
- lib/ragnar/umap_transform_service.rb
|
206
|
+
- lib/ragnar/version.rb
|
207
|
+
- lib/ragnar_cli.rb
|
208
|
+
homepage: https://github.com/cpetersen/ragnar
|
209
|
+
licenses:
|
210
|
+
- MIT
|
211
|
+
metadata:
|
212
|
+
homepage_uri: https://github.com/cpetersen/ragnar
|
213
|
+
source_code_uri: https://github.com/cpetersen/ragnar
|
214
|
+
changelog_uri: https://github.com/cpetersen/ragnar/blob/main/CHANGELOG.md
|
215
|
+
post_install_message:
|
216
|
+
rdoc_options: []
|
217
|
+
require_paths:
|
218
|
+
- lib
|
219
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
220
|
+
requirements:
|
221
|
+
- - ">="
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: 3.0.0
|
224
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
225
|
+
requirements:
|
226
|
+
- - ">="
|
227
|
+
- !ruby/object:Gem::Version
|
228
|
+
version: '0'
|
229
|
+
requirements: []
|
230
|
+
rubygems_version: 3.5.3
|
231
|
+
signing_key:
|
232
|
+
specification_version: 4
|
233
|
+
summary: A Ruby + Rust powered RAG (Retrieval-Augmented Generation) system
|
234
|
+
test_files: []
|