leann 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +375 -0
- data/exe/leann +167 -0
- data/lib/generators/leann/install/install_generator.rb +51 -0
- data/lib/generators/leann/install/templates/migration.rb.erb +28 -0
- data/lib/leann/backend/base.rb +51 -0
- data/lib/leann/backend/leann_graph.rb +476 -0
- data/lib/leann/builder.rb +317 -0
- data/lib/leann/configuration.rb +148 -0
- data/lib/leann/embedding/base.rb +63 -0
- data/lib/leann/embedding/fastembed.rb +120 -0
- data/lib/leann/embedding/ollama.rb +194 -0
- data/lib/leann/embedding/openai.rb +149 -0
- data/lib/leann/embedding/ruby_llm.rb +57 -0
- data/lib/leann/errors.rb +71 -0
- data/lib/leann/index.rb +236 -0
- data/lib/leann/rails/active_record/index.rb +70 -0
- data/lib/leann/rails/active_record/passage.rb +56 -0
- data/lib/leann/rails/builder.rb +205 -0
- data/lib/leann/rails/railtie.rb +16 -0
- data/lib/leann/rails/searcher.rb +117 -0
- data/lib/leann/rails/storage/active_record_backend.rb +332 -0
- data/lib/leann/rails.rb +90 -0
- data/lib/leann/ruby_llm/search.rb +89 -0
- data/lib/leann/search_result.rb +195 -0
- data/lib/leann/searcher.rb +189 -0
- data/lib/leann/version.rb +3 -0
- data/lib/leann.rb +133 -0
- metadata +177 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leann
|
|
4
|
+
module Rails
|
|
5
|
+
# ActiveRecord-based storage backend for LEANN graphs
|
|
6
|
+
#
|
|
7
|
+
# Stores graph structure (neighbor lists) in the passages table,
|
|
8
|
+
# avoiding the need for separate binary files.
|
|
9
|
+
#
|
|
10
|
+
class ActiveRecordBackend
  # Highest layer a node may be assigned to during construction.
  MAX_LEVEL = 16

  attr_reader :index, :dimensions, :m, :ef_construction

  # Reads the graph parameters from the index record's config, falling back
  # to the global Leann configuration for values the record does not set.
  #
  # @param index [Leann::Rails::Index] The index record
  def initialize(index)
    @index = index
    @dimensions = index.dimensions
    config = index.config || {}
    @m = config["hnsw_m"] || Leann.configuration.hnsw_m
    @ef_construction = config["hnsw_ef_construction"] || Leann.configuration.hnsw_ef_construction
    @entry_point_id = nil
    @max_level = 0
  end

  # Build the graph from documents and embeddings, then persist the
  # passages (with their neighbor lists) and the graph metadata.
  # Does nothing when +documents+ is empty.
  #
  # @param documents [Array<Hash>] Documents with :id, :text, :metadata
  # @param embeddings [Array<Array<Float>>] Embedding vectors, in the same
  #   order as +documents+ (embeddings[i] belongs to documents[i])
  # @return [nil]
  def build(documents, embeddings)
    return if documents.empty?

    puts "Building LEANN graph with #{documents.size} nodes (M=#{@m})..."

    # Build the in-memory graph first, then write it out in two steps.
    graph = build_graph(documents, embeddings)
    store_passages(documents, graph)
    update_index_metadata(graph)

    puts "Graph built and stored in database: #{documents.size} passages"
  end

  # Search the graph.
  #
  # @param query_embedding [Array<Float>] Query vector
  # @param embedding_provider [Leann::Embedding::Base] Provider for recomputing embeddings
  # @param passages [Hash] Passage texts by ID (for embedding recomputation)
  # @param limit [Integer] Number of results
  # @return [Array<Array>] Array of [id, score] pairs; empty when the index
  #   has no passages or no stored entry point
  def search(query_embedding, embedding_provider:, passages:, limit:)
    return [] if @index.passages.empty?

    # Graph metadata is written by #update_index_metadata at build time.
    config = @index.config || {}
    entry_point_id = config["entry_point_id"]
    max_level = config["max_level"] || 0

    return [] unless entry_point_id

    # Candidate list size: at least 50, or twice the requested limit.
    ef_search = [limit * 2, 50].max
    search_hnsw(query_embedding, entry_point_id, max_level, ef_search, embedding_provider, passages, limit)
  end

  private

  # Builds the in-memory graph by inserting the documents one at a time.
  #
  # @return [Hash] { graph:, entry_point:, max_level: } where graph maps a
  #   document id to { embedding:, neighbors: [per-level id lists] }
  def build_graph(documents, embeddings)
    graph = {}
    levels = {}
    entry_point = nil
    max_level = 0

    documents.each_with_index do |doc, idx|
      id = doc[:id]
      embedding = embeddings[idx]
      level = random_level

      graph[id] = { embedding: embedding, neighbors: Array.new(level + 1) { [] } }
      levels[id] = level

      if entry_point.nil?
        # The first document seeds the graph.
        entry_point = id
        max_level = level
      else
        insert_node(graph, id, embedding, entry_point, max_level, level)

        # A node that reaches a new top layer becomes the entry point.
        if level > max_level
          max_level = level
          entry_point = id
        end
      end
    end

    { graph: graph, entry_point: entry_point, max_level: max_level }
  end

  # Draws the top layer for a new node as floor(-ln(U) * mL) with
  # mL = 1 / ln(M), so a node reaches layer k or higher with probability
  # M**-k and the large majority of nodes stay on layer 0.
  #
  # @return [Integer] a level in 0..MAX_LEVEL
  def random_level
    # ln(1) == 0 would make mL infinite, so treat M <= 1 as M == 2.
    base = [@m, 2].max
    ml = 1.0 / Math.log(base)

    # rand is in [0, 1), hence 1.0 - rand is in (0, 1] and the log is finite.
    level = (-Math.log(1.0 - rand) * ml).floor
    [level, MAX_LEVEL].min
  end

  # Links a freshly added node into the graph on every layer from
  # +node_level+ down to 0.
  def insert_node(graph, new_id, new_embedding, entry_point, max_level, node_level)
    current = entry_point

    # Descend greedily through the layers above the node's own top layer.
    max_level.downto(node_level + 1) do |level|
      current = greedy_search_level(graph, new_embedding, current, level)
    end

    node_level.downto(0) do |level|
      neighbors = search_level(graph, new_embedding, current, level, @ef_construction)

      # Keep the M closest candidates as this node's neighbors.
      selected = select_neighbors(graph, new_embedding, neighbors, @m)
      graph[new_id][:neighbors][level] = selected

      # Add the reverse links, pruning any list that grows beyond 2 * M.
      selected.each do |neighbor_id|
        neighbor_neighbors = graph[neighbor_id][:neighbors][level] || []
        neighbor_neighbors << new_id

        if neighbor_neighbors.size > @m * 2
          neighbor_embedding = graph[neighbor_id][:embedding]
          graph[neighbor_id][:neighbors][level] = select_neighbors(
            graph, neighbor_embedding, neighbor_neighbors, @m * 2
          )
        else
          graph[neighbor_id][:neighbors][level] = neighbor_neighbors
        end
      end

      # Continue on the next layer down from the closest selected neighbor.
      current = selected.first if selected.any?
    end
  end

  # Hill-climbs on one layer: keeps moving to a closer neighbor until a
  # full pass over the neighbor list brings no improvement.
  #
  # @return [Object] id of the node the climb ended on
  def greedy_search_level(graph, query, entry, level)
    current = entry
    current_dist = cosine_distance(query, graph[current][:embedding])

    loop do
      changed = false
      neighbors = graph[current][:neighbors][level] || []

      neighbors.each do |neighbor|
        next unless graph[neighbor]

        dist = cosine_distance(query, graph[neighbor][:embedding])
        if dist < current_dist
          current = neighbor
          current_dist = dist
          changed = true
        end
      end

      break unless changed
    end

    current
  end

  # Best-first search on one layer that keeps up to +ef+ closest nodes.
  #
  # @return [Array] node ids ordered from closest to farthest
  def search_level(graph, query, entry, level, ef)
    visited = Set.new([entry])
    candidates = [[cosine_distance(query, graph[entry][:embedding]), entry]]
    results = candidates.dup

    while candidates.any?
      candidates.sort_by!(&:first)
      current_dist, current = candidates.shift

      # results stays sorted, so its last entry is the current worst hit.
      break if results.any? && current_dist > results.last.first

      neighbors = graph[current][:neighbors][level] || []
      neighbors.each do |neighbor|
        next if visited.include?(neighbor)
        next unless graph[neighbor]

        visited << neighbor
        dist = cosine_distance(query, graph[neighbor][:embedding])

        if results.size < ef || dist < results.last.first
          candidates << [dist, neighbor]
          results << [dist, neighbor]
          results.sort_by!(&:first)
          results.pop if results.size > ef
        end
      end
    end

    results.map(&:last)
  end

  # Returns the +m+ candidates closest to +query+. Lists that already fit
  # are returned unchanged (and in their original order).
  def select_neighbors(graph, query, candidates, m)
    return candidates if candidates.size <= m

    scored = candidates.map do |id|
      [cosine_distance(query, graph[id][:embedding]), id]
    end

    scored.sort_by(&:first).first(m).map(&:last)
  end

  # Cosine distance (1 - cosine similarity) between two vectors. Iterates
  # over the indices of +a+ and reads +b+ at the same positions.
  def cosine_distance(a, b)
    dot = 0.0
    norm_a = 0.0
    norm_b = 0.0

    a.each_with_index do |val, i|
      dot += val * b[i]
      norm_a += val * val
      norm_b += b[i] * b[i]
    end

    # The small epsilon keeps the division defined for zero vectors.
    similarity = dot / (Math.sqrt(norm_a) * Math.sqrt(norm_b) + 1e-10)
    1.0 - similarity
  end

  # Writes one passage row per document in a single insert_all call.
  # Only the layer-0 neighbor list of each node is stored.
  def store_passages(documents, graph_data)
    graph = graph_data[:graph]
    # One timestamp for the whole batch instead of two clock reads per row.
    now = Time.current

    passage_records = documents.map do |doc|
      node = graph[doc[:id]]
      neighbors = node[:neighbors][0] || []

      {
        leann_index_id: @index.id,
        external_id: doc[:id],
        text: doc[:text],
        metadata: doc[:metadata] || {},
        neighbors: neighbors,
        created_at: now,
        updated_at: now
      }
    end

    Passage.insert_all(passage_records)
  end

  # Merges the entry point, top level and build parameters into the index
  # record's config so that #search can read them back.
  def update_index_metadata(graph_data)
    @index.update!(
      config: (@index.config || {}).merge(
        "entry_point_id" => graph_data[:entry_point],
        "max_level" => graph_data[:max_level],
        "hnsw_m" => @m,
        "hnsw_ef_construction" => @ef_construction
      )
    )
  end

  # Best-first search over the stored layer-0 neighbor lists. Embeddings are
  # not stored; they are recomputed from passage text through
  # +embedding_provider+ and cached for the duration of this call.
  # +max_level+ is accepted but not used by this method.
  #
  # @return [Array<Array>] up to +limit+ [id, score] pairs, best first,
  #   where score is 1 - cosine distance
  def search_hnsw(query_embedding, entry_point_id, max_level, ef, embedding_provider, passages, limit)
    passage_map = @index.passages.index_by(&:external_id)
    return [] if passage_map.empty?

    entry = passage_map[entry_point_id]
    return [] unless entry

    embedding_cache = {}

    # Prefers the caller-supplied text and falls back to the stored passage.
    embedding_for = lambda do |id|
      cached = embedding_cache[id]
      next cached if cached

      text = passages[id] || passage_map[id]&.text
      next nil unless text

      embedding_cache[id] = embedding_provider.compute([text]).first
    end

    entry_embedding = embedding_for.call(entry_point_id)
    return [] unless entry_embedding

    visited = Set.new([entry_point_id])
    current_dist = cosine_distance(query_embedding, entry_embedding)
    candidates = [[current_dist, entry_point_id]]
    results = candidates.dup

    while candidates.any?
      candidates.sort_by!(&:first)
      dist, current_id = candidates.shift

      # Stop once the list is full and the best open candidate is worse
      # than the worst kept result.
      break if results.size >= ef && dist > results.last.first

      current_passage = passage_map[current_id]
      next unless current_passage

      # NOTE(review): neighbor_ids is defined on the passage model, which is
      # not visible here; presumably it returns the stored neighbor list.
      neighbors = current_passage.neighbor_ids

      neighbors.each do |neighbor_id|
        next if visited.include?(neighbor_id)

        visited << neighbor_id

        neighbor_embedding = embedding_for.call(neighbor_id)
        next unless neighbor_embedding

        neighbor_dist = cosine_distance(query_embedding, neighbor_embedding)

        if results.size < ef || neighbor_dist < results.last.first
          candidates << [neighbor_dist, neighbor_id]
          results << [neighbor_dist, neighbor_id]
          results.sort_by!(&:first)
          results.pop if results.size > ef
        end
      end
    end

    # Convert distances to similarity scores.
    results.first(limit).map do |result_dist, id|
      [id, 1.0 - result_dist]
    end
  end
end
|
|
331
|
+
end
|
|
332
|
+
end
|
data/lib/leann/rails.rb
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../leann"
|
|
4
|
+
|
|
5
|
+
module Leann
|
|
6
|
+
module Rails
  autoload :Index, "leann/rails/active_record/index"
  autoload :Passage, "leann/rails/active_record/passage"
  autoload :ActiveRecordBackend, "leann/rails/storage/active_record_backend"
  autoload :Builder, "leann/rails/builder"
  autoload :Searcher, "leann/rails/searcher"

  class << self
    # Build a new index stored in the database.
    #
    # The block, when given, is evaluated in the context of the builder, so
    # bare calls such as +add+ inside it are sent to the builder.
    #
    # @param name [String] Index name (unique identifier)
    # @param options [Hash] Options for building
    # @option options [Symbol] :embedding (:openai) Embedding provider
    # @option options [String] :model Embedding model name
    #
    # @example
    #   Leann::Rails.build("products") do
    #     add "Red running shoes for athletes", category: "shoes"
    #     add "Blue denim jeans, slim fit", category: "pants"
    #   end
    #
    # @return [Leann::Rails::Index] The built index record
    def build(name, **options, &block)
      builder = Builder.new(name, **options)
      builder.instance_eval(&block) if block_given?
      builder.save
    end

    # Search an existing database index. The index is looked up with
    # +find_by!+, so an unknown name raises rather than returning nil.
    #
    # @param name [String] Index name
    # @param query [String] Search query
    # @param limit [Integer] Maximum results
    # @param threshold [Float] Minimum similarity score
    # @param filters [Hash] Metadata filters
    #
    # @example
    #   results = Leann::Rails.search("products", "comfortable shoes")
    #
    # @return [Leann::SearchResults]
    def search(name, query, limit: 5, threshold: nil, filters: nil)
      index = Index.find_by!(name: name)
      Searcher.new(index).search(query, limit: limit, threshold: threshold, filters: filters)
    end

    # Open an existing index (raises via +find_by!+ when it does not exist).
    #
    # @param name [String] Index name
    # @return [Leann::Rails::Index]
    def open(name)
      Index.find_by!(name: name)
    end

    # Check if an index exists.
    #
    # @param name [String] Index name
    # @return [Boolean]
    def exists?(name)
      Index.exists?(name: name)
    end

    # Delete an index and all its passages.
    #
    # @param name [String] Index name
    # @return [Boolean] true when the record was destroyed; false when no
    #   index has that name or when +destroy+ reported failure
    def delete(name)
      index = Index.find_by(name: name)
      return false unless index

      # destroy returns false when the deletion is halted, so report the
      # real outcome instead of an unconditional true.
      index.destroy ? true : false
    end

    # List all indexes.
    #
    # @return [Array<String>] index names in ascending order
    def list
      Index.pluck(:name).sort
    end
  end
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
require "leann/rails/railtie" if defined?(::Rails::Railtie)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../leann"
|
|
4
|
+
|
|
5
|
+
# Only require ruby_llm if not already defined (allows mocking in tests)
|
|
6
|
+
unless defined?(::RubyLLM::Tool)
|
|
7
|
+
begin
|
|
8
|
+
require "ruby_llm"
|
|
9
|
+
rescue LoadError
|
|
10
|
+
raise LoadError, "RubyLLM is required for Leann::RubyLLM::Search. Add 'ruby_llm' to your Gemfile."
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
module Leann
|
|
15
|
+
module RubyLLM
|
|
16
|
+
# A RubyLLM tool for searching LEANN indexes
|
|
17
|
+
#
|
|
18
|
+
# @example Basic usage
|
|
19
|
+
# chat = ::RubyLLM.chat(model: "gpt-4o")
|
|
20
|
+
# .with_tool(Leann::RubyLLM::Search.new("my_index"))
|
|
21
|
+
#
|
|
22
|
+
# chat.ask("What does LEANN do?")
|
|
23
|
+
# # => LLM searches the index and generates an answer
|
|
24
|
+
#
|
|
25
|
+
# @example Multiple indexes
|
|
26
|
+
# docs_search = Leann::RubyLLM::Search.new("docs", name: "search_docs")
|
|
27
|
+
# code_search = Leann::RubyLLM::Search.new("codebase", name: "search_code")
|
|
28
|
+
#
|
|
29
|
+
# chat = ::RubyLLM.chat(model: "gpt-4o")
|
|
30
|
+
# .with_tools(docs_search, code_search)
|
|
31
|
+
#
|
|
32
|
+
class Search < ::RubyLLM::Tool
  # @param index_name [String] Name of the LEANN index to search
  # @param name [String] Tool name (defaults to "leann_search")
  # @param limit [Integer] Default number of results (default: 5)
  def initialize(index_name, name: "leann_search", limit: 5)
    @index_name = index_name
    @default_limit = limit
    @tool_name = name
    super()
  end

  # @return [String] the tool name given at construction
  def name
    @tool_name
  end

  # @return [String] description that names the index this tool searches
  def description
    "Searches the '#{@index_name}' knowledge base for relevant documents. " \
      "Use this to find information before answering questions."
  end

  # Parameter schema: a required +query+ string and an optional +limit+.
  #
  # @return [::RubyLLM::Schema]
  def params
    # The bare `string` / `integer` calls below imply the block runs with
    # the schema as self; in that case an @ivar inside the block would be
    # looked up on the schema, not on this tool. A local variable is
    # captured by the closure however the block is evaluated.
    default_limit = @default_limit

    ::RubyLLM::Schema.new do
      string :query,
             description: "The search query to find relevant documents",
             required: true
      integer :limit,
              description: "Maximum number of results to return (default: #{default_limit})",
              required: false
    end
  end

  # Runs the search and returns a plain hash. Errors are reported as
  # { error: message } instead of being raised.
  #
  # @param query [String] the search query
  # @param limit [Integer, nil] result cap; falls back to the default limit
  # @return [Hash] { found: false, message: } when nothing matched,
  #   { found: true, count:, documents: } on success, or { error: }
  def execute(query:, limit: nil)
    limit ||= @default_limit
    results = Leann.search(@index_name, query, limit: limit)

    if results.empty?
      { found: false, message: "No relevant documents found for: #{query}" }
    else
      {
        found: true,
        count: results.size,
        documents: results.map do |r|
          {
            text: r.text,
            score: r.score.round(3),
            metadata: r.metadata
          }
        end
      }
    end
  rescue Leann::IndexNotFoundError
    { error: "Index '#{@index_name}' not found" }
  rescue StandardError => e
    { error: e.message }
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leann
|
|
4
|
+
# Represents a single search result
|
|
5
|
+
#
|
|
6
|
+
# @example
|
|
7
|
+
# results = Leann.search("my_index", "query")
|
|
8
|
+
# results.each do |result|
|
|
9
|
+
# puts result.text
|
|
10
|
+
# puts result.score
|
|
11
|
+
# puts result.metadata[:source]
|
|
12
|
+
# end
|
|
13
|
+
#
|
|
14
|
+
class SearchResult
  # @return [String] Document ID
  attr_reader :id

  # @return [String] Document text
  attr_reader :text

  # @return [Float] Similarity score (higher is better)
  attr_reader :score

  # @return [Hash] Document metadata, with keys converted to symbols
  attr_reader :metadata

  # @param id [String]
  # @param text [String]
  # @param score [Float] converted with +to_f+
  # @param metadata [Hash, nil] nil is treated as an empty hash
  def initialize(id:, text:, score:, metadata: {})
    @id = id
    @text = text
    @score = score.to_f
    # An explicit nil (e.g. a row without metadata) must not raise here.
    @metadata = (metadata || {}).transform_keys(&:to_sym)
  end

  # Truncate text to a maximum length, omission included.
  #
  # @param max_length [Integer]
  # @param omission [String]
  # @return [String] the text itself when it fits; otherwise a prefix
  #   followed by +omission+ (just +omission+ when max_length is shorter
  #   than the omission)
  def truncated_text(max_length = 100, omission: "...")
    return text if text.length <= max_length

    # A negative slice length would make String#[] return nil.
    keep = [max_length - omission.length, 0].max
    text[0, keep] + omission
  end

  # Human-readable string representation
  # @return [String]
  def to_s
    "[#{format("%.3f", score)}] #{truncated_text(80)}"
  end

  # Detailed inspection
  # @return [String]
  def inspect
    "#<Leann::SearchResult id=#{id.inspect} score=#{format("%.4f", score)} text=#{truncated_text(50).inspect}>"
  end

  # Convert to hash
  # @return [Hash]
  def to_h
    {
      id: id,
      text: text,
      score: score,
      metadata: metadata
    }
  end

  # Compare by score so that sorting puts the highest score first.
  #
  # @param other [SearchResult]
  # @return [Integer, nil] nil when +other+ has no score to compare with
  def <=>(other)
    return nil unless other.respond_to?(:score)

    other.score <=> score # Descending order
  end

  # Check equality on id, text and score (metadata is not compared).
  #
  # @param other [SearchResult]
  # @return [Boolean]
  def ==(other)
    return false unless other.is_a?(SearchResult)

    id == other.id && text == other.text && score == other.score
  end
  alias eql? ==

  # Hash code for use as hash key; built from the fields #== compares.
  # @return [Integer]
  def hash
    [id, text, score].hash
  end
end
|
|
94
|
+
|
|
95
|
+
# Collection of search results with utility methods
|
|
96
|
+
class SearchResults
  include Enumerable

  # @return [Array<SearchResult>]
  attr_reader :results

  # @return [String] Original query
  attr_reader :query

  # @return [Float] Search duration in seconds
  attr_reader :duration

  # @param results [Array<SearchResult>]
  # @param query [String]
  # @param duration [Float]
  def initialize(results, query: nil, duration: nil)
    @results = results
    @query = query
    @duration = duration
  end

  # Iterate over results
  def each(&block)
    results.each(&block)
  end

  # Number of results
  # @return [Integer]
  def size
    results.size
  end
  alias length size

  # Number of results, or the number of matching results when an item or a
  # block is given (same contract as Enumerable#count).
  #
  # @return [Integer]
  def count(*args, &block)
    return size if args.empty? && block.nil?

    results.count(*args, &block)
  end

  # Check if empty
  # @return [Boolean]
  def empty?
    results.empty?
  end

  # Get the first result, or the first +n+ results when +n+ is given.
  #
  # @param args [Array<Integer>] optional count
  # @return [SearchResult, nil, Array<SearchResult>]
  def first(*args)
    results.first(*args)
  end

  # Get top n results
  # @param n [Integer]
  # @return [Array<SearchResult>]
  def top(n)
    results.first(n)
  end

  # Get result by index
  # @param index [Integer]
  # @return [SearchResult, nil]
  def [](index)
    results[index]
  end

  # Filter results by minimum score (inclusive), keeping query and duration.
  #
  # @param min_score [Float]
  # @return [SearchResults]
  def above(min_score)
    filtered = results.select { |r| r.score >= min_score }
    SearchResults.new(filtered, query: query, duration: duration)
  end

  # Get all texts
  # @return [Array<String>]
  def texts
    results.map(&:text)
  end

  # Join all texts
  # @param separator [String]
  # @return [String]
  def combined_text(separator: "\n\n")
    texts.join(separator)
  end

  # Pretty print results: a header, a summary line, a rule, then one
  # numbered line per result. A nil duration is printed as 0.000s.
  #
  # @return [String]
  def to_s
    lines = ["Search results for: #{query.inspect}"]
    lines << "Found #{size} results in #{format("%.3f", duration || 0)}s"
    lines << "-" * 60
    results.each_with_index do |r, i|
      lines << "#{i + 1}. #{r}"
    end
    lines.join("\n")
  end

  # Convert to array of hashes. Note that this replaces Enumerable#to_a:
  # it returns each result's +to_h+, not the result objects themselves.
  #
  # @return [Array<Hash>]
  def to_a
    results.map(&:to_h)
  end
end
|
|
195
|
+
end
|