leann 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +375 -0
- data/exe/leann +167 -0
- data/lib/generators/leann/install/install_generator.rb +51 -0
- data/lib/generators/leann/install/templates/migration.rb.erb +28 -0
- data/lib/leann/backend/base.rb +51 -0
- data/lib/leann/backend/leann_graph.rb +476 -0
- data/lib/leann/builder.rb +317 -0
- data/lib/leann/configuration.rb +148 -0
- data/lib/leann/embedding/base.rb +63 -0
- data/lib/leann/embedding/fastembed.rb +120 -0
- data/lib/leann/embedding/ollama.rb +194 -0
- data/lib/leann/embedding/openai.rb +149 -0
- data/lib/leann/embedding/ruby_llm.rb +57 -0
- data/lib/leann/errors.rb +71 -0
- data/lib/leann/index.rb +236 -0
- data/lib/leann/rails/active_record/index.rb +70 -0
- data/lib/leann/rails/active_record/passage.rb +56 -0
- data/lib/leann/rails/builder.rb +205 -0
- data/lib/leann/rails/railtie.rb +16 -0
- data/lib/leann/rails/searcher.rb +117 -0
- data/lib/leann/rails/storage/active_record_backend.rb +332 -0
- data/lib/leann/rails.rb +90 -0
- data/lib/leann/ruby_llm/search.rb +89 -0
- data/lib/leann/search_result.rb +195 -0
- data/lib/leann/searcher.rb +189 -0
- data/lib/leann/version.rb +3 -0
- data/lib/leann.rb +133 -0
- metadata +177 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: a59608a07bbd0265d0d049085ddfa8fb5e560464a1ab943baca3b9562e821a6c
|
|
4
|
+
data.tar.gz: de9180940fdab66734ebbab598e0a0a3d75e201b649c38530e5a3268017c5ddd
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 3fd7a484b92c4246fedc25f07692c3174043019706aa8b99e5e840f3d0daa43afe75542d71dd65c924cc3763669bc67adb9b8ea4ee67ac56b84c1b75cc61b11d
|
|
7
|
+
data.tar.gz: 9da1019a629a361a74986e8088f2c19d2e5f9b34cb6345d15e57a30184022c64cd299f6c93e9276b92b8c952378868cd633bdddb97065fb2ea865bca28557424
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Chris Hasiński
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
# LEANN
|
|
2
|
+
|
|
3
|
+
**Lightweight vector search with 85-96% storage savings.**
|
|
4
|
+
|
|
5
|
+
LEANN stores only the graph structure, not the embeddings. During search, it recomputes embeddings on-the-fly for visited nodes only. This achieves massive storage reduction while maintaining search quality.
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
# Build an index - embeddings are computed but NOT stored
|
|
9
|
+
Leann.build("knowledge_base") do
|
|
10
|
+
add "LEANN saves 85-96% storage compared to traditional vector databases."
|
|
11
|
+
add "It uses graph-only storage with on-demand embedding recomputation."
|
|
12
|
+
add_directory "docs/", extensions: [".md", ".txt"]
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Search semantically
|
|
16
|
+
results = Leann.search("knowledge_base", "storage efficiency")
|
|
17
|
+
puts results.first.text
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Storage Benchmark
|
|
21
|
+
|
|
22
|
+
At **1 million documents**:
|
|
23
|
+
|
|
24
|
+
| Embedding Model | Traditional HNSW | LEANN | Savings |
|
|
25
|
+
|-----------------|------------------|-------|---------:|
|
|
26
|
+
| Ollama (384 dim) | 1.67 GB | 251.8 MB | **85%** |
|
|
27
|
+
| OpenAI (1536 dim) | 5.96 GB | 251.8 MB | **96%** |
|
|
28
|
+
|
|
29
|
+
Full benchmark across scales:
|
|
30
|
+
|
|
31
|
+
| Documents | Traditional HNSW | LEANN | Savings |
|
|
32
|
+
|----------:|------------------|-------|---------:|
|
|
33
|
+
| 1,000 | 1.7 MB | 257 KB | **85%** |
|
|
34
|
+
| 10,000 | 17 MB | 2.5 MB | **85%** |
|
|
35
|
+
| 100,000 | 171 MB | 25 MB | **85%** |
|
|
36
|
+
| 1,000,000 | 1.67 GB | 252 MB | **85%** |
|
|
37
|
+
|
|
38
|
+
*Measured with Ollama all-minilm (384 dimensions). OpenAI models achieve ~96% savings due to larger embeddings.*
|
|
39
|
+
|
|
40
|
+
## How It Works
|
|
41
|
+
|
|
42
|
+
**Traditional vector databases:**
|
|
43
|
+
```
|
|
44
|
+
[Documents] + [Embeddings] + [Index] → Large storage (1.67 GB for 1M docs)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**LEANN:**
|
|
48
|
+
```
|
|
49
|
+
[Documents] + [Graph-only] → Tiny storage (252 MB for 1M docs)
|
|
50
|
+
↓
|
|
51
|
+
Embeddings recomputed on-the-fly during search
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The trade-off: search is slightly slower (requires API calls to recompute embeddings for visited nodes), but storage is dramatically smaller.
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
### 1. Install
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
gem 'leann'
|
|
62
|
+
gem 'ruby_llm' # Recommended - LEANN uses RubyLLM for embeddings automatically
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 2. Configure
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
# If you have RubyLLM, just configure it - LEANN uses it automatically
|
|
69
|
+
RubyLLM.configure do |config|
|
|
70
|
+
config.openai_api_key = ENV["OPENAI_API_KEY"]
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Or configure LEANN directly (without RubyLLM)
|
|
74
|
+
Leann.configure do |config|
|
|
75
|
+
config.embedding_provider = :openai # or :ollama
|
|
76
|
+
config.openai_api_key = ENV["OPENAI_API_KEY"]
|
|
77
|
+
end
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 3. Build & Search
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
# Index some documents
|
|
84
|
+
Leann.build("my_index") do
|
|
85
|
+
add "Ruby is a dynamic programming language."
|
|
86
|
+
add "Rails is a web framework written in Ruby."
|
|
87
|
+
add "Sinatra is a lightweight Ruby web framework."
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Search
|
|
91
|
+
results = Leann.search("my_index", "web frameworks")
|
|
92
|
+
results.each { |r| puts "#{r.score.round(3)}: #{r.text}" }
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Real-World Use Cases
|
|
96
|
+
|
|
97
|
+
### Documentation Search
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
# Index your documentation
|
|
101
|
+
Leann.build("docs") do
|
|
102
|
+
add_directory "docs/", extensions: [".md"]
|
|
103
|
+
add_directory "guides/", extensions: [".md"]
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Search API
|
|
107
|
+
get "/search" do
|
|
108
|
+
results = Leann.search("docs", params[:q], limit: 10)
|
|
109
|
+
json results.map(&:to_h)
|
|
110
|
+
end
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Code Search
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
# Index your codebase
|
|
117
|
+
Leann.build("codebase", embedding: :ollama, model: "nomic-embed-text") do
|
|
118
|
+
add_directory "app/", extensions: [".rb"]
|
|
119
|
+
add_directory "lib/", extensions: [".rb"]
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Find relevant code
|
|
123
|
+
results = Leann.search("codebase", "user authentication", limit: 5)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Local-First with Ollama
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
# No API keys needed - runs entirely local
|
|
130
|
+
Leann.configure do |config|
|
|
131
|
+
config.embedding_provider = :ollama
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
Leann.build("local_index") do
|
|
135
|
+
add "Your private data stays on your machine."
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
results = Leann.search("local_index", "privacy")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## RubyLLM Integration
|
|
142
|
+
|
|
143
|
+
LEANN works seamlessly with [RubyLLM](https://github.com/crmne/ruby_llm):
|
|
144
|
+
|
|
145
|
+
- **Embeddings**: If RubyLLM is present, LEANN uses it automatically for embeddings
|
|
146
|
+
- **RAG**: Use LEANN as a tool for retrieval-augmented generation
|
|
147
|
+
|
|
148
|
+
### Embeddings via RubyLLM
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
# Just configure RubyLLM - LEANN detects and uses it automatically
|
|
152
|
+
RubyLLM.configure do |config|
|
|
153
|
+
config.openai_api_key = ENV["OPENAI_API_KEY"]
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# Build uses RubyLLM.embed under the hood
|
|
157
|
+
Leann.build("docs") do
|
|
158
|
+
add "Your documents here..."
|
|
159
|
+
end
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### RAG with Search Tool
|
|
163
|
+
|
|
164
|
+
```ruby
|
|
165
|
+
require "leann"
|
|
166
|
+
require "leann/ruby_llm/search"
|
|
167
|
+
|
|
168
|
+
# Build your index
|
|
169
|
+
Leann.build("knowledge_base") do
|
|
170
|
+
add "LEANN saves 85-96% storage compared to traditional vector databases."
|
|
171
|
+
add "It uses graph-only storage for massive space savings."
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# Create a RubyLLM chat with LEANN as a tool
|
|
175
|
+
chat = RubyLLM.chat(model: "gpt-4o")
|
|
176
|
+
.with_tool(Leann::RubyLLM::Search.new("knowledge_base"))
|
|
177
|
+
|
|
178
|
+
# The LLM will automatically search when needed
|
|
179
|
+
answer = chat.ask("How much storage does LEANN save?")
|
|
180
|
+
puts answer
|
|
181
|
+
# => "Based on my search, LEANN saves 85-96% storage compared to traditional vector databases..."
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Multiple Indexes
|
|
185
|
+
|
|
186
|
+
```ruby
|
|
187
|
+
require "leann/ruby_llm/search"
|
|
188
|
+
|
|
189
|
+
docs_tool = Leann::RubyLLM::Search.new("docs", name: "search_docs")
|
|
190
|
+
code_tool = Leann::RubyLLM::Search.new("codebase", name: "search_code")
|
|
191
|
+
|
|
192
|
+
chat = RubyLLM.chat(model: "gpt-4o")
|
|
193
|
+
.with_tools(docs_tool, code_tool)
|
|
194
|
+
|
|
195
|
+
chat.ask("How does authentication work in this project?")
|
|
196
|
+
# => LLM searches both indexes and synthesizes an answer
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## API Reference
|
|
200
|
+
|
|
201
|
+
### Building Indexes
|
|
202
|
+
|
|
203
|
+
```ruby
|
|
204
|
+
# DSL style (recommended)
|
|
205
|
+
Leann.build("name") do
|
|
206
|
+
add "text", key: "value" # Add text with metadata
|
|
207
|
+
add_file "path/to/file.txt" # Add file contents
|
|
208
|
+
add_directory "docs/" # Add all files from directory
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Programmatic style
|
|
212
|
+
builder = Leann::Builder.new("name", embedding: :openai)
|
|
213
|
+
builder.add("text one")
|
|
214
|
+
builder.add("text two", category: "example")
|
|
215
|
+
builder.save
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Searching
|
|
219
|
+
|
|
220
|
+
```ruby
|
|
221
|
+
# Simple search
|
|
222
|
+
results = Leann.search("index", "query")
|
|
223
|
+
|
|
224
|
+
# With options
|
|
225
|
+
results = Leann.search("index", "query",
|
|
226
|
+
limit: 10, # Max results
|
|
227
|
+
threshold: 0.7, # Min similarity
|
|
228
|
+
filters: { category: "docs" } # Metadata filter
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# Working with results
|
|
232
|
+
results.each do |r|
|
|
233
|
+
puts r.id # Document ID
|
|
234
|
+
puts r.text # Document text
|
|
235
|
+
puts r.score # Similarity score (0-1)
|
|
236
|
+
puts r.metadata # Metadata hash
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
puts results.combined_text # All texts joined
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Configuration
|
|
243
|
+
|
|
244
|
+
```ruby
|
|
245
|
+
Leann.configure do |config|
|
|
246
|
+
# Embedding provider (defaults to :ruby_llm if available, otherwise :openai)
|
|
247
|
+
config.embedding_provider = :ruby_llm # :ruby_llm, :openai, :ollama, or :fastembed
|
|
248
|
+
|
|
249
|
+
# Provider-specific settings (only needed if not using RubyLLM)
|
|
250
|
+
config.openai_api_key = "sk-..."
|
|
251
|
+
config.ollama_host = "http://localhost:11434"
|
|
252
|
+
config.default_embedding_model = "text-embedding-3-small"
|
|
253
|
+
|
|
254
|
+
# Index settings
|
|
255
|
+
config.hnsw_m = 16 # Graph connectivity
|
|
256
|
+
config.hnsw_ef_construction = 200 # Build quality
|
|
257
|
+
end
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Rails Integration
|
|
261
|
+
|
|
262
|
+
LEANN provides ActiveRecord integration for storing indexes in your database instead of files.
|
|
263
|
+
|
|
264
|
+
### Setup
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
# Generate the migration
|
|
268
|
+
rails generate leann:install
|
|
269
|
+
rails db:migrate
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
This creates two tables:
|
|
273
|
+
- `leann_indexes` - Stores index metadata and graph configuration
|
|
274
|
+
- `leann_passages` - Stores documents with their text, metadata, and graph neighbors
|
|
275
|
+
|
|
276
|
+
### Usage
|
|
277
|
+
|
|
278
|
+
```ruby
|
|
279
|
+
require "leann/rails"
|
|
280
|
+
|
|
281
|
+
# Build an index (stored in database)
|
|
282
|
+
Leann::Rails.build("products") do
|
|
283
|
+
add "Red running shoes for athletes", category: "shoes", price: 89.99
|
|
284
|
+
add "Blue denim jeans, slim fit", category: "pants", price: 59.99
|
|
285
|
+
add "White cotton t-shirt", category: "tops", price: 24.99
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
# Search
|
|
289
|
+
results = Leann::Rails.search("products", "comfortable footwear")
|
|
290
|
+
results.each { |r| puts "#{r.score.round(3)}: #{r.text}" }
|
|
291
|
+
|
|
292
|
+
# Other operations
|
|
293
|
+
Leann::Rails.exists?("products") # => true
|
|
294
|
+
Leann::Rails.list # => ["products"]
|
|
295
|
+
Leann::Rails.delete("products") # => true
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
### In Controllers
|
|
299
|
+
|
|
300
|
+
```ruby
|
|
301
|
+
class SearchController < ApplicationController
|
|
302
|
+
def index
|
|
303
|
+
@results = Leann::Rails.search("products", params[:q], limit: 10)
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### With ActiveRecord Models
|
|
309
|
+
|
|
310
|
+
```ruby
|
|
311
|
+
# Direct access to index records
|
|
312
|
+
index = Leann::Rails::Index.find_by(name: "products")
|
|
313
|
+
index.document_count # => 3
|
|
314
|
+
index.search("shoes") # Search directly on the index
|
|
315
|
+
|
|
316
|
+
# Access passages
|
|
317
|
+
index.passages.each do |passage|
|
|
318
|
+
puts passage.text
|
|
319
|
+
puts passage.metadata
|
|
320
|
+
end
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Benefits of Database Storage
|
|
324
|
+
|
|
325
|
+
- **Transactions**: Index updates are ACID-compliant
|
|
326
|
+
- **Backups**: Indexes are included in database backups
|
|
327
|
+
- **Scaling**: Use read replicas for search-heavy workloads
|
|
328
|
+
- **Deployment**: No need to manage separate index files
|
|
329
|
+
|
|
330
|
+
## Embedding Providers
|
|
331
|
+
|
|
332
|
+
| Provider | Setup | Best For |
|
|
333
|
+
|----------|-------|----------|
|
|
334
|
+
| **RubyLLM** (default) | `gem 'ruby_llm'` | Unified API, multiple backends |
|
|
335
|
+
| **OpenAI** | `OPENAI_API_KEY` | Direct API access |
|
|
336
|
+
| **Ollama** | [Install Ollama](https://ollama.com) | Local, privacy-first |
|
|
337
|
+
| **FastEmbed** | `gem 'fastembed'` | Fast local, no server needed |
|
|
338
|
+
|
|
339
|
+
When RubyLLM is present, LEANN uses it automatically. This gives you access to all embedding providers RubyLLM supports (OpenAI, Ollama, and more) through a single configuration.
|
|
340
|
+
|
|
341
|
+
### FastEmbed (Local, Serverless)
|
|
342
|
+
|
|
343
|
+
FastEmbed provides fast local embeddings using ONNX Runtime - no API keys or running servers required:
|
|
344
|
+
|
|
345
|
+
```ruby
|
|
346
|
+
gem 'fastembed'
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
```ruby
|
|
350
|
+
Leann.configure do |config|
|
|
351
|
+
config.embedding_provider = :fastembed
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
# Or specify model explicitly
|
|
355
|
+
Leann.build("index", embedding: :fastembed, model: "BAAI/bge-small-en-v1.5") do
|
|
356
|
+
add "Your documents here..."
|
|
357
|
+
end
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
**Supported models:**
|
|
361
|
+
- `BAAI/bge-small-en-v1.5` (384 dim, default) - Fast English
|
|
362
|
+
- `BAAI/bge-base-en-v1.5` (768 dim) - Higher accuracy English
|
|
363
|
+
- `intfloat/multilingual-e5-small` (384 dim) - 100+ languages
|
|
364
|
+
- `nomic-ai/nomic-embed-text-v1.5` (768 dim) - Long context (8192 tokens)
|
|
365
|
+
|
|
366
|
+
## Requirements
|
|
367
|
+
|
|
368
|
+
- Ruby 3.0+
|
|
369
|
+
- RubyLLM gem (recommended) OR direct API access:
|
|
370
|
+
- OpenAI API key (for cloud embeddings)
|
|
371
|
+
- Ollama running locally (for local embeddings)
|
|
372
|
+
|
|
373
|
+
## License
|
|
374
|
+
|
|
375
|
+
MIT
|
data/exe/leann
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "leann"
|
|
5
|
+
|
|
6
|
+
# Simple CLI for LEANN
|
|
7
|
+
# Command-line front end for LEANN.
#
# Every sub-command is a thin wrapper: it validates its arguments, calls the
# matching method on the +Leann+ module and prints the outcome to standard
# output. Argument errors print a usage hint and exit with status 1.
class CLI
  # Sub-commands listed by +help+, mapped to their one-line descriptions.
  COMMANDS = {
    "build" => "Build an index from files",
    "search" => "Search an index",
    "ask" => "Ask a question (RAG)",
    "info" => "Show index information",
    "list" => "List all indexes",
    "delete" => "Delete an index",
    "help" => "Show this help"
  }.freeze

  # Dispatches to the sub-command named by the first argument.
  #
  # With no arguments the help text is shown. An unrecognised command prints
  # a hint and terminates the process with exit status 1.
  #
  # @param args [Array<String>] command-line arguments; the array is consumed
  #   (shifted) while parsing
  # @return [void]
  def run(args)
    command = args.shift || "help"

    case command
    when "build" then build(args)
    when "search" then search(args)
    when "ask" then ask(args)
    when "info" then info(args)
    when "list" then list_indexes
    when "delete" then delete(args)
    when "help", "--help", "-h" then help
    when "--version", "-v" then puts "leann #{Leann::VERSION}"
    else
      usage_error("Unknown command: #{command}", "Run 'leann help' for usage.")
    end
  end

  private

  # Prints the given lines and terminates the process with exit status 1.
  #
  # @param lines [Array<String>] lines to print, one per output line
  def usage_error(*lines)
    puts lines
    exit 1
  end

  # Builds index <name> from the files under <directory> (default: ".").
  #
  # @param args [Array<String>] index name, then an optional directory
  def build(args)
    if args.empty?
      usage_error("Usage: leann build <name> <directory>",
                  "Example: leann build docs ./docs")
    end

    name = args.shift
    directory = args.shift || "."
    extensions = %w[.md .txt .rb .py .js .ts]

    puts "Building index '#{name}' from #{directory}..."

    # add_directory is called without a receiver, the same DSL form the
    # project's README uses inside Leann.build blocks; the locals above are
    # reached through the block's closure.
    Leann.build(name) do
      add_directory directory, extensions: extensions
    end

    puts "Done!"
  end

  # Runs a search against an index and prints up to five results.
  #
  # @param args [Array<String>] index name followed by the query words
  def search(args)
    if args.size < 2
      usage_error("Usage: leann search <index> <query>",
                  "Example: leann search docs 'authentication'")
    end

    name = args.shift
    query = args.join(" ") # remaining words form a single query string

    results = Leann.search(name, query, limit: 5)

    if results.empty?
      puts "No results found."
    else
      puts results.to_s
    end
  end

  # Passes a question to Leann.ask and prints whatever it returns.
  #
  # @param args [Array<String>] index name followed by the question words
  def ask(args)
    if args.size < 2
      usage_error("Usage: leann ask <index> <question>",
                  "Example: leann ask docs 'How do I authenticate?'")
    end

    name = args.shift
    question = args.join(" ")

    puts "Searching for context..."
    answer = Leann.ask(name, question)
    puts
    puts answer
  end

  # Prints the string form of the index opened by name.
  #
  # @param args [Array<String>] index name
  def info(args)
    usage_error("Usage: leann info <index>") if args.empty?

    puts Leann.open(args.first).to_s
  end

  # Prints the names returned by Leann.list, or a notice when there are none.
  def list_indexes
    indexes = Leann.list

    if indexes.empty?
      puts "No indexes found."
    else
      puts "Indexes:"
      indexes.each { |i| puts " - #{i}" }
    end
  end

  # Deletes an index after an interactive confirmation on standard input.
  #
  # Only the answer "y" (case-insensitive, surrounding whitespace ignored)
  # confirms; anything else, including end of input, cancels.
  #
  # @param args [Array<String>] index name
  def delete(args)
    usage_error("Usage: leann delete <index>") if args.empty?

    name = args.first
    print "Delete index '#{name}'? [y/N] "
    # IO#gets returns nil at end of input (closed, empty or exhausted stdin);
    # to_s turns that into "" so it falls through to "Cancelled." instead of
    # raising NoMethodError.
    confirm = $stdin.gets.to_s.strip.downcase

    if confirm == "y"
      Leann.delete(name)
      puts "Deleted."
    else
      puts "Cancelled."
    end
  end

  # Prints the banner, the command table built from COMMANDS, and examples.
  def help
    puts <<~HELP
      LEANN - Lightweight vector search and RAG for Ruby

      Usage: leann <command> [options]

      Commands:
    HELP

    COMMANDS.each do |cmd, desc|
      puts " #{cmd.ljust(10)} #{desc}"
    end

    puts <<~EXAMPLES

      Examples:
      leann build docs ./documentation
      leann search docs "authentication"
      leann ask docs "How do I set up OAuth?"
      leann info docs
      leann list
      leann delete docs
    EXAMPLES
  end
end
|
|
166
|
+
|
|
167
|
+
# Script entry point: hand the process's command-line arguments to the CLI
# dispatcher. ARGV itself is passed, so the shifts done while parsing consume it.
CLI.new.run(ARGV)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module Leann
  module Generators
    # Rails generator that copies the LEANN table migration into the host
    # application and then prints follow-up instructions.
    #
    # Public instance methods are the generator's steps, so the instruction
    # text is kept in a local array rather than in a helper method.
    class InstallGenerator < ::Rails::Generators::Base
      include ::Rails::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates LEANN database tables for storing vector indexes"

      class << self
        # Delegates migration numbering to ActiveRecord's generator base.
        #
        # @param dirname [String] migrations directory
        # @return [String] whatever ActiveRecord's generator returns
        def next_migration_number(dirname)
          ::ActiveRecord::Generators::Base.next_migration_number(dirname)
        end
      end

      # Renders the bundled migration template into db/migrate.
      def create_migration_file
        migration_template "migration.rb.erb", "db/migrate/create_leann_tables.rb"
      end

      # Prints a green confirmation followed by the next-step instructions.
      def show_instructions
        say ""
        say "LEANN tables will be created!", :green

        next_steps = [
          "",
          "Next steps:",
          " 1. Run migrations: rails db:migrate",
          " 2. Configure LEANN in an initializer:",
          "",
          " # config/initializers/leann.rb",
          " Leann.configure do |config|",
          " config.embedding_provider = :openai",
          " config.openai_api_key = ENV['OPENAI_API_KEY']",
          " end",
          "",
          " 3. Build and search indexes:",
          "",
          " Leann::Rails.build('products') do",
          " add 'Red running shoes', category: 'shoes'",
          " end",
          "",
          " results = Leann::Rails.search('products', 'comfortable footwear')",
          ""
        ]
        next_steps.each { |line| say line }
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateLeannTables < ActiveRecord::Migration<%= "[#{ActiveRecord::Migration.current_version}]" rescue "[7.0]" %>
|
|
4
|
+
def change
|
|
5
|
+
create_table :leann_indexes do |t|
|
|
6
|
+
t.string :name, null: false, index: { unique: true }
|
|
7
|
+
t.string :embedding_provider, null: false
|
|
8
|
+
t.string :embedding_model
|
|
9
|
+
t.integer :dimensions, null: false
|
|
10
|
+
t.text :config # JSON: stores hnsw_m, hnsw_ef_construction, entry_point_id, max_level
|
|
11
|
+
|
|
12
|
+
t.timestamps
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
create_table :leann_passages do |t|
|
|
16
|
+
t.references :leann_index, null: false, foreign_key: { to_table: :leann_indexes, on_delete: :cascade }
|
|
17
|
+
t.string :external_id, null: false
|
|
18
|
+
t.text :text, null: false
|
|
19
|
+
t.text :metadata # JSON: stores user-defined metadata
|
|
20
|
+
t.text :neighbors # JSON: stores neighbor IDs for graph traversal
|
|
21
|
+
|
|
22
|
+
t.timestamps
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
add_index :leann_passages, [:leann_index_id, :external_id], unique: true
|
|
26
|
+
add_index :leann_passages, :external_id
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leann
  module Backend
    # Abstract parent of the vector storage backends.
    #
    # A concrete backend is expected to override all of:
    #   - #build(embeddings, ids, path)     build an index from embeddings
    #   - #search(query_embedding, limit:)  find nearest neighbors
    #   - .load(path)                       load an existing index
    #
    # The versions defined here only raise NotImplementedError.
    class Base
      class << self
        # Loads a previously built index.
        #
        # @param path [String]
        # @return [Base]
        # @raise [NotImplementedError] always, unless overridden
        def load(path)
          raise NotImplementedError, "Subclasses must implement .load"
        end
      end

      # @return [Integer] number of dimensions of the embeddings
      attr_reader :dimensions

      # @param dimensions [Integer] number of dimensions of the embeddings
      def initialize(dimensions:)
        @dimensions = dimensions
      end

      # Builds an index from a set of embeddings.
      #
      # @param embeddings [Array<Array<Float>>]
      # @param ids [Array<String>]
      # @param path [String] index path
      # @raise [NotImplementedError] always, unless overridden
      def build(embeddings, ids, path)
        raise NotImplementedError, "Subclasses must implement #build"
      end

      # Finds the nearest neighbors of a query embedding.
      #
      # @param query_embedding [Array<Float>]
      # @param limit [Integer]
      # @return [Array<Array(String, Float)>] [id, score] pairs
      # @raise [NotImplementedError] always, unless overridden
      def search(query_embedding, limit:)
        raise NotImplementedError, "Subclasses must implement #search"
      end
    end
  end
end
|