leann 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Leann
6
+ # Handles search operations on an index
7
+ class Searcher
8
+ # @return [Index]
9
+ attr_reader :index
10
+
11
# Creates a searcher bound to a single index. The backend, embedding
# provider, passage offsets and passage cache all start out as nil; the
# private accessors of this class fill them in on first use.
#
# @param index [Index] index this searcher reads from
def initialize(index)
  @index = index
  @backend = @embedding_provider = @offsets = @passages_cache = nil
end
19
+
20
# Search the index
#
# Embeds the query, asks the backend for up to twice +limit+ candidates
# (presumably to leave headroom for the threshold/metadata filtering below),
# hydrates each candidate from the passages file, filters, sorts and
# truncates to +limit+.
#
# @param query [String] Search query
# @param limit [Integer] Maximum results (default: 5)
# @param threshold [Float, nil] Minimum score threshold (0.0-1.0)
# @param filters [Hash, nil] Metadata filters
# @return [SearchResults]
#
# @example Basic search
#   results = searcher.search("machine learning")
#
# @example With options
#   results = searcher.search("auth", limit: 10, threshold: 0.7)
#
# @example With metadata filters
#   results = searcher.search("query", filters: { category: "docs" })
def search(query, limit: 5, threshold: nil, filters: nil)
  # Use the monotonic clock for timing: wall-clock time (Time.now) can jump
  # backwards or forwards and would yield negative or skewed durations.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Compute query embedding
  query_embedding = embedding_provider.compute([query]).first

  # Load all passages for on-the-fly embedding computation
  passages = load_all_passages

  # Search with on-the-fly embedding recomputation
  raw_results = backend.search(
    query_embedding,
    embedding_provider: embedding_provider,
    passages: passages,
    limit: limit * 2
  )

  # Hydrate candidates; ids whose passage cannot be loaded are dropped.
  results = raw_results.filter_map do |id, score|
    passage = load_passage(id)
    next unless passage

    SearchResult.new(
      id: id,
      text: passage[:text],
      score: score,
      metadata: passage[:metadata] || {}
    )
  end

  # Apply threshold filter
  results = results.select { |r| r.score >= threshold } if threshold

  # Apply metadata filters
  results = apply_filters(results, filters) if filters

  # Sort (ordering is defined by SearchResult#<=>) and keep the first +limit+
  results = results.sort.first(limit)

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  SearchResults.new(results, query: query, duration: duration)
end
79
+
80
+ private
81
+
82
# Search backend for this index; loaded via #load_backend on first access
# and reused afterwards.
def backend
  return @backend if @backend

  @backend = load_backend
end
85
+
86
# Requires the graph backend implementation on demand and loads it from the
# index's path.
def load_backend
  require_relative "backend/leann_graph"

  graph_backend = Backend::LeannGraph
  graph_backend.load(index.path)
end
90
+
91
# Loads the text of every passage into memory, keyed by passage id, and
# memoizes the result. The passages file is read line by line, one JSON
# document per line.
#
# @return [Hash] passage id => passage text; empty when the passages file
#   does not exist
# @raise [JSON::ParserError] when a non-blank line is not valid JSON
def load_all_passages
  return @passages_cache if @passages_cache

  passages_file = "#{index.path}#{Index::PASSAGES_SUFFIX}"
  return @passages_cache = {} unless File.exist?(passages_file)

  # Build into a local hash so that a parse failure part-way through never
  # leaves a half-filled cache that later calls would return as complete.
  loaded = {}
  File.foreach(passages_file) do |line|
    next if line.strip.empty? # tolerate blank lines between records

    doc = JSON.parse(line, symbolize_names: true)
    loaded[doc[:id]] = doc[:text]
  end

  @passages_cache = loaded
end
105
+
106
# Embedding provider for this index; built via #load_embedding_provider on
# first access and reused afterwards.
def embedding_provider
  return @embedding_provider if @embedding_provider

  @embedding_provider = load_embedding_provider
end
109
+
110
# Instantiates the embedding provider named by the index, requiring the
# provider's implementation file only when that provider is selected.
#
# @return [Object] provider instance built with the index's embedding model
# @raise [ConfigurationError] when the index names an unknown provider
def load_embedding_provider
  require_relative "embedding/base"

  provider_class =
    case index.embedding_provider
    when :ruby_llm
      require_relative "embedding/ruby_llm"
      Embedding::RubyLLM
    when :openai
      require_relative "embedding/openai"
      Embedding::OpenAI
    when :ollama
      require_relative "embedding/ollama"
      Embedding::Ollama
    when :fastembed
      require_relative "embedding/fastembed"
      Embedding::FastEmbed
    else
      raise ConfigurationError, "Unknown embedding provider: #{index.embedding_provider}"
    end

  provider_class.new(model: index.embedding_model)
end
130
+
131
# Passage offsets table; read via #load_offsets on first access and reused
# afterwards.
def offsets
  return @offsets if @offsets

  @offsets = load_offsets
end
134
+
135
# Reads the offsets file that sits next to the index and parses it as JSON.
#
# @return [Object] the parsed JSON document, or an empty Hash when the file
#   is missing or is not valid JSON
def load_offsets
  path = "#{index.path}#{Index::OFFSETS_SUFFIX}"

  if File.exist?(path)
    JSON.parse(File.read(path))
  else
    {}
  end
rescue JSON::ParserError
  # An unreadable offsets file is treated the same as a missing one.
  {}
end
143
+
144
# Loads a single passage document from the passages file.
#
# Uses the offsets table for direct access when it has an entry for +id+;
# otherwise scans the file line by line for a document whose :id equals +id+.
#
# @param id [Object] passage id as returned by the backend
# @return [Hash, nil] parsed passage with symbolized keys, or nil when the
#   file is missing, the passage is not found, or a line fails to parse
def load_passage(id)
  passages_file = "#{index.path}#{Index::PASSAGES_SUFFIX}"
  return nil unless File.exist?(passages_file)

  # The offsets table is parsed from JSON, whose object keys are always
  # Strings, so also try the stringified id before giving up on fast access.
  offset = offsets[id] || offsets[id.to_s]

  if offset
    # Fast random access using offset
    File.open(passages_file, "r") do |f|
      f.seek(offset)
      line = f.gets
      return JSON.parse(line, symbolize_names: true) if line
    end
  else
    # Fallback to linear scan (slower but works without offsets)
    File.foreach(passages_file) do |line|
      # A blank line is not a record; skipping it keeps it from aborting the
      # scan and hiding every passage that follows.
      next if line.strip.empty?

      doc = JSON.parse(line, symbolize_names: true)
      return doc if doc[:id] == id
    end
  end

  nil
rescue JSON::ParserError
  nil
end
169
+
170
# Keeps only the results whose metadata satisfies every filter.
#
# Each filter key is converted to a Symbol before the metadata lookup. How a
# filter value is matched depends on its type: a Range must cover the
# metadata value, an Array must include it, a Regexp must match its string
# form, and anything else must equal it.
#
# @param results [Array] objects responding to #metadata
# @param filters [Hash] metadata key => expected value, Range, Array or Regexp
# @return [Array] the results that pass all filters
def apply_filters(results, filters)
  results.select do |candidate|
    # A candidate passes when no filter is left unsatisfied.
    filters.none? do |field, expected|
      actual = candidate.metadata[field.to_sym]

      satisfied =
        if expected.is_a?(Range)
          expected.cover?(actual)
        elsif expected.is_a?(Array)
          expected.include?(actual)
        elsif expected.is_a?(Regexp)
          expected.match?(actual.to_s)
        else
          actual == expected
        end

      !satisfied
    end
  end
end
188
+ end
189
+ end
@@ -0,0 +1,3 @@
1
module Leann
  # Gem version string. Frozen explicitly because this file carries no
  # frozen_string_literal magic comment, which would otherwise leave the
  # constant's value mutable.
  VERSION = "0.1.0".freeze
end
data/lib/leann.rb ADDED
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "leann/version"
4
+ require_relative "leann/configuration"
5
+ require_relative "leann/errors"
6
+ require_relative "leann/search_result"
7
+ require_relative "leann/embedding/base"
8
+ require_relative "leann/embedding/openai"
9
+ require_relative "leann/embedding/ollama"
10
+ require_relative "leann/backend/base"
11
+ require_relative "leann/backend/leann_graph"
12
+ require_relative "leann/index"
13
+ require_relative "leann/builder"
14
+ require_relative "leann/searcher"
15
+
16
# LEANN - Lightweight Embedding-Aware Neural Neighbor search
#
# A Ruby gem for building and searching vector indexes with minimal storage.
# Stores only the graph structure, achieving 85-96% storage savings by
# recomputing embeddings on-the-fly during search.
#
# Every method below is a module-level entry point that delegates to
# Configuration, Builder or Index.
#
# @example Quick start - build an index
#   Leann.build("knowledge_base") do
#     add "LEANN saves 85-96% storage compared to traditional vector databases."
#     add "It uses graph-only storage with on-demand recomputation."
#   end
#
# @example Search
#   results = Leann.search("knowledge_base", "storage savings")
#   results.each { |r| puts "#{r.score}: #{r.text}" }
#
module Leann
  # Global configuration, created on first access and shared afterwards.
  #
  # @return [Configuration]
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Configure Leann globally by yielding the shared configuration object.
  #
  # @example
  #   Leann.configure do |config|
  #     config.embedding_provider = :openai
  #     config.openai_api_key = ENV["OPENAI_API_KEY"]
  #   end
  #
  # @yield [Configuration]
  def self.configure
    yield(configuration)
  end

  # Build a new index with a DSL block. The block, when given, is evaluated
  # in the context of a Builder, so bare calls such as +add+ go to the
  # builder; the builder is then saved.
  #
  # @param name [String] Index name (will be created in current directory or specified path)
  # @param options [Hash] Options for building, passed through to Builder.new
  # @option options [Symbol] :embedding (:openai) Embedding provider (:openai, :ollama, :ruby_llm)
  # @option options [String] :model Embedding model name
  # @option options [String] :path Custom path for index storage
  #
  # @example Simple usage
  #   Leann.build("my_index") do
  #     add "First document"
  #     add "Second document"
  #   end
  #
  # @example With metadata
  #   Leann.build("docs", embedding: :ollama) do
  #     add "Content here", source: "file.md", chapter: 1
  #     add_file "README.md"
  #     add_directory "docs/", pattern: "**/*.md"
  #   end
  #
  # @return [Index] The built index (the value returned by Builder#save)
  def self.build(name, **options, &block)
    builder = Builder.new(name, **options)
    builder.instance_eval(&block) if block_given?
    builder.save
  end

  # Search an existing index: opens it by name and forwards the query.
  #
  # @param name [String] Index name or path
  # @param query [String] Search query
  # @param limit [Integer] Maximum results (default: 5)
  # @param threshold [Float] Minimum similarity score (0.0-1.0)
  # @param filters [Hash] Metadata filters
  #
  # @example Basic search
  #   results = Leann.search("my_index", "machine learning")
  #
  # @example With filters
  #   results = Leann.search("docs", "auth", limit: 10, filters: { chapter: 1..5 })
  #
  # @return [Array<SearchResult>] Search results, exactly as returned by Index#search
  #   (NOTE(review): Searcher#search returns a SearchResults wrapper - confirm
  #   which type Index#search actually yields)
  def self.search(name, query, limit: 5, threshold: nil, filters: nil)
    Index.open(name).search(query, limit: limit, threshold: threshold, filters: filters)
  end

  # Open an existing index for advanced operations
  #
  # @param name [String] Index name or path
  # @return [Index]
  def self.open(name)
    Index.open(name)
  end

  # List all indexes in a directory
  #
  # @param path [String] Directory to scan (default: current directory)
  # @return [Array<String>] Index names
  def self.list(path: ".")
    Index.list(path)
  end

  # Check if an index exists
  #
  # @param name [String] Index name or path
  # @return [Boolean]
  def self.exists?(name)
    Index.exists?(name)
  end

  # Delete an index
  #
  # @param name [String] Index name or path
  # @return [Boolean] true if deleted
  def self.delete(name)
    Index.delete(name)
  end
end
metadata ADDED
@@ -0,0 +1,177 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: leann
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Hasiński
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2026-05-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hnswlib
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '1.17'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '1.17'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '13.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '13.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '6.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '6.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
111
+ description: |
112
+ LEANN (Lightweight Embedding-Aware Neural Neighbor) is a Ruby gem for
113
+ building and searching vector indexes with minimal storage. It provides
114
+ semantic search and RAG capabilities with a beautiful, simple API.
115
+ Supports multiple embedding providers: RubyLLM, OpenAI, Ollama, and FastEmbed.
116
+ email:
117
+ - krzysztof.hasinski@gmail.com
118
+ executables:
119
+ - leann
120
+ extensions: []
121
+ extra_rdoc_files: []
122
+ files:
123
+ - LICENSE.txt
124
+ - README.md
125
+ - exe/leann
126
+ - lib/generators/leann/install/install_generator.rb
127
+ - lib/generators/leann/install/templates/migration.rb.erb
128
+ - lib/leann.rb
129
+ - lib/leann/backend/base.rb
130
+ - lib/leann/backend/leann_graph.rb
131
+ - lib/leann/builder.rb
132
+ - lib/leann/configuration.rb
133
+ - lib/leann/embedding/base.rb
134
+ - lib/leann/embedding/fastembed.rb
135
+ - lib/leann/embedding/ollama.rb
136
+ - lib/leann/embedding/openai.rb
137
+ - lib/leann/embedding/ruby_llm.rb
138
+ - lib/leann/errors.rb
139
+ - lib/leann/index.rb
140
+ - lib/leann/rails.rb
141
+ - lib/leann/rails/active_record/index.rb
142
+ - lib/leann/rails/active_record/passage.rb
143
+ - lib/leann/rails/builder.rb
144
+ - lib/leann/rails/railtie.rb
145
+ - lib/leann/rails/searcher.rb
146
+ - lib/leann/rails/storage/active_record_backend.rb
147
+ - lib/leann/ruby_llm/search.rb
148
+ - lib/leann/search_result.rb
149
+ - lib/leann/searcher.rb
150
+ - lib/leann/version.rb
151
+ homepage: https://github.com/khasinski/leann-rb
152
+ licenses:
153
+ - MIT
154
+ metadata:
155
+ homepage_uri: https://github.com/khasinski/leann-rb
156
+ source_code_uri: https://github.com/khasinski/leann-rb
157
+ changelog_uri: https://github.com/khasinski/leann-rb/blob/main/CHANGELOG.md
158
+ post_install_message:
159
+ rdoc_options: []
160
+ require_paths:
161
+ - lib
162
+ required_ruby_version: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: 3.0.0
167
+ required_rubygems_version: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ">="
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
172
+ requirements: []
173
+ rubygems_version: 3.5.22
174
+ signing_key:
175
+ specification_version: 4
176
+ summary: Lightweight vector search and RAG for Ruby
177
+ test_files: []