leann 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Leann
  module Rails
    # ActiveRecord model backing a single LEANN index (table "leann_indexes").
    #
    # Each record owns the passages stored under it and carries the embedding
    # settings (provider, model, dimensions) the index was built with.
    #
    # @example
    #   index = Leann::Rails::Index.find_by(name: "products")
    #   index.search("running shoes")
    #
    class Index < ::ActiveRecord::Base
      self.table_name = "leann_indexes"

      # Passages are linked through leann_index_id and removed with
      # delete_all when the index record is destroyed.
      has_many :passages,
               class_name: "Leann::Rails::Passage",
               foreign_key: :leann_index_id,
               dependent: :delete_all

      validates :name, presence: true, uniqueness: true
      validates :embedding_provider, presence: true
      validates :dimensions, presence: true, numericality: { greater_than: 0 }

      # The config column is (de)serialized with the JSON coder.
      serialize :config, coder: JSON

      # Run a search against this index by delegating to a Searcher.
      #
      # @param query [String] Search query
      # @param limit [Integer] Maximum results
      # @param threshold [Float] Minimum similarity score
      # @param filters [Hash] Metadata filters
      # @return [Leann::SearchResults]
      def search(query, limit: 5, threshold: nil, filters: nil)
        Searcher.new(self).search(query, limit: limit, threshold: threshold, filters: filters)
      end

      # Number of passages stored under this index.
      # @return [Integer]
      def document_count
        passages.count
      end

      # The embedding_provider attribute converted to a symbol.
      # @return [Symbol]
      def embedding_provider_sym
        embedding_provider.to_sym
      end

      # Multi-line, human-readable summary of the index.
      # @return [String]
      def to_s
        <<~INFO.chomp
          Index: #{name}
            Documents: #{document_count}
            Embedding: #{embedding_provider}/#{embedding_model}
            Dimensions: #{dimensions}
            Backend: active_record
            Created: #{created_at&.strftime('%Y-%m-%d %H:%M:%S') || 'unknown'}
        INFO
      end

      # Compact one-line representation; includes the document count, so it
      # goes through #document_count each time it is called.
      # @return [String]
      def inspect
        quoted_name = name.inspect
        "#<Leann::Rails::Index id=#{id} name=#{quoted_name} documents=#{document_count}>"
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Leann
  module Rails
    # ActiveRecord model for a passage/document stored within an index
    # (table "leann_passages").
    #
    # @example
    #   passage = Leann::Rails::Passage.find(123)
    #   passage.text          # => "Document content..."
    #   passage.metadata      # => { "category" => "docs" }
    #   passage.metadata_sym  # => { category: "docs" }
    #
    class Passage < ::ActiveRecord::Base
      # Longest text shown verbatim by #inspect; longer text is truncated.
      INSPECT_TEXT_LIMIT = 50

      self.table_name = "leann_passages"

      belongs_to :index,
                 class_name: "Leann::Rails::Index",
                 foreign_key: :leann_index_id

      validates :external_id, presence: true
      validates :text, presence: true
      validates :leann_index_id, presence: true

      # Both columns are (de)serialized with the JSON coder.
      serialize :metadata, coder: JSON
      serialize :neighbors, coder: JSON

      # Metadata with top-level keys symbolized; an unset column yields {}.
      # @return [Hash]
      def metadata_sym
        (metadata || {}).transform_keys(&:to_sym)
      end

      # Neighbor IDs, or an empty array when none are stored.
      # @return [Array<String>]
      def neighbor_ids
        neighbors || []
      end

      # Convert to hash for search results
      # @return [Hash] with :id (the external_id), :text and :metadata
      def to_h
        {
          id: external_id,
          text: text,
          metadata: metadata_sym
        }
      end

      # Detailed inspection with the text truncated to a short preview.
      #
      # A nil text (e.g. a freshly instantiated, unsaved record) is rendered
      # as `text=nil` instead of raising NoMethodError, so the record can
      # always be inspected in a console or an error message.
      #
      # @return [String]
      def inspect
        "#<Leann::Rails::Passage id=#{id} external_id=#{external_id.inspect} text=#{text_preview.inspect}>"
      end

      private

      # Text shortened for #inspect; nil when no text is set.
      def text_preview
        return text if text.nil? || text.length <= INSPECT_TEXT_LIMIT

        "#{text[0..47]}..."
      end
    end
  end
end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+
5
module Leann
  module Rails
    # Builds a new LEANN index stored in the database
    #
    # @example DSL style
    #   Leann::Rails.build("products") do
    #     add "Red running shoes", category: "shoes"
    #     add "Blue denim jeans", category: "pants"
    #   end
    #
    # @example Programmatic style
    #   builder = Leann::Rails::Builder.new("products")
    #   builder.add("Red running shoes", category: "shoes")
    #   builder.save
    #
    class Builder
      # @return [String] Index name
      attr_reader :name

      # @return [Array<Hash>] Documents to be indexed (each with :id, :text, :metadata)
      attr_reader :documents

      # @param name [String] Index name (must be unique)
      # @param embedding [Symbol, String, nil] Embedding provider
      #   (:ruby_llm, :openai, :ollama, :fastembed); falls back to the
      #   configured provider when nil
      # @param model [String, nil] Embedding model name
      # @param force [Boolean] Overwrite existing index
      # @raise [Leann::IndexExistsError] if the index exists and force is false
      def initialize(name, embedding: nil, model: nil, force: false)
        @name = name
        provider = embedding || Leann.configuration.embedding_provider
        # Normalize to a symbol so "openai" and :openai select the same
        # provider in #load_embedding_provider.
        @embedding_provider = provider.respond_to?(:to_sym) ? provider.to_sym : provider
        @embedding_model = model || Leann.configuration.embedding_model_for(@embedding_provider)
        @force = force
        @documents = []

        check_existing_index unless force
      end

      # Add a text document
      #
      # @param text [String] Document text (stored stripped)
      # @param metadata [Hash] Additional metadata; an :id entry, if present,
      #   becomes the document id, otherwise a UUID is generated
      # @return [self]
      # @raise [ArgumentError] if text is nil or blank
      def add(text, **metadata)
        raise ArgumentError, "Text cannot be nil" if text.nil?
        raise ArgumentError, "Text cannot be empty" if text.to_s.strip.empty?

        doc = {
          id: metadata.delete(:id) || SecureRandom.uuid,
          text: text.to_s.strip,
          metadata: metadata
        }

        @documents << doc
        self
      end

      # Add document (alias for add)
      alias << add

      # Add content from a file
      #
      # @param file_path [String] Path to file
      # @param metadata [Hash] Additional metadata (overrides the generated
      #   :source, :filename and :extension entries on conflict)
      # @return [self]
      # @raise [ArgumentError] if the file does not exist
      def add_file(file_path, **metadata)
        raise ArgumentError, "File not found: #{file_path}" unless File.exist?(file_path)

        content = File.read(file_path)
        file_metadata = {
          source: file_path,
          filename: File.basename(file_path),
          extension: File.extname(file_path)
        }.merge(metadata)

        add(content, **file_metadata)
      end

      # Add all files from a directory
      #
      # @param directory [String] Directory path
      # @param pattern [String] Glob pattern, relative to the directory
      # @param extensions [Array<String>, nil] Filter by extensions, compared
      #   against File.extname (so including the leading dot, e.g. ".md")
      # @param metadata [Hash] Additional metadata for all files
      # @return [self]
      # @raise [ArgumentError] if the directory does not exist
      def add_directory(directory, pattern: "**/*", extensions: nil, **metadata)
        raise ArgumentError, "Directory not found: #{directory}" unless Dir.exist?(directory)

        full_pattern = File.join(directory, pattern)
        Dir.glob(full_pattern).each do |file_path|
          next unless File.file?(file_path)
          next if extensions && !extensions.include?(File.extname(file_path))

          add_file(file_path, **metadata)
        end

        self
      end

      # Add multiple documents at once
      #
      # Hash documents are read from a copy, so the caller's hashes are left
      # untouched.
      #
      # @param docs [Array<String>, Array<Hash>] Documents to add; hashes
      #   carry the text under :text or "text", remaining entries are metadata
      # @return [self]
      # @raise [ArgumentError] for entries that are neither String nor Hash
      def add_all(docs)
        docs.each do |doc|
          case doc
          when String
            add(doc)
          when Hash
            attributes = doc.dup
            text = attributes.delete(:text) || attributes.delete("text")
            add(text, **attributes.transform_keys(&:to_sym))
          else
            raise ArgumentError, "Invalid document type: #{doc.class}"
          end
        end

        self
      end

      # Get number of documents added
      # @return [Integer]
      def count
        @documents.size
      end
      alias size count

      # Check if any documents have been added
      # @return [Boolean]
      def empty?
        @documents.empty?
      end

      # Build and save the index to the database
      #
      # Embeddings are computed before anything is written, and the
      # replacement of an existing index (force mode), the creation of the
      # index record and the backend build all run in one transaction. A
      # failing embedding call or build therefore neither removes the
      # previous index nor leaves a partially built one behind.
      #
      # @return [Leann::Rails::Index] The built index record
      # @raise [Leann::EmptyIndexError] if no documents were added
      def save
        raise Leann::EmptyIndexError if empty?

        puts "Building index '#{name}' with #{count} documents..."

        embeddings = compute_embeddings

        index = Index.transaction do
          # Delete existing if force mode
          Index.find_by(name: name)&.destroy if @force

          record = Index.create!(
            name: name,
            embedding_provider: @embedding_provider.to_s,
            embedding_model: @embedding_model,
            dimensions: embeddings.first&.size || 0,
            config: {
              hnsw_m: Leann.configuration.hnsw_m,
              hnsw_ef_construction: Leann.configuration.hnsw_ef_construction
            }
          )

          # Build and store graph
          ActiveRecordBackend.new(record).build(@documents, embeddings)
          record
        end

        puts "Index '#{name}' created successfully!"

        index
      end
      alias build save

      private

      # Refuse to silently shadow an index that already exists.
      def check_existing_index
        raise Leann::IndexExistsError, name if Index.exists?(name: name)
      end

      # Embeddings for all document texts, in document order.
      def compute_embeddings
        texts = @documents.map { |d| d[:text] }
        embedding_provider.compute(texts)
      end

      # Lazily instantiated embedding provider object.
      def embedding_provider
        @_embedding_provider ||= load_embedding_provider
      end

      # Provider files are required on demand so only the selected
      # provider's code is loaded.
      def load_embedding_provider
        case @embedding_provider
        when :ruby_llm
          require "leann/embedding/ruby_llm"
          Leann::Embedding::RubyLLM.new(model: @embedding_model)
        when :openai
          require "leann/embedding/openai"
          Leann::Embedding::OpenAI.new(model: @embedding_model)
        when :ollama
          require "leann/embedding/ollama"
          Leann::Embedding::Ollama.new(model: @embedding_model)
        when :fastembed
          require "leann/embedding/fastembed"
          Leann::Embedding::FastEmbed.new(model: @embedding_model)
        else
          raise Leann::ConfigurationError, "Unknown embedding provider: #{@embedding_provider}"
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Leann
  module Rails
    # Railtie that plugs LEANN into a Rails application.
    class Railtie < ::Rails::Railtie
      # Registered hook for environment-based auto-configuration; the block
      # is currently empty, so it performs no work.
      initializer("leann.configure_rails") {}

      # Expose generators: the install generator is required only when
      # Rails evaluates this generators block.
      generators { require "generators/leann/install/install_generator" }
    end
  end
end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
module Leann
  module Rails
    # Handles search operations on a database-stored index
    class Searcher
      # @return [Leann::Rails::Index]
      attr_reader :index

      # @param index [Leann::Rails::Index]
      def initialize(index)
        @index = index
        @embedding_provider = nil
      end

      # Search the index
      #
      # The backend is asked for twice as many candidates as requested so
      # that threshold and metadata filtering still have results to pick
      # from; the survivors are sorted and cut down to `limit`.
      #
      # @param query [String] Search query
      # @param limit [Integer] Maximum results
      # @param threshold [Float, nil] Minimum score threshold
      # @param filters [Hash, nil] Metadata filters
      # @return [Leann::SearchResults]
      def search(query, limit: 5, threshold: nil, filters: nil)
        # Monotonic clock: the measured duration is unaffected by
        # wall-clock adjustments that happen while the search runs.
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        # Compute query embedding
        query_embedding = embedding_provider.compute([query]).first

        # Load all passages for embedding recomputation
        passages = load_all_passages

        # Search with on-the-fly embedding recomputation
        backend = ActiveRecordBackend.new(index)
        raw_results = backend.search(
          query_embedding,
          embedding_provider: embedding_provider,
          passages: passages,
          limit: limit * 2
        )

        results = build_results(raw_results)

        # Apply threshold filter
        results = results.select { |r| r.score >= threshold } if threshold

        # Apply metadata filters
        results = apply_filters(results, filters) if filters

        # Limit and sort results
        results = results.sort.first(limit)

        duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        Leann::SearchResults.new(results, query: query, duration: duration)
      end

      private

      # Lazily instantiated embedding provider object.
      def embedding_provider
        @embedding_provider ||= load_embedding_provider
      end

      # Provider files are required on demand so only the provider recorded
      # on the index is loaded.
      def load_embedding_provider
        case index.embedding_provider_sym
        when :ruby_llm
          require "leann/embedding/ruby_llm"
          Leann::Embedding::RubyLLM.new(model: index.embedding_model)
        when :openai
          require "leann/embedding/openai"
          Leann::Embedding::OpenAI.new(model: index.embedding_model)
        when :ollama
          require "leann/embedding/ollama"
          Leann::Embedding::Ollama.new(model: index.embedding_model)
        when :fastembed
          require "leann/embedding/fastembed"
          Leann::Embedding::FastEmbed.new(model: index.embedding_model)
        else
          raise Leann::ConfigurationError, "Unknown embedding provider: #{index.embedding_provider}"
        end
      end

      # Map of external_id => text for every passage of the index.
      def load_all_passages
        index.passages.pluck(:external_id, :text).to_h
      end

      # Turn the backend's (id, score) pairs into SearchResult objects.
      #
      # All matching passage records are fetched with a single query rather
      # than one lookup per result; ids without a stored passage are skipped.
      def build_results(raw_results)
        ids = raw_results.map { |id, _score| id }
        return [] if ids.empty?

        records = index.passages.where(external_id: ids).each_with_object({}) do |passage, by_id|
          by_id[passage.external_id] ||= passage
        end

        raw_results.map do |id, score|
          passage = records[id]
          next unless passage

          Leann::SearchResult.new(
            id: id,
            text: passage.text,
            score: score,
            metadata: passage.metadata_sym
          )
        end.compact
      end

      # Keep results whose metadata satisfies every filter. A filter value
      # may be a Range (cover?), an Array (include?), a Regexp (matched
      # against the value's string form) or any other object (equality).
      def apply_filters(results, filters)
        results.select do |result|
          filters.all? do |key, value|
            metadata_value = result.metadata[key.to_sym]

            case value
            when Range
              value.cover?(metadata_value)
            when Array
              value.include?(metadata_value)
            when Regexp
              value.match?(metadata_value.to_s)
            else
              metadata_value == value
            end
          end
        end
      end
    end
  end
end
+ end