neighbor 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +16 -8
- data/lib/neighbor/model.rb +23 -7
- data/lib/neighbor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5818fdfa27cc4b0678fb125e82d2fa0a3066ea173674ca541987b9084a94ac14
|
4
|
+
data.tar.gz: e874541532172dce9932a98506bd5ce7866a0f5c3e600ffd9d6473f226829368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96211da20caf70383018ca626cbd83bbda5e4ffaf95e5a9b5405686b192643916325f89a652b4ba9e34e17daa1b84e8045cc644b606bab8aa6ea339ed4d7ea0d
|
7
|
+
data.tar.gz: bf855dc34617d489618417b9c807969f828358de19462458b7c09e1743102a8b9967b32b09660fab35f35543b77d588297c69971edb7442af0995cdca3b6ff8e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -117,18 +117,26 @@ For vector, add an approximate index to speed up queries. Create a migration wit
|
|
117
117
|
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[7.0]
|
118
118
|
def change
|
119
119
|
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
|
+
# or with pgvector 0.5.0+
|
121
|
+
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
120
122
|
end
|
121
123
|
end
|
122
124
|
```
|
123
125
|
|
124
126
|
Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
|
125
127
|
|
126
|
-
Set the number of probes
|
128
|
+
Set the number of probes with IVFFlat
|
127
129
|
|
128
130
|
```ruby
|
129
131
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
132
|
```
|
131
133
|
|
134
|
+
Or the size of the dynamic candidate list with HNSW
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Item.connection.execute("SET hnsw.ef_search = 100")
|
138
|
+
```
|
139
|
+
|
132
140
|
## Examples
|
133
141
|
|
134
142
|
- [OpenAI Embeddings](#openai-embeddings)
|
@@ -139,14 +147,14 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
139
147
|
Generate a model
|
140
148
|
|
141
149
|
```sh
|
142
|
-
rails generate model
|
150
|
+
rails generate model Document content:text embedding:vector{1536}
|
143
151
|
rails db:migrate
|
144
152
|
```
|
145
153
|
|
146
154
|
And add `has_neighbors`
|
147
155
|
|
148
156
|
```ruby
|
149
|
-
class
|
157
|
+
class Document < ApplicationRecord
|
150
158
|
has_neighbors :embedding
|
151
159
|
end
|
152
160
|
```
|
@@ -184,18 +192,18 @@ embeddings = fetch_embeddings(input)
|
|
184
192
|
Store the embeddings
|
185
193
|
|
186
194
|
```ruby
|
187
|
-
|
195
|
+
documents = []
|
188
196
|
input.zip(embeddings) do |content, embedding|
|
189
|
-
|
197
|
+
documents << {content: content, embedding: embedding}
|
190
198
|
end
|
191
|
-
|
199
|
+
Document.insert_all!(documents)
|
192
200
|
```
|
193
201
|
|
194
202
|
And get similar articles
|
195
203
|
|
196
204
|
```ruby
|
197
|
-
|
198
|
-
|
205
|
+
document = Document.first
|
206
|
+
document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content)
|
199
207
|
```
|
200
208
|
|
201
209
|
See the [complete code](examples/openai_embeddings.rb)
|
data/lib/neighbor/model.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
module Neighbor
|
2
2
|
module Model
|
3
|
-
def has_neighbors(
|
4
|
-
|
3
|
+
def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
|
4
|
+
if attribute_names.empty?
|
5
|
+
attribute_names << :neighbor_vector
|
6
|
+
else
|
7
|
+
attribute_names.map!(&:to_sym)
|
8
|
+
end
|
5
9
|
|
6
10
|
class_eval do
|
7
11
|
@neighbor_attributes ||= {}
|
@@ -19,14 +23,26 @@ module Neighbor
|
|
19
23
|
end
|
20
24
|
end
|
21
25
|
|
22
|
-
|
23
|
-
|
26
|
+
attribute_names.each do |attribute_name|
|
27
|
+
raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
|
28
|
+
@neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
|
29
|
+
|
30
|
+
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
|
31
|
+
end
|
24
32
|
|
25
|
-
|
33
|
+
return if @neighbor_attributes.size != attribute_names.size
|
26
34
|
|
27
|
-
|
35
|
+
scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
|
36
|
+
# cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
|
37
|
+
# https://github.com/rails/rails/issues/46934
|
38
|
+
if options.nil? && vector.is_a?(Hash)
|
39
|
+
options = vector
|
40
|
+
vector = nil
|
41
|
+
end
|
42
|
+
raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
|
43
|
+
distance = options.delete(:distance)
|
44
|
+
raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
|
28
45
|
|
29
|
-
scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
|
30
46
|
if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
|
31
47
|
vector = attribute_name
|
32
48
|
attribute_name = :neighbor_vector
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|