neighbor 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +16 -8
- data/lib/neighbor/model.rb +23 -7
- data/lib/neighbor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5818fdfa27cc4b0678fb125e82d2fa0a3066ea173674ca541987b9084a94ac14
|
4
|
+
data.tar.gz: e874541532172dce9932a98506bd5ce7866a0f5c3e600ffd9d6473f226829368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96211da20caf70383018ca626cbd83bbda5e4ffaf95e5a9b5405686b192643916325f89a652b4ba9e34e17daa1b84e8045cc644b606bab8aa6ea339ed4d7ea0d
|
7
|
+
data.tar.gz: bf855dc34617d489618417b9c807969f828358de19462458b7c09e1743102a8b9967b32b09660fab35f35543b77d588297c69971edb7442af0995cdca3b6ff8e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -117,18 +117,26 @@ For vector, add an approximate index to speed up queries. Create a migration wit
|
|
117
117
|
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[7.0]
|
118
118
|
def change
|
119
119
|
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
|
+
# or with pgvector 0.5.0+
|
121
|
+
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
120
122
|
end
|
121
123
|
end
|
122
124
|
```
|
123
125
|
|
124
126
|
Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
|
125
127
|
|
126
|
-
Set the number of probes
|
128
|
+
Set the number of probes with IVFFlat
|
127
129
|
|
128
130
|
```ruby
|
129
131
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
132
|
```
|
131
133
|
|
134
|
+
Or the size of the dynamic candidate list with HNSW
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Item.connection.execute("SET hnsw.ef_search = 100")
|
138
|
+
```
|
139
|
+
|
132
140
|
## Examples
|
133
141
|
|
134
142
|
- [OpenAI Embeddings](#openai-embeddings)
|
@@ -139,14 +147,14 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
139
147
|
Generate a model
|
140
148
|
|
141
149
|
```sh
|
142
|
-
rails generate model
|
150
|
+
rails generate model Document content:text embedding:vector{1536}
|
143
151
|
rails db:migrate
|
144
152
|
```
|
145
153
|
|
146
154
|
And add `has_neighbors`
|
147
155
|
|
148
156
|
```ruby
|
149
|
-
class
|
157
|
+
class Document < ApplicationRecord
|
150
158
|
has_neighbors :embedding
|
151
159
|
end
|
152
160
|
```
|
@@ -184,18 +192,18 @@ embeddings = fetch_embeddings(input)
|
|
184
192
|
Store the embeddings
|
185
193
|
|
186
194
|
```ruby
|
187
|
-
|
195
|
+
documents = []
|
188
196
|
input.zip(embeddings) do |content, embedding|
|
189
|
-
|
197
|
+
documents << {content: content, embedding: embedding}
|
190
198
|
end
|
191
|
-
|
199
|
+
Document.insert_all!(documents)
|
192
200
|
```
|
193
201
|
|
194
202
|
And get similar documents
|
195
203
|
|
196
204
|
```ruby
|
197
|
-
|
198
|
-
|
205
|
+
document = Document.first
|
206
|
+
document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content)
|
199
207
|
```
|
200
208
|
|
201
209
|
See the [complete code](examples/openai_embeddings.rb)
|
data/lib/neighbor/model.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
module Neighbor
|
2
2
|
module Model
|
3
|
-
def has_neighbors(
|
4
|
-
|
3
|
+
def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
|
4
|
+
if attribute_names.empty?
|
5
|
+
attribute_names << :neighbor_vector
|
6
|
+
else
|
7
|
+
attribute_names.map!(&:to_sym)
|
8
|
+
end
|
5
9
|
|
6
10
|
class_eval do
|
7
11
|
@neighbor_attributes ||= {}
|
@@ -19,14 +23,26 @@ module Neighbor
|
|
19
23
|
end
|
20
24
|
end
|
21
25
|
|
22
|
-
|
23
|
-
|
26
|
+
attribute_names.each do |attribute_name|
|
27
|
+
raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
|
28
|
+
@neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
|
29
|
+
|
30
|
+
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
|
31
|
+
end
|
24
32
|
|
25
|
-
|
33
|
+
return if @neighbor_attributes.size != attribute_names.size
|
26
34
|
|
27
|
-
|
35
|
+
scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
|
36
|
+
# cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
|
37
|
+
# https://github.com/rails/rails/issues/46934
|
38
|
+
if options.nil? && vector.is_a?(Hash)
|
39
|
+
options = vector
|
40
|
+
vector = nil
|
41
|
+
end
|
42
|
+
raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
|
43
|
+
distance = options.delete(:distance)
|
44
|
+
raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
|
28
45
|
|
29
|
-
scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
|
30
46
|
if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
|
31
47
|
vector = attribute_name
|
32
48
|
attribute_name = :neighbor_vector
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|