neighbor 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +19 -11
- data/lib/neighbor/model.rb +30 -8
- data/lib/neighbor/type/cube.rb +38 -0
- data/lib/neighbor/type/vector.rb +14 -0
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +4 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c8b5d19222742f33f51f2c30f9d03108ebd3ed99908a7e9dd5f4e49caa2e225
|
4
|
+
data.tar.gz: c9cfa942f2cdd8b9757c9ecfe5e89d0aced11263f8a559004ee15fa0c8adb3f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9e0050031ce7691baa9242b3b6b5aa76afb1fe7c63575129e68b2f5c027143b3c08f68a7babfcf2a9b02f1d9327679f75e9c40b95ac2245ea7c8dd3025d3cdb
|
7
|
+
data.tar.gz: a9c505740cba454437617733d4025360848a16ef9a4c9c83fc16d5bc82a3e5521c77e3cba874ef3cf318cf3a1e319567958a6156481f7fd82ef72ebaa87d97eb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.3.2 (2023-12-12)
|
2
|
+
|
3
|
+
- Added deprecation warning for `has_neighbors` without an attribute name
|
4
|
+
- Added deprecation warning for `nearest_neighbors` without an attribute name
|
5
|
+
|
6
|
+
## 0.3.1 (2023-09-25)
|
7
|
+
|
8
|
+
- Added support for passing multiple attributes to `has_neighbors`
|
9
|
+
- Fixed error with `nearest_neighbors` scope with Ruby 3.2 and Active Record 6.1
|
10
|
+
|
1
11
|
## 0.3.0 (2023-07-24)
|
2
12
|
|
3
13
|
- Dropped support for Ruby < 3 and Active Record < 6.1
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "neighbor"
|
|
14
14
|
|
15
15
|
## Choose An Extension
|
16
16
|
|
17
|
-
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports approximate nearest neighbor search.
|
17
|
+
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports more dimensions and approximate nearest neighbor search.
|
18
18
|
|
19
19
|
For cube, run:
|
20
20
|
|
@@ -35,7 +35,7 @@ rails db:migrate
|
|
35
35
|
Create a migration
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
class
|
38
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[7.1]
|
39
39
|
def change
|
40
40
|
add_column :items, :embedding, :cube
|
41
41
|
# or
|
@@ -114,21 +114,29 @@ end
|
|
114
114
|
For vector, add an approximate index to speed up queries. Create a migration with:
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
class
|
117
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.1]
|
118
118
|
def change
|
119
119
|
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
|
+
# or with pgvector 0.5.0+
|
121
|
+
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
120
122
|
end
|
121
123
|
end
|
122
124
|
```
|
123
125
|
|
124
126
|
Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
|
125
127
|
|
126
|
-
Set the number of probes
|
128
|
+
Set the number of probes with IVFFlat
|
127
129
|
|
128
130
|
```ruby
|
129
131
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
132
|
```
|
131
133
|
|
134
|
+
Or the size of the dynamic candidate list with HNSW
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Item.connection.execute("SET hnsw.ef_search = 100")
|
138
|
+
```
|
139
|
+
|
132
140
|
## Examples
|
133
141
|
|
134
142
|
- [OpenAI Embeddings](#openai-embeddings)
|
@@ -139,14 +147,14 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
139
147
|
Generate a model
|
140
148
|
|
141
149
|
```sh
|
142
|
-
rails generate model
|
150
|
+
rails generate model Document content:text embedding:vector{1536}
|
143
151
|
rails db:migrate
|
144
152
|
```
|
145
153
|
|
146
154
|
And add `has_neighbors`
|
147
155
|
|
148
156
|
```ruby
|
149
|
-
class
|
157
|
+
class Document < ApplicationRecord
|
150
158
|
has_neighbors :embedding
|
151
159
|
end
|
152
160
|
```
|
@@ -184,18 +192,18 @@ embeddings = fetch_embeddings(input)
|
|
184
192
|
Store the embeddings
|
185
193
|
|
186
194
|
```ruby
|
187
|
-
|
195
|
+
documents = []
|
188
196
|
input.zip(embeddings) do |content, embedding|
|
189
|
-
|
197
|
+
documents << {content: content, embedding: embedding}
|
190
198
|
end
|
191
|
-
|
199
|
+
Document.insert_all!(documents)
|
192
200
|
```
|
193
201
|
|
194
202
|
And get similar documents
|
195
203
|
|
196
204
|
```ruby
|
197
|
-
|
198
|
-
|
205
|
+
document = Document.first
|
206
|
+
document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content)
|
199
207
|
```
|
200
208
|
|
201
209
|
See the [complete code](examples/openai_embeddings.rb)
|
data/lib/neighbor/model.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
module Neighbor
|
2
2
|
module Model
|
3
|
-
def has_neighbors(
|
4
|
-
|
3
|
+
def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
|
4
|
+
if attribute_names.empty?
|
5
|
+
warn "[neighbor] has_neighbors without an attribute name is deprecated"
|
6
|
+
attribute_names << :neighbor_vector
|
7
|
+
else
|
8
|
+
attribute_names.map!(&:to_sym)
|
9
|
+
end
|
5
10
|
|
6
11
|
class_eval do
|
7
12
|
@neighbor_attributes ||= {}
|
@@ -19,15 +24,28 @@ module Neighbor
|
|
19
24
|
end
|
20
25
|
end
|
21
26
|
|
22
|
-
|
23
|
-
|
27
|
+
attribute_names.each do |attribute_name|
|
28
|
+
raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
|
29
|
+
@neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
|
30
|
+
|
31
|
+
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
|
32
|
+
end
|
24
33
|
|
25
|
-
|
34
|
+
return if @neighbor_attributes.size != attribute_names.size
|
26
35
|
|
27
|
-
|
36
|
+
scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
|
37
|
+
# cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
|
38
|
+
# https://github.com/rails/rails/issues/46934
|
39
|
+
if options.nil? && vector.is_a?(Hash)
|
40
|
+
options = vector
|
41
|
+
vector = nil
|
42
|
+
end
|
43
|
+
raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
|
44
|
+
distance = options.delete(:distance)
|
45
|
+
raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
|
28
46
|
|
29
|
-
scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
|
30
47
|
if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
|
48
|
+
warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
|
31
49
|
vector = attribute_name
|
32
50
|
attribute_name = :neighbor_vector
|
33
51
|
end
|
@@ -107,7 +125,11 @@ module Neighbor
|
|
107
125
|
.order(Arel.sql(order))
|
108
126
|
}
|
109
127
|
|
110
|
-
def nearest_neighbors(attribute_name =
|
128
|
+
def nearest_neighbors(attribute_name = nil, **options)
|
129
|
+
if attribute_name.nil?
|
130
|
+
warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
|
131
|
+
attribute_name = :neighbor_vector
|
132
|
+
end
|
111
133
|
attribute_name = attribute_name.to_sym
|
112
134
|
# important! check if neighbor attribute before calling send
|
113
135
|
raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name]
|
data/lib/neighbor/type/cube.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Neighbor
|
2
|
+
module Type
|
3
|
+
class Cube < ActiveRecord::Type::String
|
4
|
+
def type
|
5
|
+
:cube
|
6
|
+
end
|
7
|
+
|
8
|
+
def cast(value)
|
9
|
+
if value.is_a?(Array)
|
10
|
+
if value.first.is_a?(Array)
|
11
|
+
value.map { |v| cast_point(v) }.join(", ")
|
12
|
+
else
|
13
|
+
cast_point(value)
|
14
|
+
end
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# TODO uncomment in 0.4.0
|
21
|
+
# def deserialize(value)
|
22
|
+
# if value.nil?
|
23
|
+
# super
|
24
|
+
# elsif value.include?("),(")
|
25
|
+
# value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) }
|
26
|
+
# else
|
27
|
+
# value[1..-1].split(",").map(&:to_f)
|
28
|
+
# end
|
29
|
+
# end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def cast_point(value)
|
34
|
+
"(#{value.map(&:to_f).join(", ")})"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/neighbor/version.rb
CHANGED
data/lib/neighbor.rb
CHANGED
@@ -10,10 +10,10 @@ module Neighbor
|
|
10
10
|
module RegisterTypes
|
11
11
|
def initialize_type_map(m = type_map)
|
12
12
|
super
|
13
|
-
m.register_type "cube",
|
13
|
+
m.register_type "cube", Type::Cube.new
|
14
14
|
m.register_type "vector" do |_, _, sql_type|
|
15
15
|
limit = extract_limit(sql_type)
|
16
|
-
|
16
|
+
Type::Vector.new(limit: limit)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -22,6 +22,8 @@ end
|
|
22
22
|
ActiveSupport.on_load(:active_record) do
|
23
23
|
require_relative "neighbor/model"
|
24
24
|
require_relative "neighbor/vector"
|
25
|
+
require_relative "neighbor/type/cube"
|
26
|
+
require_relative "neighbor/type/vector"
|
25
27
|
|
26
28
|
extend Neighbor::Model
|
27
29
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -40,6 +40,8 @@ files:
|
|
40
40
|
- lib/neighbor.rb
|
41
41
|
- lib/neighbor/model.rb
|
42
42
|
- lib/neighbor/railtie.rb
|
43
|
+
- lib/neighbor/type/cube.rb
|
44
|
+
- lib/neighbor/type/vector.rb
|
43
45
|
- lib/neighbor/vector.rb
|
44
46
|
- lib/neighbor/version.rb
|
45
47
|
homepage: https://github.com/ankane/neighbor
|