neighbor 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +19 -11
- data/lib/neighbor/model.rb +30 -8
- data/lib/neighbor/type/cube.rb +38 -0
- data/lib/neighbor/type/vector.rb +14 -0
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +4 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c8b5d19222742f33f51f2c30f9d03108ebd3ed99908a7e9dd5f4e49caa2e225
|
4
|
+
data.tar.gz: c9cfa942f2cdd8b9757c9ecfe5e89d0aced11263f8a559004ee15fa0c8adb3f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9e0050031ce7691baa9242b3b6b5aa76afb1fe7c63575129e68b2f5c027143b3c08f68a7babfcf2a9b02f1d9327679f75e9c40b95ac2245ea7c8dd3025d3cdb
|
7
|
+
data.tar.gz: a9c505740cba454437617733d4025360848a16ef9a4c9c83fc16d5bc82a3e5521c77e3cba874ef3cf318cf3a1e319567958a6156481f7fd82ef72ebaa87d97eb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.3.2 (2023-12-12)
|
2
|
+
|
3
|
+
- Added deprecation warning for `has_neighbors` without an attribute name
|
4
|
+
- Added deprecation warning for `nearest_neighbors` without an attribute name
|
5
|
+
|
6
|
+
## 0.3.1 (2023-09-25)
|
7
|
+
|
8
|
+
- Added support for passing multiple attributes to `has_neighbors`
|
9
|
+
- Fixed error with `nearest_neighbors` scope with Ruby 3.2 and Active Record 6.1
|
10
|
+
|
1
11
|
## 0.3.0 (2023-07-24)
|
2
12
|
|
3
13
|
- Dropped support for Ruby < 3 and Active Record < 6.1
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "neighbor"
|
|
14
14
|
|
15
15
|
## Choose An Extension
|
16
16
|
|
17
|
-
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports approximate nearest neighbor search.
|
17
|
+
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports more dimensions and approximate nearest neighbor search.
|
18
18
|
|
19
19
|
For cube, run:
|
20
20
|
|
@@ -35,7 +35,7 @@ rails db:migrate
|
|
35
35
|
Create a migration
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
class
|
38
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[7.1]
|
39
39
|
def change
|
40
40
|
add_column :items, :embedding, :cube
|
41
41
|
# or
|
@@ -114,21 +114,29 @@ end
|
|
114
114
|
For vector, add an approximate index to speed up queries. Create a migration with:
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
class
|
117
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.1]
|
118
118
|
def change
|
119
119
|
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
|
+
# or with pgvector 0.5.0+
|
121
|
+
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
120
122
|
end
|
121
123
|
end
|
122
124
|
```
|
123
125
|
|
124
126
|
Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
|
125
127
|
|
126
|
-
Set the number of probes
|
128
|
+
Set the number of probes with IVFFlat
|
127
129
|
|
128
130
|
```ruby
|
129
131
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
132
|
```
|
131
133
|
|
134
|
+
Or the size of the dynamic candidate list with HNSW
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Item.connection.execute("SET hnsw.ef_search = 100")
|
138
|
+
```
|
139
|
+
|
132
140
|
## Examples
|
133
141
|
|
134
142
|
- [OpenAI Embeddings](#openai-embeddings)
|
@@ -139,14 +147,14 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
139
147
|
Generate a model
|
140
148
|
|
141
149
|
```sh
|
142
|
-
rails generate model
|
150
|
+
rails generate model Document content:text embedding:vector{1536}
|
143
151
|
rails db:migrate
|
144
152
|
```
|
145
153
|
|
146
154
|
And add `has_neighbors`
|
147
155
|
|
148
156
|
```ruby
|
149
|
-
class
|
157
|
+
class Document < ApplicationRecord
|
150
158
|
has_neighbors :embedding
|
151
159
|
end
|
152
160
|
```
|
@@ -184,18 +192,18 @@ embeddings = fetch_embeddings(input)
|
|
184
192
|
Store the embeddings
|
185
193
|
|
186
194
|
```ruby
|
187
|
-
|
195
|
+
documents = []
|
188
196
|
input.zip(embeddings) do |content, embedding|
|
189
|
-
|
197
|
+
documents << {content: content, embedding: embedding}
|
190
198
|
end
|
191
|
-
|
199
|
+
Document.insert_all!(documents)
|
192
200
|
```
|
193
201
|
|
194
202
|
And get similar articles
|
195
203
|
|
196
204
|
```ruby
|
197
|
-
|
198
|
-
|
205
|
+
document = Document.first
|
206
|
+
document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content)
|
199
207
|
```
|
200
208
|
|
201
209
|
See the [complete code](examples/openai_embeddings.rb)
|
data/lib/neighbor/model.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
module Neighbor
|
2
2
|
module Model
|
3
|
-
def has_neighbors(
|
4
|
-
|
3
|
+
def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
|
4
|
+
if attribute_names.empty?
|
5
|
+
warn "[neighbor] has_neighbors without an attribute name is deprecated"
|
6
|
+
attribute_names << :neighbor_vector
|
7
|
+
else
|
8
|
+
attribute_names.map!(&:to_sym)
|
9
|
+
end
|
5
10
|
|
6
11
|
class_eval do
|
7
12
|
@neighbor_attributes ||= {}
|
@@ -19,15 +24,28 @@ module Neighbor
|
|
19
24
|
end
|
20
25
|
end
|
21
26
|
|
22
|
-
|
23
|
-
|
27
|
+
attribute_names.each do |attribute_name|
|
28
|
+
raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
|
29
|
+
@neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
|
30
|
+
|
31
|
+
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
|
32
|
+
end
|
24
33
|
|
25
|
-
|
34
|
+
return if @neighbor_attributes.size != attribute_names.size
|
26
35
|
|
27
|
-
|
36
|
+
scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
|
37
|
+
# cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
|
38
|
+
# https://github.com/rails/rails/issues/46934
|
39
|
+
if options.nil? && vector.is_a?(Hash)
|
40
|
+
options = vector
|
41
|
+
vector = nil
|
42
|
+
end
|
43
|
+
raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
|
44
|
+
distance = options.delete(:distance)
|
45
|
+
raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
|
28
46
|
|
29
|
-
scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
|
30
47
|
if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
|
48
|
+
warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
|
31
49
|
vector = attribute_name
|
32
50
|
attribute_name = :neighbor_vector
|
33
51
|
end
|
@@ -107,7 +125,11 @@ module Neighbor
|
|
107
125
|
.order(Arel.sql(order))
|
108
126
|
}
|
109
127
|
|
110
|
-
def nearest_neighbors(attribute_name =
|
128
|
+
def nearest_neighbors(attribute_name = nil, **options)
|
129
|
+
if attribute_name.nil?
|
130
|
+
warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
|
131
|
+
attribute_name = :neighbor_vector
|
132
|
+
end
|
111
133
|
attribute_name = attribute_name.to_sym
|
112
134
|
# important! check if neighbor attribute before calling send
|
113
135
|
raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name]
|
data/lib/neighbor/type/cube.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Neighbor
|
2
|
+
module Type
|
3
|
+
class Cube < ActiveRecord::Type::String
|
4
|
+
def type
|
5
|
+
:cube
|
6
|
+
end
|
7
|
+
|
8
|
+
def cast(value)
|
9
|
+
if value.is_a?(Array)
|
10
|
+
if value.first.is_a?(Array)
|
11
|
+
value.map { |v| cast_point(v) }.join(", ")
|
12
|
+
else
|
13
|
+
cast_point(value)
|
14
|
+
end
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# TODO uncomment in 0.4.0
|
21
|
+
# def deserialize(value)
|
22
|
+
# if value.nil?
|
23
|
+
# super
|
24
|
+
# elsif value.include?("),(")
|
25
|
+
# value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) }
|
26
|
+
# else
|
27
|
+
# value[1..-1].split(",").map(&:to_f)
|
28
|
+
# end
|
29
|
+
# end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def cast_point(value)
|
34
|
+
"(#{value.map(&:to_f).join(", ")})"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/neighbor/version.rb
CHANGED
data/lib/neighbor.rb
CHANGED
@@ -10,10 +10,10 @@ module Neighbor
|
|
10
10
|
module RegisterTypes
|
11
11
|
def initialize_type_map(m = type_map)
|
12
12
|
super
|
13
|
-
m.register_type "cube",
|
13
|
+
m.register_type "cube", Type::Cube.new
|
14
14
|
m.register_type "vector" do |_, _, sql_type|
|
15
15
|
limit = extract_limit(sql_type)
|
16
|
-
|
16
|
+
Type::Vector.new(limit: limit)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -22,6 +22,8 @@ end
|
|
22
22
|
ActiveSupport.on_load(:active_record) do
|
23
23
|
require_relative "neighbor/model"
|
24
24
|
require_relative "neighbor/vector"
|
25
|
+
require_relative "neighbor/type/cube"
|
26
|
+
require_relative "neighbor/type/vector"
|
25
27
|
|
26
28
|
extend Neighbor::Model
|
27
29
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-24 00:00:00.000000000 Z
|
11
|
+
date: 2023-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -40,6 +40,8 @@ files:
|
|
40
40
|
- lib/neighbor.rb
|
41
41
|
- lib/neighbor/model.rb
|
42
42
|
- lib/neighbor/railtie.rb
|
43
|
+
- lib/neighbor/type/cube.rb
|
44
|
+
- lib/neighbor/type/vector.rb
|
43
45
|
- lib/neighbor/vector.rb
|
44
46
|
- lib/neighbor/version.rb
|
45
47
|
homepage: https://github.com/ankane/neighbor
|