neighbor 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +94 -23
- data/lib/neighbor/model.rb +39 -8
- data/lib/neighbor/railtie.rb +19 -0
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +2 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48a9bcfda91ac7ed0af8c593216b9a84a2b488d15e9375d897f063e44bd0f5c8
|
4
|
+
data.tar.gz: c4cf3ca35811336d7574eff4733078cc08e4b1cb7140cf5c5a12ad06a34506e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83d90c764613158ca0a753796c9d1ccbe19127792ac8a73c32eddcc2c6537ad919c21e32c43be5b88833eae92fc5d723e3275e07307e17c0951e8436faed27b5
|
7
|
+
data.tar.gz: 990ad6a45d982e7ababbb57112a65d1fcf8a37f3a37a6459cf4b0483aee407cfc3eda2575e6ae3ecd8389daaca8925b045d8df50c8787f0d1102e9e913b4269f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.2.3 (2023-04-02)
|
2
|
+
|
3
|
+
- Added support for dimensions to model generator
|
4
|
+
|
5
|
+
## 0.2.2 (2022-07-13)
|
6
|
+
|
7
|
+
- Added support for configurable attribute name
|
8
|
+
- Added support for multiple attributes per model
|
9
|
+
|
1
10
|
## 0.2.1 (2021-12-15)
|
2
11
|
|
3
12
|
- Added support for Active Record 7
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -9,12 +9,12 @@ Nearest neighbor search for Rails and Postgres
|
|
9
9
|
Add this line to your application’s Gemfile:
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
gem
|
12
|
+
gem "neighbor"
|
13
13
|
```
|
14
14
|
|
15
15
|
## Choose An Extension
|
16
16
|
|
17
|
-
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/
|
17
|
+
Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports approximate nearest neighbor search.
|
18
18
|
|
19
19
|
For cube, run:
|
20
20
|
|
@@ -23,7 +23,7 @@ rails generate neighbor:cube
|
|
23
23
|
rails db:migrate
|
24
24
|
```
|
25
25
|
|
26
|
-
For vector, [install pgvector](https://github.com/
|
26
|
+
For vector, [install pgvector](https://github.com/pgvector/pgvector#installation) and run:
|
27
27
|
|
28
28
|
```sh
|
29
29
|
rails generate neighbor:vector
|
@@ -35,11 +35,11 @@ rails db:migrate
|
|
35
35
|
Create a migration
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
class AddNeighborVectorToItems < ActiveRecord::Migration[
|
38
|
+
class AddNeighborVectorToItems < ActiveRecord::Migration[7.0]
|
39
39
|
def change
|
40
|
-
add_column :items, :
|
40
|
+
add_column :items, :embedding, :cube
|
41
41
|
# or
|
42
|
-
add_column :items, :
|
42
|
+
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
43
43
|
end
|
44
44
|
end
|
45
45
|
```
|
@@ -48,26 +48,26 @@ Add to your model
|
|
48
48
|
|
49
49
|
```ruby
|
50
50
|
class Item < ApplicationRecord
|
51
|
-
has_neighbors
|
51
|
+
has_neighbors :embedding
|
52
52
|
end
|
53
53
|
```
|
54
54
|
|
55
55
|
Update the vectors
|
56
56
|
|
57
57
|
```ruby
|
58
|
-
item.update(
|
58
|
+
item.update(embedding: [1.0, 1.2, 0.5])
|
59
59
|
```
|
60
60
|
|
61
61
|
Get the nearest neighbors to a record
|
62
62
|
|
63
63
|
```ruby
|
64
|
-
item.nearest_neighbors(distance: "euclidean").first(5)
|
64
|
+
item.nearest_neighbors(:embedding, distance: "euclidean").first(5)
|
65
65
|
```
|
66
66
|
|
67
67
|
Get the nearest neighbors to a vector
|
68
68
|
|
69
69
|
```ruby
|
70
|
-
Item.nearest_neighbors([0.9, 1.3, 1.1], distance: "euclidean").first(5)
|
70
|
+
Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean").first(5)
|
71
71
|
```
|
72
72
|
|
73
73
|
## Distance
|
@@ -84,7 +84,7 @@ For cosine distance with cube, vectors must be normalized before being stored.
|
|
84
84
|
|
85
85
|
```ruby
|
86
86
|
class Item < ApplicationRecord
|
87
|
-
has_neighbors normalize: true
|
87
|
+
has_neighbors :embedding, normalize: true
|
88
88
|
end
|
89
89
|
```
|
90
90
|
|
@@ -93,19 +93,19 @@ For inner product with cube, see [this example](examples/disco_user_recs_cube.rb
|
|
93
93
|
Records returned from `nearest_neighbors` will have a `neighbor_distance` attribute
|
94
94
|
|
95
95
|
```ruby
|
96
|
-
nearest_item = item.nearest_neighbors(distance: "euclidean").first
|
96
|
+
nearest_item = item.nearest_neighbors(:embedding, distance: "euclidean").first
|
97
97
|
nearest_item.neighbor_distance
|
98
98
|
```
|
99
99
|
|
100
100
|
## Dimensions
|
101
101
|
|
102
|
-
The cube data type
|
102
|
+
The cube data type can have up to 100 dimensions by default. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this. The vector data type can have up to 16,000 dimensions, and vectors with up to 2,000 dimensions can be indexed.
|
103
103
|
|
104
104
|
For cube, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
105
105
|
|
106
106
|
```ruby
|
107
|
-
class
|
108
|
-
has_neighbors dimensions: 3
|
107
|
+
class Item < ApplicationRecord
|
108
|
+
has_neighbors :embedding, dimensions: 3
|
109
109
|
end
|
110
110
|
```
|
111
111
|
|
@@ -114,9 +114,9 @@ end
|
|
114
114
|
For vector, add an approximate index to speed up queries. Create a migration with:
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[
|
117
|
+
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[7.0]
|
118
118
|
def change
|
119
|
-
add_index :items, :
|
119
|
+
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
120
|
end
|
121
121
|
end
|
122
122
|
```
|
@@ -129,14 +129,85 @@ Set the number of probes
|
|
129
129
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
130
|
```
|
131
131
|
|
132
|
-
##
|
132
|
+
## Examples
|
133
|
+
|
134
|
+
- [OpenAI Embeddings](#openai-embeddings)
|
135
|
+
- [Disco Recommendations](#disco-recommendations)
|
136
|
+
|
137
|
+
### OpenAI Embeddings
|
138
|
+
|
139
|
+
Generate a model
|
140
|
+
|
141
|
+
```sh
|
142
|
+
rails generate model Article content:text embedding:vector{1536}
|
143
|
+
rails db:migrate
|
144
|
+
```
|
145
|
+
|
146
|
+
And add `has_neighbors`
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
class Article < ApplicationRecord
|
150
|
+
has_neighbors :embedding
|
151
|
+
end
|
152
|
+
```
|
153
|
+
|
154
|
+
Create a method to call the [embeddings API](https://platform.openai.com/docs/guides/embeddings)
|
155
|
+
|
156
|
+
```ruby
|
157
|
+
def fetch_embeddings(input)
|
158
|
+
url = "https://api.openai.com/v1/embeddings"
|
159
|
+
headers = {
|
160
|
+
"Authorization" => "Bearer #{ENV.fetch("OPENAI_API_KEY")}",
|
161
|
+
"Content-Type" => "application/json"
|
162
|
+
}
|
163
|
+
data = {
|
164
|
+
input: input,
|
165
|
+
model: "text-embedding-ada-002"
|
166
|
+
}
|
167
|
+
|
168
|
+
response = Net::HTTP.post(URI(url), data.to_json, headers)
|
169
|
+
JSON.parse(response.body)["data"].map { |v| v["embedding"] }
|
170
|
+
end
|
171
|
+
```
|
172
|
+
|
173
|
+
Pass your input
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
input = [
|
177
|
+
"The dog is barking",
|
178
|
+
"The cat is purring",
|
179
|
+
"The bear is growling"
|
180
|
+
]
|
181
|
+
embeddings = fetch_embeddings(input)
|
182
|
+
```
|
183
|
+
|
184
|
+
Store the embeddings
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
articles = []
|
188
|
+
input.zip(embeddings) do |content, embedding|
|
189
|
+
articles << {content: content, embedding: embedding}
|
190
|
+
end
|
191
|
+
Article.insert_all!(articles) # use create! for Active Record < 6
|
192
|
+
```
|
193
|
+
|
194
|
+
And get similar articles
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
article = Article.first
|
198
|
+
article.nearest_neighbors(:embedding, distance: "inner_product").first(5).map(&:content)
|
199
|
+
```
|
200
|
+
|
201
|
+
See the [complete code](examples/openai_embeddings.rb)
|
202
|
+
|
203
|
+
### Disco Recommendations
|
133
204
|
|
134
205
|
You can use Neighbor for online item-based recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example.
|
135
206
|
|
136
207
|
Generate a model
|
137
208
|
|
138
209
|
```sh
|
139
|
-
rails generate model Movie name:string
|
210
|
+
rails generate model Movie name:string factors:cube
|
140
211
|
rails db:migrate
|
141
212
|
```
|
142
213
|
|
@@ -144,7 +215,7 @@ And add `has_neighbors`
|
|
144
215
|
|
145
216
|
```ruby
|
146
217
|
class Movie < ApplicationRecord
|
147
|
-
has_neighbors dimensions: 20, normalize: true
|
218
|
+
has_neighbors :factors, dimensions: 20, normalize: true
|
148
219
|
end
|
149
220
|
```
|
150
221
|
|
@@ -156,12 +227,12 @@ recommender = Disco::Recommender.new(factors: 20)
|
|
156
227
|
recommender.fit(data)
|
157
228
|
```
|
158
229
|
|
159
|
-
|
230
|
+
Store the item factors
|
160
231
|
|
161
232
|
```ruby
|
162
233
|
movies = []
|
163
234
|
recommender.item_ids.each do |item_id|
|
164
|
-
movies << {name: item_id,
|
235
|
+
movies << {name: item_id, factors: recommender.item_factors(item_id)}
|
165
236
|
end
|
166
237
|
Movie.insert_all!(movies) # use create! for Active Record < 6
|
167
238
|
```
|
@@ -170,7 +241,7 @@ And get similar movies
|
|
170
241
|
|
171
242
|
```ruby
|
172
243
|
movie = Movie.find_by(name: "Star Wars (1977)")
|
173
|
-
movie.nearest_neighbors(distance: "cosine").first(5).map(&:name)
|
244
|
+
movie.nearest_neighbors(:factors, distance: "cosine").first(5).map(&:name)
|
174
245
|
```
|
175
246
|
|
176
247
|
See the complete code for [cube](examples/disco_item_recs_cube.rb) and [vector](examples/disco_item_recs_vector.rb)
|
data/lib/neighbor/model.rb
CHANGED
@@ -1,16 +1,43 @@
|
|
1
1
|
module Neighbor
|
2
2
|
module Model
|
3
|
-
def has_neighbors(dimensions: nil, normalize: nil)
|
4
|
-
|
5
|
-
# likely use argument
|
6
|
-
attribute_name = :neighbor_vector
|
3
|
+
def has_neighbors(attribute_name = :neighbor_vector, dimensions: nil, normalize: nil)
|
4
|
+
attribute_name = attribute_name.to_sym
|
7
5
|
|
8
6
|
class_eval do
|
9
|
-
|
7
|
+
@neighbor_attributes ||= {}
|
8
|
+
|
9
|
+
if @neighbor_attributes.empty?
|
10
|
+
def self.neighbor_attributes
|
11
|
+
parent_attributes =
|
12
|
+
if superclass.respond_to?(:neighbor_attributes)
|
13
|
+
superclass.neighbor_attributes
|
14
|
+
else
|
15
|
+
{}
|
16
|
+
end
|
17
|
+
|
18
|
+
parent_attributes.merge(@neighbor_attributes || {})
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
|
23
|
+
@neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
|
10
24
|
|
11
25
|
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
|
12
26
|
|
13
|
-
|
27
|
+
return if @neighbor_attributes.size != 1
|
28
|
+
|
29
|
+
scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
|
30
|
+
if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
|
31
|
+
vector = attribute_name
|
32
|
+
attribute_name = :neighbor_vector
|
33
|
+
end
|
34
|
+
attribute_name = attribute_name.to_sym
|
35
|
+
|
36
|
+
options = neighbor_attributes[attribute_name]
|
37
|
+
raise ArgumentError, "Invalid attribute" unless options
|
38
|
+
normalize = options[:normalize]
|
39
|
+
dimensions = options[:dimensions]
|
40
|
+
|
14
41
|
return none if vector.nil?
|
15
42
|
|
16
43
|
distance = distance.to_s
|
@@ -80,10 +107,14 @@ module Neighbor
|
|
80
107
|
.order(Arel.sql(order))
|
81
108
|
}
|
82
109
|
|
83
|
-
|
110
|
+
def nearest_neighbors(attribute_name = :neighbor_vector, **options)
|
111
|
+
attribute_name = attribute_name.to_sym
|
112
|
+
# important! check if neighbor attribute before calling send
|
113
|
+
raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name]
|
114
|
+
|
84
115
|
self.class
|
85
116
|
.where.not(self.class.primary_key => send(self.class.primary_key))
|
86
|
-
.nearest_neighbors(send(attribute_name), **options)
|
117
|
+
.nearest_neighbors(attribute_name, send(attribute_name), **options)
|
87
118
|
end
|
88
119
|
end
|
89
120
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Neighbor
|
2
|
+
class Railtie < Rails::Railtie
|
3
|
+
generators do
|
4
|
+
# rails generate model Item embedding:vector{3}
|
5
|
+
if defined?(Rails::Generators::GeneratedAttribute)
|
6
|
+
Rails::Generators::GeneratedAttribute.singleton_class.prepend(Neighbor::GeneratedAttribute)
|
7
|
+
end
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
module GeneratedAttribute
|
12
|
+
def parse_type_and_options(type, *, **)
|
13
|
+
if type =~ /\A(vector)\{(\d+)\}\z/
|
14
|
+
return $1, limit: $2.to_i
|
15
|
+
end
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/neighbor/version.rb
CHANGED
data/lib/neighbor.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/generators/neighbor/vector_generator.rb
|
40
40
|
- lib/neighbor.rb
|
41
41
|
- lib/neighbor/model.rb
|
42
|
+
- lib/neighbor/railtie.rb
|
42
43
|
- lib/neighbor/vector.rb
|
43
44
|
- lib/neighbor/version.rb
|
44
45
|
homepage: https://github.com/ankane/neighbor
|
@@ -60,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
61
|
- !ruby/object:Gem::Version
|
61
62
|
version: '0'
|
62
63
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
64
|
+
rubygems_version: 3.4.10
|
64
65
|
signing_key:
|
65
66
|
specification_version: 4
|
66
67
|
summary: Nearest neighbor search for Rails and Postgres
|