neighbor 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +91 -20
- data/lib/neighbor/railtie.rb +19 -0
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +2 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48a9bcfda91ac7ed0af8c593216b9a84a2b488d15e9375d897f063e44bd0f5c8
|
4
|
+
data.tar.gz: c4cf3ca35811336d7574eff4733078cc08e4b1cb7140cf5c5a12ad06a34506e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83d90c764613158ca0a753796c9d1ccbe19127792ac8a73c32eddcc2c6537ad919c21e32c43be5b88833eae92fc5d723e3275e07307e17c0951e8436faed27b5
|
7
|
+
data.tar.gz: 990ad6a45d982e7ababbb57112a65d1fcf8a37f3a37a6459cf4b0483aee407cfc3eda2575e6ae3ecd8389daaca8925b045d8df50c8787f0d1102e9e913b4269f
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -35,11 +35,11 @@ rails db:migrate
|
|
35
35
|
Create a migration
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
class AddNeighborVectorToItems < ActiveRecord::Migration[
|
38
|
+
class AddNeighborVectorToItems < ActiveRecord::Migration[7.0]
|
39
39
|
def change
|
40
|
-
add_column :items, :
|
40
|
+
add_column :items, :embedding, :cube
|
41
41
|
# or
|
42
|
-
add_column :items, :
|
42
|
+
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
43
43
|
end
|
44
44
|
end
|
45
45
|
```
|
@@ -48,26 +48,26 @@ Add to your model
|
|
48
48
|
|
49
49
|
```ruby
|
50
50
|
class Item < ApplicationRecord
|
51
|
-
has_neighbors
|
51
|
+
has_neighbors :embedding
|
52
52
|
end
|
53
53
|
```
|
54
54
|
|
55
55
|
Update the vectors
|
56
56
|
|
57
57
|
```ruby
|
58
|
-
item.update(
|
58
|
+
item.update(embedding: [1.0, 1.2, 0.5])
|
59
59
|
```
|
60
60
|
|
61
61
|
Get the nearest neighbors to a record
|
62
62
|
|
63
63
|
```ruby
|
64
|
-
item.nearest_neighbors(distance: "euclidean").first(5)
|
64
|
+
item.nearest_neighbors(:embedding, distance: "euclidean").first(5)
|
65
65
|
```
|
66
66
|
|
67
67
|
Get the nearest neighbors to a vector
|
68
68
|
|
69
69
|
```ruby
|
70
|
-
Item.nearest_neighbors([0.9, 1.3, 1.1], distance: "euclidean").first(5)
|
70
|
+
Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean").first(5)
|
71
71
|
```
|
72
72
|
|
73
73
|
## Distance
|
@@ -84,7 +84,7 @@ For cosine distance with cube, vectors must be normalized before being stored.
|
|
84
84
|
|
85
85
|
```ruby
|
86
86
|
class Item < ApplicationRecord
|
87
|
-
has_neighbors normalize: true
|
87
|
+
has_neighbors :embedding, normalize: true
|
88
88
|
end
|
89
89
|
```
|
90
90
|
|
@@ -93,19 +93,19 @@ For inner product with cube, see [this example](examples/disco_user_recs_cube.rb
|
|
93
93
|
Records returned from `nearest_neighbors` will have a `neighbor_distance` attribute
|
94
94
|
|
95
95
|
```ruby
|
96
|
-
nearest_item = item.nearest_neighbors(distance: "euclidean").first
|
96
|
+
nearest_item = item.nearest_neighbors(:embedding, distance: "euclidean").first
|
97
97
|
nearest_item.neighbor_distance
|
98
98
|
```
|
99
99
|
|
100
100
|
## Dimensions
|
101
101
|
|
102
|
-
The cube data type
|
102
|
+
The cube data type can have up to 100 dimensions by default. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this. The vector data type can have up to 16,000 dimensions, and vectors with up to 2,000 dimensions can be indexed.
|
103
103
|
|
104
104
|
For cube, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
105
105
|
|
106
106
|
```ruby
|
107
|
-
class
|
108
|
-
has_neighbors dimensions: 3
|
107
|
+
class Item < ApplicationRecord
|
108
|
+
has_neighbors :embedding, dimensions: 3
|
109
109
|
end
|
110
110
|
```
|
111
111
|
|
@@ -114,9 +114,9 @@ end
|
|
114
114
|
For vector, add an approximate index to speed up queries. Create a migration with:
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[
|
117
|
+
class AddIndexToItemsNeighborVector < ActiveRecord::Migration[7.0]
|
118
118
|
def change
|
119
|
-
add_index :items, :
|
119
|
+
add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
|
120
120
|
end
|
121
121
|
end
|
122
122
|
```
|
@@ -129,14 +129,85 @@ Set the number of probes
|
|
129
129
|
Item.connection.execute("SET ivfflat.probes = 3")
|
130
130
|
```
|
131
131
|
|
132
|
-
##
|
132
|
+
## Examples
|
133
|
+
|
134
|
+
- [OpenAI Embeddings](#openai-embeddings)
|
135
|
+
- [Disco Recommendations](#disco-recommendations)
|
136
|
+
|
137
|
+
### OpenAI Embeddings
|
138
|
+
|
139
|
+
Generate a model
|
140
|
+
|
141
|
+
```sh
|
142
|
+
rails generate model Article content:text embedding:vector{1536}
|
143
|
+
rails db:migrate
|
144
|
+
```
|
145
|
+
|
146
|
+
And add `has_neighbors`
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
class Article < ApplicationRecord
|
150
|
+
has_neighbors :embedding
|
151
|
+
end
|
152
|
+
```
|
153
|
+
|
154
|
+
Create a method to call the [embeddings API](https://platform.openai.com/docs/guides/embeddings)
|
155
|
+
|
156
|
+
```ruby
|
157
|
+
def fetch_embeddings(input)
|
158
|
+
url = "https://api.openai.com/v1/embeddings"
|
159
|
+
headers = {
|
160
|
+
"Authorization" => "Bearer #{ENV.fetch("OPENAI_API_KEY")}",
|
161
|
+
"Content-Type" => "application/json"
|
162
|
+
}
|
163
|
+
data = {
|
164
|
+
input: input,
|
165
|
+
model: "text-embedding-ada-002"
|
166
|
+
}
|
167
|
+
|
168
|
+
response = Net::HTTP.post(URI(url), data.to_json, headers)
|
169
|
+
JSON.parse(response.body)["data"].map { |v| v["embedding"] }
|
170
|
+
end
|
171
|
+
```
|
172
|
+
|
173
|
+
Pass your input
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
input = [
|
177
|
+
"The dog is barking",
|
178
|
+
"The cat is purring",
|
179
|
+
"The bear is growling"
|
180
|
+
]
|
181
|
+
embeddings = fetch_embeddings(input)
|
182
|
+
```
|
183
|
+
|
184
|
+
Store the embeddings
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
articles = []
|
188
|
+
input.zip(embeddings) do |content, embedding|
|
189
|
+
articles << {content: content, embedding: embedding}
|
190
|
+
end
|
191
|
+
Article.insert_all!(articles) # use create! for Active Record < 6
|
192
|
+
```
|
193
|
+
|
194
|
+
And get similar articles
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
article = Article.first
|
198
|
+
article.nearest_neighbors(:embedding, distance: "inner_product").first(5).map(&:content)
|
199
|
+
```
|
200
|
+
|
201
|
+
See the [complete code](examples/openai_embeddings.rb)
|
202
|
+
|
203
|
+
### Disco Recommendations
|
133
204
|
|
134
205
|
You can use Neighbor for online item-based recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example.
|
135
206
|
|
136
207
|
Generate a model
|
137
208
|
|
138
209
|
```sh
|
139
|
-
rails generate model Movie name:string
|
210
|
+
rails generate model Movie name:string factors:cube
|
140
211
|
rails db:migrate
|
141
212
|
```
|
142
213
|
|
@@ -144,7 +215,7 @@ And add `has_neighbors`
|
|
144
215
|
|
145
216
|
```ruby
|
146
217
|
class Movie < ApplicationRecord
|
147
|
-
has_neighbors dimensions: 20, normalize: true
|
218
|
+
has_neighbors :factors, dimensions: 20, normalize: true
|
148
219
|
end
|
149
220
|
```
|
150
221
|
|
@@ -156,12 +227,12 @@ recommender = Disco::Recommender.new(factors: 20)
|
|
156
227
|
recommender.fit(data)
|
157
228
|
```
|
158
229
|
|
159
|
-
|
230
|
+
Store the item factors
|
160
231
|
|
161
232
|
```ruby
|
162
233
|
movies = []
|
163
234
|
recommender.item_ids.each do |item_id|
|
164
|
-
movies << {name: item_id,
|
235
|
+
movies << {name: item_id, factors: recommender.item_factors(item_id)}
|
165
236
|
end
|
166
237
|
Movie.insert_all!(movies) # use create! for Active Record < 6
|
167
238
|
```
|
@@ -170,7 +241,7 @@ And get similar movies
|
|
170
241
|
|
171
242
|
```ruby
|
172
243
|
movie = Movie.find_by(name: "Star Wars (1977)")
|
173
|
-
movie.nearest_neighbors(distance: "cosine").first(5).map(&:name)
|
244
|
+
movie.nearest_neighbors(:factors, distance: "cosine").first(5).map(&:name)
|
174
245
|
```
|
175
246
|
|
176
247
|
See the complete code for [cube](examples/disco_item_recs_cube.rb) and [vector](examples/disco_item_recs_vector.rb)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Neighbor
|
2
|
+
class Railtie < Rails::Railtie
|
3
|
+
generators do
|
4
|
+
# rails generate model Item embedding:vector{3}
|
5
|
+
if defined?(Rails::Generators::GeneratedAttribute)
|
6
|
+
Rails::Generators::GeneratedAttribute.singleton_class.prepend(Neighbor::GeneratedAttribute)
|
7
|
+
end
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
module GeneratedAttribute
|
12
|
+
def parse_type_and_options(type, *, **)
|
13
|
+
if type =~ /\A(vector)\{(\d+)\}\z/
|
14
|
+
return $1, limit: $2.to_i
|
15
|
+
end
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/neighbor/version.rb
CHANGED
data/lib/neighbor.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/generators/neighbor/vector_generator.rb
|
40
40
|
- lib/neighbor.rb
|
41
41
|
- lib/neighbor/model.rb
|
42
|
+
- lib/neighbor/railtie.rb
|
42
43
|
- lib/neighbor/vector.rb
|
43
44
|
- lib/neighbor/version.rb
|
44
45
|
homepage: https://github.com/ankane/neighbor
|
@@ -60,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
61
|
- !ruby/object:Gem::Version
|
61
62
|
version: '0'
|
62
63
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
64
|
+
rubygems_version: 3.4.10
|
64
65
|
signing_key:
|
65
66
|
specification_version: 4
|
66
67
|
summary: Nearest neighbor search for Rails and Postgres
|