neighbor 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +21 -29
- data/lib/neighbor/utils.rb +7 -7
- data/lib/neighbor/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee3864aa1511aa273c2d619e408bff8bcb491adc6dd111c5c88f7b79ef0baafc
|
4
|
+
data.tar.gz: 7d79f79814a0041e77d18edf83cb87b5fba5f59d1a25a71370d8b28c6a5cd289
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8d63b19a366d670212d6d5296f3b0f0de45e007635b7026bc9474bbd9248c9fc9a7ba4ee8b7c1b796ecdd7eef93460fe4e4e746eb49a8c19d471f4827530ecf
|
7
|
+
data.tar.gz: c371a55e3f1579b6a62fcc5cc55c5ac54cb318faa12278c736089b97de1990ecf56a17863bcc7043b4a2cde9be50cb02b8b7dcecaf5032d371e1675a062723f2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Supports:
|
|
6
6
|
|
7
7
|
- Postgres (cube and pgvector)
|
8
8
|
- SQLite (sqlite-vec) - experimental
|
9
|
-
- MariaDB 11.
|
9
|
+
- MariaDB 11.7 - experimental
|
10
10
|
- MySQL 9 (searching requires HeatWave) - experimental
|
11
11
|
|
12
12
|
[![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions)
|
@@ -56,15 +56,15 @@ rails generate neighbor:sqlite
|
|
56
56
|
Create a migration
|
57
57
|
|
58
58
|
```ruby
|
59
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
59
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
60
60
|
def change
|
61
61
|
# cube
|
62
62
|
add_column :items, :embedding, :cube
|
63
63
|
|
64
|
-
# pgvector and MySQL
|
64
|
+
# pgvector, MariaDB, and MySQL
|
65
65
|
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
66
66
|
|
67
|
-
# sqlite-vec
|
67
|
+
# sqlite-vec
|
68
68
|
add_column :items, :embedding, :binary
|
69
69
|
end
|
70
70
|
end
|
@@ -174,7 +174,7 @@ The `sparsevec` type can have up to 16,000 non-zero elements, and sparse vectors
|
|
174
174
|
Add an approximate index to speed up queries. Create a migration with:
|
175
175
|
|
176
176
|
```ruby
|
177
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
177
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
178
178
|
def change
|
179
179
|
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
180
180
|
# or
|
@@ -202,7 +202,7 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
202
202
|
Use the `halfvec` type to store half-precision vectors
|
203
203
|
|
204
204
|
```ruby
|
205
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
205
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
206
206
|
def change
|
207
207
|
add_column :items, :embedding, :halfvec, limit: 3 # dimensions
|
208
208
|
end
|
@@ -214,7 +214,7 @@ end
|
|
214
214
|
Index vectors at half precision for smaller indexes
|
215
215
|
|
216
216
|
```ruby
|
217
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
217
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
218
218
|
def change
|
219
219
|
add_index :items, "(embedding::halfvec(3)) vector_l2_ops", using: :hnsw
|
220
220
|
end
|
@@ -232,7 +232,7 @@ Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean", preci
|
|
232
232
|
Use the `bit` type to store binary vectors
|
233
233
|
|
234
234
|
```ruby
|
235
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
235
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
236
236
|
def change
|
237
237
|
add_column :items, :embedding, :bit, limit: 3 # dimensions
|
238
238
|
end
|
@@ -250,7 +250,7 @@ Item.nearest_neighbors(:embedding, "101", distance: "hamming").first(5)
|
|
250
250
|
Use expression indexing for binary quantization
|
251
251
|
|
252
252
|
```ruby
|
253
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
253
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
254
254
|
def change
|
255
255
|
add_index :items, "(binary_quantize(embedding)::bit(3)) bit_hamming_ops", using: :hnsw
|
256
256
|
end
|
@@ -262,7 +262,7 @@ end
|
|
262
262
|
Use the `sparsevec` type to store sparse vectors
|
263
263
|
|
264
264
|
```ruby
|
265
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
265
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
266
266
|
def change
|
267
267
|
add_column :items, :embedding, :sparsevec, limit: 3 # dimensions
|
268
268
|
end
|
@@ -302,19 +302,19 @@ end
|
|
302
302
|
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
303
303
|
|
304
304
|
```ruby
|
305
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
305
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
306
306
|
def change
|
307
|
+
# Rails 8+
|
308
|
+
create_virtual_table :items, :vec0, [
|
309
|
+
"embedding float[3] distance_metric=L2"
|
310
|
+
]
|
311
|
+
|
307
312
|
# Rails < 8
|
308
313
|
execute <<~SQL
|
309
314
|
CREATE VIRTUAL TABLE items USING vec0(
|
310
315
|
embedding float[3] distance_metric=L2
|
311
316
|
)
|
312
317
|
SQL
|
313
|
-
|
314
|
-
# Rails 8+
|
315
|
-
create_virtual_table :items, :vec0, [
|
316
|
-
"embedding float[3] distance_metric=L2"
|
317
|
-
]
|
318
318
|
end
|
319
319
|
end
|
320
320
|
```
|
@@ -387,23 +387,15 @@ Supported values are:
|
|
387
387
|
- `cosine`
|
388
388
|
- `hamming`
|
389
389
|
|
390
|
-
For cosine distance with MariaDB, vectors must be normalized before being stored.
|
391
|
-
|
392
|
-
```ruby
|
393
|
-
class Item < ApplicationRecord
|
394
|
-
has_neighbors :embedding, normalize: true
|
395
|
-
end
|
396
|
-
```
|
397
|
-
|
398
390
|
### Indexing
|
399
391
|
|
400
392
|
Vector columns must use `null: false` to add a vector index
|
401
393
|
|
402
394
|
```ruby
|
403
|
-
class CreateItems < ActiveRecord::Migration[
|
395
|
+
class CreateItems < ActiveRecord::Migration[8.0]
|
404
396
|
def change
|
405
397
|
create_table :items do |t|
|
406
|
-
t.
|
398
|
+
t.vector :embedding, limit: 3, null: false
|
407
399
|
t.index :embedding, type: :vector
|
408
400
|
end
|
409
401
|
end
|
@@ -415,7 +407,7 @@ end
|
|
415
407
|
Use the `bigint` type to store binary vectors
|
416
408
|
|
417
409
|
```ruby
|
418
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
410
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
419
411
|
def change
|
420
412
|
add_column :items, :embedding, :bigint
|
421
413
|
end
|
@@ -447,7 +439,7 @@ Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysq
|
|
447
439
|
Use the `binary` type to store binary vectors
|
448
440
|
|
449
441
|
```ruby
|
450
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
442
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
451
443
|
def change
|
452
444
|
add_column :items, :embedding, :binary
|
453
445
|
end
|
@@ -892,7 +884,7 @@ bundle exec rake test:postgresql
|
|
892
884
|
bundle exec rake test:sqlite
|
893
885
|
|
894
886
|
# MariaDB
|
895
|
-
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306
|
887
|
+
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.7-rc
|
896
888
|
bundle exec rake test:mariadb
|
897
889
|
|
898
890
|
# MySQL
|
data/lib/neighbor/utils.rb
CHANGED
@@ -33,7 +33,7 @@ module Neighbor
|
|
33
33
|
def self.normalize(value, column_info:)
|
34
34
|
return nil if value.nil?
|
35
35
|
|
36
|
-
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec
|
36
|
+
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type)
|
37
37
|
|
38
38
|
norm = Math.sqrt(value.sum { |v| v * v })
|
39
39
|
|
@@ -86,10 +86,12 @@ module Neighbor
|
|
86
86
|
end
|
87
87
|
when :mariadb
|
88
88
|
case column_type
|
89
|
-
when :
|
89
|
+
when :vector
|
90
90
|
case distance
|
91
|
-
when "euclidean"
|
92
|
-
"
|
91
|
+
when "euclidean"
|
92
|
+
"VEC_DISTANCE_EUCLIDEAN"
|
93
|
+
when "cosine"
|
94
|
+
"VEC_DISTANCE_COSINE"
|
93
95
|
end
|
94
96
|
when :integer
|
95
97
|
case distance
|
@@ -168,7 +170,7 @@ module Neighbor
|
|
168
170
|
if operator == "BIT_COUNT"
|
169
171
|
"BIT_COUNT(#{quoted_attribute} ^ #{query})"
|
170
172
|
else
|
171
|
-
"
|
173
|
+
"#{operator}(#{quoted_attribute}, #{query})"
|
172
174
|
end
|
173
175
|
when :mysql
|
174
176
|
if operator == "BIT_COUNT"
|
@@ -191,8 +193,6 @@ module Neighbor
|
|
191
193
|
case adapter
|
192
194
|
when :postgresql
|
193
195
|
column_type == :cube
|
194
|
-
when :mariadb
|
195
|
-
true
|
196
196
|
else
|
197
197
|
false
|
198
198
|
end
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
- !ruby/object:Gem::Version
|
78
78
|
version: '0'
|
79
79
|
requirements: []
|
80
|
-
rubygems_version: 3.5.
|
80
|
+
rubygems_version: 3.5.22
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: Nearest neighbor search for Rails
|