neighbor 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +21 -29
- data/lib/neighbor/utils.rb +7 -7
- data/lib/neighbor/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ee3864aa1511aa273c2d619e408bff8bcb491adc6dd111c5c88f7b79ef0baafc
|
|
4
|
+
data.tar.gz: 7d79f79814a0041e77d18edf83cb87b5fba5f59d1a25a71370d8b28c6a5cd289
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c8d63b19a366d670212d6d5296f3b0f0de45e007635b7026bc9474bbd9248c9fc9a7ba4ee8b7c1b796ecdd7eef93460fe4e4e746eb49a8c19d471f4827530ecf
|
|
7
|
+
data.tar.gz: c371a55e3f1579b6a62fcc5cc55c5ac54cb318faa12278c736089b97de1990ecf56a17863bcc7043b4a2cde9be50cb02b8b7dcecaf5032d371e1675a062723f2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Supports:
|
|
|
6
6
|
|
|
7
7
|
- Postgres (cube and pgvector)
|
|
8
8
|
- SQLite (sqlite-vec) - experimental
|
|
9
|
-
- MariaDB 11.
|
|
9
|
+
- MariaDB 11.7 - experimental
|
|
10
10
|
- MySQL 9 (searching requires HeatWave) - experimental
|
|
11
11
|
|
|
12
12
|
[Build Status](https://github.com/ankane/neighbor/actions)
|
|
@@ -56,15 +56,15 @@ rails generate neighbor:sqlite
|
|
|
56
56
|
Create a migration
|
|
57
57
|
|
|
58
58
|
```ruby
|
|
59
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
59
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
60
60
|
def change
|
|
61
61
|
# cube
|
|
62
62
|
add_column :items, :embedding, :cube
|
|
63
63
|
|
|
64
|
-
# pgvector and MySQL
|
|
64
|
+
# pgvector, MariaDB, and MySQL
|
|
65
65
|
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
|
66
66
|
|
|
67
|
-
# sqlite-vec
|
|
67
|
+
# sqlite-vec
|
|
68
68
|
add_column :items, :embedding, :binary
|
|
69
69
|
end
|
|
70
70
|
end
|
|
@@ -174,7 +174,7 @@ The `sparsevec` type can have up to 16,000 non-zero elements, and sparse vectors
|
|
|
174
174
|
Add an approximate index to speed up queries. Create a migration with:
|
|
175
175
|
|
|
176
176
|
```ruby
|
|
177
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
|
177
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
|
178
178
|
def change
|
|
179
179
|
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
|
180
180
|
# or
|
|
@@ -202,7 +202,7 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
|
202
202
|
Use the `halfvec` type to store half-precision vectors
|
|
203
203
|
|
|
204
204
|
```ruby
|
|
205
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
205
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
206
206
|
def change
|
|
207
207
|
add_column :items, :embedding, :halfvec, limit: 3 # dimensions
|
|
208
208
|
end
|
|
@@ -214,7 +214,7 @@ end
|
|
|
214
214
|
Index vectors at half precision for smaller indexes
|
|
215
215
|
|
|
216
216
|
```ruby
|
|
217
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
|
217
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
|
218
218
|
def change
|
|
219
219
|
add_index :items, "(embedding::halfvec(3)) vector_l2_ops", using: :hnsw
|
|
220
220
|
end
|
|
@@ -232,7 +232,7 @@ Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean", preci
|
|
|
232
232
|
Use the `bit` type to store binary vectors
|
|
233
233
|
|
|
234
234
|
```ruby
|
|
235
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
235
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
236
236
|
def change
|
|
237
237
|
add_column :items, :embedding, :bit, limit: 3 # dimensions
|
|
238
238
|
end
|
|
@@ -250,7 +250,7 @@ Item.nearest_neighbors(:embedding, "101", distance: "hamming").first(5)
|
|
|
250
250
|
Use expression indexing for binary quantization
|
|
251
251
|
|
|
252
252
|
```ruby
|
|
253
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
|
253
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
|
254
254
|
def change
|
|
255
255
|
add_index :items, "(binary_quantize(embedding)::bit(3)) bit_hamming_ops", using: :hnsw
|
|
256
256
|
end
|
|
@@ -262,7 +262,7 @@ end
|
|
|
262
262
|
Use the `sparsevec` type to store sparse vectors
|
|
263
263
|
|
|
264
264
|
```ruby
|
|
265
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
265
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
266
266
|
def change
|
|
267
267
|
add_column :items, :embedding, :sparsevec, limit: 3 # dimensions
|
|
268
268
|
end
|
|
@@ -302,19 +302,19 @@ end
|
|
|
302
302
|
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
|
303
303
|
|
|
304
304
|
```ruby
|
|
305
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
305
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
306
306
|
def change
|
|
307
|
+
# Rails 8+
|
|
308
|
+
create_virtual_table :items, :vec0, [
|
|
309
|
+
"embedding float[3] distance_metric=L2"
|
|
310
|
+
]
|
|
311
|
+
|
|
307
312
|
# Rails < 8
|
|
308
313
|
execute <<~SQL
|
|
309
314
|
CREATE VIRTUAL TABLE items USING vec0(
|
|
310
315
|
embedding float[3] distance_metric=L2
|
|
311
316
|
)
|
|
312
317
|
SQL
|
|
313
|
-
|
|
314
|
-
# Rails 8+
|
|
315
|
-
create_virtual_table :items, :vec0, [
|
|
316
|
-
"embedding float[3] distance_metric=L2"
|
|
317
|
-
]
|
|
318
318
|
end
|
|
319
319
|
end
|
|
320
320
|
```
|
|
@@ -387,23 +387,15 @@ Supported values are:
|
|
|
387
387
|
- `cosine`
|
|
388
388
|
- `hamming`
|
|
389
389
|
|
|
390
|
-
For cosine distance with MariaDB, vectors must be normalized before being stored.
|
|
391
|
-
|
|
392
|
-
```ruby
|
|
393
|
-
class Item < ApplicationRecord
|
|
394
|
-
has_neighbors :embedding, normalize: true
|
|
395
|
-
end
|
|
396
|
-
```
|
|
397
|
-
|
|
398
390
|
### Indexing
|
|
399
391
|
|
|
400
392
|
Vector columns must use `null: false` to add a vector index
|
|
401
393
|
|
|
402
394
|
```ruby
|
|
403
|
-
class CreateItems < ActiveRecord::Migration[
|
|
395
|
+
class CreateItems < ActiveRecord::Migration[8.0]
|
|
404
396
|
def change
|
|
405
397
|
create_table :items do |t|
|
|
406
|
-
t.
|
|
398
|
+
t.vector :embedding, limit: 3, null: false
|
|
407
399
|
t.index :embedding, type: :vector
|
|
408
400
|
end
|
|
409
401
|
end
|
|
@@ -415,7 +407,7 @@ end
|
|
|
415
407
|
Use the `bigint` type to store binary vectors
|
|
416
408
|
|
|
417
409
|
```ruby
|
|
418
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
410
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
419
411
|
def change
|
|
420
412
|
add_column :items, :embedding, :bigint
|
|
421
413
|
end
|
|
@@ -447,7 +439,7 @@ Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysq
|
|
|
447
439
|
Use the `binary` type to store binary vectors
|
|
448
440
|
|
|
449
441
|
```ruby
|
|
450
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
|
442
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
|
451
443
|
def change
|
|
452
444
|
add_column :items, :embedding, :binary
|
|
453
445
|
end
|
|
@@ -892,7 +884,7 @@ bundle exec rake test:postgresql
|
|
|
892
884
|
bundle exec rake test:sqlite
|
|
893
885
|
|
|
894
886
|
# MariaDB
|
|
895
|
-
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306
|
|
887
|
+
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.7-rc
|
|
896
888
|
bundle exec rake test:mariadb
|
|
897
889
|
|
|
898
890
|
# MySQL
|
data/lib/neighbor/utils.rb
CHANGED
|
@@ -33,7 +33,7 @@ module Neighbor
|
|
|
33
33
|
def self.normalize(value, column_info:)
|
|
34
34
|
return nil if value.nil?
|
|
35
35
|
|
|
36
|
-
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec
|
|
36
|
+
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type)
|
|
37
37
|
|
|
38
38
|
norm = Math.sqrt(value.sum { |v| v * v })
|
|
39
39
|
|
|
@@ -86,10 +86,12 @@ module Neighbor
|
|
|
86
86
|
end
|
|
87
87
|
when :mariadb
|
|
88
88
|
case column_type
|
|
89
|
-
when :
|
|
89
|
+
when :vector
|
|
90
90
|
case distance
|
|
91
|
-
when "euclidean"
|
|
92
|
-
"
|
|
91
|
+
when "euclidean"
|
|
92
|
+
"VEC_DISTANCE_EUCLIDEAN"
|
|
93
|
+
when "cosine"
|
|
94
|
+
"VEC_DISTANCE_COSINE"
|
|
93
95
|
end
|
|
94
96
|
when :integer
|
|
95
97
|
case distance
|
|
@@ -168,7 +170,7 @@ module Neighbor
|
|
|
168
170
|
if operator == "BIT_COUNT"
|
|
169
171
|
"BIT_COUNT(#{quoted_attribute} ^ #{query})"
|
|
170
172
|
else
|
|
171
|
-
"
|
|
173
|
+
"#{operator}(#{quoted_attribute}, #{query})"
|
|
172
174
|
end
|
|
173
175
|
when :mysql
|
|
174
176
|
if operator == "BIT_COUNT"
|
|
@@ -191,8 +193,6 @@ module Neighbor
|
|
|
191
193
|
case adapter
|
|
192
194
|
when :postgresql
|
|
193
195
|
column_type == :cube
|
|
194
|
-
when :mariadb
|
|
195
|
-
true
|
|
196
196
|
else
|
|
197
197
|
false
|
|
198
198
|
end
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: neighbor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.
|
|
4
|
+
version: 0.5.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
11
|
+
date: 2024-12-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activerecord
|
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
77
77
|
- !ruby/object:Gem::Version
|
|
78
78
|
version: '0'
|
|
79
79
|
requirements: []
|
|
80
|
-
rubygems_version: 3.5.
|
|
80
|
+
rubygems_version: 3.5.22
|
|
81
81
|
signing_key:
|
|
82
82
|
specification_version: 4
|
|
83
83
|
summary: Nearest neighbor search for Rails
|