neighbor 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +24 -40
- data/lib/neighbor/postgresql.rb +15 -0
- data/lib/neighbor/utils.rb +7 -7
- data/lib/neighbor/version.rb +1 -1
- metadata +3 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb8418a71159a849442d146643f732c28d10a5237d94a5b5b7d7466a224a40b5
|
4
|
+
data.tar.gz: 8a7ededb3071fdef4a77bbbdf4596d74ed323327d4d29ea15538e344d25af2f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05b3b8ccd07570f531aef0132a80563aef046b88508e732b2f16fe02f6c0303bd6f6fdb00804a646845086ba7fd133358503a7c7b69b098c02f548434c859575
|
7
|
+
data.tar.gz: 9bcb61d4e1453e30daeab1fff0364c023278405d170b435cdfb262aac0aa0d91622e19471fbf3f3076e4c944d09318dc81cf334fdac7767fa6f72d95c8560907
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.5.2 (2025-01-05)
|
2
|
+
|
3
|
+
- Improved support for Postgres arrays
|
4
|
+
|
5
|
+
## 0.5.1 (2024-12-03)
|
6
|
+
|
7
|
+
- Added experimental support for MariaDB 11.7
|
8
|
+
- Dropped experimental support for MariaDB 11.6 Vector
|
9
|
+
|
1
10
|
## 0.5.0 (2024-10-07)
|
2
11
|
|
3
12
|
- Added experimental support for SQLite (sqlite-vec)
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Supports:
|
|
6
6
|
|
7
7
|
- Postgres (cube and pgvector)
|
8
8
|
- SQLite (sqlite-vec) - experimental
|
9
|
-
- MariaDB 11.6 Vector - experimental
|
9
|
+
- MariaDB 11.7 - experimental
|
10
10
|
- MySQL 9 (searching requires HeatWave) - experimental
|
11
11
|
|
12
12
|
[Build Status](https://github.com/ankane/neighbor/actions)
|
@@ -56,15 +56,15 @@ rails generate neighbor:sqlite
|
|
56
56
|
Create a migration
|
57
57
|
|
58
58
|
```ruby
|
59
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
59
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
60
60
|
def change
|
61
61
|
# cube
|
62
62
|
add_column :items, :embedding, :cube
|
63
63
|
|
64
|
-
# pgvector and MySQL
|
64
|
+
# pgvector, MariaDB, and MySQL
|
65
65
|
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
66
66
|
|
67
|
-
# sqlite-vec
|
67
|
+
# sqlite-vec
|
68
68
|
add_column :items, :embedding, :binary
|
69
69
|
end
|
70
70
|
end
|
@@ -174,7 +174,7 @@ The `sparsevec` type can have up to 16,000 non-zero elements, and sparse vectors
|
|
174
174
|
Add an approximate index to speed up queries. Create a migration with:
|
175
175
|
|
176
176
|
```ruby
|
177
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
177
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
178
178
|
def change
|
179
179
|
add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
|
180
180
|
# or
|
@@ -202,7 +202,7 @@ Item.connection.execute("SET ivfflat.probes = 3")
|
|
202
202
|
Use the `halfvec` type to store half-precision vectors
|
203
203
|
|
204
204
|
```ruby
|
205
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
205
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
206
206
|
def change
|
207
207
|
add_column :items, :embedding, :halfvec, limit: 3 # dimensions
|
208
208
|
end
|
@@ -214,7 +214,7 @@ end
|
|
214
214
|
Index vectors at half precision for smaller indexes
|
215
215
|
|
216
216
|
```ruby
|
217
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
217
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
218
218
|
def change
|
219
219
|
add_index :items, "(embedding::halfvec(3)) vector_l2_ops", using: :hnsw
|
220
220
|
end
|
@@ -232,7 +232,7 @@ Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean", preci
|
|
232
232
|
Use the `bit` type to store binary vectors
|
233
233
|
|
234
234
|
```ruby
|
235
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
235
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
236
236
|
def change
|
237
237
|
add_column :items, :embedding, :bit, limit: 3 # dimensions
|
238
238
|
end
|
@@ -250,7 +250,7 @@ Item.nearest_neighbors(:embedding, "101", distance: "hamming").first(5)
|
|
250
250
|
Use expression indexing for binary quantization
|
251
251
|
|
252
252
|
```ruby
|
253
|
-
class AddIndexToItemsEmbedding < ActiveRecord::Migration[
|
253
|
+
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
254
254
|
def change
|
255
255
|
add_index :items, "(binary_quantize(embedding)::bit(3)) bit_hamming_ops", using: :hnsw
|
256
256
|
end
|
@@ -262,7 +262,7 @@ end
|
|
262
262
|
Use the `sparsevec` type to store sparse vectors
|
263
263
|
|
264
264
|
```ruby
|
265
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
265
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
266
266
|
def change
|
267
267
|
add_column :items, :embedding, :sparsevec, limit: 3 # dimensions
|
268
268
|
end
|
@@ -302,19 +302,21 @@ end
|
|
302
302
|
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
303
303
|
|
304
304
|
```ruby
|
305
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
305
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
306
306
|
def change
|
307
|
+
# Rails 8+
|
308
|
+
create_virtual_table :items, :vec0, [
|
309
|
+
"id integer PRIMARY KEY AUTOINCREMENT NOT NULL",
|
310
|
+
"embedding float[3] distance_metric=L2"
|
311
|
+
]
|
312
|
+
|
307
313
|
# Rails < 8
|
308
314
|
execute <<~SQL
|
309
315
|
CREATE VIRTUAL TABLE items USING vec0(
|
316
|
+
id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
|
310
317
|
embedding float[3] distance_metric=L2
|
311
318
|
)
|
312
319
|
SQL
|
313
|
-
|
314
|
-
# Rails 8+
|
315
|
-
create_virtual_table :items, :vec0, [
|
316
|
-
"embedding float[3] distance_metric=L2"
|
317
|
-
]
|
318
320
|
end
|
319
321
|
end
|
320
322
|
```
|
@@ -329,16 +331,6 @@ ActiveRecord::SchemaDumper.ignore_tables += [
|
|
329
331
|
]
|
330
332
|
```
|
331
333
|
|
332
|
-
Create a model with `rowid` as the primary key
|
333
|
-
|
334
|
-
```ruby
|
335
|
-
class Item < ApplicationRecord
|
336
|
-
self.primary_key = "rowid"
|
337
|
-
|
338
|
-
has_neighbors :embedding, dimensions: 3
|
339
|
-
end
|
340
|
-
```
|
341
|
-
|
342
334
|
Get the `k` nearest neighbors
|
343
335
|
|
344
336
|
```ruby
|
@@ -348,7 +340,7 @@ Item.where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
|
348
340
|
Filter by primary key
|
349
341
|
|
350
342
|
```ruby
|
351
|
-
Item.where(
|
343
|
+
Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
352
344
|
```
|
353
345
|
|
354
346
|
### Int8 Vectors
|
@@ -387,23 +379,15 @@ Supported values are:
|
|
387
379
|
- `cosine`
|
388
380
|
- `hamming`
|
389
381
|
|
390
|
-
For cosine distance with MariaDB, vectors must be normalized before being stored.
|
391
|
-
|
392
|
-
```ruby
|
393
|
-
class Item < ApplicationRecord
|
394
|
-
has_neighbors :embedding, normalize: true
|
395
|
-
end
|
396
|
-
```
|
397
|
-
|
398
382
|
### Indexing
|
399
383
|
|
400
384
|
Vector columns must use `null: false` to add a vector index
|
401
385
|
|
402
386
|
```ruby
|
403
|
-
class CreateItems < ActiveRecord::Migration[
|
387
|
+
class CreateItems < ActiveRecord::Migration[8.0]
|
404
388
|
def change
|
405
389
|
create_table :items do |t|
|
406
|
-
t.
|
390
|
+
t.vector :embedding, limit: 3, null: false
|
407
391
|
t.index :embedding, type: :vector
|
408
392
|
end
|
409
393
|
end
|
@@ -415,7 +399,7 @@ end
|
|
415
399
|
Use the `bigint` type to store binary vectors
|
416
400
|
|
417
401
|
```ruby
|
418
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
402
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
419
403
|
def change
|
420
404
|
add_column :items, :embedding, :bigint
|
421
405
|
end
|
@@ -447,7 +431,7 @@ Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysq
|
|
447
431
|
Use the `binary` type to store binary vectors
|
448
432
|
|
449
433
|
```ruby
|
450
|
-
class AddEmbeddingToItems < ActiveRecord::Migration[
|
434
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
451
435
|
def change
|
452
436
|
add_column :items, :embedding, :binary
|
453
437
|
end
|
@@ -892,7 +876,7 @@ bundle exec rake test:postgresql
|
|
892
876
|
bundle exec rake test:sqlite
|
893
877
|
|
894
878
|
# MariaDB
|
895
|
-
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306
|
879
|
+
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.7-rc
|
896
880
|
bundle exec rake test:mariadb
|
897
881
|
|
898
882
|
# MySQL
|
data/lib/neighbor/postgresql.rb
CHANGED
@@ -19,6 +19,9 @@ module Neighbor
|
|
19
19
|
|
20
20
|
# prevent unknown OID warning
|
21
21
|
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(RegisterTypes)
|
22
|
+
|
23
|
+
# support vector[]/halfvec[]
|
24
|
+
ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.prepend(ArrayMethods)
|
22
25
|
end
|
23
26
|
|
24
27
|
module RegisterTypes
|
@@ -39,5 +42,17 @@ module Neighbor
|
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
45
|
+
|
46
|
+
ArrayWrapper = Struct.new(:to_a)
|
47
|
+
|
48
|
+
module ArrayMethods
|
49
|
+
def type_cast_array(value, method, ...)
|
50
|
+
if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all? { |v| v.is_a?(::Numeric) }
|
51
|
+
super(ArrayWrapper.new(value), method, ...)
|
52
|
+
else
|
53
|
+
super
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
42
57
|
end
|
43
58
|
end
|
data/lib/neighbor/utils.rb
CHANGED
@@ -33,7 +33,7 @@ module Neighbor
|
|
33
33
|
def self.normalize(value, column_info:)
|
34
34
|
return nil if value.nil?
|
35
35
|
|
36
|
-
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec
|
36
|
+
raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type)
|
37
37
|
|
38
38
|
norm = Math.sqrt(value.sum { |v| v * v })
|
39
39
|
|
@@ -86,10 +86,12 @@ module Neighbor
|
|
86
86
|
end
|
87
87
|
when :mariadb
|
88
88
|
case column_type
|
89
|
-
when :
|
89
|
+
when :vector
|
90
90
|
case distance
|
91
|
-
when "euclidean"
|
92
|
-
"
|
91
|
+
when "euclidean"
|
92
|
+
"VEC_DISTANCE_EUCLIDEAN"
|
93
|
+
when "cosine"
|
94
|
+
"VEC_DISTANCE_COSINE"
|
93
95
|
end
|
94
96
|
when :integer
|
95
97
|
case distance
|
@@ -168,7 +170,7 @@ module Neighbor
|
|
168
170
|
if operator == "BIT_COUNT"
|
169
171
|
"BIT_COUNT(#{quoted_attribute} ^ #{query})"
|
170
172
|
else
|
171
|
-
"
|
173
|
+
"#{operator}(#{quoted_attribute}, #{query})"
|
172
174
|
end
|
173
175
|
when :mysql
|
174
176
|
if operator == "BIT_COUNT"
|
@@ -191,8 +193,6 @@ module Neighbor
|
|
191
193
|
case adapter
|
192
194
|
when :postgresql
|
193
195
|
column_type == :cube
|
194
|
-
when :mariadb
|
195
|
-
true
|
196
196
|
else
|
197
197
|
false
|
198
198
|
end
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-01-05 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: activerecord
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '7'
|
27
|
-
description:
|
28
26
|
email: andrew@ankane.org
|
29
27
|
executables: []
|
30
28
|
extensions: []
|
@@ -62,7 +60,6 @@ homepage: https://github.com/ankane/neighbor
|
|
62
60
|
licenses:
|
63
61
|
- MIT
|
64
62
|
metadata: {}
|
65
|
-
post_install_message:
|
66
63
|
rdoc_options: []
|
67
64
|
require_paths:
|
68
65
|
- lib
|
@@ -77,8 +74,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
74
|
- !ruby/object:Gem::Version
|
78
75
|
version: '0'
|
79
76
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
81
|
-
signing_key:
|
77
|
+
rubygems_version: 3.6.2
|
82
78
|
specification_version: 4
|
83
79
|
summary: Nearest neighbor search for Rails
|
84
80
|
test_files: []
|