neighbor 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +87 -95
- data/lib/neighbor/model.rb +2 -10
- data/lib/neighbor/mysql.rb +0 -3
- data/lib/neighbor/postgresql.rb +15 -0
- data/lib/neighbor/sqlite.rb +1 -1
- data/lib/neighbor/version.rb +1 -1
- metadata +6 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f4fa263cadd7a89f4f99faef6238db1f355597ce1510a84c722563a669c62d8
|
4
|
+
data.tar.gz: b3876b14c21276b0e2bfefa87d98265a3c766ae2b1b32459e085bcedfde8afcb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f90e43582600d41411c3a9e129ca03b606d08ece9bfc39227aab97e2c2af77950eb885cfa4900bef94eb8524d121af1b25823e495b99756ea8f04398d2497c8f
|
7
|
+
data.tar.gz: 17cb10ce768c76f1a0e273c887486f1e0a9a2b73f3d5d8758ebaada7aa12366e4d9e061b6a125b81cb36167013bf2a6f07dba786a5c0de4af99543e70421d961
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.6.0 (2025-06-12)
|
2
|
+
|
3
|
+
- Added support for MariaDB 11.8
|
4
|
+
- Dropped experimental support for MariaDB 11.7
|
5
|
+
- Dropped support for Ruby < 3.2 and Active Record < 7.1
|
6
|
+
|
7
|
+
## 0.5.2 (2025-01-05)
|
8
|
+
|
9
|
+
- Improved support for Postgres arrays
|
10
|
+
|
1
11
|
## 0.5.1 (2024-12-03)
|
2
12
|
|
3
13
|
- Added experimental support for MariaDB 11.7
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -5,9 +5,9 @@ Nearest neighbor search for Rails
|
|
5
5
|
Supports:
|
6
6
|
|
7
7
|
- Postgres (cube and pgvector)
|
8
|
-
- SQLite (sqlite-vec) - experimental
|
9
|
-
- MariaDB 11.7 - experimental
|
8
|
+
- MariaDB 11.8
|
10
9
|
- MySQL 9 (searching requires HeatWave) - experimental
|
10
|
+
- SQLite (sqlite-vec) - experimental
|
11
11
|
|
12
12
|
[![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions)
|
13
13
|
|
@@ -107,9 +107,9 @@ See the additional docs for:
|
|
107
107
|
|
108
108
|
- [cube](#cube)
|
109
109
|
- [pgvector](#pgvector)
|
110
|
-
- [sqlite-vec](#sqlite-vec)
|
111
110
|
- [MariaDB](#mariadb)
|
112
111
|
- [MySQL](#mysql)
|
112
|
+
- [sqlite-vec](#sqlite-vec)
|
113
113
|
|
114
114
|
Or check out some [examples](#examples)
|
115
115
|
|
@@ -216,7 +216,7 @@ Index vectors at half precision for smaller indexes
|
|
216
216
|
```ruby
|
217
217
|
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
218
218
|
def change
|
219
|
-
add_index :items, "(embedding::halfvec(3))
|
219
|
+
add_index :items, "(embedding::halfvec(3)) halfvec_l2_ops", using: :hnsw
|
220
220
|
end
|
221
221
|
end
|
222
222
|
```
|
@@ -276,7 +276,7 @@ embedding = Neighbor::SparseVector.new({0 => 0.9, 1 => 1.3, 2 => 1.1}, 3)
|
|
276
276
|
Item.nearest_neighbors(:embedding, embedding, distance: "euclidean").first(5)
|
277
277
|
```
|
278
278
|
|
279
|
-
## sqlite-vec
|
279
|
+
## MariaDB
|
280
280
|
|
281
281
|
### Distance
|
282
282
|
|
@@ -284,90 +284,64 @@ Supported values are:
|
|
284
284
|
|
285
285
|
- `euclidean`
|
286
286
|
- `cosine`
|
287
|
-
- `taxicab`
|
288
287
|
- `hamming`
|
289
288
|
|
290
|
-
###
|
289
|
+
### Indexing
|
291
290
|
|
292
|
-
|
291
|
+
Vector columns must use `null: false` to add a vector index
|
293
292
|
|
294
293
|
```ruby
|
295
|
-
class
|
296
|
-
|
294
|
+
class CreateItems < ActiveRecord::Migration[8.0]
|
295
|
+
def change
|
296
|
+
create_table :items do |t|
|
297
|
+
t.vector :embedding, limit: 3, null: false
|
298
|
+
t.index :embedding, type: :vector
|
299
|
+
end
|
300
|
+
end
|
297
301
|
end
|
298
302
|
```
|
299
303
|
|
300
|
-
###
|
304
|
+
### Binary Vectors
|
301
305
|
|
302
|
-
|
306
|
+
Use the `bigint` type to store binary vectors
|
303
307
|
|
304
308
|
```ruby
|
305
309
|
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
306
310
|
def change
|
307
|
-
|
308
|
-
create_virtual_table :items, :vec0, [
|
309
|
-
"embedding float[3] distance_metric=L2"
|
310
|
-
]
|
311
|
-
|
312
|
-
# Rails < 8
|
313
|
-
execute <<~SQL
|
314
|
-
CREATE VIRTUAL TABLE items USING vec0(
|
315
|
-
embedding float[3] distance_metric=L2
|
316
|
-
)
|
317
|
-
SQL
|
311
|
+
add_column :items, :embedding, :bigint
|
318
312
|
end
|
319
313
|
end
|
320
314
|
```
|
321
315
|
|
322
|
-
|
323
|
-
|
324
|
-
You can optionally ignore any shadow tables that are created
|
325
|
-
|
326
|
-
```ruby
|
327
|
-
ActiveRecord::SchemaDumper.ignore_tables += [
|
328
|
-
"items_chunks", "items_rowids", "items_vector_chunks00"
|
329
|
-
]
|
330
|
-
```
|
331
|
-
|
332
|
-
Create a model with `rowid` as the primary key
|
333
|
-
|
334
|
-
```ruby
|
335
|
-
class Item < ApplicationRecord
|
336
|
-
self.primary_key = "rowid"
|
337
|
-
|
338
|
-
has_neighbors :embedding, dimensions: 3
|
339
|
-
end
|
340
|
-
```
|
316
|
+
Note: Binary vectors can have up to 64 dimensions
|
341
317
|
|
342
|
-
Get the
|
318
|
+
Get the nearest neighbors by Hamming distance
|
343
319
|
|
344
320
|
```ruby
|
345
|
-
Item.
|
321
|
+
Item.nearest_neighbors(:embedding, 5, distance: "hamming").first(5)
|
346
322
|
```
|
347
323
|
|
348
|
-
|
324
|
+
## MySQL
|
349
325
|
|
350
|
-
|
351
|
-
Item.where(rowid: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
352
|
-
```
|
326
|
+
### Distance
|
353
327
|
|
354
|
-
|
328
|
+
Supported values are:
|
355
329
|
|
356
|
-
|
330
|
+
- `euclidean`
|
331
|
+
- `cosine`
|
332
|
+
- `hamming`
|
357
333
|
|
358
|
-
|
359
|
-
class Item < ApplicationRecord
|
360
|
-
has_neighbors :embedding, dimensions: 3, type: :int8
|
361
|
-
end
|
362
|
-
```
|
334
|
+
Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysql.com/doc/refman/9.0/en/vector-functions.html)
|
363
335
|
|
364
336
|
### Binary Vectors
|
365
337
|
|
366
|
-
Use the `
|
338
|
+
Use the `binary` type to store binary vectors
|
367
339
|
|
368
340
|
```ruby
|
369
|
-
class
|
370
|
-
|
341
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
342
|
+
def change
|
343
|
+
add_column :items, :embedding, :binary
|
344
|
+
end
|
371
345
|
end
|
372
346
|
```
|
373
347
|
|
@@ -377,7 +351,7 @@ Get the nearest neighbors by Hamming distance
|
|
377
351
|
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
|
378
352
|
```
|
379
353
|
|
380
|
-
## MariaDB
|
354
|
+
## sqlite-vec
|
381
355
|
|
382
356
|
### Distance
|
383
357
|
|
@@ -385,64 +359,82 @@ Supported values are:
|
|
385
359
|
|
386
360
|
- `euclidean`
|
387
361
|
- `cosine`
|
362
|
+
- `taxicab`
|
388
363
|
- `hamming`
|
389
364
|
|
390
|
-
###
|
365
|
+
### Dimensions
|
391
366
|
|
392
|
-
|
367
|
+
For sqlite-vec, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
393
368
|
|
394
369
|
```ruby
|
395
|
-
class
|
396
|
-
|
397
|
-
create_table :items do |t|
|
398
|
-
t.vector :embedding, limit: 3, null: false
|
399
|
-
t.index :embedding, type: :vector
|
400
|
-
end
|
401
|
-
end
|
370
|
+
class Item < ApplicationRecord
|
371
|
+
has_neighbors :embedding, dimensions: 3
|
402
372
|
end
|
403
373
|
```
|
404
374
|
|
405
|
-
###
|
375
|
+
### Virtual Tables
|
406
376
|
|
407
|
-
|
377
|
+
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
408
378
|
|
409
379
|
```ruby
|
410
380
|
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
411
381
|
def change
|
412
|
-
|
382
|
+
# Rails 8+
|
383
|
+
create_virtual_table :items, :vec0, [
|
384
|
+
"id integer PRIMARY KEY AUTOINCREMENT NOT NULL",
|
385
|
+
"embedding float[3] distance_metric=L2"
|
386
|
+
]
|
387
|
+
|
388
|
+
# Rails < 8
|
389
|
+
execute <<~SQL
|
390
|
+
CREATE VIRTUAL TABLE items USING vec0(
|
391
|
+
id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
|
392
|
+
embedding float[3] distance_metric=L2
|
393
|
+
)
|
394
|
+
SQL
|
413
395
|
end
|
414
396
|
end
|
415
397
|
```
|
416
398
|
|
417
|
-
|
399
|
+
Use `distance_metric=cosine` for cosine distance
|
418
400
|
|
419
|
-
|
401
|
+
You can optionally ignore any shadow tables that are created
|
420
402
|
|
421
403
|
```ruby
|
422
|
-
|
404
|
+
ActiveRecord::SchemaDumper.ignore_tables += [
|
405
|
+
"items_chunks", "items_rowids", "items_vector_chunks00"
|
406
|
+
]
|
423
407
|
```
|
424
408
|
|
425
|
-
|
409
|
+
Get the `k` nearest neighbors
|
426
410
|
|
427
|
-
|
411
|
+
```ruby
|
412
|
+
Item.where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
413
|
+
```
|
428
414
|
|
429
|
-
|
415
|
+
Filter by primary key
|
430
416
|
|
431
|
-
|
432
|
-
|
433
|
-
|
417
|
+
```ruby
|
418
|
+
Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
419
|
+
```
|
434
420
|
|
435
|
-
|
421
|
+
### Int8 Vectors
|
422
|
+
|
423
|
+
Use the `type` option for int8 vectors
|
424
|
+
|
425
|
+
```ruby
|
426
|
+
class Item < ApplicationRecord
|
427
|
+
has_neighbors :embedding, dimensions: 3, type: :int8
|
428
|
+
end
|
429
|
+
```
|
436
430
|
|
437
431
|
### Binary Vectors
|
438
432
|
|
439
|
-
Use the `
|
433
|
+
Use the `type` option for binary vectors
|
440
434
|
|
441
435
|
```ruby
|
442
|
-
class
|
443
|
-
|
444
|
-
add_column :items, :embedding, :binary
|
445
|
-
end
|
436
|
+
class Item < ApplicationRecord
|
437
|
+
has_neighbors :embedding, dimensions: 8, type: :bit
|
446
438
|
end
|
447
439
|
```
|
448
440
|
|
@@ -481,7 +473,7 @@ end
|
|
481
473
|
Create a method to call the [embeddings API](https://platform.openai.com/docs/guides/embeddings)
|
482
474
|
|
483
475
|
```ruby
|
484
|
-
def
|
476
|
+
def embed(input)
|
485
477
|
url = "https://api.openai.com/v1/embeddings"
|
486
478
|
headers = {
|
487
479
|
"Authorization" => "Bearer #{ENV.fetch("OPENAI_API_KEY")}",
|
@@ -505,7 +497,7 @@ input = [
|
|
505
497
|
"The cat is purring",
|
506
498
|
"The bear is growling"
|
507
499
|
]
|
508
|
-
embeddings =
|
500
|
+
embeddings = embed(input)
|
509
501
|
```
|
510
502
|
|
511
503
|
Store the embeddings
|
@@ -532,7 +524,7 @@ See the [complete code](examples/openai/example.rb)
|
|
532
524
|
Generate a model
|
533
525
|
|
534
526
|
```sh
|
535
|
-
rails generate model Document content:text embedding:bit{
|
527
|
+
rails generate model Document content:text embedding:bit{1536}
|
536
528
|
rails db:migrate
|
537
529
|
```
|
538
530
|
|
@@ -547,15 +539,15 @@ end
|
|
547
539
|
Create a method to call the [embed API](https://docs.cohere.com/reference/embed)
|
548
540
|
|
549
541
|
```ruby
|
550
|
-
def
|
551
|
-
url = "https://api.cohere.com/
|
542
|
+
def embed(input, input_type)
|
543
|
+
url = "https://api.cohere.com/v2/embed"
|
552
544
|
headers = {
|
553
545
|
"Authorization" => "Bearer #{ENV.fetch("CO_API_KEY")}",
|
554
546
|
"Content-Type" => "application/json"
|
555
547
|
}
|
556
548
|
data = {
|
557
549
|
texts: input,
|
558
|
-
model: "embed-
|
550
|
+
model: "embed-v4.0",
|
559
551
|
input_type: input_type,
|
560
552
|
embedding_types: ["ubinary"]
|
561
553
|
}
|
@@ -573,7 +565,7 @@ input = [
|
|
573
565
|
"The cat is purring",
|
574
566
|
"The bear is growling"
|
575
567
|
]
|
576
|
-
embeddings =
|
568
|
+
embeddings = embed(input, "search_document")
|
577
569
|
```
|
578
570
|
|
579
571
|
Store the embeddings
|
@@ -590,7 +582,7 @@ Embed the search query
|
|
590
582
|
|
591
583
|
```ruby
|
592
584
|
query = "forest"
|
593
|
-
query_embedding =
|
585
|
+
query_embedding = embed([query], "search_query")[0]
|
594
586
|
```
|
595
587
|
|
596
588
|
And search the documents
|
@@ -884,7 +876,7 @@ bundle exec rake test:postgresql
|
|
884
876
|
bundle exec rake test:sqlite
|
885
877
|
|
886
878
|
# MariaDB
|
887
|
-
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.
|
879
|
+
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.8
|
888
880
|
bundle exec rake test:mariadb
|
889
881
|
|
890
882
|
# MySQL
|
data/lib/neighbor/model.rb
CHANGED
@@ -40,16 +40,8 @@ module Neighbor
|
|
40
40
|
end
|
41
41
|
|
42
42
|
if normalize
|
43
|
-
|
44
|
-
|
45
|
-
normalizes attribute_name, with: ->(v) { Neighbor::Utils.normalize(v, column_info: columns_hash[attribute_name.to_s]) }
|
46
|
-
end
|
47
|
-
else
|
48
|
-
attribute_names.each do |attribute_name|
|
49
|
-
attribute attribute_name do |cast_type|
|
50
|
-
Neighbor::NormalizedAttribute.new(cast_type: cast_type, model: self, attribute_name: attribute_name)
|
51
|
-
end
|
52
|
-
end
|
43
|
+
attribute_names.each do |attribute_name|
|
44
|
+
normalizes attribute_name, with: ->(v) { Neighbor::Utils.normalize(v, column_info: columns_hash[attribute_name.to_s]) }
|
53
45
|
end
|
54
46
|
end
|
55
47
|
|
data/lib/neighbor/mysql.rb
CHANGED
@@ -15,9 +15,6 @@ module Neighbor
|
|
15
15
|
|
16
16
|
# prevent unknown OID warning
|
17
17
|
ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter.singleton_class.prepend(RegisterTypes)
|
18
|
-
if ActiveRecord::VERSION::STRING.to_f < 7.1
|
19
|
-
ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter.register_vector_type(ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter::TYPE_MAP)
|
20
|
-
end
|
21
18
|
end
|
22
19
|
|
23
20
|
module RegisterTypes
|
data/lib/neighbor/postgresql.rb
CHANGED
@@ -19,6 +19,9 @@ module Neighbor
|
|
19
19
|
|
20
20
|
# prevent unknown OID warning
|
21
21
|
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(RegisterTypes)
|
22
|
+
|
23
|
+
# support vector[]/halfvec[]
|
24
|
+
ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.prepend(ArrayMethods)
|
22
25
|
end
|
23
26
|
|
24
27
|
module RegisterTypes
|
@@ -39,5 +42,17 @@ module Neighbor
|
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
45
|
+
|
46
|
+
ArrayWrapper = Struct.new(:to_a)
|
47
|
+
|
48
|
+
module ArrayMethods
|
49
|
+
def type_cast_array(value, method, ...)
|
50
|
+
if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all? { |v| v.is_a?(::Numeric) }
|
51
|
+
super(ArrayWrapper.new(value), method, ...)
|
52
|
+
else
|
53
|
+
super
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
42
57
|
end
|
43
58
|
end
|
data/lib/neighbor/sqlite.rb
CHANGED
@@ -18,7 +18,7 @@ module Neighbor
|
|
18
18
|
module InstanceMethods
|
19
19
|
def configure_connection
|
20
20
|
super
|
21
|
-
db =
|
21
|
+
db = @raw_connection
|
22
22
|
db.enable_load_extension(1)
|
23
23
|
SqliteVec.load(db)
|
24
24
|
db.enable_load_extension(0)
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: activerecord
|
@@ -16,15 +15,14 @@ dependencies:
|
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version: '7'
|
18
|
+
version: '7.1'
|
20
19
|
type: :runtime
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version: '7'
|
27
|
-
description:
|
25
|
+
version: '7.1'
|
28
26
|
email: andrew@ankane.org
|
29
27
|
executables: []
|
30
28
|
extensions: []
|
@@ -62,7 +60,6 @@ homepage: https://github.com/ankane/neighbor
|
|
62
60
|
licenses:
|
63
61
|
- MIT
|
64
62
|
metadata: {}
|
65
|
-
post_install_message:
|
66
63
|
rdoc_options: []
|
67
64
|
require_paths:
|
68
65
|
- lib
|
@@ -70,15 +67,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
70
67
|
requirements:
|
71
68
|
- - ">="
|
72
69
|
- !ruby/object:Gem::Version
|
73
|
-
version: '3.
|
70
|
+
version: '3.2'
|
74
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
72
|
requirements:
|
76
73
|
- - ">="
|
77
74
|
- !ruby/object:Gem::Version
|
78
75
|
version: '0'
|
79
76
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
81
|
-
signing_key:
|
77
|
+
rubygems_version: 3.6.7
|
82
78
|
specification_version: 4
|
83
79
|
summary: Nearest neighbor search for Rails
|
84
80
|
test_files: []
|