neighbor 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +87 -87
- data/lib/neighbor/model.rb +2 -10
- data/lib/neighbor/mysql.rb +0 -3
- data/lib/neighbor/sqlite.rb +1 -1
- data/lib/neighbor/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f4fa263cadd7a89f4f99faef6238db1f355597ce1510a84c722563a669c62d8
|
4
|
+
data.tar.gz: b3876b14c21276b0e2bfefa87d98265a3c766ae2b1b32459e085bcedfde8afcb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f90e43582600d41411c3a9e129ca03b606d08ece9bfc39227aab97e2c2af77950eb885cfa4900bef94eb8524d121af1b25823e495b99756ea8f04398d2497c8f
|
7
|
+
data.tar.gz: 17cb10ce768c76f1a0e273c887486f1e0a9a2b73f3d5d8758ebaada7aa12366e4d9e061b6a125b81cb36167013bf2a6f07dba786a5c0de4af99543e70421d961
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -5,9 +5,9 @@ Nearest neighbor search for Rails
|
|
5
5
|
Supports:
|
6
6
|
|
7
7
|
- Postgres (cube and pgvector)
|
8
|
-
-
|
9
|
-
- MariaDB 11.7 - experimental
|
8
|
+
- MariaDB 11.8
|
10
9
|
- MySQL 9 (searching requires HeatWave) - experimental
|
10
|
+
- SQLite (sqlite-vec) - experimental
|
11
11
|
|
12
12
|
[![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions)
|
13
13
|
|
@@ -107,9 +107,9 @@ See the additional docs for:
|
|
107
107
|
|
108
108
|
- [cube](#cube)
|
109
109
|
- [pgvector](#pgvector)
|
110
|
-
- [sqlite-vec](#sqlite-vec)
|
111
110
|
- [MariaDB](#mariadb)
|
112
111
|
- [MySQL](#mysql)
|
112
|
+
- [sqlite-vec](#sqlite-vec)
|
113
113
|
|
114
114
|
Or check out some [examples](#examples)
|
115
115
|
|
@@ -216,7 +216,7 @@ Index vectors at half precision for smaller indexes
|
|
216
216
|
```ruby
|
217
217
|
class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
|
218
218
|
def change
|
219
|
-
add_index :items, "(embedding::halfvec(3))
|
219
|
+
add_index :items, "(embedding::halfvec(3)) halfvec_l2_ops", using: :hnsw
|
220
220
|
end
|
221
221
|
end
|
222
222
|
```
|
@@ -276,7 +276,7 @@ embedding = Neighbor::SparseVector.new({0 => 0.9, 1 => 1.3, 2 => 1.1}, 3)
|
|
276
276
|
Item.nearest_neighbors(:embedding, embedding, distance: "euclidean").first(5)
|
277
277
|
```
|
278
278
|
|
279
|
-
##
|
279
|
+
## MariaDB
|
280
280
|
|
281
281
|
### Distance
|
282
282
|
|
@@ -284,82 +284,64 @@ Supported values are:
|
|
284
284
|
|
285
285
|
- `euclidean`
|
286
286
|
- `cosine`
|
287
|
-
- `taxicab`
|
288
287
|
- `hamming`
|
289
288
|
|
290
|
-
###
|
289
|
+
### Indexing
|
291
290
|
|
292
|
-
|
291
|
+
Vector columns must use `null: false` to add a vector index
|
293
292
|
|
294
293
|
```ruby
|
295
|
-
class
|
296
|
-
|
294
|
+
class CreateItems < ActiveRecord::Migration[8.0]
|
295
|
+
def change
|
296
|
+
create_table :items do |t|
|
297
|
+
t.vector :embedding, limit: 3, null: false
|
298
|
+
t.index :embedding, type: :vector
|
299
|
+
end
|
300
|
+
end
|
297
301
|
end
|
298
302
|
```
|
299
303
|
|
300
|
-
###
|
304
|
+
### Binary Vectors
|
301
305
|
|
302
|
-
|
306
|
+
Use the `bigint` type to store binary vectors
|
303
307
|
|
304
308
|
```ruby
|
305
309
|
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
306
310
|
def change
|
307
|
-
|
308
|
-
create_virtual_table :items, :vec0, [
|
309
|
-
"id integer PRIMARY KEY AUTOINCREMENT NOT NULL",
|
310
|
-
"embedding float[3] distance_metric=L2"
|
311
|
-
]
|
312
|
-
|
313
|
-
# Rails < 8
|
314
|
-
execute <<~SQL
|
315
|
-
CREATE VIRTUAL TABLE items USING vec0(
|
316
|
-
id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
|
317
|
-
embedding float[3] distance_metric=L2
|
318
|
-
)
|
319
|
-
SQL
|
311
|
+
add_column :items, :embedding, :bigint
|
320
312
|
end
|
321
313
|
end
|
322
314
|
```
|
323
315
|
|
324
|
-
|
325
|
-
|
326
|
-
You can optionally ignore any shadow tables that are created
|
327
|
-
|
328
|
-
```ruby
|
329
|
-
ActiveRecord::SchemaDumper.ignore_tables += [
|
330
|
-
"items_chunks", "items_rowids", "items_vector_chunks00"
|
331
|
-
]
|
332
|
-
```
|
316
|
+
Note: Binary vectors can have up to 64 dimensions
|
333
317
|
|
334
|
-
Get the
|
318
|
+
Get the nearest neighbors by Hamming distance
|
335
319
|
|
336
320
|
```ruby
|
337
|
-
Item.
|
321
|
+
Item.nearest_neighbors(:embedding, 5, distance: "hamming").first(5)
|
338
322
|
```
|
339
323
|
|
340
|
-
|
324
|
+
## MySQL
|
341
325
|
|
342
|
-
|
343
|
-
Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
344
|
-
```
|
326
|
+
### Distance
|
345
327
|
|
346
|
-
|
328
|
+
Supported values are:
|
347
329
|
|
348
|
-
|
330
|
+
- `euclidean`
|
331
|
+
- `cosine`
|
332
|
+
- `hamming`
|
349
333
|
|
350
|
-
|
351
|
-
class Item < ApplicationRecord
|
352
|
-
has_neighbors :embedding, dimensions: 3, type: :int8
|
353
|
-
end
|
354
|
-
```
|
334
|
+
Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysql.com/doc/refman/9.0/en/vector-functions.html)
|
355
335
|
|
356
336
|
### Binary Vectors
|
357
337
|
|
358
|
-
Use the `
|
338
|
+
Use the `binary` type to store binary vectors
|
359
339
|
|
360
340
|
```ruby
|
361
|
-
class
|
362
|
-
|
341
|
+
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
342
|
+
def change
|
343
|
+
add_column :items, :embedding, :binary
|
344
|
+
end
|
363
345
|
end
|
364
346
|
```
|
365
347
|
|
@@ -369,7 +351,7 @@ Get the nearest neighbors by Hamming distance
|
|
369
351
|
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
|
370
352
|
```
|
371
353
|
|
372
|
-
##
|
354
|
+
## sqlite-vec
|
373
355
|
|
374
356
|
### Distance
|
375
357
|
|
@@ -377,64 +359,82 @@ Supported values are:
|
|
377
359
|
|
378
360
|
- `euclidean`
|
379
361
|
- `cosine`
|
362
|
+
- `taxicab`
|
380
363
|
- `hamming`
|
381
364
|
|
382
|
-
###
|
365
|
+
### Dimensions
|
383
366
|
|
384
|
-
|
367
|
+
For sqlite-vec, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
385
368
|
|
386
369
|
```ruby
|
387
|
-
class
|
388
|
-
|
389
|
-
create_table :items do |t|
|
390
|
-
t.vector :embedding, limit: 3, null: false
|
391
|
-
t.index :embedding, type: :vector
|
392
|
-
end
|
393
|
-
end
|
370
|
+
class Item < ApplicationRecord
|
371
|
+
has_neighbors :embedding, dimensions: 3
|
394
372
|
end
|
395
373
|
```
|
396
374
|
|
397
|
-
###
|
375
|
+
### Virtual Tables
|
398
376
|
|
399
|
-
|
377
|
+
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
400
378
|
|
401
379
|
```ruby
|
402
380
|
class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
|
403
381
|
def change
|
404
|
-
|
382
|
+
# Rails 8+
|
383
|
+
create_virtual_table :items, :vec0, [
|
384
|
+
"id integer PRIMARY KEY AUTOINCREMENT NOT NULL",
|
385
|
+
"embedding float[3] distance_metric=L2"
|
386
|
+
]
|
387
|
+
|
388
|
+
# Rails < 8
|
389
|
+
execute <<~SQL
|
390
|
+
CREATE VIRTUAL TABLE items USING vec0(
|
391
|
+
id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
|
392
|
+
embedding float[3] distance_metric=L2
|
393
|
+
)
|
394
|
+
SQL
|
405
395
|
end
|
406
396
|
end
|
407
397
|
```
|
408
398
|
|
409
|
-
|
399
|
+
Use `distance_metric=cosine` for cosine distance
|
410
400
|
|
411
|
-
|
401
|
+
You can optionally ignore any shadow tables that are created
|
412
402
|
|
413
403
|
```ruby
|
414
|
-
|
404
|
+
ActiveRecord::SchemaDumper.ignore_tables += [
|
405
|
+
"items_chunks", "items_rowids", "items_vector_chunks00"
|
406
|
+
]
|
415
407
|
```
|
416
408
|
|
417
|
-
|
409
|
+
Get the `k` nearest neighbors
|
418
410
|
|
419
|
-
|
411
|
+
```ruby
|
412
|
+
Item.where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
413
|
+
```
|
420
414
|
|
421
|
-
|
415
|
+
Filter by primary key
|
422
416
|
|
423
|
-
|
424
|
-
|
425
|
-
|
417
|
+
```ruby
|
418
|
+
Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
419
|
+
```
|
426
420
|
|
427
|
-
|
421
|
+
### Int8 Vectors
|
422
|
+
|
423
|
+
Use the `type` option for int8 vectors
|
424
|
+
|
425
|
+
```ruby
|
426
|
+
class Item < ApplicationRecord
|
427
|
+
has_neighbors :embedding, dimensions: 3, type: :int8
|
428
|
+
end
|
429
|
+
```
|
428
430
|
|
429
431
|
### Binary Vectors
|
430
432
|
|
431
|
-
Use the `
|
433
|
+
Use the `type` option for binary vectors
|
432
434
|
|
433
435
|
```ruby
|
434
|
-
class
|
435
|
-
|
436
|
-
add_column :items, :embedding, :binary
|
437
|
-
end
|
436
|
+
class Item < ApplicationRecord
|
437
|
+
has_neighbors :embedding, dimensions: 8, type: :bit
|
438
438
|
end
|
439
439
|
```
|
440
440
|
|
@@ -473,7 +473,7 @@ end
|
|
473
473
|
Create a method to call the [embeddings API](https://platform.openai.com/docs/guides/embeddings)
|
474
474
|
|
475
475
|
```ruby
|
476
|
-
def
|
476
|
+
def embed(input)
|
477
477
|
url = "https://api.openai.com/v1/embeddings"
|
478
478
|
headers = {
|
479
479
|
"Authorization" => "Bearer #{ENV.fetch("OPENAI_API_KEY")}",
|
@@ -497,7 +497,7 @@ input = [
|
|
497
497
|
"The cat is purring",
|
498
498
|
"The bear is growling"
|
499
499
|
]
|
500
|
-
embeddings =
|
500
|
+
embeddings = embed(input)
|
501
501
|
```
|
502
502
|
|
503
503
|
Store the embeddings
|
@@ -524,7 +524,7 @@ See the [complete code](examples/openai/example.rb)
|
|
524
524
|
Generate a model
|
525
525
|
|
526
526
|
```sh
|
527
|
-
rails generate model Document content:text embedding:bit{
|
527
|
+
rails generate model Document content:text embedding:bit{1536}
|
528
528
|
rails db:migrate
|
529
529
|
```
|
530
530
|
|
@@ -539,15 +539,15 @@ end
|
|
539
539
|
Create a method to call the [embed API](https://docs.cohere.com/reference/embed)
|
540
540
|
|
541
541
|
```ruby
|
542
|
-
def
|
543
|
-
url = "https://api.cohere.com/
|
542
|
+
def embed(input, input_type)
|
543
|
+
url = "https://api.cohere.com/v2/embed"
|
544
544
|
headers = {
|
545
545
|
"Authorization" => "Bearer #{ENV.fetch("CO_API_KEY")}",
|
546
546
|
"Content-Type" => "application/json"
|
547
547
|
}
|
548
548
|
data = {
|
549
549
|
texts: input,
|
550
|
-
model: "embed-
|
550
|
+
model: "embed-v4.0",
|
551
551
|
input_type: input_type,
|
552
552
|
embedding_types: ["ubinary"]
|
553
553
|
}
|
@@ -565,7 +565,7 @@ input = [
|
|
565
565
|
"The cat is purring",
|
566
566
|
"The bear is growling"
|
567
567
|
]
|
568
|
-
embeddings =
|
568
|
+
embeddings = embed(input, "search_document")
|
569
569
|
```
|
570
570
|
|
571
571
|
Store the embeddings
|
@@ -582,7 +582,7 @@ Embed the search query
|
|
582
582
|
|
583
583
|
```ruby
|
584
584
|
query = "forest"
|
585
|
-
query_embedding =
|
585
|
+
query_embedding = embed([query], "search_query")[0]
|
586
586
|
```
|
587
587
|
|
588
588
|
And search the documents
|
@@ -876,7 +876,7 @@ bundle exec rake test:postgresql
|
|
876
876
|
bundle exec rake test:sqlite
|
877
877
|
|
878
878
|
# MariaDB
|
879
|
-
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.
|
879
|
+
docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.8
|
880
880
|
bundle exec rake test:mariadb
|
881
881
|
|
882
882
|
# MySQL
|
data/lib/neighbor/model.rb
CHANGED
@@ -40,16 +40,8 @@ module Neighbor
|
|
40
40
|
end
|
41
41
|
|
42
42
|
if normalize
|
43
|
-
|
44
|
-
|
45
|
-
normalizes attribute_name, with: ->(v) { Neighbor::Utils.normalize(v, column_info: columns_hash[attribute_name.to_s]) }
|
46
|
-
end
|
47
|
-
else
|
48
|
-
attribute_names.each do |attribute_name|
|
49
|
-
attribute attribute_name do |cast_type|
|
50
|
-
Neighbor::NormalizedAttribute.new(cast_type: cast_type, model: self, attribute_name: attribute_name)
|
51
|
-
end
|
52
|
-
end
|
43
|
+
attribute_names.each do |attribute_name|
|
44
|
+
normalizes attribute_name, with: ->(v) { Neighbor::Utils.normalize(v, column_info: columns_hash[attribute_name.to_s]) }
|
53
45
|
end
|
54
46
|
end
|
55
47
|
|
data/lib/neighbor/mysql.rb
CHANGED
@@ -15,9 +15,6 @@ module Neighbor
|
|
15
15
|
|
16
16
|
# prevent unknown OID warning
|
17
17
|
ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter.singleton_class.prepend(RegisterTypes)
|
18
|
-
if ActiveRecord::VERSION::STRING.to_f < 7.1
|
19
|
-
ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter.register_vector_type(ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter::TYPE_MAP)
|
20
|
-
end
|
21
18
|
end
|
22
19
|
|
23
20
|
module RegisterTypes
|
data/lib/neighbor/sqlite.rb
CHANGED
@@ -18,7 +18,7 @@ module Neighbor
|
|
18
18
|
module InstanceMethods
|
19
19
|
def configure_connection
|
20
20
|
super
|
21
|
-
db =
|
21
|
+
db = @raw_connection
|
22
22
|
db.enable_load_extension(1)
|
23
23
|
SqliteVec.load(db)
|
24
24
|
db.enable_load_extension(0)
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: activerecord
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '7'
|
18
|
+
version: '7.1'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: '7'
|
25
|
+
version: '7.1'
|
26
26
|
email: andrew@ankane.org
|
27
27
|
executables: []
|
28
28
|
extensions: []
|
@@ -67,14 +67,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
67
|
requirements:
|
68
68
|
- - ">="
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '3.
|
70
|
+
version: '3.2'
|
71
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
requirements: []
|
77
|
-
rubygems_version: 3.6.
|
77
|
+
rubygems_version: 3.6.7
|
78
78
|
specification_version: 4
|
79
79
|
summary: Nearest neighbor search for Rails
|
80
80
|
test_files: []
|