neighbor 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +136 -89
- data/lib/neighbor/model.rb +3 -1
- data/lib/neighbor/postgresql.rb +1 -1
- data/lib/neighbor/sqlite.rb +120 -8
- data/lib/neighbor/utils.rb +36 -7
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1154f0138248270d73d1681962c7bfecc8f81e074104a24071d32d76d5405705
|
|
4
|
+
data.tar.gz: 23c823cc022efcfebf9533d0388dd3abaec0fca61cc7e0911a2972dfb3bf429b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 67cc38e53cb43b8775561dd234cf073d2d45115b1c517aa32f890ab9f3d059106915158ee8bc08d2af9091e2107273ba9a65da6c66a1646afc419b62a8a669e0
|
|
7
|
+
data.tar.gz: 63cf2a9765043884726a59a6c058730d004be2b15bed0565be70579ef1717b322bb424e1311ced9a5685c8f45b7521333b0c66cca42c83cd52cb94cd13a7a5fd
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -4,10 +4,10 @@ Nearest neighbor search for Rails
|
|
|
4
4
|
|
|
5
5
|
Supports:
|
|
6
6
|
|
|
7
|
-
- Postgres (
|
|
7
|
+
- Postgres (pgvector and cube)
|
|
8
8
|
- MariaDB 11.8
|
|
9
9
|
- MySQL 9 (searching requires HeatWave) - experimental
|
|
10
|
-
- SQLite
|
|
10
|
+
- SQLite - experimental
|
|
11
11
|
|
|
12
12
|
Also available for [Redis](https://github.com/ankane/neighbor-redis) and [S3 Vectors](https://github.com/ankane/neighbor-s3)
|
|
13
13
|
|
|
@@ -23,14 +23,7 @@ gem "neighbor"
|
|
|
23
23
|
|
|
24
24
|
### For Postgres
|
|
25
25
|
|
|
26
|
-
Neighbor supports two extensions: [
|
|
27
|
-
|
|
28
|
-
For cube, run:
|
|
29
|
-
|
|
30
|
-
```sh
|
|
31
|
-
rails generate neighbor:cube
|
|
32
|
-
rails db:migrate
|
|
33
|
-
```
|
|
26
|
+
Neighbor supports two extensions for Postgres: [pgvector](https://github.com/pgvector/pgvector) and [cube](https://www.postgresql.org/docs/current/cube.html). cube ships with Postgres, while pgvector supports more dimensions and approximate nearest neighbor search.
|
|
34
27
|
|
|
35
28
|
For pgvector, [install the extension](https://github.com/pgvector/pgvector#installation) and run:
|
|
36
29
|
|
|
@@ -39,18 +32,11 @@ rails generate neighbor:vector
|
|
|
39
32
|
rails db:migrate
|
|
40
33
|
```
|
|
41
34
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
Add this line to your application’s Gemfile:
|
|
45
|
-
|
|
46
|
-
```ruby
|
|
47
|
-
gem "sqlite-vec"
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
And run:
|
|
35
|
+
For cube, run:
|
|
51
36
|
|
|
52
37
|
```sh
|
|
53
|
-
rails generate neighbor:sqlite
|
|
38
|
+
rails generate neighbor:cube
|
|
39
|
+
rails db:migrate
|
|
54
40
|
```
|
|
55
41
|
|
|
56
42
|
## Getting Started
|
|
@@ -60,13 +46,13 @@ Create a migration
|
|
|
60
46
|
```ruby
|
|
61
47
|
class AddEmbeddingToItems < ActiveRecord::Migration[8.1]
|
|
62
48
|
def change
|
|
63
|
-
# cube
|
|
64
|
-
add_column :items, :embedding, :cube
|
|
65
|
-
|
|
66
49
|
# pgvector, MariaDB, and MySQL
|
|
67
50
|
add_column :items, :embedding, :vector, limit: 3 # dimensions
|
|
68
51
|
|
|
69
|
-
#
|
|
52
|
+
# cube
|
|
53
|
+
add_column :items, :embedding, :cube
|
|
54
|
+
|
|
55
|
+
# SQLite
|
|
70
56
|
add_column :items, :embedding, :binary
|
|
71
57
|
end
|
|
72
58
|
end
|
|
@@ -107,47 +93,14 @@ nearest_item.neighbor_distance
|
|
|
107
93
|
|
|
108
94
|
See the additional docs for:
|
|
109
95
|
|
|
110
|
-
- [cube](#cube)
|
|
111
96
|
- [pgvector](#pgvector)
|
|
97
|
+
- [cube](#cube)
|
|
112
98
|
- [MariaDB](#mariadb)
|
|
113
99
|
- [MySQL](#mysql)
|
|
114
|
-
- [sqlite-vec](#sqlite-vec)
|
|
100
|
+
- [SQLite](#sqlite)
|
|
115
101
|
|
|
116
102
|
Or check out some [examples](#examples)
|
|
117
103
|
|
|
118
|
-
## cube
|
|
119
|
-
|
|
120
|
-
### Distance
|
|
121
|
-
|
|
122
|
-
Supported values are:
|
|
123
|
-
|
|
124
|
-
- `euclidean`
|
|
125
|
-
- `cosine`
|
|
126
|
-
- `taxicab`
|
|
127
|
-
- `chebyshev`
|
|
128
|
-
|
|
129
|
-
For cosine distance with cube, vectors must be normalized before being stored.
|
|
130
|
-
|
|
131
|
-
```ruby
|
|
132
|
-
class Item < ApplicationRecord
|
|
133
|
-
has_neighbors :embedding, normalize: true
|
|
134
|
-
end
|
|
135
|
-
```
|
|
136
|
-
|
|
137
|
-
For inner product with cube, see [this example](examples/disco/user_recs_cube.rb).
|
|
138
|
-
|
|
139
|
-
### Dimensions
|
|
140
|
-
|
|
141
|
-
The `cube` type can have up to 100 dimensions by default. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this.
|
|
142
|
-
|
|
143
|
-
For cube, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
|
144
|
-
|
|
145
|
-
```ruby
|
|
146
|
-
class Item < ApplicationRecord
|
|
147
|
-
has_neighbors :embedding, dimensions: 3
|
|
148
|
-
end
|
|
149
|
-
```
|
|
150
|
-
|
|
151
104
|
## pgvector
|
|
152
105
|
|
|
153
106
|
### Distance
|
|
@@ -278,6 +231,39 @@ embedding = Neighbor::SparseVector.new({0 => 0.9, 1 => 1.3, 2 => 1.1}, 3)
|
|
|
278
231
|
Item.nearest_neighbors(:embedding, embedding, distance: "euclidean").first(5)
|
|
279
232
|
```
|
|
280
233
|
|
|
234
|
+
## cube
|
|
235
|
+
|
|
236
|
+
### Distance
|
|
237
|
+
|
|
238
|
+
Supported values are:
|
|
239
|
+
|
|
240
|
+
- `euclidean`
|
|
241
|
+
- `cosine`
|
|
242
|
+
- `taxicab`
|
|
243
|
+
- `chebyshev`
|
|
244
|
+
|
|
245
|
+
For cosine distance with cube, vectors must be normalized before being stored.
|
|
246
|
+
|
|
247
|
+
```ruby
|
|
248
|
+
class Item < ApplicationRecord
|
|
249
|
+
has_neighbors :embedding, normalize: true
|
|
250
|
+
end
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
For inner product with cube, see [this example](examples/disco/user_recs_cube.rb).
|
|
254
|
+
|
|
255
|
+
### Dimensions
|
|
256
|
+
|
|
257
|
+
The `cube` type can have up to 100 dimensions by default. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this.
|
|
258
|
+
|
|
259
|
+
For cube, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
|
260
|
+
|
|
261
|
+
```ruby
|
|
262
|
+
class Item < ApplicationRecord
|
|
263
|
+
has_neighbors :embedding, dimensions: 3
|
|
264
|
+
end
|
|
265
|
+
```
|
|
266
|
+
|
|
281
267
|
## MariaDB
|
|
282
268
|
|
|
283
269
|
### Distance
|
|
@@ -353,20 +339,22 @@ Get the nearest neighbors by Hamming distance
|
|
|
353
339
|
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
|
|
354
340
|
```
|
|
355
341
|
|
|
356
|
-
## sqlite-vec
|
|
342
|
+
## SQLite
|
|
357
343
|
|
|
358
344
|
### Distance
|
|
359
345
|
|
|
360
346
|
Supported values are:
|
|
361
347
|
|
|
362
348
|
- `euclidean`
|
|
349
|
+
- `inner_product`
|
|
363
350
|
- `cosine`
|
|
364
351
|
- `taxicab`
|
|
365
352
|
- `hamming`
|
|
353
|
+
- `jaccard`
|
|
366
354
|
|
|
367
355
|
### Dimensions
|
|
368
356
|
|
|
369
|
-
For
|
|
357
|
+
For SQLite, it’s a good idea to specify the number of dimensions to ensure all records have the same number.
|
|
370
358
|
|
|
371
359
|
```ruby
|
|
372
360
|
class Item < ApplicationRecord
|
|
@@ -374,12 +362,97 @@ class Item < ApplicationRecord
|
|
|
374
362
|
end
|
|
375
363
|
```
|
|
376
364
|
|
|
377
|
-
###
|
|
365
|
+
### Int8 Vectors
|
|
366
|
+
|
|
367
|
+
Use the `type` option for int8 vectors
|
|
368
|
+
|
|
369
|
+
```ruby
|
|
370
|
+
class Item < ApplicationRecord
|
|
371
|
+
has_neighbors :embedding, dimensions: 3, type: :int8
|
|
372
|
+
end
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### Binary Vectors
|
|
376
|
+
|
|
377
|
+
Use the `type` option for binary vectors
|
|
378
|
+
|
|
379
|
+
```ruby
|
|
380
|
+
class Item < ApplicationRecord
|
|
381
|
+
has_neighbors :embedding, dimensions: 8, type: :bit
|
|
382
|
+
end
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
Get the nearest neighbors by Hamming distance
|
|
386
|
+
|
|
387
|
+
```ruby
|
|
388
|
+
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
### SQLite Extensions
|
|
392
|
+
|
|
393
|
+
Improve performance with extensions:
|
|
394
|
+
|
|
395
|
+
- [Vec1](#vec1)
|
|
396
|
+
- [sqlite-vec](#sqlite-vec)
|
|
397
|
+
|
|
398
|
+
### Vec1
|
|
399
|
+
|
|
400
|
+
For [Vec1](https://sqlite.org/vec1/doc/trunk/doc/vec1.md), [build the extension](https://sqlite.org/vec1/doc/trunk/doc/vec1.md#2-building-the-extension) and create `config/initializers/neighbor.rb` with:
|
|
401
|
+
|
|
402
|
+
```ruby
|
|
403
|
+
Neighbor::SQLite.initialize!(extension: "/path/to/vec1.so")
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
This speeds up `euclidean` and `cosine` distance
|
|
407
|
+
|
|
408
|
+
You can also use [virtual tables](https://sqlite.org/vec1/doc/trunk/doc/vec1intro.md#1-using-the-virtual-table)
|
|
409
|
+
|
|
410
|
+
```ruby
|
|
411
|
+
class CreateItems < ActiveRecord::Migration[8.1]
|
|
412
|
+
def change
|
|
413
|
+
# Rails 8+
|
|
414
|
+
create_virtual_table :items, :vec1, ["embedding", "id"]
|
|
415
|
+
|
|
416
|
+
# Rails < 8
|
|
417
|
+
execute "CREATE VIRTUAL TABLE items USING vec1(embedding, id)"
|
|
418
|
+
end
|
|
419
|
+
end
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
You can optionally ignore any shadow tables that are created
|
|
423
|
+
|
|
424
|
+
```ruby
|
|
425
|
+
ActiveRecord::SchemaDumper.ignore_tables += [
|
|
426
|
+
"items_base", "items_config", "items_idx", "items_meta", "items_model"
|
|
427
|
+
]
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
Get the `k` nearest neighbors
|
|
431
|
+
|
|
432
|
+
```ruby
|
|
433
|
+
Item.find_by_sql("SELECT * FROM items(vec1_from_json(?), ?)", [[1, 2, 3].to_json, {k: 5}.to_json])
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
### sqlite-vec
|
|
437
|
+
|
|
438
|
+
For [sqlite-vec](https://github.com/asg017/sqlite-vec), add this line to your application’s Gemfile:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
gem "sqlite-vec"
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
And run:
|
|
445
|
+
|
|
446
|
+
```sh
|
|
447
|
+
rails generate neighbor:sqlite
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
This speeds up `euclidean`, `cosine`, `taxicab`, and `hamming` distance
|
|
378
451
|
|
|
379
452
|
You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
|
|
380
453
|
|
|
381
454
|
```ruby
|
|
382
|
-
class
|
|
455
|
+
class CreateItems < ActiveRecord::Migration[8.1]
|
|
383
456
|
def change
|
|
384
457
|
# Rails 8+
|
|
385
458
|
create_virtual_table :items, :vec0, [
|
|
@@ -420,32 +493,6 @@ Filter by primary key
|
|
|
420
493
|
Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
|
|
421
494
|
```
|
|
422
495
|
|
|
423
|
-
### Int8 Vectors
|
|
424
|
-
|
|
425
|
-
Use the `type` option for int8 vectors
|
|
426
|
-
|
|
427
|
-
```ruby
|
|
428
|
-
class Item < ApplicationRecord
|
|
429
|
-
has_neighbors :embedding, dimensions: 3, type: :int8
|
|
430
|
-
end
|
|
431
|
-
```
|
|
432
|
-
|
|
433
|
-
### Binary Vectors
|
|
434
|
-
|
|
435
|
-
Use the `type` option for binary vectors
|
|
436
|
-
|
|
437
|
-
```ruby
|
|
438
|
-
class Item < ApplicationRecord
|
|
439
|
-
has_neighbors :embedding, dimensions: 8, type: :bit
|
|
440
|
-
end
|
|
441
|
-
```
|
|
442
|
-
|
|
443
|
-
Get the nearest neighbors by Hamming distance
|
|
444
|
-
|
|
445
|
-
```ruby
|
|
446
|
-
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
|
|
447
|
-
```
|
|
448
|
-
|
|
449
496
|
## Examples
|
|
450
497
|
|
|
451
498
|
- [Embeddings](#openai-embeddings) with OpenAI
|
data/lib/neighbor/model.rb
CHANGED
|
@@ -127,8 +127,10 @@ module Neighbor
|
|
|
127
127
|
neighbor_distance =
|
|
128
128
|
if distance == "cosine" && normalize_required
|
|
129
129
|
"POWER(#{order}, 2) / 2.0"
|
|
130
|
-
elsif
|
|
130
|
+
elsif distance == "inner_product"
|
|
131
131
|
"(#{order}) * -1"
|
|
132
|
+
elsif adapter == :sqlite && order.start_with?("vec1_l2_distance")
|
|
133
|
+
"sqrt(#{order})"
|
|
132
134
|
else
|
|
133
135
|
order
|
|
134
136
|
end
|
data/lib/neighbor/postgresql.rb
CHANGED
|
@@ -47,7 +47,7 @@ module Neighbor
|
|
|
47
47
|
|
|
48
48
|
module ArrayMethods
|
|
49
49
|
def type_cast_array(value, method, ...)
|
|
50
|
-
if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all?
|
|
50
|
+
if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all?(::Numeric)
|
|
51
51
|
super(ArrayWrapper.new(value), method, ...)
|
|
52
52
|
else
|
|
53
53
|
super
|
data/lib/neighbor/sqlite.rb
CHANGED
|
@@ -1,27 +1,139 @@
|
|
|
1
1
|
module Neighbor
|
|
2
2
|
module SQLite
|
|
3
|
+
class << self
|
|
4
|
+
attr_reader :extensions
|
|
5
|
+
end
|
|
6
|
+
|
|
3
7
|
# note: this is a public API (unlike PostgreSQL and MySQL)
|
|
4
|
-
def self.initialize!
|
|
5
|
-
|
|
8
|
+
def self.initialize!(extension: :sqlite_vec)
|
|
9
|
+
if extension == :sqlite_vec
|
|
10
|
+
require "sqlite_vec"
|
|
11
|
+
elsif !extension.is_a?(String)
|
|
12
|
+
raise ArgumentError, "Unsupported extension"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
(@extensions ||= []) << extension
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def self.initialize_adapter!
|
|
19
|
+
@extensions ||= []
|
|
6
20
|
|
|
7
21
|
require_relative "type/sqlite_vector"
|
|
8
22
|
require_relative "type/sqlite_int8_vector"
|
|
9
23
|
|
|
10
|
-
require "sqlite_vec"
|
|
11
24
|
require "active_record/connection_adapters/sqlite3_adapter"
|
|
12
|
-
|
|
13
25
|
ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(InstanceMethods)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def self.vec1?
|
|
29
|
+
extensions.any?(String)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def self.sqlite_vec?
|
|
33
|
+
extensions.include?(:sqlite_vec)
|
|
34
|
+
end
|
|
14
35
|
|
|
15
|
-
|
|
36
|
+
def self.setup_functions(db)
|
|
37
|
+
db.create_function("neighbor_l2_distance", 2) do |func, a, b, c|
|
|
38
|
+
func.result =
|
|
39
|
+
if a.nil? || b.nil?
|
|
40
|
+
nil
|
|
41
|
+
else
|
|
42
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
43
|
+
fmt = c == 1 ? "c*" : "f*"
|
|
44
|
+
a = a.unpack(fmt)
|
|
45
|
+
b = b.unpack(fmt)
|
|
46
|
+
Math.sqrt(a.zip(b).sum { |ai, bi| (ai - bi)**2 })
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
db.create_function("neighbor_max_inner_product", 2) do |func, a, b, c|
|
|
51
|
+
func.result =
|
|
52
|
+
if a.nil? || b.nil?
|
|
53
|
+
nil
|
|
54
|
+
else
|
|
55
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
56
|
+
fmt = c == 1 ? "c*" : "f*"
|
|
57
|
+
a = a.unpack(fmt)
|
|
58
|
+
b = b.unpack(fmt)
|
|
59
|
+
-a.zip(b).sum { |ai, bi| ai * bi }
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
db.create_function("neighbor_cosine_distance", 2) do |func, a, b, c|
|
|
64
|
+
func.result =
|
|
65
|
+
if a.nil? || b.nil?
|
|
66
|
+
nil
|
|
67
|
+
else
|
|
68
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
69
|
+
fmt = c == 1 ? "c*" : "f*"
|
|
70
|
+
a = a.unpack(fmt)
|
|
71
|
+
b = b.unpack(fmt)
|
|
72
|
+
similarity = a.zip(b).sum { |ai, bi| ai * bi }
|
|
73
|
+
norma = a.sum { |v| v * v }
|
|
74
|
+
normb = b.sum { |v| v * v }
|
|
75
|
+
1.0 - (similarity / Math.sqrt(norma * normb)).clamp(-1.0, 1.0)
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
db.create_function("neighbor_l1_distance", 2) do |func, a, b, c|
|
|
80
|
+
func.result =
|
|
81
|
+
if a.nil? || b.nil?
|
|
82
|
+
nil
|
|
83
|
+
else
|
|
84
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
85
|
+
fmt = c == 1 ? "c*" : "f*"
|
|
86
|
+
a = a.unpack(fmt)
|
|
87
|
+
b = b.unpack(fmt)
|
|
88
|
+
a.zip(b).sum { |ai, bi| (ai - bi).abs }
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
db.create_function("neighbor_hamming_distance", 2) do |func, a, b|
|
|
93
|
+
func.result =
|
|
94
|
+
if a.nil? || b.nil?
|
|
95
|
+
nil
|
|
96
|
+
else
|
|
97
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
98
|
+
# TODO improve
|
|
99
|
+
a.each_byte.zip(b.each_byte).sum { |ai, bi| (ai ^ bi).to_s(2).count("1") }
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
db.create_function("neighbor_jaccard_distance", 2) do |func, a, b|
|
|
104
|
+
func.result =
|
|
105
|
+
if a.nil? || b.nil?
|
|
106
|
+
nil
|
|
107
|
+
else
|
|
108
|
+
raise SQLite3::SQLException, "different vector dimensions" if a.bytesize != b.bytesize
|
|
109
|
+
# TODO improve
|
|
110
|
+
ab = a.each_byte.zip(b.each_byte).sum { |ai, bi| (ai & bi).to_s(2).count("1") }
|
|
111
|
+
aa = a.unpack1("B*").count("1")
|
|
112
|
+
bb = b.unpack1("B*").count("1")
|
|
113
|
+
ab == 0 ? 1.0 : 1.0 - (ab / (aa + bb - ab).to_f)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
16
116
|
end
|
|
17
117
|
|
|
18
118
|
module InstanceMethods
|
|
19
119
|
def configure_connection
|
|
20
120
|
super
|
|
21
121
|
db = @raw_connection
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
122
|
+
SQLite.setup_functions(db)
|
|
123
|
+
if SQLite.extensions.any?
|
|
124
|
+
db.enable_load_extension(1)
|
|
125
|
+
begin
|
|
126
|
+
SQLite.extensions.each do |extension|
|
|
127
|
+
if extension == :sqlite_vec
|
|
128
|
+
SqliteVec.load(db)
|
|
129
|
+
else
|
|
130
|
+
db.load_extension(extension)
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
ensure
|
|
134
|
+
db.enable_load_extension(0)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
25
137
|
end
|
|
26
138
|
end
|
|
27
139
|
end
|
data/lib/neighbor/utils.rb
CHANGED
|
@@ -76,13 +76,37 @@ module Neighbor
|
|
|
76
76
|
when :sqlite
|
|
77
77
|
case distance
|
|
78
78
|
when "euclidean"
|
|
79
|
-
|
|
79
|
+
if SQLite.vec1?
|
|
80
|
+
"vec1_l2_distance"
|
|
81
|
+
elsif SQLite.sqlite_vec?
|
|
82
|
+
"vec_distance_L2"
|
|
83
|
+
else
|
|
84
|
+
"neighbor_l2_distance"
|
|
85
|
+
end
|
|
80
86
|
when "cosine"
|
|
81
|
-
|
|
87
|
+
if SQLite.vec1?
|
|
88
|
+
"vec1_cos_distance"
|
|
89
|
+
elsif SQLite.sqlite_vec?
|
|
90
|
+
"vec_distance_cosine"
|
|
91
|
+
else
|
|
92
|
+
"neighbor_cosine_distance"
|
|
93
|
+
end
|
|
82
94
|
when "taxicab"
|
|
83
|
-
|
|
95
|
+
if SQLite.sqlite_vec?
|
|
96
|
+
"vec_distance_L1"
|
|
97
|
+
else
|
|
98
|
+
"neighbor_l1_distance"
|
|
99
|
+
end
|
|
100
|
+
when "inner_product"
|
|
101
|
+
"neighbor_max_inner_product"
|
|
84
102
|
when "hamming"
|
|
85
|
-
|
|
103
|
+
if SQLite.sqlite_vec?
|
|
104
|
+
"vec_distance_hamming"
|
|
105
|
+
else
|
|
106
|
+
"neighbor_hamming_distance"
|
|
107
|
+
end
|
|
108
|
+
when "jaccard"
|
|
109
|
+
"neighbor_jaccard_distance"
|
|
86
110
|
end
|
|
87
111
|
when :mariadb
|
|
88
112
|
case column_type
|
|
@@ -158,10 +182,15 @@ module Neighbor
|
|
|
158
182
|
def self.order(adapter, type, operator, quoted_attribute, query)
|
|
159
183
|
case adapter
|
|
160
184
|
when :sqlite
|
|
161
|
-
|
|
162
|
-
|
|
185
|
+
if operator.start_with?("neighbor")
|
|
186
|
+
if type == :bit
|
|
187
|
+
"#{operator}(#{quoted_attribute}, #{query})"
|
|
188
|
+
else
|
|
189
|
+
"#{operator}(#{quoted_attribute}, #{query}, #{type == :int8 ? 1 : 0})"
|
|
190
|
+
end
|
|
191
|
+
elsif type == :int8
|
|
163
192
|
"#{operator}(vec_int8(#{quoted_attribute}), vec_int8(#{query}))"
|
|
164
|
-
|
|
193
|
+
elsif type == :bit
|
|
165
194
|
"#{operator}(vec_bit(#{quoted_attribute}), vec_bit(#{query}))"
|
|
166
195
|
else
|
|
167
196
|
"#{operator}(#{quoted_attribute}, #{query})"
|
data/lib/neighbor/version.rb
CHANGED
data/lib/neighbor.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: neighbor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -74,7 +74,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
75
|
version: '0'
|
|
76
76
|
requirements: []
|
|
77
|
-
rubygems_version: 4.0.
|
|
77
|
+
rubygems_version: 4.0.6
|
|
78
78
|
specification_version: 4
|
|
79
79
|
summary: Nearest neighbor search for Rails
|
|
80
80
|
test_files: []
|