neighbor 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d7036a69b1c57161eaeb11e38feee92c1d5082ddbe8907a83ac3126adf9ae56
4
- data.tar.gz: c88e4400b75d2a87f766f7e0b7ff6c5311d9c7866d32e7c2b9aa7601f60c474f
3
+ metadata.gz: fb8418a71159a849442d146643f732c28d10a5237d94a5b5b7d7466a224a40b5
4
+ data.tar.gz: 8a7ededb3071fdef4a77bbbdf4596d74ed323327d4d29ea15538e344d25af2f7
5
5
  SHA512:
6
- metadata.gz: d3c4c25404fb64f324fbba70edcf06d827d3708905ed4a84404a6c9ce39f27b6890d449b285ce302e24495a666c34f0bf3050270b54ef8d283f53ebeb19e4e91
7
- data.tar.gz: 63927a8801a88edd48f74ce85d056d7112fa526b37d473b232256b5f2d47e5254b34b25e0312afc01bfedcd6a9d7826496f208e0ab0d3f57c3307fa298b8984e
6
+ metadata.gz: 05b3b8ccd07570f531aef0132a80563aef046b88508e732b2f16fe02f6c0303bd6f6fdb00804a646845086ba7fd133358503a7c7b69b098c02f548434c859575
7
+ data.tar.gz: 9bcb61d4e1453e30daeab1fff0364c023278405d170b435cdfb262aac0aa0d91622e19471fbf3f3076e4c944d09318dc81cf334fdac7767fa6f72d95c8560907
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.5.2 (2025-01-05)
2
+
3
+ - Improved support for Postgres arrays
4
+
5
+ ## 0.5.1 (2024-12-03)
6
+
7
+ - Added experimental support for MariaDB 11.7
8
+ - Dropped experimental support for MariaDB 11.6 Vector
9
+
1
10
  ## 0.5.0 (2024-10-07)
2
11
 
3
12
  - Added experimental support for SQLite (sqlite-vec)
data/README.md CHANGED
@@ -6,7 +6,7 @@ Supports:
6
6
 
7
7
  - Postgres (cube and pgvector)
8
8
  - SQLite (sqlite-vec) - experimental
9
- - MariaDB 11.6 Vector - experimental
9
+ - MariaDB 11.7 - experimental
10
10
  - MySQL 9 (searching requires HeatWave) - experimental
11
11
 
12
12
  [![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions)
@@ -56,15 +56,15 @@ rails generate neighbor:sqlite
56
56
  Create a migration
57
57
 
58
58
  ```ruby
59
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
59
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
60
60
  def change
61
61
  # cube
62
62
  add_column :items, :embedding, :cube
63
63
 
64
- # pgvector and MySQL
64
+ # pgvector, MariaDB, and MySQL
65
65
  add_column :items, :embedding, :vector, limit: 3 # dimensions
66
66
 
67
- # sqlite-vec and MariaDB
67
+ # sqlite-vec
68
68
  add_column :items, :embedding, :binary
69
69
  end
70
70
  end
@@ -174,7 +174,7 @@ The `sparsevec` type can have up to 16,000 non-zero elements, and sparse vectors
174
174
  Add an approximate index to speed up queries. Create a migration with:
175
175
 
176
176
  ```ruby
177
- class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.2]
177
+ class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
178
178
  def change
179
179
  add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
180
180
  # or
@@ -202,7 +202,7 @@ Item.connection.execute("SET ivfflat.probes = 3")
202
202
  Use the `halfvec` type to store half-precision vectors
203
203
 
204
204
  ```ruby
205
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
205
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
206
206
  def change
207
207
  add_column :items, :embedding, :halfvec, limit: 3 # dimensions
208
208
  end
@@ -214,7 +214,7 @@ end
214
214
  Index vectors at half precision for smaller indexes
215
215
 
216
216
  ```ruby
217
- class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.2]
217
+ class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
218
218
  def change
219
219
  add_index :items, "(embedding::halfvec(3)) vector_l2_ops", using: :hnsw
220
220
  end
@@ -232,7 +232,7 @@ Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean", preci
232
232
  Use the `bit` type to store binary vectors
233
233
 
234
234
  ```ruby
235
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
235
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
236
236
  def change
237
237
  add_column :items, :embedding, :bit, limit: 3 # dimensions
238
238
  end
@@ -250,7 +250,7 @@ Item.nearest_neighbors(:embedding, "101", distance: "hamming").first(5)
250
250
  Use expression indexing for binary quantization
251
251
 
252
252
  ```ruby
253
- class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.2]
253
+ class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0]
254
254
  def change
255
255
  add_index :items, "(binary_quantize(embedding)::bit(3)) bit_hamming_ops", using: :hnsw
256
256
  end
@@ -262,7 +262,7 @@ end
262
262
  Use the `sparsevec` type to store sparse vectors
263
263
 
264
264
  ```ruby
265
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
265
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
266
266
  def change
267
267
  add_column :items, :embedding, :sparsevec, limit: 3 # dimensions
268
268
  end
@@ -302,19 +302,21 @@ end
302
302
  You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html)
303
303
 
304
304
  ```ruby
305
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
305
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
306
306
  def change
307
+ # Rails 8+
308
+ create_virtual_table :items, :vec0, [
309
+ "id integer PRIMARY KEY AUTOINCREMENT NOT NULL",
310
+ "embedding float[3] distance_metric=L2"
311
+ ]
312
+
307
313
  # Rails < 8
308
314
  execute <<~SQL
309
315
  CREATE VIRTUAL TABLE items USING vec0(
316
+ id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
310
317
  embedding float[3] distance_metric=L2
311
318
  )
312
319
  SQL
313
-
314
- # Rails 8+
315
- create_virtual_table :items, :vec0, [
316
- "embedding float[3] distance_metric=L2"
317
- ]
318
320
  end
319
321
  end
320
322
  ```
@@ -329,16 +331,6 @@ ActiveRecord::SchemaDumper.ignore_tables += [
329
331
  ]
330
332
  ```
331
333
 
332
- Create a model with `rowid` as the primary key
333
-
334
- ```ruby
335
- class Item < ApplicationRecord
336
- self.primary_key = "rowid"
337
-
338
- has_neighbors :embedding, dimensions: 3
339
- end
340
- ```
341
-
342
334
  Get the `k` nearest neighbors
343
335
 
344
336
  ```ruby
@@ -348,7 +340,7 @@ Item.where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
348
340
  Filter by primary key
349
341
 
350
342
  ```ruby
351
- Item.where(rowid: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
343
+ Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance)
352
344
  ```
353
345
 
354
346
  ### Int8 Vectors
@@ -387,23 +379,15 @@ Supported values are:
387
379
  - `cosine`
388
380
  - `hamming`
389
381
 
390
- For cosine distance with MariaDB, vectors must be normalized before being stored.
391
-
392
- ```ruby
393
- class Item < ApplicationRecord
394
- has_neighbors :embedding, normalize: true
395
- end
396
- ```
397
-
398
382
  ### Indexing
399
383
 
400
384
  Vector columns must use `null: false` to add a vector index
401
385
 
402
386
  ```ruby
403
- class CreateItems < ActiveRecord::Migration[7.2]
387
+ class CreateItems < ActiveRecord::Migration[8.0]
404
388
  def change
405
389
  create_table :items do |t|
406
- t.binary :embedding, null: false
390
+ t.vector :embedding, limit: 3, null: false
407
391
  t.index :embedding, type: :vector
408
392
  end
409
393
  end
@@ -415,7 +399,7 @@ end
415
399
  Use the `bigint` type to store binary vectors
416
400
 
417
401
  ```ruby
418
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
402
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
419
403
  def change
420
404
  add_column :items, :embedding, :bigint
421
405
  end
@@ -447,7 +431,7 @@ Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysq
447
431
  Use the `binary` type to store binary vectors
448
432
 
449
433
  ```ruby
450
- class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
434
+ class AddEmbeddingToItems < ActiveRecord::Migration[8.0]
451
435
  def change
452
436
  add_column :items, :embedding, :binary
453
437
  end
@@ -892,7 +876,7 @@ bundle exec rake test:postgresql
892
876
  bundle exec rake test:sqlite
893
877
 
894
878
  # MariaDB
895
- docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 quay.io/mariadb-foundation/mariadb-devel:11.6-vector-preview
879
+ docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.7-rc
896
880
  bundle exec rake test:mariadb
897
881
 
898
882
  # MySQL
@@ -19,6 +19,9 @@ module Neighbor
19
19
 
20
20
  # prevent unknown OID warning
21
21
  ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(RegisterTypes)
22
+
23
+ # support vector[]/halfvec[]
24
+ ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.prepend(ArrayMethods)
22
25
  end
23
26
 
24
27
  module RegisterTypes
@@ -39,5 +42,17 @@ module Neighbor
39
42
  end
40
43
  end
41
44
  end
45
+
46
+ ArrayWrapper = Struct.new(:to_a)
47
+
48
+ module ArrayMethods
49
+ def type_cast_array(value, method, ...)
50
+ if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all? { |v| v.is_a?(::Numeric) }
51
+ super(ArrayWrapper.new(value), method, ...)
52
+ else
53
+ super
54
+ end
55
+ end
56
+ end
42
57
  end
43
58
  end
@@ -33,7 +33,7 @@ module Neighbor
33
33
  def self.normalize(value, column_info:)
34
34
  return nil if value.nil?
35
35
 
36
- raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec, :binary].include?(column_info&.type)
36
+ raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type)
37
37
 
38
38
  norm = Math.sqrt(value.sum { |v| v * v })
39
39
 
@@ -86,10 +86,12 @@ module Neighbor
86
86
  end
87
87
  when :mariadb
88
88
  case column_type
89
- when :binary
89
+ when :vector
90
90
  case distance
91
- when "euclidean", "cosine"
92
- "VEC_DISTANCE"
91
+ when "euclidean"
92
+ "VEC_DISTANCE_EUCLIDEAN"
93
+ when "cosine"
94
+ "VEC_DISTANCE_COSINE"
93
95
  end
94
96
  when :integer
95
97
  case distance
@@ -168,7 +170,7 @@ module Neighbor
168
170
  if operator == "BIT_COUNT"
169
171
  "BIT_COUNT(#{quoted_attribute} ^ #{query})"
170
172
  else
171
- "VEC_DISTANCE(#{quoted_attribute}, #{query})"
173
+ "#{operator}(#{quoted_attribute}, #{query})"
172
174
  end
173
175
  when :mysql
174
176
  if operator == "BIT_COUNT"
@@ -191,8 +193,6 @@ module Neighbor
191
193
  case adapter
192
194
  when :postgresql
193
195
  column_type == :cube
194
- when :mariadb
195
- true
196
196
  else
197
197
  false
198
198
  end
@@ -1,3 +1,3 @@
1
1
  module Neighbor
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: neighbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-10-08 00:00:00.000000000 Z
10
+ date: 2025-01-05 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: activerecord
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '7'
27
- description:
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions: []
@@ -62,7 +60,6 @@ homepage: https://github.com/ankane/neighbor
62
60
  licenses:
63
61
  - MIT
64
62
  metadata: {}
65
- post_install_message:
66
63
  rdoc_options: []
67
64
  require_paths:
68
65
  - lib
@@ -77,8 +74,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
74
  - !ruby/object:Gem::Version
78
75
  version: '0'
79
76
  requirements: []
80
- rubygems_version: 3.5.16
81
- signing_key:
77
+ rubygems_version: 3.6.2
82
78
  specification_version: 4
83
79
  summary: Nearest neighbor search for Rails
84
80
  test_files: []