neighbor 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0c8b5d19222742f33f51f2c30f9d03108ebd3ed99908a7e9dd5f4e49caa2e225
4
- data.tar.gz: c9cfa942f2cdd8b9757c9ecfe5e89d0aced11263f8a559004ee15fa0c8adb3f4
3
+ metadata.gz: '09edc5a7eebbf6b14f06cb51340c5def49117a318340b4d2265321a8ce6a0bec'
4
+ data.tar.gz: fc8c8319cf715612f195836c84861eb327765355a0430f2d58fb5ab57857844e
5
5
  SHA512:
6
- metadata.gz: e9e0050031ce7691baa9242b3b6b5aa76afb1fe7c63575129e68b2f5c027143b3c08f68a7babfcf2a9b02f1d9327679f75e9c40b95ac2245ea7c8dd3025d3cdb
7
- data.tar.gz: a9c505740cba454437617733d4025360848a16ef9a4c9c83fc16d5bc82a3e5521c77e3cba874ef3cf318cf3a1e319567958a6156481f7fd82ef72ebaa87d97eb
6
+ metadata.gz: caa86d17e8a3f710988486264434767c33f8b197f9a8721d6dc762235a0bc959d5c186670f7518b9d628a771454861df1beb603a175ec804aa67cf6eb9e14361
7
+ data.tar.gz: 3ac9d60c57cc3e82b617820f205282b42684517070de22af5d94878959ef00e3758fb88f821ba3d3f2369602919a41d1706314f77436c8b3e5ef95acc38e3c17
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## 0.4.0 (2024-06-25)
2
+
3
+ - Added support for `halfvec` and `sparsevec` types
4
+ - Added support for `taxicab`, `hamming`, and `jaccard` distances with `vector` extension
5
+ - Added deserialization for `cube` and `vector` columns without `has_neighbor`
6
+ - Added support for composite primary keys
7
+ - Changed `nearest_neighbors` to replace previous `order` scopes
8
+ - Changed `normalize` option to use `before_save` callback
9
+ - Changed dimensions and finite values checks to use Active Record validations
10
+ - Fixed issue with `nearest_neighbors` scope overriding `select` values
11
+ - Removed default attribute name
12
+ - Dropped support for Ruby < 3.1
13
+
1
14
  ## 0.3.2 (2023-12-12)
2
15
 
3
16
  - Added deprecation warning for `has_neighbors` without an attribute name
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2021-2023 Andrew Kane
3
+ Copyright (c) 2021-2024 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Nearest neighbor search for Rails and Postgres
4
4
 
5
- [![Build Status](https://github.com/ankane/neighbor/workflows/build/badge.svg?branch=master)](https://github.com/ankane/neighbor/actions)
5
+ [![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -116,25 +116,25 @@ For vector, add an approximate index to speed up queries. Create a migration wit
116
116
  ```ruby
117
117
  class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.1]
118
118
  def change
119
- add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
120
- # or with pgvector 0.5.0+
121
119
  add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
120
+ # or
121
+ add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
122
122
  end
123
123
  end
124
124
  ```
125
125
 
126
126
  Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
127
127
 
128
- Set the number of probes with IVFFlat
128
+ Set the size of the dynamic candidate list with HNSW
129
129
 
130
130
  ```ruby
131
- Item.connection.execute("SET ivfflat.probes = 3")
131
+ Item.connection.execute("SET hnsw.ef_search = 100")
132
132
  ```
133
133
 
134
- Or the size of the dynamic candidate list with HNSW
134
+ Or the number of probes with IVFFlat
135
135
 
136
136
  ```ruby
137
- Item.connection.execute("SET hnsw.ef_search = 100")
137
+ Item.connection.execute("SET ivfflat.probes = 3")
138
138
  ```
139
139
 
140
140
  ## Examples
@@ -242,7 +242,7 @@ movies = []
242
242
  recommender.item_ids.each do |item_id|
243
243
  movies << {name: item_id, factors: recommender.item_factors(item_id)}
244
244
  end
245
- Movie.insert_all!(movies) # use create! for Active Record < 6
245
+ Movie.insert_all!(movies)
246
246
  ```
247
247
 
248
248
  And get similar movies
@@ -286,10 +286,5 @@ git clone https://github.com/ankane/neighbor.git
286
286
  cd neighbor
287
287
  bundle install
288
288
  createdb neighbor_test
289
-
290
- # cube
291
289
  bundle exec rake test
292
-
293
- # vector
294
- EXT=vector bundle exec rake test
295
290
  ```
@@ -1,3 +1,4 @@
1
+ require "rails/generators"
1
2
  require "rails/generators/active_record"
2
3
 
3
4
  module Neighbor
@@ -1,3 +1,4 @@
1
+ require "rails/generators"
1
2
  require "rails/generators/active_record"
2
3
 
3
4
  module Neighbor
@@ -2,11 +2,9 @@ module Neighbor
2
2
  module Model
3
3
  def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
4
4
  if attribute_names.empty?
5
- warn "[neighbor] has_neighbors without an attribute name is deprecated"
6
- attribute_names << :neighbor_vector
7
- else
8
- attribute_names.map!(&:to_sym)
5
+ raise ArgumentError, "has_neighbors requires an attribute name"
9
6
  end
7
+ attribute_names.map!(&:to_sym)
10
8
 
11
9
  class_eval do
12
10
  @neighbor_attributes ||= {}
@@ -27,30 +25,45 @@ module Neighbor
27
25
  attribute_names.each do |attribute_name|
28
26
  raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
29
27
  @neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
30
-
31
- attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
32
28
  end
33
29
 
34
30
  return if @neighbor_attributes.size != attribute_names.size
35
31
 
36
- scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
37
- # cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
38
- # https://github.com/rails/rails/issues/46934
39
- if options.nil? && vector.is_a?(Hash)
40
- options = vector
41
- vector = nil
32
+ validate do
33
+ self.class.neighbor_attributes.each do |k, v|
34
+ value = read_attribute(k)
35
+ next if value.nil?
36
+
37
+ column_info = self.class.columns_hash[k.to_s]
38
+ dimensions = v[:dimensions] || column_info&.limit
39
+
40
+ if !Neighbor::Utils.validate_dimensions(value, column_info&.type, dimensions).nil?
41
+ errors.add(k, "must have #{dimensions} dimensions")
42
+ end
43
+ if !Neighbor::Utils.validate_finite(value, column_info&.type)
44
+ errors.add(k, "must have finite values")
45
+ end
42
46
  end
47
+ end
48
+
49
+ # TODO move to normalizes when Active Record < 7.1 no longer supported
50
+ before_save do
51
+ self.class.neighbor_attributes.each do |k, v|
52
+ next unless v[:normalize]
53
+ value = read_attribute(k)
54
+ next if value.nil?
55
+ self[k] = Neighbor::Utils.normalize(value, column_info: self.class.columns_hash[k.to_s])
56
+ end
57
+ end
58
+
59
+ # cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
60
+ # https://github.com/rails/rails/issues/46934
61
+ scope :nearest_neighbors, ->(attribute_name, vector, options = nil) {
43
62
  raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
44
63
  distance = options.delete(:distance)
45
64
  raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
46
65
 
47
- if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
48
- warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
49
- vector = attribute_name
50
- attribute_name = :neighbor_vector
51
- end
52
66
  attribute_name = attribute_name.to_sym
53
-
54
67
  options = neighbor_attributes[attribute_name]
55
68
  raise ArgumentError, "Invalid attribute" unless options
56
69
  normalize = options[:normalize]
@@ -62,10 +75,21 @@ module Neighbor
62
75
 
63
76
  quoted_attribute = "#{connection.quote_table_name(table_name)}.#{connection.quote_column_name(attribute_name)}"
64
77
 
65
- column_info = klass.type_for_attribute(attribute_name).column_info
78
+ column_info = columns_hash[attribute_name.to_s]
79
+ column_type = column_info&.type
66
80
 
67
81
  operator =
68
- if column_info[:type] == :vector
82
+ case column_type
83
+ when :bit
84
+ case distance
85
+ when "hamming"
86
+ "<~>"
87
+ when "jaccard"
88
+ "<%>"
89
+ when "hamming2"
90
+ "#"
91
+ end
92
+ when :vector, :halfvec, :sparsevec
69
93
  case distance
70
94
  when "inner_product"
71
95
  "<#>"
@@ -73,8 +97,10 @@ module Neighbor
73
97
  "<=>"
74
98
  when "euclidean"
75
99
  "<->"
100
+ when "taxicab"
101
+ "<+>"
76
102
  end
77
- else
103
+ when :cube
78
104
  case distance
79
105
  when "taxicab"
80
106
  "<#>"
@@ -83,27 +109,27 @@ module Neighbor
83
109
  when "euclidean", "cosine"
84
110
  "<->"
85
111
  end
112
+ else
113
+ raise ArgumentError, "Unsupported type: #{column_type}"
86
114
  end
87
115
 
88
116
  raise ArgumentError, "Invalid distance: #{distance}" unless operator
89
117
 
90
118
  # ensure normalize set (can be true or false)
91
- if distance == "cosine" && column_info[:type] == :cube && normalize.nil?
119
+ if distance == "cosine" && column_type == :cube && normalize.nil?
92
120
  raise Neighbor::Error, "Set normalize for cosine distance with cube"
93
121
  end
94
122
 
95
- vector = Neighbor::Vector.cast(vector, dimensions: dimensions, normalize: normalize, column_info: column_info)
96
-
97
- # important! neighbor_vector should already be typecast
98
- # but use to_f as extra safeguard against SQL injection
99
- query =
100
- if column_info[:type] == :vector
101
- connection.quote("[#{vector.map(&:to_f).join(", ")}]")
102
- else
103
- "cube(array[#{vector.map(&:to_f).join(", ")}])"
104
- end
123
+ column_attribute = klass.type_for_attribute(attribute_name)
124
+ vector = column_attribute.cast(vector)
125
+ Neighbor::Utils.validate(vector, dimensions: dimensions, column_info: column_info)
126
+ vector = Neighbor::Utils.normalize(vector, column_info: column_info) if normalize
105
127
 
128
+ query = connection.quote(column_attribute.serialize(vector))
106
129
  order = "#{quoted_attribute} #{operator} #{query}"
130
+ if operator == "#"
131
+ order = "bit_count(#{order})"
132
+ end
107
133
 
108
134
  # https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
109
135
  # with normalized vectors:
@@ -111,31 +137,28 @@ module Neighbor
111
137
  # cosine distance = 1 - cosine similarity
112
138
  # this transformation doesn't change the order, so only needed for select
113
139
  neighbor_distance =
114
- if column_info[:type] != :vector && distance == "cosine"
140
+ if column_type == :cube && distance == "cosine"
115
141
  "POWER(#{order}, 2) / 2.0"
116
- elsif column_info[:type] == :vector && distance == "inner_product"
142
+ elsif [:vector, :halfvec, :sparsevec].include?(column_type) && distance == "inner_product"
117
143
  "(#{order}) * -1"
118
144
  else
119
145
  order
120
146
  end
121
147
 
122
148
  # for select, use column_names instead of * to account for ignored columns
123
- select(*column_names, "#{neighbor_distance} AS neighbor_distance")
149
+ select_columns = select_values.any? ? [] : column_names
150
+ select(*select_columns, "#{neighbor_distance} AS neighbor_distance")
124
151
  .where.not(attribute_name => nil)
125
- .order(Arel.sql(order))
152
+ .reorder(Arel.sql(order))
126
153
  }
127
154
 
128
- def nearest_neighbors(attribute_name = nil, **options)
129
- if attribute_name.nil?
130
- warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
131
- attribute_name = :neighbor_vector
132
- end
155
+ def nearest_neighbors(attribute_name, **options)
133
156
  attribute_name = attribute_name.to_sym
134
- # important! check if neighbor attribute before calling send
157
+ # important! check if neighbor attribute before accessing
135
158
  raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name]
136
159
 
137
160
  self.class
138
- .where.not(self.class.primary_key => self[self.class.primary_key])
161
+ .where.not(Array(self.class.primary_key).to_h { |k| [k, self[k]] })
139
162
  .nearest_neighbors(attribute_name, self[attribute_name], **options)
140
163
  end
141
164
  end
@@ -1,16 +1,16 @@
1
1
  module Neighbor
2
2
  class Railtie < Rails::Railtie
3
3
  generators do
4
+ require "rails/generators/generated_attribute"
5
+
4
6
  # rails generate model Item embedding:vector{3}
5
- if defined?(Rails::Generators::GeneratedAttribute)
6
- Rails::Generators::GeneratedAttribute.singleton_class.prepend(Neighbor::GeneratedAttribute)
7
- end
7
+ Rails::Generators::GeneratedAttribute.singleton_class.prepend(Neighbor::GeneratedAttribute)
8
8
  end
9
9
  end
10
10
 
11
11
  module GeneratedAttribute
12
12
  def parse_type_and_options(type, *, **)
13
- if type =~ /\A(vector)\{(\d+)\}\z/
13
+ if type =~ /\A(vector|halfvec|sparsevec)\{(\d+)\}\z/
14
14
  return $1, limit: $2.to_i
15
15
  end
16
16
  super
@@ -0,0 +1,79 @@
1
+ module Neighbor
2
+ class SparseVector
3
+ attr_reader :dimensions, :indices, :values
4
+
5
+ NO_DEFAULT = Object.new
6
+
7
+ def initialize(value, dimensions = NO_DEFAULT)
8
+ if value.is_a?(Hash)
9
+ if dimensions == NO_DEFAULT
10
+ raise ArgumentError, "missing dimensions"
11
+ end
12
+ from_hash(value, dimensions)
13
+ else
14
+ unless dimensions == NO_DEFAULT
15
+ raise ArgumentError, "extra argument"
16
+ end
17
+ from_array(value)
18
+ end
19
+ end
20
+
21
+ def to_s
22
+ "{#{@indices.zip(@values).map { |i, v| "#{i.to_i + 1}:#{v.to_f}" }.join(",")}}/#{@dimensions.to_i}"
23
+ end
24
+
25
+ def to_a
26
+ arr = Array.new(dimensions, 0.0)
27
+ @indices.zip(@values) do |i, v|
28
+ arr[i] = v
29
+ end
30
+ arr
31
+ end
32
+
33
+ private
34
+
35
+ def from_hash(data, dimensions)
36
+ elements = data.select { |_, v| v != 0 }.sort
37
+ @dimensions = dimensions.to_i
38
+ @indices = elements.map { |v| v[0].to_i }
39
+ @values = elements.map { |v| v[1].to_f }
40
+ end
41
+
42
+ def from_array(arr)
43
+ arr = arr.to_a
44
+ @dimensions = arr.size
45
+ @indices = []
46
+ @values = []
47
+ arr.each_with_index do |v, i|
48
+ if v != 0
49
+ @indices << i
50
+ @values << v.to_f
51
+ end
52
+ end
53
+ end
54
+
55
+ class << self
56
+ def from_text(string)
57
+ elements, dimensions = string.split("/", 2)
58
+ indices = []
59
+ values = []
60
+ elements[1..-2].split(",").each do |e|
61
+ index, value = e.split(":", 2)
62
+ indices << index.to_i - 1
63
+ values << value.to_f
64
+ end
65
+ from_parts(dimensions.to_i, indices, values)
66
+ end
67
+
68
+ private
69
+
70
+ def from_parts(dimensions, indices, values)
71
+ vec = allocate
72
+ vec.instance_variable_set(:@dimensions, dimensions)
73
+ vec.instance_variable_set(:@indices, indices)
74
+ vec.instance_variable_set(:@values, values)
75
+ vec
76
+ end
77
+ end
78
+ end
79
+ end
@@ -1,36 +1,40 @@
1
1
  module Neighbor
2
2
  module Type
3
- class Cube < ActiveRecord::Type::String
3
+ class Cube < ActiveRecord::Type::Value
4
4
  def type
5
5
  :cube
6
6
  end
7
7
 
8
- def cast(value)
8
+ def serialize(value)
9
9
  if value.is_a?(Array)
10
10
  if value.first.is_a?(Array)
11
- value.map { |v| cast_point(v) }.join(", ")
11
+ value = value.map { |v| serialize_point(v) }.join(", ")
12
12
  else
13
- cast_point(value)
13
+ value = serialize_point(value)
14
14
  end
15
- else
16
- super
17
15
  end
16
+ super(value)
18
17
  end
19
18
 
20
- # TODO uncomment in 0.4.0
21
- # def deserialize(value)
22
- # if value.nil?
23
- # super
24
- # elsif value.include?("),(")
25
- # value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) }
26
- # else
27
- # value[1..-1].split(",").map(&:to_f)
28
- # end
29
- # end
30
-
31
19
  private
32
20
 
33
- def cast_point(value)
21
+ def cast_value(value)
22
+ if value.is_a?(Array)
23
+ value
24
+ elsif value.is_a?(Numeric)
25
+ [value]
26
+ elsif value.is_a?(String)
27
+ if value.include?("),(")
28
+ value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) }
29
+ else
30
+ value[1..-1].split(",").map(&:to_f)
31
+ end
32
+ else
33
+ raise "can't cast #{value.class.name} to cube"
34
+ end
35
+ end
36
+
37
+ def serialize_point(value)
34
38
  "(#{value.map(&:to_f).join(", ")})"
35
39
  end
36
40
  end
@@ -0,0 +1,28 @@
1
+ module Neighbor
2
+ module Type
3
+ class Halfvec < ActiveRecord::Type::Value
4
+ def type
5
+ :halfvec
6
+ end
7
+
8
+ def serialize(value)
9
+ if value.is_a?(Array)
10
+ value = "[#{value.map(&:to_f).join(",")}]"
11
+ end
12
+ super(value)
13
+ end
14
+
15
+ private
16
+
17
+ def cast_value(value)
18
+ if value.is_a?(String)
19
+ value[1..-1].split(",").map(&:to_f)
20
+ elsif value.is_a?(Array)
21
+ value
22
+ else
23
+ raise "can't cast #{value.class.name} to halfvec"
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,30 @@
1
+ module Neighbor
2
+ module Type
3
+ class Sparsevec < ActiveRecord::Type::Value
4
+ def type
5
+ :sparsevec
6
+ end
7
+
8
+ def serialize(value)
9
+ if value.is_a?(SparseVector)
10
+ value = "{#{value.indices.zip(value.values).map { |i, v| "#{i.to_i + 1}:#{v.to_f}" }.join(",")}}/#{value.dimensions.to_i}"
11
+ end
12
+ super(value)
13
+ end
14
+
15
+ private
16
+
17
+ def cast_value(value)
18
+ if value.is_a?(SparseVector)
19
+ value
20
+ elsif value.is_a?(String)
21
+ SparseVector.from_text(value)
22
+ elsif value.is_a?(Array)
23
+ value = SparseVector.new(value)
24
+ else
25
+ raise "can't cast #{value.class.name} to sparsevec"
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -1,14 +1,28 @@
1
1
  module Neighbor
2
2
  module Type
3
- class Vector < ActiveRecord::Type::String
3
+ class Vector < ActiveRecord::Type::Value
4
4
  def type
5
5
  :vector
6
6
  end
7
7
 
8
- # TODO uncomment in 0.4.0
9
- # def deserialize(value)
10
- # value[1..-1].split(",").map(&:to_f) unless value.nil?
11
- # end
8
+ def serialize(value)
9
+ if value.is_a?(Array)
10
+ value = "[#{value.map(&:to_f).join(",")}]"
11
+ end
12
+ super(value)
13
+ end
14
+
15
+ private
16
+
17
+ def cast_value(value)
18
+ if value.is_a?(String)
19
+ value[1..-1].split(",").map(&:to_f)
20
+ elsif value.is_a?(Array)
21
+ value
22
+ else
23
+ raise "can't cast #{value.class.name} to vector"
24
+ end
25
+ end
12
26
  end
13
27
  end
14
28
  end
@@ -0,0 +1,42 @@
1
+ module Neighbor
2
+ module Utils
3
+ def self.validate_dimensions(value, type, expected)
4
+ dimensions = type == :sparsevec ? value.dimensions : value.size
5
+ if expected && dimensions != expected
6
+ "Expected #{expected} dimensions, not #{dimensions}"
7
+ end
8
+ end
9
+
10
+ def self.validate_finite(value, type)
11
+ case type
12
+ when :bit
13
+ true
14
+ when :sparsevec
15
+ value.values.all?(&:finite?)
16
+ else
17
+ value.all?(&:finite?)
18
+ end
19
+ end
20
+
21
+ def self.validate(value, dimensions:, column_info:)
22
+ if (message = validate_dimensions(value, column_info&.type, dimensions || column_info&.limit))
23
+ raise Error, message
24
+ end
25
+
26
+ if !validate_finite(value, column_info&.type)
27
+ raise Error, "Values must be finite"
28
+ end
29
+ end
30
+
31
+ def self.normalize(value, column_info:)
32
+ raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type)
33
+
34
+ norm = Math.sqrt(value.sum { |v| v * v })
35
+
36
+ # store zero vector as all zeros
37
+ # since NaN makes the distance always 0
38
+ # could also throw error
39
+ norm > 0 ? value.map { |v| v / norm } : value
40
+ end
41
+ end
42
+ end
@@ -1,3 +1,3 @@
1
1
  module Neighbor
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/neighbor.rb CHANGED
@@ -2,6 +2,8 @@
2
2
  require "active_support"
3
3
 
4
4
  # modules
5
+ require_relative "neighbor/sparse_vector"
6
+ require_relative "neighbor/utils"
5
7
  require_relative "neighbor/version"
6
8
 
7
9
  module Neighbor
@@ -11,6 +13,14 @@ module Neighbor
11
13
  def initialize_type_map(m = type_map)
12
14
  super
13
15
  m.register_type "cube", Type::Cube.new
16
+ m.register_type "halfvec" do |_, _, sql_type|
17
+ limit = extract_limit(sql_type)
18
+ Type::Halfvec.new(limit: limit)
19
+ end
20
+ m.register_type "sparsevec" do |_, _, sql_type|
21
+ limit = extract_limit(sql_type)
22
+ Type::Sparsevec.new(limit: limit)
23
+ end
14
24
  m.register_type "vector" do |_, _, sql_type|
15
25
  limit = extract_limit(sql_type)
16
26
  Type::Vector.new(limit: limit)
@@ -21,8 +31,9 @@ end
21
31
 
22
32
  ActiveSupport.on_load(:active_record) do
23
33
  require_relative "neighbor/model"
24
- require_relative "neighbor/vector"
25
34
  require_relative "neighbor/type/cube"
35
+ require_relative "neighbor/type/halfvec"
36
+ require_relative "neighbor/type/sparsevec"
26
37
  require_relative "neighbor/type/vector"
27
38
 
28
39
  extend Neighbor::Model
@@ -31,10 +42,12 @@ ActiveSupport.on_load(:active_record) do
31
42
 
32
43
  # ensure schema can be dumped
33
44
  ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:cube] = {name: "cube"}
45
+ ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:halfvec] = {name: "halfvec"}
46
+ ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:sparsevec] = {name: "sparsevec"}
34
47
  ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:vector] = {name: "vector"}
35
48
 
36
49
  # ensure schema can be loaded
37
- ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube, :vector)
50
+ ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube, :halfvec, :sparsevec, :vector)
38
51
 
39
52
  # prevent unknown OID warning
40
53
  if ActiveRecord::VERSION::MAJOR >= 7
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: neighbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-12 00:00:00.000000000 Z
11
+ date: 2024-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -40,9 +40,12 @@ files:
40
40
  - lib/neighbor.rb
41
41
  - lib/neighbor/model.rb
42
42
  - lib/neighbor/railtie.rb
43
+ - lib/neighbor/sparse_vector.rb
43
44
  - lib/neighbor/type/cube.rb
45
+ - lib/neighbor/type/halfvec.rb
46
+ - lib/neighbor/type/sparsevec.rb
44
47
  - lib/neighbor/type/vector.rb
45
- - lib/neighbor/vector.rb
48
+ - lib/neighbor/utils.rb
46
49
  - lib/neighbor/version.rb
47
50
  homepage: https://github.com/ankane/neighbor
48
51
  licenses:
@@ -56,14 +59,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
56
59
  requirements:
57
60
  - - ">="
58
61
  - !ruby/object:Gem::Version
59
- version: '3'
62
+ version: '3.1'
60
63
  required_rubygems_version: !ruby/object:Gem::Requirement
61
64
  requirements:
62
65
  - - ">="
63
66
  - !ruby/object:Gem::Version
64
67
  version: '0'
65
68
  requirements: []
66
- rubygems_version: 3.4.10
69
+ rubygems_version: 3.5.11
67
70
  signing_key:
68
71
  specification_version: 4
69
72
  summary: Nearest neighbor search for Rails and Postgres
@@ -1,65 +0,0 @@
1
- module Neighbor
2
- class Vector < ActiveRecord::Type::Value
3
- def initialize(dimensions:, normalize:, model:, attribute_name:)
4
- super()
5
- @dimensions = dimensions
6
- @normalize = normalize
7
- @model = model
8
- @attribute_name = attribute_name
9
- end
10
-
11
- def self.cast(value, dimensions:, normalize:, column_info:)
12
- value = value.to_a.map(&:to_f)
13
-
14
- dimensions ||= column_info[:dimensions]
15
- raise Error, "Expected #{dimensions} dimensions, not #{value.size}" if dimensions && value.size != dimensions
16
-
17
- raise Error, "Values must be finite" unless value.all?(&:finite?)
18
-
19
- if normalize
20
- norm = Math.sqrt(value.sum { |v| v * v })
21
-
22
- # store zero vector as all zeros
23
- # since NaN makes the distance always 0
24
- # could also throw error
25
-
26
- # safe to update in-place since earlier map dups
27
- value.map! { |v| v / norm } if norm > 0
28
- end
29
-
30
- value
31
- end
32
-
33
- def self.column_info(model, attribute_name)
34
- attribute_name = attribute_name.to_s
35
- column = model.columns.detect { |c| c.name == attribute_name }
36
- {
37
- type: column.try(:type),
38
- dimensions: column.try(:limit)
39
- }
40
- end
41
-
42
- # need to be careful to avoid loading column info before needed
43
- def column_info
44
- @column_info ||= self.class.column_info(@model, @attribute_name)
45
- end
46
-
47
- def cast(value)
48
- self.class.cast(value, dimensions: @dimensions, normalize: @normalize, column_info: column_info) unless value.nil?
49
- end
50
-
51
- def serialize(value)
52
- unless value.nil?
53
- if column_info[:type] == :vector
54
- "[#{cast(value).join(", ")}]"
55
- else
56
- "(#{cast(value).join(", ")})"
57
- end
58
- end
59
- end
60
-
61
- def deserialize(value)
62
- value[1..-1].split(",").map(&:to_f) unless value.nil?
63
- end
64
- end
65
- end