neighbor 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +14 -5
- data/lib/neighbor.rb +14 -1
- data/lib/neighbor/model.rb +9 -3
- data/lib/neighbor/vector.rb +1 -5
- data/lib/neighbor/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01df817b036ee9b4d0c54ddadfb3be7fb31b36bb81504321b8dfd3d7a1ba83a4
|
4
|
+
data.tar.gz: f7a1757914f2e1226bc3b95fe5e0df9837a530315d6e627062df565e331b1708
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8381834f92092cb13d2c8898b588f0758e99e48f84e3b506aab15acf54fb0b3f72449734ca68db10de466eecde4a7b277cceca72192240ce21567125ad0af52f
|
7
|
+
data.tar.gz: 79ddd953bf0c134d73c92c61a8051524c024e2e1c95710e346a191a8a7c098f55562f81263752ea2e2ccaef9ced8e376dbf6431a4e745931d256ee2ca854d309
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,8 @@ rails generate neighbor:install
|
|
20
20
|
rails db:migrate
|
21
21
|
```
|
22
22
|
|
23
|
+
This enables the [cube extension](https://www.postgresql.org/docs/current/cube.html) in Postgres
|
24
|
+
|
23
25
|
## Getting Started
|
24
26
|
|
25
27
|
Create a migration
|
@@ -48,13 +50,13 @@ item.update(neighbor_vector: [1.0, 1.2, 0.5])
|
|
48
50
|
|
49
51
|
> With cosine distance (the default), vectors are normalized before being stored
|
50
52
|
|
51
|
-
|
53
|
+
Get the nearest neighbors
|
52
54
|
|
53
55
|
```ruby
|
54
56
|
item.nearest_neighbors.first(5)
|
55
57
|
```
|
56
58
|
|
57
|
-
##
|
59
|
+
## Distance
|
58
60
|
|
59
61
|
Specify the distance metric
|
60
62
|
|
@@ -64,19 +66,24 @@ class Item < ApplicationRecord
|
|
64
66
|
end
|
65
67
|
```
|
66
68
|
|
67
|
-
Supported
|
69
|
+
Supported values are:
|
68
70
|
|
69
71
|
- `cosine` (default)
|
70
72
|
- `euclidean`
|
71
73
|
- `taxicab`
|
72
74
|
- `chebyshev`
|
73
75
|
|
74
|
-
|
76
|
+
Records returned from `nearest_neighbors` will have a `neighbor_distance` attribute
|
75
77
|
|
76
78
|
```ruby
|
77
|
-
|
79
|
+
nearest_item = item.nearest_neighbors.first
|
80
|
+
nearest_item.neighbor_distance
|
78
81
|
```
|
79
82
|
|
83
|
+
## Dimensions
|
84
|
+
|
85
|
+
By default, Postgres limits the `cube` data type to 100 dimensions. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this.
|
86
|
+
|
80
87
|
## Example
|
81
88
|
|
82
89
|
You can use Neighbor for online item recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example.
|
@@ -119,6 +126,8 @@ movie = Movie.find_by(name: "Star Wars (1977)")
|
|
119
126
|
movie.nearest_neighbors.first(5).map(&:name)
|
120
127
|
```
|
121
128
|
|
129
|
+
[Complete code](examples/disco.rb)
|
130
|
+
|
122
131
|
## History
|
123
132
|
|
124
133
|
View the [changelog](https://github.com/ankane/neighbor/blob/master/CHANGELOG.md)
|
data/lib/neighbor.rb
CHANGED
@@ -21,7 +21,20 @@ ActiveSupport.on_load(:active_record) do
|
|
21
21
|
|
22
22
|
extend Neighbor::Model
|
23
23
|
|
24
|
-
# prevent unknown OID warning
|
25
24
|
require "active_record/connection_adapters/postgresql_adapter"
|
25
|
+
|
26
|
+
# ensure schema can be dumped
|
27
|
+
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:cube] = {name: "cube"}
|
28
|
+
|
29
|
+
# ensure schema can be loaded
|
30
|
+
if ActiveRecord::VERSION::MAJOR >= 6
|
31
|
+
ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube)
|
32
|
+
else
|
33
|
+
ActiveRecord::ConnectionAdapters::TableDefinition.define_method :cube do |*args, **options|
|
34
|
+
args.each { |name| column(name, :cube, options) }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# prevent unknown OID warning
|
26
39
|
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(Neighbor::RegisterCubeType)
|
27
40
|
end
|
data/lib/neighbor/model.rb
CHANGED
@@ -4,8 +4,12 @@ module Neighbor
|
|
4
4
|
distance = distance.to_s
|
5
5
|
raise ArgumentError, "Invalid distance: #{distance}" unless %w(cosine euclidean taxicab chebyshev).include?(distance)
|
6
6
|
|
7
|
+
# TODO make configurable
|
8
|
+
# likely use argument
|
9
|
+
attribute_name = :neighbor_vector
|
10
|
+
|
7
11
|
class_eval do
|
8
|
-
attribute
|
12
|
+
attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, distance: distance)
|
9
13
|
|
10
14
|
define_method :nearest_neighbors do
|
11
15
|
return self.class.none if neighbor_vector.nil?
|
@@ -20,9 +24,11 @@ module Neighbor
|
|
20
24
|
"<->"
|
21
25
|
end
|
22
26
|
|
27
|
+
quoted_attribute = "#{self.class.connection.quote_table_name(self.class.table_name)}.#{self.class.connection.quote_column_name(attribute_name)}"
|
28
|
+
|
23
29
|
# important! neighbor_vector should already be typecast
|
24
30
|
# but use to_f as extra safeguard against SQL injection
|
25
|
-
order = "
|
31
|
+
order = "#{quoted_attribute} #{operator} cube(array[#{neighbor_vector.map(&:to_f).join(", ")}])"
|
26
32
|
|
27
33
|
# https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
|
28
34
|
# with normalized vectors:
|
@@ -35,7 +41,7 @@ module Neighbor
|
|
35
41
|
self.class
|
36
42
|
.select(*self.class.column_names, "#{neighbor_distance} AS neighbor_distance")
|
37
43
|
.where.not(self.class.primary_key => send(self.class.primary_key))
|
38
|
-
.where.not(
|
44
|
+
.where.not(attribute_name => nil)
|
39
45
|
.order(Arel.sql(order))
|
40
46
|
end
|
41
47
|
end
|
data/lib/neighbor/vector.rb
CHANGED
@@ -13,11 +13,7 @@ module Neighbor
|
|
13
13
|
raise Error, "Expected #{@dimensions} dimensions, not #{value.size}" unless value.size == @dimensions
|
14
14
|
|
15
15
|
if @distance == "cosine"
|
16
|
-
norm =
|
17
|
-
value.each do |v|
|
18
|
-
norm += v * v
|
19
|
-
end
|
20
|
-
norm = Math.sqrt(norm)
|
16
|
+
norm = Math.sqrt(value.sum { |v| v * v })
|
21
17
|
value.map { |v| v / norm }
|
22
18
|
else
|
23
19
|
value
|
data/lib/neighbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: neighbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.2.3
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Nearest neighbor search for Rails and Postgres
|