neighbor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +142 -0
- data/lib/generators/neighbor/install_generator.rb +18 -0
- data/lib/generators/neighbor/templates/migration.rb.tt +5 -0
- data/lib/neighbor.rb +27 -0
- data/lib/neighbor/model.rb +44 -0
- data/lib/neighbor/vector.rb +35 -0
- data/lib/neighbor/version.rb +3 -0
- metadata +65 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f346d121745bd39998267c45f77009970c014de887376022f8b6425192d37354
|
4
|
+
data.tar.gz: '0293bc9fd3633ca3ee811940c59995407357d4b880fcf8ea8cd9223e5df0e75b'
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cf080c46ad8133a460453c773faabac02e229821625fa9f2d51a320195b1e821b230c94c7fe45bc7f7d12b0b51ce6d3e5f1d4c0d74e446b0ce22dde2f5171cde
|
7
|
+
data.tar.gz: f2e0ae2247979bd3dd083b1869b56f7b6ed879ce8dd1c0ef31d965392d8250548acb304a78ba99a1d6cdfea116c10e0d67aff9c06185a793e576bddae4d624f1
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Andrew Kane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# Neighbor
|
2
|
+
|
3
|
+
Nearest neighbor search for Rails and Postgres
|
4
|
+
|
5
|
+
[Build Status](https://github.com/ankane/neighbor/actions)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'neighbor'
|
13
|
+
```
|
14
|
+
|
15
|
+
And run:
|
16
|
+
|
17
|
+
```sh
|
18
|
+
bundle install
|
19
|
+
rails generate neighbor:install
|
20
|
+
rails db:migrate
|
21
|
+
```
|
22
|
+
|
23
|
+
## Getting Started
|
24
|
+
|
25
|
+
Create a migration
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
class AddNeighborVectorToItems < ActiveRecord::Migration[6.1]
|
29
|
+
def change
|
30
|
+
add_column :items, :neighbor_vector, :cube
|
31
|
+
end
|
32
|
+
end
|
33
|
+
```
|
34
|
+
|
35
|
+
Add to your model
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
class Item < ApplicationRecord
|
39
|
+
has_neighbors dimensions: 3
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
Update the vectors
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
item.update(neighbor_vector: [1.0, 1.2, 0.5])
|
47
|
+
```
|
48
|
+
|
49
|
+
> With cosine distance (the default), vectors are normalized before being stored
|
50
|
+
|
51
|
+
And get the nearest neighbors
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
item.nearest_neighbors.first(5)
|
55
|
+
```
|
56
|
+
|
57
|
+
## Distances
|
58
|
+
|
59
|
+
Specify the distance metric
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
class Item < ApplicationRecord
|
63
|
+
has_neighbors dimensions: 20, distance: "euclidean"
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
Supported distances are:
|
68
|
+
|
69
|
+
- `cosine` (default)
|
70
|
+
- `euclidean`
|
71
|
+
- `taxicab`
|
72
|
+
- `chebyshev`
|
73
|
+
|
74
|
+
Returned records will have a `neighbor_distance` attribute
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
returned_item.neighbor_distance
|
78
|
+
```
|
79
|
+
|
80
|
+
## Example
|
81
|
+
|
82
|
+
You can use Neighbor for online item recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example.
|
83
|
+
|
84
|
+
Generate a model
|
85
|
+
|
86
|
+
```sh
|
87
|
+
rails generate model Movie name:string neighbor_vector:cube
|
88
|
+
rails db:migrate
|
89
|
+
```
|
90
|
+
|
91
|
+
And add `has_neighbors`
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
class Movie < ApplicationRecord
|
95
|
+
has_neighbors dimensions: 20
|
96
|
+
end
|
97
|
+
```
|
98
|
+
|
99
|
+
Fit the recommender
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
data = Disco.load_movielens
|
103
|
+
recommender = Disco::Recommender.new(factors: 20)
|
104
|
+
recommender.fit(data)
|
105
|
+
```
|
106
|
+
|
107
|
+
Use item factors for the neighbor vector
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
recommender.item_ids.each do |item_id|
|
111
|
+
Movie.create!(name: item_id, neighbor_vector: recommender.item_factors(item_id))
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
And get similar movies
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
movie = Movie.find_by(name: "Star Wars (1977)")
|
119
|
+
movie.nearest_neighbors.first(5).map(&:name)
|
120
|
+
```
|
121
|
+
|
122
|
+
## History
|
123
|
+
|
124
|
+
View the [changelog](https://github.com/ankane/neighbor/blob/master/CHANGELOG.md)
|
125
|
+
|
126
|
+
## Contributing
|
127
|
+
|
128
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
129
|
+
|
130
|
+
- [Report bugs](https://github.com/ankane/neighbor/issues)
|
131
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/neighbor/pulls)
|
132
|
+
- Write, clarify, or fix documentation
|
133
|
+
- Suggest or add new features
|
134
|
+
|
135
|
+
To get started with development:
|
136
|
+
|
137
|
+
```sh
|
138
|
+
git clone https://github.com/ankane/neighbor.git
|
139
|
+
cd neighbor
|
140
|
+
bundle install
|
141
|
+
bundle exec rake test
|
142
|
+
```
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require "rails/generators/active_record"
|
2
|
+
|
3
|
+
module Neighbor
  module Generators
    # Generator that installs a migration from this gem's templates directory
    # into the host application (run as `rails generate neighbor:install`).
    class InstallGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.join(__dir__, "templates")

      # Renders the "migration.rb" template to db/migrate/install_neighbor.rb,
      # passing the migration version string through to the template.
      def copy_migration
        migration_template(
          "migration.rb",
          "db/migrate/install_neighbor.rb",
          migration_version: migration_version
        )
      end

      # Bracketed "[MAJOR.MINOR]" string built from the loaded Active Record
      # version, e.g. "[6.1]".
      # NOTE(review): presumably appended to ActiveRecord::Migration inside the
      # template - the template is not visible here, confirm.
      def migration_version
        major = ActiveRecord::VERSION::MAJOR
        minor = ActiveRecord::VERSION::MINOR
        "[#{major}.#{minor}]"
      end
    end
  end
end
|
data/lib/neighbor.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "active_support"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "neighbor/version"
|
6
|
+
|
7
|
+
module Neighbor
  # Error raised by this gem (for example on a dimension mismatch).
  class Error < StandardError
  end

  # Extension for the PostgreSQL adapter: after the adapter has filled its
  # type map, additionally register the "cube" type as a specialized string.
  module RegisterCubeType
    def initialize_type_map(m = type_map)
      super
      cube_type = ActiveRecord::ConnectionAdapters::PostgreSQL::OID::SpecializedString.new(:cube)
      m.register_type("cube", cube_type)
    end
  end
end
|
17
|
+
|
18
|
+
ActiveSupport.on_load(:active_record) do
  # these files reference Active Record constants, so they are required
  # from inside the load hook rather than at the top of the file
  require "neighbor/model"
  require "neighbor/vector"

  extend Neighbor::Model

  # prevent unknown OID warning
  require "active_record/connection_adapters/postgresql_adapter"
  postgresql_adapter = ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
  postgresql_adapter.prepend(Neighbor::RegisterCubeType)
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Neighbor
  module Model
    # Declares nearest-neighbor search on the model.
    #
    # Registers +neighbor_vector+ as a Neighbor::Vector attribute and defines
    # the instance method +nearest_neighbors+, which returns the other records
    # ordered by distance to this record's vector, each with an extra
    # +neighbor_distance+ column selected.
    #
    # dimensions - passed through to Neighbor::Vector
    # distance   - "cosine" (default), "euclidean", "taxicab" or "chebyshev";
    #              converted with to_s, so symbols work as well
    #
    # Raises ArgumentError for any other distance.
    def has_neighbors(dimensions:, distance: "cosine")
      distance = distance.to_s
      unless %w(cosine euclidean taxicab chebyshev).include?(distance)
        raise ArgumentError, "Invalid distance: #{distance}"
      end

      vector_type = Neighbor::Vector.new(dimensions: dimensions, distance: distance)

      class_eval do
        attribute :neighbor_vector, vector_type

        define_method :nearest_neighbors do
          model = self.class
          vector = neighbor_vector
          return model.none if vector.nil?

          # cosine and euclidean both order by the <-> operator
          operator = { "taxicab" => "<#>", "chebyshev" => "<=>" }.fetch(distance, "<->")

          # important! neighbor_vector should already be typecast
          # but use to_f as extra safeguard against SQL injection
          components = vector.map(&:to_f).join(", ")
          order = "neighbor_vector #{operator} cube(array[#{components}])"

          # https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
          # with normalized vectors:
          # cosine similarity = 1 - (euclidean distance)**2 / 2
          # cosine distance = 1 - cosine similarity
          # this transformation doesn't change the order, so only needed for select
          neighbor_distance =
            if distance == "cosine"
              "POWER(#{order}, 2) / 2.0"
            else
              order
            end

          # for select, use column_names instead of * to account for ignored columns
          key = model.primary_key
          model
            .select(*model.column_names, "#{neighbor_distance} AS neighbor_distance")
            .where.not(key => send(key))
            .where.not(neighbor_vector: nil)
            .order(Arel.sql(order))
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Neighbor
|
2
|
+
class Vector < ActiveRecord::Type::Value
|
3
|
+
# Stores the settings that #cast consults.
#
# dimensions - vector size that #cast enforces
# distance   - distance name; "cosine" makes #cast normalize the vector
def initialize(dimensions:, distance:)
  super()
  @distance = distance
  @dimensions = dimensions
end
|
8
|
+
|
9
|
+
# Casts +value+ to an array of floats of the configured size.
#
# value - nil, or anything responding to to_a whose elements respond to to_f
#
# Returns nil for nil. For the "cosine" distance the vector is scaled to unit
# length; for every other distance the floats are returned as given.
#
# Raises Error when the number of elements differs from @dimensions.
def cast(value)
  return if value.nil?

  value = value.to_a.map(&:to_f)
  raise Error, "Expected #{@dimensions} dimensions, not #{value.size}" unless value.size == @dimensions

  return value unless @distance == "cosine"

  norm = Math.sqrt(value.sum { |v| v * v })

  # a zero vector has no direction: dividing by its zero norm would turn every
  # component into NaN, which then ends up in the stored value and in the SQL
  # built by nearest_neighbors - keep it as all zeros instead
  norm > 0 ? value.map { |v| v / norm } : value
end
|
26
|
+
|
27
|
+
# Builds the text form "(x1, x2, ...)" from the cast vector.
# nil is passed through as nil.
def serialize(value)
  return if value.nil?

  components = cast(value)
  "(#{components.join(", ")})"
end
|
30
|
+
|
31
|
+
# Parses the text form "(x1, x2, ...)" back into an array of floats.
# nil is passed through as nil.
def deserialize(value)
  return if value.nil?

  # only the leading "(" is removed; String#to_f ignores the trailing ")"
  # left on the last component
  value[1..-1].split(",").map { |component| component.to_f }
end
|
34
|
+
end
|
35
|
+
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: neighbor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description:
|
28
|
+
email: andrew@ankane.org
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- CHANGELOG.md
|
34
|
+
- LICENSE.txt
|
35
|
+
- README.md
|
36
|
+
- lib/generators/neighbor/install_generator.rb
|
37
|
+
- lib/generators/neighbor/templates/migration.rb.tt
|
38
|
+
- lib/neighbor.rb
|
39
|
+
- lib/neighbor/model.rb
|
40
|
+
- lib/neighbor/vector.rb
|
41
|
+
- lib/neighbor/version.rb
|
42
|
+
homepage: https://github.com/ankane/neighbor
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
metadata: {}
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.6'
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubygems_version: 3.1.4
|
62
|
+
signing_key:
|
63
|
+
specification_version: 4
|
64
|
+
summary: Nearest neighbor search for Rails and Postgres
|
65
|
+
test_files: []
|