neighbor 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +142 -0
- data/lib/generators/neighbor/install_generator.rb +18 -0
- data/lib/generators/neighbor/templates/migration.rb.tt +5 -0
- data/lib/neighbor.rb +27 -0
- data/lib/neighbor/model.rb +44 -0
- data/lib/neighbor/vector.rb +35 -0
- data/lib/neighbor/version.rb +3 -0
- metadata +65 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f346d121745bd39998267c45f77009970c014de887376022f8b6425192d37354
|
4
|
+
data.tar.gz: '0293bc9fd3633ca3ee811940c59995407357d4b880fcf8ea8cd9223e5df0e75b'
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cf080c46ad8133a460453c773faabac02e229821625fa9f2d51a320195b1e821b230c94c7fe45bc7f7d12b0b51ce6d3e5f1d4c0d74e446b0ce22dde2f5171cde
|
7
|
+
data.tar.gz: f2e0ae2247979bd3dd083b1869b56f7b6ed879ce8dd1c0ef31d965392d8250548acb304a78ba99a1d6cdfea116c10e0d67aff9c06185a793e576bddae4d624f1
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Andrew Kane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# Neighbor
|
2
|
+
|
3
|
+
Nearest neighbor search for Rails and Postgres
|
4
|
+
|
5
|
+
[![Build Status](https://github.com/ankane/neighbor/workflows/build/badge.svg?branch=master)](https://github.com/ankane/neighbor/actions)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'neighbor'
|
13
|
+
```
|
14
|
+
|
15
|
+
And run:
|
16
|
+
|
17
|
+
```sh
|
18
|
+
bundle install
|
19
|
+
rails generate neighbor:install
|
20
|
+
rails db:migrate
|
21
|
+
```
|
22
|
+
|
23
|
+
## Getting Started
|
24
|
+
|
25
|
+
Create a migration
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
class AddNeighborVectorToItems < ActiveRecord::Migration[6.1]
|
29
|
+
def change
|
30
|
+
add_column :items, :neighbor_vector, :cube
|
31
|
+
end
|
32
|
+
end
|
33
|
+
```
|
34
|
+
|
35
|
+
Add to your model
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
class Item < ApplicationRecord
|
39
|
+
has_neighbors dimensions: 3
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
Update the vectors
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
item.update(neighbor_vector: [1.0, 1.2, 0.5])
|
47
|
+
```
|
48
|
+
|
49
|
+
> With cosine distance (the default), vectors are normalized before being stored
|
50
|
+
|
51
|
+
And get the nearest neighbors
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
item.nearest_neighbors.first(5)
|
55
|
+
```
|
56
|
+
|
57
|
+
## Distances
|
58
|
+
|
59
|
+
Specify the distance metric
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
class Item < ApplicationRecord
|
63
|
+
has_neighbors dimensions: 20, distance: "euclidean"
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
Supported distances are:
|
68
|
+
|
69
|
+
- `cosine` (default)
|
70
|
+
- `euclidean`
|
71
|
+
- `taxicab`
|
72
|
+
- `chebyshev`
|
73
|
+
|
74
|
+
Returned records will have a `neighbor_distance` attribute
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
returned_item.neighbor_distance
|
78
|
+
```
|
79
|
+
|
80
|
+
## Example
|
81
|
+
|
82
|
+
You can use Neighbor for online item recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example.
|
83
|
+
|
84
|
+
Generate a model
|
85
|
+
|
86
|
+
```sh
|
87
|
+
rails generate model Movie name:string neighbor_vector:cube
|
88
|
+
rails db:migrate
|
89
|
+
```
|
90
|
+
|
91
|
+
And add `has_neighbors`
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
class Movie < ApplicationRecord
|
95
|
+
has_neighbors dimensions: 20
|
96
|
+
end
|
97
|
+
```
|
98
|
+
|
99
|
+
Fit the recommender
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
data = Disco.load_movielens
|
103
|
+
recommender = Disco::Recommender.new(factors: 20)
|
104
|
+
recommender.fit(data)
|
105
|
+
```
|
106
|
+
|
107
|
+
Use item factors for the neighbor vector
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
recommender.item_ids.each do |item_id|
|
111
|
+
Movie.create!(name: item_id, neighbor_vector: recommender.item_factors(item_id))
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
And get similar movies
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
movie = Movie.find_by(name: "Star Wars (1977)")
|
119
|
+
movie.nearest_neighbors.first(5).map(&:name)
|
120
|
+
```
|
121
|
+
|
122
|
+
## History
|
123
|
+
|
124
|
+
View the [changelog](https://github.com/ankane/neighbor/blob/master/CHANGELOG.md)
|
125
|
+
|
126
|
+
## Contributing
|
127
|
+
|
128
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
129
|
+
|
130
|
+
- [Report bugs](https://github.com/ankane/neighbor/issues)
|
131
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/neighbor/pulls)
|
132
|
+
- Write, clarify, or fix documentation
|
133
|
+
- Suggest or add new features
|
134
|
+
|
135
|
+
To get started with development:
|
136
|
+
|
137
|
+
```sh
|
138
|
+
git clone https://github.com/ankane/neighbor.git
|
139
|
+
cd neighbor
|
140
|
+
bundle install
|
141
|
+
bundle exec rake test
|
142
|
+
```
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require "rails/generators/active_record"
|
2
|
+
|
3
|
+
module Neighbor
  module Generators
    # Generator invoked as `rails generate neighbor:install`.
    #
    # It renders the bundled "migration.rb" template into the host
    # application's db/migrate directory.
    # NOTE(review): the template body is not visible here, so what the
    # generated migration actually does is not documented in this file.
    class InstallGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      # Templates live in the "templates" directory next to this file.
      source_root File.join(__dir__, "templates")

      # Copies the migration template to db/migrate/install_neighbor.rb,
      # passing the migration version through to the template.
      def copy_migration
        migration_template(
          "migration.rb",
          "db/migrate/install_neighbor.rb",
          migration_version: migration_version
        )
      end

      # Bracketed "major.minor" of the loaded Active Record, e.g. "[6.1]".
      #
      # @return [String]
      def migration_version
        major = ActiveRecord::VERSION::MAJOR
        minor = ActiveRecord::VERSION::MINOR
        "[#{major}.#{minor}]"
      end
    end
  end
end
|
data/lib/neighbor.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "active_support"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "neighbor/version"
|
6
|
+
|
7
|
+
module Neighbor
  # Gem-specific error class (a StandardError, so a bare `rescue` catches it).
  class Error < StandardError
  end

  # Mixin meant to be prepended onto the PostgreSQL connection adapter.
  # After the adapter builds its type map, this registers a type for "cube"
  # columns so Active Record does not warn about an unknown OID.
  module RegisterCubeType
    # @param m [Object] the type map to populate; defaults to the adapter's own
    def initialize_type_map(m = type_map)
      # let the adapter register all of its built-in types first
      super

      cube_type = ActiveRecord::ConnectionAdapters::PostgreSQL::OID::SpecializedString.new(:cube)
      m.register_type("cube", cube_type)
    end
  end
end
|
17
|
+
|
18
|
+
# Everything below is deferred until Active Record itself has been loaded.
ActiveSupport.on_load(:active_record) do
  # these files reference Active Record, so they are only required here
  require("neighbor/model")
  require("neighbor/vector")

  # makes `has_neighbors` available as a class-level macro on models
  extend(Neighbor::Model)

  # prevent unknown OID warning
  require("active_record/connection_adapters/postgresql_adapter")
  ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(Neighbor::RegisterCubeType)
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Neighbor
  module Model
    # Class-level macro that turns the `neighbor_vector` column into a typed
    # vector attribute and defines `#nearest_neighbors` on the model.
    #
    # @param dimensions [Integer] number of components every vector must have
    # @param distance [String, Symbol] one of "cosine" (default), "euclidean",
    #   "taxicab" or "chebyshev"
    # @raise [ArgumentError] if +distance+ is not one of the supported metrics
    def has_neighbors(dimensions:, distance: "cosine")
      distance = distance.to_s
      supported = %w(cosine euclidean taxicab chebyshev)
      raise ArgumentError, "Invalid distance: #{distance}" unless supported.include?(distance)

      # cube distance operator for the metric; cosine and euclidean both
      # fall through to the default operator
      operator = { "taxicab" => "<#>", "chebyshev" => "<=>" }.fetch(distance, "<->")
      cosine = distance == "cosine"

      attribute :neighbor_vector, Neighbor::Vector.new(dimensions: dimensions, distance: distance)

      # Returns a relation of the other records that have a vector, ordered
      # from nearest to farthest, each carrying a `neighbor_distance` column.
      # Yields an empty relation when this record has no vector.
      define_method :nearest_neighbors do
        vector = neighbor_vector
        return self.class.none if vector.nil?

        model = self.class

        # important! neighbor_vector should already be typecast
        # but use to_f as extra safeguard against SQL injection
        components = vector.map(&:to_f).join(", ")
        order = "neighbor_vector #{operator} cube(array[#{components}])"

        # https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
        # with normalized vectors:
        # cosine similarity = 1 - (euclidean distance)**2 / 2
        # cosine distance = 1 - cosine similarity
        # this transformation doesn't change the order, so only needed for select
        neighbor_distance = cosine ? "POWER(#{order}, 2) / 2.0" : order

        key = model.primary_key

        # for select, use column_names instead of * to account for ignored columns
        model
          .select(*model.column_names, "#{neighbor_distance} AS neighbor_distance")
          .where.not(key => send(key))
          .where.not(neighbor_vector: nil)
          .order(Arel.sql(order))
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Neighbor
  # Active Record attribute type for a fixed-size vector of floats.
  #
  # Values are cast to an Array of Floats and written as "(a, b, c)".
  # With the "cosine" distance, vectors are scaled to unit length before
  # being stored.
  class Vector < ActiveRecord::Type::Value
    # @param dimensions [Integer] required number of components
    # @param distance [String] distance metric name; only "cosine" changes
    #   how values are cast
    def initialize(dimensions:, distance:)
      super()
      @dimensions = dimensions
      @distance = distance
    end

    # Converts user input into an Array of Floats of the expected size.
    #
    # @param value [#to_a, nil] the raw vector
    # @return [Array<Float>, nil] nil when +value+ is nil
    # @raise [Neighbor::Error] when the number of components is wrong
    def cast(value)
      return if value.nil?

      value = value.to_a.map(&:to_f)
      raise Error, "Expected #{@dimensions} dimensions, not #{value.size}" unless value.size == @dimensions

      return value unless @distance == "cosine"

      norm = Math.sqrt(value.reduce(0.0) { |total, v| total + v * v })

      # a zero vector cannot be normalized: dividing by a zero norm would
      # turn every component into NaN, so keep it as all zeros instead
      norm.zero? ? value : value.map { |v| v / norm }
    end

    # Builds the text form sent to the database, e.g. "(1.0, 2.0, 3.0)".
    #
    # @return [String, nil] nil when +value+ is nil
    def serialize(value)
      "(#{cast(value).join(", ")})" unless value.nil?
    end

    # Parses the text form back into an Array of Floats.
    # Only the leading "(" is stripped; String#to_f ignores the trailing ")"
    # on the last component.
    # NOTE(review): assumes the database returns the same "(a, b, c)" shape
    # that #serialize writes - confirm against the cube output format.
    #
    # @return [Array<Float>, nil] nil when +value+ is nil
    def deserialize(value)
      value[1..-1].split(",").map(&:to_f) unless value.nil?
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: neighbor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description:
|
28
|
+
email: andrew@ankane.org
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- CHANGELOG.md
|
34
|
+
- LICENSE.txt
|
35
|
+
- README.md
|
36
|
+
- lib/generators/neighbor/install_generator.rb
|
37
|
+
- lib/generators/neighbor/templates/migration.rb.tt
|
38
|
+
- lib/neighbor.rb
|
39
|
+
- lib/neighbor/model.rb
|
40
|
+
- lib/neighbor/vector.rb
|
41
|
+
- lib/neighbor/version.rb
|
42
|
+
homepage: https://github.com/ankane/neighbor
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
metadata: {}
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.6'
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubygems_version: 3.1.4
|
62
|
+
signing_key:
|
63
|
+
specification_version: 4
|
64
|
+
summary: Nearest neighbor search for Rails and Postgres
|
65
|
+
test_files: []
|