rumale-nearest_neighbors 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +33 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +115 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +92 -0
- data/lib/rumale/nearest_neighbors/version.rb +10 -0
- data/lib/rumale/nearest_neighbors.rb +7 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: df277519a7d271e0bdf9fef8076a2ebb2476817dc2d9f3ca5076b2e0b0ac5b52
|
4
|
+
data.tar.gz: 8399d40f09a44e36c71a444c3d44ad5e419b42e0380dee531c68da3f484a8ab3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9322178b341aa9a76e30ed13dec1e2711d75aea6868934f1cf47401f4e335d01290baa707ec2dcc0e94f8a15fb3d4b8eff1db65ef64bc65edab095c5e2df5d57
|
7
|
+
data.tar.gz: b0f554518ae703b23e21adc6c039f79e7e84da3b4c4a95f9776e38bd1a84fa468f7a60aeee30b0f3177d82630f739b9609e99da5c521b13366b88f685a3dd628
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Rumale::NearestNeighbors
|
2
|
+
|
3
|
+
[](https://badge.fury.io/rb/rumale-nearest_neighbors)
|
4
|
+
[](https://github.com/yoshoku/rumale/blob/main/rumale-nearest_neighbors/LICENSE.txt)
|
5
|
+
[](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::NearestNeighbors provides a classifier and a regressor based on the nearest neighbors rule
|
9
|
+
with Rumale interface.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'rumale-nearest_neighbors'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle install
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install rumale-nearest_neighbors
|
26
|
+
|
27
|
+
## Documentation
|
28
|
+
|
29
|
+
- [Rumale API Documentation - NearestNeighbors](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
|
30
|
+
|
31
|
+
## License
|
32
|
+
|
33
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
# This module consists of the classes that implement estimators based on nearest neighbors rule.
|
10
|
+
module NearestNeighbors
|
11
|
+
# KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
|
12
|
+
# The current implementation uses the Euclidean distance for finding the neighbors.
|
13
|
+
#
|
14
|
+
# @example
|
15
|
+
# require 'rumale/nearest_neighbors/k_neighbors_classifier'
|
16
|
+
#
|
17
|
+
# estimator =
|
18
|
+
# Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
|
19
|
+
# estimator.fit(training_samples, training_labels)
|
20
|
+
# results = estimator.predict(testing_samples)
|
21
|
+
#
|
22
|
+
class KNeighborsClassifier < ::Rumale::Base::Estimator
|
23
|
+
include ::Rumale::Base::Classifier
|
24
|
+
|
25
|
+
# Return the prototypes for the nearest neighbor classifier.
|
26
|
+
# If the metric is 'precomputed', this returns nil.
|
27
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_features])
|
28
|
+
attr_reader :prototypes
|
29
|
+
|
30
|
+
# Return the labels of the prototypes
|
31
|
+
# @return [Numo::Int32] (size: n_training_samples)
|
32
|
+
attr_reader :labels
|
33
|
+
|
34
|
+
# Return the class labels.
|
35
|
+
# @return [Numo::Int32] (size: n_classes)
|
36
|
+
attr_reader :classes
|
37
|
+
|
38
|
+
# Create a new classifier with the nearest neighbor rule.
|
39
|
+
#
|
40
|
+
# @param n_neighbors [Integer] The number of neighbors.
|
41
|
+
# @param metric [String] The metric to calculate the distances.
|
42
|
+
# If metric is 'euclidean', Euclidean distance is calculated for distance between points.
|
43
|
+
# If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
|
44
|
+
def initialize(n_neighbors: 5, metric: 'euclidean')
|
45
|
+
super()
|
46
|
+
@params = {
|
47
|
+
n_neighbors: n_neighbors,
|
48
|
+
metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean')
|
49
|
+
}
|
50
|
+
end
|
51
|
+
|
52
|
+
# Fit the model with given training data.
|
53
|
+
#
|
54
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
|
55
|
+
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
|
56
|
+
# @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
|
57
|
+
# @return [KNeighborsClassifier] The learned classifier itself.
|
58
|
+
def fit(x, y)
|
59
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
60
|
+
y = ::Rumale::Validation.check_convert_label_array(y)
|
61
|
+
::Rumale::Validation.check_sample_size(x, y)
|
62
|
+
if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
63
|
+
raise ArgumentError, 'Expect the input distance matrix to be square.'
|
64
|
+
end
|
65
|
+
|
66
|
+
@prototypes = x.dup if @params[:metric] == 'euclidean'
|
67
|
+
@labels = Numo::Int32.asarray(y.to_a)
|
68
|
+
@classes = Numo::Int32.asarray(y.to_a.uniq.sort)
|
69
|
+
self
|
70
|
+
end
|
71
|
+
|
72
|
+
# Calculate confidence scores for samples.
|
73
|
+
#
|
74
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to compute the scores.
|
75
|
+
# If the metric is 'precomputed', x must be a distance matrix between testing and training samples (shape: [n_testing_samples, n_training_samples]).
|
76
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence scores per sample for each class.
|
77
|
+
def decision_function(x)
|
78
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
79
|
+
if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
|
80
|
+
raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
|
81
|
+
end
|
82
|
+
|
83
|
+
n_prototypes = @labels.size
|
84
|
+
n_neighbors = [@params[:n_neighbors], n_prototypes].min
|
85
|
+
n_samples = x.shape[0]
|
86
|
+
n_classes = @classes.size
|
87
|
+
scores = Numo::DFloat.zeros(n_samples, n_classes)
|
88
|
+
|
89
|
+
distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes)
|
90
|
+
n_samples.times do |m|
|
91
|
+
neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
|
92
|
+
neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 }
|
93
|
+
end
|
94
|
+
|
95
|
+
scores
|
96
|
+
end
|
97
|
+
|
98
|
+
# Predict class labels for samples.
|
99
|
+
#
|
100
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the labels.
|
101
|
+
# If the metric is 'precomputed', x must be a distance matrix between testing and training samples (shape: [n_testing_samples, n_training_samples]).
|
102
|
+
# @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
|
103
|
+
def predict(x)
|
104
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
105
|
+
if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
|
106
|
+
raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_training_samples.'
|
107
|
+
end
|
108
|
+
|
109
|
+
decision_values = decision_function(x)
|
110
|
+
n_samples = x.shape[0]
|
111
|
+
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module NearestNeighbors
|
10
|
+
# KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
|
11
|
+
# The current implementation uses the Euclidean distance for finding the neighbors.
|
12
|
+
#
|
13
|
+
# @example
|
14
|
+
# require 'rumale/nearest_neighbors/k_neighbors_regressor'
|
15
|
+
#
|
16
|
+
# estimator =
|
17
|
+
# Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
|
18
|
+
# estimator.fit(training_samples, training_target_values)
|
19
|
+
# results = estimator.predict(testing_samples)
|
20
|
+
#
|
21
|
+
class KNeighborsRegressor < ::Rumale::Base::Estimator
|
22
|
+
include ::Rumale::Base::Regressor
|
23
|
+
|
24
|
+
# Return the prototypes for the nearest neighbor regressor.
|
25
|
+
# If the metric is 'precomputed', this returns nil.
|
26
|
+
# If the algorithm is 'vptree', that returns Rumale::NearestNeighbors::VPTree.
|
27
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_features])
|
28
|
+
attr_reader :prototypes
|
29
|
+
|
30
|
+
# Return the values of the prototypes
|
31
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
|
32
|
+
attr_reader :values
|
33
|
+
|
34
|
+
# Create a new regressor with the nearest neighbor rule.
|
35
|
+
#
|
36
|
+
# @param n_neighbors [Integer] The number of neighbors.
|
37
|
+
# @param metric [String] The metric to calculate the distances.
|
38
|
+
# If metric is 'euclidean', Euclidean distance is calculated for distance between points.
|
39
|
+
# If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
|
40
|
+
def initialize(n_neighbors: 5, metric: 'euclidean')
|
41
|
+
super()
|
42
|
+
@params = {
|
43
|
+
n_neighbors: n_neighbors,
|
44
|
+
metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean')
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
# Fit the model with given training data.
|
49
|
+
#
|
50
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
|
51
|
+
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
|
52
|
+
# @param y [Numo::DFloat] (shape: [n_training_samples, n_outputs]) The target values to be used for fitting the model.
|
53
|
+
# @return [KNeighborsRegressor] The learned regressor itself.
|
54
|
+
def fit(x, y)
|
55
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
56
|
+
y = ::Rumale::Validation.check_convert_target_value_array(y)
|
57
|
+
::Rumale::Validation.check_sample_size(x, y)
|
58
|
+
if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
59
|
+
raise ArgumentError, 'Expect the input distance matrix to be square.'
|
60
|
+
end
|
61
|
+
|
62
|
+
@prototypes = x.dup if @params[:metric] == 'euclidean'
|
63
|
+
@values = y.dup
|
64
|
+
self
|
65
|
+
end
|
66
|
+
|
67
|
+
# Predict values for samples.
|
68
|
+
#
|
69
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the values.
|
70
|
+
# If the metric is 'precomputed', x must be a distance matrix between testing and training samples (shape: [n_testing_samples, n_training_samples]).
|
71
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_outputs]) Predicted values per sample.
|
72
|
+
def predict(x)
|
73
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
74
|
+
if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
|
75
|
+
raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
|
76
|
+
end
|
77
|
+
|
78
|
+
# Initialize some variables.
|
79
|
+
n_samples = x.shape[0]
|
80
|
+
n_prototypes, n_outputs = @values.shape
|
81
|
+
n_neighbors = [@params[:n_neighbors], n_prototypes].min
|
82
|
+
# Predict values for the given samples.
|
83
|
+
distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes)
|
84
|
+
predicted_values = Array.new(n_samples) do |n|
|
85
|
+
neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
|
86
|
+
n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
|
87
|
+
end
|
88
|
+
Numo::DFloat[*predicted_values]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rumale is a machine learning library in Ruby.
|
4
|
+
module Rumale
|
5
|
+
# This module consists of the classes that implement estimators based on nearest neighbors rule.
|
6
|
+
module NearestNeighbors
|
7
|
+
# @!visibility private
|
8
|
+
VERSION = '0.24.0'
|
9
|
+
end
|
10
|
+
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-nearest_neighbors
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: numo-narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rumale-core
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.24.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.24.0
|
41
|
+
description: 'Rumale::NearestNeighbors provides classifier and regression based on
|
42
|
+
nearest neighbors rule with Rumale interface.
|
43
|
+
|
44
|
+
'
|
45
|
+
email:
|
46
|
+
- yoshoku@outlook.com
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
extra_rdoc_files: []
|
50
|
+
files:
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- lib/rumale/nearest_neighbors.rb
|
54
|
+
- lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
|
55
|
+
- lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
|
56
|
+
- lib/rumale/nearest_neighbors/version.rb
|
57
|
+
homepage: https://github.com/yoshoku/rumale
|
58
|
+
licenses:
|
59
|
+
- BSD-3-Clause
|
60
|
+
metadata:
|
61
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
62
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-nearest_neighbors
|
63
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
64
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
65
|
+
rubygems_mfa_required: 'true'
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubygems_version: 3.3.26
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: Rumale::NearestNeighbors provides classifier and regression based on nearest
|
85
|
+
neighbors rule with Rumale interface.
|
86
|
+
test_files: []
|