rumale-nearest_neighbors 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: df277519a7d271e0bdf9fef8076a2ebb2476817dc2d9f3ca5076b2e0b0ac5b52
4
+ data.tar.gz: 8399d40f09a44e36c71a444c3d44ad5e419b42e0380dee531c68da3f484a8ab3
5
+ SHA512:
6
+ metadata.gz: 9322178b341aa9a76e30ed13dec1e2711d75aea6868934f1cf47401f4e335d01290baa707ec2dcc0e94f8a15fb3d4b8eff1db65ef64bc65edab095c5e2df5d57
7
+ data.tar.gz: b0f554518ae703b23e21adc6c039f79e7e84da3b4c4a95f9776e38bd1a84fa468f7a60aeee30b0f3177d82630f739b9609e99da5c521b13366b88f685a3dd628
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Rumale::NearestNeighbors
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-nearest_neighbors.svg)](https://badge.fury.io/rb/rumale-nearest_neighbors)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-nearest_neighbors/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::NearestNeighbors provides a classifier and a regressor based on the nearest neighbors rule
9
+ with the Rumale interface.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'rumale-nearest_neighbors'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle install
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install rumale-nearest_neighbors
26
+
27
+ ## Documentation
28
+
29
+ - [Rumale API Documentation - NearestNeighbors](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
30
+
31
+ ## License
32
+
33
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ # This module consists of the classes that implement estimators based on the nearest neighbors rule.
10
+ module NearestNeighbors
11
+ # KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
12
+ # The current implementation finds the neighbors with the Euclidean distance or with a precomputed distance matrix.
13
+ #
14
+ # @example
15
+ # require 'rumale/nearest_neighbors/k_neighbors_classifier'
16
+ #
17
+ # estimator =
18
+ # Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
19
+ # estimator.fit(training_samples, training_labels)
20
+ # results = estimator.predict(testing_samples)
21
+ #
22
+ class KNeighborsClassifier < ::Rumale::Base::Estimator
23
+ include ::Rumale::Base::Classifier
24
+
25
+ # Return the prototypes for the nearest neighbor classifier.
26
+ # If the metric is 'precomputed', this returns nil.
27
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_features])
28
+ attr_reader :prototypes
29
+
30
+ # Return the labels of the prototypes.
31
+ # @return [Numo::Int32] (size: n_training_samples)
32
+ attr_reader :labels
33
+
34
+ # Return the class labels.
35
+ # @return [Numo::Int32] (size: n_classes)
36
+ attr_reader :classes
37
+
38
+ # Create a new classifier with the nearest neighbor rule.
39
+ #
40
+ # @param n_neighbors [Integer] The number of neighbors.
41
+ # @param metric [String] The metric to calculate the distances.
42
+ # If metric is 'euclidean' (or any value other than 'precomputed'), the Euclidean distance between points is calculated.
43
+ # If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
44
+ def initialize(n_neighbors: 5, metric: 'euclidean')
45
+ super()
46
+ @params = {
47
+ n_neighbors: n_neighbors,
48
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean') # any other value falls back to 'euclidean'
49
+ }
50
+ end
51
+
52
+ # Fit the model with given training data.
53
+ #
54
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
55
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
56
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
57
+ # @return [KNeighborsClassifier] The learned classifier itself.
58
+ def fit(x, y)
59
+ x = ::Rumale::Validation.check_convert_sample_array(x)
60
+ y = ::Rumale::Validation.check_convert_label_array(y)
61
+ ::Rumale::Validation.check_sample_size(x, y)
62
+ if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
63
+ raise ArgumentError, 'Expect the input distance matrix to be square.'
64
+ end
65
+
66
+ @prototypes = x.dup if @params[:metric] == 'euclidean' # the samples are not stored for 'precomputed'
67
+ @labels = Numo::Int32.asarray(y.to_a)
68
+ @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
69
+ self
70
+ end
71
+
72
+ # Calculate confidence scores for samples.
73
+ #
74
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to compute the scores.
75
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
76
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence scores (neighbor vote counts) per sample for each class.
77
+ def decision_function(x)
78
+ x = ::Rumale::Validation.check_convert_sample_array(x)
79
+ if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
80
+ raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
81
+ end
82
+
83
+ n_prototypes = @labels.size
84
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min # cannot use more neighbors than there are prototypes
85
+ n_samples = x.shape[0]
86
+ n_classes = @classes.size
87
+ scores = Numo::DFloat.zeros(n_samples, n_classes)
88
+
89
+ distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes)
90
+ n_samples.times do |m|
91
+ neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors] # prototype ids ordered by ascending distance
92
+ neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 } # each neighbor adds one vote to its class
93
+ end
94
+
95
+ scores
96
+ end
97
+
98
+ # Predict class labels for samples.
99
+ #
100
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the labels.
101
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
102
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
103
+ def predict(x)
104
+ x = ::Rumale::Validation.check_convert_sample_array(x)
105
+ if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
106
+ raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_training_samples.'
107
+ end
108
+
109
+ decision_values = decision_function(x)
110
+ n_samples = x.shape[0]
111
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }) # label of the highest-scoring class
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/regressor'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module NearestNeighbors
10
+ # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
11
+ # The current implementation finds the neighbors with the Euclidean distance or with a precomputed distance matrix.
12
+ #
13
+ # @example
14
+ # require 'rumale/nearest_neighbors/k_neighbors_regressor'
15
+ #
16
+ # estimator =
17
+ # Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
18
+ # estimator.fit(training_samples, training_target_values)
19
+ # results = estimator.predict(testing_samples)
20
+ #
21
+ class KNeighborsRegressor < ::Rumale::Base::Estimator
22
+ include ::Rumale::Base::Regressor
23
+
24
+ # Return the prototypes for the nearest neighbor regressor.
25
+ # If the metric is 'precomputed', this returns nil.
26
+ # Otherwise, this returns a copy of the training samples given to the fit method.
27
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_features])
28
+ attr_reader :prototypes
29
+
30
+ # Return the values of the prototypes.
31
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
32
+ attr_reader :values
33
+
34
+ # Create a new regressor with the nearest neighbor rule.
35
+ #
36
+ # @param n_neighbors [Integer] The number of neighbors.
37
+ # @param metric [String] The metric to calculate the distances.
38
+ # If metric is 'euclidean' (or any value other than 'precomputed'), the Euclidean distance between points is calculated.
39
+ # If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
40
+ def initialize(n_neighbors: 5, metric: 'euclidean')
41
+ super()
42
+ @params = {
43
+ n_neighbors: n_neighbors,
44
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean') # any other value falls back to 'euclidean'
45
+ }
46
+ end
47
+
48
+ # Fit the model with given training data.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
51
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
52
+ # @param y [Numo::DFloat] (shape: [n_training_samples, n_outputs]) The target values to be used for fitting the model.
53
+ # @return [KNeighborsRegressor] The learned regressor itself.
54
+ def fit(x, y)
55
+ x = ::Rumale::Validation.check_convert_sample_array(x)
56
+ y = ::Rumale::Validation.check_convert_target_value_array(y)
57
+ ::Rumale::Validation.check_sample_size(x, y)
58
+ if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
59
+ raise ArgumentError, 'Expect the input distance matrix to be square.'
60
+ end
61
+
62
+ @prototypes = x.dup if @params[:metric] == 'euclidean' # the samples are not stored for 'precomputed'
63
+ @values = y.dup
64
+ self
65
+ end
66
+
67
+ # Predict values for samples.
68
+ #
69
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the values.
70
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
71
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_outputs]) Predicted values per sample.
72
+ def predict(x)
73
+ x = ::Rumale::Validation.check_convert_sample_array(x)
74
+ if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
75
+ raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
76
+ end
77
+
78
+ # Initialize some variables (n_outputs is nil when the stored target values are one-dimensional).
79
+ n_samples = x.shape[0]
80
+ n_prototypes, n_outputs = @values.shape
81
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min
82
+ # Predict values for the given samples by averaging the target values of the nearest prototypes.
83
+ distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes)
84
+ predicted_values = Array.new(n_samples) do |n|
85
+ neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors] # prototype ids ordered by ascending distance
86
+ n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a # scalar mean for 1-D targets, per-output mean otherwise
87
+ end
88
+ Numo::DFloat[*predicted_values]
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of the classes that implement estimators based on the nearest neighbors rule.
6
+ module NearestNeighbors
7
+ # @!visibility private
8
+ VERSION = '0.24.0' # version string of the rumale-nearest_neighbors gem
9
+ end
10
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'nearest_neighbors/k_neighbors_classifier'
6
+ require_relative 'nearest_neighbors/k_neighbors_regressor'
7
+ require_relative 'nearest_neighbors/version'
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-nearest_neighbors
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: 'Rumale::NearestNeighbors provides classifier and regression based on
42
+ nearest neighbors rule with Rumale interface.
43
+
44
+ '
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/nearest_neighbors.rb
54
+ - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
55
+ - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
56
+ - lib/rumale/nearest_neighbors/version.rb
57
+ homepage: https://github.com/yoshoku/rumale
58
+ licenses:
59
+ - BSD-3-Clause
60
+ metadata:
61
+ homepage_uri: https://github.com/yoshoku/rumale
62
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-nearest_neighbors
63
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
64
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
65
+ rubygems_mfa_required: 'true'
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubygems_version: 3.3.26
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: Rumale::NearestNeighbors provides classifier and regression based on nearest
85
+ neighbors rule with Rumale interface.
86
+ test_files: []