rumale-nearest_neighbors 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: df277519a7d271e0bdf9fef8076a2ebb2476817dc2d9f3ca5076b2e0b0ac5b52
4
+ data.tar.gz: 8399d40f09a44e36c71a444c3d44ad5e419b42e0380dee531c68da3f484a8ab3
5
+ SHA512:
6
+ metadata.gz: 9322178b341aa9a76e30ed13dec1e2711d75aea6868934f1cf47401f4e335d01290baa707ec2dcc0e94f8a15fb3d4b8eff1db65ef64bc65edab095c5e2df5d57
7
+ data.tar.gz: b0f554518ae703b23e21adc6c039f79e7e84da3b4c4a95f9776e38bd1a84fa468f7a60aeee30b0f3177d82630f739b9609e99da5c521b13366b88f685a3dd628
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Rumale::NearestNeighbors
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-nearest_neighbors.svg)](https://badge.fury.io/rb/rumale-nearest_neighbors)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-nearest_neighbors/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::NearestNeighbors provides a classifier and a regressor based on the nearest neighbors rule
9
+ with the Rumale interface.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'rumale-nearest_neighbors'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle install
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install rumale-nearest_neighbors
26
+
27
+ ## Documentation
28
+
29
+ - [Rumale API Documentation - NearestNeighbors](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html)
30
+
31
+ ## License
32
+
33
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ # This module consists of the classes that implement estimators based on nearest neighbors rule.
10
+ module NearestNeighbors
11
+ # KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
12
+ # Neighbors are found with the Euclidean distance, or from a user-supplied distance matrix when metric is 'precomputed'.
13
+ #
14
+ # @example
15
+ # require 'rumale/nearest_neighbors/k_neighbors_classifier'
16
+ #
17
+ # estimator =
18
+ # Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
19
+ # estimator.fit(training_samples, training_labels)
20
+ # results = estimator.predict(testing_samples)
21
+ #
22
+ class KNeighborsClassifier < ::Rumale::Base::Estimator
23
+ include ::Rumale::Base::Classifier
24
+
25
+ # Return the prototypes for the nearest neighbor classifier, i.e. a copy of the training samples given to fit.
26
+ # If the metric is 'precomputed', this returns nil because fit stores no prototypes in that case.
27
+ # @return [Numo::DFloat, nil] (shape: [n_training_samples, n_features])
28
+ attr_reader :prototypes
29
+
30
+ # Return the labels of the prototypes (the training labels given to fit).
31
+ # @return [Numo::Int32] (size: n_training_samples)
32
+ attr_reader :labels
33
+
34
+ # Return the class labels, i.e. the distinct training labels in ascending order.
35
+ # @return [Numo::Int32] (size: n_classes)
36
+ attr_reader :classes
37
+
38
+ # Create a new classifier with the nearest neighbor rule.
39
+ #
40
+ # @param n_neighbors [Integer] The number of neighbors. It is capped at the number of training samples when predicting.
41
+ # @param metric [String] The metric to calculate the distances. Any value other than 'precomputed' is treated as 'euclidean'.
42
+ # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
43
+ # If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
44
+ def initialize(n_neighbors: 5, metric: 'euclidean')
45
+ super()
46
+ @params = {
47
+ n_neighbors: n_neighbors,
48
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean') # unknown metric names silently fall back to 'euclidean'
49
+ }
50
+ end
51
+
52
+ # Fit the model with given training data.
53
+ #
54
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
55
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]); otherwise ArgumentError is raised.
56
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
57
+ # @return [KNeighborsClassifier] The learned classifier itself.
58
+ def fit(x, y)
59
+ x = ::Rumale::Validation.check_convert_sample_array(x)
60
+ y = ::Rumale::Validation.check_convert_label_array(y)
61
+ ::Rumale::Validation.check_sample_size(x, y)
62
+ if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
63
+ raise ArgumentError, 'Expect the input distance matrix to be square.'
64
+ end
65
+
66
+ @prototypes = x.dup if @params[:metric] == 'euclidean' # nothing is stored for 'precomputed'; distances are supplied at prediction time
67
+ @labels = Numo::Int32.asarray(y.to_a)
68
+ @classes = Numo::Int32.asarray(y.to_a.uniq.sort) # distinct labels, ascending; column order of the decision_function scores
69
+ self
70
+ end
71
+
72
+ # Calculate confidence scores for samples. The score of a class is the number of nearest neighbors carrying that label.
73
+ #
74
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to compute the scores.
75
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]); otherwise ArgumentError is raised.
76
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence scores (neighbor vote counts) per sample for each class.
77
+ def decision_function(x)
78
+ x = ::Rumale::Validation.check_convert_sample_array(x)
79
+ if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
80
+ raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
81
+ end
82
+
83
+ n_prototypes = @labels.size
84
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min # never ask for more neighbors than there are training samples
85
+ n_samples = x.shape[0]
86
+ n_classes = @classes.size
87
+ scores = Numo::DFloat.zeros(n_samples, n_classes)
88
+
89
+ distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes) # 'precomputed': x already holds the distances
90
+ n_samples.times do |m|
91
+ neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors] # sort [distance, index] pairs; equal distances are ordered by the smaller index
92
+ neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 } # each neighbor casts one vote for the column of its label
93
+ end
94
+
95
+ scores
96
+ end
97
+
98
+ # Predict class labels for samples. Each sample gets the class with the largest score from decision_function.
99
+ #
100
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the labels.
101
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]); otherwise ArgumentError is raised.
102
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
103
+ def predict(x)
104
+ x = ::Rumale::Validation.check_convert_sample_array(x)
105
+ if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
106
+ raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_training_samples.'
107
+ end
108
+
109
+ decision_values = decision_function(x) # decision_function repeats the conversion and shape check done above
110
+ n_samples = x.shape[0]
111
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }) # label of the highest-scoring column in each row
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/regressor'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module NearestNeighbors
10
+ # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
11
+ # Neighbors are found with the Euclidean distance, or from a user-supplied distance matrix when metric is 'precomputed'.
12
+ #
13
+ # @example
14
+ # require 'rumale/nearest_neighbors/k_neighbors_regressor'
15
+ #
16
+ # estimator =
17
+ # Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
18
+ # estimator.fit(training_samples, training_target_values)
19
+ # results = estimator.predict(testing_samples)
20
+ #
21
+ class KNeighborsRegressor < ::Rumale::Base::Estimator
22
+ include ::Rumale::Base::Regressor
23
+
24
+ # Return the prototypes for the nearest neighbor regressor.
25
+ # If the metric is 'precomputed', this returns nil because fit stores no prototypes in that case.
26
+ # Otherwise, this is a copy of the training samples given to the fit method.
27
+ # @return [Numo::DFloat, nil] (shape: [n_training_samples, n_features])
28
+ attr_reader :prototypes
29
+
30
+ # Return the values of the prototypes (a copy of the target values given to fit).
31
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
32
+ attr_reader :values
33
+
34
+ # Create a new regressor with the nearest neighbor rule.
35
+ #
36
+ # @param n_neighbors [Integer] The number of neighbors. It is capped at the number of training samples when predicting.
37
+ # @param metric [String] The metric to calculate the distances. Any value other than 'precomputed' is treated as 'euclidean'.
38
+ # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
39
+ # If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
40
+ def initialize(n_neighbors: 5, metric: 'euclidean')
41
+ super()
42
+ @params = {
43
+ n_neighbors: n_neighbors,
44
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean') # unknown metric names silently fall back to 'euclidean'
45
+ }
46
+ end
47
+
48
+ # Fit the model with given training data.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
51
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]); otherwise ArgumentError is raised.
52
+ # @param y [Numo::DFloat] (shape: [n_training_samples, n_outputs]) The target values to be used for fitting the model.
53
+ # @return [KNeighborsRegressor] The learned regressor itself.
54
+ def fit(x, y)
55
+ x = ::Rumale::Validation.check_convert_sample_array(x)
56
+ y = ::Rumale::Validation.check_convert_target_value_array(y)
57
+ ::Rumale::Validation.check_sample_size(x, y)
58
+ if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
59
+ raise ArgumentError, 'Expect the input distance matrix to be square.'
60
+ end
61
+
62
+ @prototypes = x.dup if @params[:metric] == 'euclidean' # nothing is stored for 'precomputed'; distances are supplied at prediction time
63
+ @values = y.dup
64
+ self
65
+ end
66
+
67
+ # Predict values for samples. Each prediction is the mean of the target values of the nearest training samples.
68
+ #
69
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the values.
70
+ # If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]); otherwise ArgumentError is raised.
71
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_outputs]) Predicted values per sample.
72
+ def predict(x)
73
+ x = ::Rumale::Validation.check_convert_sample_array(x)
74
+ if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
75
+ raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
76
+ end
77
+
78
+ # Gather the problem sizes; n_outputs is nil when the stored target values are one-dimensional.
79
+ n_samples = x.shape[0]
80
+ n_prototypes, n_outputs = @values.shape
81
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min # never ask for more neighbors than there are training samples
82
+ # Average the target values of the nearest prototypes for each given sample.
83
+ distance_matrix = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x, @prototypes) # 'precomputed': x already holds the distances
84
+ predicted_values = Array.new(n_samples) do |n|
85
+ neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors] # sort [distance, index] pairs; equal distances are ordered by the smaller index
86
+ n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a # scalar mean for 1-D targets, per-output mean along axis 0 otherwise
87
+ end
88
+ Numo::DFloat[*predicted_values]
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of the classes that implement estimators based on the nearest neighbors rule.
6
+ module NearestNeighbors
7
+ # @!visibility private
8
+ VERSION = '0.24.0' # version string of the rumale-nearest_neighbors gem
9
+ end
10
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'nearest_neighbors/k_neighbors_classifier'
6
+ require_relative 'nearest_neighbors/k_neighbors_regressor'
7
+ require_relative 'nearest_neighbors/version'
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-nearest_neighbors
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: 'Rumale::NearestNeighbors provides classifier and regression based on
42
+ nearest neighbors rule with Rumale interface.
43
+
44
+ '
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/nearest_neighbors.rb
54
+ - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
55
+ - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
56
+ - lib/rumale/nearest_neighbors/version.rb
57
+ homepage: https://github.com/yoshoku/rumale
58
+ licenses:
59
+ - BSD-3-Clause
60
+ metadata:
61
+ homepage_uri: https://github.com/yoshoku/rumale
62
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-nearest_neighbors
63
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
64
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
65
+ rubygems_mfa_required: 'true'
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubygems_version: 3.3.26
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: Rumale::NearestNeighbors provides classifier and regression based on nearest
85
+ neighbors rule with Rumale interface.
86
+ test_files: []