rumale-kernel_machine 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6c53bda994de48371ca60e6fb8e957a0f77e96943de0f73d989deeaa9edbcf89
4
+ data.tar.gz: 6a08ecbfd73f1e9b64a29ffd11114766bebc5b9b8cf8def6325e89528af50b82
5
+ SHA512:
6
+ metadata.gz: 7a1b192c7e8c31f769d2fef8ccf8be830bf0e129e0249269e807cd9e69f7bed06c8968612515b4d7817de59be4755e713dbf1a0e206b9dc4f6cd55308cf0111e
7
+ data.tar.gz: cd368663f5f48eca166ac8b3832df2c26e24ac09e64164886bf3815ba308f0cb64fd908768765d126d62d5103ec1edc99adf8a54f1900ee6581bbe5e17a65cb3
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::KernelMachine
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-kernel_machine.svg)](https://badge.fury.io/rb/rumale-kernel_machine)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-kernel_machine/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::KernelMachine provides kernel method-based algorithms,
9
+ such as Kernel Support Vector Machine, Kernel Principal Component Analysis, and Kernel Ridge Regression,
10
+ with Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-kernel_machine'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-kernel_machine
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - KernelMachine](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
module Rumale
  module KernelMachine
    # KernelFDA is a transformer implementing Kernel Fisher Discriminant Analysis.
    # It works on precomputed kernel matrices rather than on raw feature vectors.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale/pairwise_metric'
    #   require 'rumale/kernel_machine/kernel_fda'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
    #   kfda = Rumale::KernelMachine::KernelFDA.new
    #   mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
    #   mapped_test_samples = kfda.transform(kernel_mat_test)
    #
    # *Reference*
    # - Baudat, G., and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
    class KernelFDA < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Returns the eigenvectors for embedding.
      # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
      attr_reader :alphas

      # Create a new transformer with Kernel FDA.
      #
      # @param n_components [Integer] The number of components.
      #   When nil, fit uses (number of classes - 1), capped at the number of training samples.
      # @param reg_param [Float] The regularization parameter added to the diagonal of the within scatter matrix.
      def initialize(n_components: nil, reg_param: 1e-8)
        super()
        @params = { n_components: n_components, reg_param: reg_param }
      end

      # Fit the model with given training data.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @raise [ArgumentError] If the kernel matrix is not square.
      # @raise [RuntimeError] If Numo::Linalg is not loaded.
      # @return [KernelFDA] The learned transformer itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)
        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
        raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?(warning: false)

        n_samples = x.shape[0]
        @classes = Numo::Int32[*y.to_a.uniq.sort]

        # Decide how many discriminant directions to keep.
        requested = @params[:n_components]
        requested = @classes.size - 1 if requested.nil?
        n_components = [n_samples, requested].min

        # Center the kernel matrix; the means are stored because transform reuses them.
        @row_mean = x.mean(0)
        @all_mean = @row_mean.sum.fdiv(n_samples)
        centered = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean

        # Accumulate the block matrix of per-class membership (each block scaled by 1 / class size).
        class_mat = @classes.to_a.reduce(Numo::DFloat.zeros(n_samples, n_samples)) do |acc, label|
          membership = y.eq(label)
          acc + Numo::DFloat.cast(membership).outer(membership) / membership.count
        end

        # Between and (regularized) within scatter matrices.
        between_mat = centered.dot(class_mat).dot(centered.transpose)
        within_mat = centered.dot(centered.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)

        # Solve the generalized eigenproblem, requesting only the last n_components indices.
        wanted = (n_samples - n_components)...n_samples
        _, eig_vecs = Numo::Linalg.eigh(between_mat, within_mat, vals_range: wanted)
        # Flip the column order of the returned eigenvectors before storing them.
        @alphas = eig_vecs.reverse(1).dup
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
      #   The kernel matrix of the training data to be used for fitting the model and transformed.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        fitted = fit(x, y)
        fitted.transform(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to be transformed.
      # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
      #   A one-dimensional array is returned when the n_components parameter was explicitly set to 1.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # Center the test kernel matrix with the statistics stored by fit.
        n_training_samples = @row_mean.shape[0]
        col_mean = x.sum(axis: 1) / n_training_samples
        centered = x - col_mean.expand_dims(1) - @row_mean + @all_mean
        projected = centered.dot(@alphas)
        return projected[true, 0].dup if @params[:n_components] == 1

        projected
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
module Rumale
  module KernelMachine
    # KernelPCA is a transformer implementing Kernel Principal Component Analysis.
    # It works on precomputed kernel matrices rather than on raw feature vectors.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale/pairwise_metric'
    #   require 'rumale/kernel_machine/kernel_pca'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kpca = Rumale::KernelMachine::KernelPCA.new(n_components: 2)
    #   mapped_training_samples = kpca.fit_transform(kernel_mat_train)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   mapped_test_samples = kpca.transform(kernel_mat_test)
    #
    # *Reference*
    # - Scholkopf, B., Smola, A., and Muller, K-R., "Nonlinear Component Analysis as a Kernel Eigenvalue Problem," Neural Computation, Vol. 10 (5), pp. 1299--1319, 1998.
    class KernelPCA < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Returns the eigenvalues of the centered kernel matrix.
      # @return [Numo::DFloat] (shape: [n_components])
      attr_reader :lambdas

      # Returns the eigenvectors of the centered kernel matrix.
      # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
      attr_reader :alphas

      # Create a new transformer with Kernel PCA.
      #
      # @param n_components [Integer] The number of components.
      def initialize(n_components: 2)
        super()
        @params = { n_components: n_components }
      end

      # Fit the model with given training data.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @overload fit(x) -> KernelPCA
      #   @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #     The kernel matrix of the training data to be used for fitting the model.
      # @raise [ArgumentError] If the kernel matrix is not square.
      # @raise [RuntimeError] If Numo::Linalg is not loaded.
      # @return [KernelPCA] The learned transformer itself.
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
        raise 'KernelPCA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?(warning: false)

        n_samples = x.shape[0]

        # Center the kernel matrix; the means are stored because transform reuses them.
        @row_mean = x.mean(0)
        @all_mean = @row_mean.sum.fdiv(n_samples)
        centered = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean

        # Request only the last n_components eigenpairs, then flip their order before storing.
        wanted = (n_samples - @params[:n_components])...n_samples
        eig_vals, eig_vecs = Numo::Linalg.eigh(centered, vals_range: wanted)
        @alphas = eig_vecs.reverse(1).dup
        @lambdas = eig_vals.reverse.dup

        # Scale each eigenvector by 1 / sqrt(eigenvalue) to form the projection matrix.
        inv_sqrt_lambdas = 1.0 / Numo::NMath.sqrt(@lambdas)
        @transform_mat = @alphas.dot(inv_sqrt_lambdas.diag)
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #   @param x [Numo::DFloat] (shape: [n_samples, n_samples])
      #     The kernel matrix of the training data to be used for fitting the model and transformed.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fitted = fit(x)
        fitted.transform(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to be transformed.
      # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
      #   A one-dimensional array is returned when n_components is 1.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # Center the test kernel matrix with the statistics stored by fit.
        n_training_samples = @row_mean.shape[0]
        col_mean = x.sum(axis: 1) / n_training_samples
        centered = x - col_mean.expand_dims(1) - @row_mean + @all_mean
        projected = centered.dot(@transform_mat)
        return projected[true, 0].dup if @params[:n_components] == 1

        projected
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/regressor'
5
+ require 'rumale/validation'
6
+
7
module Rumale
  module KernelMachine
    # KernelRidge is a class that implements kernel ridge regression.
    # It works on precomputed kernel matrices rather than on raw feature vectors.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale/pairwise_metric'
    #   require 'rumale/kernel_machine/kernel_ridge'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
    #   kridge.fit(kernel_mat_train, training_values)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   results = kridge.predict(kernel_mat_test)
    class KernelRidge < ::Rumale::Base::Estimator
      include ::Rumale::Base::Regressor

      # Return the weight vector.
      # @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
      attr_reader :weight_vec

      # Create a new regressor with kernel ridge regression.
      #
      # @param reg_param [Numeric/Numo::DFloat] The regularization parameter.
      #   A scalar (Float or Integer) is shared by all outputs.
      #   An array-like value (e.g. Numo::DFloat) supplies one parameter per output, indexed by output column.
      def initialize(reg_param: 1.0)
        super()
        @params = {
          reg_param: reg_param
        }
      end

      # Fit the model with given training data.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
      # @raise [ArgumentError] If the kernel matrix is not square.
      # @raise [RuntimeError] If Numo::Linalg is not loaded.
      # @return [KernelRidge] The learned regressor itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_target_value_array(y)
        ::Rumale::Validation.check_sample_size(x, y)
        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
        raise 'KernelRidge#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?(warning: false)

        n_samples = x.shape[0]
        reg_param = @params[:reg_param]

        # Any Numeric is a scalar parameter. Checking only for Float would push an Integer
        # into the per-output branch, where Integer#[] yields single bits instead of the value.
        if reg_param.is_a?(Numeric)
          @weight_vec = solve_regularized_system(x, y, reg_param)
        elsif y.ndim == 1
          # Single-output targets with an array-like parameter: use its first element.
          @weight_vec = solve_regularized_system(x, y, reg_param[0])
        else
          n_outputs = y.shape[1]
          @weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
          n_outputs.times do |n|
            @weight_vec[true, n] = solve_regularized_system(x, y[true, n], reg_param[n])
          end
        end

        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        x.dot(@weight_vec)
      end

      private

      # Solve (kernel_mat + reg * I) w = targets for w with the symmetric solver.
      def solve_regularized_system(kernel_mat, targets, reg)
        reg_kernel_mat = kernel_mat + Numo::DFloat.eye(kernel_mat.shape[0]) * reg
        Numo::Linalg.solve(reg_kernel_mat, targets, driver: 'sym')
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/utils'
6
+ require 'rumale/validation'
7
+
8
module Rumale
  module KernelMachine
    # KernelRidgeClassifier is a classifier built on kernel ridge regression.
    # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale/pairwise_metric'
    #   require 'rumale/kernel_machine/kernel_ridge_classifier'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
    #   kridge.fit(kernel_mat_train, training_labels)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   results = kridge.predict(kernel_mat_test)
    class KernelRidgeClassifier < ::Rumale::Base::Estimator
      include ::Rumale::Base::Classifier

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the weight vector.
      # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
      attr_reader :weight_vec

      # Create a new classifier with kernel ridge regression.
      #
      # @param reg_param [Float/Numo::DFloat] The regularization parameter.
      def initialize(reg_param: 1.0)
        super()
        @params = { reg_param: reg_param }
      end

      # Fit the model with given training data.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @raise [ArgumentError] If the kernel matrix is not square.
      # @raise [RuntimeError] If Numo::Linalg is not loaded.
      # @return [KernelRidgeClassifier] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)
        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
        raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?(warning: false)

        # Sorted unique labels; predict indexes into this array.
        @classes = Numo::NArray[*y.to_a.uniq.sort]
        # Rescale the binarized labels from { 0, 1 } to { -1, 1 }.
        targets = Numo::DFloat.cast(::Rumale::Utils.binarize_labels(y)) * 2 - 1

        # Solve the regularized linear system for all class columns at once.
        identity = Numo::DFloat.eye(x.shape[0])
        @weight_vec = Numo::Linalg.solve(x + identity * @params[:reg_param], targets, driver: 'sym')

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict values.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
      def decision_function(x)
        kernel_mat = ::Rumale::Validation.check_convert_sample_array(x)

        kernel_mat.dot(@weight_vec)
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        scores = decision_function(x)
        n_samples, n_classes = scores.shape
        # Subtract each row's starting offset from the max_index result to obtain a column index per row.
        row_offsets = Numo::Int32.new(n_samples).seq * n_classes
        winners = scores.max_index(axis: 1) - row_offsets
        @classes[winners].dup
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/probabilistic_output'
6
+ require 'rumale/validation'
7
+
8
module Rumale
  module KernelMachine
    # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
    # with stochastic gradient descent (SGD) optimization.
    # For multiclass classification problem, it uses one-vs-the-rest strategy.
    #
    # @note
    #   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
    #   https://github.com/yoshoku/rumale-svm
    #
    # @example
    #   require 'rumale/pairwise_metric'
    #   require 'rumale/kernel_machine/kernel_svc'
    #
    #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   estimator =
    #     Rumale::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
    #   estimator.fit(training_kernel_matrix, training_labels)
    #   testing_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
    #   results = estimator.predict(testing_kernel_matrix)
    #
    # *Reference*
    # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
    class KernelSVC < ::Rumale::Base::Estimator
      include ::Rumale::Base::Classifier

      # Return the weight vector for Kernel SVC.
      # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
      #   For problems with two or fewer classes it is one-dimensional (shape: [n_training_samples]).
      attr_reader :weight_vec

      # Return the class labels.
      # @return [Numo::Int32] (shape: [n_classes])
      attr_reader :classes

      # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

      # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param probability [Boolean] The flag indicating whether to perform probability estimation.
      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
      #   If nil is given, the methods do not execute in parallel.
      #   If zero or less is given, it becomes equal to the number of processors.
      #   This parameter is ignored if the Parallel gem is not loaded.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(reg_param: 1.0, max_iter: 1000, probability: false, n_jobs: nil, random_seed: nil)
        super()
        @params = {
          reg_param: reg_param,
          max_iter: max_iter,
          probability: probability,
          n_jobs: n_jobs,
          # NOTE: Kernel#srand without an argument reseeds the global generator and returns the
          # previous seed; kept as-is because results with an unspecified seed depend on it.
          random_seed: (random_seed || srand)
        }
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @raise [ArgumentError] If the kernel matrix is not square.
      # @return [KernelSVC] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)
        # Same precondition as the other kernel estimators; the optimizer indexes rows and
        # columns of x with the same sample ids, so a non-square matrix cannot be fitted.
        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]

        @classes = Numo::Int32[*y.to_a.uniq.sort]
        n_classes = @classes.size
        n_features = x.shape[1]

        if n_classes > 2
          # One-vs-the-rest: train one binary model per class.
          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
          @prob_param = Numo::DFloat.zeros(n_classes, 2)
          models = if enable_parallel?
                     parallel_map(n_classes) { |n| partial_fit(x, one_vs_rest_targets(y, @classes[n])) }
                   else
                     Array.new(n_classes) { |n| partial_fit(x, one_vs_rest_targets(y, @classes[n])) }
                   end
          models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
        else
          # Binary problem: the smallest label is the negative class, any other label is positive.
          negative_label = y.to_a.uniq.min
          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
          @weight_vec, @prob_param = partial_fit(x, bin_y)
        end

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        x.dot(@weight_vec.transpose)
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        if @classes.size <= 2
          # Map the sign of the score back to the training labels: negative scores give the
          # smallest label, non-negative scores the largest. For labels { -1, 1 } this equals
          # the raw sign; a fixed -1/1 output would be wrong for labels such as { 0, 1 }.
          label_ids = Numo::Int32.cast(decision_function(x).ge(0.0)) * (@classes.size - 1)
          return @classes[label_ids].dup
        end

        n_samples, = x.shape
        decision_values = decision_function(x)
        predicted = if enable_parallel?
                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
                    else
                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
                    end
        Numo::Int32.asarray(predicted)
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        if @classes.size > 2
          # Per-class sigmoid outputs, normalized so that each row sums to one.
          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
          return (probs.transpose / probs.sum(axis: 1)).transpose.dup
        end

        # Binary problem: column 1 holds the positive class probability, column 0 its complement.
        n_samples, = x.shape
        probs = Numo::DFloat.zeros(n_samples, 2)
        probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
        probs[true, 0] = 1.0 - probs[true, 1]
        probs
      end

      private

      # Encode labels as +1 for samples of positive_label and -1 for all other samples.
      def one_vs_rest_targets(y, positive_label)
        Numo::Int32.cast(y.eq(positive_label)) * 2 - 1
      end

      # Train one binary model on targets in { -1, 1 }.
      # Returns [weight vector, sigmoid parameters for probability estimation].
      def partial_fit(x, bin_y)
        # Initialize some variables.
        n_training_samples = x.shape[0]
        rand_ids = []
        weight_vec = Numo::DFloat.zeros(n_training_samples)
        # Work on a copy so that every binary model starts from the same generator state.
        sub_rng = @rng.dup
        # Start optimization.
        @params[:max_iter].times do |t|
          # random sampling (reshuffle once every sample id has been visited)
          rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
          target_id = rand_ids.shift
          # update the weight vector
          func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
          # Force floating-point division: bin_y holds Integers, so an Integer reg_param
          # would otherwise truncate this factor to 0 or -1.
          func *= bin_y[target_id] / (@params[:reg_param] * (t + 1)).to_f
          weight_vec[target_id] += 1.0 if func < 1.0
        end
        signed_weights = weight_vec * bin_y
        sigmoid_params = if @params[:probability]
                           ::Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(signed_weights), bin_y)
                         else
                           Numo::DFloat[1, 0]
                         end
        [signed_weights, sigmoid_params]
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of the classes that implement kernel method-based estimators.
6
+ module KernelMachine
7
+ # @!visibility private
8
+ VERSION = '0.24.0'
9
+ end
10
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'kernel_machine/kernel_fda'
6
+ require_relative 'kernel_machine/kernel_pca'
7
+ require_relative 'kernel_machine/kernel_ridge'
8
+ require_relative 'kernel_machine/kernel_ridge_classifier'
9
+ require_relative 'kernel_machine/kernel_svc'
10
+ require_relative 'kernel_machine/version'
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-kernel_machine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::KernelMachine provides kernel method-based algorithms,
43
+ such as Kernel Support Vector Machine, Kernel Principal Component Analysis, and Kernel Ridge Regression,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/kernel_machine.rb
54
+ - lib/rumale/kernel_machine/kernel_fda.rb
55
+ - lib/rumale/kernel_machine/kernel_pca.rb
56
+ - lib/rumale/kernel_machine/kernel_ridge.rb
57
+ - lib/rumale/kernel_machine/kernel_ridge_classifier.rb
58
+ - lib/rumale/kernel_machine/kernel_svc.rb
59
+ - lib/rumale/kernel_machine/version.rb
60
+ homepage: https://github.com/yoshoku/rumale
61
+ licenses:
62
+ - BSD-3-Clause
63
+ metadata:
64
+ homepage_uri: https://github.com/yoshoku/rumale
65
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-kernel_machine
66
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
67
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
68
+ rubygems_mfa_required: 'true'
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubygems_version: 3.3.26
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Rumale::KernelMachine provides kernel method-based algorithms with Rumale
88
+ interface.
89
+ test_files: []