rumale 0.23.3 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/transformer'
|
|
5
|
-
|
|
6
|
-
module Rumale
  module KernelMachine
    # KernelPCA performs Principal Component Analysis on a precomputed kernel matrix
    # (Kernel Principal Component Analysis).
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kpca = Rumale::KernelMachine::KernelPCA.new(n_components: 2)
    #   mapped_training_samples = kpca.fit_transform(kernel_mat_train)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   mapped_test_samples = kpca.transform(kernel_mat_test)
    #
    # *Reference*
    # - Scholkopf, B., Smola, A., and Muller, K-R., "Nonlinear Component Analysis as a Kernel Eigenvalue Problem," Neural Computation, Vol. 10 (5), pp. 1299--1319, 1998.
    class KernelPCA
      include Base::BaseEstimator
      include Base::Transformer

      # Eigenvalues of the centered kernel matrix that were kept by #fit.
      # @return [Numo::DFloat] (shape: [n_components])
      attr_reader :lambdas

      # Eigenvectors of the centered kernel matrix that were kept by #fit.
      # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
      attr_reader :alphas

      # Build an unfitted Kernel PCA transformer.
      #
      # @param n_components [Integer] The number of components.
      def initialize(n_components: 2)
        check_params_numeric(n_components: n_components)
        @params = { n_components: n_components }
        @alphas = nil
        @lambdas = nil
        @transform_mat = nil
        @row_mean = nil
        @all_mean = nil
      end

      # Learn the projection from the training kernel matrix.
      # Numo::Linalg must be loaded before calling this method.
      #
      # @overload fit(x) -> KernelPCA
      #   @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #     The kernel matrix of the training data to be used for fitting the model.
      # @raise [ArgumentError] if the kernel matrix is not square.
      # @raise [RuntimeError] if Numo::Linalg is not available.
      # @return [KernelPCA] The learned transformer itself.
      def fit(x, _y = nil)
        x = check_convert_sample_array(x)
        n_rows, n_cols = x.shape
        unless n_rows == n_cols
          raise ArgumentError, 'Expect the kernel matrix of training data to be square.'
        end
        unless enable_linalg?
          raise 'KernelPCA#fit requires Numo::Linalg but that is not loaded.'
        end

        # These statistics are stored because #transform needs them to center new kernel matrices.
        @row_mean = x.mean(0)
        @all_mean = @row_mean.sum.fdiv(n_rows)
        col_mean = x.mean(1).expand_dims(1)
        centered = x - col_mean - @row_mean + @all_mean

        # Request only the last n_components eigenpairs, then flip their order.
        # NOTE(review): presumably eigh returns eigenvalues in ascending order, so the flip puts the largest first.
        kept_range = (n_rows - @params[:n_components])...n_rows
        eig_vals, eig_vecs = Numo::Linalg.eigh(centered, vals_range: kept_range)
        @alphas = eig_vecs.reverse(1).dup
        @lambdas = eig_vals.reverse.dup

        # Scale each eigenvector by 1 / sqrt(eigenvalue).
        inv_sqrt_lambdas = 1.0 / Numo::NMath.sqrt(@lambdas)
        @transform_mat = @alphas.dot(inv_sqrt_lambdas.diag)
        self
      end

      # Fit on the training kernel matrix and project that same matrix.
      # Numo::Linalg must be loaded before calling this method.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #   @param x [Numo::DFloat] (shape: [n_samples, n_samples])
      #     The kernel matrix of the training data to be used for fitting the model and transformed.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, _y = nil)
        x = check_convert_sample_array(x)
        fit(x)
        transform(x)
      end

      # Project a kernel matrix with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to be transformed.
      # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
      #   When n_components is 1, a copy of the single column is returned instead.
      def transform(x)
        x = check_convert_sample_array(x)
        n_training_samples = @row_mean.shape[0]
        col_mean = x.sum(1) / n_training_samples
        centered = x - col_mean.expand_dims(1) - @row_mean + @all_mean
        projected = centered.dot(@transform_mat)
        return projected unless @params[:n_components] == 1

        projected[true, 0].dup
      end
    end
  end
end
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/regressor'
|
|
5
|
-
|
|
6
|
-
module Rumale
  module KernelMachine
    # KernelRidge implements kernel ridge regression on a precomputed kernel matrix.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
    #   kridge.fit(kernel_mat_train, training_values)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   results = kridge.predict(kernel_mat_test)
    class KernelRidge
      include Base::BaseEstimator
      include Base::Regressor

      # The weight vector learned by #fit.
      # @return [Numo::DFloat] (shape: [n_training_sample, n_outputs])
      attr_reader :weight_vec

      # Build an unfitted kernel ridge regressor.
      #
      # @param reg_param [Float/Numo::DFloat] The regularization parameter.
      #   A 1-D Numo::DFloat supplies one value per output column of y.
      # @raise [TypeError] if reg_param is neither a Float nor a Numo::DFloat.
      # @raise [ArgumentError] if reg_param is a Numo::DFloat that is not 1-D.
      def initialize(reg_param: 1.0)
        per_output = reg_param.is_a?(Numo::DFloat)
        unless per_output || reg_param.is_a?(Float)
          raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat'
        end
        if per_output && reg_param.shape.size != 1
          raise ArgumentError, 'Expect reg_param array to be 1-D arrray'
        end

        @params = { reg_param: reg_param }
        @weight_vec = nil
      end

      # Fit the model by solving the regularized linear system (K + reg_param * I) w = y.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
      # @raise [ArgumentError] if the kernel matrix is not square, or if an array-valued
      #   reg_param does not have one element per output column of y.
      # @raise [RuntimeError] if Numo::Linalg is not available.
      # @return [KernelRidge] The learned regressor itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_tvalue_array(y)
        check_sample_tvalue_size(x, y)
        unless x.shape[0] == x.shape[1]
          raise ArgumentError, 'Expect the kernel matrix of training data to be square.'
        end
        unless enable_linalg?
          raise 'KernelRidge#fit requires Numo::Linalg but that is not loaded.'
        end

        n_samples = x.shape[0]
        reg_param = @params[:reg_param]

        # Scalar regularization: a single solve handles every output at once.
        if reg_param.is_a?(Float)
          regularized = x + Numo::DFloat.eye(n_samples) * reg_param
          @weight_vec = Numo::Linalg.solve(regularized, y, driver: 'sym')
          return self
        end

        # Per-output regularization: solve one system per column of y.
        unless y.shape[1] == reg_param.shape[0]
          raise ArgumentError, 'Expect y and reg_param to have the same number of elements.'
        end

        n_outputs = y.shape[1]
        @weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
        (0...n_outputs).each do |col|
          regularized = x + Numo::DFloat.eye(n_samples) * reg_param[col]
          @weight_vec[true, col] = Numo::Linalg.solve(regularized, y[true, col], driver: 'sym')
        end

        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
      def predict(x)
        kernel_mat = check_convert_sample_array(x)
        kernel_mat.dot(@weight_vec)
      end
    end
  end
end
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/classifier'
|
|
5
|
-
require 'rumale/preprocessing/label_binarizer'
|
|
6
|
-
|
|
7
|
-
module Rumale
  module KernelMachine
    # KernelRidgeClassifier is a classifier built on kernel ridge regression.
    # It converts the labels to target values { -1, 1 } and performs kernel ridge regression on them.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale'
    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
    #   kridge.fit(kernel_mat_train, training_values)
    #
    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
    #   results = kridge.predict(kernel_mat_test)
    class KernelRidgeClassifier
      include Base::BaseEstimator
      include Base::Classifier

      # The class labels seen during #fit.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # The weight vector learned by #fit.
      # @return [Numo::DFloat] (shape: [n_training_sample, n_classes])
      attr_reader :weight_vec

      # Build an unfitted kernel ridge classifier.
      #
      # @param reg_param [Float/Numo::DFloat] The regularization parameter.
      def initialize(reg_param: 1.0)
        @params = { reg_param: reg_param }
        @classes = nil
        @weight_vec = nil
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @raise [ArgumentError] if the kernel matrix is not square.
      # @raise [RuntimeError] if Numo::Linalg is not available.
      # @return [KernelRidgeClassifier] The learned classifier itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)
        unless x.shape[0] == x.shape[1]
          raise ArgumentError, 'Expect the kernel matrix of training data to be square.'
        end
        unless enable_linalg?
          raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.'
        end

        # Rescale the binarizer output (v -> v * 2 - 1) to obtain the regression targets.
        @encoder = Rumale::Preprocessing::LabelBinarizer.new
        binarized = @encoder.fit_transform(y)
        targets = Numo::DFloat.cast(binarized) * 2 - 1
        @classes = Numo::NArray[*@encoder.classes]

        # Solve (K + reg_param * I) w = targets.
        identity = Numo::DFloat.eye(x.shape[0])
        regularized = x + identity * @params[:reg_param]
        @weight_vec = Numo::Linalg.solve(regularized, targets, driver: 'sym')

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict values.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
      def decision_function(x)
        kernel_mat = check_convert_sample_array(x)
        kernel_mat.dot(@weight_vec)
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        scores = decision_function(x)
        n_samples, n_classes = scores.shape
        # NOTE(review): max_index(axis: 1) appears to yield indices into the flattened array;
        # subtracting each row's offset turns them into per-row column ids.
        flat_best = scores.max_index(axis: 1)
        row_offsets = Numo::Int32.new(n_samples).seq * n_classes
        label_ids = flat_best - row_offsets
        @classes[label_ids].dup
      end
    end
  end
end
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/classifier'
|
|
5
|
-
require 'rumale/probabilistic_output'
|
|
6
|
-
|
|
7
|
-
module Rumale
  # This module consists of the classes that implement kernel method-based estimator.
  module KernelMachine
    # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
    # with stochastic gradient descent (SGD) optimization.
    # For multiclass classification problem, it uses one-vs-the-rest strategy.
    #
    # @note
    #   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
    #   https://github.com/yoshoku/rumale-svm
    #
    # @example
    #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   estimator =
    #     Rumale::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
    #   estimator.fit(training_kernel_matrix, training_labels)
    #   testing_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
    #   results = estimator.predict(testing_kernel_matrix)
    #
    # *Reference*
    # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
    class KernelSVC
      include Base::BaseEstimator
      include Base::Classifier

      # Return the weight vector for Kernel SVC.
      # @return [Numo::DFloat] (shape: [n_classes, n_training_sample])
      attr_reader :weight_vec

      # Return the class labels.
      # @return [Numo::Int32] (shape: [n_classes])
      attr_reader :classes

      # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

      # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param probability [Boolean] The flag indicating whether to perform probability estimation.
      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
      #   If nil is given, the methods do not execute in parallel.
      #   If zero or less is given, it becomes equal to the number of processors.
      #   This parameter is ignored if the Parallel gem is not loaded.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(reg_param: 1.0, max_iter: 1000, probability: false, n_jobs: nil, random_seed: nil)
        check_params_numeric(reg_param: reg_param, max_iter: max_iter)
        check_params_boolean(probability: probability)
        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
        check_params_positive(reg_param: reg_param, max_iter: max_iter)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:max_iter] = max_iter
        @params[:probability] = probability
        @params[:n_jobs] = n_jobs
        @params[:random_seed] = random_seed
        # When no seed is given, use the value returned by Kernel#srand.
        @params[:random_seed] ||= srand
        @weight_vec = nil
        @prob_param = nil
        @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
      #   The kernel matrix of the training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @raise [ArgumentError] if the kernel matrix is not square.
      # @return [KernelSVC] The learned classifier itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)
        # The optimizer keeps one weight per training sample and multiplies it with kernel rows,
        # so the training kernel matrix has to be square.
        unless x.shape[0] == x.shape[1]
          raise ArgumentError, 'Expect the kernel matrix of training data to be square.'
        end

        @classes = Numo::Int32[*y.to_a.uniq.sort]
        n_classes = @classes.size
        n_features = x.shape[1]

        if n_classes > 2
          # One-vs-the-rest: train an independent binary model for every class.
          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
          @prob_param = Numo::DFloat.zeros(n_classes, 2)
          models = if enable_parallel?
                     # :nocov:
                     parallel_map(n_classes) { |n| partial_fit(x, one_vs_rest_targets(y, @classes[n])) }
                     # :nocov:
                   else
                     Array.new(n_classes) { |n| partial_fit(x, one_vs_rest_targets(y, @classes[n])) }
                   end
          models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
        else
          # Binary problem: the smallest label is the negative class (-1), everything else is +1.
          negative_label = y.to_a.uniq.min
          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
          @weight_vec, @prob_param = partial_fit(x, bin_y)
        end

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
        x = check_convert_sample_array(x)

        x.dot(@weight_vec.transpose)
      end

      # Predict class labels for samples.
      #
      # For binary problems the sign of the decision value is mapped back onto the fitted
      # class labels (negative -> classes[0], non-negative -> classes[1]) rather than
      # returning raw -1/+1 values.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        decision_values = decision_function(x)

        if @classes.size <= 2
          # Clamp the index so that a model fitted on a single class always predicts that class.
          last_id = @classes.size - 1
          signs = decision_values.ge(0.0).to_a
          return Numo::Int32.asarray(signs.map { |flag| @classes[[flag, last_id].min] })
        end

        n_samples, = x.shape
        predicted = if enable_parallel?
                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
                    else
                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
                    end
        Numo::Int32.asarray(predicted)
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
        x = check_convert_sample_array(x)

        if @classes.size > 2
          # Apply each class's sigmoid to its scores, then normalize every row to sum to one.
          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
          return (probs.transpose / probs.sum(axis: 1)).transpose.dup
        end

        # Binary problem: column 1 is the positive class, column 0 its complement.
        n_samples, = x.shape
        probs = Numo::DFloat.zeros(n_samples, 2)
        probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
        probs[true, 0] = 1.0 - probs[true, 1]
        probs
      end

      private

      # Build the -1/+1 target vector that marks positive_label against all other labels.
      def one_vs_rest_targets(y, positive_label)
        Numo::Int32.cast(y.eq(positive_label)) * 2 - 1
      end

      # Train one binary model on the kernel matrix x and the -1/+1 targets bin_y.
      # Returns [weights, sigmoid_params]; sigmoid_params is [1, 0] when probability
      # estimation is disabled.
      def partial_fit(x, bin_y)
        # Initialize some variables.
        n_training_samples = x.shape[0]
        rand_ids = []
        weight_vec = Numo::DFloat.zeros(n_training_samples)
        # Work on a copy of the generator so that @rng itself is not advanced here.
        sub_rng = @rng.dup
        # Start optimization.
        @params[:max_iter].times do |t|
          # random sampling
          rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
          target_id = rand_ids.shift
          # update the weight vector
          func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
          # fdiv keeps the scale factor a Float even when reg_param is an Integer;
          # plain `/` would truncate it to 0 or -1 in that case.
          func *= bin_y[target_id].fdiv(@params[:reg_param] * (t + 1))
          weight_vec[target_id] += 1.0 if func < 1.0
        end
        w = weight_vec * bin_y
        prob_param = if @params[:probability]
                       Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
                     else
                       Numo::DFloat[1, 0]
                     end
        [w, prob_param]
      end
    end
  end
end
|