rumale 0.23.3 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,97 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- module KernelMachine
8
- # KernelPCA is a class that implements Kernel Principal Component Analysis.
9
- #
10
- # @example
11
- # require 'numo/linalg/autoloader'
12
- #
13
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
14
- # kpca = Rumale::KernelMachine::KernelPCA.new(n_components: 2)
15
- # mapped_training_samples = kpca.fit_transform(kernel_mat_train)
16
- #
17
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
18
- # mapped_test_samples = kpca.transform(kernel_mat_test)
19
- #
20
- # *Reference*
21
- # - Scholkopf, B., Smola, A., and Muller, K-R., "Nonlinear Component Analysis as a Kernel Eigenvalue Problem," Neural Computation, Vol. 10 (5), pp. 1299--1319, 1998.
22
- class KernelPCA
23
- include Base::BaseEstimator
24
- include Base::Transformer
25
-
26
- # Returns the eigenvalues of the centered kernel matrix.
27
- # @return [Numo::DFloat] (shape: [n_components])
28
- attr_reader :lambdas
29
-
30
- # Returns the eigenvectors of the centered kernel matrix.
31
- # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
32
- attr_reader :alphas
33
-
34
- # Create a new transformer with Kernel PCA.
35
- #
36
- # @param n_components [Integer] The number of components.
37
- def initialize(n_components: 2)
38
- check_params_numeric(n_components: n_components)
39
- @params = {}
40
- @params[:n_components] = n_components
41
- @alphas = nil
42
- @lambdas = nil
43
- @transform_mat = nil
44
- @row_mean = nil
45
- @all_mean = nil
46
- end
47
-
48
- # Fit the model with given training data.
49
- # To execute this method, Numo::Linalg must be loaded.
50
- #
51
- # @overload fit(x) -> KernelPCA
52
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
53
- # The kernel matrix of the training data to be used for fitting the model.
54
- # @return [KernelPCA] The learned transformer itself.
55
- def fit(x, _y = nil)
56
- x = check_convert_sample_array(x)
57
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
58
- raise 'KernelPCA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
59
-
60
- n_samples = x.shape[0]
61
- @row_mean = x.mean(0)
62
- @all_mean = @row_mean.sum.fdiv(n_samples)
63
- centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
64
- eig_vals, eig_vecs = Numo::Linalg.eigh(centered_kernel_mat, vals_range: (n_samples - @params[:n_components])...n_samples)
65
- @alphas = eig_vecs.reverse(1).dup
66
- @lambdas = eig_vals.reverse.dup
67
- @transform_mat = @alphas.dot((1.0 / Numo::NMath.sqrt(@lambdas)).diag)
68
- self
69
- end
70
-
71
- # Fit the model with training data, and then transform them with the learned model.
72
- # To execute this method, Numo::Linalg must be loaded.
73
- #
74
- # @overload fit_transform(x) -> Numo::DFloat
75
- # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
76
- # The kernel matrix of the training data to be used for fitting the model and transformed.
77
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
78
- def fit_transform(x, _y = nil)
79
- x = check_convert_sample_array(x)
80
- fit(x).transform(x)
81
- end
82
-
83
- # Transform the given data with the learned model.
84
- #
85
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
86
- # The kernel matrix between testing samples and training samples to be transformed.
87
- # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
88
- def transform(x)
89
- x = check_convert_sample_array(x)
90
- col_mean = x.sum(1) / @row_mean.shape[0]
91
- centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
92
- transformed = centered_kernel_mat.dot(@transform_mat)
93
- @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
94
- end
95
- end
96
- end
97
- end
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/regressor'
5
-
6
- module Rumale
7
- module KernelMachine
8
- # KernelRidge is a class that implements kernel ridge regression.
9
- #
10
- # @example
11
- # require 'numo/linalg/autoloader'
12
- #
13
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
14
- # kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
15
- # kridge.fit(kernel_mat_train, training_values)
16
- #
17
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
18
- # results = kridge.predict(kernel_mat_test)
19
- class KernelRidge
20
- include Base::BaseEstimator
21
- include Base::Regressor
22
-
23
- # Return the weight vector.
24
- # @return [Numo::DFloat] (shape: [n_training_sample, n_outputs])
25
- attr_reader :weight_vec
26
-
27
- # Create a new regressor with kernel ridge regression.
28
- #
29
- # @param reg_param [Float/Numo::DFloat] The regularization parameter.
30
- def initialize(reg_param: 1.0)
31
- raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
32
- raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1
33
-
34
- @params = {}
35
- @params[:reg_param] = reg_param
36
- @weight_vec = nil
37
- end
38
-
39
- # Fit the model with given training data.
40
- #
41
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
42
- # The kernel matrix of the training data to be used for fitting the model.
43
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
44
- # @return [KernelRidge] The learned regressor itself.
45
- def fit(x, y)
46
- x = check_convert_sample_array(x)
47
- y = check_convert_tvalue_array(y)
48
- check_sample_tvalue_size(x, y)
49
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
50
- raise 'KernelRidge#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
51
-
52
- n_samples = x.shape[0]
53
-
54
- if @params[:reg_param].is_a?(Float)
55
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
56
- @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
57
- else
58
- raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]
59
-
60
- n_outputs = y.shape[1]
61
- @weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
62
- n_outputs.times do |n|
63
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param][n]
64
- @weight_vec[true, n] = Numo::Linalg.solve(reg_kernel_mat, y[true, n], driver: 'sym')
65
- end
66
- end
67
-
68
- self
69
- end
70
-
71
- # Predict values for samples.
72
- #
73
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
74
- # The kernel matrix between testing samples and training samples to predict values.
75
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
76
- def predict(x)
77
- x = check_convert_sample_array(x)
78
- x.dot(@weight_vec)
79
- end
80
- end
81
- end
82
- end
@@ -1,92 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/preprocessing/label_binarizer'
6
-
7
- module Rumale
8
- module KernelMachine
9
- # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
10
- # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
11
- #
12
- # @example
13
- # require 'numo/linalg/autoloader'
14
- # require 'rumale'
15
- #
16
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
17
- # kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
18
- # kridge.fit(kernel_mat_train, training_values)
19
- #
20
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
21
- # results = kridge.predict(kernel_mat_test)
22
- class KernelRidgeClassifier
23
- include Base::BaseEstimator
24
- include Base::Classifier
25
-
26
- # Return the class labels.
27
- # @return [Numo::Int32] (size: n_classes)
28
- attr_reader :classes
29
-
30
- # Return the weight vector.
31
- # @return [Numo::DFloat] (shape: [n_training_sample, n_classes])
32
- attr_reader :weight_vec
33
-
34
- # Create a new classifier with kernel ridge regression.
35
- #
36
- # @param reg_param [Float/Numo::DFloat] The regularization parameter.
37
- def initialize(reg_param: 1.0)
38
- @params = {}
39
- @params[:reg_param] = reg_param
40
- @classes = nil
41
- @weight_vec = nil
42
- end
43
-
44
- # Fit the model with given training data.
45
- #
46
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
47
- # The kernel matrix of the training data to be used for fitting the model.
48
- # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
49
- # @return [KernelRidgeClassifier] The learned classifier itself.
50
- def fit(x, y)
51
- x = check_convert_sample_array(x)
52
- y = check_convert_label_array(y)
53
- check_sample_label_size(x, y)
54
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
55
- raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
56
-
57
- @encoder = Rumale::Preprocessing::LabelBinarizer.new
58
- y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
59
- @classes = Numo::NArray[*@encoder.classes]
60
-
61
- n_samples = x.shape[0]
62
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
63
- @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
64
-
65
- self
66
- end
67
-
68
- # Calculate confidence scores for samples.
69
- #
70
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
71
- # The kernel matrix between testing samples and training samples to predict values.
72
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
73
- def decision_function(x)
74
- x = check_convert_sample_array(x)
75
- x.dot(@weight_vec)
76
- end
77
-
78
- # Predict class labels for samples.
79
- #
80
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
81
- # The kernel matrix between testing samples and training samples to predict the labels.
82
- # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
83
- def predict(x)
84
- x = check_convert_sample_array(x)
85
- scores = decision_function(x)
86
- n_samples, n_classes = scores.shape
87
- label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
88
- @classes[label_ids].dup
89
- end
90
- end
91
- end
92
- end
@@ -1,193 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/probabilistic_output'
6
-
7
- module Rumale
8
- # This module consists of the classes that implement kernel method-based estimator.
9
- module KernelMachine
10
- # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
11
- # with stochastic gradient descent (SGD) optimization.
12
- # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
- #
14
- # @note
15
- # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
16
- # If you prefer execution speed, you should use Rumale::SVM::SVC.
17
- # https://github.com/yoshoku/rumale-svm
18
- #
19
- # @example
20
- # training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
21
- # estimator =
22
- # Rumale::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
23
- # estimator.fit(training_kernel_matrix, training_labels)
24
- # testing_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
25
- # results = estimator.predict(testing_kernel_matrix)
26
- #
27
- # *Reference*
28
- # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
29
- class KernelSVC
30
- include Base::BaseEstimator
31
- include Base::Classifier
32
-
33
- # Return the weight vector for Kernel SVC.
34
- # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
35
- attr_reader :weight_vec
36
-
37
- # Return the class labels.
38
- # @return [Numo::Int32] (shape: [n_classes])
39
- attr_reader :classes
40
-
41
- # Return the random generator for performing random sampling.
42
- # @return [Random]
43
- attr_reader :rng
44
-
45
- # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
46
- #
47
- # @param reg_param [Float] The regularization parameter.
48
- # @param max_iter [Integer] The maximum number of iterations.
49
- # @param probability [Boolean] The flag indicating whether to perform probability estimation.
50
- # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
51
- # If nil is given, the methods do not execute in parallel.
52
- # If zero or less is given, it becomes equal to the number of processors.
53
- # This parameter is ignored if the Parallel gem is not loaded.
54
- # @param random_seed [Integer] The seed value used to initialize the random generator.
55
- def initialize(reg_param: 1.0, max_iter: 1000, probability: false, n_jobs: nil, random_seed: nil)
56
- check_params_numeric(reg_param: reg_param, max_iter: max_iter)
57
- check_params_boolean(probability: probability)
58
- check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
59
- check_params_positive(reg_param: reg_param, max_iter: max_iter)
60
- @params = {}
61
- @params[:reg_param] = reg_param
62
- @params[:max_iter] = max_iter
63
- @params[:probability] = probability
64
- @params[:n_jobs] = n_jobs
65
- @params[:random_seed] = random_seed
66
- @params[:random_seed] ||= srand
67
- @weight_vec = nil
68
- @prob_param = nil
69
- @classes = nil
70
- @rng = Random.new(@params[:random_seed])
71
- end
72
-
73
- # Fit the model with given training data.
74
- #
75
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
76
- # The kernel matrix of the training data to be used for fitting the model.
77
- # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
78
- # @return [KernelSVC] The learned classifier itself.
79
- def fit(x, y)
80
- x = check_convert_sample_array(x)
81
- y = check_convert_label_array(y)
82
- check_sample_label_size(x, y)
83
-
84
- @classes = Numo::Int32[*y.to_a.uniq.sort]
85
- n_classes = @classes.size
86
- n_features = x.shape[1]
87
-
88
- if n_classes > 2
89
- @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
90
- @prob_param = Numo::DFloat.zeros(n_classes, 2)
91
- models = if enable_parallel?
92
- # :nocov:
93
- parallel_map(n_classes) do |n|
94
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
95
- partial_fit(x, bin_y)
96
- end
97
- # :nocov:
98
- else
99
- Array.new(n_classes) do |n|
100
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
101
- partial_fit(x, bin_y)
102
- end
103
- end
104
- models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
105
- else
106
- negative_label = y.to_a.uniq.min
107
- bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
108
- @weight_vec, @prob_param = partial_fit(x, bin_y)
109
- end
110
-
111
- self
112
- end
113
-
114
- # Calculate confidence scores for samples.
115
- #
116
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
117
- # The kernel matrix between testing samples and training samples to compute the scores.
118
- # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
119
- def decision_function(x)
120
- x = check_convert_sample_array(x)
121
-
122
- x.dot(@weight_vec.transpose)
123
- end
124
-
125
- # Predict class labels for samples.
126
- #
127
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
128
- # The kernel matrix between testing samples and training samples to predict the labels.
129
- # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
130
- def predict(x)
131
- x = check_convert_sample_array(x)
132
-
133
- return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
134
-
135
- n_samples, = x.shape
136
- decision_values = decision_function(x)
137
- predicted = if enable_parallel?
138
- parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
139
- else
140
- Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
141
- end
142
- Numo::Int32.asarray(predicted)
143
- end
144
-
145
- # Predict probability for samples.
146
- #
147
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
148
- # The kernel matrix between testing samples and training samples to predict the labels.
149
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
150
- def predict_proba(x)
151
- x = check_convert_sample_array(x)
152
-
153
- if @classes.size > 2
154
- probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
155
- return (probs.transpose / probs.sum(axis: 1)).transpose.dup
156
- end
157
-
158
- n_samples, = x.shape
159
- probs = Numo::DFloat.zeros(n_samples, 2)
160
- probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
161
- probs[true, 0] = 1.0 - probs[true, 1]
162
- probs
163
- end
164
-
165
- private
166
-
167
- def partial_fit(x, bin_y)
168
- # Initialize some variables.
169
- n_training_samples = x.shape[0]
170
- rand_ids = []
171
- weight_vec = Numo::DFloat.zeros(n_training_samples)
172
- sub_rng = @rng.dup
173
- # Start optimization.
174
- @params[:max_iter].times do |t|
175
- # random sampling
176
- rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
177
- target_id = rand_ids.shift
178
- # update the weight vector
179
- func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
180
- func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
181
- weight_vec[target_id] += 1.0 if func < 1.0
182
- end
183
- w = weight_vec * bin_y
184
- p = if @params[:probability]
185
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
186
- else
187
- Numo::DFloat[1, 0]
188
- end
189
- [w, p]
190
- end
191
- end
192
- end
193
- end