rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,97 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- module KernelMachine
8
- # KernelPCA is a class that implements Kernel Principal Component Analysis.
9
- #
10
- # @example
11
- # require 'numo/linalg/autoloader'
12
- #
13
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
14
- # kpca = Rumale::KernelMachine::KernelPCA.new(n_components: 2)
15
- # mapped_training_samples = kpca.fit_transform(kernel_mat_train)
16
- #
17
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
18
- # mapped_test_samples = kpca.transform(kernel_mat_test)
19
- #
20
- # *Reference*
21
- # - Scholkopf, B., Smola, A., and Muller, K-R., "Nonlinear Component Analysis as a Kernel Eigenvalue Problem," Neural Computation, Vol. 10 (5), pp. 1299--1319, 1998.
22
- class KernelPCA
23
- include Base::BaseEstimator
24
- include Base::Transformer
25
-
26
- # Returns the eigenvalues of the centered kernel matrix.
27
- # @return [Numo::DFloat] (shape: [n_components])
28
- attr_reader :lambdas
29
-
30
- # Returns the eigenvectors of the centered kernel matrix.
31
- # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
32
- attr_reader :alphas
33
-
34
- # Create a new transformer with Kernel PCA.
35
- #
36
- # @param n_components [Integer] The number of components.
37
- def initialize(n_components: 2)
38
- check_params_numeric(n_components: n_components)
39
- @params = {}
40
- @params[:n_components] = n_components
41
- @alphas = nil
42
- @lambdas = nil
43
- @transform_mat = nil
44
- @row_mean = nil
45
- @all_mean = nil
46
- end
47
-
48
- # Fit the model with given training data.
49
- # To execute this method, Numo::Linalg must be loaded.
50
- #
51
- # @overload fit(x) -> KernelPCA
52
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
53
- # The kernel matrix of the training data to be used for fitting the model.
54
- # @return [KernelPCA] The learned transformer itself.
55
- def fit(x, _y = nil)
56
- x = check_convert_sample_array(x)
57
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
58
- raise 'KernelPCA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
59
-
60
- n_samples = x.shape[0]
61
- @row_mean = x.mean(0)
62
- @all_mean = @row_mean.sum.fdiv(n_samples)
63
- centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
64
- eig_vals, eig_vecs = Numo::Linalg.eigh(centered_kernel_mat, vals_range: (n_samples - @params[:n_components])...n_samples)
65
- @alphas = eig_vecs.reverse(1).dup
66
- @lambdas = eig_vals.reverse.dup
67
- @transform_mat = @alphas.dot((1.0 / Numo::NMath.sqrt(@lambdas)).diag)
68
- self
69
- end
70
-
71
- # Fit the model with training data, and then transform them with the learned model.
72
- # To execute this method, Numo::Linalg must be loaded.
73
- #
74
- # @overload fit_transform(x) -> Numo::DFloat
75
- # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
76
- # The kernel matrix of the training data to be used for fitting the model and transformed.
77
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
78
- def fit_transform(x, _y = nil)
79
- x = check_convert_sample_array(x)
80
- fit(x).transform(x)
81
- end
82
-
83
- # Transform the given data with the learned model.
84
- #
85
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
86
- # The kernel matrix between testing samples and training samples to be transformed.
87
- # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
88
- def transform(x)
89
- x = check_convert_sample_array(x)
90
- col_mean = x.sum(1) / @row_mean.shape[0]
91
- centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
92
- transformed = centered_kernel_mat.dot(@transform_mat)
93
- @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
94
- end
95
- end
96
- end
97
- end
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/regressor'
5
-
6
- module Rumale
7
- module KernelMachine
8
- # KernelRidge is a class that implements kernel ridge regression.
9
- #
10
- # @example
11
- # require 'numo/linalg/autoloader'
12
- #
13
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
14
- # kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
15
- # kridge.fit(kernel_mat_train, training_values)
16
- #
17
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
18
- # results = kridge.predict(kernel_mat_test)
19
- class KernelRidge
20
- include Base::BaseEstimator
21
- include Base::Regressor
22
-
23
- # Return the weight vector.
24
- # @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
25
- attr_reader :weight_vec
26
-
27
- # Create a new regressor with kernel ridge regression.
28
- #
29
- # @param reg_param [Float/Numo::DFloat] The regularization parameter.
30
- def initialize(reg_param: 1.0)
31
- raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
32
- raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1
33
-
34
- @params = {}
35
- @params[:reg_param] = reg_param
36
- @weight_vec = nil
37
- end
38
-
39
- # Fit the model with given training data.
40
- #
41
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
42
- # The kernel matrix of the training data to be used for fitting the model.
43
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
44
- # @return [KernelRidge] The learned regressor itself.
45
- def fit(x, y)
46
- x = check_convert_sample_array(x)
47
- y = check_convert_tvalue_array(y)
48
- check_sample_tvalue_size(x, y)
49
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
50
- raise 'KernelRidge#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
51
-
52
- n_samples = x.shape[0]
53
-
54
- if @params[:reg_param].is_a?(Float)
55
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
56
- @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
57
- else
58
- raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]
59
-
60
- n_outputs = y.shape[1]
61
- @weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
62
- n_outputs.times do |n|
63
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param][n]
64
- @weight_vec[true, n] = Numo::Linalg.solve(reg_kernel_mat, y[true, n], driver: 'sym')
65
- end
66
- end
67
-
68
- self
69
- end
70
-
71
- # Predict values for samples.
72
- #
73
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
74
- # The kernel matrix between testing samples and training samples to predict values.
75
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
76
- def predict(x)
77
- x = check_convert_sample_array(x)
78
- x.dot(@weight_vec)
79
- end
80
- end
81
- end
82
- end
@@ -1,92 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/preprocessing/label_binarizer'
6
-
7
- module Rumale
8
- module KernelMachine
9
- # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
10
- # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
11
- #
12
- # @example
13
- # require 'numo/linalg/autoloader'
14
- # require 'rumale'
15
- #
16
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
17
- # kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
18
- # kridge.fit(kernel_mat_train, training_labels)
19
- #
20
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
21
- # results = kridge.predict(kernel_mat_test)
22
- class KernelRidgeClassifier
23
- include Base::BaseEstimator
24
- include Base::Classifier
25
-
26
- # Return the class labels.
27
- # @return [Numo::Int32] (size: n_classes)
28
- attr_reader :classes
29
-
30
- # Return the weight vector.
31
- # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
32
- attr_reader :weight_vec
33
-
34
- # Create a new classifier based on kernel ridge regression.
35
- #
36
- # @param reg_param [Float/Numo::DFloat] The regularization parameter.
37
- def initialize(reg_param: 1.0)
38
- @params = {}
39
- @params[:reg_param] = reg_param
40
- @classes = nil
41
- @weight_vec = nil
42
- end
43
-
44
- # Fit the model with given training data.
45
- #
46
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
47
- # The kernel matrix of the training data to be used for fitting the model.
48
- # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
49
- # @return [KernelRidgeClassifier] The learned classifier itself.
50
- def fit(x, y)
51
- x = check_convert_sample_array(x)
52
- y = check_convert_label_array(y)
53
- check_sample_label_size(x, y)
54
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
55
- raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
56
-
57
- @encoder = Rumale::Preprocessing::LabelBinarizer.new
58
- y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
59
- @classes = Numo::NArray[*@encoder.classes]
60
-
61
- n_samples = x.shape[0]
62
- reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
63
- @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
64
-
65
- self
66
- end
67
-
68
- # Calculate confidence scores for samples.
69
- #
70
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
71
- # The kernel matrix between testing samples and training samples to predict values.
72
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
73
- def decision_function(x)
74
- x = check_convert_sample_array(x)
75
- x.dot(@weight_vec)
76
- end
77
-
78
- # Predict class labels for samples.
79
- #
80
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
81
- # The kernel matrix between testing samples and training samples to predict the labels.
82
- # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
83
- def predict(x)
84
- x = check_convert_sample_array(x)
85
- scores = decision_function(x)
86
- n_samples, n_classes = scores.shape
87
- label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
88
- @classes[label_ids].dup
89
- end
90
- end
91
- end
92
- end
@@ -1,193 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/probabilistic_output'
6
-
7
- module Rumale
8
- # This module consists of the classes that implement kernel method-based estimators.
9
- module KernelMachine
10
- # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
11
- # with stochastic gradient descent (SGD) optimization.
12
- # For multiclass classification problems, it uses the one-vs-the-rest strategy.
13
- #
14
- # @note
15
- # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
16
- # If you prefer execution speed, you should use Rumale::SVM::SVC.
17
- # https://github.com/yoshoku/rumale-svm
18
- #
19
- # @example
20
- # training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
21
- # estimator =
22
- # Rumale::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
23
- # estimator.fit(training_kernel_matrix, training_labels)
24
- # testing_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
25
- # results = estimator.predict(testing_kernel_matrix)
26
- #
27
- # *Reference*
28
- # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
29
- class KernelSVC
30
- include Base::BaseEstimator
31
- include Base::Classifier
32
-
33
- # Return the weight vector for Kernel SVC.
34
- # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
35
- attr_reader :weight_vec
36
-
37
- # Return the class labels.
38
- # @return [Numo::Int32] (shape: [n_classes])
39
- attr_reader :classes
40
-
41
- # Return the random generator for performing random sampling.
42
- # @return [Random]
43
- attr_reader :rng
44
-
45
- # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
46
- #
47
- # @param reg_param [Float] The regularization parameter.
48
- # @param max_iter [Integer] The maximum number of iterations.
49
- # @param probability [Boolean] The flag indicating whether to perform probability estimation.
50
- # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
51
- # If nil is given, the methods do not execute in parallel.
52
- # If zero or less is given, it becomes equal to the number of processors.
53
- # This parameter is ignored if the Parallel gem is not loaded.
54
- # @param random_seed [Integer] The seed value used to initialize the random generator.
55
- def initialize(reg_param: 1.0, max_iter: 1000, probability: false, n_jobs: nil, random_seed: nil)
56
- check_params_numeric(reg_param: reg_param, max_iter: max_iter)
57
- check_params_boolean(probability: probability)
58
- check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
59
- check_params_positive(reg_param: reg_param, max_iter: max_iter)
60
- @params = {}
61
- @params[:reg_param] = reg_param
62
- @params[:max_iter] = max_iter
63
- @params[:probability] = probability
64
- @params[:n_jobs] = n_jobs
65
- @params[:random_seed] = random_seed
66
- @params[:random_seed] ||= srand
67
- @weight_vec = nil
68
- @prob_param = nil
69
- @classes = nil
70
- @rng = Random.new(@params[:random_seed])
71
- end
72
-
73
- # Fit the model with given training data.
74
- #
75
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
76
- # The kernel matrix of the training data to be used for fitting the model.
77
- # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
78
- # @return [KernelSVC] The learned classifier itself.
79
- def fit(x, y)
80
- x = check_convert_sample_array(x)
81
- y = check_convert_label_array(y)
82
- check_sample_label_size(x, y)
83
-
84
- @classes = Numo::Int32[*y.to_a.uniq.sort]
85
- n_classes = @classes.size
86
- n_features = x.shape[1]
87
-
88
- if n_classes > 2
89
- @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
90
- @prob_param = Numo::DFloat.zeros(n_classes, 2)
91
- models = if enable_parallel?
92
- # :nocov:
93
- parallel_map(n_classes) do |n|
94
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
95
- partial_fit(x, bin_y)
96
- end
97
- # :nocov:
98
- else
99
- Array.new(n_classes) do |n|
100
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
101
- partial_fit(x, bin_y)
102
- end
103
- end
104
- models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
105
- else
106
- negative_label = y.to_a.uniq.min
107
- bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
108
- @weight_vec, @prob_param = partial_fit(x, bin_y)
109
- end
110
-
111
- self
112
- end
113
-
114
- # Calculate confidence scores for samples.
115
- #
116
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
117
- # The kernel matrix between testing samples and training samples to compute the scores.
118
- # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
119
- def decision_function(x)
120
- x = check_convert_sample_array(x)
121
-
122
- x.dot(@weight_vec.transpose)
123
- end
124
-
125
- # Predict class labels for samples.
126
- #
127
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
128
- # The kernel matrix between testing samples and training samples to predict the labels.
129
- # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
130
- def predict(x)
131
- x = check_convert_sample_array(x)
132
-
133
- return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
134
-
135
- n_samples, = x.shape
136
- decision_values = decision_function(x)
137
- predicted = if enable_parallel?
138
- parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
139
- else
140
- Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
141
- end
142
- Numo::Int32.asarray(predicted)
143
- end
144
-
145
- # Predict probability for samples.
146
- #
147
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
148
- # The kernel matrix between testing samples and training samples to predict the labels.
149
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
150
- def predict_proba(x)
151
- x = check_convert_sample_array(x)
152
-
153
- if @classes.size > 2
154
- probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
155
- return (probs.transpose / probs.sum(axis: 1)).transpose.dup
156
- end
157
-
158
- n_samples, = x.shape
159
- probs = Numo::DFloat.zeros(n_samples, 2)
160
- probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
161
- probs[true, 0] = 1.0 - probs[true, 1]
162
- probs
163
- end
164
-
165
- private
166
-
167
- def partial_fit(x, bin_y)
168
- # Initialize some variables.
169
- n_training_samples = x.shape[0]
170
- rand_ids = []
171
- weight_vec = Numo::DFloat.zeros(n_training_samples)
172
- sub_rng = @rng.dup
173
- # Start optimization.
174
- @params[:max_iter].times do |t|
175
- # random sampling
176
- rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
177
- target_id = rand_ids.shift
178
- # update the weight vector
179
- func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
180
- func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
181
- weight_vec[target_id] += 1.0 if func < 1.0
182
- end
183
- w = weight_vec * bin_y
184
- p = if @params[:probability]
185
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
186
- else
187
- Numo::DFloat[1, 0]
188
- end
189
- [w, p]
190
- end
191
- end
192
- end
193
- end