rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,201 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'lbfgsb'
4
-
5
- require 'rumale/linear_model/base_sgd'
6
- require 'rumale/base/regressor'
7
-
8
- module Rumale
9
- module LinearModel
10
- # LinearRegression is a class that implements ordinary least squares linear regression
11
- # with stochastic gradient descent (SGD) optimization,
12
- # singular value decomposition (SVD), or L-BFGS optimization.
13
- #
14
- # @example
15
- # estimator =
16
- # Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
17
- # estimator.fit(training_samples, training_values)
18
- # results = estimator.predict(testing_samples)
19
- #
20
- # # If Numo::Linalg is installed, you can specify 'svd' for the solver option.
21
- # require 'numo/linalg/autoloader'
22
- # estimator = Rumale::LinearModel::LinearRegression.new(solver: 'svd')
23
- # estimator.fit(training_samples, training_values)
24
- # results = estimator.predict(testing_samples)
25
- #
26
- # *Reference*
27
- # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
28
- class LinearRegression < BaseSGD
29
- include Base::Regressor
30
-
31
- # Return the weight vector.
32
- # @return [Numo::DFloat] (shape: [n_outputs, n_features])
33
- attr_reader :weight_vec
34
-
35
- # Return the bias term (a.k.a. intercept).
36
- # @return [Numo::DFloat] (shape: [n_outputs])
37
- attr_reader :bias_term
38
-
39
- # Return the random generator for random sampling.
40
- # @return [Random]
41
- attr_reader :rng
42
-
43
- # Create a new ordinary least squares linear regressor.
44
- #
45
- # @param learning_rate [Float] The initial value of learning rate.
46
- # The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
47
- # If solver is not 'sgd', this parameter is ignored.
48
- # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
49
- # If nil is given, the decay is set to 'learning_rate'.
50
- # If solver is not 'sgd', this parameter is ignored.
51
- # @param momentum [Float] The momentum factor.
52
- # If solver is not 'sgd', this parameter is ignored.
53
- # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
54
- # @param bias_scale [Float] The scale of the bias term.
55
- # @param max_iter [Integer] The maximum number of epochs that indicates
56
- # how many times the whole data is given to the training process.
57
- # If solver is 'svd', this parameter is ignored.
58
- # @param batch_size [Integer] The size of the mini batches.
59
- # If solver is not 'sgd', this parameter is ignored.
60
- # @param tol [Float] The tolerance of loss for terminating optimization.
61
- # If solver is 'svd', this parameter is ignored.
62
- # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
63
- # 'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
64
- # 'sgd' uses the stochastic gradient descent optimization.
65
- # 'svd' performs singular value decomposition of samples.
66
- # 'lbfgs' uses the L-BFGS method for optimization.
67
- # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
68
- # If nil is given, the method does not execute in parallel.
69
- # If zero or less is given, it becomes equal to the number of processors.
70
- # This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
71
- # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
72
- # If solver is 'svd', this parameter is ignored.
73
- # @param random_seed [Integer] The seed value used to initialize the random generator.
74
- def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
75
- fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
76
- solver: 'auto',
77
- n_jobs: nil, verbose: false, random_seed: nil)
78
- check_params_numeric(learning_rate: learning_rate, momentum: momentum,
79
- bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
80
- check_params_boolean(fit_bias: fit_bias, verbose: verbose)
81
- check_params_string(solver: solver)
82
- check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
83
- check_params_positive(learning_rate: learning_rate, max_iter: max_iter, batch_size: batch_size)
84
- super()
85
- @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
86
- @params[:solver] = if solver == 'auto'
87
- enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
88
- else
89
- solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
90
- end
91
- @params[:decay] ||= @params[:learning_rate]
92
- @params[:random_seed] ||= srand
93
- @rng = Random.new(@params[:random_seed])
94
- @loss_func = LinearModel::Loss::MeanSquaredError.new
95
- @weight_vec = nil
96
- @bias_term = nil
97
- end
98
-
99
- # Fit the model with given training data.
100
- #
101
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
102
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
103
- # @return [LinearRegression] The learned regressor itself.
104
- def fit(x, y)
105
- x = check_convert_sample_array(x)
106
- y = check_convert_tvalue_array(y)
107
- check_sample_tvalue_size(x, y)
108
-
109
- if @params[:solver] == 'svd' && enable_linalg?(warning: false)
110
- fit_svd(x, y)
111
- elsif @params[:solver] == 'lbfgs'
112
- fit_lbfgs(x, y)
113
- else
114
- fit_sgd(x, y)
115
- end
116
-
117
- self
118
- end
119
-
120
- # Predict values for samples.
121
- #
122
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
123
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
124
- def predict(x)
125
- x = check_convert_sample_array(x)
126
- x.dot(@weight_vec.transpose) + @bias_term
127
- end
128
-
129
- private
130
-
131
- def fit_svd(x, y)
132
- x = expand_feature(x) if fit_bias?
133
- w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
134
- @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
135
- end
136
-
137
- def fit_lbfgs(x, y)
138
- fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
139
- n_samples, n_features = x.shape
140
- w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
141
- z = x.dot(w.transpose)
142
- d = z - y
143
- loss = (d**2).sum.fdiv(n_samples)
144
- gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
145
- [loss, gradient.flatten.dup]
146
- end
147
-
148
- x = expand_feature(x) if fit_bias?
149
-
150
- n_features = x.shape[1]
151
- n_outputs = single_target?(y) ? 1 : y.shape[1]
152
-
153
- res = Lbfgsb.minimize(
154
- fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
155
- maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
156
- verbose: @params[:verbose] ? 1 : -1
157
- )
158
-
159
- @weight_vec, @bias_term =
160
- if single_target?(y)
161
- split_weight(res[:x])
162
- else
163
- split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
164
- end
165
- end
166
-
167
- def fit_sgd(x, y)
168
- if single_target?(y)
169
- @weight_vec, @bias_term = partial_fit(x, y)
170
- else
171
- n_outputs = y.shape[1]
172
- n_features = x.shape[1]
173
- @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
174
- @bias_term = Numo::DFloat.zeros(n_outputs)
175
- if enable_parallel?
176
- models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
177
- n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
178
- else
179
- n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
180
- end
181
- end
182
- end
183
-
184
- def single_target?(y)
185
- y.ndim == 1
186
- end
187
-
188
- def init_weight(n_features, n_outputs)
189
- Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
190
- end
191
-
192
- def split_weight_mult(w)
193
- if fit_bias?
194
- [w[0...-1, true].dup, w[-1, true].dup]
195
- else
196
- [w.dup, Numo::DFloat.zeros(w.shape[1])]
197
- end
198
- end
199
- end
200
- end
201
- end
@@ -1,275 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'lbfgsb'
4
- require 'rumale/base/classifier'
5
- require 'rumale/linear_model/base_sgd'
6
- require 'rumale/preprocessing/label_binarizer'
7
-
8
- module Rumale
9
- module LinearModel
10
- # LogisticRegression is a class that implements Logistic Regression.
11
- # In multiclass classification problem, it uses one-vs-the-rest strategy for the sgd solver
12
- # and multinomial logistic regression for the lbfgs solver.
13
- #
14
- # @note
15
- # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
16
- # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
17
- # https://github.com/yoshoku/rumale-svm
18
- #
19
- # @example
20
- # estimator =
21
- # Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
22
- # estimator.fit(training_samples, training_labels)
23
- # results = estimator.predict(testing_samples)
24
- #
25
- # *Reference*
26
- # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
27
- # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
28
- # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
29
- class LogisticRegression < BaseSGD
30
- include Base::Classifier
31
-
32
- # Return the weight vector for Logistic Regression.
33
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
34
- attr_reader :weight_vec
35
-
36
- # Return the bias term (a.k.a. intercept) for Logistic Regression.
37
- # @return [Numo::DFloat] (shape: [n_classes])
38
- attr_reader :bias_term
39
-
40
- # Return the class labels.
41
- # @return [Numo::Int32] (shape: [n_classes])
42
- attr_reader :classes
43
-
44
- # Return the random generator for performing random sampling.
45
- # @return [Random]
46
- attr_reader :rng
47
-
48
- # Create a new classifier with Logistic Regression.
49
- #
50
- # @param learning_rate [Float] The initial value of learning rate.
51
- # The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
52
- # If solver = 'lbfgs', this parameter is ignored.
53
- # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
54
- # If nil is given, the decay is set to 'reg_param * learning_rate'.
55
- # If solver = 'lbfgs', this parameter is ignored.
56
- # @param momentum [Float] The momentum factor.
57
- # If solver = 'lbfgs', this parameter is ignored.
58
- # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
59
- # If solver = 'lbfgs', only 'l2' can be selected for this parameter.
60
- # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
61
- # If penalty set to 'l2' or 'l1', this parameter is ignored.
62
- # If l1_ratio = 1, the regularization is similar to Lasso.
63
- # If l1_ratio = 0, the regularization is similar to Ridge.
64
- # If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
65
- # If solver = 'lbfgs', this parameter is ignored.
66
- # @param reg_param [Float] The regularization parameter.
67
- # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
68
- # @param bias_scale [Float] The scale of the bias term.
69
- # If fit_bias is true, the feature vector v becomes [v; bias_scale].
70
- # @param max_iter [Integer] The maximum number of epochs that indicates
71
- # how many times the whole data is given to the training process.
72
- # @param batch_size [Integer] The size of the mini batches.
73
- # If solver = 'lbfgs', this parameter is ignored.
74
- # @param tol [Float] The tolerance of loss for terminating optimization.
75
- # If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
76
- # @param solver [String] The algorithm for optimization. ('lbfgs' or 'sgd').
77
- # 'lbfgs' uses the L-BFGS method with the lbfgsb.rb gem.
78
- # 'sgd' uses the stochastic gradient descent optimization.
79
- # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
80
- # If nil is given, the methods do not execute in parallel.
81
- # If zero or less is given, it becomes equal to the number of processors.
82
- # This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
83
- # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
84
- # If solver = 'lbfgs' and true is given, 'iterate.dat' file is generated by lbfgsb.rb.
85
- # @param random_seed [Integer] The seed value used to initialize the random generator.
86
- def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
87
- penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
88
- fit_bias: true, bias_scale: 1.0,
89
- max_iter: 1000, batch_size: 50, tol: 1e-4,
90
- solver: 'lbfgs',
91
- n_jobs: nil, verbose: false, random_seed: nil)
92
- check_params_numeric(learning_rate: learning_rate, momentum: momentum,
93
- reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
94
- max_iter: max_iter, batch_size: batch_size, tol: tol)
95
- check_params_boolean(fit_bias: fit_bias, verbose: verbose)
96
- check_params_string(solver: solver, penalty: penalty)
97
- check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
98
- check_params_positive(learning_rate: learning_rate, reg_param: reg_param,
99
- bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
100
- raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
101
-
102
- super()
103
- @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
104
- @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
105
- @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
106
- @params[:random_seed] ||= srand
107
- @rng = Random.new(@params[:random_seed])
108
- @penalty_type = @params[:penalty]
109
- @loss_func = LinearModel::Loss::LogLoss.new
110
- @weight_vec = nil
111
- @bias_term = nil
112
- @classes = nil
113
- end
114
-
115
- # Fit the model with given training data.
116
- #
117
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
118
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
119
- # @return [LogisticRegression] The learned classifier itself.
120
- def fit(x, y)
121
- x = check_convert_sample_array(x)
122
- y = check_convert_label_array(y)
123
- check_sample_label_size(x, y)
124
-
125
- @classes = Numo::Int32[*y.to_a.uniq.sort]
126
- if @params[:solver] == 'sgd'
127
- fit_sgd(x, y)
128
- else
129
- fit_lbfgs(x, y)
130
- end
131
-
132
- self
133
- end
134
-
135
- # Calculate confidence scores for samples.
136
- #
137
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
138
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
139
- def decision_function(x)
140
- x = check_convert_sample_array(x)
141
- x.dot(@weight_vec.transpose) + @bias_term
142
- end
143
-
144
- # Predict class labels for samples.
145
- #
146
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
147
- # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
148
- def predict(x)
149
- x = check_convert_sample_array(x)
150
-
151
- n_samples, = x.shape
152
- decision_values = predict_proba(x)
153
- predicted = if enable_parallel?
154
- parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
155
- else
156
- Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
157
- end
158
- Numo::Int32.asarray(predicted)
159
- end
160
-
161
- # Predict probability for samples.
162
- #
163
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
164
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
165
- def predict_proba(x)
166
- x = check_convert_sample_array(x)
167
-
168
- proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
169
- return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
170
-
171
- n_samples, = x.shape
172
- probs = Numo::DFloat.zeros(n_samples, 2)
173
- probs[true, 1] = proba
174
- probs[true, 0] = 1.0 - proba
175
- probs
176
- end
177
-
178
- private
179
-
180
- def multiclass_problem?
181
- @classes.size > 2
182
- end
183
-
184
- def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
185
- if multiclass_problem?
186
- fnc = proc do |w, x, y, a|
187
- n_features = x.shape[1]
188
- n_classes = y.shape[1]
189
- z = x.dot(w.reshape(n_classes, n_features).transpose)
190
- # logsumexp and softmax
191
- z_max = z.max(-1).expand_dims(-1).dup
192
- z_max[~z_max.isfinite] = 0.0
193
- lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(-1)).expand_dims(-1) + z_max
194
- t = z - lgsexp
195
- sftmax = Numo::NMath.exp(t)
196
- # loss and gradient
197
- loss = -(y * t).sum + 0.5 * a * w.dot(w)
198
- grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
199
- [loss, grad]
200
- end
201
-
202
- base_x = expand_feature(base_x) if fit_bias?
203
- encoder = Rumale::Preprocessing::LabelBinarizer.new
204
- onehot_y = encoder.fit_transform(base_y)
205
- n_classes = @classes.size
206
- n_features = base_x.shape[1]
207
- w_init = Numo::DFloat.zeros(n_classes * n_features)
208
-
209
- verbose = @params[:verbose] ? 1 : -1
210
- res = Lbfgsb.minimize(
211
- fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
212
- maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
213
- )
214
-
215
- if fit_bias?
216
- weight = res[:x].reshape(n_classes, n_features)
217
- @weight_vec = weight[true, 0...-1].dup
218
- @bias_term = weight[true, -1].dup
219
- else
220
- @weight_vec = res[:x].reshape(n_classes, n_features)
221
- @bias_term = Numo::DFloat.zeros(n_classes)
222
- end
223
- else
224
- fnc = proc do |w, x, y, a|
225
- z = 1 + Numo::NMath.exp(-y * x.dot(w))
226
- loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
227
- grad = (y / z - y).dot(x) + a * w
228
- [loss, grad]
229
- end
230
-
231
- base_x = expand_feature(base_x) if fit_bias?
232
- negative_label = @classes[0]
233
- bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
234
- n_features = base_x.shape[1]
235
- w_init = Numo::DFloat.zeros(n_features)
236
-
237
- verbose = @params[:verbose] ? 1 : -1
238
- res = Lbfgsb.minimize(
239
- fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
240
- maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
241
- )
242
-
243
- @weight_vec, @bias_term = split_weight(res[:x])
244
- end
245
- end
246
-
247
- def fit_sgd(x, y)
248
- if multiclass_problem?
249
- n_classes = @classes.size
250
- n_features = x.shape[1]
251
- @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
252
- @bias_term = Numo::DFloat.zeros(n_classes)
253
- if enable_parallel?
254
- # :nocov:
255
- models = parallel_map(n_classes) do |n|
256
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
257
- partial_fit(x, bin_y)
258
- end
259
- # :nocov:
260
- n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
261
- else
262
- n_classes.times do |n|
263
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
264
- @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
265
- end
266
- end
267
- else
268
- negative_label = @classes[0]
269
- bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
270
- @weight_vec, @bias_term = partial_fit(x, bin_y)
271
- end
272
- end
273
- end
274
- end
275
- end
@@ -1,137 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'lbfgsb'
4
-
5
- require 'rumale/base/base_estimator'
6
- require 'rumale/base/regressor'
7
-
8
- module Rumale
9
- module LinearModel
10
- # NNLS is a class that implements non-negative least squares regression.
11
- # NNLS solves least squares problem under non-negative constraints on the coefficient using L-BFGS-B method.
12
- #
13
- # @example
14
- # estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
15
- # estimator.fit(training_samples, training_values)
16
- # results = estimator.predict(testing_samples)
17
- #
18
- class NNLS
19
- include Base::BaseEstimator
20
- include Base::Regressor
21
-
22
- # Return the weight vector.
23
- # @return [Numo::DFloat] (shape: [n_outputs, n_features])
24
- attr_reader :weight_vec
25
-
26
- # Return the bias term (a.k.a. intercept).
27
- # @return [Numo::DFloat] (shape: [n_outputs])
28
- attr_reader :bias_term
29
-
30
- # Returns the number of iterations when converged.
31
- # @return [Integer]
32
- attr_reader :n_iter
33
-
34
- # Return the random generator for initializing weight.
35
- # @return [Random]
36
- attr_reader :rng
37
-
38
- # Create a new regressor with non-negative least squares method.
39
- #
40
- # @param reg_param [Float] The regularization parameter for L2 regularization term.
41
- # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
42
- # @param bias_scale [Float] The scale of the bias term.
43
- # @param max_iter [Integer] The maximum number of epochs that indicates
44
- # how many times the whole data is given to the training process.
45
- # @param tol [Float] The tolerance of loss for terminating optimization.
46
- # This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
47
- # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
48
- # @param random_seed [Integer] The seed value used to initialize the random generator.
49
- def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
50
- max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
51
- check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
52
- check_params_boolean(fit_bias: fit_bias, verbose: verbose)
53
- check_params_numeric_or_nil(random_seed: random_seed)
54
- check_params_positive(reg_param: reg_param, max_iter: max_iter)
55
- @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
56
- @params[:random_seed] ||= srand
57
- @n_iter = nil
58
- @weight_vec = nil
59
- @bias_term = nil
60
- @rng = Random.new(@params[:random_seed])
61
- end
62
-
63
- # Fit the model with given training data.
64
- #
65
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
66
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
67
- # @return [NNLS] The learned regressor itself.
68
- def fit(x, y)
69
- x = check_convert_sample_array(x)
70
- y = check_convert_tvalue_array(y)
71
- check_sample_tvalue_size(x, y)
72
-
73
- x = expand_feature(x) if fit_bias?
74
-
75
- n_features = x.shape[1]
76
- n_outputs = single_target?(y) ? 1 : y.shape[1]
77
-
78
- w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
79
- w_init[w_init.lt(0)] = 0
80
- bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
81
- bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
82
-
83
- res = Lbfgsb.minimize(
84
- fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
85
- maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
86
- )
87
-
88
- @n_iter = res[:n_iter]
89
- w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
90
-
91
- if fit_bias?
92
- @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
93
- @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
94
- else
95
- @weight_vec = w.dup
96
- @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
97
- end
98
-
99
- self
100
- end
101
-
102
- # Predict values for samples.
103
- #
104
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
105
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
106
- def predict(x)
107
- x = check_convert_sample_array(x)
108
- x.dot(@weight_vec.transpose) + @bias_term
109
- end
110
-
111
- private
112
-
113
- def nnls_fnc(w, x, y, alpha)
114
- n_samples, n_features = x.shape
115
- w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
116
- z = x.dot(w.transpose)
117
- d = z - y
118
- loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
119
- gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
120
- [loss, gradient.flatten.dup]
121
- end
122
-
123
- def expand_feature(x)
124
- n_samples = x.shape[0]
125
- Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
126
- end
127
-
128
- def fit_bias?
129
- @params[:fit_bias] == true
130
- end
131
-
132
- def single_target?(y)
133
- y.ndim == 1
134
- end
135
- end
136
- end
137
- end