rumale 0.13.8 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
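
The headline addition in 0.14.0 is a multi-layer perceptron estimator: base_mlp.rb, mlp_classifier.rb, and mlp_regressor.rb are new files. Below is a minimal usage sketch; it assumes MLPClassifier follows Rumale's usual fit/predict interface and accepts the constructor keywords that BaseMLP#initialize defines in the diff further down (the toy dataset is made up):

  require 'rumale'

  # Toy two-class dataset (hypothetical; any Numo::DFloat samples with
  # Numo::Int32 labels will do).
  x = Numo::DFloat.new(100, 2).rand
  y = Numo::Int32.cast(x.sum(1).gt(1.0))

  # Constructor keywords taken from BaseMLP#initialize in this diff;
  # MLPClassifier itself is assumed to accept the same ones.
  estimator = Rumale::NeuralNetwork::MLPClassifier.new(
    hidden_units: [64, 64], max_iter: 1000, batch_size: 20, random_seed: 1
  )
  estimator.fit(x, y)
  predicted = estimator.predict(x)
  puts "training accuracy: #{predicted.eq(y).count.fdiv(y.size)}"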

data/lib/rumale/model_selection/stratified_k_fold.rb
@@ -36,9 +36,9 @@ module Rumale
     # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(n_splits: 3, shuffle: false, random_seed: nil)
-
+      check_params_numeric(n_splits: n_splits)
       check_params_boolean(shuffle: shuffle)
-
+      check_params_numeric_or_nil(random_seed: random_seed)
       check_params_positive(n_splits: n_splits)
       @n_splits = n_splits
       @shuffle = shuffle
@@ -56,8 +56,8 @@ module Rumale
     # The labels to be used to generate data indices for stratified K-fold cross validation.
     # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
     def split(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       # Check the number of samples in each class.
       unless valid_n_splits?(y)

data/lib/rumale/model_selection/stratified_shuffle_split.rb
@@ -33,10 +33,8 @@ module Rumale
     # @param train_size [Float] The ratio of number of samples for train data.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
-
-
-      check_params_type_or_nil(Float, train_size: train_size)
-      check_params_type_or_nil(Integer, random_seed: random_seed)
+      check_params_numeric(n_splits: n_splits, test_size: test_size)
+      check_params_numeric_or_nil(train_size: train_size, random_seed: random_seed)
       check_params_positive(n_splits: n_splits)
       check_params_positive(test_size: test_size)
       check_params_positive(train_size: train_size) unless train_size.nil?
@@ -57,8 +55,8 @@ module Rumale
     # The labels to be used to generate data indices for stratified random permutation cross validation.
     # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
     def split(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       # Initialize and check some variables.
       train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
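
The hunks above show the release-wide validation cleanup: type-specific checks such as check_params_type_or_nil(Float, ...) give way to the numeric checks added in data/lib/rumale/validation.rb (+32 -2), and split methods now convert their inputs with check_convert_* helpers instead of merely checking them. The same substitution repeats through the classifier and regressor hunks below. A rough, hypothetical sketch of what the new helpers presumably do — this diff does not show their actual definitions:

  # Hypothetical stand-in for the new validation helpers; the real
  # definitions live in data/lib/rumale/validation.rb, which this diff
  # does not display.
  def check_params_numeric(**params)
    params.each do |name, value|
      raise TypeError, "expect #{name} to be Numeric" unless value.is_a?(Numeric)
    end
  end

  def check_params_numeric_or_nil(**params)
    check_params_numeric(**params.reject { |_name, value| value.nil? })
  end

  check_params_numeric(n_splits: 3, test_size: 0.1) # Integer and Float both pass
  check_params_numeric_or_nil(train_size: nil)      # nil is allowed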

data/lib/rumale/multiclass/one_vs_rest_classifier.rb
@@ -46,8 +46,8 @@ module Rumale
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [OneVsRestClassifier] The learned classifier itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       y_arr = y.to_a
       @classes = Numo::Int32.asarray(y_arr.uniq.sort)
@@ -63,7 +63,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
-
+      x = check_convert_sample_array(x)
       n_classes = @classes.size
       Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
     end
@@ -73,7 +73,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
-
+      x = check_convert_sample_array(x)
       n_samples, = x.shape
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })

data/lib/rumale/naive_bayes/naive_bayes.rb
@@ -16,7 +16,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
-
+      x = check_convert_sample_array(x)
       n_samples = x.shape.first
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -27,7 +27,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
     def predict_log_proba(x)
-
+      x = check_convert_sample_array(x)
       n_samples, = x.shape
       log_likelihoods = decision_function(x)
       log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -38,7 +38,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
-
+      x = check_convert_sample_array(x)
       Numo::NMath.exp(predict_log_proba(x)).abs
     end
   end
@@ -78,8 +78,8 @@ module Rumale
     # to be used for fitting the model.
     # @return [GaussianNB] The learned classifier itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       n_samples, = x.shape
       @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -94,7 +94,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
-
+      x = check_convert_sample_array(x)
       n_classes = @classes.size
       log_likelihoods = Array.new(n_classes) do |l|
         Math.log(@class_priors[l]) - 0.5 * (
@@ -154,7 +154,7 @@ module Rumale
     #
     # @param smoothing_param [Float] The Laplace smoothing parameter.
     def initialize(smoothing_param: 1.0)
-
+      check_params_numeric(smoothing_param: smoothing_param)
       check_params_positive(smoothing_param: smoothing_param)
       @params = {}
       @params[:smoothing_param] = smoothing_param
@@ -167,8 +167,8 @@ module Rumale
     # to be used for fitting the model.
     # @return [MultinomialNB] The learned classifier itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       n_samples, = x.shape
       @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -185,7 +185,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
-
+      x = check_convert_sample_array(x)
       n_classes = @classes.size
       bin_x = x.gt(0)
       log_likelihoods = Array.new(n_classes) do |l|
@@ -243,7 +243,7 @@ module Rumale
     # @param smoothing_param [Float] The Laplace smoothing parameter.
     # @param bin_threshold [Float] The threshold for binarizing of features.
     def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
-
+      check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
       check_params_positive(smoothing_param: smoothing_param)
       @params = {}
       @params[:smoothing_param] = smoothing_param
@@ -257,8 +257,8 @@ module Rumale
     # to be used for fitting the model.
     # @return [BernoulliNB] The learned classifier itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       n_samples, = x.shape
       bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
@@ -278,7 +278,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
-
+      x = check_convert_sample_array(x)
       n_classes = @classes.size
       bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
       not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]

data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
@@ -35,7 +35,7 @@ module Rumale
     #
     # @param n_neighbors [Integer] The number of neighbors.
     def initialize(n_neighbors: 5)
-
+      check_params_numeric(n_neighbors: n_neighbors)
       check_params_positive(n_neighbors: n_neighbors)
       @params = {}
       @params[:n_neighbors] = n_neighbors
@@ -50,8 +50,8 @@ module Rumale
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [KNeighborsClassifier] The learned classifier itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_label_array(y)
       check_sample_label_size(x, y)
       @prototypes = Numo::DFloat.asarray(x.to_a)
       @labels = Numo::Int32.asarray(y.to_a)
@@ -64,7 +64,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
-
+      x = check_convert_sample_array(x)
       distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
       n_samples, n_prototypes = distance_matrix.shape
       n_classes = @classes.size
@@ -82,7 +82,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
-
+      x = check_convert_sample_array(x)
       n_samples = x.shape.first
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })

data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
@@ -30,7 +30,7 @@ module Rumale
     #
     # @param n_neighbors [Integer] The number of neighbors.
     def initialize(n_neighbors: 5)
-
+      check_params_numeric(n_neighbors: n_neighbors)
       check_params_positive(n_neighbors: n_neighbors)
       @params = {}
       @params[:n_neighbors] = n_neighbors
@@ -44,8 +44,8 @@ module Rumale
     # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [KNeighborsRegressor] The learned regressor itself.
     def fit(x, y)
-
-
+      x = check_convert_sample_array(x)
+      y = check_convert_tvalue_array(y)
       check_sample_tvalue_size(x, y)
       @prototypes = x.dup
       @values = y.dup
@@ -57,7 +57,7 @@ module Rumale
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
     # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
    def predict(x)
-
+      x = check_convert_sample_array(x)
       # Initialize some variables.
       n_samples, = x.shape
       n_prototypes, n_outputs = @values.shape

data/lib/rumale/neural_network/base_mlp.rb
@@ -0,0 +1,244 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+
+module Rumale
+  # This module consists of the modules and classes for implementation multi-layer perceptron estimator.
+  module NeuralNetwork
+    # @!visibility private
+    # This module consists of the classes that implement layer functions of neural network.
+    module Layer
+      # @!visibility private
+      # Affine is a class that calculates the linear transform.
+      # This class is used internally.
+      class Affine
+        # @!visibility private
+        def initialize(n_inputs: nil, n_outputs: nil, optimizer: nil, rng: nil)
+          @weight = 0.01 * Rumale::Utils.rand_normal([n_inputs, n_outputs], rng)
+          @bias = Numo::DFloat.zeros(n_outputs)
+          @optimizer_weight = optimizer.dup
+          @optimizer_bias = optimizer.dup
+        end
+
+        # @!visibility private
+        def forward(x)
+          out = x.dot(@weight) + @bias
+
+          backward = proc do |dout|
+            dx = dout.dot(@weight.transpose)
+            dw = x.transpose.dot(dout)
+            db = dout.sum(0)
+
+            @weight = @optimizer_weight.call(@weight, dw)
+            @bias = @optimizer_bias.call(@bias, db)
+
+            dx
+          end
+
+          [out, backward]
+        end
+      end
+
+      # @!visibility private
+      # Dropout is a class that performs dropout regularization.
+      # This class is used internally.
+      class Dropout
+        # @!visibility private
+        def initialize(rate: 0.3, rng: nil)
+          @rate = rate
+          @rng = rng
+        end
+
+        # @!visibility private
+        def forward(x)
+          rand_mat = Rumale::Utils.rand_uniform(x.shape, @rng)
+          mask = rand_mat.ge(@rate)
+          out = x * mask
+          out *= 1.fdiv(1 - @rate) if @rate < 1.0
+
+          backward = proc { |dout| dout * mask }
+
+          [out, backward]
+        end
+      end
+
+      # @!visibility private
+      # ReLU is a class that calculates rectified linear function.
+      # This class is used internally.
+      class Relu
+        # @!visibility private
+        def forward(x)
+          mask = x.gt(0)
+          out = x * mask
+
+          backward = proc { |dout| dout * mask }
+
+          [out, backward]
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the classes that implement loss function for neural network.
+    module Loss
+      # @!visibility private
+      # MeanSquaredError is a class that calculates mean squared error for regression task.
+      # This class is used internally.
+      class MeanSquaredError
+        # @!visibility private
+        def call(out, y)
+          sz_batch = y.shape[0]
+          diff = out - y
+          loss = (diff**2).sum.fdiv(sz_batch)
+          dout = 2.fdiv(sz_batch) * diff
+          [loss, dout]
+        end
+      end
+
+      # @!visibility private
+      # SoftmaxCrossEntropy is a class that calculates softmax cross-entropy for classification task.
+      # This class is used internally.
+      class SoftmaxCrossEntropy
+        # @!visibility private
+        def call(out, y)
+          sz_batch = y.shape[0]
+          z = softmax(out)
+          loss = -(y * Numo::NMath.log(z + 1e-8)).sum.fdiv(sz_batch)
+          dout = (z - y) / sz_batch
+          [loss, dout]
+        end
+
+        private
+
+        def softmax(x)
+          clip = x.max(-1).expand_dims(-1)
+          exp_x = Numo::NMath.exp(x - clip)
+          exp_x / exp_x.sum(-1).expand_dims(-1)
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the classes for implementing neural network model.
+    module Model
+      # @!visibility private
+      attr_reader :layers
+
+      # @!visibility private
+      # Sequential is a class that implements linear stack model.
+      # This class is used internally.
+      class Sequential
+        # @!visibility private
+        def initialize
+          @layers = []
+        end
+
+        # @!visibility private
+        def push(ops)
+          @layers.push(ops)
+          self
+        end
+
+        # @!visibility private
+        def delete_dropout
+          @layers.delete_if { |node| node.is_a?(Layer::Dropout) }
+          self
+        end
+
+        # @!visibility private
+        def forward(x)
+          backprops = []
+          out = x.dup
+
+          @layers.each do |l|
+            out, bw = l.forward(out)
+            backprops.push(bw)
+          end
+
+          backward = proc do |dout|
+            backprops.reverse_each { |bw| dout = bw.call(dout) }
+            dout
+          end
+
+          [out, backward]
+        end
+      end
+    end
+
+    # BaseMLP is an abstract class for implementation of multi-layer peceptron estimator.
+    # This class is used internally.
+    class BaseMLP
+      include Base::BaseEstimator
+
+      # Create a multi-layer perceptron estimator.
+      #
+      # @param hidden_units [Array] The number of units in the i-th hidden layer.
+      # @param dropout_rate [Float] The rate of the units to drop.
+      # @param learning_rate [Float] The initial value of learning rate in Adam optimizer.
+      # @param decay1 [Float] The smoothing parameter for the first moment in Adam optimizer.
+      # @param decay2 [Float] The smoothing parameter for the second moment in Adam optimizer.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param batch_size [Intger] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(hidden_units: [128, 128], dropout_rate: 0.4, learning_rate: 0.001, decay1: 0.9, decay2: 0.999,
+                     max_iter: 10000, batch_size: 50, tol: 1e-4, verbose: false, random_seed: nil)
+        @params = {}
+        @params[:hidden_units] = hidden_units
+        @params[:dropout_rate] = dropout_rate
+        @params[:learning_rate] = learning_rate
+        @params[:decay1] = decay1
+        @params[:decay2] = decay2
+        @params[:max_iter] = max_iter
+        @params[:batch_size] = batch_size
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      private
+
+      def buld_network(n_inputs, n_outputs, srng = nil)
+        adam = Rumale::Optimizer::Adam.new(learning_rate: @params[:learning_rate], decay1: @params[:decay1], decay2: @params[:decay2])
+        model = Model::Sequential.new
+        n_units = [n_inputs, *@params[:hidden_units]]
+        n_units.each_cons(2) do |n_in, n_out|
+          model.push(Layer::Affine.new(n_inputs: n_in, n_outputs: n_out, optimizer: adam, rng: srng))
+          model.push(Layer::Relu.new)
+          model.push(Layer::Dropout.new(rate: @params[:dropout_rate], rng: srng))
+        end
+        model.push(Layer::Affine.new(n_inputs: n_units[-1], n_outputs: n_outputs, optimizer: adam, rng: srng))
+      end
+
+      def train(x, y, network, loss_func, srng = nil)
+        class_name = self.class.to_s.split('::').last
+
+        n_samples = x.shape[0]
+        rand_ids = [*0...n_samples].shuffle(random: srng)
+
+        @params[:max_iter].times do |t|
+          # random sampling
+          subset_ids = rand_ids.shift(@params[:batch_size])
+          rand_ids.concat(subset_ids)
+          sub_x = x[subset_ids, true].dup
+          sub_y = y[subset_ids, true].dup
+          # forward
+          out, backward = network.forward(sub_x)
+          # calc loss function
+          loss, dout = loss_func.call(out, sub_y)
+          @n_iter = t + 1
+          puts "[#{class_name}] Loss after #{@n_iter} iterations: #{loss}" if @params[:verbose] && (@n_iter % 10).zero?
+          break if loss < @params[:tol]
+          # backward
+          backward.call(dout)
+        end
+
+        network
+      end
+    end
+  end
+end