rumale 0.13.8 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
@@ -36,9 +36,9 @@ module Rumale
36
36
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
37
37
  # @param random_seed [Integer] The seed value used to initialize the random generator.
38
38
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
39
- check_params_integer(n_splits: n_splits)
39
+ check_params_numeric(n_splits: n_splits)
40
40
  check_params_boolean(shuffle: shuffle)
41
- check_params_type_or_nil(Integer, random_seed: random_seed)
41
+ check_params_numeric_or_nil(random_seed: random_seed)
42
42
  check_params_positive(n_splits: n_splits)
43
43
  @n_splits = n_splits
44
44
  @shuffle = shuffle
@@ -56,8 +56,8 @@ module Rumale
56
56
  # The labels to be used to generate data indices for stratified K-fold cross validation.
57
57
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
58
58
  def split(x, y)
59
- check_sample_array(x)
60
- check_label_array(y)
59
+ x = check_convert_sample_array(x)
60
+ y = check_convert_label_array(y)
61
61
  check_sample_label_size(x, y)
62
62
  # Check the number of samples in each class.
63
63
  unless valid_n_splits?(y)
@@ -33,10 +33,8 @@ module Rumale
33
33
  # @param train_size [Float] The ratio of number of samples for train data.
34
34
  # @param random_seed [Integer] The seed value used to initialize the random generator.
35
35
  def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
36
- check_params_integer(n_splits: n_splits)
37
- check_params_float(test_size: test_size)
38
- check_params_type_or_nil(Float, train_size: train_size)
39
- check_params_type_or_nil(Integer, random_seed: random_seed)
36
+ check_params_numeric(n_splits: n_splits, test_size: test_size)
37
+ check_params_numeric_or_nil(train_size: train_size, random_seed: random_seed)
40
38
  check_params_positive(n_splits: n_splits)
41
39
  check_params_positive(test_size: test_size)
42
40
  check_params_positive(train_size: train_size) unless train_size.nil?
@@ -57,8 +55,8 @@ module Rumale
57
55
  # The labels to be used to generate data indices for stratified random permutation cross validation.
58
56
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
59
57
  def split(x, y)
60
- check_sample_array(x)
61
- check_label_array(y)
58
+ x = check_convert_sample_array(x)
59
+ y = check_convert_label_array(y)
62
60
  check_sample_label_size(x, y)
63
61
  # Initialize and check some variables.
64
62
  train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
@@ -46,8 +46,8 @@ module Rumale
46
46
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
47
47
  # @return [OneVsRestClassifier] The learned classifier itself.
48
48
  def fit(x, y)
49
- check_sample_array(x)
50
- check_label_array(y)
49
+ x = check_convert_sample_array(x)
50
+ y = check_convert_label_array(y)
51
51
  check_sample_label_size(x, y)
52
52
  y_arr = y.to_a
53
53
  @classes = Numo::Int32.asarray(y_arr.uniq.sort)
@@ -63,7 +63,7 @@ module Rumale
63
63
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
64
64
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
65
65
  def decision_function(x)
66
- check_sample_array(x)
66
+ x = check_convert_sample_array(x)
67
67
  n_classes = @classes.size
68
68
  Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
69
69
  end
@@ -73,7 +73,7 @@ module Rumale
73
73
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
74
74
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
75
75
  def predict(x)
76
- check_sample_array(x)
76
+ x = check_convert_sample_array(x)
77
77
  n_samples, = x.shape
78
78
  decision_values = decision_function(x)
79
79
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -16,7 +16,7 @@ module Rumale
16
16
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
17
17
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
18
18
  def predict(x)
19
- check_sample_array(x)
19
+ x = check_convert_sample_array(x)
20
20
  n_samples = x.shape.first
21
21
  decision_values = decision_function(x)
22
22
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -27,7 +27,7 @@ module Rumale
27
27
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
28
28
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
29
29
  def predict_log_proba(x)
30
- check_sample_array(x)
30
+ x = check_convert_sample_array(x)
31
31
  n_samples, = x.shape
32
32
  log_likelihoods = decision_function(x)
33
33
  log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -38,7 +38,7 @@ module Rumale
38
38
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
39
39
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
40
40
  def predict_proba(x)
41
- check_sample_array(x)
41
+ x = check_convert_sample_array(x)
42
42
  Numo::NMath.exp(predict_log_proba(x)).abs
43
43
  end
44
44
  end
@@ -78,8 +78,8 @@ module Rumale
78
78
  # to be used for fitting the model.
79
79
  # @return [GaussianNB] The learned classifier itself.
80
80
  def fit(x, y)
81
- check_sample_array(x)
82
- check_label_array(y)
81
+ x = check_convert_sample_array(x)
82
+ y = check_convert_label_array(y)
83
83
  check_sample_label_size(x, y)
84
84
  n_samples, = x.shape
85
85
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -94,7 +94,7 @@ module Rumale
94
94
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
95
95
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
96
96
  def decision_function(x)
97
- check_sample_array(x)
97
+ x = check_convert_sample_array(x)
98
98
  n_classes = @classes.size
99
99
  log_likelihoods = Array.new(n_classes) do |l|
100
100
  Math.log(@class_priors[l]) - 0.5 * (
@@ -154,7 +154,7 @@ module Rumale
154
154
  #
155
155
  # @param smoothing_param [Float] The Laplace smoothing parameter.
156
156
  def initialize(smoothing_param: 1.0)
157
- check_params_float(smoothing_param: smoothing_param)
157
+ check_params_numeric(smoothing_param: smoothing_param)
158
158
  check_params_positive(smoothing_param: smoothing_param)
159
159
  @params = {}
160
160
  @params[:smoothing_param] = smoothing_param
@@ -167,8 +167,8 @@ module Rumale
167
167
  # to be used for fitting the model.
168
168
  # @return [MultinomialNB] The learned classifier itself.
169
169
  def fit(x, y)
170
- check_sample_array(x)
171
- check_label_array(y)
170
+ x = check_convert_sample_array(x)
171
+ y = check_convert_label_array(y)
172
172
  check_sample_label_size(x, y)
173
173
  n_samples, = x.shape
174
174
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -185,7 +185,7 @@ module Rumale
185
185
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
186
186
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
187
187
  def decision_function(x)
188
- check_sample_array(x)
188
+ x = check_convert_sample_array(x)
189
189
  n_classes = @classes.size
190
190
  bin_x = x.gt(0)
191
191
  log_likelihoods = Array.new(n_classes) do |l|
@@ -243,7 +243,7 @@ module Rumale
243
243
  # @param smoothing_param [Float] The Laplace smoothing parameter.
244
244
  # @param bin_threshold [Float] The threshold for binarizing of features.
245
245
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
246
- check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
246
+ check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
247
247
  check_params_positive(smoothing_param: smoothing_param)
248
248
  @params = {}
249
249
  @params[:smoothing_param] = smoothing_param
@@ -257,8 +257,8 @@ module Rumale
257
257
  # to be used for fitting the model.
258
258
  # @return [BernoulliNB] The learned classifier itself.
259
259
  def fit(x, y)
260
- check_sample_array(x)
261
- check_label_array(y)
260
+ x = check_convert_sample_array(x)
261
+ y = check_convert_label_array(y)
262
262
  check_sample_label_size(x, y)
263
263
  n_samples, = x.shape
264
264
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
@@ -278,7 +278,7 @@ module Rumale
278
278
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
279
279
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
280
280
  def decision_function(x)
281
- check_sample_array(x)
281
+ x = check_convert_sample_array(x)
282
282
  n_classes = @classes.size
283
283
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
284
284
  not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
@@ -35,7 +35,7 @@ module Rumale
35
35
  #
36
36
  # @param n_neighbors [Integer] The number of neighbors.
37
37
  def initialize(n_neighbors: 5)
38
- check_params_integer(n_neighbors: n_neighbors)
38
+ check_params_numeric(n_neighbors: n_neighbors)
39
39
  check_params_positive(n_neighbors: n_neighbors)
40
40
  @params = {}
41
41
  @params[:n_neighbors] = n_neighbors
@@ -50,8 +50,8 @@ module Rumale
50
50
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
51
51
  # @return [KNeighborsClassifier] The learned classifier itself.
52
52
  def fit(x, y)
53
- check_sample_array(x)
54
- check_label_array(y)
53
+ x = check_convert_sample_array(x)
54
+ y = check_convert_label_array(y)
55
55
  check_sample_label_size(x, y)
56
56
  @prototypes = Numo::DFloat.asarray(x.to_a)
57
57
  @labels = Numo::Int32.asarray(y.to_a)
@@ -64,7 +64,7 @@ module Rumale
64
64
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
65
65
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
66
66
  def decision_function(x)
67
- check_sample_array(x)
67
+ x = check_convert_sample_array(x)
68
68
  distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
69
69
  n_samples, n_prototypes = distance_matrix.shape
70
70
  n_classes = @classes.size
@@ -82,7 +82,7 @@ module Rumale
82
82
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
83
83
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
84
84
  def predict(x)
85
- check_sample_array(x)
85
+ x = check_convert_sample_array(x)
86
86
  n_samples = x.shape.first
87
87
  decision_values = decision_function(x)
88
88
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -30,7 +30,7 @@ module Rumale
30
30
  #
31
31
  # @param n_neighbors [Integer] The number of neighbors.
32
32
  def initialize(n_neighbors: 5)
33
- check_params_integer(n_neighbors: n_neighbors)
33
+ check_params_numeric(n_neighbors: n_neighbors)
34
34
  check_params_positive(n_neighbors: n_neighbors)
35
35
  @params = {}
36
36
  @params[:n_neighbors] = n_neighbors
@@ -44,8 +44,8 @@ module Rumale
44
44
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
45
45
  # @return [KNeighborsRegressor] The learned regressor itself.
46
46
  def fit(x, y)
47
- check_sample_array(x)
48
- check_tvalue_array(y)
47
+ x = check_convert_sample_array(x)
48
+ y = check_convert_tvalue_array(y)
49
49
  check_sample_tvalue_size(x, y)
50
50
  @prototypes = x.dup
51
51
  @values = y.dup
@@ -57,7 +57,7 @@ module Rumale
57
57
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
58
58
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
59
59
  def predict(x)
60
- check_sample_array(x)
60
+ x = check_convert_sample_array(x)
61
61
  # Initialize some variables.
62
62
  n_samples, = x.shape
63
63
  n_prototypes, n_outputs = @values.shape
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+
5
+ module Rumale
6
+ # This module consists of the modules and classes for implementing multi-layer perceptron estimators.
7
+ module NeuralNetwork
8
+ # @!visibility private
9
+ # This module consists of the classes that implement layer functions of neural network.
10
+ module Layer
11
+ # @!visibility private
12
+ # Affine is a class that calculates the linear transform.
13
+ # This class is used internally.
14
+ class Affine
15
+ # @!visibility private
16
+ def initialize(n_inputs: nil, n_outputs: nil, optimizer: nil, rng: nil)
17
+ @weight = 0.01 * Rumale::Utils.rand_normal([n_inputs, n_outputs], rng)
18
+ @bias = Numo::DFloat.zeros(n_outputs)
19
+ @optimizer_weight = optimizer.dup
20
+ @optimizer_bias = optimizer.dup
21
+ end
22
+
23
+ # @!visibility private
24
+ def forward(x)
25
+ out = x.dot(@weight) + @bias
26
+
27
+ backward = proc do |dout|
28
+ dx = dout.dot(@weight.transpose)
29
+ dw = x.transpose.dot(dout)
30
+ db = dout.sum(0)
31
+
32
+ @weight = @optimizer_weight.call(@weight, dw)
33
+ @bias = @optimizer_bias.call(@bias, db)
34
+
35
+ dx
36
+ end
37
+
38
+ [out, backward]
39
+ end
40
+ end
41
+
42
+ # @!visibility private
43
+ # Dropout is a class that performs dropout regularization.
44
+ # This class is used internally.
45
+ class Dropout
46
+ # @!visibility private
47
+ def initialize(rate: 0.3, rng: nil)
48
+ @rate = rate
49
+ @rng = rng
50
+ end
51
+
52
+ # @!visibility private
53
+ def forward(x)
54
+ rand_mat = Rumale::Utils.rand_uniform(x.shape, @rng)
55
+ mask = rand_mat.ge(@rate)
56
+ out = x * mask
57
+ out *= 1.fdiv(1 - @rate) if @rate < 1.0
58
+
59
+ backward = proc { |dout| dout * mask }
60
+
61
+ [out, backward]
62
+ end
63
+ end
64
+
65
+ # @!visibility private
66
+ # ReLU is a class that calculates rectified linear function.
67
+ # This class is used internally.
68
+ class Relu
69
+ # @!visibility private
70
+ def forward(x)
71
+ mask = x.gt(0)
72
+ out = x * mask
73
+
74
+ backward = proc { |dout| dout * mask }
75
+
76
+ [out, backward]
77
+ end
78
+ end
79
+ end
80
+
81
+ # @!visibility private
82
+ # This module consists of the classes that implement loss function for neural network.
83
+ module Loss
84
+ # @!visibility private
85
+ # MeanSquaredError is a class that calculates mean squared error for regression task.
86
+ # This class is used internally.
87
+ class MeanSquaredError
88
+ # @!visibility private
89
+ def call(out, y)
90
+ sz_batch = y.shape[0]
91
+ diff = out - y
92
+ loss = (diff**2).sum.fdiv(sz_batch)
93
+ dout = 2.fdiv(sz_batch) * diff
94
+ [loss, dout]
95
+ end
96
+ end
97
+
98
+ # @!visibility private
99
+ # SoftmaxCrossEntropy is a class that calculates softmax cross-entropy for classification task.
100
+ # This class is used internally.
101
+ class SoftmaxCrossEntropy
102
+ # @!visibility private
103
+ def call(out, y)
104
+ sz_batch = y.shape[0]
105
+ z = softmax(out)
106
+ loss = -(y * Numo::NMath.log(z + 1e-8)).sum.fdiv(sz_batch)
107
+ dout = (z - y) / sz_batch
108
+ [loss, dout]
109
+ end
110
+
111
+ private
112
+
113
+ def softmax(x)
114
+ clip = x.max(-1).expand_dims(-1)
115
+ exp_x = Numo::NMath.exp(x - clip)
116
+ exp_x / exp_x.sum(-1).expand_dims(-1)
117
+ end
118
+ end
119
+ end
120
+
121
+ # @!visibility private
122
+ # This module consists of the classes for implementing neural network model.
123
+ module Model
124
+ # @!visibility private
125
+ attr_reader :layers
126
+
127
+ # @!visibility private
128
+ # Sequential is a class that implements linear stack model.
129
+ # This class is used internally.
130
+ class Sequential
131
+ # @!visibility private
132
+ def initialize
133
+ @layers = []
134
+ end
135
+
136
+ # @!visibility private
137
+ def push(ops)
138
+ @layers.push(ops)
139
+ self
140
+ end
141
+
142
+ # @!visibility private
143
+ def delete_dropout
144
+ @layers.delete_if { |node| node.is_a?(Layer::Dropout) }
145
+ self
146
+ end
147
+
148
+ # @!visibility private
149
+ def forward(x)
150
+ backprops = []
151
+ out = x.dup
152
+
153
+ @layers.each do |l|
154
+ out, bw = l.forward(out)
155
+ backprops.push(bw)
156
+ end
157
+
158
+ backward = proc do |dout|
159
+ backprops.reverse_each { |bw| dout = bw.call(dout) }
160
+ dout
161
+ end
162
+
163
+ [out, backward]
164
+ end
165
+ end
166
+ end
167
+
168
+ # BaseMLP is an abstract class for implementation of multi-layer perceptron estimator.
169
+ # This class is used internally.
170
+ class BaseMLP
171
+ include Base::BaseEstimator
172
+
173
+ # Create a multi-layer perceptron estimator.
174
+ #
175
+ # @param hidden_units [Array] The number of units in the i-th hidden layer.
176
+ # @param dropout_rate [Float] The rate of the units to drop.
177
+ # @param learning_rate [Float] The initial value of learning rate in Adam optimizer.
178
+ # @param decay1 [Float] The smoothing parameter for the first moment in Adam optimizer.
179
+ # @param decay2 [Float] The smoothing parameter for the second moment in Adam optimizer.
180
+ # @param max_iter [Integer] The maximum number of iterations.
181
+ # @param batch_size [Integer] The size of the mini batches.
182
+ # @param tol [Float] The tolerance of loss for terminating optimization.
183
+ # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
184
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
185
+ def initialize(hidden_units: [128, 128], dropout_rate: 0.4, learning_rate: 0.001, decay1: 0.9, decay2: 0.999,
186
+ max_iter: 10000, batch_size: 50, tol: 1e-4, verbose: false, random_seed: nil)
187
+ @params = {}
188
+ @params[:hidden_units] = hidden_units
189
+ @params[:dropout_rate] = dropout_rate
190
+ @params[:learning_rate] = learning_rate
191
+ @params[:decay1] = decay1
192
+ @params[:decay2] = decay2
193
+ @params[:max_iter] = max_iter
194
+ @params[:batch_size] = batch_size
195
+ @params[:tol] = tol
196
+ @params[:verbose] = verbose
197
+ @params[:random_seed] = random_seed
198
+ @params[:random_seed] ||= srand
199
+ @n_iter = nil
200
+ @rng = Random.new(@params[:random_seed])
201
+ end
202
+
203
+ private
204
+
205
+ def buld_network(n_inputs, n_outputs, srng = nil)
206
+ adam = Rumale::Optimizer::Adam.new(learning_rate: @params[:learning_rate], decay1: @params[:decay1], decay2: @params[:decay2])
207
+ model = Model::Sequential.new
208
+ n_units = [n_inputs, *@params[:hidden_units]]
209
+ n_units.each_cons(2) do |n_in, n_out|
210
+ model.push(Layer::Affine.new(n_inputs: n_in, n_outputs: n_out, optimizer: adam, rng: srng))
211
+ model.push(Layer::Relu.new)
212
+ model.push(Layer::Dropout.new(rate: @params[:dropout_rate], rng: srng))
213
+ end
214
+ model.push(Layer::Affine.new(n_inputs: n_units[-1], n_outputs: n_outputs, optimizer: adam, rng: srng))
215
+ end
216
+
217
+ def train(x, y, network, loss_func, srng = nil)
218
+ class_name = self.class.to_s.split('::').last
219
+
220
+ n_samples = x.shape[0]
221
+ rand_ids = [*0...n_samples].shuffle(random: srng)
222
+
223
+ @params[:max_iter].times do |t|
224
+ # random sampling
225
+ subset_ids = rand_ids.shift(@params[:batch_size])
226
+ rand_ids.concat(subset_ids)
227
+ sub_x = x[subset_ids, true].dup
228
+ sub_y = y[subset_ids, true].dup
229
+ # forward
230
+ out, backward = network.forward(sub_x)
231
+ # calc loss function
232
+ loss, dout = loss_func.call(out, sub_y)
233
+ @n_iter = t + 1
234
+ puts "[#{class_name}] Loss after #{@n_iter} iterations: #{loss}" if @params[:verbose] && (@n_iter % 10).zero?
235
+ break if loss < @params[:tol]
236
+ # backward
237
+ backward.call(dout)
238
+ end
239
+
240
+ network
241
+ end
242
+ end
243
+ end
244
+ end