rumale 0.13.8 → 0.14.0

Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
data/lib/rumale/model_selection/stratified_k_fold.rb
@@ -36,9 +36,9 @@ module Rumale
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
- check_params_integer(n_splits: n_splits)
+ check_params_numeric(n_splits: n_splits)
  check_params_boolean(shuffle: shuffle)
- check_params_type_or_nil(Integer, random_seed: random_seed)
+ check_params_numeric_or_nil(random_seed: random_seed)
  check_params_positive(n_splits: n_splits)
  @n_splits = n_splits
  @shuffle = shuffle
@@ -56,8 +56,8 @@ module Rumale
  # The labels to be used to generate data indices for stratified K-fold cross validation.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  def split(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  # Check the number of samples in each class.
  unless valid_n_splits?(y)
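
The two hunks above show the 0.14.0 validation changes that repeat across most files in this diff: the type-specific checks (check_params_integer, check_params_float, check_params_type_or_nil) are folded into check_params_numeric / check_params_numeric_or_nil, so any Numeric passes, and the non-converting check_sample_array / check_label_array become check_convert_sample_array / check_convert_label_array, whose return value is assigned back, implying they convert the input to the expected Numo array. A minimal sketch of what this means for callers; the TinySplitter class is made up for illustration:

    require 'rumale'

    # Hypothetical class mixing in Rumale::Validation, mirroring the hunk above.
    class TinySplitter
      include Rumale::Validation

      def initialize(n_splits: 3, random_seed: nil)
        check_params_numeric(n_splits: n_splits)              # Integer or Float both pass now
        check_params_numeric_or_nil(random_seed: random_seed) # nil still allowed for the seed
        check_params_positive(n_splits: n_splits)
        @n_splits = n_splits.to_i
      end
    end

    TinySplitter.new(n_splits: 3)   # ok
    TinySplitter.new(n_splits: 3.0) # ok in 0.14.0; rejected by 0.13.8's check_params_integer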
data/lib/rumale/model_selection/stratified_shuffle_split.rb
@@ -33,10 +33,8 @@ module Rumale
  # @param train_size [Float] The ratio of the number of samples for train data.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
- check_params_integer(n_splits: n_splits)
- check_params_float(test_size: test_size)
- check_params_type_or_nil(Float, train_size: train_size)
- check_params_type_or_nil(Integer, random_seed: random_seed)
+ check_params_numeric(n_splits: n_splits, test_size: test_size)
+ check_params_numeric_or_nil(train_size: train_size, random_seed: random_seed)
  check_params_positive(n_splits: n_splits)
  check_params_positive(test_size: test_size)
  check_params_positive(train_size: train_size) unless train_size.nil?
@@ -57,8 +55,8 @@ module Rumale
  # The labels to be used to generate data indices for stratified random permutation cross validation.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  def split(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  # Initialize and check some variables.
  train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
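
For reference, a short usage sketch of the splitter touched above; the toy arrays are made up, and split returns an array of [training_ids, testing_ids] pairs as documented in the hunk:

    require 'rumale'

    x = Numo::DFloat.new(10, 2).rand
    y = Numo::Int32[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    splitter = Rumale::ModelSelection::StratifiedShuffleSplit.new(
      n_splits: 2, test_size: 0.2, random_seed: 1
    )
    splitter.split(x, y).each do |train_ids, test_ids|
      # train_size defaults to 1.0 - test_size (the last context line above),
      # so 8 samples train and 2 test, drawn per class.
      p [train_ids.size, test_ids.size] # => [8, 2]
    end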
data/lib/rumale/multiclass/one_vs_rest_classifier.rb
@@ -46,8 +46,8 @@ module Rumale
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [OneVsRestClassifier] The learned classifier itself.
  def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  y_arr = y.to_a
  @classes = Numo::Int32.asarray(y_arr.uniq.sort)
@@ -63,7 +63,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_classes = @classes.size
  Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
  end
@@ -73,7 +73,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_samples, = x.shape
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
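
The predict methods throughout this diff share the same idiom: take each sample's row of decision values and map its max_index back through @classes. A standalone Numo sketch of that idiom, with made-up values:

    require 'numo/narray'

    classes = Numo::Int32[2, 5, 9]
    decision_values = Numo::DFloat[[0.1, 0.7, 0.2],
                                   [0.6, 0.3, 0.1]]

    n_samples = decision_values.shape[0]
    predicted = Numo::Int32.asarray(
      Array.new(n_samples) { |n| classes[decision_values[n, true].max_index] }
    )
    p predicted.to_a # => [5, 2]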
data/lib/rumale/naive_bayes/naive_bayes.rb
@@ -16,7 +16,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_samples = x.shape.first
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -27,7 +27,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
  def predict_log_proba(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_samples, = x.shape
  log_likelihoods = decision_function(x)
  log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -38,7 +38,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  Numo::NMath.exp(predict_log_proba(x)).abs
  end
  end
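
predict_log_proba above normalizes the joint log-likelihoods by subtracting the log of the per-row likelihood sum, so the exponentiation in predict_proba yields rows that sum to one. A quick Numo check of that identity, with made-up values:

    require 'numo/narray'

    log_likelihoods = Numo::DFloat[[-1.0, -2.0], [-0.5, -3.0]]
    n_samples = log_likelihoods.shape[0]

    log_proba = log_likelihoods -
                Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
    p Numo::NMath.exp(log_proba).sum(1).to_a # => [1.0, 1.0] up to float rounding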
@@ -78,8 +78,8 @@ module Rumale
  # to be used for fitting the model.
  # @return [GaussianNB] The learned classifier itself.
  def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  n_samples, = x.shape
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -94,7 +94,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_classes = @classes.size
  log_likelihoods = Array.new(n_classes) do |l|
  Math.log(@class_priors[l]) - 0.5 * (
@@ -154,7 +154,7 @@ module Rumale
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  def initialize(smoothing_param: 1.0)
- check_params_float(smoothing_param: smoothing_param)
+ check_params_numeric(smoothing_param: smoothing_param)
  check_params_positive(smoothing_param: smoothing_param)
  @params = {}
  @params[:smoothing_param] = smoothing_param
@@ -167,8 +167,8 @@ module Rumale
  # to be used for fitting the model.
  # @return [MultinomialNB] The learned classifier itself.
  def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  n_samples, = x.shape
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -185,7 +185,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_classes = @classes.size
  bin_x = x.gt(0)
  log_likelihoods = Array.new(n_classes) do |l|
@@ -243,7 +243,7 @@ module Rumale
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  # @param bin_threshold [Float] The threshold for binarizing features.
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
- check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
+ check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
  check_params_positive(smoothing_param: smoothing_param)
  @params = {}
  @params[:smoothing_param] = smoothing_param
@@ -257,8 +257,8 @@ module Rumale
  # to be used for fitting the model.
  # @return [BernoulliNB] The learned classifier itself.
  def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  n_samples, = x.shape
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
@@ -278,7 +278,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_classes = @classes.size
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
  not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
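
BernoulliNB scores binary events, so decision_function first binarizes the features against bin_threshold; splatting the Numo::Bit comparison result into Numo::DFloat[...] is what turns the mask into a 0/1 float matrix. A tiny sketch with made-up values:

    require 'numo/narray'

    x = Numo::DFloat[[0.2, 1.5], [0.0, 0.7]]
    threshold = 0.5

    bin_x     = Numo::DFloat[*x.gt(threshold)] # 1 where the feature fires
    not_bin_x = Numo::DFloat[*x.le(threshold)] # 1 where it does not
    p bin_x.to_a     # => [[0.0, 1.0], [0.0, 1.0]]
    p not_bin_x.to_a # => [[1.0, 0.0], [1.0, 0.0]]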
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
@@ -35,7 +35,7 @@ module Rumale
  #
  # @param n_neighbors [Integer] The number of neighbors.
  def initialize(n_neighbors: 5)
- check_params_integer(n_neighbors: n_neighbors)
+ check_params_numeric(n_neighbors: n_neighbors)
  check_params_positive(n_neighbors: n_neighbors)
  @params = {}
  @params[:n_neighbors] = n_neighbors
@@ -50,8 +50,8 @@ module Rumale
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [KNeighborsClassifier] The learned classifier itself.
  def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  @prototypes = Numo::DFloat.asarray(x.to_a)
  @labels = Numo::Int32.asarray(y.to_a)
@@ -64,7 +64,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
  n_samples, n_prototypes = distance_matrix.shape
  n_classes = @classes.size
@@ -82,7 +82,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  n_samples = x.shape.first
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
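
A short end-to-end sketch of the classifier above (the four training points are made up): decision_function scores each class by its representation among the n_neighbors nearest prototypes, and predict takes the argmax as shown in the hunk:

    require 'rumale'

    x = Numo::DFloat[[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1]]
    y = Numo::Int32[0, 0, 1, 1]

    knn = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 3)
    knn.fit(x, y)
    # Two of the three nearest prototypes to [0.9, 0.9] carry label 1.
    p knn.predict(Numo::DFloat[[0.9, 0.9]]).to_a # => [1]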
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
@@ -30,7 +30,7 @@ module Rumale
  #
  # @param n_neighbors [Integer] The number of neighbors.
  def initialize(n_neighbors: 5)
- check_params_integer(n_neighbors: n_neighbors)
+ check_params_numeric(n_neighbors: n_neighbors)
  check_params_positive(n_neighbors: n_neighbors)
  @params = {}
  @params[:n_neighbors] = n_neighbors
@@ -44,8 +44,8 @@ module Rumale
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
  # @return [KNeighborsRegressor] The learned regressor itself.
  def fit(x, y)
- check_sample_array(x)
- check_tvalue_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_tvalue_array(y)
  check_sample_tvalue_size(x, y)
  @prototypes = x.dup
  @values = y.dup
@@ -57,7 +57,7 @@ module Rumale
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
  def predict(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
  # Initialize some variables.
  n_samples, = x.shape
  n_prototypes, n_outputs = @values.shape
data/lib/rumale/neural_network/base_mlp.rb (new file)
@@ -0,0 +1,244 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+
+ module Rumale
+   # This module consists of the modules and classes for implementing the multi-layer perceptron estimator.
+   module NeuralNetwork
+     # @!visibility private
+     # This module consists of the classes that implement layer functions of neural network.
+     module Layer
+       # @!visibility private
+       # Affine is a class that calculates the linear transform.
+       # This class is used internally.
+       class Affine
+         # @!visibility private
+         def initialize(n_inputs: nil, n_outputs: nil, optimizer: nil, rng: nil)
+           @weight = 0.01 * Rumale::Utils.rand_normal([n_inputs, n_outputs], rng)
+           @bias = Numo::DFloat.zeros(n_outputs)
+           @optimizer_weight = optimizer.dup
+           @optimizer_bias = optimizer.dup
+         end
+
+         # @!visibility private
+         def forward(x)
+           out = x.dot(@weight) + @bias
+
+           backward = proc do |dout|
+             dx = dout.dot(@weight.transpose)
+             dw = x.transpose.dot(dout)
+             db = dout.sum(0)
+
+             @weight = @optimizer_weight.call(@weight, dw)
+             @bias = @optimizer_bias.call(@bias, db)
+
+             dx
+           end
+
+           [out, backward]
+         end
+       end
+
+       # @!visibility private
+       # Dropout is a class that performs dropout regularization.
+       # This class is used internally.
+       class Dropout
+         # @!visibility private
+         def initialize(rate: 0.3, rng: nil)
+           @rate = rate
+           @rng = rng
+         end
+
+         # @!visibility private
+         def forward(x)
+           rand_mat = Rumale::Utils.rand_uniform(x.shape, @rng)
+           mask = rand_mat.ge(@rate)
+           out = x * mask
+           out *= 1.fdiv(1 - @rate) if @rate < 1.0
+
+           backward = proc { |dout| dout * mask }
+
+           [out, backward]
+         end
+       end
+
+       # @!visibility private
+       # ReLU is a class that calculates rectified linear function.
+       # This class is used internally.
+       class Relu
+         # @!visibility private
+         def forward(x)
+           mask = x.gt(0)
+           out = x * mask
+
+           backward = proc { |dout| dout * mask }
+
+           [out, backward]
+         end
+       end
+     end
+
+     # @!visibility private
+     # This module consists of the classes that implement loss function for neural network.
+     module Loss
+       # @!visibility private
+       # MeanSquaredError is a class that calculates mean squared error for regression task.
+       # This class is used internally.
+       class MeanSquaredError
+         # @!visibility private
+         def call(out, y)
+           sz_batch = y.shape[0]
+           diff = out - y
+           loss = (diff**2).sum.fdiv(sz_batch)
+           dout = 2.fdiv(sz_batch) * diff
+           [loss, dout]
+         end
+       end
+
+       # @!visibility private
+       # SoftmaxCrossEntropy is a class that calculates softmax cross-entropy for classification task.
+       # This class is used internally.
+       class SoftmaxCrossEntropy
+         # @!visibility private
+         def call(out, y)
+           sz_batch = y.shape[0]
+           z = softmax(out)
+           loss = -(y * Numo::NMath.log(z + 1e-8)).sum.fdiv(sz_batch)
+           dout = (z - y) / sz_batch
+           [loss, dout]
+         end
+
+         private
+
+         def softmax(x)
+           clip = x.max(-1).expand_dims(-1)
+           exp_x = Numo::NMath.exp(x - clip)
+           exp_x / exp_x.sum(-1).expand_dims(-1)
+         end
+       end
+     end
+
+     # @!visibility private
+     # This module consists of the classes for implementing neural network model.
+     module Model
+       # @!visibility private
+       attr_reader :layers
+
+       # @!visibility private
+       # Sequential is a class that implements linear stack model.
+       # This class is used internally.
+       class Sequential
+         # @!visibility private
+         def initialize
+           @layers = []
+         end
+
+         # @!visibility private
+         def push(ops)
+           @layers.push(ops)
+           self
+         end
+
+         # @!visibility private
+         def delete_dropout
+           @layers.delete_if { |node| node.is_a?(Layer::Dropout) }
+           self
+         end
+
+         # @!visibility private
+         def forward(x)
+           backprops = []
+           out = x.dup
+
+           @layers.each do |l|
+             out, bw = l.forward(out)
+             backprops.push(bw)
+           end
+
+           backward = proc do |dout|
+             backprops.reverse_each { |bw| dout = bw.call(dout) }
+             dout
+           end
+
+           [out, backward]
+         end
+       end
+     end
+
+     # BaseMLP is an abstract class for implementation of multi-layer perceptron estimator.
+     # This class is used internally.
+     class BaseMLP
+       include Base::BaseEstimator
+
+       # Create a multi-layer perceptron estimator.
+       #
+       # @param hidden_units [Array] The number of units in the i-th hidden layer.
+       # @param dropout_rate [Float] The rate of the units to drop.
+       # @param learning_rate [Float] The initial value of learning rate in Adam optimizer.
+       # @param decay1 [Float] The smoothing parameter for the first moment in Adam optimizer.
+       # @param decay2 [Float] The smoothing parameter for the second moment in Adam optimizer.
+       # @param max_iter [Integer] The maximum number of iterations.
+       # @param batch_size [Integer] The size of the mini batches.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(hidden_units: [128, 128], dropout_rate: 0.4, learning_rate: 0.001, decay1: 0.9, decay2: 0.999,
+                      max_iter: 10000, batch_size: 50, tol: 1e-4, verbose: false, random_seed: nil)
+         @params = {}
+         @params[:hidden_units] = hidden_units
+         @params[:dropout_rate] = dropout_rate
+         @params[:learning_rate] = learning_rate
+         @params[:decay1] = decay1
+         @params[:decay2] = decay2
+         @params[:max_iter] = max_iter
+         @params[:batch_size] = batch_size
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @params[:random_seed] ||= srand
+         @n_iter = nil
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       private
+
+       def buld_network(n_inputs, n_outputs, srng = nil)
+         adam = Rumale::Optimizer::Adam.new(learning_rate: @params[:learning_rate], decay1: @params[:decay1], decay2: @params[:decay2])
+         model = Model::Sequential.new
+         n_units = [n_inputs, *@params[:hidden_units]]
+         n_units.each_cons(2) do |n_in, n_out|
+           model.push(Layer::Affine.new(n_inputs: n_in, n_outputs: n_out, optimizer: adam, rng: srng))
+           model.push(Layer::Relu.new)
+           model.push(Layer::Dropout.new(rate: @params[:dropout_rate], rng: srng))
+         end
+         model.push(Layer::Affine.new(n_inputs: n_units[-1], n_outputs: n_outputs, optimizer: adam, rng: srng))
+       end
+
+       def train(x, y, network, loss_func, srng = nil)
+         class_name = self.class.to_s.split('::').last
+
+         n_samples = x.shape[0]
+         rand_ids = [*0...n_samples].shuffle(random: srng)
+
+         @params[:max_iter].times do |t|
+           # random sampling
+           subset_ids = rand_ids.shift(@params[:batch_size])
+           rand_ids.concat(subset_ids)
+           sub_x = x[subset_ids, true].dup
+           sub_y = y[subset_ids, true].dup
+           # forward
+           out, backward = network.forward(sub_x)
+           # calc loss function
+           loss, dout = loss_func.call(out, sub_y)
+           @n_iter = t + 1
+           puts "[#{class_name}] Loss after #{@n_iter} iterations: #{loss}" if @params[:verbose] && (@n_iter % 10).zero?
+           break if loss < @params[:tol]
+           # backward
+           backward.call(dout)
+         end
+
+         network
+       end
+     end
+   end
+ end
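
The classes above are the internals behind the new Rumale::NeuralNetwork::MLPClassifier and MLPRegressor listed in the changed files. Those two files are not shown in this excerpt, so the constructor keywords in the sketch below are an assumption based on the BaseMLP parameters above; a rough usage sketch on made-up XOR-style data:

    require 'rumale'

    # 200 noise-free XOR samples; tile repeats the 4 base rows.
    x = Numo::DFloat[[0, 0], [0, 1], [1, 0], [1, 1]].tile(50, 1)
    y = Numo::Int32[0, 1, 1, 0].tile(50)

    # Keyword names assumed to mirror BaseMLP#initialize above.
    clf = Rumale::NeuralNetwork::MLPClassifier.new(
      hidden_units: [16], dropout_rate: 0.0, max_iter: 2000, random_seed: 1
    )
    clf.fit(x, y)
    p clf.predict(Numo::DFloat[[0, 1], [1, 1]]).to_a # => [1, 0] once training has converged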