rumale 0.13.8 → 0.14.0

Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
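
The headline addition in 0.14.0 is the multi-layer perceptron pair (base_mlp.rb, mlp_classifier.rb, mlp_regressor.rb in the list above). A minimal usage sketch follows; the keyword arguments hidden_units:, max_iter: and random_seed: are assumptions based on Rumale's usual estimator conventions, not taken from this diff, so check the new mlp_classifier.rb for the real signature.

    require 'rumale'

    # Toy data: 100 two-dimensional samples with a simple linear decision rule.
    x = Numo::DFloat.new(100, 2).rand
    y = Numo::Int32.cast(x.sum(1).gt(1.0))

    # Hypothetical constructor arguments (see note above).
    mlp = Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [32, 32], max_iter: 500, random_seed: 1)
    mlp.fit(x, y)
    p mlp.predict(x[0...5, true]).to_a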
data/lib/rumale/polynomial_model/factorization_machine_classifier.rb
@@ -61,10 +61,10 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
                      max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
-        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         check_params_string(loss: loss)
-        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors,
                               reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
@@ -78,15 +78,15 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [FactorizationMachineClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)

         @classes = Numo::Int32[*y.to_a.uniq.sort]
-        n_classes = @classes.size
-        _n_samples, n_features = x.shape

-        if n_classes > 2
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
           @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
           @bias_term = Numo::DFloat.zeros(n_classes)
@@ -105,7 +105,7 @@ module Rumale
             end
           end
         else
-          negative_label = y.to_a.uniq.min
+          negative_label = @classes[0]
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @factor_mat, @weight_vec, @bias_term = partial_fit(x, bin_y)
         end
@@ -118,12 +118,12 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         linear_term = @bias_term + x.dot(@weight_vec.transpose)
-        factor_term = if @classes.size <= 2
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                      else
+        factor_term = if multiclass_problem?
                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+                      else
+                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
                       end
         linear_term + factor_term
       end
@@ -133,15 +133,19 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
-        return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+        x = check_convert_sample_array(x)

         n_samples = x.shape[0]
-        decision_values = decision_function(x)
-        predicted = if enable_parallel?
-                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+        predicted = if multiclass_problem?
+                      decision_values = decision_function(x)
+                      if enable_parallel?
+                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      else
+                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      end
                     else
-                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      decision_values = decision_function(x).ge(0.0).to_a
+                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
                     end
         Numo::Int32.asarray(predicted)
       end
@@ -151,9 +155,9 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-        return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?

         n_samples, = x.shape
         probs = Numo::DFloat.zeros(n_samples, 2)
@@ -211,6 +215,10 @@ module Rumale
           logistic_loss_gradient(x, ex_x, y, factor, weight)
         end
       end
+
+      def multiclass_problem?
+        @classes.size > 2
+      end
     end
   end
 end
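
The classifier hunks above follow two themes: the n_classes > 2 branching is centralized in the new multiclass_problem? helper, and the check_* validators are replaced by check_convert_* calls whose return value is reassigned to x and y. A small sketch under the assumption that those converters cast plain Ruby arrays to Numo arrays:

    require 'rumale'

    # Plain Ruby arrays; assumed to be coerced by check_convert_sample_array /
    # check_convert_label_array inside fit.
    samples = [[1.0, 2.0], [2.0, 1.5], [8.0, 9.0], [9.0, 8.5]]
    labels  = [1, 1, -1, -1]

    fm = Rumale::PolynomialModel::FactorizationMachineClassifier.new(
      n_factors: 2, loss: 'logistic', max_iter: 200, batch_size: 2, random_seed: 1
    )
    fm.fit(samples, labels)
    p fm.predict(samples).to_a        # two classes, so the binary branch of predict is used
    p fm.predict_proba(samples).shape # [4, 2] in the binary case, per the probs allocation above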
data/lib/rumale/polynomial_model/factorization_machine_regressor.rb
@@ -54,9 +54,9 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
                      max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
-        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
-        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
@@ -69,8 +69,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [FactorizationMachineRegressor] The learned regressor itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)

         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
@@ -98,7 +98,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         linear_term = @bias_term + x.dot(@weight_vec.transpose)
         factor_term = if @weight_vec.shape[1].nil?
                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
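
Both factorization machine constructors now funnel numeric hyperparameters through check_params_numeric / check_params_numeric_or_nil instead of separate float and integer checks. The real helpers live in data/lib/rumale/validation.rb (+32 -2 in this release); the sketch below is only a hypothetical stand-in that illustrates what a single Numeric check buys, namely that max_iter: 1000 and reg_param_linear: 1.0 pass the same validator.

    # Hypothetical stand-ins for the renamed validators; not the actual Rumale code.
    def check_params_numeric(**params)
      params.each do |name, value|
        raise TypeError, "expected #{name} to be Numeric" unless value.is_a?(Numeric)
      end
    end

    def check_params_numeric_or_nil(**params)
      check_params_numeric(**params.reject { |_name, value| value.nil? })
    end

    check_params_numeric(max_iter: 1000, reg_param_linear: 1.0)  # integer and float pass one check
    check_params_numeric_or_nil(n_jobs: nil, random_seed: 42)    # nil is allowed here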
data/lib/rumale/preprocessing/bin_discretizer.rb
@@ -50,7 +50,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
       # @return [BinDiscretizer]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_features = x.shape[1]
         max_vals = x.max(0)
         min_vals = x.min(0)
@@ -67,7 +67,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
       # @return [Numo::DFloat] The discretized samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -76,7 +76,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
       # @return [Numo::DFloat] The discretized samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, n_features = x.shape
         transformed = Numo::DFloat.zeros(n_samples, n_features)
         n_features.times do |n|
data/lib/rumale/preprocessing/l2_normalizer.rb
@@ -32,7 +32,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
       # @return [L2Normalizer]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
         self
       end
@@ -44,7 +44,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
       # @return [Numo::DFloat] The normalized samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x)
         x / @norm_vec.tile(x.shape[1], 1).transpose
       end
data/lib/rumale/preprocessing/label_binarizer.rb
@@ -31,7 +31,7 @@ module Rumale
       # @param neg_label [Integer] The value represents negative label.
       # @param pos_label [Integer] The value represents positive label.
       def initialize(neg_label: 0, pos_label: 1)
-        check_params_integer(neg_label: neg_label, pos_label: pos_label)
+        check_params_numeric(neg_label: neg_label, pos_label: pos_label)
         @params = {}
         @params[:neg_label] = neg_label
         @params[:pos_label] = pos_label
@@ -80,7 +80,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
       # @return [Array] (shape: [n_samples]) The decoded labels.
       def inverse_transform(x)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         n_samples = x.shape[0]
         Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
       end
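
LabelBinarizer#inverse_transform now casts its argument to Numo::Int32 rather than rejecting non-Int32 input. A small round-trip sketch, assuming fit_transform accepts a plain Ruby label array as in earlier releases:

    require 'rumale'

    encoder = Rumale::Preprocessing::LabelBinarizer.new(neg_label: 0, pos_label: 1)
    codes = encoder.fit_transform([3, 7, 7, 3, 9])  # one row per label, one column per class
    p codes.to_a
    p encoder.inverse_transform(codes)              # => [3, 7, 7, 3, 9]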
data/lib/rumale/preprocessing/label_encoder.rb
@@ -71,7 +71,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
       # @return [Array] The decoded labels.
       def inverse_transform(x)
-        check_label_array(x)
+        x = check_convert_label_array(x)
         x.to_a.map { |n| @classes[n] }
       end

data/lib/rumale/preprocessing/max_abs_scaler.rb
@@ -32,7 +32,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
       # @return [MaxAbsScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @max_abs_vec = x.abs.max(0)
         self
       end
@@ -44,7 +44,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -53,7 +53,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         x / @max_abs_vec
       end

data/lib/rumale/preprocessing/min_max_scaler.rb
@@ -42,7 +42,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
       # @return [MinMaxScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @min_vec = x.min(0)
         @max_vec = x.max(0)
         self
@@ -55,7 +55,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -64,7 +64,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, = x.shape
         dif_vec = @max_vec - @min_vec
         dif_vec[dif_vec.eq(0)] = 1.0
data/lib/rumale/preprocessing/one_hot_encoder.rb
@@ -49,7 +49,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
       # @return [OneHotEncoder]
       def fit(x, _y = nil)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         @n_values = x.max(0) + 1
         @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
@@ -64,7 +64,8 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
       # @return [Numo::DFloat] The one-hot-vectors.
       def fit_transform(x, _y = nil)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
+        raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         fit(x).transform(x)
       end
@@ -74,7 +75,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
       # @return [Numo::DFloat] The one-hot-vectors.
       def transform(x)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         codes = encode(x, @feature_indices)
         codes[true, @active_features].dup
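
OneHotEncoder follows the same pattern: fit, fit_transform and transform now cast their input to Numo::Int32 when needed and keep the guard against negative category values. A usage sketch:

    require 'rumale'

    encoder = Rumale::Preprocessing::OneHotEncoder.new
    # Two categorical features; feature 0 takes values 0..2, feature 1 takes 0..1.
    categories = Numo::Int32[[0, 0], [1, 1], [2, 0]]
    one_hot = encoder.fit_transform(categories)
    p one_hot.shape  # => [3, 5] (3 + 2 one-hot columns)
    # encoder.fit_transform(Numo::Int32[[-1, 0]]) would raise ArgumentError (negative value).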
data/lib/rumale/preprocessing/ordinal_encoder.rb
@@ -91,7 +91,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
       # @return [Numo::NArray] The decoded features.
       def inverse_transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)

         n_features = x.shape[1]
         raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size
data/lib/rumale/preprocessing/standard_scaler.rb
@@ -39,7 +39,7 @@ module Rumale
       # The samples to calculate the mean values and standard deviations.
       # @return [StandardScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @mean_vec = x.mean(0)
         @std_vec = x.stddev(0)
         self
@@ -53,7 +53,7 @@ module Rumale
       # The samples to calculate the mean values and standard deviations.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -62,7 +62,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, = x.shape
         (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
       end
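
The scaler changes are uniform: each fit / fit_transform / transform entry point now reassigns the coerced sample array. Typical StandardScaler usage, fitting on training data and reusing the learned mean and standard deviation on new data:

    require 'rumale'

    scaler = Rumale::Preprocessing::StandardScaler.new
    x_train = Numo::DFloat.new(200, 3).rand
    x_test  = Numo::DFloat.new(50, 3).rand

    z_train = scaler.fit_transform(x_train)
    z_test  = scaler.transform(x_test)  # reuses @mean_vec and @std_vec learned in fit
    p z_train.mean(0).to_a              # roughly zero per feature
    p z_train.stddev(0).to_a            # roughly one per feature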
data/lib/rumale/tree/base_decision_tree.rb
@@ -43,7 +43,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
       def apply(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
       end

data/lib/rumale/tree/decision_tree_classifier.rb
@@ -53,9 +53,9 @@ module Rumale
       # It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -69,8 +69,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [DecisionTreeClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)
         n_samples, n_features = x.shape
         @params[:max_features] = n_features if @params[:max_features].nil?
@@ -91,7 +91,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @leaf_labels[apply(x)].dup
       end

@@ -100,7 +100,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
       end

data/lib/rumale/tree/decision_tree_regressor.rb
@@ -49,9 +49,9 @@ module Rumale
       # It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -65,8 +65,8 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values to be used for fitting the model.
       # @return [DecisionTreeRegressor] The learned regressor itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)
         n_samples, n_features = x.shape
         @params[:max_features] = n_features if @params[:max_features].nil?
@@ -86,7 +86,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @leaf_values.shape[1].nil? ? @leaf_values[apply(x)].dup : @leaf_values[apply(x), true].dup
       end

data/lib/rumale/tree/extra_tree_classifier.rb
@@ -50,9 +50,9 @@ module Rumale
       # It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -65,8 +65,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [ExtraTreeClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)
         super
       end
@@ -76,7 +76,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         super
       end

@@ -85,7 +85,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         super
       end
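
The tree constructors all pick up the same validator change: max_depth, max_leaf_nodes, max_features and random_seed may be nil or numeric, while min_samples_leaf must be numeric. A short ExtraTreeClassifier example exercising the hyperparameters touched above:

    require 'rumale'

    tree = Rumale::Tree::ExtraTreeClassifier.new(
      criterion: 'gini', max_depth: 3, max_leaf_nodes: nil, min_samples_leaf: 2, random_seed: 1
    )
    x = Numo::DFloat.new(100, 4).rand
    y = Numo::Int32.cast(x[true, 0].gt(0.5))  # toy labels that depend only on the first feature
    tree.fit(x, y)
    p tree.predict(x[0...10, true]).to_a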