rumale 0.13.8 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
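
Items 71-73 above add multi-layer perceptron estimators (an MLP classifier and regressor built on a shared base_mlp.rb). A minimal usage sketch follows; the class name follows the new file name and the fit/predict/score interface is the common Rumale estimator contract, while the hidden_units and random_seed keywords are assumptions about the new constructor, since the new files' contents are not part of this excerpt.

  require 'rumale'

  samples = Numo::DFloat.new(100, 4).rand   # toy feature matrix
  labels  = Numo::Int32.new(100).rand(2)    # toy binary labels (0 or 1)

  # hidden_units / random_seed are assumed keyword names, not shown in this diff.
  clf = Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [64, 32], random_seed: 1)
  clf.fit(samples, labels)
  puts clf.score(samples, labels)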
data/lib/rumale/polynomial_model/factorization_machine_classifier.rb
@@ -61,10 +61,10 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
                      max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
-        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         check_params_string(loss: loss)
-        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors,
                               reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
@@ -78,15 +78,15 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [FactorizationMachineClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)

         @classes = Numo::Int32[*y.to_a.uniq.sort]
-        n_classes = @classes.size
-        _n_samples, n_features = x.shape

-        if n_classes > 2
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
           @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
           @bias_term = Numo::DFloat.zeros(n_classes)
@@ -105,7 +105,7 @@ module Rumale
             end
           end
         else
-          negative_label = y.to_a.uniq.min
+          negative_label = @classes[0]
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @factor_mat, @weight_vec, @bias_term = partial_fit(x, bin_y)
         end
@@ -118,12 +118,12 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         linear_term = @bias_term + x.dot(@weight_vec.transpose)
-        factor_term = if @classes.size <= 2
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                      else
+        factor_term = if multiclass_problem?
                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+                      else
+                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
                       end
         linear_term + factor_term
       end
@@ -133,15 +133,19 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
-        return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+        x = check_convert_sample_array(x)

         n_samples = x.shape[0]
-        decision_values = decision_function(x)
-        predicted = if enable_parallel?
-                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+        predicted = if multiclass_problem?
+                      decision_values = decision_function(x)
+                      if enable_parallel?
+                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      else
+                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      end
                     else
-                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      decision_values = decision_function(x).ge(0.0).to_a
+                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
                     end
         Numo::Int32.asarray(predicted)
       end
@@ -151,9 +155,9 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-        return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?

         n_samples, = x.shape
         probs = Numo::DFloat.zeros(n_samples, 2)
@@ -211,6 +215,10 @@ module Rumale
           logistic_loss_gradient(x, ex_x, y, factor, weight)
         end
       end
+
+      def multiclass_problem?
+        @classes.size > 2
+      end
     end
   end
 end
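
Beyond the new validation helpers, the binary branch of predict above changes observable behavior: 0.13.8 always returned labels in {-1, 1}, while 0.14.0 maps the thresholded decision values back onto the fitted @classes. A toy sketch of the new rule with illustrative values (not taken from the gem's tests):

  classes = Numo::Int32[3, 7]                       # @classes: sorted unique training labels
  decision_values = Numo::DFloat[-1.2, 0.4, 2.0]    # decision_function output for three samples
  flags = decision_values.ge(0.0).to_a              # => [0, 1, 1]
  predicted = Numo::Int32.asarray(flags.map { |f| classes[f] })
  # => Numo::Int32[3, 7, 7]; 0.13.8 returned Numo::Int32[-1, 1, 1] for the same
  #    decision values, regardless of the original label values.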
data/lib/rumale/polynomial_model/factorization_machine_regressor.rb
@@ -54,9 +54,9 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
                      max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
-        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
-        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
@@ -69,8 +69,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [FactorizationMachineRegressor] The learned regressor itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)

         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
@@ -98,7 +98,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         linear_term = @bias_term + x.dot(@weight_vec.transpose)
         factor_term = if @weight_vec.shape[1].nil?
                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
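
The switch from check_params_float and check_params_integer to a single check_params_numeric relaxes what the constructor accepts. A small sketch of the difference, assuming only the checks shown in this diff changed:

  require 'rumale'

  # 0.13.8: check_params_float raised a type error for reg_param_linear: 1 (an Integer).
  # 0.14.0: any Numeric value passes check_params_numeric.
  fm = Rumale::PolynomialModel::FactorizationMachineRegressor.new(
    n_factors: 4, reg_param_linear: 1, reg_param_factor: 1, max_iter: 500
  )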
data/lib/rumale/preprocessing/bin_discretizer.rb
@@ -50,7 +50,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
       # @return [BinDiscretizer]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_features = x.shape[1]
         max_vals = x.max(0)
         min_vals = x.min(0)
@@ -67,7 +67,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
       # @return [Numo::DFloat] The discretized samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -76,7 +76,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
       # @return [Numo::DFloat] The discretized samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, n_features = x.shape
         transformed = Numo::DFloat.zeros(n_samples, n_features)
         n_features.times do |n|
data/lib/rumale/preprocessing/l2_normalizer.rb
@@ -32,7 +32,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
       # @return [L2Normalizer]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
         self
       end
@@ -44,7 +44,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
       # @return [Numo::DFloat] The normalized samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x)
         x / @norm_vec.tile(x.shape[1], 1).transpose
       end
data/lib/rumale/preprocessing/label_binarizer.rb
@@ -31,7 +31,7 @@ module Rumale
       # @param neg_label [Integer] The value represents negative label.
       # @param pos_label [Integer] The value represents positive label.
       def initialize(neg_label: 0, pos_label: 1)
-        check_params_integer(neg_label: neg_label, pos_label: pos_label)
+        check_params_numeric(neg_label: neg_label, pos_label: pos_label)
         @params = {}
         @params[:neg_label] = neg_label
         @params[:pos_label] = pos_label
@@ -80,7 +80,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
       # @return [Array] (shape: [n_samples]) The decoded labels.
       def inverse_transform(x)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         n_samples = x.shape[0]
         Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
       end
data/lib/rumale/preprocessing/label_encoder.rb
@@ -71,7 +71,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
       # @return [Array] The decoded labels.
       def inverse_transform(x)
-        check_label_array(x)
+        x = check_convert_label_array(x)
         x.to_a.map { |n| @classes[n] }
       end

data/lib/rumale/preprocessing/max_abs_scaler.rb
@@ -32,7 +32,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
       # @return [MaxAbsScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @max_abs_vec = x.abs.max(0)
         self
       end
@@ -44,7 +44,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -53,7 +53,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         x / @max_abs_vec
       end

data/lib/rumale/preprocessing/min_max_scaler.rb
@@ -42,7 +42,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
       # @return [MinMaxScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @min_vec = x.min(0)
         @max_vec = x.max(0)
         self
@@ -55,7 +55,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -64,7 +64,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, = x.shape
         dif_vec = @max_vec - @min_vec
         dif_vec[dif_vec.eq(0)] = 1.0
data/lib/rumale/preprocessing/one_hot_encoder.rb
@@ -49,7 +49,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
       # @return [OneHotEncoder]
       def fit(x, _y = nil)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         @n_values = x.max(0) + 1
         @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
@@ -64,7 +64,8 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
       # @return [Numo::DFloat] The one-hot-vectors.
       def fit_transform(x, _y = nil)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
+        raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         fit(x).transform(x)
       end
@@ -74,7 +75,7 @@ module Rumale
       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
       # @return [Numo::DFloat] The one-hot-vectors.
       def transform(x)
-        check_params_type(Numo::Int32, x: x)
+        x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
         codes = encode(x, @feature_indices)
         codes[true, @active_features].dup
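
Because fit, fit_transform, and transform above now cast their input with Numo::Int32.cast, a plain Ruby array can be passed where 0.13.8's check_params_type required a Numo::Int32 matrix. A minimal sketch (the output shape is reasoned from the code shown above, not taken from the gem's tests):

  require 'rumale'

  encoder = Rumale::Preprocessing::OneHotEncoder.new
  one_hot = encoder.fit_transform([[0, 0], [1, 2], [1, 1]])
  # => Numo::DFloat one-hot matrix with 5 columns: two for the first feature
  #    (values 0..1) and three for the second (values 0..2).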
data/lib/rumale/preprocessing/ordinal_encoder.rb
@@ -91,7 +91,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
       # @return [Numo::NArray] The decoded features.
       def inverse_transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)

         n_features = x.shape[1]
         raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size
data/lib/rumale/preprocessing/standard_scaler.rb
@@ -39,7 +39,7 @@ module Rumale
       #   The samples to calculate the mean values and standard deviations.
       # @return [StandardScaler]
       def fit(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @mean_vec = x.mean(0)
         @std_vec = x.stddev(0)
         self
@@ -53,7 +53,7 @@ module Rumale
       #   The samples to calculate the mean values and standard deviations.
       # @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         fit(x).transform(x)
       end

@@ -62,7 +62,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
       # @return [Numo::DFloat] The scaled samples.
       def transform(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         n_samples, = x.shape
         (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
       end
data/lib/rumale/tree/base_decision_tree.rb
@@ -43,7 +43,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
       def apply(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
       end

data/lib/rumale/tree/decision_tree_classifier.rb
@@ -53,9 +53,9 @@ module Rumale
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -69,8 +69,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [DecisionTreeClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)
         n_samples, n_features = x.shape
         @params[:max_features] = n_features if @params[:max_features].nil?
@@ -91,7 +91,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @leaf_labels[apply(x)].dup
       end

@@ -100,7 +100,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
       end

data/lib/rumale/tree/decision_tree_regressor.rb
@@ -49,9 +49,9 @@ module Rumale
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -65,8 +65,8 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values to be used for fitting the model.
       # @return [DecisionTreeRegressor] The learned regressor itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)
         n_samples, n_features = x.shape
         @params[:max_features] = n_features if @params[:max_features].nil?
@@ -86,7 +86,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         @leaf_values.shape[1].nil? ? @leaf_values[apply(x)].dup : @leaf_values[apply(x), true].dup
       end

data/lib/rumale/tree/extra_tree_classifier.rb
@@ -50,9 +50,9 @@ module Rumale
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                      random_seed: nil)
-        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
-        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                    max_features: max_features, random_seed: random_seed)
+        check_params_numeric(min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
                               min_samples_leaf: min_samples_leaf, max_features: max_features)
@@ -65,8 +65,8 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [ExtraTreeClassifier] The learned classifier itself.
       def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
         check_sample_label_size(x, y)
         super
       end
@@ -76,7 +76,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         super
       end

@@ -85,7 +85,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        check_sample_array(x)
+        x = check_convert_sample_array(x)
         super
       end

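
The same relaxation applies to the tree constructors above: hyperparameters that previously had to be Integer (or nil) now only need to be Numeric. A small sketch, assuming only the constructor-time checks shown in this diff changed:

  require 'rumale'

  # 0.13.8: check_params_type_or_nil(Integer, max_depth: ...) rejected a Float.
  # 0.14.0: check_params_numeric_or_nil accepts any Numeric (or nil).
  tree = Rumale::Tree::ExtraTreeClassifier.new(max_depth: 5.0, min_samples_leaf: 2)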