rumale 0.13.8 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
@@ -61,10 +61,10 @@ module Rumale
|
|
61
61
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
62
62
|
def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
|
63
63
|
max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
|
64
|
-
|
65
|
-
|
64
|
+
check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
|
65
|
+
n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
|
66
66
|
check_params_string(loss: loss)
|
67
|
-
|
67
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
68
68
|
check_params_positive(n_factors: n_factors,
|
69
69
|
reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
|
70
70
|
max_iter: max_iter, batch_size: batch_size)
|
@@ -78,15 +78,15 @@ module Rumale
|
|
78
78
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
79
79
|
# @return [FactorizationMachineClassifier] The learned classifier itself.
|
80
80
|
def fit(x, y)
|
81
|
-
|
82
|
-
|
81
|
+
x = check_convert_sample_array(x)
|
82
|
+
y = check_convert_label_array(y)
|
83
83
|
check_sample_label_size(x, y)
|
84
84
|
|
85
85
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
86
|
-
n_classes = @classes.size
|
87
|
-
_n_samples, n_features = x.shape
|
88
86
|
|
89
|
-
if
|
87
|
+
if multiclass_problem?
|
88
|
+
n_classes = @classes.size
|
89
|
+
n_features = x.shape[1]
|
90
90
|
@factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
|
91
91
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
92
92
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
@@ -105,7 +105,7 @@ module Rumale
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
else
|
108
|
-
negative_label =
|
108
|
+
negative_label = @classes[0]
|
109
109
|
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
110
110
|
@factor_mat, @weight_vec, @bias_term = partial_fit(x, bin_y)
|
111
111
|
end
|
@@ -118,12 +118,12 @@ module Rumale
|
|
118
118
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
119
119
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
120
120
|
def decision_function(x)
|
121
|
-
|
121
|
+
x = check_convert_sample_array(x)
|
122
122
|
linear_term = @bias_term + x.dot(@weight_vec.transpose)
|
123
|
-
factor_term = if
|
124
|
-
0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
|
125
|
-
else
|
123
|
+
factor_term = if multiclass_problem?
|
126
124
|
0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
|
125
|
+
else
|
126
|
+
0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
|
127
127
|
end
|
128
128
|
linear_term + factor_term
|
129
129
|
end
|
@@ -133,15 +133,19 @@ module Rumale
|
|
133
133
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
134
134
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
135
135
|
def predict(x)
|
136
|
-
|
137
|
-
return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
|
136
|
+
x = check_convert_sample_array(x)
|
138
137
|
|
139
138
|
n_samples = x.shape[0]
|
140
|
-
|
141
|
-
|
142
|
-
|
139
|
+
predicted = if multiclass_problem?
|
140
|
+
decision_values = decision_function(x)
|
141
|
+
if enable_parallel?
|
142
|
+
parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
143
|
+
else
|
144
|
+
Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
145
|
+
end
|
143
146
|
else
|
144
|
-
|
147
|
+
decision_values = decision_function(x).ge(0.0).to_a
|
148
|
+
Array.new(n_samples) { |n| @classes[decision_values[n]] }
|
145
149
|
end
|
146
150
|
Numo::Int32.asarray(predicted)
|
147
151
|
end
|
@@ -151,9 +155,9 @@ module Rumale
|
|
151
155
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
|
152
156
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
153
157
|
def predict_proba(x)
|
154
|
-
|
158
|
+
x = check_convert_sample_array(x)
|
155
159
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
156
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if
|
160
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
|
157
161
|
|
158
162
|
n_samples, = x.shape
|
159
163
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -211,6 +215,10 @@ module Rumale
|
|
211
215
|
logistic_loss_gradient(x, ex_x, y, factor, weight)
|
212
216
|
end
|
213
217
|
end
|
218
|
+
|
219
|
+
def multiclass_problem?
|
220
|
+
@classes.size > 2
|
221
|
+
end
|
214
222
|
end
|
215
223
|
end
|
216
224
|
end
|
@@ -54,9 +54,9 @@ module Rumale
|
|
54
54
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
55
55
|
def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
|
56
56
|
max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
|
58
|
+
n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
|
59
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
60
60
|
check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
|
61
61
|
max_iter: max_iter, batch_size: batch_size)
|
62
62
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
|
@@ -69,8 +69,8 @@ module Rumale
|
|
69
69
|
# @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
70
70
|
# @return [FactorizationMachineRegressor] The learned regressor itself.
|
71
71
|
def fit(x, y)
|
72
|
-
|
73
|
-
|
72
|
+
x = check_convert_sample_array(x)
|
73
|
+
y = check_convert_tvalue_array(y)
|
74
74
|
check_sample_tvalue_size(x, y)
|
75
75
|
|
76
76
|
n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
|
@@ -98,7 +98,7 @@ module Rumale
|
|
98
98
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
99
99
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
100
100
|
def predict(x)
|
101
|
-
|
101
|
+
x = check_convert_sample_array(x)
|
102
102
|
linear_term = @bias_term + x.dot(@weight_vec.transpose)
|
103
103
|
factor_term = if @weight_vec.shape[1].nil?
|
104
104
|
0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
|
@@ -50,7 +50,7 @@ module Rumale
|
|
50
50
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
|
51
51
|
# @return [BinDiscretizer]
|
52
52
|
def fit(x, _y = nil)
|
53
|
-
|
53
|
+
x = check_convert_sample_array(x)
|
54
54
|
n_features = x.shape[1]
|
55
55
|
max_vals = x.max(0)
|
56
56
|
min_vals = x.min(0)
|
@@ -67,7 +67,7 @@ module Rumale
|
|
67
67
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
68
68
|
# @return [Numo::DFloat] The discretized samples.
|
69
69
|
def fit_transform(x, _y = nil)
|
70
|
-
|
70
|
+
x = check_convert_sample_array(x)
|
71
71
|
fit(x).transform(x)
|
72
72
|
end
|
73
73
|
|
@@ -76,7 +76,7 @@ module Rumale
|
|
76
76
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
77
77
|
# @return [Numo::DFloat] The discretized samples.
|
78
78
|
def transform(x)
|
79
|
-
|
79
|
+
x = check_convert_sample_array(x)
|
80
80
|
n_samples, n_features = x.shape
|
81
81
|
transformed = Numo::DFloat.zeros(n_samples, n_features)
|
82
82
|
n_features.times do |n|
|
@@ -32,7 +32,7 @@ module Rumale
|
|
32
32
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
33
33
|
# @return [L2Normalizer]
|
34
34
|
def fit(x, _y = nil)
|
35
|
-
|
35
|
+
x = check_convert_sample_array(x)
|
36
36
|
@norm_vec = Numo::NMath.sqrt((x**2).sum(1))
|
37
37
|
self
|
38
38
|
end
|
@@ -44,7 +44,7 @@ module Rumale
|
|
44
44
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
45
45
|
# @return [Numo::DFloat] The normalized samples.
|
46
46
|
def fit_transform(x, _y = nil)
|
47
|
-
|
47
|
+
x = check_convert_sample_array(x)
|
48
48
|
fit(x)
|
49
49
|
x / @norm_vec.tile(x.shape[1], 1).transpose
|
50
50
|
end
|
@@ -31,7 +31,7 @@ module Rumale
|
|
31
31
|
# @param neg_label [Integer] The value represents negative label.
|
32
32
|
# @param pos_label [Integer] The value represents positive label.
|
33
33
|
def initialize(neg_label: 0, pos_label: 1)
|
34
|
-
|
34
|
+
check_params_numeric(neg_label: neg_label, pos_label: pos_label)
|
35
35
|
@params = {}
|
36
36
|
@params[:neg_label] = neg_label
|
37
37
|
@params[:pos_label] = pos_label
|
@@ -80,7 +80,7 @@ module Rumale
|
|
80
80
|
# @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
|
81
81
|
# @return [Array] (shape: [n_samples]) The decoded labels.
|
82
82
|
def inverse_transform(x)
|
83
|
-
|
83
|
+
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
84
84
|
n_samples = x.shape[0]
|
85
85
|
Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
|
86
86
|
end
|
@@ -71,7 +71,7 @@ module Rumale
|
|
71
71
|
# @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
|
72
72
|
# @return [Array] The decoded labels.
|
73
73
|
def inverse_transform(x)
|
74
|
-
|
74
|
+
x = check_convert_label_array(x)
|
75
75
|
x.to_a.map { |n| @classes[n] }
|
76
76
|
end
|
77
77
|
|
@@ -32,7 +32,7 @@ module Rumale
|
|
32
32
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
33
33
|
# @return [MaxAbsScaler]
|
34
34
|
def fit(x, _y = nil)
|
35
|
-
|
35
|
+
x = check_convert_sample_array(x)
|
36
36
|
@max_abs_vec = x.abs.max(0)
|
37
37
|
self
|
38
38
|
end
|
@@ -44,7 +44,7 @@ module Rumale
|
|
44
44
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
45
45
|
# @return [Numo::DFloat] The scaled samples.
|
46
46
|
def fit_transform(x, _y = nil)
|
47
|
-
|
47
|
+
x = check_convert_sample_array(x)
|
48
48
|
fit(x).transform(x)
|
49
49
|
end
|
50
50
|
|
@@ -53,7 +53,7 @@ module Rumale
|
|
53
53
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
54
54
|
# @return [Numo::DFloat] The scaled samples.
|
55
55
|
def transform(x)
|
56
|
-
|
56
|
+
x = check_convert_sample_array(x)
|
57
57
|
x / @max_abs_vec
|
58
58
|
end
|
59
59
|
|
@@ -42,7 +42,7 @@ module Rumale
|
|
42
42
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
43
43
|
# @return [MinMaxScaler]
|
44
44
|
def fit(x, _y = nil)
|
45
|
-
|
45
|
+
x = check_convert_sample_array(x)
|
46
46
|
@min_vec = x.min(0)
|
47
47
|
@max_vec = x.max(0)
|
48
48
|
self
|
@@ -55,7 +55,7 @@ module Rumale
|
|
55
55
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
56
56
|
# @return [Numo::DFloat] The scaled samples.
|
57
57
|
def fit_transform(x, _y = nil)
|
58
|
-
|
58
|
+
x = check_convert_sample_array(x)
|
59
59
|
fit(x).transform(x)
|
60
60
|
end
|
61
61
|
|
@@ -64,7 +64,7 @@ module Rumale
|
|
64
64
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
65
65
|
# @return [Numo::DFloat] The scaled samples.
|
66
66
|
def transform(x)
|
67
|
-
|
67
|
+
x = check_convert_sample_array(x)
|
68
68
|
n_samples, = x.shape
|
69
69
|
dif_vec = @max_vec - @min_vec
|
70
70
|
dif_vec[dif_vec.eq(0)] = 1.0
|
@@ -49,7 +49,7 @@ module Rumale
|
|
49
49
|
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
50
50
|
# @return [OneHotEncoder]
|
51
51
|
def fit(x, _y = nil)
|
52
|
-
|
52
|
+
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
53
53
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
54
54
|
@n_values = x.max(0) + 1
|
55
55
|
@feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
|
@@ -64,7 +64,8 @@ module Rumale
|
|
64
64
|
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
|
65
65
|
# @return [Numo::DFloat] The one-hot-vectors.
|
66
66
|
def fit_transform(x, _y = nil)
|
67
|
-
|
67
|
+
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
68
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
68
69
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
69
70
|
fit(x).transform(x)
|
70
71
|
end
|
@@ -74,7 +75,7 @@ module Rumale
|
|
74
75
|
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
|
75
76
|
# @return [Numo::DFloat] The one-hot-vectors.
|
76
77
|
def transform(x)
|
77
|
-
|
78
|
+
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
78
79
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
79
80
|
codes = encode(x, @feature_indices)
|
80
81
|
codes[true, @active_features].dup
|
@@ -91,7 +91,7 @@ module Rumale
|
|
91
91
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
|
92
92
|
# @return [Numo::NArray] The decoded features.
|
93
93
|
def inverse_transform(x)
|
94
|
-
|
94
|
+
x = check_convert_sample_array(x)
|
95
95
|
|
96
96
|
n_features = x.shape[1]
|
97
97
|
raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size
|
@@ -39,7 +39,7 @@ module Rumale
|
|
39
39
|
# The samples to calculate the mean values and standard deviations.
|
40
40
|
# @return [StandardScaler]
|
41
41
|
def fit(x, _y = nil)
|
42
|
-
|
42
|
+
x = check_convert_sample_array(x)
|
43
43
|
@mean_vec = x.mean(0)
|
44
44
|
@std_vec = x.stddev(0)
|
45
45
|
self
|
@@ -53,7 +53,7 @@ module Rumale
|
|
53
53
|
# The samples to calculate the mean values and standard deviations.
|
54
54
|
# @return [Numo::DFloat] The scaled samples.
|
55
55
|
def fit_transform(x, _y = nil)
|
56
|
-
|
56
|
+
x = check_convert_sample_array(x)
|
57
57
|
fit(x).transform(x)
|
58
58
|
end
|
59
59
|
|
@@ -62,7 +62,7 @@ module Rumale
|
|
62
62
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
63
63
|
# @return [Numo::DFloat] The scaled samples.
|
64
64
|
def transform(x)
|
65
|
-
|
65
|
+
x = check_convert_sample_array(x)
|
66
66
|
n_samples, = x.shape
|
67
67
|
(x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
|
68
68
|
end
|
@@ -43,7 +43,7 @@ module Rumale
|
|
43
43
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
44
44
|
# @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
|
45
45
|
def apply(x)
|
46
|
-
|
46
|
+
x = check_convert_sample_array(x)
|
47
47
|
Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
|
48
48
|
end
|
49
49
|
|
@@ -53,9 +53,9 @@ module Rumale
|
|
53
53
|
# It is used to randomly determine the order of features when deciding spliting point.
|
54
54
|
def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
|
55
55
|
random_seed: nil)
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
57
|
+
max_features: max_features, random_seed: random_seed)
|
58
|
+
check_params_numeric(min_samples_leaf: min_samples_leaf)
|
59
59
|
check_params_string(criterion: criterion)
|
60
60
|
check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
61
61
|
min_samples_leaf: min_samples_leaf, max_features: max_features)
|
@@ -69,8 +69,8 @@ module Rumale
|
|
69
69
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
70
70
|
# @return [DecisionTreeClassifier] The learned classifier itself.
|
71
71
|
def fit(x, y)
|
72
|
-
|
73
|
-
|
72
|
+
x = check_convert_sample_array(x)
|
73
|
+
y = check_convert_label_array(y)
|
74
74
|
check_sample_label_size(x, y)
|
75
75
|
n_samples, n_features = x.shape
|
76
76
|
@params[:max_features] = n_features if @params[:max_features].nil?
|
@@ -91,7 +91,7 @@ module Rumale
|
|
91
91
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
92
92
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
93
93
|
def predict(x)
|
94
|
-
|
94
|
+
x = check_convert_sample_array(x)
|
95
95
|
@leaf_labels[apply(x)].dup
|
96
96
|
end
|
97
97
|
|
@@ -100,7 +100,7 @@ module Rumale
|
|
100
100
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
|
101
101
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
102
102
|
def predict_proba(x)
|
103
|
-
|
103
|
+
x = check_convert_sample_array(x)
|
104
104
|
Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
|
105
105
|
end
|
106
106
|
|
@@ -49,9 +49,9 @@ module Rumale
|
|
49
49
|
# It is used to randomly determine the order of features when deciding spliting point.
|
50
50
|
def initialize(criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
|
51
51
|
random_seed: nil)
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
53
|
+
max_features: max_features, random_seed: random_seed)
|
54
|
+
check_params_numeric(min_samples_leaf: min_samples_leaf)
|
55
55
|
check_params_string(criterion: criterion)
|
56
56
|
check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
57
57
|
min_samples_leaf: min_samples_leaf, max_features: max_features)
|
@@ -65,8 +65,8 @@ module Rumale
|
|
65
65
|
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values to be used for fitting the model.
|
66
66
|
# @return [DecisionTreeRegressor] The learned regressor itself.
|
67
67
|
def fit(x, y)
|
68
|
-
|
69
|
-
|
68
|
+
x = check_convert_sample_array(x)
|
69
|
+
y = check_convert_tvalue_array(y)
|
70
70
|
check_sample_tvalue_size(x, y)
|
71
71
|
n_samples, n_features = x.shape
|
72
72
|
@params[:max_features] = n_features if @params[:max_features].nil?
|
@@ -86,7 +86,7 @@ module Rumale
|
|
86
86
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
87
87
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
88
88
|
def predict(x)
|
89
|
-
|
89
|
+
x = check_convert_sample_array(x)
|
90
90
|
@leaf_values.shape[1].nil? ? @leaf_values[apply(x)].dup : @leaf_values[apply(x), true].dup
|
91
91
|
end
|
92
92
|
|
@@ -50,9 +50,9 @@ module Rumale
|
|
50
50
|
# It is used to randomly determine the order of features when deciding spliting point.
|
51
51
|
def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
|
52
52
|
random_seed: nil)
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
54
|
+
max_features: max_features, random_seed: random_seed)
|
55
|
+
check_params_numeric(min_samples_leaf: min_samples_leaf)
|
56
56
|
check_params_string(criterion: criterion)
|
57
57
|
check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
58
58
|
min_samples_leaf: min_samples_leaf, max_features: max_features)
|
@@ -65,8 +65,8 @@ module Rumale
|
|
65
65
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
66
66
|
# @return [ExtraTreeClassifier] The learned classifier itself.
|
67
67
|
def fit(x, y)
|
68
|
-
|
69
|
-
|
68
|
+
x = check_convert_sample_array(x)
|
69
|
+
y = check_convert_label_array(y)
|
70
70
|
check_sample_label_size(x, y)
|
71
71
|
super
|
72
72
|
end
|
@@ -76,7 +76,7 @@ module Rumale
|
|
76
76
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
77
77
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
78
78
|
def predict(x)
|
79
|
-
|
79
|
+
x = check_convert_sample_array(x)
|
80
80
|
super
|
81
81
|
end
|
82
82
|
|
@@ -85,7 +85,7 @@ module Rumale
|
|
85
85
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
|
86
86
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
87
87
|
def predict_proba(x)
|
88
|
-
|
88
|
+
x = check_convert_sample_array(x)
|
89
89
|
super
|
90
90
|
end
|
91
91
|
|