rumale 0.13.8 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
@@ -55,11 +55,10 @@ module Rumale
|
|
55
55
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
56
56
|
def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
57
57
|
solver: 'sgd', n_jobs: nil, random_seed: nil)
|
58
|
-
|
59
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
58
|
+
check_params_numeric(bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
60
59
|
check_params_boolean(fit_bias: fit_bias)
|
61
60
|
check_params_string(solver: solver)
|
62
|
-
|
61
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
63
62
|
check_params_positive(max_iter: max_iter, batch_size: batch_size)
|
64
63
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(reg_param: 0.0)
|
65
64
|
keywd_args.delete(:solver)
|
@@ -73,8 +72,8 @@ module Rumale
|
|
73
72
|
# @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
74
73
|
# @return [LinearRegression] The learned regressor itself.
|
75
74
|
def fit(x, y)
|
76
|
-
|
77
|
-
|
75
|
+
x = check_convert_sample_array(x)
|
76
|
+
y = check_convert_tvalue_array(y)
|
78
77
|
check_sample_tvalue_size(x, y)
|
79
78
|
|
80
79
|
if @params[:solver] == 'svd' && enable_linalg?
|
@@ -91,7 +90,7 @@ module Rumale
|
|
91
90
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
92
91
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
93
92
|
def predict(x)
|
94
|
-
|
93
|
+
x = check_convert_sample_array(x)
|
95
94
|
x.dot(@weight_vec.transpose) + @bias_term
|
96
95
|
end
|
97
96
|
|
@@ -57,10 +57,9 @@ module Rumale
|
|
57
57
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
58
58
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
59
59
|
max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
|
60
|
-
|
61
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
60
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
62
61
|
check_params_boolean(fit_bias: fit_bias)
|
63
|
-
|
62
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
64
63
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
65
64
|
super
|
66
65
|
@classes = nil
|
@@ -72,15 +71,15 @@ module Rumale
|
|
72
71
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
73
72
|
# @return [LogisticRegression] The learned classifier itself.
|
74
73
|
def fit(x, y)
|
75
|
-
|
76
|
-
|
74
|
+
x = check_convert_sample_array(x)
|
75
|
+
y = check_convert_label_array(y)
|
77
76
|
check_sample_label_size(x, y)
|
78
77
|
|
79
78
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
80
|
-
n_classes = @classes.size
|
81
|
-
n_features = x.shape[1]
|
82
79
|
|
83
|
-
if
|
80
|
+
if multiclass_problem?
|
81
|
+
n_classes = @classes.size
|
82
|
+
n_features = x.shape[1]
|
84
83
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
85
84
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
86
85
|
if enable_parallel?
|
@@ -98,7 +97,7 @@ module Rumale
|
|
98
97
|
end
|
99
98
|
end
|
100
99
|
else
|
101
|
-
negative_label =
|
100
|
+
negative_label = @classes[0]
|
102
101
|
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
103
102
|
@weight_vec, @bias_term = partial_fit(x, bin_y)
|
104
103
|
end
|
@@ -111,7 +110,7 @@ module Rumale
|
|
111
110
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
112
111
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
113
112
|
def decision_function(x)
|
114
|
-
|
113
|
+
x = check_convert_sample_array(x)
|
115
114
|
x.dot(@weight_vec.transpose) + @bias_term
|
116
115
|
end
|
117
116
|
|
@@ -120,9 +119,7 @@ module Rumale
|
|
120
119
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
121
120
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
122
121
|
def predict(x)
|
123
|
-
|
124
|
-
|
125
|
-
return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
|
122
|
+
x = check_convert_sample_array(x)
|
126
123
|
|
127
124
|
n_samples, = x.shape
|
128
125
|
decision_values = predict_proba(x)
|
@@ -139,10 +136,10 @@ module Rumale
|
|
139
136
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
140
137
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
141
138
|
def predict_proba(x)
|
142
|
-
|
139
|
+
x = check_convert_sample_array(x)
|
143
140
|
|
144
141
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
145
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if
|
142
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
|
146
143
|
|
147
144
|
n_samples, = x.shape
|
148
145
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -177,6 +174,10 @@ module Rumale
|
|
177
174
|
def calc_loss_gradient(x, y, weight)
|
178
175
|
y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y
|
179
176
|
end
|
177
|
+
|
178
|
+
def multiclass_problem?
|
179
|
+
@classes.size > 2
|
180
|
+
end
|
180
181
|
end
|
181
182
|
end
|
182
183
|
end
|
@@ -56,11 +56,10 @@ module Rumale
|
|
56
56
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
57
57
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
58
58
|
solver: 'sgd', n_jobs: nil, random_seed: nil)
|
59
|
-
|
60
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
59
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
61
60
|
check_params_boolean(fit_bias: fit_bias)
|
62
61
|
check_params_string(solver: solver)
|
63
|
-
|
62
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
64
63
|
check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
|
65
64
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
66
65
|
keywd_args.delete(:solver)
|
@@ -74,8 +73,8 @@ module Rumale
|
|
74
73
|
# @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
75
74
|
# @return [Ridge] The learned regressor itself.
|
76
75
|
def fit(x, y)
|
77
|
-
|
78
|
-
|
76
|
+
x = check_convert_sample_array(x)
|
77
|
+
y = check_convert_tvalue_array(y)
|
79
78
|
check_sample_tvalue_size(x, y)
|
80
79
|
|
81
80
|
if @params[:solver] == 'svd' && enable_linalg?
|
@@ -92,7 +91,7 @@ module Rumale
|
|
92
91
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
93
92
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
94
93
|
def predict(x)
|
95
|
-
|
94
|
+
x = check_convert_sample_array(x)
|
96
95
|
x.dot(@weight_vec.transpose) + @bias_term
|
97
96
|
end
|
98
97
|
|
@@ -59,10 +59,9 @@ module Rumale
|
|
59
59
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
60
60
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
61
61
|
max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, n_jobs: nil, random_seed: nil)
|
62
|
-
|
63
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
62
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
64
63
|
check_params_boolean(fit_bias: fit_bias, probability: probability)
|
65
|
-
|
64
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
66
65
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
67
66
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
68
67
|
keywd_args.delete(:probability)
|
@@ -78,15 +77,15 @@ module Rumale
|
|
78
77
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
79
78
|
# @return [SVC] The learned classifier itself.
|
80
79
|
def fit(x, y)
|
81
|
-
|
82
|
-
|
80
|
+
x = check_convert_sample_array(x)
|
81
|
+
y = check_convert_label_array(y)
|
83
82
|
check_sample_label_size(x, y)
|
84
83
|
|
85
84
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
86
|
-
n_classes = @classes.size
|
87
|
-
n_features = x.shape[1]
|
88
85
|
|
89
|
-
if
|
86
|
+
if multiclass_problem?
|
87
|
+
n_classes = @classes.size
|
88
|
+
n_features = x.shape[1]
|
90
89
|
# initialize model.
|
91
90
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
92
91
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
@@ -108,7 +107,7 @@ module Rumale
|
|
108
107
|
# store model.
|
109
108
|
models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
|
110
109
|
else
|
111
|
-
negative_label =
|
110
|
+
negative_label = @classes[0]
|
112
111
|
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
113
112
|
@weight_vec, @bias_term, @prob_param = partial_fit(x, bin_y)
|
114
113
|
end
|
@@ -121,7 +120,7 @@ module Rumale
|
|
121
120
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
122
121
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
123
122
|
def decision_function(x)
|
124
|
-
|
123
|
+
x = check_convert_sample_array(x)
|
125
124
|
x.dot(@weight_vec.transpose) + @bias_term
|
126
125
|
end
|
127
126
|
|
@@ -130,16 +129,19 @@ module Rumale
|
|
130
129
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
131
130
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
132
131
|
def predict(x)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
132
|
+
x = check_convert_sample_array(x)
|
133
|
+
|
134
|
+
n_samples = x.shape[0]
|
135
|
+
predicted = if multiclass_problem?
|
136
|
+
decision_values = decision_function(x)
|
137
|
+
if enable_parallel?
|
138
|
+
parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
139
|
+
else
|
140
|
+
Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
141
|
+
end
|
141
142
|
else
|
142
|
-
|
143
|
+
decision_values = decision_function(x).ge(0.0).to_a
|
144
|
+
Array.new(n_samples) { |n| @classes[decision_values[n]] }
|
143
145
|
end
|
144
146
|
Numo::Int32.asarray(predicted)
|
145
147
|
end
|
@@ -149,18 +151,18 @@ module Rumale
|
|
149
151
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
150
152
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
151
153
|
def predict_proba(x)
|
152
|
-
|
154
|
+
x = check_convert_sample_array(x)
|
153
155
|
|
154
|
-
if
|
156
|
+
if multiclass_problem?
|
155
157
|
probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
|
156
|
-
|
158
|
+
(probs.transpose / probs.sum(axis: 1)).transpose.dup
|
159
|
+
else
|
160
|
+
n_samples, = x.shape
|
161
|
+
probs = Numo::DFloat.zeros(n_samples, 2)
|
162
|
+
probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
|
163
|
+
probs[true, 0] = 1.0 - probs[true, 1]
|
164
|
+
probs
|
157
165
|
end
|
158
|
-
|
159
|
-
n_samples, = x.shape
|
160
|
-
probs = Numo::DFloat.zeros(n_samples, 2)
|
161
|
-
probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
|
162
|
-
probs[true, 0] = 1.0 - probs[true, 1]
|
163
|
-
probs
|
164
166
|
end
|
165
167
|
|
166
168
|
# Dump marshal data.
|
@@ -204,6 +206,10 @@ module Rumale
|
|
204
206
|
grad[target_ids] = -y[target_ids]
|
205
207
|
grad
|
206
208
|
end
|
209
|
+
|
210
|
+
def multiclass_problem?
|
211
|
+
@classes.size > 2
|
212
|
+
end
|
207
213
|
end
|
208
214
|
end
|
209
215
|
end
|
@@ -52,10 +52,9 @@ module Rumale
|
|
52
52
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
53
53
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
|
54
54
|
max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
|
55
|
-
|
56
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
55
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon, max_iter: max_iter, batch_size: batch_size)
|
57
56
|
check_params_boolean(fit_bias: fit_bias)
|
58
|
-
|
57
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
59
58
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
|
60
59
|
max_iter: max_iter, batch_size: batch_size)
|
61
60
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
@@ -70,8 +69,8 @@ module Rumale
|
|
70
69
|
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
71
70
|
# @return [SVR] The learned regressor itself.
|
72
71
|
def fit(x, y)
|
73
|
-
|
74
|
-
|
72
|
+
x = check_convert_sample_array(x)
|
73
|
+
y = check_convert_tvalue_array(y)
|
75
74
|
check_sample_tvalue_size(x, y)
|
76
75
|
|
77
76
|
n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
|
@@ -98,7 +97,7 @@ module Rumale
|
|
98
97
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
99
98
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
100
99
|
def predict(x)
|
101
|
-
|
100
|
+
x = check_convert_sample_array(x)
|
102
101
|
x.dot(@weight_vec.transpose) + @bias_term
|
103
102
|
end
|
104
103
|
|
data/lib/rumale/manifold/mds.rb
CHANGED
@@ -53,11 +53,10 @@ module Rumale
|
|
53
53
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
54
54
|
def initialize(n_components: 2, metric: 'euclidean', init: 'random',
|
55
55
|
max_iter: 300, tol: nil, verbose: false, random_seed: nil)
|
56
|
-
|
56
|
+
check_params_numeric(n_components: n_components, max_iter: max_iter)
|
57
57
|
check_params_string(metric: metric, init: init)
|
58
58
|
check_params_boolean(verbose: verbose)
|
59
|
-
|
60
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
59
|
+
check_params_numeric_or_nil(tol: tol, random_seed: random_seed)
|
61
60
|
check_params_positive(n_components: n_components, max_iter: max_iter)
|
62
61
|
@params = {}
|
63
62
|
@params[:n_components] = n_components
|
@@ -82,7 +81,7 @@ module Rumale
|
|
82
81
|
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
|
83
82
|
# @return [MDS] The learned transformer itself.
|
84
83
|
def fit(x, _not_used = nil)
|
85
|
-
|
84
|
+
x = check_convert_sample_array(x)
|
86
85
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
87
86
|
# initialize some variables.
|
88
87
|
n_samples = x.shape[0]
|
data/lib/rumale/manifold/tsne.rb
CHANGED
@@ -58,12 +58,10 @@ module Rumale
|
|
58
58
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
59
59
|
def initialize(n_components: 2, perplexity: 30.0, metric: 'euclidean', init: 'random',
|
60
60
|
max_iter: 500, tol: nil, verbose: false, random_seed: nil)
|
61
|
-
|
62
|
-
check_params_float(perplexity: perplexity)
|
61
|
+
check_params_numeric(n_components: n_components, max_iter: max_iter, perplexity: perplexity)
|
63
62
|
check_params_string(metric: metric, init: init)
|
64
63
|
check_params_boolean(verbose: verbose)
|
65
|
-
|
66
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
64
|
+
check_params_numeric_or_nil(tol: tol, random_seed: random_seed)
|
67
65
|
check_params_positive(n_components: n_components, perplexity: perplexity, max_iter: max_iter)
|
68
66
|
@params = {}
|
69
67
|
@params[:n_components] = n_components
|
@@ -89,7 +87,7 @@ module Rumale
|
|
89
87
|
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
|
90
88
|
# @return [TSNE] The learned transformer itself.
|
91
89
|
def fit(x, _not_used = nil)
|
92
|
-
|
90
|
+
x = check_convert_sample_array(x)
|
93
91
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
94
92
|
# initialize some variables.
|
95
93
|
@n_iter = 0
|
@@ -68,14 +68,15 @@ module Rumale
|
|
68
68
|
# * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
|
69
69
|
# the return_train_score is false.
|
70
70
|
def perform(x, y)
|
71
|
-
|
71
|
+
x = check_convert_sample_array(x)
|
72
72
|
if @estimator.is_a?(Rumale::Base::Classifier)
|
73
|
-
|
73
|
+
y = check_convert_label_array(y)
|
74
74
|
check_sample_label_size(x, y)
|
75
|
-
|
76
|
-
|
77
|
-
check_tvalue_array(y)
|
75
|
+
elsif @estimator.is_a?(Rumale::Base::Regressor)
|
76
|
+
y = check_convert_tvalue_array(y)
|
78
77
|
check_sample_tvalue_size(x, y)
|
78
|
+
else
|
79
|
+
y = Numo::NArray.asarray(y)
|
79
80
|
end
|
80
81
|
# Initialize the report of cross validation.
|
81
82
|
report = { test_score: [], train_score: nil, fit_time: [] }
|
@@ -88,7 +88,7 @@ module Rumale
|
|
88
88
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
89
89
|
# @return [GridSearchCV] The learned estimator with grid search.
|
90
90
|
def fit(x, y)
|
91
|
-
|
91
|
+
x = check_convert_sample_array(x)
|
92
92
|
|
93
93
|
init_attrs
|
94
94
|
|
@@ -111,7 +111,7 @@ module Rumale
|
|
111
111
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
112
112
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
113
113
|
def decision_function(x)
|
114
|
-
|
114
|
+
x = check_convert_sample_array(x)
|
115
115
|
@best_estimator.decision_function(x)
|
116
116
|
end
|
117
117
|
|
@@ -120,7 +120,7 @@ module Rumale
|
|
120
120
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
121
121
|
# @return [Numo::NArray] Predicted results.
|
122
122
|
def predict(x)
|
123
|
-
|
123
|
+
x = check_convert_sample_array(x)
|
124
124
|
@best_estimator.predict(x)
|
125
125
|
end
|
126
126
|
|
@@ -129,7 +129,7 @@ module Rumale
|
|
129
129
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
130
130
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
131
131
|
def predict_log_proba(x)
|
132
|
-
|
132
|
+
x = check_convert_sample_array(x)
|
133
133
|
@best_estimator.predict_log_proba(x)
|
134
134
|
end
|
135
135
|
|
@@ -138,7 +138,7 @@ module Rumale
|
|
138
138
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
139
139
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
140
140
|
def predict_proba(x)
|
141
|
-
|
141
|
+
x = check_convert_sample_array(x)
|
142
142
|
@best_estimator.predict_proba(x)
|
143
143
|
end
|
144
144
|
|
@@ -148,7 +148,7 @@ module Rumale
|
|
148
148
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
149
149
|
# @return [Float] The score of estimator.
|
150
150
|
def score(x, y)
|
151
|
-
|
151
|
+
x = check_convert_sample_array(x)
|
152
152
|
@best_estimator.score(x, y)
|
153
153
|
end
|
154
154
|
|
@@ -36,9 +36,9 @@ module Rumale
|
|
36
36
|
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
37
37
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
38
38
|
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
39
|
-
|
39
|
+
check_params_numeric(n_splits: n_splits)
|
40
40
|
check_params_boolean(shuffle: shuffle)
|
41
|
-
|
41
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
42
42
|
check_params_positive(n_splits: n_splits)
|
43
43
|
@n_splits = n_splits
|
44
44
|
@shuffle = shuffle
|
@@ -53,7 +53,7 @@ module Rumale
|
|
53
53
|
# The dataset to be used to generate data indices for K-fold cross validation.
|
54
54
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
55
55
|
def split(x, _y = nil)
|
56
|
-
|
56
|
+
x = check_convert_sample_array(x)
|
57
57
|
# Initialize and check some variables.
|
58
58
|
n_samples, = x.shape
|
59
59
|
unless @n_splits.between?(2, n_samples)
|
@@ -32,10 +32,8 @@ module Rumale
|
|
32
32
|
# @param train_size [Float] The ratio of number of samples for train data.
|
33
33
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
34
34
|
def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
|
35
|
-
|
36
|
-
|
37
|
-
check_params_type_or_nil(Float, train_size: train_size)
|
38
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
35
|
+
check_params_numeric(n_splits: n_splits, test_size: test_size)
|
36
|
+
check_params_numeric_or_nil(train_size: train_size, random_seed: random_seed)
|
39
37
|
check_params_positive(n_splits: n_splits)
|
40
38
|
check_params_positive(test_size: test_size)
|
41
39
|
check_params_positive(train_size: train_size) unless train_size.nil?
|
@@ -53,7 +51,7 @@ module Rumale
|
|
53
51
|
# The dataset to be used to generate data indices for random permutation cross validation.
|
54
52
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
55
53
|
def split(x, _y = nil)
|
56
|
-
|
54
|
+
x = check_convert_sample_array(x)
|
57
55
|
# Initialize and check some variables.
|
58
56
|
n_samples = x.shape[0]
|
59
57
|
n_test_samples = (@test_size * n_samples).to_i
|