rumale 0.13.8 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
@@ -55,11 +55,10 @@ module Rumale
|
|
55
55
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
56
56
|
def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
57
57
|
solver: 'sgd', n_jobs: nil, random_seed: nil)
|
58
|
-
|
59
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
58
|
+
check_params_numeric(bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
60
59
|
check_params_boolean(fit_bias: fit_bias)
|
61
60
|
check_params_string(solver: solver)
|
62
|
-
|
61
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
63
62
|
check_params_positive(max_iter: max_iter, batch_size: batch_size)
|
64
63
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(reg_param: 0.0)
|
65
64
|
keywd_args.delete(:solver)
|
@@ -73,8 +72,8 @@ module Rumale
|
|
73
72
|
# @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
74
73
|
# @return [LinearRegression] The learned regressor itself.
|
75
74
|
def fit(x, y)
|
76
|
-
|
77
|
-
|
75
|
+
x = check_convert_sample_array(x)
|
76
|
+
y = check_convert_tvalue_array(y)
|
78
77
|
check_sample_tvalue_size(x, y)
|
79
78
|
|
80
79
|
if @params[:solver] == 'svd' && enable_linalg?
|
@@ -91,7 +90,7 @@ module Rumale
|
|
91
90
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
92
91
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
93
92
|
def predict(x)
|
94
|
-
|
93
|
+
x = check_convert_sample_array(x)
|
95
94
|
x.dot(@weight_vec.transpose) + @bias_term
|
96
95
|
end
|
97
96
|
|
@@ -57,10 +57,9 @@ module Rumale
|
|
57
57
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
58
58
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
59
59
|
max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
|
60
|
-
|
61
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
60
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
62
61
|
check_params_boolean(fit_bias: fit_bias)
|
63
|
-
|
62
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
64
63
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
65
64
|
super
|
66
65
|
@classes = nil
|
@@ -72,15 +71,15 @@ module Rumale
|
|
72
71
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
73
72
|
# @return [LogisticRegression] The learned classifier itself.
|
74
73
|
def fit(x, y)
|
75
|
-
|
76
|
-
|
74
|
+
x = check_convert_sample_array(x)
|
75
|
+
y = check_convert_label_array(y)
|
77
76
|
check_sample_label_size(x, y)
|
78
77
|
|
79
78
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
80
|
-
n_classes = @classes.size
|
81
|
-
n_features = x.shape[1]
|
82
79
|
|
83
|
-
if
|
80
|
+
if multiclass_problem?
|
81
|
+
n_classes = @classes.size
|
82
|
+
n_features = x.shape[1]
|
84
83
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
85
84
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
86
85
|
if enable_parallel?
|
@@ -98,7 +97,7 @@ module Rumale
|
|
98
97
|
end
|
99
98
|
end
|
100
99
|
else
|
101
|
-
negative_label =
|
100
|
+
negative_label = @classes[0]
|
102
101
|
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
103
102
|
@weight_vec, @bias_term = partial_fit(x, bin_y)
|
104
103
|
end
|
@@ -111,7 +110,7 @@ module Rumale
|
|
111
110
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
112
111
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
113
112
|
def decision_function(x)
|
114
|
-
|
113
|
+
x = check_convert_sample_array(x)
|
115
114
|
x.dot(@weight_vec.transpose) + @bias_term
|
116
115
|
end
|
117
116
|
|
@@ -120,9 +119,7 @@ module Rumale
|
|
120
119
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
121
120
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
122
121
|
def predict(x)
|
123
|
-
|
124
|
-
|
125
|
-
return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
|
122
|
+
x = check_convert_sample_array(x)
|
126
123
|
|
127
124
|
n_samples, = x.shape
|
128
125
|
decision_values = predict_proba(x)
|
@@ -139,10 +136,10 @@ module Rumale
|
|
139
136
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
140
137
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
141
138
|
def predict_proba(x)
|
142
|
-
|
139
|
+
x = check_convert_sample_array(x)
|
143
140
|
|
144
141
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
145
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if
|
142
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
|
146
143
|
|
147
144
|
n_samples, = x.shape
|
148
145
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -177,6 +174,10 @@ module Rumale
|
|
177
174
|
def calc_loss_gradient(x, y, weight)
|
178
175
|
y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y
|
179
176
|
end
|
177
|
+
|
178
|
+
def multiclass_problem?
|
179
|
+
@classes.size > 2
|
180
|
+
end
|
180
181
|
end
|
181
182
|
end
|
182
183
|
end
|
@@ -56,11 +56,10 @@ module Rumale
|
|
56
56
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
57
57
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
58
58
|
solver: 'sgd', n_jobs: nil, random_seed: nil)
|
59
|
-
|
60
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
59
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
61
60
|
check_params_boolean(fit_bias: fit_bias)
|
62
61
|
check_params_string(solver: solver)
|
63
|
-
|
62
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
64
63
|
check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
|
65
64
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
66
65
|
keywd_args.delete(:solver)
|
@@ -74,8 +73,8 @@ module Rumale
|
|
74
73
|
# @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
75
74
|
# @return [Ridge] The learned regressor itself.
|
76
75
|
def fit(x, y)
|
77
|
-
|
78
|
-
|
76
|
+
x = check_convert_sample_array(x)
|
77
|
+
y = check_convert_tvalue_array(y)
|
79
78
|
check_sample_tvalue_size(x, y)
|
80
79
|
|
81
80
|
if @params[:solver] == 'svd' && enable_linalg?
|
@@ -92,7 +91,7 @@ module Rumale
|
|
92
91
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
93
92
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
94
93
|
def predict(x)
|
95
|
-
|
94
|
+
x = check_convert_sample_array(x)
|
96
95
|
x.dot(@weight_vec.transpose) + @bias_term
|
97
96
|
end
|
98
97
|
|
@@ -59,10 +59,9 @@ module Rumale
|
|
59
59
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
60
60
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
61
61
|
max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, n_jobs: nil, random_seed: nil)
|
62
|
-
|
63
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
62
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
64
63
|
check_params_boolean(fit_bias: fit_bias, probability: probability)
|
65
|
-
|
64
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
66
65
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
67
66
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
68
67
|
keywd_args.delete(:probability)
|
@@ -78,15 +77,15 @@ module Rumale
|
|
78
77
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
79
78
|
# @return [SVC] The learned classifier itself.
|
80
79
|
def fit(x, y)
|
81
|
-
|
82
|
-
|
80
|
+
x = check_convert_sample_array(x)
|
81
|
+
y = check_convert_label_array(y)
|
83
82
|
check_sample_label_size(x, y)
|
84
83
|
|
85
84
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
86
|
-
n_classes = @classes.size
|
87
|
-
n_features = x.shape[1]
|
88
85
|
|
89
|
-
if
|
86
|
+
if multiclass_problem?
|
87
|
+
n_classes = @classes.size
|
88
|
+
n_features = x.shape[1]
|
90
89
|
# initialize model.
|
91
90
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
92
91
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
@@ -108,7 +107,7 @@ module Rumale
|
|
108
107
|
# store model.
|
109
108
|
models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
|
110
109
|
else
|
111
|
-
negative_label =
|
110
|
+
negative_label = @classes[0]
|
112
111
|
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
113
112
|
@weight_vec, @bias_term, @prob_param = partial_fit(x, bin_y)
|
114
113
|
end
|
@@ -121,7 +120,7 @@ module Rumale
|
|
121
120
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
122
121
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
123
122
|
def decision_function(x)
|
124
|
-
|
123
|
+
x = check_convert_sample_array(x)
|
125
124
|
x.dot(@weight_vec.transpose) + @bias_term
|
126
125
|
end
|
127
126
|
|
@@ -130,16 +129,19 @@ module Rumale
|
|
130
129
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
131
130
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
132
131
|
def predict(x)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
132
|
+
x = check_convert_sample_array(x)
|
133
|
+
|
134
|
+
n_samples = x.shape[0]
|
135
|
+
predicted = if multiclass_problem?
|
136
|
+
decision_values = decision_function(x)
|
137
|
+
if enable_parallel?
|
138
|
+
parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
139
|
+
else
|
140
|
+
Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
141
|
+
end
|
141
142
|
else
|
142
|
-
|
143
|
+
decision_values = decision_function(x).ge(0.0).to_a
|
144
|
+
Array.new(n_samples) { |n| @classes[decision_values[n]] }
|
143
145
|
end
|
144
146
|
Numo::Int32.asarray(predicted)
|
145
147
|
end
|
@@ -149,18 +151,18 @@ module Rumale
|
|
149
151
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
150
152
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
151
153
|
def predict_proba(x)
|
152
|
-
|
154
|
+
x = check_convert_sample_array(x)
|
153
155
|
|
154
|
-
if
|
156
|
+
if multiclass_problem?
|
155
157
|
probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
|
156
|
-
|
158
|
+
(probs.transpose / probs.sum(axis: 1)).transpose.dup
|
159
|
+
else
|
160
|
+
n_samples, = x.shape
|
161
|
+
probs = Numo::DFloat.zeros(n_samples, 2)
|
162
|
+
probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
|
163
|
+
probs[true, 0] = 1.0 - probs[true, 1]
|
164
|
+
probs
|
157
165
|
end
|
158
|
-
|
159
|
-
n_samples, = x.shape
|
160
|
-
probs = Numo::DFloat.zeros(n_samples, 2)
|
161
|
-
probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
|
162
|
-
probs[true, 0] = 1.0 - probs[true, 1]
|
163
|
-
probs
|
164
166
|
end
|
165
167
|
|
166
168
|
# Dump marshal data.
|
@@ -204,6 +206,10 @@ module Rumale
|
|
204
206
|
grad[target_ids] = -y[target_ids]
|
205
207
|
grad
|
206
208
|
end
|
209
|
+
|
210
|
+
def multiclass_problem?
|
211
|
+
@classes.size > 2
|
212
|
+
end
|
207
213
|
end
|
208
214
|
end
|
209
215
|
end
|
@@ -52,10 +52,9 @@ module Rumale
|
|
52
52
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
53
53
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
|
54
54
|
max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
|
55
|
-
|
56
|
-
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
55
|
+
check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon, max_iter: max_iter, batch_size: batch_size)
|
57
56
|
check_params_boolean(fit_bias: fit_bias)
|
58
|
-
|
57
|
+
check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
|
59
58
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
|
60
59
|
max_iter: max_iter, batch_size: batch_size)
|
61
60
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
@@ -70,8 +69,8 @@ module Rumale
|
|
70
69
|
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
71
70
|
# @return [SVR] The learned regressor itself.
|
72
71
|
def fit(x, y)
|
73
|
-
|
74
|
-
|
72
|
+
x = check_convert_sample_array(x)
|
73
|
+
y = check_convert_tvalue_array(y)
|
75
74
|
check_sample_tvalue_size(x, y)
|
76
75
|
|
77
76
|
n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
|
@@ -98,7 +97,7 @@ module Rumale
|
|
98
97
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
99
98
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
|
100
99
|
def predict(x)
|
101
|
-
|
100
|
+
x = check_convert_sample_array(x)
|
102
101
|
x.dot(@weight_vec.transpose) + @bias_term
|
103
102
|
end
|
104
103
|
|
data/lib/rumale/manifold/mds.rb
CHANGED
@@ -53,11 +53,10 @@ module Rumale
|
|
53
53
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
54
54
|
def initialize(n_components: 2, metric: 'euclidean', init: 'random',
|
55
55
|
max_iter: 300, tol: nil, verbose: false, random_seed: nil)
|
56
|
-
|
56
|
+
check_params_numeric(n_components: n_components, max_iter: max_iter)
|
57
57
|
check_params_string(metric: metric, init: init)
|
58
58
|
check_params_boolean(verbose: verbose)
|
59
|
-
|
60
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
59
|
+
check_params_numeric_or_nil(tol: tol, random_seed: random_seed)
|
61
60
|
check_params_positive(n_components: n_components, max_iter: max_iter)
|
62
61
|
@params = {}
|
63
62
|
@params[:n_components] = n_components
|
@@ -82,7 +81,7 @@ module Rumale
|
|
82
81
|
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
|
83
82
|
# @return [MDS] The learned transformer itself.
|
84
83
|
def fit(x, _not_used = nil)
|
85
|
-
|
84
|
+
x = check_convert_sample_array(x)
|
86
85
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
87
86
|
# initialize some variables.
|
88
87
|
n_samples = x.shape[0]
|
data/lib/rumale/manifold/tsne.rb
CHANGED
@@ -58,12 +58,10 @@ module Rumale
|
|
58
58
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
59
59
|
def initialize(n_components: 2, perplexity: 30.0, metric: 'euclidean', init: 'random',
|
60
60
|
max_iter: 500, tol: nil, verbose: false, random_seed: nil)
|
61
|
-
|
62
|
-
check_params_float(perplexity: perplexity)
|
61
|
+
check_params_numeric(n_components: n_components, max_iter: max_iter, perplexity: perplexity)
|
63
62
|
check_params_string(metric: metric, init: init)
|
64
63
|
check_params_boolean(verbose: verbose)
|
65
|
-
|
66
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
64
|
+
check_params_numeric_or_nil(tol: tol, random_seed: random_seed)
|
67
65
|
check_params_positive(n_components: n_components, perplexity: perplexity, max_iter: max_iter)
|
68
66
|
@params = {}
|
69
67
|
@params[:n_components] = n_components
|
@@ -89,7 +87,7 @@ module Rumale
|
|
89
87
|
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
|
90
88
|
# @return [TSNE] The learned transformer itself.
|
91
89
|
def fit(x, _not_used = nil)
|
92
|
-
|
90
|
+
x = check_convert_sample_array(x)
|
93
91
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
94
92
|
# initialize some variables.
|
95
93
|
@n_iter = 0
|
@@ -68,14 +68,15 @@ module Rumale
|
|
68
68
|
# * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
|
69
69
|
# the return_train_score is false.
|
70
70
|
def perform(x, y)
|
71
|
-
|
71
|
+
x = check_convert_sample_array(x)
|
72
72
|
if @estimator.is_a?(Rumale::Base::Classifier)
|
73
|
-
|
73
|
+
y = check_convert_label_array(y)
|
74
74
|
check_sample_label_size(x, y)
|
75
|
-
|
76
|
-
|
77
|
-
check_tvalue_array(y)
|
75
|
+
elsif @estimator.is_a?(Rumale::Base::Regressor)
|
76
|
+
y = check_convert_tvalue_array(y)
|
78
77
|
check_sample_tvalue_size(x, y)
|
78
|
+
else
|
79
|
+
y = Numo::NArray.asarray(y)
|
79
80
|
end
|
80
81
|
# Initialize the report of cross validation.
|
81
82
|
report = { test_score: [], train_score: nil, fit_time: [] }
|
@@ -88,7 +88,7 @@ module Rumale
|
|
88
88
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
89
89
|
# @return [GridSearchCV] The learned estimator with grid search.
|
90
90
|
def fit(x, y)
|
91
|
-
|
91
|
+
x = check_convert_sample_array(x)
|
92
92
|
|
93
93
|
init_attrs
|
94
94
|
|
@@ -111,7 +111,7 @@ module Rumale
|
|
111
111
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
112
112
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
113
113
|
def decision_function(x)
|
114
|
-
|
114
|
+
x = check_convert_sample_array(x)
|
115
115
|
@best_estimator.decision_function(x)
|
116
116
|
end
|
117
117
|
|
@@ -120,7 +120,7 @@ module Rumale
|
|
120
120
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
121
121
|
# @return [Numo::NArray] Predicted results.
|
122
122
|
def predict(x)
|
123
|
-
|
123
|
+
x = check_convert_sample_array(x)
|
124
124
|
@best_estimator.predict(x)
|
125
125
|
end
|
126
126
|
|
@@ -129,7 +129,7 @@ module Rumale
|
|
129
129
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
130
130
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
131
131
|
def predict_log_proba(x)
|
132
|
-
|
132
|
+
x = check_convert_sample_array(x)
|
133
133
|
@best_estimator.predict_log_proba(x)
|
134
134
|
end
|
135
135
|
|
@@ -138,7 +138,7 @@ module Rumale
|
|
138
138
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
139
139
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
140
140
|
def predict_proba(x)
|
141
|
-
|
141
|
+
x = check_convert_sample_array(x)
|
142
142
|
@best_estimator.predict_proba(x)
|
143
143
|
end
|
144
144
|
|
@@ -148,7 +148,7 @@ module Rumale
|
|
148
148
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
149
149
|
# @return [Float] The score of estimator.
|
150
150
|
def score(x, y)
|
151
|
-
|
151
|
+
x = check_convert_sample_array(x)
|
152
152
|
@best_estimator.score(x, y)
|
153
153
|
end
|
154
154
|
|
@@ -36,9 +36,9 @@ module Rumale
|
|
36
36
|
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
37
37
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
38
38
|
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
39
|
-
|
39
|
+
check_params_numeric(n_splits: n_splits)
|
40
40
|
check_params_boolean(shuffle: shuffle)
|
41
|
-
|
41
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
42
42
|
check_params_positive(n_splits: n_splits)
|
43
43
|
@n_splits = n_splits
|
44
44
|
@shuffle = shuffle
|
@@ -53,7 +53,7 @@ module Rumale
|
|
53
53
|
# The dataset to be used to generate data indices for K-fold cross validation.
|
54
54
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
55
55
|
def split(x, _y = nil)
|
56
|
-
|
56
|
+
x = check_convert_sample_array(x)
|
57
57
|
# Initialize and check some variables.
|
58
58
|
n_samples, = x.shape
|
59
59
|
unless @n_splits.between?(2, n_samples)
|
@@ -32,10 +32,8 @@ module Rumale
|
|
32
32
|
# @param train_size [Float] The ratio of number of samples for train data.
|
33
33
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
34
34
|
def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
|
35
|
-
|
36
|
-
|
37
|
-
check_params_type_or_nil(Float, train_size: train_size)
|
38
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
35
|
+
check_params_numeric(n_splits: n_splits, test_size: test_size)
|
36
|
+
check_params_numeric_or_nil(train_size: train_size, random_seed: random_seed)
|
39
37
|
check_params_positive(n_splits: n_splits)
|
40
38
|
check_params_positive(test_size: test_size)
|
41
39
|
check_params_positive(train_size: train_size) unless train_size.nil?
|
@@ -53,7 +51,7 @@ module Rumale
|
|
53
51
|
# The dataset to be used to generate data indices for random permutation cross validation.
|
54
52
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
55
53
|
def split(x, _y = nil)
|
56
|
-
|
54
|
+
x = check_convert_sample_array(x)
|
57
55
|
# Initialize and check some variables.
|
58
56
|
n_samples = x.shape[0]
|
59
57
|
n_test_samples = (@test_size * n_samples).to_i
|