rumale 0.20.2 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +23 -0
- data/.rubocop.yml +10 -0
- data/CHANGELOG.md +26 -0
- data/Gemfile +5 -2
- data/README.md +17 -14
- data/lib/rumale.rb +3 -0
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/decomposition/pca.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +214 -0
- data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +3 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
- data/lib/rumale/linear_model/base_sgd.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +2 -2
- data/lib/rumale/linear_model/lasso.rb +2 -2
- data/lib/rumale/linear_model/linear_regression.rb +3 -3
- data/lib/rumale/linear_model/logistic_regression.rb +123 -35
- data/lib/rumale/linear_model/ridge.rb +3 -3
- data/lib/rumale/linear_model/svc.rb +6 -5
- data/lib/rumale/linear_model/svr.rb +6 -5
- data/lib/rumale/metric_learning/mlkr.rb +161 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +19 -48
- data/lib/rumale/pairwise_metric.rb +1 -1
- data/lib/rumale/pipeline/pipeline.rb +1 -1
- data/lib/rumale/tree/base_decision_tree.rb +2 -9
- data/lib/rumale/tree/gradient_tree_regressor.rb +3 -10
- data/lib/rumale/validation.rb +1 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +2 -1
- metadata +25 -8
- data/.coveralls.yml +0 -1
@@ -10,7 +10,7 @@ module Rumale
|
|
10
10
|
#
|
11
11
|
# @example
|
12
12
|
# estimator =
|
13
|
-
# Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter:
|
13
|
+
# Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
|
14
14
|
# estimator.fit(training_samples, training_values)
|
15
15
|
# results = estimator.predict(testing_samples)
|
16
16
|
#
|
@@ -70,7 +70,7 @@ module Rumale
|
|
70
70
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
71
71
|
def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
|
72
72
|
reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
|
73
|
-
max_iter:
|
73
|
+
max_iter: 1000, batch_size: 50, tol: 1e-4,
|
74
74
|
solver: 'auto',
|
75
75
|
n_jobs: nil, verbose: false, random_seed: nil)
|
76
76
|
check_params_numeric(learning_rate: learning_rate, momentum: momentum,
|
@@ -85,7 +85,7 @@ module Rumale
|
|
85
85
|
@params[:solver] = if solver == 'auto'
|
86
86
|
load_linalg? ? 'svd' : 'sgd'
|
87
87
|
else
|
88
|
-
solver != 'svd' ? 'sgd' : 'svd'
|
88
|
+
solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
|
89
89
|
end
|
90
90
|
@params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
|
91
91
|
@params[:random_seed] ||= srand
|
@@ -11,13 +11,14 @@ module Rumale
|
|
11
11
|
# with stochastic gradient descent optimization.
|
12
12
|
# For multiclass classification problem, it uses one-vs-the-rest strategy.
|
13
13
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
14
|
+
# @note
|
15
|
+
# Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
|
16
|
+
# If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
|
17
|
+
# https://github.com/yoshoku/rumale-svm
|
17
18
|
#
|
18
19
|
# @example
|
19
20
|
# estimator =
|
20
|
-
# Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter:
|
21
|
+
# Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
|
21
22
|
# estimator.fit(training_samples, training_labels)
|
22
23
|
# results = estimator.predict(testing_samples)
|
23
24
|
#
|
@@ -74,7 +75,7 @@ module Rumale
|
|
74
75
|
def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
|
75
76
|
penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
|
76
77
|
fit_bias: true, bias_scale: 1.0,
|
77
|
-
max_iter:
|
78
|
+
max_iter: 1000, batch_size: 50, tol: 1e-4,
|
78
79
|
probability: false,
|
79
80
|
n_jobs: nil, verbose: false, random_seed: nil)
|
80
81
|
check_params_numeric(learning_rate: learning_rate, momentum: momentum,
|
@@ -8,13 +8,14 @@ module Rumale
|
|
8
8
|
# SVR is a class that implements Support Vector Regressor
|
9
9
|
# with stochastic gradient descent optimization.
|
10
10
|
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
11
|
+
# @note
|
12
|
+
# Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
|
13
|
+
# If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
|
14
|
+
# https://github.com/yoshoku/rumale-svm
|
14
15
|
#
|
15
16
|
# @example
|
16
17
|
# estimator =
|
17
|
-
# Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter:
|
18
|
+
# Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 50, random_seed: 1)
|
18
19
|
# estimator.fit(training_samples, training_target_values)
|
19
20
|
# results = estimator.predict(testing_samples)
|
20
21
|
#
|
@@ -68,7 +69,7 @@ module Rumale
|
|
68
69
|
penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
|
69
70
|
fit_bias: true, bias_scale: 1.0,
|
70
71
|
epsilon: 0.1,
|
71
|
-
max_iter:
|
72
|
+
max_iter: 1000, batch_size: 50, tol: 1e-4,
|
72
73
|
n_jobs: nil, verbose: false, random_seed: nil)
|
73
74
|
check_params_numeric(learning_rate: learning_rate, momentum: momentum,
|
74
75
|
reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/decomposition/pca'
|
6
|
+
require 'rumale/pairwise_metric'
|
7
|
+
require 'rumale/utils'
|
8
|
+
require 'lbfgsb'
|
9
|
+
|
10
|
+
module Rumale
|
11
|
+
module MetricLearning
|
12
|
+
# MLKR is a class that implements Metric Learning for Kernel Regression.
|
13
|
+
#
|
14
|
+
# @example
|
15
|
+
# transformer = Rumale::MetricLearning::MLKR.new
|
16
|
+
# transformer.fit(training_samples, training_target_values)
|
17
|
+
# low_samples = transformer.transform(testing_samples)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
|
21
|
+
class MLKR
|
22
|
+
include Base::BaseEstimator
|
23
|
+
include Base::Transformer
|
24
|
+
|
25
|
+
# Returns the metric components.
|
26
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
27
|
+
attr_reader :components
|
28
|
+
|
29
|
+
# Return the number of iterations run for optimization
|
30
|
+
# @return [Integer]
|
31
|
+
attr_reader :n_iter
|
32
|
+
|
33
|
+
# Return the random generator.
|
34
|
+
# @return [Random]
|
35
|
+
attr_reader :rng
|
36
|
+
|
37
|
+
# Create a new transformer with MLKR.
|
38
|
+
#
|
39
|
+
# @param n_components [Integer] The number of components.
|
40
|
+
# @param init [String] The initialization method for components ('random' or 'pca').
|
41
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
42
|
+
# @param tol [Float] The tolerance of termination criterion.
|
43
|
+
# This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
|
44
|
+
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
45
|
+
# If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
|
46
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
47
|
+
def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
|
48
|
+
check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
|
49
|
+
check_params_numeric(max_iter: max_iter, tol: tol)
|
50
|
+
check_params_string(init: init)
|
51
|
+
check_params_boolean(verbose: verbose)
|
52
|
+
@params = {}
|
53
|
+
@params[:n_components] = n_components
|
54
|
+
@params[:init] = init
|
55
|
+
@params[:max_iter] = max_iter
|
56
|
+
@params[:tol] = tol
|
57
|
+
@params[:verbose] = verbose
|
58
|
+
@params[:random_seed] = random_seed
|
59
|
+
@params[:random_seed] ||= srand
|
60
|
+
@components = nil
|
61
|
+
@n_iter = nil
|
62
|
+
@rng = Random.new(@params[:random_seed])
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit the model with given training data.
|
66
|
+
#
|
67
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
68
|
+
# @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
|
69
|
+
# @return [MLKR] The learned transformer itself.
|
70
|
+
def fit(x, y)
|
71
|
+
x = check_convert_sample_array(x)
|
72
|
+
y = check_convert_tvalue_array(y)
|
73
|
+
check_sample_tvalue_size(x, y)
|
74
|
+
n_features = x.shape[1]
|
75
|
+
n_components = if @params[:n_components].nil?
|
76
|
+
n_features
|
77
|
+
else
|
78
|
+
[n_features, @params[:n_components]].min
|
79
|
+
end
|
80
|
+
@components, @n_iter = optimize_components(x, y, n_features, n_components)
|
81
|
+
@prototypes = x.dot(@components.transpose)
|
82
|
+
@values = y
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
# Fit the model with training data, and then transform them with the learned model.
|
87
|
+
#
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
89
|
+
# @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
|
90
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
91
|
+
def fit_transform(x, y)
|
92
|
+
x = check_convert_sample_array(x)
|
93
|
+
y = check_convert_tvalue_array(y)
|
94
|
+
check_sample_tvalue_size(x, y)
|
95
|
+
fit(x, y).transform(x)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Transform the given data with the learned model.
|
99
|
+
#
|
100
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
101
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
102
|
+
def transform(x)
|
103
|
+
x = check_convert_sample_array(x)
|
104
|
+
x.dot(@components.transpose)
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def init_components(x, n_features, n_components)
|
110
|
+
if @params[:init] == 'pca'
|
111
|
+
pca = Rumale::Decomposition::PCA.new(n_components: n_components)
|
112
|
+
pca.fit(x).components.flatten.dup
|
113
|
+
else
|
114
|
+
Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def optimize_components(x, y, n_features, n_components)
|
119
|
+
# initialize components.
|
120
|
+
comp_init = init_components(x, n_features, n_components)
|
121
|
+
# initialize optimization results.
|
122
|
+
res = {}
|
123
|
+
res[:x] = comp_init
|
124
|
+
res[:n_iter] = 0
|
125
|
+
# perform optimization.
|
126
|
+
verbose = @params[:verbose] ? 1 : -1
|
127
|
+
res = Lbfgsb.minimize(
|
128
|
+
fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
|
129
|
+
maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
|
130
|
+
)
|
131
|
+
# return the results.
|
132
|
+
n_iter = res[:n_iter]
|
133
|
+
comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
|
134
|
+
[comps, n_iter]
|
135
|
+
end
|
136
|
+
|
137
|
+
def mlkr_fnc(w, x, y)
|
138
|
+
# initialize some variables.
|
139
|
+
n_features = x.shape[1]
|
140
|
+
n_components = w.size / n_features
|
141
|
+
# projection.
|
142
|
+
w = w.reshape(n_components, n_features)
|
143
|
+
z = x.dot(w.transpose)
|
144
|
+
# predict values.
|
145
|
+
kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
|
146
|
+
kernel_mat[kernel_mat.diag_indices] = 0.0
|
147
|
+
norm = kernel_mat.sum(1)
|
148
|
+
norm[norm.eq(0)] = 1
|
149
|
+
y_pred = kernel_mat.dot(y) / norm
|
150
|
+
# calculate loss.
|
151
|
+
y_diff = y_pred - y
|
152
|
+
loss = (y_diff**2).sum
|
153
|
+
# calculate gradient.
|
154
|
+
weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
|
155
|
+
weight_mat = weight_mat.sum(0).diag - weight_mat
|
156
|
+
gradient = 8 * z.transpose.dot(weight_mat).dot(x)
|
157
|
+
[loss, gradient.flatten.dup]
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
@@ -2,13 +2,15 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/pairwise_metric'
|
7
|
+
require 'lbfgsb'
|
5
8
|
|
6
9
|
module Rumale
|
7
10
|
module MetricLearning
|
8
11
|
# NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
|
9
12
|
#
|
10
13
|
# @example
|
11
|
-
# require 'mopti'
|
12
14
|
# require 'rumale'
|
13
15
|
#
|
14
16
|
# transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
|
@@ -39,7 +41,9 @@ module Rumale
|
|
39
41
|
# @param init [String] The initialization method for components ('random' or 'pca').
|
40
42
|
# @param max_iter [Integer] The maximum number of iterations.
|
41
43
|
# @param tol [Float] The tolerance of termination criterion.
|
44
|
+
# This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
|
42
45
|
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
46
|
+
# If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
|
43
47
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
44
48
|
def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
|
45
49
|
check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
|
@@ -65,8 +69,6 @@ module Rumale
|
|
65
69
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
66
70
|
# @return [NeighbourhoodComponentAnalysis] The learned transformer itself.
|
67
71
|
def fit(x, y)
|
68
|
-
raise 'NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded.' unless enable_mopti?
|
69
|
-
|
70
72
|
x = check_convert_sample_array(x)
|
71
73
|
y = check_convert_label_array(y)
|
72
74
|
check_sample_label_size(x, y)
|
@@ -102,17 +104,9 @@ module Rumale
|
|
102
104
|
|
103
105
|
private
|
104
106
|
|
105
|
-
def enable_mopti?
|
106
|
-
if defined?(Mopti).nil?
|
107
|
-
warn('NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded. You should intall and load mopti gem in advance.')
|
108
|
-
return false
|
109
|
-
end
|
110
|
-
true
|
111
|
-
end
|
112
|
-
|
113
107
|
def init_components(x, n_features, n_components)
|
114
108
|
if @params[:init] == 'pca'
|
115
|
-
pca = Rumale::Decomposition::PCA.new(n_components: n_components
|
109
|
+
pca = Rumale::Decomposition::PCA.new(n_components: n_components)
|
116
110
|
pca.fit(x).components.flatten.dup
|
117
111
|
else
|
118
112
|
Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
|
@@ -127,28 +121,18 @@ module Rumale
|
|
127
121
|
res[:x] = comp_init
|
128
122
|
res[:n_iter] = 0
|
129
123
|
# perform optimization.
|
130
|
-
|
131
|
-
|
132
|
-
x_init: comp_init, args: [x, y],
|
133
|
-
|
124
|
+
verbose = @params[:verbose] ? 1 : -1
|
125
|
+
res = Lbfgsb.minimize(
|
126
|
+
fnc: method(:nca_fnc), jcb: true, x_init: comp_init, args: [x, y],
|
127
|
+
maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
|
134
128
|
)
|
135
|
-
fold = 0.0
|
136
|
-
dold = 0.0
|
137
|
-
optimizer.each do |prm|
|
138
|
-
res = prm
|
139
|
-
puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
|
140
|
-
break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
|
141
|
-
|
142
|
-
fold = res[:fnc]
|
143
|
-
dold = res[:jcb]
|
144
|
-
end
|
145
129
|
# return the results.
|
146
130
|
n_iter = res[:n_iter]
|
147
131
|
comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
|
148
132
|
[comps, n_iter]
|
149
133
|
end
|
150
134
|
|
151
|
-
def
|
135
|
+
def nca_fnc(w, x, y)
|
152
136
|
# initialize some variables.
|
153
137
|
n_samples, n_features = x.shape
|
154
138
|
n_components = w.size / n_features
|
@@ -157,32 +141,19 @@ module Rumale
|
|
157
141
|
z = x.dot(w.transpose)
|
158
142
|
# calculate probability matrix.
|
159
143
|
prob_mat = probability_matrix(z)
|
160
|
-
# calculate loss.
|
144
|
+
# calculate loss and gradient.
|
161
145
|
# NOTE:
|
162
146
|
# NCA attempts to maximize its objective function.
|
163
147
|
# For the minimization algorithm, the objective function value is subtracted from the maximum value (n_samples).
|
164
148
|
mask_mat = y.expand_dims(1).eq(y)
|
165
149
|
masked_prob_mat = prob_mat * mask_mat
|
166
|
-
n_samples - masked_prob_mat.sum
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
# projection.
|
174
|
-
w = w.reshape(n_components, n_features)
|
175
|
-
z = x.dot(w.transpose)
|
176
|
-
# calculate probability matrix.
|
177
|
-
prob_mat = probability_matrix(z)
|
178
|
-
# calculate gradient.
|
179
|
-
mask_mat = y.expand_dims(1).eq(y)
|
180
|
-
masked_prob_mat = prob_mat * mask_mat
|
181
|
-
weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
|
182
|
-
weighted_prob_mat += weighted_prob_mat.transpose
|
183
|
-
weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
|
184
|
-
gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
|
185
|
-
-gradient.flatten.dup
|
150
|
+
loss = n_samples - masked_prob_mat.sum
|
151
|
+
sum_probs = masked_prob_mat.sum(1)
|
152
|
+
weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
|
153
|
+
weight_mat += weight_mat.transpose
|
154
|
+
weight_mat = weight_mat.sum(0).diag - weight_mat
|
155
|
+
gradient = -2 * z.transpose.dot(weight_mat).dot(x)
|
156
|
+
[loss, gradient.flatten.dup]
|
186
157
|
end
|
187
158
|
|
188
159
|
def probability_matrix(z)
|
@@ -123,7 +123,7 @@ module Rumale
|
|
123
123
|
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
124
124
|
# @param coef [Integer] The parameter of polynomial kernel.
|
125
125
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
126
|
-
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
|
126
|
+
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
127
127
|
y = x if y.nil?
|
128
128
|
gamma ||= 1.0 / x.shape[1]
|
129
129
|
x = Rumale::Validation.check_convert_sample_array(x)
|
@@ -140,7 +140,7 @@ module Rumale
|
|
140
140
|
def validate_steps(steps)
|
141
141
|
steps.keys[0...-1].each do |name|
|
142
142
|
transformer = steps[name]
|
143
|
-
next if transformer.nil? ||
|
143
|
+
next if transformer.nil? || (transformer.class.method_defined?(:fit) && transformer.class.method_defined?(:transform))
|
144
144
|
|
145
145
|
raise TypeError,
|
146
146
|
'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
|
@@ -75,17 +75,10 @@ module Rumale
|
|
75
75
|
node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
|
76
76
|
|
77
77
|
# terminate growing.
|
78
|
-
|
79
|
-
return nil if @n_leaves >= @params[:max_leaf_nodes]
|
80
|
-
end
|
81
|
-
|
78
|
+
return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
|
82
79
|
return nil if n_samples < @params[:min_samples_leaf]
|
83
80
|
return put_leaf(node, y) if n_samples == @params[:min_samples_leaf]
|
84
|
-
|
85
|
-
unless @params[:max_depth].nil?
|
86
|
-
return put_leaf(node, y) if depth == @params[:max_depth]
|
87
|
-
end
|
88
|
-
|
81
|
+
return put_leaf(node, y) if !@params[:max_depth].nil? && depth == @params[:max_depth]
|
89
82
|
return put_leaf(node, y) if stop_growing?(y)
|
90
83
|
|
91
84
|
# calculate optimal parameters.
|
@@ -138,7 +138,7 @@ module Rumale
|
|
138
138
|
nil
|
139
139
|
end
|
140
140
|
|
141
|
-
def grow_node(depth, x, y, g, h)
|
141
|
+
def grow_node(depth, x, y, g, h) # rubocop:disable Metrics/AbcSize
|
142
142
|
# initialize some variables.
|
143
143
|
sum_g = g.sum
|
144
144
|
sum_h = h.sum
|
@@ -146,17 +146,10 @@ module Rumale
|
|
146
146
|
node = Node.new(depth: depth, n_samples: n_samples)
|
147
147
|
|
148
148
|
# terminate growing.
|
149
|
-
|
150
|
-
return nil if @n_leaves >= @params[:max_leaf_nodes]
|
151
|
-
end
|
152
|
-
|
149
|
+
return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
|
153
150
|
return nil if n_samples < @params[:min_samples_leaf]
|
154
151
|
return put_leaf(node, sum_g, sum_h) if n_samples == @params[:min_samples_leaf]
|
155
|
-
|
156
|
-
unless @params[:max_depth].nil?
|
157
|
-
return put_leaf(node, sum_g, sum_h) if depth == @params[:max_depth]
|
158
|
-
end
|
159
|
-
|
152
|
+
return put_leaf(node, sum_g, sum_h) if !@params[:max_depth].nil? && depth == @params[:max_depth]
|
160
153
|
return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
|
161
154
|
|
162
155
|
# calculate optimal parameters.
|
data/lib/rumale/validation.rb
CHANGED
@@ -109,7 +109,7 @@ module Rumale
|
|
109
109
|
|
110
110
|
# @!visibility private
|
111
111
|
def check_params_positive(params = {})
|
112
|
-
params.
|
112
|
+
params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
|
113
113
|
nil
|
114
114
|
end
|
115
115
|
end
|