rumale 0.22.1 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +1 -1
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +7 -1
- data/Gemfile +3 -2
- data/README.md +2 -2
- data/lib/rumale.rb +2 -0
- data/lib/rumale/decomposition/pca.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +214 -0
- data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +1 -1
- data/lib/rumale/linear_model/logistic_regression.rb +1 -1
- data/lib/rumale/linear_model/ridge.rb +1 -1
- data/lib/rumale/pairwise_metric.rb +1 -1
- data/lib/rumale/validation.rb +1 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -1
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 703a6895f4218ca45c5d5ae5e86559b077cf1be213d4939eb1e9ab94eac4621d
|
4
|
+
data.tar.gz: 5862466e565d1e6030c35494b5028ae980a47d373e90050c62266055fcecd374
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 988d55c681a102e0c65b9133c6aeafc049e33755955f959d6e6046f5601dd192af881424355a2b373ed2e7a5a16b74236698aef5372e09584b10fe28d1b7bc21
|
7
|
+
data.tar.gz: adc58efa3b46d9fc1a87ddb2a4df32472507d61f21a3a0eb07026068cc5e41af166fb0a0f8ae23f1b23aec649b22835a50edbed79d35255e8cc231b82b31eb8c
|
data/.github/workflows/build.yml
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
|
+
# 0.22.2
|
2
|
+
- Add classifier and regressor classes for stacking method.
|
3
|
+
- [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
|
4
|
+
- [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
|
5
|
+
- Refactor some code with RuboCop.
|
6
|
+
|
1
7
|
# 0.22.1
|
2
|
-
- Add transfomer class for MLKR, that implements Metric Learning for Kernel Regression.
|
8
|
+
- Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
|
3
9
|
- Refactor NeighbourhoodComponentAnalysis.
|
4
10
|
- Update API documentation.
|
5
11
|
|
data/Gemfile
CHANGED
@@ -9,7 +9,8 @@ gem 'parallel', '>= 1.17.0'
|
|
9
9
|
gem 'rake', '~> 12.0'
|
10
10
|
gem 'rake-compiler', '~> 1.0'
|
11
11
|
gem 'rspec', '~> 3.0'
|
12
|
-
gem 'rubocop', '~> 0
|
12
|
+
gem 'rubocop', '~> 1.0'
|
13
13
|
gem 'rubocop-performance', '~> 1.8'
|
14
|
-
gem 'rubocop-
|
14
|
+
gem 'rubocop-rake', '~> 0.5'
|
15
|
+
gem 'rubocop-rspec', '~> 2.0'
|
15
16
|
gem 'simplecov', '~> 0.19'
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
[](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
|
6
6
|
[](https://badge.fury.io/rb/rumale)
|
7
|
-
[](https://github.com/yoshoku/rumale/blob/
|
7
|
+
[](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
8
8
|
[](https://yoshoku.github.io/rumale/doc/)
|
9
9
|
|
10
10
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
@@ -244,4 +244,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
|
|
244
244
|
## Code of Conduct
|
245
245
|
|
246
246
|
Everyone interacting in the Rumale project’s codebases, issue trackers,
|
247
|
-
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/
|
247
|
+
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).
|
data/lib/rumale.rb
CHANGED
@@ -59,6 +59,8 @@ require 'rumale/ensemble/random_forest_classifier'
|
|
59
59
|
require 'rumale/ensemble/random_forest_regressor'
|
60
60
|
require 'rumale/ensemble/extra_trees_classifier'
|
61
61
|
require 'rumale/ensemble/extra_trees_regressor'
|
62
|
+
require 'rumale/ensemble/stacking_classifier'
|
63
|
+
require 'rumale/ensemble/stacking_regressor'
|
62
64
|
require 'rumale/clustering/k_means'
|
63
65
|
require 'rumale/clustering/mini_batch_k_means'
|
64
66
|
require 'rumale/clustering/k_medoids'
|
@@ -59,7 +59,7 @@ module Rumale
|
|
59
59
|
@params[:solver] = if solver == 'auto'
|
60
60
|
load_linalg? ? 'evd' : 'fpt'
|
61
61
|
else
|
62
|
-
solver != 'evd' ? 'fpt' : 'evd'
|
62
|
+
solver != 'evd' ? 'fpt' : 'evd' # rubocop:disable Style/NegatedIfElseCondition
|
63
63
|
end
|
64
64
|
@params[:n_components] = n_components
|
65
65
|
@params[:max_iter] = max_iter
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# StackingClassifier is a class that implements classifier with stacking method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
|
17
|
+
# classifier = Rumale::Ensemble::StackingClassifier.new(
|
18
|
+
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
|
+
# )
|
20
|
+
# classifier.fit(training_samples, training_labels)
|
21
|
+
# results = classifier.predict(testing_samples)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class StackingClassifier
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Classifier
|
28
|
+
|
29
|
+
# Return the base classifiers.
|
30
|
+
# @return [Hash<Symbol,Classifier>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the meta classifier.
|
34
|
+
# @return [Classifier]
|
35
|
+
attr_reader :meta_estimator
|
36
|
+
|
37
|
+
# Return the class labels.
|
38
|
+
# @return [Numo::Int32] (size: n_classes)
|
39
|
+
attr_reader :classes
|
40
|
+
|
41
|
+
# Return the method used by each base classifier.
|
42
|
+
# @return [Hash<Symbol,Symbol>]
|
43
|
+
attr_reader :stack_method
|
44
|
+
|
45
|
+
# Create a new classifier with stacking method.
|
46
|
+
#
|
47
|
+
# @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
|
48
|
+
# @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
|
49
|
+
# If nil is given, LogisticRegression is used.
|
50
|
+
# @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
|
51
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
|
52
|
+
# @param stack_method [String] The method name of the base classifiers used for meta feature extraction.
|
53
|
+
# If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
|
54
|
+
# on each classifier.
|
55
|
+
# @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
|
56
|
+
# @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
|
57
|
+
def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
|
58
|
+
check_params_type(Hash, estimators: estimators)
|
59
|
+
check_params_numeric(n_splits: n_splits)
|
60
|
+
check_params_string(stack_method: stack_method)
|
61
|
+
check_params_boolean(shuffle: shuffle, passthrough: passthrough)
|
62
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
63
|
+
@estimators = estimators
|
64
|
+
@meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
|
65
|
+
@classes = nil
|
66
|
+
@stack_method = nil
|
67
|
+
@output_size = nil
|
68
|
+
@params = {}
|
69
|
+
@params[:n_splits] = n_splits
|
70
|
+
@params[:shuffle] = shuffle
|
71
|
+
@params[:stack_method] = stack_method
|
72
|
+
@params[:passthrough] = passthrough
|
73
|
+
@params[:random_seed] = random_seed || srand
|
74
|
+
end
|
75
|
+
|
76
|
+
# Fit the model with given training data.
|
77
|
+
#
|
78
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
79
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
80
|
+
# @return [StackingClassifier] The learned classifier itself.
|
81
|
+
def fit(x, y)
|
82
|
+
x = check_convert_sample_array(x)
|
83
|
+
y = check_convert_label_array(y)
|
84
|
+
check_sample_label_size(x, y)
|
85
|
+
|
86
|
+
n_samples, n_features = x.shape
|
87
|
+
|
88
|
+
@encoder = Rumale::Preprocessing::LabelEncoder.new
|
89
|
+
y_encoded = @encoder.fit_transform(y)
|
90
|
+
@classes = Numo::NArray[*@encoder.classes]
|
91
|
+
|
92
|
+
# training base classifiers with all training data.
|
93
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
|
94
|
+
|
95
|
+
# detecting feature extraction method and its size of output for each base classifier.
|
96
|
+
@stack_method = detect_stack_method
|
97
|
+
@output_size = detect_output_size(n_features)
|
98
|
+
|
99
|
+
# extracting meta features with base classifiers.
|
100
|
+
n_components = @output_size.values.inject(:+)
|
101
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
102
|
+
|
103
|
+
kf = Rumale::ModelSelection::StratifiedKFold.new(
|
104
|
+
n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
|
105
|
+
)
|
106
|
+
|
107
|
+
kf.split(x, y_encoded).each do |train_ids, valid_ids|
|
108
|
+
x_train = x[train_ids, true]
|
109
|
+
y_train = y_encoded[train_ids]
|
110
|
+
x_valid = x[valid_ids, true]
|
111
|
+
f_start = 0
|
112
|
+
@estimators.each_key do |name|
|
113
|
+
est_fold = Marshal.load(Marshal.dump(@estimators[name]))
|
114
|
+
f_last = f_start + @output_size[name]
|
115
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
116
|
+
z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
|
117
|
+
f_start = f_last
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
# concatenating original features.
|
122
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
123
|
+
|
124
|
+
# training meta classifier.
|
125
|
+
@meta_estimator.fit(z, y_encoded)
|
126
|
+
|
127
|
+
self
|
128
|
+
end
|
129
|
+
|
130
|
+
# Calculate confidence scores for samples.
|
131
|
+
#
|
132
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
133
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
|
134
|
+
def decision_function(x)
|
135
|
+
x = check_convert_sample_array(x)
|
136
|
+
z = transform(x)
|
137
|
+
@meta_estimator.decision_function(z)
|
138
|
+
end
|
139
|
+
|
140
|
+
# Predict class labels for samples.
|
141
|
+
#
|
142
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
143
|
+
# @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
|
144
|
+
def predict(x)
|
145
|
+
x = check_convert_sample_array(x)
|
146
|
+
z = transform(x)
|
147
|
+
Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
|
148
|
+
end
|
149
|
+
|
150
|
+
# Predict probability for samples.
|
151
|
+
#
|
152
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
153
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
|
154
|
+
def predict_proba(x)
|
155
|
+
x = check_convert_sample_array(x)
|
156
|
+
z = transform(x)
|
157
|
+
@meta_estimator.predict_proba(z)
|
158
|
+
end
|
159
|
+
|
160
|
+
# Transform the given data with the learned model.
|
161
|
+
#
|
162
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
|
163
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
|
164
|
+
def transform(x)
|
165
|
+
x = check_convert_sample_array(x)
|
166
|
+
n_samples = x.shape[0]
|
167
|
+
n_components = @output_size.values.inject(:+)
|
168
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
169
|
+
f_start = 0
|
170
|
+
@estimators.each_key do |name|
|
171
|
+
f_last = f_start + @output_size[name]
|
172
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
173
|
+
z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
|
174
|
+
f_start = f_last
|
175
|
+
end
|
176
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
177
|
+
z
|
178
|
+
end
|
179
|
+
|
180
|
+
# Fit the model with training data, and then transform them with the learned model.
|
181
|
+
#
|
182
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
183
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
184
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
|
185
|
+
def fit_transform(x, y)
|
186
|
+
x = check_convert_sample_array(x)
|
187
|
+
y = check_convert_label_array(y)
|
188
|
+
fit(x, y).transform(x)
|
189
|
+
end
|
190
|
+
|
191
|
+
private
|
192
|
+
|
193
|
+
STACK_METHODS = %i[predict_proba decision_function predict].freeze
|
194
|
+
|
195
|
+
private_constant :STACK_METHODS
|
196
|
+
|
197
|
+
def detect_stack_method
|
198
|
+
if @params[:stack_method] == 'auto'
|
199
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
|
200
|
+
else
|
201
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
def detect_output_size(n_features)
|
206
|
+
x_dummy = Numo::DFloat.new(2, n_features).rand
|
207
|
+
@estimators.each_key.with_object({}) do |name, obj|
|
208
|
+
output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
|
209
|
+
obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# StackingRegressor is a class that implements regressor with stacking method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
|
17
|
+
# regressor = Rumale::Ensemble::StackingRegressor.new(
|
18
|
+
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
|
+
# )
|
20
|
+
# regressor.fit(training_samples, training_values)
|
21
|
+
# results = regressor.predict(testing_samples)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class StackingRegressor
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Regressor
|
28
|
+
|
29
|
+
# Return the base regressors.
|
30
|
+
# @return [Hash<Symbol,Regressor>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the meta regressor.
|
34
|
+
# @return [Regressor]
|
35
|
+
attr_reader :meta_estimator
|
36
|
+
|
37
|
+
# Create a new regressor with stacking method.
|
38
|
+
#
|
39
|
+
# @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
|
40
|
+
# @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
|
41
|
+
# If nil is given, Ridge is used.
|
42
|
+
# @param n_splits [Integer] The number of folds for cross validation with k-fold on meta feature extraction in training phase.
|
43
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
|
44
|
+
# @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
|
45
|
+
# @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
|
46
|
+
def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
|
47
|
+
check_params_type(Hash, estimators: estimators)
|
48
|
+
check_params_numeric(n_splits: n_splits)
|
49
|
+
check_params_boolean(shuffle: shuffle, passthrough: passthrough)
|
50
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
51
|
+
@estimators = estimators
|
52
|
+
@meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
|
53
|
+
@output_size = nil
|
54
|
+
@params = {}
|
55
|
+
@params[:n_splits] = n_splits
|
56
|
+
@params[:shuffle] = shuffle
|
57
|
+
@params[:passthrough] = passthrough
|
58
|
+
@params[:random_seed] = random_seed || srand
|
59
|
+
end
|
60
|
+
|
61
|
+
# Fit the model with given training data.
|
62
|
+
#
|
63
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
64
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
|
65
|
+
# @return [StackingRegressor] The learned regressor itself.
|
66
|
+
def fit(x, y)
|
67
|
+
x = check_convert_sample_array(x)
|
68
|
+
y = check_convert_tvalue_array(y)
|
69
|
+
check_sample_tvalue_size(x, y)
|
70
|
+
|
71
|
+
n_samples, n_features = x.shape
|
72
|
+
n_outputs = y.ndim == 1 ? 1 : y.shape[1]
|
73
|
+
|
74
|
+
# training base regressors with all training data.
|
75
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y) }
|
76
|
+
|
77
|
+
# detecting size of output for each base regressor.
|
78
|
+
@output_size = detect_output_size(n_features)
|
79
|
+
|
80
|
+
# extracting meta features with base regressors.
|
81
|
+
n_components = @output_size.values.inject(:+)
|
82
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
83
|
+
|
84
|
+
kf = Rumale::ModelSelection::KFold.new(
|
85
|
+
n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
|
86
|
+
)
|
87
|
+
|
88
|
+
kf.split(x, y).each do |train_ids, valid_ids|
|
89
|
+
x_train = x[train_ids, true]
|
90
|
+
y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
|
91
|
+
x_valid = x[valid_ids, true]
|
92
|
+
f_start = 0
|
93
|
+
@estimators.each_key do |name|
|
94
|
+
est_fold = Marshal.load(Marshal.dump(@estimators[name]))
|
95
|
+
f_last = f_start + @output_size[name]
|
96
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
97
|
+
z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
|
98
|
+
f_start = f_last
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# concatenating original features.
|
103
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
104
|
+
|
105
|
+
# training meta regressor.
|
106
|
+
@meta_estimator.fit(z, y)
|
107
|
+
|
108
|
+
self
|
109
|
+
end
|
110
|
+
|
111
|
+
# Predict values for samples.
|
112
|
+
#
|
113
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
114
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
|
115
|
+
def predict(x)
|
116
|
+
x = check_convert_sample_array(x)
|
117
|
+
z = transform(x)
|
118
|
+
@meta_estimator.predict(z)
|
119
|
+
end
|
120
|
+
|
121
|
+
# Transform the given data with the learned model.
|
122
|
+
#
|
123
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
|
124
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
|
125
|
+
def transform(x)
|
126
|
+
x = check_convert_sample_array(x)
|
127
|
+
n_samples = x.shape[0]
|
128
|
+
n_components = @output_size.values.inject(:+)
|
129
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
130
|
+
f_start = 0
|
131
|
+
@estimators.each_key do |name|
|
132
|
+
f_last = f_start + @output_size[name]
|
133
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
134
|
+
z[true, f_position] = @estimators[name].predict(x)
|
135
|
+
f_start = f_last
|
136
|
+
end
|
137
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
138
|
+
z
|
139
|
+
end
|
140
|
+
|
141
|
+
# Fit the model with training data, and then transform them with the learned model.
|
142
|
+
#
|
143
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
144
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
|
145
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
|
146
|
+
def fit_transform(x, y)
|
147
|
+
x = check_convert_sample_array(x)
|
148
|
+
y = check_convert_tvalue_array(y)
|
149
|
+
fit(x, y).transform(x)
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def detect_output_size(n_features)
|
155
|
+
x_dummy = Numo::DFloat.new(2, n_features).rand
|
156
|
+
@estimators.each_key.with_object({}) do |name, obj|
|
157
|
+
output_dummy = @estimators[name].predict(x_dummy)
|
158
|
+
obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -67,7 +67,7 @@ module Rumale
|
|
67
67
|
def transform(x)
|
68
68
|
raise 'FeatureHasher#transform requires Mmh3 but that is not loaded.' unless enable_mmh3?
|
69
69
|
|
70
|
-
x = [x] unless x.is_a?(Array)
|
70
|
+
x = [x] unless x.is_a?(Array)
|
71
71
|
n_samples = x.size
|
72
72
|
|
73
73
|
z = Numo::DFloat.zeros(n_samples, n_features)
|
@@ -99,7 +99,7 @@ module Rumale
|
|
99
99
|
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
100
100
|
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
101
101
|
def transform(x)
|
102
|
-
x = [x] unless x.is_a?(Array)
|
102
|
+
x = [x] unless x.is_a?(Array)
|
103
103
|
n_samples = x.size
|
104
104
|
n_features = @vocabulary.size
|
105
105
|
z = Numo::DFloat.zeros(n_samples, n_features)
|
@@ -82,7 +82,7 @@ module Rumale
|
|
82
82
|
@params[:solver] = if solver == 'auto'
|
83
83
|
load_linalg? ? 'svd' : 'sgd'
|
84
84
|
else
|
85
|
-
solver != 'svd' ? 'sgd' : 'svd'
|
85
|
+
solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
|
86
86
|
end
|
87
87
|
@params[:decay] ||= @params[:learning_rate]
|
88
88
|
@params[:random_seed] ||= srand
|
@@ -181,7 +181,7 @@ module Rumale
|
|
181
181
|
@classes.size > 2
|
182
182
|
end
|
183
183
|
|
184
|
-
def fit_lbfgs(base_x, base_y)
|
184
|
+
def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
185
185
|
if multiclass_problem?
|
186
186
|
fnc = proc do |w, x, y, a|
|
187
187
|
n_features = x.shape[1]
|
@@ -85,7 +85,7 @@ module Rumale
|
|
85
85
|
@params[:solver] = if solver == 'auto'
|
86
86
|
load_linalg? ? 'svd' : 'sgd'
|
87
87
|
else
|
88
|
-
solver != 'svd' ? 'sgd' : 'svd'
|
88
|
+
solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
|
89
89
|
end
|
90
90
|
@params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
|
91
91
|
@params[:random_seed] ||= srand
|
@@ -123,7 +123,7 @@ module Rumale
|
|
123
123
|
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
124
124
|
# @param coef [Integer] The parameter of polynomial kernel.
|
125
125
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
126
|
-
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
|
126
|
+
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
127
127
|
y = x if y.nil?
|
128
128
|
gamma ||= 1.0 / x.shape[1]
|
129
129
|
x = Rumale::Validation.check_convert_sample_array(x)
|
data/lib/rumale/validation.rb
CHANGED
@@ -109,7 +109,7 @@ module Rumale
|
|
109
109
|
|
110
110
|
# @!visibility private
|
111
111
|
def check_params_positive(params = {})
|
112
|
-
params.
|
112
|
+
params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
|
113
113
|
nil
|
114
114
|
end
|
115
115
|
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
|
|
38
38
|
|
39
39
|
spec.metadata = {
|
40
40
|
'homepage_uri' => 'https://github.com/yoshoku/rumale',
|
41
|
-
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/
|
41
|
+
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md',
|
42
42
|
'source_code_uri' => 'https://github.com/yoshoku/rumale',
|
43
43
|
'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
|
44
44
|
'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.
|
4
|
+
version: 0.22.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -104,6 +104,8 @@ files:
|
|
104
104
|
- lib/rumale/ensemble/gradient_boosting_regressor.rb
|
105
105
|
- lib/rumale/ensemble/random_forest_classifier.rb
|
106
106
|
- lib/rumale/ensemble/random_forest_regressor.rb
|
107
|
+
- lib/rumale/ensemble/stacking_classifier.rb
|
108
|
+
- lib/rumale/ensemble/stacking_regressor.rb
|
107
109
|
- lib/rumale/evaluation_measure/accuracy.rb
|
108
110
|
- lib/rumale/evaluation_measure/adjusted_rand_score.rb
|
109
111
|
- lib/rumale/evaluation_measure/calinski_harabasz_score.rb
|
@@ -205,11 +207,11 @@ licenses:
|
|
205
207
|
- BSD-2-Clause
|
206
208
|
metadata:
|
207
209
|
homepage_uri: https://github.com/yoshoku/rumale
|
208
|
-
changelog_uri: https://github.com/yoshoku/rumale/blob/
|
210
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
209
211
|
source_code_uri: https://github.com/yoshoku/rumale
|
210
212
|
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
211
213
|
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
212
|
-
post_install_message:
|
214
|
+
post_install_message:
|
213
215
|
rdoc_options: []
|
214
216
|
require_paths:
|
215
217
|
- lib
|
@@ -225,7 +227,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
227
|
version: '0'
|
226
228
|
requirements: []
|
227
229
|
rubygems_version: 3.1.4
|
228
|
-
signing_key:
|
230
|
+
signing_key:
|
229
231
|
specification_version: 4
|
230
232
|
summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
|
231
233
|
algorithms with interfaces similar to Scikit-Learn in Python.
|