rumale 0.22.1 → 0.22.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +1 -1
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +7 -1
- data/Gemfile +3 -2
- data/README.md +2 -2
- data/lib/rumale.rb +2 -0
- data/lib/rumale/decomposition/pca.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +214 -0
- data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +1 -1
- data/lib/rumale/linear_model/logistic_regression.rb +1 -1
- data/lib/rumale/linear_model/ridge.rb +1 -1
- data/lib/rumale/pairwise_metric.rb +1 -1
- data/lib/rumale/validation.rb +1 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -1
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 703a6895f4218ca45c5d5ae5e86559b077cf1be213d4939eb1e9ab94eac4621d
|
4
|
+
data.tar.gz: 5862466e565d1e6030c35494b5028ae980a47d373e90050c62266055fcecd374
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 988d55c681a102e0c65b9133c6aeafc049e33755955f959d6e6046f5601dd192af881424355a2b373ed2e7a5a16b74236698aef5372e09584b10fe28d1b7bc21
|
7
|
+
data.tar.gz: adc58efa3b46d9fc1a87ddb2a4df32472507d61f21a3a0eb07026068cc5e41af166fb0a0f8ae23f1b23aec649b22835a50edbed79d35255e8cc231b82b31eb8c
|
data/.github/workflows/build.yml
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
|
+
# 0.22.2
|
2
|
+
- Add classifier and regressor classes for stacking method.
|
3
|
+
- [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
|
4
|
+
- [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
|
5
|
+
- Refactor some code with RuboCop.
|
6
|
+
|
1
7
|
# 0.22.1
|
2
|
-
- Add transfomer class for MLKR, that implements Metric Learning for Kernel Regression.
|
8
|
+
- Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
|
3
9
|
- Refactor NeighbourhoodComponentAnalysis.
|
4
10
|
- Update API documentation.
|
5
11
|
|
data/Gemfile
CHANGED
@@ -9,7 +9,8 @@ gem 'parallel', '>= 1.17.0'
|
|
9
9
|
gem 'rake', '~> 12.0'
|
10
10
|
gem 'rake-compiler', '~> 1.0'
|
11
11
|
gem 'rspec', '~> 3.0'
|
12
|
-
gem 'rubocop', '~> 0
|
12
|
+
gem 'rubocop', '~> 1.0'
|
13
13
|
gem 'rubocop-performance', '~> 1.8'
|
14
|
-
gem 'rubocop-
|
14
|
+
gem 'rubocop-rake', '~> 0.5'
|
15
|
+
gem 'rubocop-rspec', '~> 2.0'
|
15
16
|
gem 'simplecov', '~> 0.19'
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
[![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
|
6
6
|
[![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
|
7
|
-
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/
|
7
|
+
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
8
8
|
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
|
9
9
|
|
10
10
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
@@ -244,4 +244,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
|
|
244
244
|
## Code of Conduct
|
245
245
|
|
246
246
|
Everyone interacting in the Rumale project’s codebases, issue trackers,
|
247
|
-
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/
|
247
|
+
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).
|
data/lib/rumale.rb
CHANGED
@@ -59,6 +59,8 @@ require 'rumale/ensemble/random_forest_classifier'
|
|
59
59
|
require 'rumale/ensemble/random_forest_regressor'
|
60
60
|
require 'rumale/ensemble/extra_trees_classifier'
|
61
61
|
require 'rumale/ensemble/extra_trees_regressor'
|
62
|
+
require 'rumale/ensemble/stacking_classifier'
|
63
|
+
require 'rumale/ensemble/stacking_regressor'
|
62
64
|
require 'rumale/clustering/k_means'
|
63
65
|
require 'rumale/clustering/mini_batch_k_means'
|
64
66
|
require 'rumale/clustering/k_medoids'
|
@@ -59,7 +59,7 @@ module Rumale
|
|
59
59
|
@params[:solver] = if solver == 'auto'
|
60
60
|
load_linalg? ? 'evd' : 'fpt'
|
61
61
|
else
|
62
|
-
solver != 'evd' ? 'fpt' : 'evd'
|
62
|
+
solver != 'evd' ? 'fpt' : 'evd' # rubocop:disable Style/NegatedIfElseCondition
|
63
63
|
end
|
64
64
|
@params[:n_components] = n_components
|
65
65
|
@params[:max_iter] = max_iter
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# StackingClassifier is a class that implements classifier with stacking method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
|
17
|
+
# classifier = Rumale::Ensemble::StackingClassifier.new(
|
18
|
+
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
|
+
# )
|
20
|
+
# classifier.fit(training_samples, training_labels)
|
21
|
+
# results = classifier.predict(testing_samples)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class StackingClassifier
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Classifier
|
28
|
+
|
29
|
+
# Return the base classifiers.
|
30
|
+
# @return [Hash<Symbol,Classifier>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the meta classifier.
|
34
|
+
# @return [Classifier]
|
35
|
+
attr_reader :meta_estimator
|
36
|
+
|
37
|
+
# Return the class labels.
|
38
|
+
# @return [Numo::Int32] (size: n_classes)
|
39
|
+
attr_reader :classes
|
40
|
+
|
41
|
+
# Return the method used by each base classifier.
|
42
|
+
# @return [Hash<Symbol,Symbol>]
|
43
|
+
attr_reader :stack_method
|
44
|
+
|
45
|
+
# Create a new classifier with stacking method.
|
46
|
+
#
|
47
|
+
# @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
|
48
|
+
# @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
|
49
|
+
# If nil is given, LogisticRegression is used.
|
50
|
+
# @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
|
51
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
|
52
|
+
# @param stack_method [String] The name of the base classifier method used for meta feature extraction.
|
53
|
+
# If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
|
54
|
+
# on each classifier.
|
55
|
+
# @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
|
56
|
+
# @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
|
57
|
+
def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
|
58
|
+
check_params_type(Hash, estimators: estimators)
|
59
|
+
check_params_numeric(n_splits: n_splits)
|
60
|
+
check_params_string(stack_method: stack_method)
|
61
|
+
check_params_boolean(shuffle: shuffle, passthrough: passthrough)
|
62
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
63
|
+
@estimators = estimators
|
64
|
+
@meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
|
65
|
+
@classes = nil
|
66
|
+
@stack_method = nil
|
67
|
+
@output_size = nil
|
68
|
+
@params = {}
|
69
|
+
@params[:n_splits] = n_splits
|
70
|
+
@params[:shuffle] = shuffle
|
71
|
+
@params[:stack_method] = stack_method
|
72
|
+
@params[:passthrough] = passthrough
|
73
|
+
@params[:random_seed] = random_seed || srand
|
74
|
+
end
|
75
|
+
|
76
|
+
# Fit the model with given training data.
|
77
|
+
#
|
78
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
79
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
80
|
+
# @return [StackingClassifier] The learned classifier itself.
|
81
|
+
def fit(x, y)
|
82
|
+
x = check_convert_sample_array(x)
|
83
|
+
y = check_convert_label_array(y)
|
84
|
+
check_sample_label_size(x, y)
|
85
|
+
|
86
|
+
n_samples, n_features = x.shape
|
87
|
+
|
88
|
+
@encoder = Rumale::Preprocessing::LabelEncoder.new
|
89
|
+
y_encoded = @encoder.fit_transform(y)
|
90
|
+
@classes = Numo::NArray[*@encoder.classes]
|
91
|
+
|
92
|
+
# training base classifiers with all training data.
|
93
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
|
94
|
+
|
95
|
+
# detecting feature extraction method and its size of output for each base classifier.
|
96
|
+
@stack_method = detect_stack_method
|
97
|
+
@output_size = detect_output_size(n_features)
|
98
|
+
|
99
|
+
# extracting meta features with base classifiers.
|
100
|
+
n_components = @output_size.values.inject(:+)
|
101
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
102
|
+
|
103
|
+
kf = Rumale::ModelSelection::StratifiedKFold.new(
|
104
|
+
n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
|
105
|
+
)
|
106
|
+
|
107
|
+
kf.split(x, y_encoded).each do |train_ids, valid_ids|
|
108
|
+
x_train = x[train_ids, true]
|
109
|
+
y_train = y_encoded[train_ids]
|
110
|
+
x_valid = x[valid_ids, true]
|
111
|
+
f_start = 0
|
112
|
+
@estimators.each_key do |name|
|
113
|
+
est_fold = Marshal.load(Marshal.dump(@estimators[name]))
|
114
|
+
f_last = f_start + @output_size[name]
|
115
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
116
|
+
z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
|
117
|
+
f_start = f_last
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
# concatenating original features.
|
122
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
123
|
+
|
124
|
+
# training meta classifier.
|
125
|
+
@meta_estimator.fit(z, y_encoded)
|
126
|
+
|
127
|
+
self
|
128
|
+
end
|
129
|
+
|
130
|
+
# Calculate confidence scores for samples.
|
131
|
+
#
|
132
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
133
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
|
134
|
+
def decision_function(x)
|
135
|
+
x = check_convert_sample_array(x)
|
136
|
+
z = transform(x)
|
137
|
+
@meta_estimator.decision_function(z)
|
138
|
+
end
|
139
|
+
|
140
|
+
# Predict class labels for samples.
|
141
|
+
#
|
142
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
143
|
+
# @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
|
144
|
+
def predict(x)
|
145
|
+
x = check_convert_sample_array(x)
|
146
|
+
z = transform(x)
|
147
|
+
Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
|
148
|
+
end
|
149
|
+
|
150
|
+
# Predict probability for samples.
|
151
|
+
#
|
152
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
153
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
|
154
|
+
def predict_proba(x)
|
155
|
+
x = check_convert_sample_array(x)
|
156
|
+
z = transform(x)
|
157
|
+
@meta_estimator.predict_proba(z)
|
158
|
+
end
|
159
|
+
|
160
|
+
# Transform the given data with the learned model.
|
161
|
+
#
|
162
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
|
163
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
|
164
|
+
def transform(x)
|
165
|
+
x = check_convert_sample_array(x)
|
166
|
+
n_samples = x.shape[0]
|
167
|
+
n_components = @output_size.values.inject(:+)
|
168
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
169
|
+
f_start = 0
|
170
|
+
@estimators.each_key do |name|
|
171
|
+
f_last = f_start + @output_size[name]
|
172
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
173
|
+
z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
|
174
|
+
f_start = f_last
|
175
|
+
end
|
176
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
177
|
+
z
|
178
|
+
end
|
179
|
+
|
180
|
+
# Fit the model with training data, and then transform them with the learned model.
|
181
|
+
#
|
182
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
183
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
184
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
|
185
|
+
def fit_transform(x, y)
|
186
|
+
x = check_convert_sample_array(x)
|
187
|
+
y = check_convert_label_array(y)
|
188
|
+
fit(x, y).transform(x)
|
189
|
+
end
|
190
|
+
|
191
|
+
private
|
192
|
+
|
193
|
+
STACK_METHODS = %i[predict_proba decision_function predict].freeze
|
194
|
+
|
195
|
+
private_constant :STACK_METHODS
|
196
|
+
|
197
|
+
def detect_stack_method
|
198
|
+
if @params[:stack_method] == 'auto'
|
199
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
|
200
|
+
else
|
201
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
def detect_output_size(n_features)
|
206
|
+
x_dummy = Numo::DFloat.new(2, n_features).rand
|
207
|
+
@estimators.each_key.with_object({}) do |name, obj|
|
208
|
+
output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
|
209
|
+
obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# StackingRegressor is a class that implements regressor with stacking method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
|
17
|
+
# regressor = Rumale::Ensemble::StackingRegressor.new(
|
18
|
+
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
|
+
# )
|
20
|
+
# regressor.fit(training_samples, training_values)
|
21
|
+
# results = regressor.predict(testing_samples)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class StackingRegressor
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Regressor
|
28
|
+
|
29
|
+
# Return the base regressors.
|
30
|
+
# @return [Hash<Symbol,Regressor>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the meta regressor.
|
34
|
+
# @return [Regressor]
|
35
|
+
attr_reader :meta_estimator
|
36
|
+
|
37
|
+
# Create a new regressor with stacking method.
|
38
|
+
#
|
39
|
+
# @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
|
40
|
+
# @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
|
41
|
+
# If nil is given, Ridge is used.
|
42
|
+
# @param n_splits [Integer] The number of folds for cross validation with k-fold on meta feature extraction in training phase.
|
43
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
|
44
|
+
# @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
|
45
|
+
# @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
|
46
|
+
def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
|
47
|
+
check_params_type(Hash, estimators: estimators)
|
48
|
+
check_params_numeric(n_splits: n_splits)
|
49
|
+
check_params_boolean(shuffle: shuffle, passthrough: passthrough)
|
50
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
51
|
+
@estimators = estimators
|
52
|
+
@meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
|
53
|
+
@output_size = nil
|
54
|
+
@params = {}
|
55
|
+
@params[:n_splits] = n_splits
|
56
|
+
@params[:shuffle] = shuffle
|
57
|
+
@params[:passthrough] = passthrough
|
58
|
+
@params[:random_seed] = random_seed || srand
|
59
|
+
end
|
60
|
+
|
61
|
+
# Fit the model with given training data.
|
62
|
+
#
|
63
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
64
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
|
65
|
+
# @return [StackingRegressor] The learned regressor itself.
|
66
|
+
def fit(x, y)
|
67
|
+
x = check_convert_sample_array(x)
|
68
|
+
y = check_convert_tvalue_array(y)
|
69
|
+
check_sample_tvalue_size(x, y)
|
70
|
+
|
71
|
+
n_samples, n_features = x.shape
|
72
|
+
n_outputs = y.ndim == 1 ? 1 : y.shape[1]
|
73
|
+
|
74
|
+
# training base regressors with all training data.
|
75
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y) }
|
76
|
+
|
77
|
+
# detecting size of output for each base regressor.
|
78
|
+
@output_size = detect_output_size(n_features)
|
79
|
+
|
80
|
+
# extracting meta features with base regressors.
|
81
|
+
n_components = @output_size.values.inject(:+)
|
82
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
83
|
+
|
84
|
+
kf = Rumale::ModelSelection::KFold.new(
|
85
|
+
n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
|
86
|
+
)
|
87
|
+
|
88
|
+
kf.split(x, y).each do |train_ids, valid_ids|
|
89
|
+
x_train = x[train_ids, true]
|
90
|
+
y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
|
91
|
+
x_valid = x[valid_ids, true]
|
92
|
+
f_start = 0
|
93
|
+
@estimators.each_key do |name|
|
94
|
+
est_fold = Marshal.load(Marshal.dump(@estimators[name]))
|
95
|
+
f_last = f_start + @output_size[name]
|
96
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
97
|
+
z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
|
98
|
+
f_start = f_last
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# concatenating original features.
|
103
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
104
|
+
|
105
|
+
# training meta regressor.
|
106
|
+
@meta_estimator.fit(z, y)
|
107
|
+
|
108
|
+
self
|
109
|
+
end
|
110
|
+
|
111
|
+
# Predict values for samples.
|
112
|
+
#
|
113
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
114
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
|
115
|
+
def predict(x)
|
116
|
+
x = check_convert_sample_array(x)
|
117
|
+
z = transform(x)
|
118
|
+
@meta_estimator.predict(z)
|
119
|
+
end
|
120
|
+
|
121
|
+
# Transform the given data with the learned model.
|
122
|
+
#
|
123
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
|
124
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
|
125
|
+
def transform(x)
|
126
|
+
x = check_convert_sample_array(x)
|
127
|
+
n_samples = x.shape[0]
|
128
|
+
n_components = @output_size.values.inject(:+)
|
129
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
130
|
+
f_start = 0
|
131
|
+
@estimators.each_key do |name|
|
132
|
+
f_last = f_start + @output_size[name]
|
133
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
134
|
+
z[true, f_position] = @estimators[name].predict(x)
|
135
|
+
f_start = f_last
|
136
|
+
end
|
137
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
138
|
+
z
|
139
|
+
end
|
140
|
+
|
141
|
+
# Fit the model with training data, and then transform them with the learned model.
|
142
|
+
#
|
143
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
144
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
|
145
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
|
146
|
+
def fit_transform(x, y)
|
147
|
+
x = check_convert_sample_array(x)
|
148
|
+
y = check_convert_tvalue_array(y)
|
149
|
+
fit(x, y).transform(x)
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def detect_output_size(n_features)
|
155
|
+
x_dummy = Numo::DFloat.new(2, n_features).rand
|
156
|
+
@estimators.each_key.with_object({}) do |name, obj|
|
157
|
+
output_dummy = @estimators[name].predict(x_dummy)
|
158
|
+
obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -67,7 +67,7 @@ module Rumale
|
|
67
67
|
def transform(x)
|
68
68
|
raise 'FeatureHasher#transform requires Mmh3 but that is not loaded.' unless enable_mmh3?
|
69
69
|
|
70
|
-
x = [x] unless x.is_a?(Array)
|
70
|
+
x = [x] unless x.is_a?(Array)
|
71
71
|
n_samples = x.size
|
72
72
|
|
73
73
|
z = Numo::DFloat.zeros(n_samples, n_features)
|
@@ -99,7 +99,7 @@ module Rumale
|
|
99
99
|
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
100
100
|
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
101
101
|
def transform(x)
|
102
|
-
x = [x] unless x.is_a?(Array)
|
102
|
+
x = [x] unless x.is_a?(Array)
|
103
103
|
n_samples = x.size
|
104
104
|
n_features = @vocabulary.size
|
105
105
|
z = Numo::DFloat.zeros(n_samples, n_features)
|
@@ -82,7 +82,7 @@ module Rumale
|
|
82
82
|
@params[:solver] = if solver == 'auto'
|
83
83
|
load_linalg? ? 'svd' : 'sgd'
|
84
84
|
else
|
85
|
-
solver != 'svd' ? 'sgd' : 'svd'
|
85
|
+
solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
|
86
86
|
end
|
87
87
|
@params[:decay] ||= @params[:learning_rate]
|
88
88
|
@params[:random_seed] ||= srand
|
@@ -181,7 +181,7 @@ module Rumale
|
|
181
181
|
@classes.size > 2
|
182
182
|
end
|
183
183
|
|
184
|
-
def fit_lbfgs(base_x, base_y)
|
184
|
+
def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
185
185
|
if multiclass_problem?
|
186
186
|
fnc = proc do |w, x, y, a|
|
187
187
|
n_features = x.shape[1]
|
@@ -85,7 +85,7 @@ module Rumale
|
|
85
85
|
@params[:solver] = if solver == 'auto'
|
86
86
|
load_linalg? ? 'svd' : 'sgd'
|
87
87
|
else
|
88
|
-
solver != 'svd' ? 'sgd' : 'svd'
|
88
|
+
solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
|
89
89
|
end
|
90
90
|
@params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
|
91
91
|
@params[:random_seed] ||= srand
|
@@ -123,7 +123,7 @@ module Rumale
|
|
123
123
|
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
124
124
|
# @param coef [Integer] The parameter of polynomial kernel.
|
125
125
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
126
|
-
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
|
126
|
+
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
127
127
|
y = x if y.nil?
|
128
128
|
gamma ||= 1.0 / x.shape[1]
|
129
129
|
x = Rumale::Validation.check_convert_sample_array(x)
|
data/lib/rumale/validation.rb
CHANGED
@@ -109,7 +109,7 @@ module Rumale
|
|
109
109
|
|
110
110
|
# @!visibility private
|
111
111
|
def check_params_positive(params = {})
|
112
|
-
params.
|
112
|
+
params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
|
113
113
|
nil
|
114
114
|
end
|
115
115
|
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
|
|
38
38
|
|
39
39
|
spec.metadata = {
|
40
40
|
'homepage_uri' => 'https://github.com/yoshoku/rumale',
|
41
|
-
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/
|
41
|
+
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md',
|
42
42
|
'source_code_uri' => 'https://github.com/yoshoku/rumale',
|
43
43
|
'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
|
44
44
|
'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.
|
4
|
+
version: 0.22.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -104,6 +104,8 @@ files:
|
|
104
104
|
- lib/rumale/ensemble/gradient_boosting_regressor.rb
|
105
105
|
- lib/rumale/ensemble/random_forest_classifier.rb
|
106
106
|
- lib/rumale/ensemble/random_forest_regressor.rb
|
107
|
+
- lib/rumale/ensemble/stacking_classifier.rb
|
108
|
+
- lib/rumale/ensemble/stacking_regressor.rb
|
107
109
|
- lib/rumale/evaluation_measure/accuracy.rb
|
108
110
|
- lib/rumale/evaluation_measure/adjusted_rand_score.rb
|
109
111
|
- lib/rumale/evaluation_measure/calinski_harabasz_score.rb
|
@@ -205,11 +207,11 @@ licenses:
|
|
205
207
|
- BSD-2-Clause
|
206
208
|
metadata:
|
207
209
|
homepage_uri: https://github.com/yoshoku/rumale
|
208
|
-
changelog_uri: https://github.com/yoshoku/rumale/blob/
|
210
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
209
211
|
source_code_uri: https://github.com/yoshoku/rumale
|
210
212
|
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
211
213
|
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
212
|
-
post_install_message:
|
214
|
+
post_install_message:
|
213
215
|
rdoc_options: []
|
214
216
|
require_paths:
|
215
217
|
- lib
|
@@ -225,7 +227,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
227
|
version: '0'
|
226
228
|
requirements: []
|
227
229
|
rubygems_version: 3.1.4
|
228
|
-
signing_key:
|
230
|
+
signing_key:
|
229
231
|
specification_version: 4
|
230
232
|
summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
|
231
233
|
algorithms with interfaces similar to Scikit-Learn in Python.
|