rumale 0.22.2 → 0.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
data/ext/rumale/tree.h
CHANGED
data/lib/rumale.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'numo/narray'
|
4
4
|
|
5
|
-
require 'rumale/
|
5
|
+
require 'rumale/rumaleext'
|
6
6
|
|
7
7
|
require 'rumale/version'
|
8
8
|
require 'rumale/validation'
|
@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
|
|
30
30
|
require 'rumale/linear_model/ridge'
|
31
31
|
require 'rumale/linear_model/lasso'
|
32
32
|
require 'rumale/linear_model/elastic_net'
|
33
|
+
require 'rumale/linear_model/nnls'
|
33
34
|
require 'rumale/kernel_machine/kernel_svc'
|
34
35
|
require 'rumale/kernel_machine/kernel_pca'
|
35
36
|
require 'rumale/kernel_machine/kernel_fda'
|
36
37
|
require 'rumale/kernel_machine/kernel_ridge'
|
38
|
+
require 'rumale/kernel_machine/kernel_ridge_classifier'
|
37
39
|
require 'rumale/multiclass/one_vs_rest_classifier'
|
38
40
|
require 'rumale/nearest_neighbors/vp_tree'
|
39
41
|
require 'rumale/nearest_neighbors/k_neighbors_classifier'
|
@@ -61,6 +63,8 @@ require 'rumale/ensemble/extra_trees_classifier'
|
|
61
63
|
require 'rumale/ensemble/extra_trees_regressor'
|
62
64
|
require 'rumale/ensemble/stacking_classifier'
|
63
65
|
require 'rumale/ensemble/stacking_regressor'
|
66
|
+
require 'rumale/ensemble/voting_classifier'
|
67
|
+
require 'rumale/ensemble/voting_regressor'
|
64
68
|
require 'rumale/clustering/k_means'
|
65
69
|
require 'rumale/clustering/mini_batch_k_means'
|
66
70
|
require 'rumale/clustering/k_medoids'
|
@@ -100,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
|
|
100
104
|
require 'rumale/preprocessing/ordinal_encoder'
|
101
105
|
require 'rumale/preprocessing/binarizer'
|
102
106
|
require 'rumale/preprocessing/polynomial_features'
|
107
|
+
require 'rumale/preprocessing/kernel_calculator'
|
103
108
|
require 'rumale/model_selection/k_fold'
|
104
109
|
require 'rumale/model_selection/group_k_fold'
|
105
110
|
require 'rumale/model_selection/stratified_k_fold'
|
@@ -11,13 +11,15 @@ module Rumale
|
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def enable_linalg?
|
14
|
+
def enable_linalg?(warning: true)
|
15
15
|
if defined?(Numo::Linalg).nil?
|
16
|
-
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
|
16
|
+
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
|
17
17
|
return false
|
18
18
|
end
|
19
19
|
if Numo::Linalg::VERSION < '0.1.4'
|
20
|
-
|
20
|
+
if warning
|
21
|
+
warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
|
22
|
+
end
|
21
23
|
return false
|
22
24
|
end
|
23
25
|
true
|
data/lib/rumale/dataset.rb
CHANGED
@@ -12,22 +12,26 @@ module Rumale
|
|
12
12
|
# Load a dataset with the libsvm file format into Numo::NArray.
|
13
13
|
#
|
14
14
|
# @param filename [String] A path to a dataset file.
|
15
|
+
# @param n_features [Integer/Nil] The number of features of data to load.
|
16
|
+
# If nil is given, it will be detected automatically from given file.
|
15
17
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
16
18
|
# @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
|
17
19
|
#
|
18
20
|
# @return [Array<Numo::NArray>]
|
19
21
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
20
22
|
# and (n_samples) vector for labels or target values.
|
21
|
-
def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
|
23
|
+
def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
|
22
24
|
ftvecs = []
|
23
25
|
labels = []
|
24
|
-
|
26
|
+
n_features_detected = 0
|
25
27
|
CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
|
26
28
|
label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
|
27
29
|
labels.push(label)
|
28
30
|
ftvecs.push(ftvec)
|
29
|
-
|
31
|
+
n_features_detected = max_idx if n_features_detected < max_idx
|
30
32
|
end
|
33
|
+
n_features ||= n_features_detected
|
34
|
+
n_features = [n_features, n_features_detected].max
|
31
35
|
[convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
|
32
36
|
end
|
33
37
|
|
@@ -81,7 +81,7 @@ module Rumale
|
|
81
81
|
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
82
82
|
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
83
83
|
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
84
|
-
@mixing = Numo::Linalg.pinv(@components)
|
84
|
+
@mixing = Numo::Linalg.pinv(@components).dup
|
85
85
|
if @params[:n_components] == 1
|
86
86
|
@components = @components.flatten.dup
|
87
87
|
@mixing = @mixing.flatten.dup
|
@@ -161,7 +161,7 @@ module Rumale
|
|
161
161
|
|
162
162
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
163
163
|
|
164
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
|
164
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
|
165
165
|
|
166
166
|
n_samples, = x.shape
|
167
167
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -182,7 +182,7 @@ module Rumale
|
|
182
182
|
else
|
183
183
|
@estimators.map { |tree| tree.apply(x) }
|
184
184
|
end
|
185
|
-
Numo::Int32[*leaf_ids].transpose
|
185
|
+
Numo::Int32[*leaf_ids].transpose.dup
|
186
186
|
end
|
187
187
|
|
188
188
|
private
|
@@ -159,7 +159,7 @@ module Rumale
|
|
159
159
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
160
160
|
def apply(x)
|
161
161
|
x = check_convert_sample_array(x)
|
162
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
162
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
163
163
|
end
|
164
164
|
|
165
165
|
private
|
@@ -136,7 +136,7 @@ module Rumale
|
|
136
136
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
137
137
|
def apply(x)
|
138
138
|
x = check_convert_sample_array(x)
|
139
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
139
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
140
140
|
end
|
141
141
|
|
142
142
|
private
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_encoder'
|
5
6
|
|
6
7
|
module Rumale
|
7
8
|
module Ensemble
|
@@ -10,18 +11,18 @@ module Rumale
|
|
10
11
|
# @example
|
11
12
|
# estimators = {
|
12
13
|
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
13
|
-
# mlp:
|
14
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
14
15
|
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
15
16
|
# }
|
16
17
|
# meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
|
17
18
|
#   classifier = Rumale::Ensemble::StackingClassifier.new(
|
18
19
|
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
20
|
# )
|
20
|
-
# classifier.fit(training_samples,
|
21
|
+
# classifier.fit(training_samples, training_labels)
|
21
22
|
# results = classifier.predict(testing_samples)
|
22
23
|
#
|
23
24
|
# *Reference*
|
24
|
-
# - Zhou, Z-H., "Ensemble
|
25
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
26
|
class StackingClassifier
|
26
27
|
include Base::BaseEstimator
|
27
28
|
include Base::Classifier
|
@@ -149,7 +150,7 @@ module Rumale
|
|
149
150
|
|
150
151
|
# Predict probability for samples.
|
151
152
|
#
|
152
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the
|
153
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
153
154
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
|
154
155
|
def predict_proba(x)
|
155
156
|
x = check_convert_sample_array(x)
|
@@ -10,18 +10,18 @@ module Rumale
|
|
10
10
|
# @example
|
11
11
|
# estimators = {
|
12
12
|
# las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
|
13
|
-
# mlp:
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
14
|
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
15
|
# }
|
16
16
|
# meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
|
17
17
|
#   regressor = Rumale::Ensemble::StackingRegressor.new(
|
18
18
|
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
19
|
# )
|
20
|
-
# regressor.fit(training_samples,
|
20
|
+
# regressor.fit(training_samples, training_values)
|
21
21
|
# results = regressor.predict(testing_samples)
|
22
22
|
#
|
23
23
|
# *Reference*
|
24
|
-
# - Zhou, Z-H., "Ensemble
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
25
|
class StackingRegressor
|
26
26
|
include Base::BaseEstimator
|
27
27
|
include Base::Regressor
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_encoder'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Ensemble
|
9
|
+
# VotingClassifier is a class that implements classifier with voting ensemble method.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# estimators = {
|
13
|
+
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
14
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
15
|
+
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
16
|
+
# }
|
17
|
+
# weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
|
18
|
+
#
|
19
|
+
# classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
|
20
|
+
# classifier.fit(x_train, y_train)
|
21
|
+
# results = classifier.predict(x_test)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class VotingClassifier
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Classifier
|
28
|
+
|
29
|
+
# Return the sub-classifiers that voted.
|
30
|
+
# @return [Hash<Symbol,Classifier>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the class labels.
|
34
|
+
# @return [Numo::Int32] (size: n_classes)
|
35
|
+
attr_reader :classes
|
36
|
+
|
37
|
+
# Create a new ensembled classifier with voting rule.
|
38
|
+
#
|
39
|
+
# @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
|
40
|
+
# @param weights [Hash<Symbol,Float>] The weight value for each classifier.
|
41
|
+
# @param voting [String] The voting rule for the predicted results of each classifier.
|
42
|
+
# If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
|
43
|
+
# If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
|
44
|
+
def initialize(estimators:, weights: nil, voting: 'hard')
|
45
|
+
check_params_type(Hash, estimators: estimators)
|
46
|
+
check_params_type_or_nil(Hash, weights: weights)
|
47
|
+
check_params_string(voting: voting)
|
48
|
+
@estimators = estimators
|
49
|
+
@classes = nil
|
50
|
+
@params = {}
|
51
|
+
@params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
|
52
|
+
@params[:voting] = voting
|
53
|
+
end
|
54
|
+
|
55
|
+
# Fit the model with given training data.
|
56
|
+
#
|
57
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
58
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
59
|
+
# @return [VotingClassifier] The learned classifier itself.
|
60
|
+
def fit(x, y)
|
61
|
+
x = check_convert_sample_array(x)
|
62
|
+
y = check_convert_label_array(y)
|
63
|
+
check_sample_label_size(x, y)
|
64
|
+
|
65
|
+
@encoder = Rumale::Preprocessing::LabelEncoder.new
|
66
|
+
y_encoded = @encoder.fit_transform(y)
|
67
|
+
@classes = Numo::NArray[*@encoder.classes]
|
68
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
|
69
|
+
|
70
|
+
self
|
71
|
+
end
|
72
|
+
|
73
|
+
# Calculate confidence scores for samples.
|
74
|
+
#
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
76
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
|
77
|
+
def decision_function(x)
|
78
|
+
x = check_convert_sample_array(x)
|
79
|
+
return predict_proba(x) if soft_voting?
|
80
|
+
|
81
|
+
n_samples = x.shape[0]
|
82
|
+
n_classes = @classes.size
|
83
|
+
z = Numo::DFloat.zeros(n_samples, n_classes)
|
84
|
+
@estimators.each do |name, estimator|
|
85
|
+
estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
|
86
|
+
end
|
87
|
+
z
|
88
|
+
end
|
89
|
+
|
90
|
+
# Predict class labels for samples.
|
91
|
+
#
|
92
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
93
|
+
# @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
|
94
|
+
def predict(x)
|
95
|
+
x = check_convert_sample_array(x)
|
96
|
+
n_samples = x.shape[0]
|
97
|
+
n_classes = @classes.size
|
98
|
+
z = decision_function(x)
|
99
|
+
predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
|
100
|
+
Numo::Int32.cast(@encoder.inverse_transform(predicted))
|
101
|
+
end
|
102
|
+
|
103
|
+
# Predict probability for samples.
|
104
|
+
#
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
106
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
107
|
+
def predict_proba(x)
|
108
|
+
x = check_convert_sample_array(x)
|
109
|
+
n_samples = x.shape[0]
|
110
|
+
n_classes = @classes.size
|
111
|
+
z = Numo::DFloat.zeros(n_samples, n_classes)
|
112
|
+
sum_weight = @params[:weights].each_value.inject(&:+)
|
113
|
+
@estimators.each do |name, estimator|
|
114
|
+
z += @params[:weights][name] * estimator.predict_proba(x)
|
115
|
+
end
|
116
|
+
z /= sum_weight
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
def soft_voting?
|
122
|
+
@params[:voting] == 'soft'
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# VotingRegressor is a class that implements regressor with voting ensemble method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
|
17
|
+
#
|
18
|
+
#   regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
|
19
|
+
# regressor.fit(x_train, y_train)
|
20
|
+
# results = regressor.predict(x_test)
|
21
|
+
#
|
22
|
+
# *Reference*
|
23
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
24
|
+
class VotingRegressor
|
25
|
+
include Base::BaseEstimator
|
26
|
+
include Base::Regressor
|
27
|
+
|
28
|
+
# Return the sub-regressors that voted.
|
29
|
+
# @return [Hash<Symbol,Regressor>]
|
30
|
+
attr_reader :estimators
|
31
|
+
|
32
|
+
# Create a new ensembled regressor with voting rule.
|
33
|
+
#
|
34
|
+
# @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
|
35
|
+
# @param weights [Hash<Symbol,Float>] The weight value for each regressor.
|
36
|
+
def initialize(estimators:, weights: nil)
|
37
|
+
check_params_type(Hash, estimators: estimators)
|
38
|
+
check_params_type_or_nil(Hash, weights: weights)
|
39
|
+
@estimators = estimators
|
40
|
+
@n_outputs = nil
|
41
|
+
@params = {}
|
42
|
+
@params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
|
43
|
+
end
|
44
|
+
|
45
|
+
# Fit the model with given training data.
|
46
|
+
#
|
47
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
48
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
49
|
+
# @return [VotingRegressor] The learned regressor itself.
|
50
|
+
def fit(x, y)
|
51
|
+
x = check_convert_sample_array(x)
|
52
|
+
y = check_convert_tvalue_array(y)
|
53
|
+
check_sample_tvalue_size(x, y)
|
54
|
+
|
55
|
+
@n_outputs = y.ndim > 1 ? y.shape[1] : 1
|
56
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y) }
|
57
|
+
|
58
|
+
self
|
59
|
+
end
|
60
|
+
|
61
|
+
# Predict values for samples.
|
62
|
+
#
|
63
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
64
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
|
65
|
+
def predict(x)
|
66
|
+
x = check_convert_sample_array(x)
|
67
|
+
z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
|
68
|
+
sum_weight = @params[:weights].each_value.inject(&:+)
|
69
|
+
@estimators.each do |name, estimator|
|
70
|
+
z += @params[:weights][name] * estimator.predict(x)
|
71
|
+
end
|
72
|
+
z / sum_weight
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def single_target?
|
78
|
+
@n_outputs == 1
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -11,7 +11,7 @@ module Rumale
|
|
11
11
|
# @example
|
12
12
|
# require 'numo/linalg/autoloader'
|
13
13
|
#
|
14
|
-
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
|
15
15
|
# new_training_samples = transformer.fit_transform(training_samples)
|
16
16
|
# new_testing_samples = transformer.transform(testing_samples)
|
17
17
|
#
|
@@ -39,12 +39,15 @@ module Rumale
|
|
39
39
|
|
40
40
|
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
41
|
#
|
42
|
-
# @param kernel [String] The type of kernel
|
43
|
-
# @param gamma [Float] The parameter
|
44
|
-
# @param
|
42
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
43
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
44
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
45
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
46
|
+
# @param n_components [Integer] The number of dimensions of the kernel feature space.
|
45
47
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
-
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
-
|
48
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
|
49
|
+
check_params_string(kernel: kernel)
|
50
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
|
48
51
|
check_params_numeric_or_nil(random_seed: random_seed)
|
49
52
|
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
53
|
@params[:random_seed] ||= srand
|
@@ -56,7 +59,7 @@ module Rumale
|
|
56
59
|
|
57
60
|
# Fit the model with given training data.
|
58
61
|
#
|
59
|
-
# @overload fit(x) ->
|
62
|
+
# @overload fit(x) -> Nystroem
|
60
63
|
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
64
|
# @return [Nystroem] The learned transformer itself.
|
62
65
|
def fit(x, _y = nil)
|
@@ -70,10 +73,10 @@ module Rumale
|
|
70
73
|
|
71
74
|
# random sampling.
|
72
75
|
@component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
|
73
|
-
@components = x[@component_indices, true]
|
76
|
+
@components = x[@component_indices, true].dup
|
74
77
|
|
75
78
|
# calculate normalizing factor.
|
76
|
-
kernel_mat =
|
79
|
+
kernel_mat = kernel_mat(@components)
|
77
80
|
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
81
|
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
82
|
u = eig_vecs.reverse(1)
|
@@ -98,9 +101,26 @@ module Rumale
|
|
98
101
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
102
|
def transform(x)
|
100
103
|
x = check_convert_sample_array(x)
|
101
|
-
z =
|
104
|
+
z = kernel_mat(x, @components)
|
102
105
|
z.dot(@normalizer)
|
103
106
|
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def kernel_mat(x, y = nil)
|
111
|
+
case @params[:kernel]
|
112
|
+
when 'rbf'
|
113
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
114
|
+
when 'poly'
|
115
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
116
|
+
when 'sigmoid'
|
117
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
118
|
+
when 'linear'
|
119
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
120
|
+
else
|
121
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
122
|
+
end
|
123
|
+
end
|
104
124
|
end
|
105
125
|
end
|
106
126
|
end
|