rumale 0.22.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
data/ext/rumale/tree.h
CHANGED
data/lib/rumale.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'numo/narray'
|
4
4
|
|
5
|
-
require 'rumale/
|
5
|
+
require 'rumale/rumaleext'
|
6
6
|
|
7
7
|
require 'rumale/version'
|
8
8
|
require 'rumale/validation'
|
@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
|
|
30
30
|
require 'rumale/linear_model/ridge'
|
31
31
|
require 'rumale/linear_model/lasso'
|
32
32
|
require 'rumale/linear_model/elastic_net'
|
33
|
+
require 'rumale/linear_model/nnls'
|
33
34
|
require 'rumale/kernel_machine/kernel_svc'
|
34
35
|
require 'rumale/kernel_machine/kernel_pca'
|
35
36
|
require 'rumale/kernel_machine/kernel_fda'
|
36
37
|
require 'rumale/kernel_machine/kernel_ridge'
|
38
|
+
require 'rumale/kernel_machine/kernel_ridge_classifier'
|
37
39
|
require 'rumale/multiclass/one_vs_rest_classifier'
|
38
40
|
require 'rumale/nearest_neighbors/vp_tree'
|
39
41
|
require 'rumale/nearest_neighbors/k_neighbors_classifier'
|
@@ -61,6 +63,8 @@ require 'rumale/ensemble/extra_trees_classifier'
|
|
61
63
|
require 'rumale/ensemble/extra_trees_regressor'
|
62
64
|
require 'rumale/ensemble/stacking_classifier'
|
63
65
|
require 'rumale/ensemble/stacking_regressor'
|
66
|
+
require 'rumale/ensemble/voting_classifier'
|
67
|
+
require 'rumale/ensemble/voting_regressor'
|
64
68
|
require 'rumale/clustering/k_means'
|
65
69
|
require 'rumale/clustering/mini_batch_k_means'
|
66
70
|
require 'rumale/clustering/k_medoids'
|
@@ -100,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
|
|
100
104
|
require 'rumale/preprocessing/ordinal_encoder'
|
101
105
|
require 'rumale/preprocessing/binarizer'
|
102
106
|
require 'rumale/preprocessing/polynomial_features'
|
107
|
+
require 'rumale/preprocessing/kernel_calculator'
|
103
108
|
require 'rumale/model_selection/k_fold'
|
104
109
|
require 'rumale/model_selection/group_k_fold'
|
105
110
|
require 'rumale/model_selection/stratified_k_fold'
|
@@ -11,13 +11,15 @@ module Rumale
|
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def enable_linalg?
|
14
|
+
def enable_linalg?(warning: true)
|
15
15
|
if defined?(Numo::Linalg).nil?
|
16
|
-
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
|
16
|
+
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
|
17
17
|
return false
|
18
18
|
end
|
19
19
|
if Numo::Linalg::VERSION < '0.1.4'
|
20
|
-
|
20
|
+
if warning
|
21
|
+
warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
|
22
|
+
end
|
21
23
|
return false
|
22
24
|
end
|
23
25
|
true
|
data/lib/rumale/dataset.rb
CHANGED
@@ -12,22 +12,26 @@ module Rumale
|
|
12
12
|
# Load a dataset with the libsvm file format into Numo::NArray.
|
13
13
|
#
|
14
14
|
# @param filename [String] A path to a dataset file.
|
15
|
+
# @param n_features [Integer/Nil] The number of features of data to load.
|
16
|
+
# If nil is given, it will be detected automatically from given file.
|
15
17
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
16
18
|
# @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
|
17
19
|
#
|
18
20
|
# @return [Array<Numo::NArray>]
|
19
21
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
20
22
|
# and (n_samples) vector for labels or target values.
|
21
|
-
def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
|
23
|
+
def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
|
22
24
|
ftvecs = []
|
23
25
|
labels = []
|
24
|
-
|
26
|
+
n_features_detected = 0
|
25
27
|
CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
|
26
28
|
label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
|
27
29
|
labels.push(label)
|
28
30
|
ftvecs.push(ftvec)
|
29
|
-
|
31
|
+
n_features_detected = max_idx if n_features_detected < max_idx
|
30
32
|
end
|
33
|
+
n_features ||= n_features_detected
|
34
|
+
n_features = [n_features, n_features_detected].max
|
31
35
|
[convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
|
32
36
|
end
|
33
37
|
|
@@ -81,7 +81,7 @@ module Rumale
|
|
81
81
|
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
82
82
|
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
83
83
|
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
84
|
-
@mixing = Numo::Linalg.pinv(@components)
|
84
|
+
@mixing = Numo::Linalg.pinv(@components).dup
|
85
85
|
if @params[:n_components] == 1
|
86
86
|
@components = @components.flatten.dup
|
87
87
|
@mixing = @mixing.flatten.dup
|
@@ -161,7 +161,7 @@ module Rumale
|
|
161
161
|
|
162
162
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
163
163
|
|
164
|
-
return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
|
164
|
+
return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
|
165
165
|
|
166
166
|
n_samples, = x.shape
|
167
167
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
@@ -182,7 +182,7 @@ module Rumale
|
|
182
182
|
else
|
183
183
|
@estimators.map { |tree| tree.apply(x) }
|
184
184
|
end
|
185
|
-
Numo::Int32[*leaf_ids].transpose
|
185
|
+
Numo::Int32[*leaf_ids].transpose.dup
|
186
186
|
end
|
187
187
|
|
188
188
|
private
|
@@ -159,7 +159,7 @@ module Rumale
|
|
159
159
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
160
160
|
def apply(x)
|
161
161
|
x = check_convert_sample_array(x)
|
162
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
162
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
163
163
|
end
|
164
164
|
|
165
165
|
private
|
@@ -136,7 +136,7 @@ module Rumale
|
|
136
136
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
137
137
|
def apply(x)
|
138
138
|
x = check_convert_sample_array(x)
|
139
|
-
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
139
|
+
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
|
140
140
|
end
|
141
141
|
|
142
142
|
private
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_encoder'
|
5
6
|
|
6
7
|
module Rumale
|
7
8
|
module Ensemble
|
@@ -10,18 +11,18 @@ module Rumale
|
|
10
11
|
# @example
|
11
12
|
# estimators = {
|
12
13
|
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
13
|
-
# mlp:
|
14
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
14
15
|
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
15
16
|
# }
|
16
17
|
# meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
|
17
18
|
# classifier = Rumale::Ensemble::StackingClassifier.new(
|
18
19
|
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
20
|
# )
|
20
|
-
# classifier.fit(training_samples,
|
21
|
+
# classifier.fit(training_samples, training_labels)
|
21
22
|
# results = classifier.predict(testing_samples)
|
22
23
|
#
|
23
24
|
# *Reference*
|
24
|
-
# - Zhou, Z-H., "Ensemble
|
25
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
26
|
class StackingClassifier
|
26
27
|
include Base::BaseEstimator
|
27
28
|
include Base::Classifier
|
@@ -149,7 +150,7 @@ module Rumale
|
|
149
150
|
|
150
151
|
# Predict probability for samples.
|
151
152
|
#
|
152
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the
|
153
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
153
154
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
|
154
155
|
def predict_proba(x)
|
155
156
|
x = check_convert_sample_array(x)
|
@@ -10,18 +10,18 @@ module Rumale
|
|
10
10
|
# @example
|
11
11
|
# estimators = {
|
12
12
|
# las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
|
13
|
-
# mlp:
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
14
|
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
15
|
# }
|
16
16
|
# meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
|
17
17
|
# regressor = Rumale::Ensemble::StackingRegressor.new(
|
18
18
|
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
19
19
|
# )
|
20
|
-
# regressor.fit(training_samples,
|
20
|
+
# regressor.fit(training_samples, training_values)
|
21
21
|
# results = regressor.predict(testing_samples)
|
22
22
|
#
|
23
23
|
# *Reference*
|
24
|
-
# - Zhou, Z-H., "Ensemble
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
25
|
class StackingRegressor
|
26
26
|
include Base::BaseEstimator
|
27
27
|
include Base::Regressor
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_encoder'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Ensemble
|
9
|
+
# VotingClassifier is a class that implements classifier with voting ensemble method.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# estimators = {
|
13
|
+
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
14
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
15
|
+
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
16
|
+
# }
|
17
|
+
# weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
|
18
|
+
#
|
19
|
+
# classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
|
20
|
+
# classifier.fit(x_train, y_train)
|
21
|
+
# results = classifier.predict(x_test)
|
22
|
+
#
|
23
|
+
# *Reference*
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
25
|
+
class VotingClassifier
|
26
|
+
include Base::BaseEstimator
|
27
|
+
include Base::Classifier
|
28
|
+
|
29
|
+
# Return the sub-classifiers that voted.
|
30
|
+
# @return [Hash<Symbol,Classifier>]
|
31
|
+
attr_reader :estimators
|
32
|
+
|
33
|
+
# Return the class labels.
|
34
|
+
# @return [Numo::Int32] (size: n_classes)
|
35
|
+
attr_reader :classes
|
36
|
+
|
37
|
+
# Create a new ensembled classifier with voting rule.
|
38
|
+
#
|
39
|
+
# @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
|
40
|
+
# @param weights [Hash<Symbol,Float>] The weight value for each classifier.
|
41
|
+
# @param voting [String] The voting rule for the predicted results of each classifier.
|
42
|
+
# If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
|
43
|
+
# If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
|
44
|
+
def initialize(estimators:, weights: nil, voting: 'hard')
|
45
|
+
check_params_type(Hash, estimators: estimators)
|
46
|
+
check_params_type_or_nil(Hash, weights: weights)
|
47
|
+
check_params_string(voting: voting)
|
48
|
+
@estimators = estimators
|
49
|
+
@classes = nil
|
50
|
+
@params = {}
|
51
|
+
@params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
|
52
|
+
@params[:voting] = voting
|
53
|
+
end
|
54
|
+
|
55
|
+
# Fit the model with given training data.
|
56
|
+
#
|
57
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
58
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
59
|
+
# @return [VotingClassifier] The learned classifier itself.
|
60
|
+
def fit(x, y)
|
61
|
+
x = check_convert_sample_array(x)
|
62
|
+
y = check_convert_label_array(y)
|
63
|
+
check_sample_label_size(x, y)
|
64
|
+
|
65
|
+
@encoder = Rumale::Preprocessing::LabelEncoder.new
|
66
|
+
y_encoded = @encoder.fit_transform(y)
|
67
|
+
@classes = Numo::NArray[*@encoder.classes]
|
68
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
|
69
|
+
|
70
|
+
self
|
71
|
+
end
|
72
|
+
|
73
|
+
# Calculate confidence scores for samples.
|
74
|
+
#
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
76
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
|
77
|
+
def decision_function(x)
|
78
|
+
x = check_convert_sample_array(x)
|
79
|
+
return predict_proba(x) if soft_voting?
|
80
|
+
|
81
|
+
n_samples = x.shape[0]
|
82
|
+
n_classes = @classes.size
|
83
|
+
z = Numo::DFloat.zeros(n_samples, n_classes)
|
84
|
+
@estimators.each do |name, estimator|
|
85
|
+
estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
|
86
|
+
end
|
87
|
+
z
|
88
|
+
end
|
89
|
+
|
90
|
+
# Predict class labels for samples.
|
91
|
+
#
|
92
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
93
|
+
# @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
|
94
|
+
def predict(x)
|
95
|
+
x = check_convert_sample_array(x)
|
96
|
+
n_samples = x.shape[0]
|
97
|
+
n_classes = @classes.size
|
98
|
+
z = decision_function(x)
|
99
|
+
predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
|
100
|
+
Numo::Int32.cast(@encoder.inverse_transform(predicted))
|
101
|
+
end
|
102
|
+
|
103
|
+
# Predict probability for samples.
|
104
|
+
#
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
106
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
107
|
+
def predict_proba(x)
|
108
|
+
x = check_convert_sample_array(x)
|
109
|
+
n_samples = x.shape[0]
|
110
|
+
n_classes = @classes.size
|
111
|
+
z = Numo::DFloat.zeros(n_samples, n_classes)
|
112
|
+
sum_weight = @params[:weights].each_value.inject(&:+)
|
113
|
+
@estimators.each do |name, estimator|
|
114
|
+
z += @params[:weights][name] * estimator.predict_proba(x)
|
115
|
+
end
|
116
|
+
z /= sum_weight
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
def soft_voting?
|
122
|
+
@params[:voting] == 'soft'
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Ensemble
|
8
|
+
# VotingRegressor is a class that implements regressor with voting ensemble method.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimators = {
|
12
|
+
# rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
|
15
|
+
# }
|
16
|
+
# weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
|
17
|
+
#
|
18
|
+
# regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
|
19
|
+
# regressor.fit(x_train, y_train)
|
20
|
+
# results = regressor.predict(x_test)
|
21
|
+
#
|
22
|
+
# *Reference*
|
23
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
24
|
+
class VotingRegressor
|
25
|
+
include Base::BaseEstimator
|
26
|
+
include Base::Regressor
|
27
|
+
|
28
|
+
# Return the sub-regressors that voted.
|
29
|
+
# @return [Hash<Symbol,Regressor>]
|
30
|
+
attr_reader :estimators
|
31
|
+
|
32
|
+
# Create a new ensembled regressor with voting rule.
|
33
|
+
#
|
34
|
+
# @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
|
35
|
+
# @param weights [Hash<Symbol,Float>] The weight value for each regressor.
|
36
|
+
def initialize(estimators:, weights: nil)
|
37
|
+
check_params_type(Hash, estimators: estimators)
|
38
|
+
check_params_type_or_nil(Hash, weights: weights)
|
39
|
+
@estimators = estimators
|
40
|
+
@n_outputs = nil
|
41
|
+
@params = {}
|
42
|
+
@params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
|
43
|
+
end
|
44
|
+
|
45
|
+
# Fit the model with given training data.
|
46
|
+
#
|
47
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
48
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
49
|
+
# @return [VotingRegressor] The learned regressor itself.
|
50
|
+
def fit(x, y)
|
51
|
+
x = check_convert_sample_array(x)
|
52
|
+
y = check_convert_tvalue_array(y)
|
53
|
+
check_sample_tvalue_size(x, y)
|
54
|
+
|
55
|
+
@n_outputs = y.ndim > 1 ? y.shape[1] : 1
|
56
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y) }
|
57
|
+
|
58
|
+
self
|
59
|
+
end
|
60
|
+
|
61
|
+
# Predict values for samples.
|
62
|
+
#
|
63
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
|
64
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
|
65
|
+
def predict(x)
|
66
|
+
x = check_convert_sample_array(x)
|
67
|
+
z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
|
68
|
+
sum_weight = @params[:weights].each_value.inject(&:+)
|
69
|
+
@estimators.each do |name, estimator|
|
70
|
+
z += @params[:weights][name] * estimator.predict(x)
|
71
|
+
end
|
72
|
+
z / sum_weight
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def single_target?
|
78
|
+
@n_outputs == 1
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -11,7 +11,7 @@ module Rumale
|
|
11
11
|
# @example
|
12
12
|
# require 'numo/linalg/autoloader'
|
13
13
|
#
|
14
|
-
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
|
15
15
|
# new_training_samples = transformer.fit_transform(training_samples)
|
16
16
|
# new_testing_samples = transformer.transform(testing_samples)
|
17
17
|
#
|
@@ -39,12 +39,15 @@ module Rumale
|
|
39
39
|
|
40
40
|
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
41
|
#
|
42
|
-
# @param kernel [String] The type of kernel
|
43
|
-
# @param gamma [Float] The parameter
|
44
|
-
# @param
|
42
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
43
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
44
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
45
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
46
|
+
# @param n_components [Integer] The number of dimensions of the kernel feature space.
|
45
47
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
46
|
-
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
-
|
48
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
|
49
|
+
check_params_string(kernel: kernel)
|
50
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
|
48
51
|
check_params_numeric_or_nil(random_seed: random_seed)
|
49
52
|
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
53
|
@params[:random_seed] ||= srand
|
@@ -56,7 +59,7 @@ module Rumale
|
|
56
59
|
|
57
60
|
# Fit the model with given training data.
|
58
61
|
#
|
59
|
-
# @overload fit(x) ->
|
62
|
+
# @overload fit(x) -> Nystroem
|
60
63
|
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
64
|
# @return [Nystroem] The learned transformer itself.
|
62
65
|
def fit(x, _y = nil)
|
@@ -70,10 +73,10 @@ module Rumale
|
|
70
73
|
|
71
74
|
# random sampling.
|
72
75
|
@component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
|
73
|
-
@components = x[@component_indices, true]
|
76
|
+
@components = x[@component_indices, true].dup
|
74
77
|
|
75
78
|
# calculate normalizing factor.
|
76
|
-
kernel_mat =
|
79
|
+
kernel_mat = kernel_mat(@components)
|
77
80
|
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
81
|
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
82
|
u = eig_vecs.reverse(1)
|
@@ -98,9 +101,26 @@ module Rumale
|
|
98
101
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
102
|
def transform(x)
|
100
103
|
x = check_convert_sample_array(x)
|
101
|
-
z =
|
104
|
+
z = kernel_mat(x, @components)
|
102
105
|
z.dot(@normalizer)
|
103
106
|
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def kernel_mat(x, y = nil)
|
111
|
+
case @params[:kernel]
|
112
|
+
when 'rbf'
|
113
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
114
|
+
when 'poly'
|
115
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
116
|
+
when 'sigmoid'
|
117
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
118
|
+
when 'linear'
|
119
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
120
|
+
else
|
121
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
122
|
+
end
|
123
|
+
end
|
104
124
|
end
|
105
125
|
end
|
106
126
|
end
|