rumale 0.22.2 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
data/ext/rumale/tree.h CHANGED
@@ -3,7 +3,9 @@
3
3
 
4
4
  #include <math.h>
5
5
  #include <string.h>
6
+
6
7
  #include <ruby.h>
8
+
7
9
  #include <numo/narray.h>
8
10
  #include <numo/template.h>
9
11
 
data/lib/rumale.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'numo/narray'
4
4
 
5
- require 'rumale/rumale'
5
+ require 'rumale/rumaleext'
6
6
 
7
7
  require 'rumale/version'
8
8
  require 'rumale/validation'
@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
30
30
  require 'rumale/linear_model/ridge'
31
31
  require 'rumale/linear_model/lasso'
32
32
  require 'rumale/linear_model/elastic_net'
33
+ require 'rumale/linear_model/nnls'
33
34
  require 'rumale/kernel_machine/kernel_svc'
34
35
  require 'rumale/kernel_machine/kernel_pca'
35
36
  require 'rumale/kernel_machine/kernel_fda'
36
37
  require 'rumale/kernel_machine/kernel_ridge'
38
+ require 'rumale/kernel_machine/kernel_ridge_classifier'
37
39
  require 'rumale/multiclass/one_vs_rest_classifier'
38
40
  require 'rumale/nearest_neighbors/vp_tree'
39
41
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -61,6 +63,8 @@ require 'rumale/ensemble/extra_trees_classifier'
61
63
  require 'rumale/ensemble/extra_trees_regressor'
62
64
  require 'rumale/ensemble/stacking_classifier'
63
65
  require 'rumale/ensemble/stacking_regressor'
66
+ require 'rumale/ensemble/voting_classifier'
67
+ require 'rumale/ensemble/voting_regressor'
64
68
  require 'rumale/clustering/k_means'
65
69
  require 'rumale/clustering/mini_batch_k_means'
66
70
  require 'rumale/clustering/k_medoids'
@@ -100,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
100
104
  require 'rumale/preprocessing/ordinal_encoder'
101
105
  require 'rumale/preprocessing/binarizer'
102
106
  require 'rumale/preprocessing/polynomial_features'
107
+ require 'rumale/preprocessing/kernel_calculator'
103
108
  require 'rumale/model_selection/k_fold'
104
109
  require 'rumale/model_selection/group_k_fold'
105
110
  require 'rumale/model_selection/stratified_k_fold'
@@ -11,13 +11,15 @@ module Rumale
11
11
 
12
12
  private
13
13
 
14
- def enable_linalg?
14
+ def enable_linalg?(warning: true)
15
15
  if defined?(Numo::Linalg).nil?
16
- warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
16
+ warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
17
17
  return false
18
18
  end
19
19
  if Numo::Linalg::VERSION < '0.1.4'
20
- warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
20
+ if warning
21
+ warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
22
+ end
21
23
  return false
22
24
  end
23
25
  true
@@ -12,22 +12,26 @@ module Rumale
12
12
  # Load a dataset with the libsvm file format into Numo::NArray.
13
13
  #
14
14
  # @param filename [String] A path to a dataset file.
15
+ # @param n_features [Integer/Nil] The number of features of data to load.
16
+ # If nil is given, it will be detected automatically from given file.
15
17
  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
16
18
  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
17
19
  #
18
20
  # @return [Array<Numo::NArray>]
19
21
  # Returns array containing the (n_samples x n_features) matrix for feature vectors
20
22
  # and (n_samples) vector for labels or target values.
21
- def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
23
+ def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
22
24
  ftvecs = []
23
25
  labels = []
24
- n_features = 0
26
+ n_features_detected = 0
25
27
  CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
26
28
  label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
27
29
  labels.push(label)
28
30
  ftvecs.push(ftvec)
29
- n_features = max_idx if n_features < max_idx
31
+ n_features_detected = max_idx if n_features_detected < max_idx
30
32
  end
33
+ n_features ||= n_features_detected
34
+ n_features = [n_features, n_features_detected].max
31
35
  [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
32
36
  end
33
37
 
@@ -81,7 +81,7 @@ module Rumale
81
81
  wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
82
82
  unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
83
83
  @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
84
- @mixing = Numo::Linalg.pinv(@components)
84
+ @mixing = Numo::Linalg.pinv(@components).dup
85
85
  if @params[:n_components] == 1
86
86
  @components = @components.flatten.dup
87
87
  @mixing = @mixing.flatten.dup
@@ -161,7 +161,7 @@ module Rumale
161
161
 
162
162
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
163
163
 
164
- return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
164
+ return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
165
165
 
166
166
  n_samples, = x.shape
167
167
  probs = Numo::DFloat.zeros(n_samples, 2)
@@ -182,7 +182,7 @@ module Rumale
182
182
  else
183
183
  @estimators.map { |tree| tree.apply(x) }
184
184
  end
185
- Numo::Int32[*leaf_ids].transpose
185
+ Numo::Int32[*leaf_ids].transpose.dup
186
186
  end
187
187
 
188
188
  private
@@ -144,7 +144,7 @@ module Rumale
144
144
  else
145
145
  @estimators.map { |tree| tree.apply(x) }
146
146
  end
147
- Numo::Int32[*leaf_ids].transpose
147
+ Numo::Int32[*leaf_ids].transpose.dup
148
148
  end
149
149
 
150
150
  private
@@ -159,7 +159,7 @@ module Rumale
159
159
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
160
160
  def apply(x)
161
161
  x = check_convert_sample_array(x)
162
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
162
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
163
163
  end
164
164
 
165
165
  private
@@ -136,7 +136,7 @@ module Rumale
136
136
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
137
137
  def apply(x)
138
138
  x = check_convert_sample_array(x)
139
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
139
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
140
140
  end
141
141
 
142
142
  private
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
5
6
 
6
7
  module Rumale
7
8
  module Ensemble
@@ -10,18 +11,18 @@ module Rumale
10
11
  # @example
11
12
  # estimators = {
12
13
  # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
15
  # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
15
16
  # }
16
17
  # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
17
18
  # classifier = Rumale::Ensemble::StackingClassifier.new(
18
19
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
20
  # )
20
- # classifier.fit(training_samples, traininig_labels)
21
+ # classifier.fit(training_samples, training_labels)
21
22
  # results = classifier.predict(testing_samples)
22
23
  #
23
24
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
26
  class StackingClassifier
26
27
  include Base::BaseEstimator
27
28
  include Base::Classifier
@@ -149,7 +150,7 @@ module Rumale
149
150
 
150
151
  # Predict probability for samples.
151
152
  #
152
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
153
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
153
154
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
154
155
  def predict_proba(x)
155
156
  x = check_convert_sample_array(x)
@@ -10,18 +10,18 @@ module Rumale
10
10
  # @example
11
11
  # estimators = {
12
12
  # las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
14
  # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
15
  # }
16
16
  # meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
17
17
  # regressor = Rumale::Ensemble::StackingRegressor.new(
18
18
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
19
  # )
20
- # regressor.fit(training_samples, traininig_values)
20
+ # regressor.fit(training_samples, training_values)
21
21
  # results = regressor.predict(testing_samples)
22
22
  #
23
23
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
25
  class StackingRegressor
26
26
  include Base::BaseEstimator
27
27
  include Base::Regressor
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
6
+
7
+ module Rumale
8
+ module Ensemble
9
+ # VotingClassifier is a class that implements classifier with voting ensemble method.
10
+ #
11
+ # @example
12
+ # estimators = {
13
+ # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
+ # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
+ # }
17
+ # weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
18
+ #
19
+ # classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
20
+ # classifier.fit(x_train, y_train)
21
+ # results = classifier.predict(x_test)
22
+ #
23
+ # *Reference*
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ class VotingClassifier
26
+ include Base::BaseEstimator
27
+ include Base::Classifier
28
+
29
+ # Return the sub-classifiers that voted.
30
+ # @return [Hash<Symbol,Classifier>]
31
+ attr_reader :estimators
32
+
33
+ # Return the class labels.
34
+ # @return [Numo::Int32] (size: n_classes)
35
+ attr_reader :classes
36
+
37
+ # Create a new ensembled classifier with voting rule.
38
+ #
39
+ # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
40
+ # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
41
+ # @param voting [String] The voting rule for the predicted results of each classifier.
42
+ # If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
43
+ # If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
44
+ def initialize(estimators:, weights: nil, voting: 'hard')
45
+ check_params_type(Hash, estimators: estimators)
46
+ check_params_type_or_nil(Hash, weights: weights)
47
+ check_params_string(voting: voting)
48
+ @estimators = estimators
49
+ @classes = nil
50
+ @params = {}
51
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
52
+ @params[:voting] = voting
53
+ end
54
+
55
+ # Fit the model with given training data.
56
+ #
57
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
58
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
59
+ # @return [VotingClassifier] The learned classifier itself.
60
+ def fit(x, y)
61
+ x = check_convert_sample_array(x)
62
+ y = check_convert_label_array(y)
63
+ check_sample_label_size(x, y)
64
+
65
+ @encoder = Rumale::Preprocessing::LabelEncoder.new
66
+ y_encoded = @encoder.fit_transform(y)
67
+ @classes = Numo::NArray[*@encoder.classes]
68
+ @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
69
+
70
+ self
71
+ end
72
+
73
+ # Calculate confidence scores for samples.
74
+ #
75
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
76
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
77
+ def decision_function(x)
78
+ x = check_convert_sample_array(x)
79
+ return predict_proba(x) if soft_voting?
80
+
81
+ n_samples = x.shape[0]
82
+ n_classes = @classes.size
83
+ z = Numo::DFloat.zeros(n_samples, n_classes)
84
+ @estimators.each do |name, estimator|
85
+ estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
86
+ end
87
+ z
88
+ end
89
+
90
+ # Predict class labels for samples.
91
+ #
92
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
93
+ # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
94
+ def predict(x)
95
+ x = check_convert_sample_array(x)
96
+ n_samples = x.shape[0]
97
+ n_classes = @classes.size
98
+ z = decision_function(x)
99
+ predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
100
+ Numo::Int32.cast(@encoder.inverse_transform(predicted))
101
+ end
102
+
103
+ # Predict probability for samples.
104
+ #
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
106
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
107
+ def predict_proba(x)
108
+ x = check_convert_sample_array(x)
109
+ n_samples = x.shape[0]
110
+ n_classes = @classes.size
111
+ z = Numo::DFloat.zeros(n_samples, n_classes)
112
+ sum_weight = @params[:weights].each_value.inject(&:+)
113
+ @estimators.each do |name, estimator|
114
+ z += @params[:weights][name] * estimator.predict_proba(x)
115
+ end
116
+ z /= sum_weight
117
+ end
118
+
119
+ private
120
+
121
+ def soft_voting?
122
+ @params[:voting] == 'soft'
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/regressor'
5
+
6
+ module Rumale
7
+ module Ensemble
8
+ # VotingRegressor is a class that implements regressor with voting ensemble method.
9
+ #
10
+ # @example
11
+ # estimators = {
12
+ # rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
+ # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
+ # }
16
+ # weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
17
+ #
18
+ # regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
19
+ # regressor.fit(x_train, y_train)
20
+ # results = regressor.predict(x_test)
21
+ #
22
+ # *Reference*
23
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ class VotingRegressor
25
+ include Base::BaseEstimator
26
+ include Base::Regressor
27
+
28
+ # Return the sub-regressors that voted.
29
+ # @return [Hash<Symbol,Regressor>]
30
+ attr_reader :estimators
31
+
32
+ # Create a new ensembled regressor with voting rule.
33
+ #
34
+ # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
35
+ # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
36
+ def initialize(estimators:, weights: nil)
37
+ check_params_type(Hash, estimators: estimators)
38
+ check_params_type_or_nil(Hash, weights: weights)
39
+ @estimators = estimators
40
+ @n_outputs = nil
41
+ @params = {}
42
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
43
+ end
44
+
45
+ # Fit the model with given training data.
46
+ #
47
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
48
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
49
+ # @return [VotingRegressor] The learned regressor itself.
50
+ def fit(x, y)
51
+ x = check_convert_sample_array(x)
52
+ y = check_convert_tvalue_array(y)
53
+ check_sample_tvalue_size(x, y)
54
+
55
+ @n_outputs = y.ndim > 1 ? y.shape[1] : 1
56
+ @estimators.each_key { |name| @estimators[name].fit(x, y) }
57
+
58
+ self
59
+ end
60
+
61
+ # Predict values for samples.
62
+ #
63
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
64
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
65
+ def predict(x)
66
+ x = check_convert_sample_array(x)
67
+ z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
68
+ sum_weight = @params[:weights].each_value.inject(&:+)
69
+ @estimators.each do |name, estimator|
70
+ z += @params[:weights][name] * estimator.predict(x)
71
+ end
72
+ z / sum_weight
73
+ end
74
+
75
+ private
76
+
77
+ def single_target?
78
+ @n_outputs == 1
79
+ end
80
+ end
81
+ end
82
+ end
@@ -11,7 +11,7 @@ module Rumale
11
11
  # @example
12
12
  # require 'numo/linalg/autoloader'
13
13
  #
14
- # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
15
15
  # new_training_samples = transformer.fit_transform(training_samples)
16
16
  # new_testing_samples = transformer.transform(testing_samples)
17
17
  #
@@ -39,12 +39,15 @@ module Rumale
39
39
 
40
40
  # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
41
  #
42
- # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
- # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
42
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
43
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
44
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
45
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
46
+ # @param n_components [Integer] The number of dimensions of the kernel feature space.
45
47
  # @param random_seed [Integer] The seed value used to initialize the random generator.
46
- def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
- check_params_numeric(gamma: gamma, n_components: n_components)
48
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
49
+ check_params_string(kernel: kernel)
50
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
48
51
  check_params_numeric_or_nil(random_seed: random_seed)
49
52
  @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
53
  @params[:random_seed] ||= srand
@@ -56,7 +59,7 @@ module Rumale
56
59
 
57
60
  # Fit the model with given training data.
58
61
  #
59
- # @overload fit(x) -> RBF
62
+ # @overload fit(x) -> Nystroem
60
63
  # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
64
  # @return [Nystroem] The learned transformer itself.
62
65
  def fit(x, _y = nil)
@@ -70,10 +73,10 @@ module Rumale
70
73
 
71
74
  # random sampling.
72
75
  @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
73
- @components = x[@component_indices, true]
76
+ @components = x[@component_indices, true].dup
74
77
 
75
78
  # calculate normalizing factor.
76
- kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
79
+ kernel_mat = kernel_mat(@components)
77
80
  eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
81
  la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
82
  u = eig_vecs.reverse(1)
@@ -98,9 +101,26 @@ module Rumale
98
101
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
102
  def transform(x)
100
103
  x = check_convert_sample_array(x)
101
- z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
104
+ z = kernel_mat(x, @components)
102
105
  z.dot(@normalizer)
103
106
  end
107
+
108
+ private
109
+
110
+ def kernel_mat(x, y = nil)
111
+ case @params[:kernel]
112
+ when 'rbf'
113
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
114
+ when 'poly'
115
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
116
+ when 'sigmoid'
117
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
118
+ when 'linear'
119
+ Rumale::PairwiseMetric.linear_kernel(x, y)
120
+ else
121
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
122
+ end
123
+ end
104
124
  end
105
125
  end
106
126
  end