rumale 0.22.2 → 0.23.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
data/ext/rumale/tree.h CHANGED
@@ -3,7 +3,9 @@
3
3
 
4
4
  #include <math.h>
5
5
  #include <string.h>
6
+
6
7
  #include <ruby.h>
8
+
7
9
  #include <numo/narray.h>
8
10
  #include <numo/template.h>
9
11
 
data/lib/rumale.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'numo/narray'
4
4
 
5
- require 'rumale/rumale'
5
+ require 'rumale/rumaleext'
6
6
 
7
7
  require 'rumale/version'
8
8
  require 'rumale/validation'
@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
30
30
  require 'rumale/linear_model/ridge'
31
31
  require 'rumale/linear_model/lasso'
32
32
  require 'rumale/linear_model/elastic_net'
33
+ require 'rumale/linear_model/nnls'
33
34
  require 'rumale/kernel_machine/kernel_svc'
34
35
  require 'rumale/kernel_machine/kernel_pca'
35
36
  require 'rumale/kernel_machine/kernel_fda'
36
37
  require 'rumale/kernel_machine/kernel_ridge'
38
+ require 'rumale/kernel_machine/kernel_ridge_classifier'
37
39
  require 'rumale/multiclass/one_vs_rest_classifier'
38
40
  require 'rumale/nearest_neighbors/vp_tree'
39
41
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -61,6 +63,8 @@ require 'rumale/ensemble/extra_trees_classifier'
61
63
  require 'rumale/ensemble/extra_trees_regressor'
62
64
  require 'rumale/ensemble/stacking_classifier'
63
65
  require 'rumale/ensemble/stacking_regressor'
66
+ require 'rumale/ensemble/voting_classifier'
67
+ require 'rumale/ensemble/voting_regressor'
64
68
  require 'rumale/clustering/k_means'
65
69
  require 'rumale/clustering/mini_batch_k_means'
66
70
  require 'rumale/clustering/k_medoids'
@@ -100,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
100
104
  require 'rumale/preprocessing/ordinal_encoder'
101
105
  require 'rumale/preprocessing/binarizer'
102
106
  require 'rumale/preprocessing/polynomial_features'
107
+ require 'rumale/preprocessing/kernel_calculator'
103
108
  require 'rumale/model_selection/k_fold'
104
109
  require 'rumale/model_selection/group_k_fold'
105
110
  require 'rumale/model_selection/stratified_k_fold'
@@ -11,13 +11,15 @@ module Rumale
11
11
 
12
12
  private
13
13
 
14
- def enable_linalg?
14
+ def enable_linalg?(warning: true)
15
15
  if defined?(Numo::Linalg).nil?
16
- warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
16
+ warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
17
17
  return false
18
18
  end
19
19
  if Numo::Linalg::VERSION < '0.1.4'
20
- warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
20
+ if warning
21
+ warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
22
+ end
21
23
  return false
22
24
  end
23
25
  true
@@ -12,22 +12,26 @@ module Rumale
12
12
  # Load a dataset with the libsvm file format into Numo::NArray.
13
13
  #
14
14
  # @param filename [String] A path to a dataset file.
15
+ # @param n_features [Integer/Nil] The number of features of data to load.
16
+ # If nil is given, it will be detected automatically from given file.
15
17
  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
16
18
  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
17
19
  #
18
20
  # @return [Array<Numo::NArray>]
19
21
  # Returns array containing the (n_samples x n_features) matrix for feature vectors
20
22
  # and (n_samples) vector for labels or target values.
21
- def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
23
+ def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
22
24
  ftvecs = []
23
25
  labels = []
24
- n_features = 0
26
+ n_features_detected = 0
25
27
  CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
26
28
  label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
27
29
  labels.push(label)
28
30
  ftvecs.push(ftvec)
29
- n_features = max_idx if n_features < max_idx
31
+ n_features_detected = max_idx if n_features_detected < max_idx
30
32
  end
33
+ n_features ||= n_features_detected
34
+ n_features = [n_features, n_features_detected].max
31
35
  [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
32
36
  end
33
37
 
@@ -81,7 +81,7 @@ module Rumale
81
81
  wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
82
82
  unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
83
83
  @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
84
- @mixing = Numo::Linalg.pinv(@components)
84
+ @mixing = Numo::Linalg.pinv(@components).dup
85
85
  if @params[:n_components] == 1
86
86
  @components = @components.flatten.dup
87
87
  @mixing = @mixing.flatten.dup
@@ -161,7 +161,7 @@ module Rumale
161
161
 
162
162
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
163
163
 
164
- return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
164
+ return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
165
165
 
166
166
  n_samples, = x.shape
167
167
  probs = Numo::DFloat.zeros(n_samples, 2)
@@ -182,7 +182,7 @@ module Rumale
182
182
  else
183
183
  @estimators.map { |tree| tree.apply(x) }
184
184
  end
185
- Numo::Int32[*leaf_ids].transpose
185
+ Numo::Int32[*leaf_ids].transpose.dup
186
186
  end
187
187
 
188
188
  private
@@ -144,7 +144,7 @@ module Rumale
144
144
  else
145
145
  @estimators.map { |tree| tree.apply(x) }
146
146
  end
147
- Numo::Int32[*leaf_ids].transpose
147
+ Numo::Int32[*leaf_ids].transpose.dup
148
148
  end
149
149
 
150
150
  private
@@ -159,7 +159,7 @@ module Rumale
159
159
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
160
160
  def apply(x)
161
161
  x = check_convert_sample_array(x)
162
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
162
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
163
163
  end
164
164
 
165
165
  private
@@ -136,7 +136,7 @@ module Rumale
136
136
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
137
137
  def apply(x)
138
138
  x = check_convert_sample_array(x)
139
- Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
139
+ Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
140
140
  end
141
141
 
142
142
  private
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
5
6
 
6
7
  module Rumale
7
8
  module Ensemble
@@ -10,18 +11,18 @@ module Rumale
10
11
  # @example
11
12
  # estimators = {
12
13
  # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
15
  # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
15
16
  # }
16
17
  # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
17
18
  # classifier = Rumale::Ensemble::StackingClassifier.new(
18
19
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
20
  # )
20
- # classifier.fit(training_samples, traininig_labels)
21
+ # classifier.fit(training_samples, training_labels)
21
22
  # results = classifier.predict(testing_samples)
22
23
  #
23
24
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
26
  class StackingClassifier
26
27
  include Base::BaseEstimator
27
28
  include Base::Classifier
@@ -149,7 +150,7 @@ module Rumale
149
150
 
150
151
  # Predict probability for samples.
151
152
  #
152
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
153
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
153
154
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
154
155
  def predict_proba(x)
155
156
  x = check_convert_sample_array(x)
@@ -10,18 +10,18 @@ module Rumale
10
10
  # @example
11
11
  # estimators = {
12
12
  # las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
14
  # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
15
  # }
16
16
  # meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
17
17
  # regressor = Rumale::Ensemble::StackingRegressor.new(
18
18
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
19
  # )
20
- # regressor.fit(training_samples, traininig_values)
20
+ # regressor.fit(training_samples, training_values)
21
21
  # results = regressor.predict(testing_samples)
22
22
  #
23
23
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
25
  class StackingRegressor
26
26
  include Base::BaseEstimator
27
27
  include Base::Regressor
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
6
+
7
+ module Rumale
8
+ module Ensemble
9
+ # VotingClassifier is a class that implements classifier with voting ensemble method.
10
+ #
11
+ # @example
12
+ # estimators = {
13
+ # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
+ # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
+ # }
17
+ # weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
18
+ #
19
+ # classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
20
+ # classifier.fit(x_train, y_train)
21
+ # results = classifier.predict(x_test)
22
+ #
23
+ # *Reference*
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ class VotingClassifier
26
+ include Base::BaseEstimator
27
+ include Base::Classifier
28
+
29
+ # Return the sub-classifiers that voted.
30
+ # @return [Hash<Symbol,Classifier>]
31
+ attr_reader :estimators
32
+
33
+ # Return the class labels.
34
+ # @return [Numo::Int32] (size: n_classes)
35
+ attr_reader :classes
36
+
37
+ # Create a new ensembled classifier with voting rule.
38
+ #
39
+ # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
40
+ # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
41
+ # @param voting [String] The voting rule for the predicted results of each classifier.
42
+ # If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
43
+ # If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
44
+ def initialize(estimators:, weights: nil, voting: 'hard')
45
+ check_params_type(Hash, estimators: estimators)
46
+ check_params_type_or_nil(Hash, weights: weights)
47
+ check_params_string(voting: voting)
48
+ @estimators = estimators
49
+ @classes = nil
50
+ @params = {}
51
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
52
+ @params[:voting] = voting
53
+ end
54
+
55
+ # Fit the model with given training data.
56
+ #
57
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
58
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
59
+ # @return [VotingClassifier] The learned classifier itself.
60
+ def fit(x, y)
61
+ x = check_convert_sample_array(x)
62
+ y = check_convert_label_array(y)
63
+ check_sample_label_size(x, y)
64
+
65
+ @encoder = Rumale::Preprocessing::LabelEncoder.new
66
+ y_encoded = @encoder.fit_transform(y)
67
+ @classes = Numo::NArray[*@encoder.classes]
68
+ @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
69
+
70
+ self
71
+ end
72
+
73
+ # Calculate confidence scores for samples.
74
+ #
75
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
76
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
77
+ def decision_function(x)
78
+ x = check_convert_sample_array(x)
79
+ return predict_proba(x) if soft_voting?
80
+
81
+ n_samples = x.shape[0]
82
+ n_classes = @classes.size
83
+ z = Numo::DFloat.zeros(n_samples, n_classes)
84
+ @estimators.each do |name, estimator|
85
+ estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
86
+ end
87
+ z
88
+ end
89
+
90
+ # Predict class labels for samples.
91
+ #
92
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
93
+ # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
94
+ def predict(x)
95
+ x = check_convert_sample_array(x)
96
+ n_samples = x.shape[0]
97
+ n_classes = @classes.size
98
+ z = decision_function(x)
99
+ predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
100
+ Numo::Int32.cast(@encoder.inverse_transform(predicted))
101
+ end
102
+
103
+ # Predict probability for samples.
104
+ #
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
106
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
107
+ def predict_proba(x)
108
+ x = check_convert_sample_array(x)
109
+ n_samples = x.shape[0]
110
+ n_classes = @classes.size
111
+ z = Numo::DFloat.zeros(n_samples, n_classes)
112
+ sum_weight = @params[:weights].each_value.inject(&:+)
113
+ @estimators.each do |name, estimator|
114
+ z += @params[:weights][name] * estimator.predict_proba(x)
115
+ end
116
+ z /= sum_weight
117
+ end
118
+
119
+ private
120
+
121
+ def soft_voting?
122
+ @params[:voting] == 'soft'
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/regressor'
5
+
6
+ module Rumale
7
+ module Ensemble
8
+ # VotingRegressor is a class that implements regressor with voting ensemble method.
9
+ #
10
+ # @example
11
+ # estimators = {
12
+ # rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
+ # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
+ # }
16
+ # weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
17
+ #
18
+ # regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
19
+ # regressor.fit(x_train, y_train)
20
+ # results = regressor.predict(x_test)
21
+ #
22
+ # *Reference*
23
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ class VotingRegressor
25
+ include Base::BaseEstimator
26
+ include Base::Regressor
27
+
28
+ # Return the sub-regressors that voted.
29
+ # @return [Hash<Symbol,Regressor>]
30
+ attr_reader :estimators
31
+
32
+ # Create a new ensembled regressor with voting rule.
33
+ #
34
+ # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
35
+ # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
36
+ def initialize(estimators:, weights: nil)
37
+ check_params_type(Hash, estimators: estimators)
38
+ check_params_type_or_nil(Hash, weights: weights)
39
+ @estimators = estimators
40
+ @n_outputs = nil
41
+ @params = {}
42
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
43
+ end
44
+
45
+ # Fit the model with given training data.
46
+ #
47
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
48
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
49
+ # @return [VotingRegressor] The learned regressor itself.
50
+ def fit(x, y)
51
+ x = check_convert_sample_array(x)
52
+ y = check_convert_tvalue_array(y)
53
+ check_sample_tvalue_size(x, y)
54
+
55
+ @n_outputs = y.ndim > 1 ? y.shape[1] : 1
56
+ @estimators.each_key { |name| @estimators[name].fit(x, y) }
57
+
58
+ self
59
+ end
60
+
61
+ # Predict values for samples.
62
+ #
63
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
64
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
65
+ def predict(x)
66
+ x = check_convert_sample_array(x)
67
+ z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
68
+ sum_weight = @params[:weights].each_value.inject(&:+)
69
+ @estimators.each do |name, estimator|
70
+ z += @params[:weights][name] * estimator.predict(x)
71
+ end
72
+ z / sum_weight
73
+ end
74
+
75
+ private
76
+
77
+ def single_target?
78
+ @n_outputs == 1
79
+ end
80
+ end
81
+ end
82
+ end
@@ -11,7 +11,7 @@ module Rumale
11
11
  # @example
12
12
  # require 'numo/linalg/autoloader'
13
13
  #
14
- # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
15
15
  # new_training_samples = transformer.fit_transform(training_samples)
16
16
  # new_testing_samples = transformer.transform(testing_samples)
17
17
  #
@@ -39,12 +39,15 @@ module Rumale
39
39
 
40
40
  # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
41
  #
42
- # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
- # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
42
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
43
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
44
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
45
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
46
+ # @param n_components [Integer] The number of dimensions of the kernel feature space.
45
47
  # @param random_seed [Integer] The seed value used to initialize the random generator.
46
- def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
- check_params_numeric(gamma: gamma, n_components: n_components)
48
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
49
+ check_params_string(kernel: kernel)
50
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
48
51
  check_params_numeric_or_nil(random_seed: random_seed)
49
52
  @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
53
  @params[:random_seed] ||= srand
@@ -56,7 +59,7 @@ module Rumale
56
59
 
57
60
  # Fit the model with given training data.
58
61
  #
59
- # @overload fit(x) -> RBF
62
+ # @overload fit(x) -> Nystroem
60
63
  # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
64
  # @return [Nystroem] The learned transformer itself.
62
65
  def fit(x, _y = nil)
@@ -70,10 +73,10 @@ module Rumale
70
73
 
71
74
  # random sampling.
72
75
  @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
73
- @components = x[@component_indices, true]
76
+ @components = x[@component_indices, true].dup
74
77
 
75
78
  # calculate normalizing factor.
76
- kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
79
+ kernel_mat = kernel_mat(@components)
77
80
  eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
81
  la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
82
  u = eig_vecs.reverse(1)
@@ -98,9 +101,26 @@ module Rumale
98
101
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
102
  def transform(x)
100
103
  x = check_convert_sample_array(x)
101
- z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
104
+ z = kernel_mat(x, @components)
102
105
  z.dot(@normalizer)
103
106
  end
107
+
108
+ private
109
+
110
+ def kernel_mat(x, y = nil)
111
+ case @params[:kernel]
112
+ when 'rbf'
113
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
114
+ when 'poly'
115
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
116
+ when 'sigmoid'
117
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
118
+ when 'linear'
119
+ Rumale::PairwiseMetric.linear_kernel(x, y)
120
+ else
121
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
122
+ end
123
+ end
104
124
  end
105
125
  end
106
126
  end