rumale 0.22.2 → 0.23.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
@@ -160,15 +160,15 @@ module Rumale
160
160
  grid = [grid] if grid.is_a?(Hash)
161
161
  grid.each do |h|
162
162
  raise TypeError, 'Expect class of elements in param_grid to be Hash' unless h.is_a?(Hash)
163
- raise TypeError, 'Expect class of parameter values in param_grid to be Array' unless h.values.all? { |v| v.is_a?(Array) }
163
+ raise TypeError, 'Expect class of parameter values in param_grid to be Array' unless h.values.all?(Array)
164
164
  end
165
165
  grid
166
166
  end
167
167
 
168
168
  def param_combinations
169
169
  @param_combinations ||= @params[:param_grid].map do |prm|
170
- x = Hash[prm.sort].map { |k, v| [k].product(v) }
171
- x[0].product(*x[1...x.size]).map { |v| Hash[v] }
170
+ x = prm.sort.to_h.map { |k, v| [k].product(v) }
171
+ x[0].product(*x[1...x.size]).map(&:to_h)
172
172
  end
173
173
  end
174
174
 
@@ -75,7 +75,7 @@ module Rumale
75
75
  (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
76
76
  (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
77
77
  end
78
- Numo::DFloat[*log_likelihoods].transpose
78
+ Numo::DFloat[*log_likelihoods].transpose.dup
79
79
  end
80
80
  end
81
81
  end
@@ -62,7 +62,7 @@ module Rumale
62
62
  Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
63
63
  ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
64
64
  end
65
- Numo::DFloat[*log_likelihoods].transpose
65
+ Numo::DFloat[*log_likelihoods].transpose.dup
66
66
  end
67
67
  end
68
68
  end
@@ -67,7 +67,7 @@ module Rumale
67
67
  log_likelihoods = Array.new(n_classes) do |l|
68
68
  Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
69
69
  end
70
- Numo::DFloat[*log_likelihoods].transpose
70
+ Numo::DFloat[*log_likelihoods].transpose.dup
71
71
  end
72
72
  end
73
73
  end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # KernelCalculator is a class that calculates the kernel matrix with training data.
10
+ #
11
+ # @example
12
+ # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
13
+ # regressor = Rumale::KernelMachine::KernelRidge.new
14
+ # pipeline = Rumale::Pipeline::Pipeline.new(
15
+ # steps: { trs: transformer, est: regressor }
16
+ # )
17
+ # pipeline.fit(x_train, y_train)
18
+ # results = pipeline.predict(x_test)
19
+ class KernelCalculator
20
+ include Base::BaseEstimator
21
+ include Base::Transformer
22
+
23
+ # Returns the training data for calculating kernel matrix.
24
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
25
+ attr_reader :components
26
+
27
+ # Create a new transformer that transforms feature vectors into a kernel matrix.
28
+ #
29
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
30
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
31
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
32
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
33
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
34
+ check_params_string(kernel: kernel)
35
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree)
36
+ @params = {}
37
+ @params[:kernel] = kernel
38
+ @params[:gamma] = gamma
39
+ @params[:degree] = degree
40
+ @params[:coef] = coef
41
+ @components = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @overload fit(x) -> KernelCalculator
47
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
48
+ # @return [KernelCalculator] The learned transformer itself.
49
+ def fit(x, _y = nil)
50
+ x = check_convert_sample_array(x)
51
+ @components = x.dup
52
+ self
53
+ end
54
+
55
+ # Fit the model with training data, and then transform them with the learned model.
56
+ #
57
+ # @overload fit_transform(x) -> Numo::DFloat
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
59
+ # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
60
+ def fit_transform(x, y = nil)
61
+ x = check_convert_sample_array(x)
62
+ fit(x, y).transform(x)
63
+ end
64
+
65
+ # Transform the given data with the learned model.
66
+ #
67
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
68
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
69
+ def transform(x)
70
+ x = check_convert_sample_array(x)
71
+ kernel_mat(x, @components)
72
+ end
73
+
74
+ private
75
+
76
+ def kernel_mat(x, y)
77
+ case @params[:kernel]
78
+ when 'rbf'
79
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
80
+ when 'poly'
81
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
82
+ when 'sigmoid'
83
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
84
+ when 'linear'
85
+ Rumale::PairwiseMetric.linear_kernel(x, y)
86
+ else
87
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/tree/node'
5
+ require 'rumale/rumaleext'
5
6
 
6
7
  module Rumale
7
8
  # This module consists of the classes that implement tree models.
@@ -44,21 +45,25 @@ module Rumale
44
45
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
45
46
  def apply(x)
46
47
  x = check_convert_sample_array(x)
47
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
48
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
48
49
  end
49
50
 
50
51
  private
51
52
 
52
- def apply_at_node(node, sample)
53
- return node.leaf_id if node.leaf
54
- return apply_at_node(node.left, sample) if node.right.nil?
55
- return apply_at_node(node.right, sample) if node.left.nil?
56
-
57
- if sample[node.feature_id] <= node.threshold
58
- apply_at_node(node.left, sample)
59
- else
60
- apply_at_node(node.right, sample)
53
+ def partial_apply(tree, sample)
54
+ node = tree
55
+ until node.leaf
56
+ # :nocov:
57
+ node = if node.right.nil?
58
+ node.left
59
+ elsif node.left.nil?
60
+ node.right
61
+ # :nocov:
62
+ else
63
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
64
+ end
61
65
  end
66
+ node.leaf_id
62
67
  end
63
68
 
64
69
  def build_tree(x, y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/classifier'
6
5
 
@@ -101,21 +100,25 @@ module Rumale
101
100
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
102
101
  def predict_proba(x)
103
102
  x = check_convert_sample_array(x)
104
- Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
103
+ Numo::DFloat[*(Array.new(x.shape[0]) { |n| partial_predict_proba(@tree, x[n, true]) })]
105
104
  end
106
105
 
107
106
  private
108
107
 
109
- def predict_proba_at_node(node, sample)
110
- return node.probs if node.leaf
111
- return predict_proba_at_node(node.left, sample) if node.right.nil?
112
- return predict_proba_at_node(node.right, sample) if node.left.nil?
113
-
114
- if sample[node.feature_id] <= node.threshold
115
- predict_proba_at_node(node.left, sample)
116
- else
117
- predict_proba_at_node(node.right, sample)
108
+ def partial_predict_proba(tree, sample)
109
+ node = tree
110
+ until node.leaf
111
+ # :nocov:
112
+ node = if node.right.nil?
113
+ node.left
114
+ elsif node.left.nil?
115
+ node.right
116
+ # :nocov:
117
+ else
118
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
119
+ end
118
120
  end
121
+ node.probs
119
122
  end
120
123
 
121
124
  def stop_growing?(y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/regressor'
6
5
 
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/base/base_estimator'
5
4
  require 'rumale/base/regressor'
5
+ require 'rumale/rumaleext'
6
6
  require 'rumale/tree/node'
7
7
 
8
8
  module Rumale
@@ -114,21 +114,25 @@ module Rumale
114
114
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
115
115
  def apply(x)
116
116
  x = check_convert_sample_array(x)
117
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
117
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
118
118
  end
119
119
 
120
120
  private
121
121
 
122
- def apply_at_node(node, sample)
123
- return node.leaf_id if node.leaf
124
- return apply_at_node(node.left, sample) if node.right.nil?
125
- return apply_at_node(node.right, sample) if node.left.nil?
126
-
127
- if sample[node.feature_id] <= node.threshold
128
- apply_at_node(node.left, sample)
129
- else
130
- apply_at_node(node.right, sample)
122
+ def partial_apply(tree, sample)
123
+ node = tree
124
+ until node.leaf
125
+ # :nocov:
126
+ node = if node.right.nil?
127
+ node.left
128
+ elsif node.left.nil?
129
+ node.right
130
+ # :nocov:
131
+ else
132
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
133
+ end
131
134
  end
135
+ node.leaf_id
132
136
  end
133
137
 
134
138
  def build_tree(x, y, g, h)
@@ -27,6 +27,7 @@ module Rumale
27
27
  y
28
28
  end
29
29
 
30
+ # @deprecated Use check_convert_sample_array instead of this method.
30
31
  # @!visibility private
31
32
  def check_sample_array(x)
32
33
  raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
@@ -35,6 +36,7 @@ module Rumale
35
36
  nil
36
37
  end
37
38
 
39
+ # @deprecated Use check_convert_label_array instead of this method.
38
40
  # @!visibility private
39
41
  def check_label_array(y)
40
42
  raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
@@ -43,6 +45,7 @@ module Rumale
43
45
  nil
44
46
  end
45
47
 
48
+ # @deprecated Use check_convert_tvalue_array instead of this method.
46
49
  # @!visibility private
47
50
  def check_tvalue_array(y)
48
51
  raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
@@ -64,49 +67,58 @@ module Rumale
64
67
  nil
65
68
  end
66
69
 
70
+ # TODO: Better to replace with RBS in the future.
67
71
  # @!visibility private
68
72
  def check_params_type(type, params = {})
69
73
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
70
74
  nil
71
75
  end
72
76
 
77
+ # TODO: Better to replace with RBS in the future.
73
78
  # @!visibility private
74
79
  def check_params_type_or_nil(type, params = {})
75
80
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
76
81
  nil
77
82
  end
78
83
 
84
+ # TODO: Better to replace with RBS in the future.
79
85
  # @!visibility private
80
86
  def check_params_numeric(params = {})
81
87
  check_params_type(Numeric, params)
82
88
  end
83
89
 
90
+ # TODO: Better to replace with RBS in the future.
84
91
  # @!visibility private
85
92
  def check_params_numeric_or_nil(params = {})
86
93
  check_params_type_or_nil(Numeric, params)
87
94
  end
88
95
 
96
+ # @deprecated Use check_params_numeric instead of this method.
89
97
  # @!visibility private
90
98
  def check_params_float(params = {})
91
99
  check_params_type(Float, params)
92
100
  end
93
101
 
102
+ # @deprecated Use check_params_numeric instead of this method.
94
103
  # @!visibility private
95
104
  def check_params_integer(params = {})
96
105
  check_params_type(Integer, params)
97
106
  end
98
107
 
108
+ # TODO: Better to replace with RBS in the future.
99
109
  # @!visibility private
100
110
  def check_params_string(params = {})
101
111
  check_params_type(String, params)
102
112
  end
103
113
 
114
+ # TODO: Better to replace with RBS in the future.
104
115
  # @!visibility private
105
116
  def check_params_boolean(params = {})
106
117
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
107
118
  nil
108
119
  end
109
120
 
121
+ # TODO: Better to replace with RBS in the future.
110
122
  # @!visibility private
111
123
  def check_params_positive(params = {})
112
124
  params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.2'
6
+ VERSION = '0.23.1'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.2
4
+ version: 0.23.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-10 00:00:00.000000000 Z
11
+ date: 2021-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -57,11 +57,13 @@ extensions:
57
57
  - ext/rumale/extconf.rb
58
58
  extra_rdoc_files: []
59
59
  files:
60
+ - ".clang-format"
61
+ - ".coveralls.yml"
60
62
  - ".github/workflows/build.yml"
63
+ - ".github/workflows/coverage.yml"
61
64
  - ".gitignore"
62
65
  - ".rspec"
63
66
  - ".rubocop.yml"
64
- - ".travis.yml"
65
67
  - CHANGELOG.md
66
68
  - CODE_OF_CONDUCT.md
67
69
  - Gemfile
@@ -69,8 +71,8 @@ files:
69
71
  - README.md
70
72
  - Rakefile
71
73
  - ext/rumale/extconf.rb
72
- - ext/rumale/rumale.c
73
- - ext/rumale/rumale.h
74
+ - ext/rumale/rumaleext.c
75
+ - ext/rumale/rumaleext.h
74
76
  - ext/rumale/tree.c
75
77
  - ext/rumale/tree.h
76
78
  - lib/rumale.rb
@@ -106,6 +108,8 @@ files:
106
108
  - lib/rumale/ensemble/random_forest_regressor.rb
107
109
  - lib/rumale/ensemble/stacking_classifier.rb
108
110
  - lib/rumale/ensemble/stacking_regressor.rb
111
+ - lib/rumale/ensemble/voting_classifier.rb
112
+ - lib/rumale/ensemble/voting_regressor.rb
109
113
  - lib/rumale/evaluation_measure/accuracy.rb
110
114
  - lib/rumale/evaluation_measure/adjusted_rand_score.rb
111
115
  - lib/rumale/evaluation_measure/calinski_harabasz_score.rb
@@ -135,12 +139,14 @@ files:
135
139
  - lib/rumale/kernel_machine/kernel_fda.rb
136
140
  - lib/rumale/kernel_machine/kernel_pca.rb
137
141
  - lib/rumale/kernel_machine/kernel_ridge.rb
142
+ - lib/rumale/kernel_machine/kernel_ridge_classifier.rb
138
143
  - lib/rumale/kernel_machine/kernel_svc.rb
139
144
  - lib/rumale/linear_model/base_sgd.rb
140
145
  - lib/rumale/linear_model/elastic_net.rb
141
146
  - lib/rumale/linear_model/lasso.rb
142
147
  - lib/rumale/linear_model/linear_regression.rb
143
148
  - lib/rumale/linear_model/logistic_regression.rb
149
+ - lib/rumale/linear_model/nnls.rb
144
150
  - lib/rumale/linear_model/ridge.rb
145
151
  - lib/rumale/linear_model/svc.rb
146
152
  - lib/rumale/linear_model/svr.rb
@@ -178,6 +184,7 @@ files:
178
184
  - lib/rumale/pipeline/pipeline.rb
179
185
  - lib/rumale/preprocessing/bin_discretizer.rb
180
186
  - lib/rumale/preprocessing/binarizer.rb
187
+ - lib/rumale/preprocessing/kernel_calculator.rb
181
188
  - lib/rumale/preprocessing/l1_normalizer.rb
182
189
  - lib/rumale/preprocessing/l2_normalizer.rb
183
190
  - lib/rumale/preprocessing/label_binarizer.rb
@@ -226,7 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
226
233
  - !ruby/object:Gem::Version
227
234
  version: '0'
228
235
  requirements: []
229
- rubygems_version: 3.1.4
236
+ rubygems_version: 3.1.6
230
237
  signing_key:
231
238
  specification_version: 4
232
239
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning