rumale 0.22.2 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
@@ -160,15 +160,15 @@ module Rumale
160
160
  grid = [grid] if grid.is_a?(Hash)
161
161
  grid.each do |h|
162
162
  raise TypeError, 'Expect class of elements in param_grid to be Hash' unless h.is_a?(Hash)
163
- raise TypeError, 'Expect class of parameter values in param_grid to be Array' unless h.values.all? { |v| v.is_a?(Array) }
163
+ raise TypeError, 'Expect class of parameter values in param_grid to be Array' unless h.values.all?(Array)
164
164
  end
165
165
  grid
166
166
  end
167
167
 
168
168
  def param_combinations
169
169
  @param_combinations ||= @params[:param_grid].map do |prm|
170
- x = Hash[prm.sort].map { |k, v| [k].product(v) }
171
- x[0].product(*x[1...x.size]).map { |v| Hash[v] }
170
+ x = prm.sort.to_h.map { |k, v| [k].product(v) }
171
+ x[0].product(*x[1...x.size]).map(&:to_h)
172
172
  end
173
173
  end
174
174
 
@@ -75,7 +75,7 @@ module Rumale
75
75
  (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
76
76
  (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
77
77
  end
78
- Numo::DFloat[*log_likelihoods].transpose
78
+ Numo::DFloat[*log_likelihoods].transpose.dup
79
79
  end
80
80
  end
81
81
  end
@@ -62,7 +62,7 @@ module Rumale
62
62
  Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
63
63
  ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
64
64
  end
65
- Numo::DFloat[*log_likelihoods].transpose
65
+ Numo::DFloat[*log_likelihoods].transpose.dup
66
66
  end
67
67
  end
68
68
  end
@@ -67,7 +67,7 @@ module Rumale
67
67
  log_likelihoods = Array.new(n_classes) do |l|
68
68
  Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
69
69
  end
70
- Numo::DFloat[*log_likelihoods].transpose
70
+ Numo::DFloat[*log_likelihoods].transpose.dup
71
71
  end
72
72
  end
73
73
  end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # KernelCalculator is a class that calculates the kernel matrix with training data.
10
+ #
11
+ # @example
12
+ # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
13
+ # regressor = Rumale::KernelMachine::KernelRidge.new
14
+ # pipeline = Rumale::Pipeline::Pipeline.new(
15
+ # steps: { trs: transformer, est: regressor }
16
+ # )
17
+ # pipeline.fit(x_train, y_train)
18
+ # results = pipeline.predict(x_test)
19
+ class KernelCalculator
20
+ include Base::BaseEstimator
21
+ include Base::Transformer
22
+
23
+ # Returns the training data for calculating kernel matrix.
24
+ # @return [Numo::DFloat] (shape: n_components, n_features)
25
+ attr_reader :components
26
+
27
+ # Create a new transformer that transforms feature vectors into a kernel matrix.
28
+ #
29
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
30
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
31
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
32
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
33
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
34
+ check_params_string(kernel: kernel)
35
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree)
36
+ @params = {}
37
+ @params[:kernel] = kernel
38
+ @params[:gamma] = gamma
39
+ @params[:degree] = degree
40
+ @params[:coef] = coef
41
+ @components = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @overload fit(x) -> KernelCalculator
47
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
48
+ # @return [KernelCalculator] The learned transformer itself.
49
+ def fit(x, _y = nil)
50
+ x = check_convert_sample_array(x)
51
+ @components = x.dup
52
+ self
53
+ end
54
+
55
+ # Fit the model with training data, and then transform them with the learned model.
56
+ #
57
+ # @overload fit_transform(x) -> Numo::DFloat
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
59
+ # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
60
+ def fit_transform(x, y = nil)
61
+ x = check_convert_sample_array(x)
62
+ fit(x, y).transform(x)
63
+ end
64
+
65
+ # Transform the given data with the learned model.
66
+ #
67
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
68
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
69
+ def transform(x)
70
+ x = check_convert_sample_array(x)
71
+ kernel_mat(x, @components)
72
+ end
73
+
74
+ private
75
+
76
+ def kernel_mat(x, y)
77
+ case @params[:kernel]
78
+ when 'rbf'
79
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
80
+ when 'poly'
81
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
82
+ when 'sigmoid'
83
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
84
+ when 'linear'
85
+ Rumale::PairwiseMetric.linear_kernel(x, y)
86
+ else
87
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/tree/node'
5
+ require 'rumale/rumaleext'
5
6
 
6
7
  module Rumale
7
8
  # This module consists of the classes that implement tree models.
@@ -44,21 +45,25 @@ module Rumale
44
45
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
45
46
  def apply(x)
46
47
  x = check_convert_sample_array(x)
47
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
48
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
48
49
  end
49
50
 
50
51
  private
51
52
 
52
- def apply_at_node(node, sample)
53
- return node.leaf_id if node.leaf
54
- return apply_at_node(node.left, sample) if node.right.nil?
55
- return apply_at_node(node.right, sample) if node.left.nil?
56
-
57
- if sample[node.feature_id] <= node.threshold
58
- apply_at_node(node.left, sample)
59
- else
60
- apply_at_node(node.right, sample)
53
+ def partial_apply(tree, sample)
54
+ node = tree
55
+ until node.leaf
56
+ # :nocov:
57
+ node = if node.right.nil?
58
+ node.left
59
+ elsif node.left.nil?
60
+ node.right
61
+ # :nocov:
62
+ else
63
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
64
+ end
61
65
  end
66
+ node.leaf_id
62
67
  end
63
68
 
64
69
  def build_tree(x, y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/classifier'
6
5
 
@@ -101,21 +100,25 @@ module Rumale
101
100
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
102
101
  def predict_proba(x)
103
102
  x = check_convert_sample_array(x)
104
- Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_proba_at_node(@tree, x[n, true]) })]
103
+ Numo::DFloat[*(Array.new(x.shape[0]) { |n| partial_predict_proba(@tree, x[n, true]) })]
105
104
  end
106
105
 
107
106
  private
108
107
 
109
- def predict_proba_at_node(node, sample)
110
- return node.probs if node.leaf
111
- return predict_proba_at_node(node.left, sample) if node.right.nil?
112
- return predict_proba_at_node(node.right, sample) if node.left.nil?
113
-
114
- if sample[node.feature_id] <= node.threshold
115
- predict_proba_at_node(node.left, sample)
116
- else
117
- predict_proba_at_node(node.right, sample)
108
+ def partial_predict_proba(tree, sample)
109
+ node = tree
110
+ until node.leaf
111
+ # :nocov:
112
+ node = if node.right.nil?
113
+ node.left
114
+ elsif node.left.nil?
115
+ node.right
116
+ # :nocov:
117
+ else
118
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
119
+ end
118
120
  end
121
+ node.probs
119
122
  end
120
123
 
121
124
  def stop_growing?(y)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/tree/base_decision_tree'
5
4
  require 'rumale/base/regressor'
6
5
 
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/rumale'
4
3
  require 'rumale/base/base_estimator'
5
4
  require 'rumale/base/regressor'
5
+ require 'rumale/rumaleext'
6
6
  require 'rumale/tree/node'
7
7
 
8
8
  module Rumale
@@ -114,21 +114,25 @@ module Rumale
114
114
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
115
115
  def apply(x)
116
116
  x = check_convert_sample_array(x)
117
- Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
117
+ Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
118
118
  end
119
119
 
120
120
  private
121
121
 
122
- def apply_at_node(node, sample)
123
- return node.leaf_id if node.leaf
124
- return apply_at_node(node.left, sample) if node.right.nil?
125
- return apply_at_node(node.right, sample) if node.left.nil?
126
-
127
- if sample[node.feature_id] <= node.threshold
128
- apply_at_node(node.left, sample)
129
- else
130
- apply_at_node(node.right, sample)
122
+ def partial_apply(tree, sample)
123
+ node = tree
124
+ until node.leaf
125
+ # :nocov:
126
+ node = if node.right.nil?
127
+ node.left
128
+ elsif node.left.nil?
129
+ node.right
130
+ # :nocov:
131
+ else
132
+ sample[node.feature_id] <= node.threshold ? node.left : node.right
133
+ end
131
134
  end
135
+ node.leaf_id
132
136
  end
133
137
 
134
138
  def build_tree(x, y, g, h)
@@ -27,6 +27,7 @@ module Rumale
27
27
  y
28
28
  end
29
29
 
30
+ # @deprecated Use check_convert_sample_array instead of this method.
30
31
  # @!visibility private
31
32
  def check_sample_array(x)
32
33
  raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
@@ -35,6 +36,7 @@ module Rumale
35
36
  nil
36
37
  end
37
38
 
39
+ # @deprecated Use check_convert_label_array instead of this method.
38
40
  # @!visibility private
39
41
  def check_label_array(y)
40
42
  raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
@@ -43,6 +45,7 @@ module Rumale
43
45
  nil
44
46
  end
45
47
 
48
+ # @deprecated Use check_convert_tvalue_array instead of this method.
46
49
  # @!visibility private
47
50
  def check_tvalue_array(y)
48
51
  raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
@@ -64,49 +67,58 @@ module Rumale
64
67
  nil
65
68
  end
66
69
 
70
+ # TODO: Better to replace with RBS in the future.
67
71
  # @!visibility private
68
72
  def check_params_type(type, params = {})
69
73
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
70
74
  nil
71
75
  end
72
76
 
77
+ # TODO: Better to replace with RBS in the future.
73
78
  # @!visibility private
74
79
  def check_params_type_or_nil(type, params = {})
75
80
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
76
81
  nil
77
82
  end
78
83
 
84
+ # TODO: Better to replace with RBS in the future.
79
85
  # @!visibility private
80
86
  def check_params_numeric(params = {})
81
87
  check_params_type(Numeric, params)
82
88
  end
83
89
 
90
+ # TODO: Better to replace with RBS in the future.
84
91
  # @!visibility private
85
92
  def check_params_numeric_or_nil(params = {})
86
93
  check_params_type_or_nil(Numeric, params)
87
94
  end
88
95
 
96
+ # @deprecated Use check_params_numeric instead of this method.
89
97
  # @!visibility private
90
98
  def check_params_float(params = {})
91
99
  check_params_type(Float, params)
92
100
  end
93
101
 
102
+ # @deprecated Use check_params_numeric instead of this method.
94
103
  # @!visibility private
95
104
  def check_params_integer(params = {})
96
105
  check_params_type(Integer, params)
97
106
  end
98
107
 
108
+ # TODO: Better to replace with RBS in the future.
99
109
  # @!visibility private
100
110
  def check_params_string(params = {})
101
111
  check_params_type(String, params)
102
112
  end
103
113
 
114
+ # TODO: Better to replace with RBS in the future.
104
115
  # @!visibility private
105
116
  def check_params_boolean(params = {})
106
117
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
107
118
  nil
108
119
  end
109
120
 
121
+ # TODO: Better to replace with RBS in the future.
110
122
  # @!visibility private
111
123
  def check_params_positive(params = {})
112
124
  params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.2'
6
+ VERSION = '0.23.1'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.2
4
+ version: 0.23.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-10 00:00:00.000000000 Z
11
+ date: 2021-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -57,11 +57,13 @@ extensions:
57
57
  - ext/rumale/extconf.rb
58
58
  extra_rdoc_files: []
59
59
  files:
60
+ - ".clang-format"
61
+ - ".coveralls.yml"
60
62
  - ".github/workflows/build.yml"
63
+ - ".github/workflows/coverage.yml"
61
64
  - ".gitignore"
62
65
  - ".rspec"
63
66
  - ".rubocop.yml"
64
- - ".travis.yml"
65
67
  - CHANGELOG.md
66
68
  - CODE_OF_CONDUCT.md
67
69
  - Gemfile
@@ -69,8 +71,8 @@ files:
69
71
  - README.md
70
72
  - Rakefile
71
73
  - ext/rumale/extconf.rb
72
- - ext/rumale/rumale.c
73
- - ext/rumale/rumale.h
74
+ - ext/rumale/rumaleext.c
75
+ - ext/rumale/rumaleext.h
74
76
  - ext/rumale/tree.c
75
77
  - ext/rumale/tree.h
76
78
  - lib/rumale.rb
@@ -106,6 +108,8 @@ files:
106
108
  - lib/rumale/ensemble/random_forest_regressor.rb
107
109
  - lib/rumale/ensemble/stacking_classifier.rb
108
110
  - lib/rumale/ensemble/stacking_regressor.rb
111
+ - lib/rumale/ensemble/voting_classifier.rb
112
+ - lib/rumale/ensemble/voting_regressor.rb
109
113
  - lib/rumale/evaluation_measure/accuracy.rb
110
114
  - lib/rumale/evaluation_measure/adjusted_rand_score.rb
111
115
  - lib/rumale/evaluation_measure/calinski_harabasz_score.rb
@@ -135,12 +139,14 @@ files:
135
139
  - lib/rumale/kernel_machine/kernel_fda.rb
136
140
  - lib/rumale/kernel_machine/kernel_pca.rb
137
141
  - lib/rumale/kernel_machine/kernel_ridge.rb
142
+ - lib/rumale/kernel_machine/kernel_ridge_classifier.rb
138
143
  - lib/rumale/kernel_machine/kernel_svc.rb
139
144
  - lib/rumale/linear_model/base_sgd.rb
140
145
  - lib/rumale/linear_model/elastic_net.rb
141
146
  - lib/rumale/linear_model/lasso.rb
142
147
  - lib/rumale/linear_model/linear_regression.rb
143
148
  - lib/rumale/linear_model/logistic_regression.rb
149
+ - lib/rumale/linear_model/nnls.rb
144
150
  - lib/rumale/linear_model/ridge.rb
145
151
  - lib/rumale/linear_model/svc.rb
146
152
  - lib/rumale/linear_model/svr.rb
@@ -178,6 +184,7 @@ files:
178
184
  - lib/rumale/pipeline/pipeline.rb
179
185
  - lib/rumale/preprocessing/bin_discretizer.rb
180
186
  - lib/rumale/preprocessing/binarizer.rb
187
+ - lib/rumale/preprocessing/kernel_calculator.rb
181
188
  - lib/rumale/preprocessing/l1_normalizer.rb
182
189
  - lib/rumale/preprocessing/l2_normalizer.rb
183
190
  - lib/rumale/preprocessing/label_binarizer.rb
@@ -226,7 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
226
233
  - !ruby/object:Gem::Version
227
234
  version: '0'
228
235
  requirements: []
229
- rubygems_version: 3.1.4
236
+ rubygems_version: 3.1.6
230
237
  signing_key:
231
238
  specification_version: 4
232
239
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning