rumale 0.22.3 → 0.22.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2bcd9baeafc1a271f75ccd74123f50ebd9d4fbe9065c2583f376c562f8e49155
4
- data.tar.gz: 937dda6bbe4c41953f1e6eb1ea205eaa54277ae9f4202fa8a1e7e789348a76ad
3
+ metadata.gz: 4936b7c7b0ed920383f88743f8eba2e827d586dae471e40a6974dd1fe19342fe
4
+ data.tar.gz: 5a33c242b3cd881b0003db5e5f2d77905d0571442eb7494a64dff08262ce0c14
5
5
  SHA512:
6
- metadata.gz: cbad4cc283bb449116b360bc4ef8002928add3399005bcc30aaccdf95ea03233f0d035862de643b4aa4d688eedbeaaa7dc029c67a2336156d7e03c9435468cfa
7
- data.tar.gz: 83bfa0f53d7c0e094f271bfb3ddfef21ca58d41d77e1278886b5e26216a5b614629c9be33bc587bccc62e280612c75dbd0356fce772a727ed8cc003f86a03976
6
+ metadata.gz: b45a243c247610d918eeb6cfbb31c461e5773b5404c989fe7e0b8758e0482d165ea1e0cf1d61642d71233458821e1b92e45eb6ff0d0fcb11080c6c1e9692ef91
7
+ data.tar.gz: feddfc807995b08e753b1ad635901f2db8e806e300478a1f6bdb24a5bf1123cb7fbd0ee402da92ddcdd079a8ad653eec4224e22be9d2c6609ea73ea84bc47ca1
@@ -6,8 +6,9 @@ jobs:
6
6
  build:
7
7
  runs-on: ubuntu-latest
8
8
  strategy:
9
+ fail-fast: false
9
10
  matrix:
10
- ruby: [ '2.5', '2.6', '2.7' ]
11
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
14
  - name: Install BLAS and LAPACK
@@ -17,7 +18,9 @@ jobs:
17
18
  with:
18
19
  ruby-version: ${{ matrix.ruby }}
19
20
  - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
20
23
  run: |
21
- gem install bundler
24
+ gem install --no-document bundler
22
25
  bundle install --jobs 4 --retry 3
23
26
  bundle exec rake
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # 0.22.4
2
+ - Add classifier and regressor classes for voting ensemble method.
3
+ - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
4
+ - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
5
+ - Refactor some code.
6
+ - Fix some typos on API documentation.
7
+
1
8
  # 0.22.3
2
9
  - Add regressor class for non-negative least square method.
3
10
  - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ gemspec
6
6
  gem 'mmh3', '>= 1.0'
7
7
  gem 'numo-linalg', '>= 0.1.4'
8
8
  gem 'parallel', '>= 1.17.0'
9
- gem 'rake', '~> 12.0'
9
+ gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'rspec', '~> 3.0'
12
12
  gem 'rubocop', '~> 1.0'
data/ext/rumale/tree.c CHANGED
@@ -5,9 +5,8 @@ RUBY_EXTERN VALUE mRumale;
5
5
  double*
6
6
  alloc_dbl_array(const long n_dimensions)
7
7
  {
8
- long i;
9
8
  double* arr = ALLOC_N(double, n_dimensions);
10
- for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
9
+ memset(arr, 0, n_dimensions * sizeof(double));
11
10
  return arr;
12
11
  }
13
12
 
data/lib/rumale.rb CHANGED
@@ -62,6 +62,8 @@ require 'rumale/ensemble/extra_trees_classifier'
62
62
  require 'rumale/ensemble/extra_trees_regressor'
63
63
  require 'rumale/ensemble/stacking_classifier'
64
64
  require 'rumale/ensemble/stacking_regressor'
65
+ require 'rumale/ensemble/voting_classifier'
66
+ require 'rumale/ensemble/voting_regressor'
65
67
  require 'rumale/clustering/k_means'
66
68
  require 'rumale/clustering/mini_batch_k_means'
67
69
  require 'rumale/clustering/k_medoids'
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
5
6
 
6
7
  module Rumale
7
8
  module Ensemble
@@ -10,18 +11,18 @@ module Rumale
10
11
  # @example
11
12
  # estimators = {
12
13
  # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
14
15
  # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
15
16
  # }
16
17
  # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
17
18
  # classifier = Rumale::Ensemble::StackingClassifier.new(
18
19
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
20
  # )
20
- # classifier.fit(training_samples, traininig_labels)
21
+ # classifier.fit(training_samples, training_labels)
21
22
  # results = classifier.predict(testing_samples)
22
23
  #
23
24
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
26
  class StackingClassifier
26
27
  include Base::BaseEstimator
27
28
  include Base::Classifier
@@ -149,7 +150,7 @@ module Rumale
149
150
 
150
151
  # Predict probability for samples.
151
152
  #
152
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
153
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
153
154
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
154
155
  def predict_proba(x)
155
156
  x = check_convert_sample_array(x)
@@ -10,18 +10,18 @@ module Rumale
10
10
  # @example
11
11
  # estimators = {
12
12
  # las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumele::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
14
  # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
15
  # }
16
16
  # meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
17
17
  # regressor = Rumale::Ensemble::StackingRegressor.new(
18
18
  # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
19
  # )
20
- # regressor.fit(training_samples, traininig_values)
20
+ # regressor.fit(training_samples, training_values)
21
21
  # results = regressor.predict(testing_samples)
22
22
  #
23
23
  # *Reference*
24
- # - Zhou, Z-H., "Ensemble Mehotds - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
25
  class StackingRegressor
26
26
  include Base::BaseEstimator
27
27
  include Base::Regressor
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
6
+
7
+ module Rumale
8
+ module Ensemble
9
+ # VotingClassifier is a class that implements classifier with voting ensemble method.
10
+ #
11
+ # @example
12
+ # estimators = {
13
+ # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
+ # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
+ # }
17
+ # weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
18
+ #
19
+ # classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
20
+ # classifier.fit(x_train, y_train)
21
+ # results = classifier.predict(x_test)
22
+ #
23
+ # *Reference*
24
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
+ class VotingClassifier
26
+ include Base::BaseEstimator
27
+ include Base::Classifier
28
+
29
+ # Return the sub-classifiers that voted.
30
+ # @return [Hash<Symbol,Classifier>]
31
+ attr_reader :estimators
32
+
33
+ # Return the class labels.
34
+ # @return [Numo::Int32] (size: n_classes)
35
+ attr_reader :classes
36
+
37
+ # Create a new ensembled classifier with voting rule.
38
+ #
39
+ # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
40
+ # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
41
+ # @param voting [String] The voting rule for the predicted results of each classifier.
42
+ # If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
43
+ # If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
44
+ def initialize(estimators:, weights: nil, voting: 'hard')
45
+ check_params_type(Hash, estimators: estimators)
46
+ check_params_type_or_nil(Hash, weights: weights)
47
+ check_params_string(voting: voting)
48
+ @estimators = estimators
49
+ @classes = nil
50
+ @params = {}
51
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
52
+ @params[:voting] = voting
53
+ end
54
+
55
+ # Fit the model with given training data.
56
+ #
57
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
58
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
59
+ # @return [VotingClassifier] The learned classifier itself.
60
+ def fit(x, y)
61
+ x = check_convert_sample_array(x)
62
+ y = check_convert_label_array(y)
63
+ check_sample_label_size(x, y)
64
+
65
+ @encoder = Rumale::Preprocessing::LabelEncoder.new
66
+ y_encoded = @encoder.fit_transform(y)
67
+ @classes = Numo::NArray[*@encoder.classes]
68
+ @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
69
+
70
+ self
71
+ end
72
+
73
+ # Calculate confidence scores for samples.
74
+ #
75
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
76
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
77
+ def decision_function(x)
78
+ x = check_convert_sample_array(x)
79
+ return predict_proba(x) if soft_voting?
80
+
81
+ n_samples = x.shape[0]
82
+ n_classes = @classes.size
83
+ z = Numo::DFloat.zeros(n_samples, n_classes)
84
+ @estimators.each do |name, estimator|
85
+ estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
86
+ end
87
+ z
88
+ end
89
+
90
+ # Predict class labels for samples.
91
+ #
92
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
93
+ # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
94
+ def predict(x)
95
+ x = check_convert_sample_array(x)
96
+ n_samples = x.shape[0]
97
+ n_classes = @classes.size
98
+ z = decision_function(x)
99
+ predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
100
+ Numo::Int32.cast(@encoder.inverse_transform(predicted))
101
+ end
102
+
103
+ # Predict probability for samples.
104
+ #
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
106
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
107
+ def predict_proba(x)
108
+ x = check_convert_sample_array(x)
109
+ n_samples = x.shape[0]
110
+ n_classes = @classes.size
111
+ z = Numo::DFloat.zeros(n_samples, n_classes)
112
+ sum_weight = @params[:weights].each_value.inject(&:+)
113
+ @estimators.each do |name, estimator|
114
+ z += @params[:weights][name] * estimator.predict_proba(x)
115
+ end
116
+ z /= sum_weight
117
+ end
118
+
119
+ private
120
+
121
+ def soft_voting?
122
+ @params[:voting] == 'soft'
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/regressor'
5
+
6
+ module Rumale
7
+ module Ensemble
8
+ # VotingRegressor is a class that implements regressor with voting ensemble method.
9
+ #
10
+ # @example
11
+ # estimators = {
12
+ # rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
13
+ # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
+ # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
+ # }
16
+ # weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
17
+ #
18
+ # regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
19
+ # regressor.fit(x_train, y_train)
20
+ # results = regressor.predict(x_test)
21
+ #
22
+ # *Reference*
23
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
+ class VotingRegressor
25
+ include Base::BaseEstimator
26
+ include Base::Regressor
27
+
28
+ # Return the sub-regressors that voted.
29
+ # @return [Hash<Symbol,Regressor>]
30
+ attr_reader :estimators
31
+
32
+ # Create a new ensembled regressor with voting rule.
33
+ #
34
+ # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
35
+ # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
36
+ def initialize(estimators:, weights: nil)
37
+ check_params_type(Hash, estimators: estimators)
38
+ check_params_type_or_nil(Hash, weights: weights)
39
+ @estimators = estimators
40
+ @n_outputs = nil
41
+ @params = {}
42
+ @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
43
+ end
44
+
45
+ # Fit the model with given training data.
46
+ #
47
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
48
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
49
+ # @return [VotingRegressor] The learned regressor itself.
50
+ def fit(x, y)
51
+ x = check_convert_sample_array(x)
52
+ y = check_convert_tvalue_array(y)
53
+ check_sample_tvalue_size(x, y)
54
+
55
+ @n_outputs = y.ndim > 1 ? y.shape[1] : 1
56
+ @estimators.each_key { |name| @estimators[name].fit(x, y) }
57
+
58
+ self
59
+ end
60
+
61
+ # Predict values for samples.
62
+ #
63
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
64
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
65
+ def predict(x)
66
+ x = check_convert_sample_array(x)
67
+ z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
68
+ sum_weight = @params[:weights].each_value.inject(&:+)
69
+ @estimators.each do |name, estimator|
70
+ z += @params[:weights][name] * estimator.predict(x)
71
+ end
72
+ z / sum_weight
73
+ end
74
+
75
+ private
76
+
77
+ def single_target?
78
+ @n_outputs == 1
79
+ end
80
+ end
81
+ end
82
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.3'
6
+ VERSION = '0.22.4'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.3
4
+ version: 0.22.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-23 00:00:00.000000000 Z
11
+ date: 2021-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -108,6 +108,8 @@ files:
108
108
  - lib/rumale/ensemble/random_forest_regressor.rb
109
109
  - lib/rumale/ensemble/stacking_classifier.rb
110
110
  - lib/rumale/ensemble/stacking_regressor.rb
111
+ - lib/rumale/ensemble/voting_classifier.rb
112
+ - lib/rumale/ensemble/voting_regressor.rb
111
113
  - lib/rumale/evaluation_measure/accuracy.rb
112
114
  - lib/rumale/evaluation_measure/adjusted_rand_score.rb
113
115
  - lib/rumale/evaluation_measure/calinski_harabasz_score.rb
@@ -229,7 +231,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
231
  - !ruby/object:Gem::Version
230
232
  version: '0'
231
233
  requirements: []
232
- rubygems_version: 3.2.3
234
+ rubygems_version: 3.2.7
233
235
  signing_key:
234
236
  specification_version: 4
235
237
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning