rumale 0.21.0 → 0.22.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd5ca16629a5258be9e577771dc8c6b42dbfdf3a60a4c43d4ee170cc17b72bea
4
- data.tar.gz: 70b26bcf0b39bb8e716b9bbb4aba100c496152b1c4e71879105046440c7d8758
3
+ metadata.gz: 4936b7c7b0ed920383f88743f8eba2e827d586dae471e40a6974dd1fe19342fe
4
+ data.tar.gz: 5a33c242b3cd881b0003db5e5f2d77905d0571442eb7494a64dff08262ce0c14
5
5
  SHA512:
6
- metadata.gz: 645a6bda6e3601534c69f5ecfbd840c1d6c1ed7a5a3b8bd57995621a03d970cd02e9749a8a70be5af2678c029a26a5e6e1c32376a4514a64e96d6a9b4b12aa3e
7
- data.tar.gz: 5904c64da9cc30cf0c288dfbeb3051bca3333e588e153cf19619c169d713e93edcb95e6902134e58c823d621ebad9e6a56310123c110b6d810033f5f96a40fbb
6
+ metadata.gz: b45a243c247610d918eeb6cfbb31c461e5773b5404c989fe7e0b8758e0482d165ea1e0cf1d61642d71233458821e1b92e45eb6ff0d0fcb11080c6c1e9692ef91
7
+ data.tar.gz: feddfc807995b08e753b1ad635901f2db8e806e300478a1f6bdb24a5bf1123cb7fbd0ee402da92ddcdd079a8ad653eec4224e22be9d2c6609ea73ea84bc47ca1
data/.coveralls.yml CHANGED
@@ -1 +1 @@
1
- service_name: travis-ci
1
+ service_name: github-ci
@@ -0,0 +1,26 @@
1
+ name: build
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Install BLAS and LAPACK
15
+ run: sudo apt-get install -y libopenblas-dev liblapacke-dev
16
+ - name: Set up Ruby ${{ matrix.ruby }}
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby }}
20
+ - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
23
+ run: |
24
+ gem install --no-document bundler
25
+ bundle install --jobs 4 --retry 3
26
+ bundle exec rake
@@ -0,0 +1,28 @@
1
+ name: coverage
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ coverage:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Install BLAS and LAPACK
15
+ run: sudo apt-get install -y libopenblas-dev liblapacke-dev
16
+ - name: Set up Ruby 2.7
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: '2.7'
20
+ - name: Build and test with Rake
21
+ run: |
22
+ gem install bundler
23
+ bundle install
24
+ bundle exec rake
25
+ - name: Coveralls GitHub Action
26
+ uses: coverallsapp/github-action@v1.1.2
27
+ with:
28
+ github-token: ${{ secrets.GITHUB_TOKEN }}
data/.gitignore CHANGED
@@ -16,6 +16,7 @@
16
16
  tags
17
17
  .DS_Store
18
18
  .ruby-version
19
+ iterate.dat
19
20
  /spec/dump_dbl.t
20
21
  /spec/dump_int.t
21
22
  /spec/dump_mult_dbl.t
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  require:
2
2
  - rubocop-performance
3
+ - rubocop-rake
3
4
  - rubocop-rspec
4
5
 
5
6
  AllCops:
data/CHANGELOG.md CHANGED
@@ -1,3 +1,36 @@
1
+ # 0.22.4
2
+ - Add classifier and regressor classes for voting ensemble method.
3
+ - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
4
+ - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
5
+ - Refactor some code.
6
+ - Fix some typos on API documentation.
7
+
8
+ # 0.22.3
9
+ - Add regressor class for non-negative least square method.
10
+ - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
11
+ - Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
12
+ - In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
13
+ the solver parameter. If you use 'sgd' solver, you need to specify it explicitly.
14
+ - Add GC guard to native extension codes.
15
+ - Update API documentation.
16
+
17
+ # 0.22.2
18
+ - Add classifier and regressor classes for stacking method.
19
+ - [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
20
+ - [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
21
+ - Refactor some code with Rubocop.
22
+
23
+ # 0.22.1
24
+ Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
25
+ - Refactor NeighbourhoodComponentAnalysis.
26
+ - Update API documentation.
27
+
28
+ # 0.22.0
29
+ ## Breaking change
30
+ - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
31
+ This eliminates the need to require the mopti gem when using [NeighbourhoodComponentAnalysis](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/NeighbourhoodComponentAnalysis.html).
32
+ - Add lbfgs solver to [LogisticRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LogisticRegression.html) and make it the default solver.
33
+
1
34
  # 0.21.0
2
35
  ## Breaking change
3
36
  - Change the default value of max_iter argument on LinearModel estimators to 1000.
data/Gemfile CHANGED
@@ -3,14 +3,15 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in rumale.gemspec
4
4
  gemspec
5
5
 
6
- gem 'coveralls', '~> 0.8'
7
6
  gem 'mmh3', '>= 1.0'
8
- gem 'mopti', '>= 0.1.0'
9
7
  gem 'numo-linalg', '>= 0.1.4'
10
8
  gem 'parallel', '>= 1.17.0'
11
- gem 'rake', '~> 12.0'
9
+ gem 'rake', '~> 13.0'
12
10
  gem 'rake-compiler', '~> 1.0'
13
11
  gem 'rspec', '~> 3.0'
14
- gem 'rubocop', '~> 0.91'
12
+ gem 'rubocop', '~> 1.0'
15
13
  gem 'rubocop-performance', '~> 1.8'
16
- gem 'rubocop-rspec', '~> 1.43'
14
+ gem 'rubocop-rake', '~> 0.5'
15
+ gem 'rubocop-rspec', '~> 2.0'
16
+ gem 'simplecov', '~> 0.21'
17
+ gem 'simplecov-lcov', '~> 0.8'
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2020 Atsushi Tatsuma
1
+ Copyright (c) 2017-2021 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -2,10 +2,10 @@
2
2
 
3
3
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
4
4
 
5
- [![Build Status](https://travis-ci.org/yoshoku/rumale.svg?branch=master)](https://travis-ci.org/yoshoku/rumale)
6
- [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
5
+ [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
6
+ [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
- [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
8
+ [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
9
9
  [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
@@ -114,10 +114,10 @@ require 'rumale'
114
114
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
115
115
 
116
116
  # Define the estimator to be evaluated.
117
- lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
117
+ lr = Rumale::LinearModel::LogisticRegression.new
118
118
 
119
119
  # Define the evaluation measure, splitting strategy, and cross validation.
120
- ev = Rumale::EvaluationMeasure::LogLoss.new
120
+ ev = Rumale::EvaluationMeasure::Accuracy.new
121
121
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
122
122
  cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
123
123
 
@@ -125,15 +125,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
125
125
  report = cv.perform(samples, labels)
126
126
 
127
127
  # Output result.
128
- mean_logloss = report[:test_score].inject(:+) / kf.n_splits
129
- puts("5-CV mean log-loss: %.3f" % mean_logloss)
128
+ mean_accuracy = report[:test_score].sum / kf.n_splits
129
+ puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
130
130
  ```
131
131
 
132
132
  Execution of the above scripts result in the following.
133
133
 
134
134
  ```bash
135
135
  $ ruby cross_validation.rb
136
- 5-CV mean log-loss: 0.355
136
+ 5-CV mean accuracy: 95.4%
137
137
  ```
138
138
 
139
139
  ### Example 3. Pipeline
@@ -144,10 +144,10 @@ require 'rumale'
144
144
  # Load dataset.
145
145
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
146
146
 
147
- # Construct pipeline with kernel approximation and SVC.
148
- rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
149
- svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
150
- pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
147
+ # Construct pipeline with kernel approximation and LogisticRegression.
148
+ rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
149
+ lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
150
+ pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
151
151
 
152
152
  # Define the splitting strategy and cross validation.
153
153
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -157,7 +157,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
157
157
  report = cv.perform(samples, labels)
158
158
 
159
159
  # Output result.
160
- mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
160
+ mean_accuracy = report[:test_score].sum / kf.n_splits
161
161
  puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
162
162
  ```
163
163
 
@@ -177,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
177
177
 
178
178
  Install OpenBLAS library.
179
179
 
180
- Mac:
180
+ macOS:
181
181
 
182
182
  ```bash
183
183
  $ brew install openblas
@@ -186,12 +186,13 @@ $ brew install openblas
186
186
  Ubuntu:
187
187
 
188
188
  ```bash
189
- $ sudo apt-get install gcc gfortran
190
- $ wget https://github.com/xianyi/OpenBLAS/archive/v0.3.5.tar.gz
191
- $ tar xzf v0.3.5.tar.gz
192
- $ cd OpenBLAS-0.3.5
193
- $ make USE_OPENMP=1
194
- $ sudo make PREFIX=/usr/local install
189
+ $ sudo apt-get install libopenblas-dev liblapacke-dev
190
+ ```
191
+
192
+ Windows (MSYS2):
193
+
194
+ ```bash
195
+ $ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
195
196
  ```
196
197
 
197
198
  Install Numo::Linalg gem.
@@ -207,6 +208,37 @@ require 'numo/linalg/autoloader'
207
208
  require 'rumale'
208
209
  ```
209
210
 
211
+ ### Numo::OpenBLAS
212
+ [Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
213
+ and uses that as a background library for Numo::Linalg.
214
+
215
+ Install compilers for building OpenBLAS.
216
+
217
+ macOS:
218
+
219
+ ```bash
220
+ $ brew install gcc gfortran make
221
+ ```
222
+
223
+ Ubuntu:
224
+
225
+ ```bash
226
+ $ sudo apt-get install gcc gfortran make
227
+ ```
228
+
229
+ Install Numo::OpenBLAS gem.
230
+
231
+ ```bash
232
+ $ gem install numo-openblas
233
+ ```
234
+
235
+ Load Numo::OpenBLAS gem instead of Numo::Linalg.
236
+
237
+ ```ruby
238
+ require 'numo/openblas'
239
+ require 'rumale'
240
+ ```
241
+
210
242
  ### Parallel
211
243
  Several estimators in Rumale support parallel processing.
212
244
  Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
@@ -228,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
228
260
  estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
229
261
  ```
230
262
 
263
+ ## Related Projects
264
+ - [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
265
+ - [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
266
+
231
267
  ## Novelties
232
268
 
233
269
  * [Rumale SHOP](https://suzuri.jp/yoshoku)
@@ -245,4 +281,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
245
281
  ## Code of Conduct
246
282
 
247
283
  Everyone interacting in the Rumale project’s codebases, issue trackers,
248
- chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/master/CODE_OF_CONDUCT.md).
284
+ chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).
data/ext/rumale/tree.c CHANGED
@@ -5,9 +5,8 @@ RUBY_EXTERN VALUE mRumale;
5
5
  double*
6
6
  alloc_dbl_array(const long n_dimensions)
7
7
  {
8
- long i;
9
8
  double* arr = ALLOC_N(double, n_dimensions);
10
- for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
9
+ memset(arr, 0, n_dimensions * sizeof(double));
11
10
  return arr;
12
11
  }
13
12
 
@@ -257,10 +256,13 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
257
256
  split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
258
257
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
259
258
  VALUE results = rb_ary_new2(4);
260
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
261
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
262
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
263
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
259
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
260
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
261
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
262
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
263
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
264
+ RB_GC_GUARD(params);
265
+ RB_GC_GUARD(criterion);
264
266
  return results;
265
267
  }
266
268
 
@@ -375,10 +377,13 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
375
377
  split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
376
378
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
377
379
  VALUE results = rb_ary_new2(4);
378
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
379
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
380
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
381
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
380
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
381
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
382
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
383
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
384
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
385
+ RB_GC_GUARD(params);
386
+ RB_GC_GUARD(criterion);
382
387
  return results;
383
388
  }
384
389
 
@@ -464,8 +469,10 @@ find_split_params_grad_reg
464
469
  double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
465
470
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
466
471
  VALUE results = rb_ary_new2(2);
467
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
468
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
472
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
473
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
474
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
475
+ RB_GC_GUARD(params);
469
476
  return results;
470
477
  }
471
478
 
@@ -497,6 +504,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
497
504
 
498
505
  xfree(histogram);
499
506
 
507
+ RB_GC_GUARD(y_nary);
508
+ RB_GC_GUARD(criterion);
509
+
500
510
  return ret;
501
511
  }
502
512
 
@@ -531,6 +541,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
531
541
 
532
542
  xfree(sum_vec);
533
543
 
544
+ RB_GC_GUARD(criterion);
545
+
534
546
  return ret;
535
547
  }
536
548
 
data/lib/rumale.rb CHANGED
@@ -30,6 +30,7 @@ require 'rumale/linear_model/linear_regression'
30
30
  require 'rumale/linear_model/ridge'
31
31
  require 'rumale/linear_model/lasso'
32
32
  require 'rumale/linear_model/elastic_net'
33
+ require 'rumale/linear_model/nnls'
33
34
  require 'rumale/kernel_machine/kernel_svc'
34
35
  require 'rumale/kernel_machine/kernel_pca'
35
36
  require 'rumale/kernel_machine/kernel_fda'
@@ -59,6 +60,10 @@ require 'rumale/ensemble/random_forest_classifier'
59
60
  require 'rumale/ensemble/random_forest_regressor'
60
61
  require 'rumale/ensemble/extra_trees_classifier'
61
62
  require 'rumale/ensemble/extra_trees_regressor'
63
+ require 'rumale/ensemble/stacking_classifier'
64
+ require 'rumale/ensemble/stacking_regressor'
65
+ require 'rumale/ensemble/voting_classifier'
66
+ require 'rumale/ensemble/voting_regressor'
62
67
  require 'rumale/clustering/k_means'
63
68
  require 'rumale/clustering/mini_batch_k_means'
64
69
  require 'rumale/clustering/k_medoids'
@@ -77,6 +82,7 @@ require 'rumale/manifold/tsne'
77
82
  require 'rumale/manifold/mds'
78
83
  require 'rumale/metric_learning/fisher_discriminant_analysis'
79
84
  require 'rumale/metric_learning/neighbourhood_component_analysis'
85
+ require 'rumale/metric_learning/mlkr'
80
86
  require 'rumale/neural_network/adam'
81
87
  require 'rumale/neural_network/base_mlp'
82
88
  require 'rumale/neural_network/mlp_regressor'
@@ -11,13 +11,15 @@ module Rumale
11
11
 
12
12
  private
13
13
 
14
- def enable_linalg?
14
+ def enable_linalg?(warning: true)
15
15
  if defined?(Numo::Linalg).nil?
16
- warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
16
+ warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
17
17
  return false
18
18
  end
19
19
  if Numo::Linalg::VERSION < '0.1.4'
20
- warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
20
+ if warning
21
+ warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
22
+ end
21
23
  return false
22
24
  end
23
25
  true
@@ -59,7 +59,7 @@ module Rumale
59
59
  @params[:solver] = if solver == 'auto'
60
60
  load_linalg? ? 'evd' : 'fpt'
61
61
  else
62
- solver != 'evd' ? 'fpt' : 'evd'
62
+ solver != 'evd' ? 'fpt' : 'evd' # rubocop:disable Style/NegatedIfElseCondition
63
63
  end
64
64
  @params[:n_components] = n_components
65
65
  @params[:max_iter] = max_iter
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_encoder'
6
+
7
+ module Rumale
8
+ module Ensemble
9
+ # StackingClassifier is a class that implements classifier with stacking method.
10
+ #
11
+ # @example
12
+ # estimators = {
13
+ # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
+ # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
+ # }
17
+ # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
18
+ # classifier = Rumale::Ensemble::StackingClassifier.new(
19
+ # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
20
+ # )
21
+ # classifier.fit(training_samples, training_labels)
22
+ # results = classifier.predict(testing_samples)
23
+ #
24
+ # *Reference*
25
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
26
+ class StackingClassifier
27
+ include Base::BaseEstimator
28
+ include Base::Classifier
29
+
30
+ # Return the base classifiers.
31
+ # @return [Hash<Symbol,Classifier>]
32
+ attr_reader :estimators
33
+
34
+ # Return the meta classifier.
35
+ # @return [Classifier]
36
+ attr_reader :meta_estimator
37
+
38
+ # Return the class labels.
39
+ # @return [Numo::Int32] (size: n_classes)
40
+ attr_reader :classes
41
+
42
+ # Return the method used by each base classifier.
43
+ # @return [Hash<Symbol,Symbol>]
44
+ attr_reader :stack_method
45
+
46
+ # Create a new classifier with stacking method.
47
+ #
48
+ # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
49
+ # @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
50
+ # If nil is given, LogisticRegression is used.
51
+ # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
52
+ # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
53
+ # @param stack_method [String] The method name of base classifier for using meta feature extraction.
54
+ # If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
55
+ # on each classifier.
56
+ # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
57
+ # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
58
+ def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
59
+ check_params_type(Hash, estimators: estimators)
60
+ check_params_numeric(n_splits: n_splits)
61
+ check_params_string(stack_method: stack_method)
62
+ check_params_boolean(shuffle: shuffle, passthrough: passthrough)
63
+ check_params_numeric_or_nil(random_seed: random_seed)
64
+ @estimators = estimators
65
+ @meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
66
+ @classes = nil
67
+ @stack_method = nil
68
+ @output_size = nil
69
+ @params = {}
70
+ @params[:n_splits] = n_splits
71
+ @params[:shuffle] = shuffle
72
+ @params[:stack_method] = stack_method
73
+ @params[:passthrough] = passthrough
74
+ @params[:random_seed] = random_seed || srand
75
+ end
76
+
77
+ # Fit the model with given training data.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
80
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
81
+ # @return [StackingClassifier] The learned classifier itself.
82
+ def fit(x, y)
83
+ x = check_convert_sample_array(x)
84
+ y = check_convert_label_array(y)
85
+ check_sample_label_size(x, y)
86
+
87
+ n_samples, n_features = x.shape
88
+
89
+ @encoder = Rumale::Preprocessing::LabelEncoder.new
90
+ y_encoded = @encoder.fit_transform(y)
91
+ @classes = Numo::NArray[*@encoder.classes]
92
+
93
+ # training base classifiers with all training data.
94
+ @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
95
+
96
+ # detecting feature extraction method and its size of output for each base classifier.
97
+ @stack_method = detect_stack_method
98
+ @output_size = detect_output_size(n_features)
99
+
100
+ # extracting meta features with base classifiers.
101
+ n_components = @output_size.values.inject(:+)
102
+ z = Numo::DFloat.zeros(n_samples, n_components)
103
+
104
+ kf = Rumale::ModelSelection::StratifiedKFold.new(
105
+ n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
106
+ )
107
+
108
+ kf.split(x, y_encoded).each do |train_ids, valid_ids|
109
+ x_train = x[train_ids, true]
110
+ y_train = y_encoded[train_ids]
111
+ x_valid = x[valid_ids, true]
112
+ f_start = 0
113
+ @estimators.each_key do |name|
114
+ est_fold = Marshal.load(Marshal.dump(@estimators[name]))
115
+ f_last = f_start + @output_size[name]
116
+ f_position = @output_size[name] == 1 ? f_start : f_start...f_last
117
+ z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
118
+ f_start = f_last
119
+ end
120
+ end
121
+
122
+ # concatenating original features.
123
+ z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
124
+
125
+ # training meta classifier.
126
+ @meta_estimator.fit(z, y_encoded)
127
+
128
+ self
129
+ end
130
+
131
+ # Calculate confidence scores for samples.
132
+ #
133
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
134
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
135
+ def decision_function(x)
136
+ x = check_convert_sample_array(x)
137
+ z = transform(x)
138
+ @meta_estimator.decision_function(z)
139
+ end
140
+
141
+ # Predict class labels for samples.
142
+ #
143
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
144
+ # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
145
+ def predict(x)
146
+ x = check_convert_sample_array(x)
147
+ z = transform(x)
148
+ Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
149
+ end
150
+
151
+ # Predict probability for samples.
152
+ #
153
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
154
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
155
+ def predict_proba(x)
156
+ x = check_convert_sample_array(x)
157
+ z = transform(x)
158
+ @meta_estimator.predict_proba(z)
159
+ end
160
+
161
+ # Transform the given data with the learned model.
162
+ #
163
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
164
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
165
+ def transform(x)
166
+ x = check_convert_sample_array(x)
167
+ n_samples = x.shape[0]
168
+ n_components = @output_size.values.inject(:+)
169
+ z = Numo::DFloat.zeros(n_samples, n_components)
170
+ f_start = 0
171
+ @estimators.each_key do |name|
172
+ f_last = f_start + @output_size[name]
173
+ f_position = @output_size[name] == 1 ? f_start : f_start...f_last
174
+ z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
175
+ f_start = f_last
176
+ end
177
+ z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
178
+ z
179
+ end
180
+
181
+ # Fit the model with training data, and then transform them with the learned model.
182
+ #
183
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
184
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
185
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
186
+ def fit_transform(x, y)
187
+ x = check_convert_sample_array(x)
188
+ y = check_convert_label_array(y)
189
+ fit(x, y).transform(x)
190
+ end
191
+
192
+ private
193
+
194
+ STACK_METHODS = %i[predict_proba decision_function predict].freeze
195
+
196
+ private_constant :STACK_METHODS
197
+
198
+ def detect_stack_method
199
+ if @params[:stack_method] == 'auto'
200
+ @estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
201
+ else
202
+ @estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
203
+ end
204
+ end
205
+
206
+ def detect_output_size(n_features)
207
+ x_dummy = Numo::DFloat.new(2, n_features).rand
208
+ @estimators.each_key.with_object({}) do |name, obj|
209
+ output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
210
+ obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
211
+ end
212
+ end
213
+ end
214
+ end
215
+ end