rumale 0.20.3 → 0.22.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -1
- data/.github/workflows/build.yml +23 -0
- data/.github/workflows/coverage.yml +28 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +30 -0
- data/Gemfile +5 -4
- data/LICENSE.txt +1 -1
- data/README.md +57 -21
- data/ext/rumale/tree.c +23 -10
- data/lib/rumale.rb +4 -0
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/decomposition/pca.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +214 -0
- data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
- data/lib/rumale/linear_model/base_sgd.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +3 -3
- data/lib/rumale/linear_model/lasso.rb +3 -3
- data/lib/rumale/linear_model/linear_regression.rb +65 -36
- data/lib/rumale/linear_model/logistic_regression.rb +123 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +72 -35
- data/lib/rumale/linear_model/svc.rb +6 -5
- data/lib/rumale/linear_model/svr.rb +6 -5
- data/lib/rumale/metric_learning/mlkr.rb +161 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +18 -47
- data/lib/rumale/pairwise_metric.rb +1 -1
- data/lib/rumale/validation.rb +13 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +2 -1
- metadata +24 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2bcd9baeafc1a271f75ccd74123f50ebd9d4fbe9065c2583f376c562f8e49155
|
|
4
|
+
data.tar.gz: 937dda6bbe4c41953f1e6eb1ea205eaa54277ae9f4202fa8a1e7e789348a76ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cbad4cc283bb449116b360bc4ef8002928add3399005bcc30aaccdf95ea03233f0d035862de643b4aa4d688eedbeaaa7dc029c67a2336156d7e03c9435468cfa
|
|
7
|
+
data.tar.gz: 83bfa0f53d7c0e094f271bfb3ddfef21ca58d41d77e1278886b5e26216a5b614629c9be33bc587bccc62e280612c75dbd0356fce772a727ed8cc003f86a03976
|
data/.coveralls.yml
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
service_name:
|
|
1
|
+
service_name: github-ci
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: build
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
ruby: [ '2.5', '2.6', '2.7' ]
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v2
|
|
13
|
+
- name: Install BLAS and LAPACK
|
|
14
|
+
run: sudo apt-get install -y libopenblas-dev liblapacke-dev
|
|
15
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
|
16
|
+
uses: actions/setup-ruby@v1
|
|
17
|
+
with:
|
|
18
|
+
ruby-version: ${{ matrix.ruby }}
|
|
19
|
+
- name: Build and test with Rake
|
|
20
|
+
run: |
|
|
21
|
+
gem install bundler
|
|
22
|
+
bundle install --jobs 4 --retry 3
|
|
23
|
+
bundle exec rake
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: coverage
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
coverage:
|
|
11
|
+
runs-on: ubuntu-20.04
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v2
|
|
14
|
+
- name: Install BLAS and LAPACK
|
|
15
|
+
run: sudo apt-get install -y libopenblas-dev liblapacke-dev
|
|
16
|
+
- name: Set up Ruby 2.7
|
|
17
|
+
uses: actions/setup-ruby@v1
|
|
18
|
+
with:
|
|
19
|
+
ruby-version: '2.7'
|
|
20
|
+
- name: Build and test with Rake
|
|
21
|
+
run: |
|
|
22
|
+
gem install bundler
|
|
23
|
+
bundle install
|
|
24
|
+
bundle exec rake
|
|
25
|
+
- name: Coveralls GitHub Action
|
|
26
|
+
uses: coverallsapp/github-action@v1.1.2
|
|
27
|
+
with:
|
|
28
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,33 @@
|
|
|
1
|
+
# 0.22.3
|
|
2
|
+
- Add regressor class for non-negative least squares method.
|
|
3
|
+
- [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
|
|
4
|
+
- Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
|
|
5
|
+
- In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
|
|
6
|
+
the solver parameter. If you use 'sgd' solver, you need to specify it explicitly.
|
|
7
|
+
- Add GC guard to native extension codes.
|
|
8
|
+
- Update API documentation.
|
|
9
|
+
|
|
10
|
+
# 0.22.2
|
|
11
|
+
- Add classifier and regressor classes for stacking method.
|
|
12
|
+
- [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
|
|
13
|
+
- [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
|
|
14
|
+
- Refactor some codes with Rubocop.
|
|
15
|
+
|
|
16
|
+
# 0.22.1
|
|
17
|
+
- Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
|
|
18
|
+
- Refactor NeighbourhoodComponentAnalysis.
|
|
19
|
+
- Update API documentation.
|
|
20
|
+
|
|
21
|
+
# 0.22.0
|
|
22
|
+
## Breaking change
|
|
23
|
+
- Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
|
|
24
|
+
This eliminates the need to require the mopti gem when using [NeighbourhoodComponentAnalysis](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/NeighbourhoodComponentAnalysis.html).
|
|
25
|
+
- Add lbfgs solver to [LogisticRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LogisticRegression.html) and make it the default solver.
|
|
26
|
+
|
|
27
|
+
# 0.21.0
|
|
28
|
+
## Breaking change
|
|
29
|
+
- Change the default value of max_iter argument on LinearModel estimators to 1000.
|
|
30
|
+
|
|
1
31
|
# 0.20.3
|
|
2
32
|
- Fix to use automatic solver of PCA in NeighbourhoodComponentAnalysis.
|
|
3
33
|
- Refactor some codes with Rubocop.
|
data/Gemfile
CHANGED
|
@@ -3,14 +3,15 @@ source 'https://rubygems.org'
|
|
|
3
3
|
# Specify your gem's dependencies in rumale.gemspec
|
|
4
4
|
gemspec
|
|
5
5
|
|
|
6
|
-
gem 'coveralls', '~> 0.8'
|
|
7
6
|
gem 'mmh3', '>= 1.0'
|
|
8
|
-
gem 'mopti', '>= 0.1.0'
|
|
9
7
|
gem 'numo-linalg', '>= 0.1.4'
|
|
10
8
|
gem 'parallel', '>= 1.17.0'
|
|
11
9
|
gem 'rake', '~> 12.0'
|
|
12
10
|
gem 'rake-compiler', '~> 1.0'
|
|
13
11
|
gem 'rspec', '~> 3.0'
|
|
14
|
-
gem 'rubocop', '~> 0
|
|
12
|
+
gem 'rubocop', '~> 1.0'
|
|
15
13
|
gem 'rubocop-performance', '~> 1.8'
|
|
16
|
-
gem 'rubocop-
|
|
14
|
+
gem 'rubocop-rake', '~> 0.5'
|
|
15
|
+
gem 'rubocop-rspec', '~> 2.0'
|
|
16
|
+
gem 'simplecov', '~> 0.21'
|
|
17
|
+
gem 'simplecov-lcov', '~> 0.8'
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|

|
|
4
4
|
|
|
5
|
-
[](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
|
|
6
|
+
[](https://coveralls.io/github/yoshoku/rumale?branch=main)
|
|
7
7
|
[](https://badge.fury.io/rb/rumale)
|
|
8
|
-
[](https://github.com/yoshoku/rumale/blob/
|
|
8
|
+
[](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
|
|
9
9
|
[](https://yoshoku.github.io/rumale/doc/)
|
|
10
10
|
|
|
11
11
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
|
@@ -114,10 +114,10 @@ require 'rumale'
|
|
|
114
114
|
samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
|
|
115
115
|
|
|
116
116
|
# Define the estimator to be evaluated.
|
|
117
|
-
lr = Rumale::LinearModel::LogisticRegression.new
|
|
117
|
+
lr = Rumale::LinearModel::LogisticRegression.new
|
|
118
118
|
|
|
119
119
|
# Define the evaluation measure, splitting strategy, and cross validation.
|
|
120
|
-
ev = Rumale::EvaluationMeasure::
|
|
120
|
+
ev = Rumale::EvaluationMeasure::Accuracy.new
|
|
121
121
|
kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
|
|
122
122
|
cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
|
|
123
123
|
|
|
@@ -125,15 +125,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
|
|
|
125
125
|
report = cv.perform(samples, labels)
|
|
126
126
|
|
|
127
127
|
# Output result.
|
|
128
|
-
|
|
129
|
-
puts
|
|
128
|
+
mean_accuracy = report[:test_score].sum / kf.n_splits
|
|
129
|
+
puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
|
|
130
130
|
```
|
|
131
131
|
|
|
132
132
|
Execution of the above script results in the following.
|
|
133
133
|
|
|
134
134
|
```bash
|
|
135
135
|
$ ruby cross_validation.rb
|
|
136
|
-
5-CV mean
|
|
136
|
+
5-CV mean accuracy: 95.4%
|
|
137
137
|
```
|
|
138
138
|
|
|
139
139
|
### Example 3. Pipeline
|
|
@@ -144,10 +144,10 @@ require 'rumale'
|
|
|
144
144
|
# Load dataset.
|
|
145
145
|
samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
|
|
146
146
|
|
|
147
|
-
# Construct pipeline with kernel approximation and
|
|
148
|
-
rbf = Rumale::KernelApproximation::RBF.new(gamma:
|
|
149
|
-
|
|
150
|
-
pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf:
|
|
147
|
+
# Construct pipeline with kernel approximation and LogisticRegression.
|
|
148
|
+
rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
|
|
149
|
+
lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
|
|
150
|
+
pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
|
|
151
151
|
|
|
152
152
|
# Define the splitting strategy and cross validation.
|
|
153
153
|
kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
|
|
@@ -157,7 +157,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
|
|
|
157
157
|
report = cv.perform(samples, labels)
|
|
158
158
|
|
|
159
159
|
# Output result.
|
|
160
|
-
mean_accuracy = report[:test_score].
|
|
160
|
+
mean_accuracy = report[:test_score].sum / kf.n_splits
|
|
161
161
|
puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
|
|
162
162
|
```
|
|
163
163
|
|
|
@@ -177,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
|
|
|
177
177
|
|
|
178
178
|
Install OpenBLAS library.
|
|
179
179
|
|
|
180
|
-
|
|
180
|
+
macOS:
|
|
181
181
|
|
|
182
182
|
```bash
|
|
183
183
|
$ brew install openblas
|
|
@@ -186,12 +186,13 @@ $ brew install openblas
|
|
|
186
186
|
Ubuntu:
|
|
187
187
|
|
|
188
188
|
```bash
|
|
189
|
-
$ sudo apt-get install
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
189
|
+
$ sudo apt-get install libopenblas-dev liblapacke-dev
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Windows (MSYS2):
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
$ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
|
|
195
196
|
```
|
|
196
197
|
|
|
197
198
|
Install Numo::Linalg gem.
|
|
@@ -207,6 +208,37 @@ require 'numo/linalg/autoloader'
|
|
|
207
208
|
require 'rumale'
|
|
208
209
|
```
|
|
209
210
|
|
|
211
|
+
### Numo::OpenBLAS
|
|
212
|
+
[Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
|
|
213
|
+
and uses that as a background library for Numo::Linalg.
|
|
214
|
+
|
|
215
|
+
Install compilers for building OpenBLAS.
|
|
216
|
+
|
|
217
|
+
macOS:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
$ brew install gcc gfortran make
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Ubuntu:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
$ sudo apt-get install gcc gfortran make
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Install Numo::OpenBLAS gem.
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
$ gem install numo-openblas
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Load Numo::OpenBLAS gem instead of Numo::Linalg.
|
|
236
|
+
|
|
237
|
+
```ruby
|
|
238
|
+
require 'numo/openblas'
|
|
239
|
+
require 'rumale'
|
|
240
|
+
```
|
|
241
|
+
|
|
210
242
|
### Parallel
|
|
211
243
|
Several estimators in Rumale support parallel processing.
|
|
212
244
|
Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
|
|
@@ -228,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
|
|
|
228
260
|
estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
|
|
229
261
|
```
|
|
230
262
|
|
|
263
|
+
## Related Projects
|
|
264
|
+
- [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
|
|
265
|
+
- [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
|
|
266
|
+
|
|
231
267
|
## Novelties
|
|
232
268
|
|
|
233
269
|
* [Rumale SHOP](https://suzuri.jp/yoshoku)
|
|
@@ -245,4 +281,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
|
|
|
245
281
|
## Code of Conduct
|
|
246
282
|
|
|
247
283
|
Everyone interacting in the Rumale project’s codebases, issue trackers,
|
|
248
|
-
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/
|
|
284
|
+
chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).
|
data/ext/rumale/tree.c
CHANGED
|
@@ -257,10 +257,13 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
|
|
|
257
257
|
split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
|
|
258
258
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
|
|
259
259
|
VALUE results = rb_ary_new2(4);
|
|
260
|
-
|
|
261
|
-
rb_ary_store(results,
|
|
262
|
-
rb_ary_store(results,
|
|
263
|
-
rb_ary_store(results,
|
|
260
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
|
261
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
|
262
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
|
263
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
|
264
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
|
265
|
+
RB_GC_GUARD(params);
|
|
266
|
+
RB_GC_GUARD(criterion);
|
|
264
267
|
return results;
|
|
265
268
|
}
|
|
266
269
|
|
|
@@ -375,10 +378,13 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
|
|
|
375
378
|
split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
|
|
376
379
|
VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
|
|
377
380
|
VALUE results = rb_ary_new2(4);
|
|
378
|
-
|
|
379
|
-
rb_ary_store(results,
|
|
380
|
-
rb_ary_store(results,
|
|
381
|
-
rb_ary_store(results,
|
|
381
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
|
382
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
|
383
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
|
384
|
+
rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
|
|
385
|
+
rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
|
|
386
|
+
RB_GC_GUARD(params);
|
|
387
|
+
RB_GC_GUARD(criterion);
|
|
382
388
|
return results;
|
|
383
389
|
}
|
|
384
390
|
|
|
@@ -464,8 +470,10 @@ find_split_params_grad_reg
|
|
|
464
470
|
double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
|
|
465
471
|
VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
|
|
466
472
|
VALUE results = rb_ary_new2(2);
|
|
467
|
-
|
|
468
|
-
rb_ary_store(results,
|
|
473
|
+
double* params_ptr = (double*)na_get_pointer_for_read(params);
|
|
474
|
+
rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
|
|
475
|
+
rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
|
|
476
|
+
RB_GC_GUARD(params);
|
|
469
477
|
return results;
|
|
470
478
|
}
|
|
471
479
|
|
|
@@ -497,6 +505,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
|
|
|
497
505
|
|
|
498
506
|
xfree(histogram);
|
|
499
507
|
|
|
508
|
+
RB_GC_GUARD(y_nary);
|
|
509
|
+
RB_GC_GUARD(criterion);
|
|
510
|
+
|
|
500
511
|
return ret;
|
|
501
512
|
}
|
|
502
513
|
|
|
@@ -531,6 +542,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
|
|
|
531
542
|
|
|
532
543
|
xfree(sum_vec);
|
|
533
544
|
|
|
545
|
+
RB_GC_GUARD(criterion);
|
|
546
|
+
|
|
534
547
|
return ret;
|
|
535
548
|
}
|
|
536
549
|
|
data/lib/rumale.rb
CHANGED
|
@@ -30,6 +30,7 @@ require 'rumale/linear_model/linear_regression'
|
|
|
30
30
|
require 'rumale/linear_model/ridge'
|
|
31
31
|
require 'rumale/linear_model/lasso'
|
|
32
32
|
require 'rumale/linear_model/elastic_net'
|
|
33
|
+
require 'rumale/linear_model/nnls'
|
|
33
34
|
require 'rumale/kernel_machine/kernel_svc'
|
|
34
35
|
require 'rumale/kernel_machine/kernel_pca'
|
|
35
36
|
require 'rumale/kernel_machine/kernel_fda'
|
|
@@ -59,6 +60,8 @@ require 'rumale/ensemble/random_forest_classifier'
|
|
|
59
60
|
require 'rumale/ensemble/random_forest_regressor'
|
|
60
61
|
require 'rumale/ensemble/extra_trees_classifier'
|
|
61
62
|
require 'rumale/ensemble/extra_trees_regressor'
|
|
63
|
+
require 'rumale/ensemble/stacking_classifier'
|
|
64
|
+
require 'rumale/ensemble/stacking_regressor'
|
|
62
65
|
require 'rumale/clustering/k_means'
|
|
63
66
|
require 'rumale/clustering/mini_batch_k_means'
|
|
64
67
|
require 'rumale/clustering/k_medoids'
|
|
@@ -77,6 +80,7 @@ require 'rumale/manifold/tsne'
|
|
|
77
80
|
require 'rumale/manifold/mds'
|
|
78
81
|
require 'rumale/metric_learning/fisher_discriminant_analysis'
|
|
79
82
|
require 'rumale/metric_learning/neighbourhood_component_analysis'
|
|
83
|
+
require 'rumale/metric_learning/mlkr'
|
|
80
84
|
require 'rumale/neural_network/adam'
|
|
81
85
|
require 'rumale/neural_network/base_mlp'
|
|
82
86
|
require 'rumale/neural_network/mlp_regressor'
|
|
@@ -11,13 +11,15 @@ module Rumale
|
|
|
11
11
|
|
|
12
12
|
private
|
|
13
13
|
|
|
14
|
-
def enable_linalg?
|
|
14
|
+
def enable_linalg?(warning: true)
|
|
15
15
|
if defined?(Numo::Linalg).nil?
|
|
16
|
-
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
|
|
16
|
+
warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
|
|
17
17
|
return false
|
|
18
18
|
end
|
|
19
19
|
if Numo::Linalg::VERSION < '0.1.4'
|
|
20
|
-
|
|
20
|
+
if warning
|
|
21
|
+
warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
|
|
22
|
+
end
|
|
21
23
|
return false
|
|
22
24
|
end
|
|
23
25
|
true
|
|
@@ -59,7 +59,7 @@ module Rumale
|
|
|
59
59
|
@params[:solver] = if solver == 'auto'
|
|
60
60
|
load_linalg? ? 'evd' : 'fpt'
|
|
61
61
|
else
|
|
62
|
-
solver != 'evd' ? 'fpt' : 'evd'
|
|
62
|
+
solver != 'evd' ? 'fpt' : 'evd' # rubocop:disable Style/NegatedIfElseCondition
|
|
63
63
|
end
|
|
64
64
|
@params[:n_components] = n_components
|
|
65
65
|
@params[:max_iter] = max_iter
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rumale/base/base_estimator'
|
|
4
|
+
require 'rumale/base/classifier'
|
|
5
|
+
|
|
6
|
+
module Rumale
|
|
7
|
+
module Ensemble
|
|
8
|
+
# StackingClassifier is a class that implements classifier with stacking method.
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# estimators = {
|
|
12
|
+
# lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
|
|
13
|
+
# mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
|
|
14
|
+
# rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
|
|
15
|
+
# }
|
|
16
|
+
# meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
|
|
17
|
+
# classifier = Rumale::Ensemble::StackingClassifier.new(
|
|
18
|
+
# estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
|
|
19
|
+
# )
|
|
20
|
+
# classifier.fit(training_samples, training_labels)
|
|
21
|
+
# results = classifier.predict(testing_samples)
|
|
22
|
+
#
|
|
23
|
+
# *Reference*
|
|
24
|
+
# - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
|
|
25
|
+
class StackingClassifier
|
|
26
|
+
include Base::BaseEstimator
|
|
27
|
+
include Base::Classifier
|
|
28
|
+
|
|
29
|
+
# Return the base classifiers.
|
|
30
|
+
# @return [Hash<Symbol,Classifier>]
|
|
31
|
+
attr_reader :estimators
|
|
32
|
+
|
|
33
|
+
# Return the meta classifier.
|
|
34
|
+
# @return [Classifier]
|
|
35
|
+
attr_reader :meta_estimator
|
|
36
|
+
|
|
37
|
+
# Return the class labels.
|
|
38
|
+
# @return [Numo::Int32] (size: n_classes)
|
|
39
|
+
attr_reader :classes
|
|
40
|
+
|
|
41
|
+
# Return the method used by each base classifier.
|
|
42
|
+
# @return [Hash<Symbol,Symbol>]
|
|
43
|
+
attr_reader :stack_method
|
|
44
|
+
|
|
45
|
+
# Create a new classifier with stacking method.
|
|
46
|
+
#
|
|
47
|
+
# @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
|
|
48
|
+
# @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
|
|
49
|
+
# If nil is given, LogisticRegression is used.
|
|
50
|
+
# @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
|
|
51
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
|
|
52
|
+
# @param stack_method [String] The method name of the base classifier used for meta feature extraction.
|
|
53
|
+
# If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
|
|
54
|
+
# on each classifier.
|
|
55
|
+
# @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
|
|
56
|
+
# @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
|
|
57
|
+
def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
|
|
58
|
+
check_params_type(Hash, estimators: estimators)
|
|
59
|
+
check_params_numeric(n_splits: n_splits)
|
|
60
|
+
check_params_string(stack_method: stack_method)
|
|
61
|
+
check_params_boolean(shuffle: shuffle, passthrough: passthrough)
|
|
62
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
|
63
|
+
@estimators = estimators
|
|
64
|
+
@meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
|
|
65
|
+
@classes = nil
|
|
66
|
+
@stack_method = nil
|
|
67
|
+
@output_size = nil
|
|
68
|
+
@params = {}
|
|
69
|
+
@params[:n_splits] = n_splits
|
|
70
|
+
@params[:shuffle] = shuffle
|
|
71
|
+
@params[:stack_method] = stack_method
|
|
72
|
+
@params[:passthrough] = passthrough
|
|
73
|
+
@params[:random_seed] = random_seed || srand
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Fit the model with given training data.
|
|
77
|
+
#
|
|
78
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
79
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
|
80
|
+
# @return [StackingClassifier] The learned classifier itself.
|
|
81
|
+
def fit(x, y)
|
|
82
|
+
x = check_convert_sample_array(x)
|
|
83
|
+
y = check_convert_label_array(y)
|
|
84
|
+
check_sample_label_size(x, y)
|
|
85
|
+
|
|
86
|
+
n_samples, n_features = x.shape
|
|
87
|
+
|
|
88
|
+
@encoder = Rumale::Preprocessing::LabelEncoder.new
|
|
89
|
+
y_encoded = @encoder.fit_transform(y)
|
|
90
|
+
@classes = Numo::NArray[*@encoder.classes]
|
|
91
|
+
|
|
92
|
+
# training base classifiers with all training data.
|
|
93
|
+
@estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
|
|
94
|
+
|
|
95
|
+
# detecting feature extraction method and its size of output for each base classifier.
|
|
96
|
+
@stack_method = detect_stack_method
|
|
97
|
+
@output_size = detect_output_size(n_features)
|
|
98
|
+
|
|
99
|
+
# extracting meta features with base classifiers.
|
|
100
|
+
n_components = @output_size.values.inject(:+)
|
|
101
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
|
102
|
+
|
|
103
|
+
kf = Rumale::ModelSelection::StratifiedKFold.new(
|
|
104
|
+
n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
kf.split(x, y_encoded).each do |train_ids, valid_ids|
|
|
108
|
+
x_train = x[train_ids, true]
|
|
109
|
+
y_train = y_encoded[train_ids]
|
|
110
|
+
x_valid = x[valid_ids, true]
|
|
111
|
+
f_start = 0
|
|
112
|
+
@estimators.each_key do |name|
|
|
113
|
+
est_fold = Marshal.load(Marshal.dump(@estimators[name]))
|
|
114
|
+
f_last = f_start + @output_size[name]
|
|
115
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
|
116
|
+
z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
|
|
117
|
+
f_start = f_last
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# concatenating original features.
|
|
122
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
|
123
|
+
|
|
124
|
+
# training meta classifier.
|
|
125
|
+
@meta_estimator.fit(z, y_encoded)
|
|
126
|
+
|
|
127
|
+
self
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Calculate confidence scores for samples.
|
|
131
|
+
#
|
|
132
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
|
133
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
|
|
134
|
+
def decision_function(x)
|
|
135
|
+
x = check_convert_sample_array(x)
|
|
136
|
+
z = transform(x)
|
|
137
|
+
@meta_estimator.decision_function(z)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Predict class labels for samples.
|
|
141
|
+
#
|
|
142
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
|
143
|
+
# @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
|
|
144
|
+
def predict(x)
|
|
145
|
+
x = check_convert_sample_array(x)
|
|
146
|
+
z = transform(x)
|
|
147
|
+
Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Predict probability for samples.
|
|
151
|
+
#
|
|
152
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
|
153
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
|
|
154
|
+
def predict_proba(x)
|
|
155
|
+
x = check_convert_sample_array(x)
|
|
156
|
+
z = transform(x)
|
|
157
|
+
@meta_estimator.predict_proba(z)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Transform the given data with the learned model.
|
|
161
|
+
#
|
|
162
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
|
|
163
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
|
|
164
|
+
def transform(x)
|
|
165
|
+
x = check_convert_sample_array(x)
|
|
166
|
+
n_samples = x.shape[0]
|
|
167
|
+
n_components = @output_size.values.inject(:+)
|
|
168
|
+
z = Numo::DFloat.zeros(n_samples, n_components)
|
|
169
|
+
f_start = 0
|
|
170
|
+
@estimators.each_key do |name|
|
|
171
|
+
f_last = f_start + @output_size[name]
|
|
172
|
+
f_position = @output_size[name] == 1 ? f_start : f_start...f_last
|
|
173
|
+
z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
|
|
174
|
+
f_start = f_last
|
|
175
|
+
end
|
|
176
|
+
z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
|
|
177
|
+
z
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# Fit the model with training data, and then transform them with the learned model.
|
|
181
|
+
#
|
|
182
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
183
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
|
184
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
|
|
185
|
+
def fit_transform(x, y)
|
|
186
|
+
x = check_convert_sample_array(x)
|
|
187
|
+
y = check_convert_label_array(y)
|
|
188
|
+
fit(x, y).transform(x)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
private
|
|
192
|
+
|
|
193
|
+
STACK_METHODS = %i[predict_proba decision_function predict].freeze
|
|
194
|
+
|
|
195
|
+
private_constant :STACK_METHODS
|
|
196
|
+
|
|
197
|
+
def detect_stack_method
|
|
198
|
+
if @params[:stack_method] == 'auto'
|
|
199
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
|
|
200
|
+
else
|
|
201
|
+
@estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def detect_output_size(n_features)
|
|
206
|
+
x_dummy = Numo::DFloat.new(2, n_features).rand
|
|
207
|
+
@estimators.each_key.with_object({}) do |name, obj|
|
|
208
|
+
output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
|
|
209
|
+
obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|