rumale 0.22.0 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +1 -0
  3. data/.github/workflows/build.yml +6 -3
  4. data/.github/workflows/coverage.yml +28 -0
  5. data/.gitignore +1 -0
  6. data/.rubocop.yml +1 -0
  7. data/CHANGELOG.md +35 -0
  8. data/Gemfile +6 -4
  9. data/LICENSE.txt +1 -1
  10. data/README.md +56 -19
  11. data/ext/rumale/tree.c +24 -12
  12. data/lib/rumale.rb +8 -0
  13. data/lib/rumale/base/base_estimator.rb +5 -3
  14. data/lib/rumale/dataset.rb +7 -3
  15. data/lib/rumale/decomposition/pca.rb +1 -1
  16. data/lib/rumale/ensemble/stacking_classifier.rb +215 -0
  17. data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
  18. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  19. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  20. data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
  21. data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
  22. data/lib/rumale/kernel_approximation/nystroem.rb +29 -9
  23. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  24. data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
  25. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  26. data/lib/rumale/linear_model/lasso.rb +1 -1
  27. data/lib/rumale/linear_model/linear_regression.rb +63 -34
  28. data/lib/rumale/linear_model/logistic_regression.rb +1 -1
  29. data/lib/rumale/linear_model/nnls.rb +137 -0
  30. data/lib/rumale/linear_model/ridge.rb +70 -33
  31. data/lib/rumale/linear_model/svc.rb +4 -3
  32. data/lib/rumale/linear_model/svr.rb +4 -3
  33. data/lib/rumale/metric_learning/mlkr.rb +161 -0
  34. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +7 -4
  35. data/lib/rumale/pairwise_metric.rb +1 -1
  36. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  37. data/lib/rumale/validation.rb +13 -1
  38. data/lib/rumale/version.rb +1 -1
  39. data/rumale.gemspec +1 -1
  40. metadata +14 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e2f68b3182ada73537901e7bc74bddd100aff75264f9147c88d8240fb624e29
4
- data.tar.gz: e2639a55fc84d1399b925f65b3a56b38f2ae3150dd15ab8556120af28d408cae
3
+ metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
4
+ data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
5
5
  SHA512:
6
- metadata.gz: 91ffcbade578bbb9c6a5d87a54ebd89a2b5990eb70835e7a5549afe78541dbfeafe3af50833725bee751fa89c059484970e5add7ebf8adee3e25bc000fbe3778
7
- data.tar.gz: 2ee2b1448a486581ef98561f65bc3446b2e161c89a3a12bd6cd78867350e26151bc0b350bd431902d21f6979493ab2d01a6ee81b55c1099f631aa84c84a704e6
6
+ metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
7
+ data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: github-ci
data/.github/workflows/build.yml CHANGED
@@ -1,13 +1,14 @@
1
1
  name: build
2
2
 
3
- on: [push]
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  build:
7
7
  runs-on: ubuntu-latest
8
8
  strategy:
9
+ fail-fast: false
9
10
  matrix:
10
- ruby: [ '2.5', '2.6', '2.7' ]
11
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
14
  - name: Install BLAS and LAPACK
@@ -17,7 +18,9 @@ jobs:
17
18
  with:
18
19
  ruby-version: ${{ matrix.ruby }}
19
20
  - name: Build and test with Rake
21
+ env:
22
+ LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
20
23
  run: |
21
- gem install bundler
24
+ gem install --no-document bundler
22
25
  bundle install --jobs 4 --retry 3
23
26
  bundle exec rake
data/.github/workflows/coverage.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: coverage
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ coverage:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Install BLAS and LAPACK
15
+ run: sudo apt-get install -y libopenblas-dev liblapacke-dev
16
+ - name: Set up Ruby 2.7
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: '2.7'
20
+ - name: Build and test with Rake
21
+ run: |
22
+ gem install bundler
23
+ bundle install
24
+ bundle exec rake
25
+ - name: Coveralls GitHub Action
26
+ uses: coverallsapp/github-action@v1.1.2
27
+ with:
28
+ github-token: ${{ secrets.GITHUB_TOKEN }}
data/.gitignore CHANGED
@@ -16,6 +16,7 @@
16
16
  tags
17
17
  .DS_Store
18
18
  .ruby-version
19
+ iterate.dat
19
20
  /spec/dump_dbl.t
20
21
  /spec/dump_int.t
21
22
  /spec/dump_mult_dbl.t
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  require:
2
2
  - rubocop-performance
3
+ - rubocop-rake
3
4
  - rubocop-rspec
4
5
 
5
6
  AllCops:
data/CHANGELOG.md CHANGED
@@ -1,3 +1,38 @@
1
+ # 0.22.5
2
+ - Add transformer class for calculating kernel matrix.
3
+ - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
4
+ - Add classifier class based on Ridge regression.
5
+ - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
6
+ - Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
7
+ - Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
8
+
9
+ # 0.22.4
10
+ - Add classifier and regressor classes for voting ensemble method.
11
+ - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
12
+ - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
13
+ - Refactor some codes.
14
+ - Fix some typos on API documentation.
15
+
16
+ # 0.22.3
17
+ - Add regressor class for non-negative least square method.
18
+ - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
19
+ - Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
20
+ - In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
21
+ the solver parameter. If you use 'sgd' solver, you need to specify it explicitly.
22
+ - Add GC guard to native extension codes.
23
+ - Update API documentation.
24
+
25
+ # 0.22.2
26
+ - Add classifier and regressor classes for stacking method.
27
+ - [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
28
+ - [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
29
+ - Refactor some codes with Rubocop.
30
+
31
+ # 0.22.1
32
+ - Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
33
+ - Refactor NeighbourhoodComponentAnalysis.
34
+ - Update API documentation.
35
+
1
36
  # 0.22.0
2
37
  ## Breaking change
3
38
  - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
data/Gemfile CHANGED
@@ -6,10 +6,12 @@ gemspec
6
6
  gem 'mmh3', '>= 1.0'
7
7
  gem 'numo-linalg', '>= 0.1.4'
8
8
  gem 'parallel', '>= 1.17.0'
9
- gem 'rake', '~> 12.0'
9
+ gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'rspec', '~> 3.0'
12
- gem 'rubocop', '~> 0.91'
12
+ gem 'rubocop', '~> 1.0'
13
13
  gem 'rubocop-performance', '~> 1.8'
14
- gem 'rubocop-rspec', '~> 1.43'
15
- gem 'simplecov', '~> 0.19'
14
+ gem 'rubocop-rake', '~> 0.5'
15
+ gem 'rubocop-rspec', '~> 2.0'
16
+ gem 'simplecov', '~> 0.21'
17
+ gem 'simplecov-lcov', '~> 0.8'
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2020 Atsushi Tatsuma
1
+ Copyright (c) 2017-2021 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -3,8 +3,9 @@
3
3
  ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
4
4
 
5
5
  [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
6
+ [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
6
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
7
- [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
8
+ [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
8
9
  [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
9
10
 
10
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
@@ -113,10 +114,10 @@ require 'rumale'
113
114
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
114
115
 
115
116
  # Define the estimator to be evaluated.
116
- lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
117
+ lr = Rumale::LinearModel::LogisticRegression.new
117
118
 
118
119
  # Define the evaluation measure, splitting strategy, and cross validation.
119
- ev = Rumale::EvaluationMeasure::LogLoss.new
120
+ ev = Rumale::EvaluationMeasure::Accuracy.new
120
121
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
121
122
  cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
122
123
 
@@ -124,15 +125,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
124
125
  report = cv.perform(samples, labels)
125
126
 
126
127
  # Output result.
127
- mean_logloss = report[:test_score].inject(:+) / kf.n_splits
128
- puts("5-CV mean log-loss: %.3f" % mean_logloss)
128
+ mean_accuracy = report[:test_score].sum / kf.n_splits
129
+ puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
129
130
  ```
130
131
 
131
132
  Execution of the above script results in the following.
132
133
 
133
134
  ```bash
134
135
  $ ruby cross_validation.rb
135
- 5-CV mean log-loss: 0.355
136
+ 5-CV mean accuracy: 95.4%
136
137
  ```
137
138
 
138
139
  ### Example 3. Pipeline
@@ -143,10 +144,10 @@ require 'rumale'
143
144
  # Load dataset.
144
145
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
145
146
 
146
- # Construct pipeline with kernel approximation and SVC.
147
- rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
148
- svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
149
- pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
147
+ # Construct pipeline with kernel approximation and LogisticRegression.
148
+ rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
149
+ lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
150
+ pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
150
151
 
151
152
  # Define the splitting strategy and cross validation.
152
153
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -156,7 +157,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
156
157
  report = cv.perform(samples, labels)
157
158
 
158
159
  # Output result.
159
- mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
160
+ mean_accuracy = report[:test_score].sum / kf.n_splits
160
161
  puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
161
162
  ```
162
163
 
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
176
177
 
177
178
  Install OpenBLAS library.
178
179
 
179
- Mac:
180
+ macOS:
180
181
 
181
182
  ```bash
182
183
  $ brew install openblas
@@ -185,12 +186,13 @@ $ brew install openblas
185
186
  Ubuntu:
186
187
 
187
188
  ```bash
188
- $ sudo apt-get install gcc gfortran
189
- $ wget https://github.com/xianyi/OpenBLAS/archive/v0.3.5.tar.gz
190
- $ tar xzf v0.3.5.tar.gz
191
- $ cd OpenBLAS-0.3.5
192
- $ make USE_OPENMP=1
193
- $ sudo make PREFIX=/usr/local install
189
+ $ sudo apt-get install libopenblas-dev liblapacke-dev
190
+ ```
191
+
192
+ Windows (MSYS2):
193
+
194
+ ```bash
195
+ $ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
194
196
  ```
195
197
 
196
198
  Install Numo::Linalg gem.
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
206
208
  require 'rumale'
207
209
  ```
208
210
 
211
+ ### Numo::OpenBLAS
212
+ [Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
213
+ and uses that as a background library for Numo::Linalg.
214
+
215
+ Install compilers for building OpenBLAS.
216
+
217
+ macOS:
218
+
219
+ ```bash
220
+ $ brew install gcc gfortran make
221
+ ```
222
+
223
+ Ubuntu:
224
+
225
+ ```bash
226
+ $ sudo apt-get install gcc gfortran make
227
+ ```
228
+
229
+ Install Numo::OpenBLAS gem.
230
+
231
+ ```bash
232
+ $ gem install numo-openblas
233
+ ```
234
+
235
+ Load Numo::OpenBLAS gem instead of Numo::Linalg.
236
+
237
+ ```ruby
238
+ require 'numo/openblas'
239
+ require 'rumale'
240
+ ```
241
+
209
242
  ### Parallel
210
243
  Several estimators in Rumale support parallel processing.
211
244
  Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
227
260
  estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
228
261
  ```
229
262
 
263
+ ## Related Projects
264
+ - [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
265
+ - [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
266
+
230
267
  ## Novelties
231
268
 
232
269
  * [Rumale SHOP](https://suzuri.jp/yoshoku)
@@ -244,4 +281,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
244
281
  ## Code of Conduct
245
282
 
246
283
  Everyone interacting in the Rumale project’s codebases, issue trackers,
247
- chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/master/CODE_OF_CONDUCT.md).
284
+ chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).
data/ext/rumale/tree.c CHANGED
@@ -5,9 +5,8 @@ RUBY_EXTERN VALUE mRumale;
5
5
  double*
6
6
  alloc_dbl_array(const long n_dimensions)
7
7
  {
8
- long i;
9
8
  double* arr = ALLOC_N(double, n_dimensions);
10
- for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
9
+ memset(arr, 0, n_dimensions * sizeof(double));
11
10
  return arr;
12
11
  }
13
12
 
@@ -257,10 +256,13 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
257
256
  split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
258
257
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
259
258
  VALUE results = rb_ary_new2(4);
260
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
261
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
262
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
263
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
259
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
260
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
261
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
262
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
263
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
264
+ RB_GC_GUARD(params);
265
+ RB_GC_GUARD(criterion);
264
266
  return results;
265
267
  }
266
268
 
@@ -375,10 +377,13 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
375
377
  split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
376
378
  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
377
379
  VALUE results = rb_ary_new2(4);
378
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
379
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
380
- rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
381
- rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
380
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
381
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
382
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
383
+ rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
384
+ rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
385
+ RB_GC_GUARD(params);
386
+ RB_GC_GUARD(criterion);
382
387
  return results;
383
388
  }
384
389
 
@@ -464,8 +469,10 @@ find_split_params_grad_reg
464
469
  double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
465
470
  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
466
471
  VALUE results = rb_ary_new2(2);
467
- rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
468
- rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
472
+ double* params_ptr = (double*)na_get_pointer_for_read(params);
473
+ rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
474
+ rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
475
+ RB_GC_GUARD(params);
469
476
  return results;
470
477
  }
471
478
 
@@ -497,6 +504,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
497
504
 
498
505
  xfree(histogram);
499
506
 
507
+ RB_GC_GUARD(y_nary);
508
+ RB_GC_GUARD(criterion);
509
+
500
510
  return ret;
501
511
  }
502
512
 
@@ -531,6 +541,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
531
541
 
532
542
  xfree(sum_vec);
533
543
 
544
+ RB_GC_GUARD(criterion);
545
+
534
546
  return ret;
535
547
  }
536
548
 
data/lib/rumale.rb CHANGED
@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
30
30
  require 'rumale/linear_model/ridge'
31
31
  require 'rumale/linear_model/lasso'
32
32
  require 'rumale/linear_model/elastic_net'
33
+ require 'rumale/linear_model/nnls'
33
34
  require 'rumale/kernel_machine/kernel_svc'
34
35
  require 'rumale/kernel_machine/kernel_pca'
35
36
  require 'rumale/kernel_machine/kernel_fda'
36
37
  require 'rumale/kernel_machine/kernel_ridge'
38
+ require 'rumale/kernel_machine/kernel_ridge_classifier'
37
39
  require 'rumale/multiclass/one_vs_rest_classifier'
38
40
  require 'rumale/nearest_neighbors/vp_tree'
39
41
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -59,6 +61,10 @@ require 'rumale/ensemble/random_forest_classifier'
59
61
  require 'rumale/ensemble/random_forest_regressor'
60
62
  require 'rumale/ensemble/extra_trees_classifier'
61
63
  require 'rumale/ensemble/extra_trees_regressor'
64
+ require 'rumale/ensemble/stacking_classifier'
65
+ require 'rumale/ensemble/stacking_regressor'
66
+ require 'rumale/ensemble/voting_classifier'
67
+ require 'rumale/ensemble/voting_regressor'
62
68
  require 'rumale/clustering/k_means'
63
69
  require 'rumale/clustering/mini_batch_k_means'
64
70
  require 'rumale/clustering/k_medoids'
@@ -77,6 +83,7 @@ require 'rumale/manifold/tsne'
77
83
  require 'rumale/manifold/mds'
78
84
  require 'rumale/metric_learning/fisher_discriminant_analysis'
79
85
  require 'rumale/metric_learning/neighbourhood_component_analysis'
86
+ require 'rumale/metric_learning/mlkr'
80
87
  require 'rumale/neural_network/adam'
81
88
  require 'rumale/neural_network/base_mlp'
82
89
  require 'rumale/neural_network/mlp_regressor'
@@ -97,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
97
104
  require 'rumale/preprocessing/ordinal_encoder'
98
105
  require 'rumale/preprocessing/binarizer'
99
106
  require 'rumale/preprocessing/polynomial_features'
107
+ require 'rumale/preprocessing/kernel_calculator'
100
108
  require 'rumale/model_selection/k_fold'
101
109
  require 'rumale/model_selection/group_k_fold'
102
110
  require 'rumale/model_selection/stratified_k_fold'
data/lib/rumale/base/base_estimator.rb CHANGED
@@ -11,13 +11,15 @@ module Rumale
11
11
 
12
12
  private
13
13
 
14
- def enable_linalg?
14
+ def enable_linalg?(warning: true)
15
15
  if defined?(Numo::Linalg).nil?
16
- warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
16
+ warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
17
17
  return false
18
18
  end
19
19
  if Numo::Linalg::VERSION < '0.1.4'
20
- warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
20
+ if warning
21
+ warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
22
+ end
21
23
  return false
22
24
  end
23
25
  true
data/lib/rumale/dataset.rb CHANGED
@@ -12,22 +12,26 @@ module Rumale
12
12
  # Load a dataset with the libsvm file format into Numo::NArray.
13
13
  #
14
14
  # @param filename [String] A path to a dataset file.
15
+ # @param n_features [Integer/Nil] The number of features of data to load.
16
+ # If nil is given, it will be detected automatically from given file.
15
17
  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
16
18
  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
17
19
  #
18
20
  # @return [Array<Numo::NArray>]
19
21
  # Returns array containing the (n_samples x n_features) matrix for feature vectors
20
22
  # and (n_samples) vector for labels or target values.
21
- def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
23
+ def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
22
24
  ftvecs = []
23
25
  labels = []
24
- n_features = 0
26
+ n_features_detected = 0
25
27
  CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
26
28
  label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
27
29
  labels.push(label)
28
30
  ftvecs.push(ftvec)
29
- n_features = max_idx if n_features < max_idx
31
+ n_features_detected = max_idx if n_features_detected < max_idx
30
32
  end
33
+ n_features ||= n_features_detected
34
+ n_features = [n_features, n_features_detected].max
31
35
  [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
32
36
  end
33
37