RubyGems - rumale - Versions diffs - 0.22.0 → 0.22.5 - Mend

rumale 0.22.0 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

checksums.yaml +4 -4
data/.coveralls.yml +1 -0
data/.github/workflows/build.yml +6 -3
data/.github/workflows/coverage.yml +28 -0
data/.gitignore +1 -0
data/.rubocop.yml +1 -0
data/CHANGELOG.md +35 -0
data/Gemfile +6 -4
data/LICENSE.txt +1 -1
data/README.md +56 -19
data/ext/rumale/tree.c +24 -12
data/lib/rumale.rb +8 -0
data/lib/rumale/base/base_estimator.rb +5 -3
data/lib/rumale/dataset.rb +7 -3
data/lib/rumale/decomposition/pca.rb +1 -1
data/lib/rumale/ensemble/stacking_classifier.rb +215 -0
data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
data/lib/rumale/ensemble/voting_classifier.rb +126 -0
data/lib/rumale/ensemble/voting_regressor.rb +82 -0
data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
data/lib/rumale/kernel_approximation/nystroem.rb +29 -9
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
data/lib/rumale/linear_model/elastic_net.rb +1 -1
data/lib/rumale/linear_model/lasso.rb +1 -1
data/lib/rumale/linear_model/linear_regression.rb +63 -34
data/lib/rumale/linear_model/logistic_regression.rb +1 -1
data/lib/rumale/linear_model/nnls.rb +137 -0
data/lib/rumale/linear_model/ridge.rb +70 -33
data/lib/rumale/linear_model/svc.rb +4 -3
data/lib/rumale/linear_model/svr.rb +4 -3
data/lib/rumale/metric_learning/mlkr.rb +161 -0
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +7 -4
data/lib/rumale/pairwise_metric.rb +1 -1
data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
data/lib/rumale/validation.rb +13 -1
data/lib/rumale/version.rb +1 -1
data/rumale.gemspec +1 -1
metadata +14 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4e2f68b3182ada73537901e7bc74bddd100aff75264f9147c88d8240fb624e29
-  data.tar.gz: e2639a55fc84d1399b925f65b3a56b38f2ae3150dd15ab8556120af28d408cae
+  metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
+  data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
 SHA512:
-  metadata.gz: 91ffcbade578bbb9c6a5d87a54ebd89a2b5990eb70835e7a5549afe78541dbfeafe3af50833725bee751fa89c059484970e5add7ebf8adee3e25bc000fbe3778
-  data.tar.gz: 2ee2b1448a486581ef98561f65bc3446b2e161c89a3a12bd6cd78867350e26151bc0b350bd431902d21f6979493ab2d01a6ee81b55c1099f631aa84c84a704e6
+  metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
+  data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22

data/.coveralls.yml ADDED Viewed

	@@ -0,0 +1 @@
1	+ service_name: github-ci

data/.github/workflows/build.yml CHANGED Viewed

@@ -1,13 +1,14 @@
 name: build
-on: [push]
+on: [push, pull_request]
 jobs:
   build:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        ruby: [ '2.5', '2.6', '2.7' ]
+        ruby: [ '2.5', '2.6', '2.7', '3.0' ]
     steps:
       - uses: actions/checkout@v2
       - name: Install BLAS and LAPACK
@@ -17,7 +18,9 @@ jobs:
         with:
           ruby-version: ${{ matrix.ruby }}
       - name: Build and test with Rake
+        env:
+          LD_LIBRARY_PATH: '/usr/lib/x86_64-linux-gnu/'
         run: |
-          gem install bundler
+          gem install --no-document bundler
           bundle install --jobs 4 --retry 3
           bundle exec rake

data/.github/workflows/coverage.yml ADDED Viewed

@@ -0,0 +1,28 @@
+name: coverage
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+jobs:
+  coverage:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install BLAS and LAPACK
+        run: sudo apt-get install -y libopenblas-dev liblapacke-dev
+      - name: Set up Ruby 2.7
+        uses: actions/setup-ruby@v1
+        with:
+          ruby-version: '2.7'
+      - name: Build and test with Rake
+        run: |
+          gem install bundler
+          bundle install
+          bundle exec rake
+      - name: Coveralls GitHub Action
+        uses: coverallsapp/github-action@v1.1.2
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}

data/.gitignore CHANGED Viewed

@@ -16,6 +16,7 @@
 tags
 .DS_Store
 .ruby-version
+iterate.dat
 /spec/dump_dbl.t
 /spec/dump_int.t
 /spec/dump_mult_dbl.t

data/.rubocop.yml CHANGED Viewed

@@ -1,5 +1,6 @@
 require:
   - rubocop-performance
+  - rubocop-rake
   - rubocop-rspec
 AllCops:

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,38 @@
+# 0.22.5
+- Add transformer class for calculating kernel matrix.
+  - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
+- Add classifier class based on Ridge regression.
+  - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
+- Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
+- Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
+# 0.22.4
+- Add classifier and regressor classes for voting ensemble method.
+  - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
+  - [VotingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingRegressor.html)
+- Refactor some codes.
+- Fix some typos on API documentation.
+# 0.22.3
+- Add regressor class for non-negative least square method.
+  - [NNLS](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/NNLS.html)
+- Add lbfgs solver to [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html) and [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html).
+  - In version 0.23.0, these classes will be changed to attempt to optimize with 'svd' or 'lbfgs' solver if 'auto' is given to
+  the solver parameter. If you use 'sgd' solver, you need to specify it explicitly.
+- Add GC guard to native extension codes.
+- Update API documentation.
+# 0.22.2
+- Add classifier and regressor classes for stacking method.
+  - [StackingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingClassifier.html)
+  - [StackingRegressor](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/StackingRegressor.html)
+- Refactor some codes with Rubocop.
+# 0.22.1
+- Add transformer class for [MLKR](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/MLKR.html), that implements Metric Learning for Kernel Regression.
+- Refactor NeighbourhoodComponentAnalysis.
+- Update API documentation.
 # 0.22.0
 ## Breaking change
 - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.

data/Gemfile CHANGED Viewed

@@ -6,10 +6,12 @@ gemspec
 gem 'mmh3', '>= 1.0'
 gem 'numo-linalg', '>= 0.1.4'
 gem 'parallel', '>= 1.17.0'
-gem 'rake', '~> 12.0'
+gem 'rake', '~> 13.0'
 gem 'rake-compiler', '~> 1.0'
 gem 'rspec', '~> 3.0'
-gem 'rubocop', '~> 0.91'
+gem 'rubocop', '~> 1.0'
 gem 'rubocop-performance', '~> 1.8'
-gem 'rubocop-rspec', '~> 1.43'
-gem 'simplecov', '~> 0.19'
+gem 'rubocop-rake', '~> 0.5'
+gem 'rubocop-rspec', '~> 2.0'
+gem 'simplecov', '~> 0.21'
+gem 'simplecov-lcov', '~> 0.8'

data/LICENSE.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2017-2020 Atsushi Tatsuma
+Copyright (c) 2017-2021 Atsushi Tatsuma
 All rights reserved.
 Redistribution and use in source and binary forms, with or without

data/README.md CHANGED Viewed

@@ -3,8 +3,9 @@
 ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
 [![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
+[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/rumale?branch=main)
 [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
-[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
+[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
 [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
@@ -113,10 +114,10 @@ require 'rumale'
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 # Define the estimator to be evaluated.
-lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new
 # Define the evaluation measure, splitting strategy, and cross validation.
-ev = Rumale::EvaluationMeasure::LogLoss.new
+ev = Rumale::EvaluationMeasure::Accuracy.new
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
 cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
@@ -124,15 +125,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
 report = cv.perform(samples, labels)
 # Output result.
-mean_logloss = report[:test_score].inject(:+) / kf.n_splits
-puts("5-CV mean log-loss: %.3f" % mean_logloss)
+mean_accuracy = report[:test_score].sum / kf.n_splits
+puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
 ```
 Execution of the above script results in the following.
 ```bash
 $ ruby cross_validation.rb
-5-CV mean log-loss: 0.355
+5-CV mean accuracy: 95.4%
 ```
 ### Example 3. Pipeline
@@ -143,10 +144,10 @@ require 'rumale'
 # Load dataset.
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
-# Construct pipeline with kernel approximation and SVC.
-rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
-svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
-pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
+# Construct pipeline with kernel approximation and LogisticRegression.
+rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
+pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
 # Define the splitting strategy and cross validation.
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -156,7 +157,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
 report = cv.perform(samples, labels)
 # Output result.
-mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
+mean_accuracy = report[:test_score].sum / kf.n_splits
 puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
 ```
@@ -176,7 +177,7 @@ For example, using the [OpenBLAS](https://github.com/xianyi/OpenBLAS) speeds up
 Install OpenBLAS library.
-Mac:
+macOS:
 ```bash
 $ brew install openblas
@@ -185,12 +186,13 @@ $ brew install openblas
 Ubuntu:
 ```bash
-$ sudo apt-get install gcc gfortran
-$ wget https://github.com/xianyi/OpenBLAS/archive/v0.3.5.tar.gz
-$ tar xzf v0.3.5.tar.gz
-$ cd OpenBLAS-0.3.5
-$ make USE_OPENMP=1
-$ sudo make PREFIX=/usr/local install
+$ sudo apt-get install libopenblas-dev liblapacke-dev
+```
+Windows (MSYS2):
+```bash
+$ pacman -S mingw-w64-x86_64-ruby mingw-w64-x86_64-openblas mingw-w64-x86_64-lapack
 ```
 Install Numo::Linalg gem.
@@ -206,6 +208,37 @@ require 'numo/linalg/autoloader'
 require 'rumale'
 ```
+### Numo::OpenBLAS
+[Numo::OpenBLAS](https://github.com/yoshoku/numo-openblas) downloads and builds OpenBLAS during installation
+and uses that as a background library for Numo::Linalg.
+Install compilers for building OpenBLAS.
+macOS:
+```bash
+$ brew install gcc gfortran make
+```
+Ubuntu:
+```bash
+$ sudo apt-get install gcc gfortran make
+```
+Install Numo::OpenBLAS gem.
+```bash
+$ gem install numo-openblas
+```
+Load Numo::OpenBLAS gem instead of Numo::Linalg.
+```ruby
+require 'numo/openblas'
+require 'rumale'
+```
 ### Parallel
 Several estimators in Rumale support parallel processing.
 Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
@@ -227,6 +260,10 @@ When -1 is given to n_jobs parameter, all processors are used.
 estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
 ```
+## Related Projects
+- [Rumale::SVM](https://github.com/yoshoku/rumale-svm) provides support vector machine algorithms in LIBSVM and LIBLINEAR with Rumale interface.
+- [Rumale::Torch](https://github.com/yoshoku/rumale-torch) provides the learning and inference by the neural network defined in torch.rb with Rumale interface.
 ## Novelties
 * [Rumale SHOP](https://suzuri.jp/yoshoku)
@@ -244,4 +281,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
 ## Code of Conduct
 Everyone interacting in the Rumale project’s codebases, issue trackers,
-chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/master/CODE_OF_CONDUCT.md).
+chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Rumale/blob/main/CODE_OF_CONDUCT.md).

data/ext/rumale/tree.c CHANGED Viewed

@@ -5,9 +5,8 @@ RUBY_EXTERN VALUE mRumale;
 double*
 alloc_dbl_array(const long n_dimensions)
 {
-  long i;
   double* arr = ALLOC_N(double, n_dimensions);
-  for (i = 0; i < n_dimensions; i++) { arr[i] = 0.0; }
+  memset(arr, 0, n_dimensions * sizeof(double));
   return arr;
 }
@@ -257,10 +256,13 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
   split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
   VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
   VALUE results = rb_ary_new2(4);
-  rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
-  rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
-  rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
-  rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
+  double* params_ptr = (double*)na_get_pointer_for_read(params);
+  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
+  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
+  rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
+  rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
+  RB_GC_GUARD(params);
+  RB_GC_GUARD(criterion);
   return results;
 }
@@ -375,10 +377,13 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
   split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
   VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
   VALUE results = rb_ary_new2(4);
-  rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
-  rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
-  rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
-  rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
+  double* params_ptr = (double*)na_get_pointer_for_read(params);
+  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
+  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
+  rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
+  rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
+  RB_GC_GUARD(params);
+  RB_GC_GUARD(criterion);
   return results;
 }
@@ -464,8 +469,10 @@ find_split_params_grad_reg
   double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
   VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
   VALUE results = rb_ary_new2(2);
-  rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
-  rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
+  double* params_ptr = (double*)na_get_pointer_for_read(params);
+  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
+  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
+  RB_GC_GUARD(params);
   return results;
 }
@@ -497,6 +504,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
   xfree(histogram);
+  RB_GC_GUARD(y_nary);
+  RB_GC_GUARD(criterion);
   return ret;
 }
@@ -531,6 +541,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
   xfree(sum_vec);
+  RB_GC_GUARD(criterion);
   return ret;
 }

data/lib/rumale.rb CHANGED Viewed

@@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression'
 require 'rumale/linear_model/ridge'
 require 'rumale/linear_model/lasso'
 require 'rumale/linear_model/elastic_net'
+require 'rumale/linear_model/nnls'
 require 'rumale/kernel_machine/kernel_svc'
 require 'rumale/kernel_machine/kernel_pca'
 require 'rumale/kernel_machine/kernel_fda'
 require 'rumale/kernel_machine/kernel_ridge'
+require 'rumale/kernel_machine/kernel_ridge_classifier'
 require 'rumale/multiclass/one_vs_rest_classifier'
 require 'rumale/nearest_neighbors/vp_tree'
 require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -59,6 +61,10 @@ require 'rumale/ensemble/random_forest_classifier'
 require 'rumale/ensemble/random_forest_regressor'
 require 'rumale/ensemble/extra_trees_classifier'
 require 'rumale/ensemble/extra_trees_regressor'
+require 'rumale/ensemble/stacking_classifier'
+require 'rumale/ensemble/stacking_regressor'
+require 'rumale/ensemble/voting_classifier'
+require 'rumale/ensemble/voting_regressor'
 require 'rumale/clustering/k_means'
 require 'rumale/clustering/mini_batch_k_means'
 require 'rumale/clustering/k_medoids'
@@ -77,6 +83,7 @@ require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
 require 'rumale/metric_learning/neighbourhood_component_analysis'
+require 'rumale/metric_learning/mlkr'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'
@@ -97,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
 require 'rumale/preprocessing/ordinal_encoder'
 require 'rumale/preprocessing/binarizer'
 require 'rumale/preprocessing/polynomial_features'
+require 'rumale/preprocessing/kernel_calculator'
 require 'rumale/model_selection/k_fold'
 require 'rumale/model_selection/group_k_fold'
 require 'rumale/model_selection/stratified_k_fold'

data/lib/rumale/base/base_estimator.rb CHANGED Viewed

@@ -11,13 +11,15 @@ module Rumale
       private
-      def enable_linalg?
+      def enable_linalg?(warning: true)
         if defined?(Numo::Linalg).nil?
-          warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
+          warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
           return false
         end
         if Numo::Linalg::VERSION < '0.1.4'
-          warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
+          if warning
+            warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
+          end
           return false
         end
         true

data/lib/rumale/dataset.rb CHANGED Viewed

@@ -12,22 +12,26 @@ module Rumale
       # Load a dataset with the libsvm file format into Numo::NArray.
       #
       # @param filename [String] A path to a dataset file.
+      # @param n_features [Integer/Nil] The number of features of data to load.
+      #   If nil is given, it will be detected automatically from given file.
       # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
       # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
       #
       # @return [Array<Numo::NArray>]
       #   Returns array containing the (n_samples x n_features) matrix for feature vectors
       #   and (n_samples) vector for labels or target values.
-      def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
+      def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
         ftvecs = []
         labels = []
-        n_features = 0
+        n_features_detected = 0
         CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
           label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
           labels.push(label)
           ftvecs.push(ftvec)
-          n_features = max_idx if n_features < max_idx
+          n_features_detected = max_idx if n_features_detected < max_idx
         end
+        n_features ||= n_features_detected
+        n_features = [n_features, n_features_detected].max
         [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
       end