rumale 0.20.1 → 0.22.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0f361026cd2922a2d36846a817eee855bf0c000156ed6c756bca29d2e42d67a2
-  data.tar.gz: 016fa40aa2546824cacbc32353263cbfc9427f0ceabb7e703f99854914bb9a2e
+  metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
+  data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
 SHA512:
-  metadata.gz: 7a53a958db7ec8b56236018505370b9908ae81a9afc9d7c8ff0b16d83971539c1ad729b5ab350eb49ae9b90ada43a8912ed2404a37eef97a4d34dad90b1d3e9f
-  data.tar.gz: 2f2b3d48625c7120464179bc7759c01ba7de85cb0d54720665eaf1e4822f24c1870474ebc24a47cff123e44a8626b0e0fac6a7e81216c057286071770ea5ba79
+  metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
+  data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
data/.github/workflows/build.yml ADDED
@@ -0,0 +1,23 @@
+name: build
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        ruby: [ '2.5', '2.6', '2.7' ]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install BLAS and LAPACK
+        run: sudo apt-get install -y libopenblas-dev liblapacke-dev
+      - name: Set up Ruby ${{ matrix.ruby }}
+        uses: actions/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: Build and test with Rake
+        run: |
+          gem install bundler
+          bundle install --jobs 4 --retry 3
+          bundle exec rake
data/.rubocop.yml CHANGED
@@ -20,6 +20,9 @@ Layout/LineLength:
   Max: 145
   IgnoredPatterns: ['(\A|\s)#']
 
+Lint/ConstantDefinitionInBlock:
+  Enabled: false
+
 Lint/MissingSuper:
   Enabled: false
 
@@ -70,6 +73,9 @@ Style/StringConcatenation:
 RSpec/MultipleExpectations:
   Enabled: false
 
+RSpec/MultipleMemoizedHelpers:
+  Max: 25
+
 RSpec/NestedGroups:
   Max: 4
 
@@ -81,3 +87,6 @@ RSpec/InstanceVariable:
 
 RSpec/LeakyConstantDeclaration:
   Enabled: false
+
+Performance/Sum:
+  Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
+# 0.22.1
+- Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
+- Refactor NeighbourhoodComponentAnalysis.
+- Update API documentation.
+
+# 0.22.0
+## Breaking change
+- Add the lbfgsb.rb gem to the runtime dependencies. Rumale uses the lbfgsb gem for optimization.
+  This eliminates the need to require the mopti gem when using [NeighbourhoodComponentAnalysis](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/NeighbourhoodComponentAnalysis.html).
+- Add the lbfgs solver to [LogisticRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LogisticRegression.html) and make it the default solver.
+
+# 0.21.0
+## Breaking change
+- Change the default value of the max_iter argument on LinearModel estimators to 1000.
+
+# 0.20.3
+- Fix NeighbourhoodComponentAnalysis to use the automatic solver of PCA.
+- Refactor some code with RuboCop.
+- Update README.
+
+# 0.20.2
+- Add a cross-validator class for time-series data.
+  - [TimeSeriesSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/TimeSeriesSplit.html)
+
 # 0.20.1
 - Add cross-validator classes that split data according to group labels.
   - [GroupKFold](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/GroupKFold.html)
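
The 0.22.0 solver switch noted above can be exercised directly. A minimal sketch, assuming rumale >= 0.22.0 is installed; the toy data here is illustrative, not taken from the release notes:

```ruby
require 'rumale'

# Toy two-class data: two shifted Gaussian blobs.
x = Numo::DFloat.new(100, 2).rand_norm
x[50..-1, true] += 4.0
y = Numo::Int32.zeros(100)
y[50..-1] = 1

# Since 0.22.0 the default solver is 'lbfgs' (only the 'l2' penalty is supported).
lbfgs_lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0)
lbfgs_lr.fit(x, y)

# The former SGD behaviour remains available via solver: 'sgd'.
sgd_lr = Rumale::LinearModel::LogisticRegression.new(solver: 'sgd', random_seed: 1)
sgd_lr.fit(x, y)

puts 'lbfgs accuracy: %.3f' % lbfgs_lr.score(x, y)
puts 'sgd accuracy:   %.3f' % sgd_lr.score(x, y)
```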
data/Gemfile CHANGED
@@ -3,11 +3,13 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in rumale.gemspec
 gemspec
 
-gem 'coveralls', '~> 0.8'
 gem 'mmh3', '>= 1.0'
-gem 'mopti', '>= 0.1.0'
 gem 'numo-linalg', '>= 0.1.4'
 gem 'parallel', '>= 1.17.0'
 gem 'rake', '~> 12.0'
 gem 'rake-compiler', '~> 1.0'
 gem 'rspec', '~> 3.0'
+gem 'rubocop', '~> 0.91'
+gem 'rubocop-performance', '~> 1.8'
+gem 'rubocop-rspec', '~> 1.43'
+gem 'simplecov', '~> 0.19'
data/README.md CHANGED
@@ -2,8 +2,7 @@
 
 ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
 
-[![Build Status](https://travis-ci.org/yoshoku/rumale.svg?branch=master)](https://travis-ci.org/yoshoku/rumale)
-[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
+[![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
 [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
 [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
 [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
@@ -114,10 +113,10 @@ require 'rumale'
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
 # Define the estimator to be evaluated.
-lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new
 
 # Define the evaluation measure, splitting strategy, and cross validation.
-ev = Rumale::EvaluationMeasure::LogLoss.new
+ev = Rumale::EvaluationMeasure::Accuracy.new
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
 cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
 
@@ -125,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_logloss = report[:test_score].inject(:+) / kf.n_splits
-puts("5-CV mean log-loss: %.3f" % mean_logloss)
+mean_accuracy = report[:test_score].sum / kf.n_splits
+puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
 ```
 
 Execution of the above script results in the following.
 
 ```bash
 $ ruby cross_validation.rb
-5-CV mean log-loss: 0.355
+5-CV mean accuracy: 95.4%
 ```
 
 ### Example 3. Pipeline
@@ -144,10 +143,10 @@ require 'rumale'
 # Load dataset.
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
-# Construct pipeline with kernel approximation and SVC.
-rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
-svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
-pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
+# Construct pipeline with kernel approximation and LogisticRegression.
+rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
+pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
 
 # Define the splitting strategy and cross validation.
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -157,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
+mean_accuracy = report[:test_score].sum / kf.n_splits
 puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
 ```
 
@@ -228,6 +227,10 @@ When -1 is given to n_jobs parameter, all processors are used.
 estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
 ```
 
+## Novelties
+
+* [Rumale SHOP](https://suzuri.jp/yoshoku)
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/rumale.
data/lib/rumale.rb CHANGED
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
 require 'rumale/metric_learning/neighbourhood_component_analysis'
+require 'rumale/metric_learning/mlkr'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'
@@ -103,6 +104,7 @@ require 'rumale/model_selection/stratified_k_fold'
 require 'rumale/model_selection/shuffle_split'
 require 'rumale/model_selection/group_shuffle_split'
 require 'rumale/model_selection/stratified_shuffle_split'
+require 'rumale/model_selection/time_series_split'
 require 'rumale/model_selection/cross_validation'
 require 'rumale/model_selection/grid_search_cv'
 require 'rumale/model_selection/function'
@@ -51,7 +51,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
       #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
       # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
-      def fit_predict(x)
+      def fit_predict(x) # rubocop:disable Lint/UselessMethodDefinition
         super
       end
 
data/lib/rumale/evaluation_measure/roc_auc.rb CHANGED
@@ -75,9 +75,12 @@ module Rumale
         false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
 
         if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
+          # NOTE: Numo::NArray#insert is not a destructive method.
+          # rubocop:disable Style/RedundantSelfAssignment
           true_pos = true_pos.insert(0, 0)
           false_pos = false_pos.insert(0, 0)
           thresholds = thresholds.insert(0, thresholds[0] + 1)
+          # rubocop:enable Style/RedundantSelfAssignment
         end
 
         tpr = true_pos / true_pos[-1].to_f
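
The NOTE above is the whole point of the disabled cop: `insert` on a Numo array returns a new array rather than mutating the receiver, so the self-assignment is not redundant. A quick sketch of that behaviour, based on Numo::NArray's Array-like `insert`:

```ruby
require 'numo/narray'

a = Numo::Int32[1, 2, 3]
b = a.insert(0, 0) # returns a new array with 0 prepended

p a # => Numo::Int32[1, 2, 3]  (receiver unchanged)
p b # => Numo::Int32[0, 1, 2, 3]
```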
data/lib/rumale/kernel_machine/kernel_svc.rb CHANGED
@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent (SGD) optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::SVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
data/lib/rumale/linear_model/base_sgd.rb CHANGED
@@ -171,7 +171,7 @@ module Rumale
         @params[:fit_bias] = true
         @params[:reg_param] = 0.0
         @params[:l1_ratio] = 0.0
-        @params[:max_iter] = 200
+        @params[:max_iter] = 1000
         @params[:batch_size] = 50
         @params[:tol] = 0.0001
         @params[:verbose] = false
data/lib/rumale/linear_model/elastic_net.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -59,7 +59,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
data/lib/rumale/linear_model/lasso.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -55,7 +55,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, bias_scale: bias_scale,
data/lib/rumale/linear_model/linear_regression.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LinearRegression.new(max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -68,7 +68,7 @@ module Rumale
       #   If solver = 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     fit_bias: true, bias_scale: 1.0, max_iter: 200, batch_size: 50, tol: 1e-4,
+                     fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
                      solver: 'auto',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/logistic_regression.rb CHANGED
@@ -1,21 +1,24 @@
 # frozen_string_literal: true
 
-require 'rumale/linear_model/base_sgd'
+require 'lbfgsb'
 require 'rumale/base/classifier'
+require 'rumale/linear_model/base_sgd'
+require 'rumale/preprocessing/label_binarizer'
 
 module Rumale
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    # with stochastic gradient descent optimization.
-    # For multiclass classification problem, it uses one-vs-the-rest strategy.
+    # LogisticRegression is a class that implements Logistic Regression.
+    # In multiclass classification problems, it uses one-vs-the-rest strategy for the sgd solver
+    # and multinomial logistic regression for the lbfgs solver.
     #
-    # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
@@ -42,19 +45,24 @@ module Rumale
       # @return [Random]
       attr_reader :rng
 
-      # Create a new classifier with Logistic Regression by the SGD optimization.
+      # Create a new classifier with Logistic Regression.
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
       # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
       #   If penalty set to 'l2' or 'l1', this parameter is ignored.
       #   If l1_ratio = 1, the regularization is similar to Lasso.
       #   If l1_ratio = 0, the regularization is similar to Ridge.
       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
@@ -62,28 +70,38 @@ module Rumale
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       # @param batch_size [Integer] The size of the mini batches.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param solver [String] The algorithm for optimization. ('lbfgs' or 'sgd').
+      #   'lbfgs' uses the L-BFGS with lbfgsb.rb gem.
+      #   'sgd' uses the stochastic gradient descent optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
       #   If nil is given, the methods do not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
+      #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If solver = 'lbfgs' and true is given, 'iterate.dat' file is generated by lbfgsb.rb.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     solver: 'lbfgs',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
                              max_iter: max_iter, batch_size: batch_size, tol: tol)
         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
-        check_params_string(penalty: penalty)
+        check_params_string(solver: solver, penalty: penalty)
         check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(learning_rate: learning_rate, reg_param: reg_param,
                               bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
+        raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
+
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
         @params[:random_seed] ||= srand
         @rng = Random.new(@params[:random_seed])
@@ -105,30 +123,10 @@ module Rumale
         check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
-
-        if multiclass_problem?
-          n_classes = @classes.size
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          if enable_parallel?
-            # :nocov:
-            models = parallel_map(n_classes) do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              partial_fit(x, bin_y)
-            end
-            # :nocov:
-            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_classes.times do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            end
-          end
+        if @params[:solver] == 'sgd'
+          fit_sgd(x, y)
         else
-          negative_label = @classes[0]
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          fit_lbfgs(x, y)
         end
 
         self
@@ -182,6 +180,96 @@ module Rumale
       def multiclass_problem?
         @classes.size > 2
       end
+
+      def fit_lbfgs(base_x, base_y)
+        if multiclass_problem?
+          fnc = proc do |w, x, y, a|
+            n_features = x.shape[1]
+            n_classes = y.shape[1]
+            z = x.dot(w.reshape(n_classes, n_features).transpose)
+            # logsumexp and softmax
+            z_max = z.max(-1).expand_dims(-1).dup
+            z_max[~z_max.isfinite] = 0.0
+            lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(-1)).expand_dims(-1) + z_max
+            t = z - lgsexp
+            sftmax = Numo::NMath.exp(t)
+            # loss and gradient
+            loss = -(y * t).sum + 0.5 * a * w.dot(w)
+            grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          encoder = Rumale::Preprocessing::LabelBinarizer.new
+          onehot_y = encoder.fit_transform(base_y)
+          n_classes = @classes.size
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_classes * n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          if fit_bias?
+            weight = res[:x].reshape(n_classes, n_features)
+            @weight_vec = weight[true, 0...-1].dup
+            @bias_term = weight[true, -1].dup
+          else
+            @weight_vec = res[:x].reshape(n_classes, n_features)
+            @bias_term = Numo::DFloat.zeros(n_classes)
+          end
+        else
+          fnc = proc do |w, x, y, a|
+            z = 1 + Numo::NMath.exp(-y * x.dot(w))
+            loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
+            grad = (y / z - y).dot(x) + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          @weight_vec, @bias_term = split_weight(res[:x])
+        end
+      end
+
+      def fit_sgd(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            # :nocov:
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            # :nocov:
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
     end
   end
 end
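
For orientation, the two procs above evaluate a standard L2-regularized logistic objective and its gradient. In the notation of the code (regularization strength a, labels y_i in {-1, +1} for the binary branch, one-hot rows Y for the multiclass branch), my reading of the code is:

```latex
% Binary branch of fit_lbfgs:
L(\mathbf{w}) = \sum_i \log\bigl(1 + e^{-y_i \mathbf{w}^\top \mathbf{x}_i}\bigr) + \tfrac{a}{2}\,\mathbf{w}^\top\mathbf{w},
\qquad
\nabla L(\mathbf{w}) = \sum_i \Bigl(\tfrac{y_i}{1 + e^{-y_i \mathbf{w}^\top \mathbf{x}_i}} - y_i\Bigr)\mathbf{x}_i + a\,\mathbf{w}

% Multiclass branch (W is [n_classes, n_features], S the row-wise softmax of X W^\top):
L(\mathbf{W}) = -\sum_i \sum_k Y_{ik} \log S_{ik} + \tfrac{a}{2}\,\lVert \mathbf{W} \rVert_F^2,
\qquad
\nabla L(\mathbf{W}) = (\mathbf{S} - \mathbf{Y})^\top \mathbf{X} + a\,\mathbf{W}
```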
data/lib/rumale/linear_model/ridge.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -70,7 +70,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      solver: 'auto',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/svc.rb CHANGED
@@ -11,13 +11,14 @@ module Rumale
     # with stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
@@ -74,7 +75,7 @@ module Rumale
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      probability: false,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/svr.rb CHANGED
@@ -8,13 +8,14 @@ module Rumale
     # SVR is a class that implements Support Vector Regressor
     # with stochastic gradient descent optimization.
     #
-    # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_target_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -68,7 +69,7 @@ module Rumale
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
                      epsilon: 0.1,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
data/lib/rumale/metric_learning/mlkr.rb ADDED
@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/decomposition/pca'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'lbfgsb'
+
+module Rumale
+  module MetricLearning
+    # MLKR is a class that implements Metric Learning for Kernel Regression.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::MLKR.new
+    #   transformer.fit(training_samples, training_target_values)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
+    class MLKR
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Returns the metric components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+
+      # Return the number of iterations run for optimization.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with MLKR.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(max_iter: max_iter, tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [MLKR] The learned transformer itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        @prototypes = x.dot(@components.transpose)
+        @values = y
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        fit(x, y).transform(x)
+      end
+
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+
+      private
+
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+        )
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+
+      def mlkr_fnc(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # predict values.
+        kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        kernel_mat[kernel_mat.diag_indices] = 0.0
+        norm = kernel_mat.sum(1)
+        norm[norm.eq(0)] = 1
+        y_pred = kernel_mat.dot(y) / norm
+        # calculate loss.
+        y_diff = y_pred - y
+        loss = (y_diff**2).sum
+        # calculate gradient.
+        weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = 8 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
+      end
+    end
+  end
+end
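
As a reading aid for mlkr_fnc (my summary of the code and the cited AISTATS'07 paper, not text from the gem): with projections z_i = A x_i, the method minimizes the leave-one-out kernel-regression squared error

```latex
\hat{y}_i = \frac{\sum_{j \ne i} k_{ij}\, y_j}{\sum_{j \ne i} k_{ij}},
\qquad
k_{ij} = \exp\bigl(-\lVert \mathbf{z}_i - \mathbf{z}_j \rVert^2\bigr),
\qquad
L(\mathbf{A}) = \sum_i \bigl(\hat{y}_i - y_i\bigr)^2
```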
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb CHANGED
@@ -2,13 +2,15 @@
 
 require 'rumale/base/base_estimator'
 require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/pairwise_metric'
+require 'lbfgsb'
 
 module Rumale
   module MetricLearning
     # NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
     #
     # @example
-    #   require 'mopti'
     #   require 'rumale'
     #
     #   transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
@@ -39,7 +41,9 @@ module Rumale
       # @param init [String] The initialization method for components ('random' or 'pca').
       # @param max_iter [Integer] The maximum number of iterations.
       # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
         check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
@@ -65,8 +69,6 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [NeighbourhoodComponentAnalysis] The learned transformer itself.
       def fit(x, y)
-        raise 'NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded.' unless enable_mopti?
-
         x = check_convert_sample_array(x)
         y = check_convert_label_array(y)
         check_sample_label_size(x, y)
@@ -102,17 +104,9 @@ module Rumale
 
       private
 
-      def enable_mopti?
-        if defined?(Mopti).nil?
-          warn('NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded. You should install and load the mopti gem in advance.')
-          return false
-        end
-        true
-      end
-
       def init_components(x, n_features, n_components)
         if @params[:init] == 'pca'
-          pca = Rumale::Decomposition::PCA.new(n_components: n_components, solver: 'evd')
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
           pca.fit(x).components.flatten.dup
         else
           Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
@@ -127,28 +121,18 @@ module Rumale
         res[:x] = comp_init
         res[:n_iter] = 0
         # perform optimization.
-        optimizer = Mopti::ScaledConjugateGradient.new(
-          fnc: method(:nca_loss), jcb: method(:nca_dloss),
-          x_init: comp_init, args: [x, y],
-          max_iter: @params[:max_iter], ftol: @params[:tol]
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:nca_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
         )
-        fold = 0.0
-        dold = 0.0
-        optimizer.each do |prm|
-          res = prm
-          puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
-          break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
-
-          fold = res[:fnc]
-          dold = res[:jcb]
-        end
         # return the results.
         n_iter = res[:n_iter]
         comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
         [comps, n_iter]
       end
 
-      def nca_loss(w, x, y)
+      def nca_fnc(w, x, y)
         # initialize some variables.
         n_samples, n_features = x.shape
         n_components = w.size / n_features
@@ -157,32 +141,19 @@ module Rumale
         z = x.dot(w.transpose)
         # calculate probability matrix.
         prob_mat = probability_matrix(z)
-        # calculate loss.
+        # calculate loss and gradient.
         # NOTE:
         # NCA attempts to maximize its objective function.
         # For the minimization algorithm, the objective function value is subtracted from the maximum value (n_samples).
         mask_mat = y.expand_dims(1).eq(y)
         masked_prob_mat = prob_mat * mask_mat
-        n_samples - masked_prob_mat.sum
-      end
-
-      def nca_dloss(w, x, y)
-        # initialize some variables.
-        n_features = x.shape[1]
-        n_components = w.size / n_features
-        # projection.
-        w = w.reshape(n_components, n_features)
-        z = x.dot(w.transpose)
-        # calculate probability matrix.
-        prob_mat = probability_matrix(z)
-        # calculate gradient.
-        mask_mat = y.expand_dims(1).eq(y)
-        masked_prob_mat = prob_mat * mask_mat
-        weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
-        weighted_prob_mat += weighted_prob_mat.transpose
-        weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
-        gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
-        -gradient.flatten.dup
+        loss = n_samples - masked_prob_mat.sum
+        sum_probs = masked_prob_mat.sum(1)
+        weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
+        weight_mat += weight_mat.transpose
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = -2 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
       end
 
       def probability_matrix(z)
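
The refactor merges the old nca_loss/nca_dloss pair into a single nca_fnc returning both value and gradient, which is what Lbfgsb.minimize expects with jcb: true. The quantity being minimized (my summary of the code, with p_ij from probability_matrix) is

```latex
L(\mathbf{A}) = N - \sum_{i=1}^{N} \sum_{j:\, y_j = y_i} p_{ij},
\qquad
p_{ij} = \frac{\exp\bigl(-\lVert \mathbf{A}\mathbf{x}_i - \mathbf{A}\mathbf{x}_j \rVert^2\bigr)}{\sum_{k \ne i} \exp\bigl(-\lVert \mathbf{A}\mathbf{x}_i - \mathbf{A}\mathbf{x}_k \rVert^2\bigr)},
\quad p_{ii} = 0
```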
data/lib/rumale/model_selection/time_series_split.rb ADDED
@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+
+require 'rumale/base/splitter'
+
+module Rumale
+  module ModelSelection
+    # TimeSeriesSplit is a class that generates the set of data indices for time series cross-validation.
+    # It is assumed that the given dataset is already ordered by time information.
+    #
+    # @example
+    #   cv = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 5)
+    #   x = Numo::DFloat.new(6, 2).rand
+    #   cv.split(x, nil).each do |train_ids, test_ids|
+    #     puts '---'
+    #     pp train_ids
+    #     pp test_ids
+    #   end
+    #
+    #   # ---
+    #   # [0]
+    #   # [1]
+    #   # ---
+    #   # [0, 1]
+    #   # [2]
+    #   # ---
+    #   # [0, 1, 2]
+    #   # [3]
+    #   # ---
+    #   # [0, 1, 2, 3]
+    #   # [4]
+    #   # ---
+    #   # [0, 1, 2, 3, 4]
+    #   # [5]
+    #
+    class TimeSeriesSplit
+      include Base::Splitter
+
+      # Return the number of splits.
+      # @return [Integer]
+      attr_reader :n_splits
+
+      # Return the maximum number of training samples in a split.
+      # @return [Integer/Nil]
+      attr_reader :max_train_size
+
+      # Create a new data splitter for time series cross-validation.
+      #
+      # @param n_splits [Integer] The number of splits.
+      # @param max_train_size [Integer/Nil] The maximum number of training samples in a split.
+      def initialize(n_splits: 5, max_train_size: nil)
+        check_params_numeric(n_splits: n_splits)
+        check_params_numeric_or_nil(max_train_size: max_train_size)
+        @n_splits = n_splits
+        @max_train_size = max_train_size
+      end
+
+      # Generate data indices for time series cross-validation.
+      #
+      # @overload split(x, y) -> Array
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #     The dataset to be used to generate data indices for time series cross-validation.
+      #     It is expected that the data will be ordered by time information.
+      #   @param y [Numo::Int32] (shape: [n_samples])
+      #     This argument exists to unify the interface between the K-fold methods; it is not used in the method.
+      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
+      def split(x, _y)
+        x = check_convert_sample_array(x)
+
+        n_samples = x.shape[0]
+        unless (@n_splits + 1).between?(2, n_samples)
+          raise ArgumentError,
+                'The number of folds (n_splits + 1) must be not less than 2 and not more than the number of samples.'
+        end
+
+        test_size = n_samples / (@n_splits + 1)
+        offset = test_size + n_samples % (@n_splits + 1)
+
+        Array.new(@n_splits) do |n|
+          start = offset * (n + 1)
+          train_ids = if !@max_train_size.nil? && @max_train_size < test_size
+                        Array((start - @max_train_size)...start)
+                      else
+                        Array(0...start)
+                      end
+          test_ids = Array(start...(start + test_size))
+          [train_ids, test_ids]
+        end
+      end
+    end
+  end
+end
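
TimeSeriesSplit plugs into CrossValidation like the other splitters. A minimal sketch with synthetic time-ordered data; the data, estimator choice, and variable names below are illustrative, not from the gem:

```ruby
require 'rumale'

# Synthetic time-ordered data: a noisy linear trend over 100 steps.
x = Numo::DFloat.new(100, 1).seq
y = 0.5 * x[true, 0] + Numo::DFloat.new(100).rand_norm(0, 2.0)

# Each fold trains on an initial segment and tests on the block that follows it.
splitter = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 4)
est = Rumale::LinearModel::Ridge.new(random_seed: 1)
cv = Rumale::ModelSelection::CrossValidation.new(estimator: est, splitter: splitter)

report = cv.perform(x, y)
puts report[:test_score].inspect # one score per fold, via the regressor's default score
```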
data/lib/rumale/pipeline/pipeline.rb CHANGED
@@ -140,7 +140,7 @@ module Rumale
       def validate_steps(steps)
         steps.keys[0...-1].each do |name|
           transformer = steps[name]
-          next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
+          next if transformer.nil? || (transformer.class.method_defined?(:fit) && transformer.class.method_defined?(:transform))
 
           raise TypeError,
                 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
data/lib/rumale/tree/base_decision_tree.rb CHANGED
@@ -75,17 +75,10 @@ module Rumale
         node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
 
         # terminate growing.
-        unless @params[:max_leaf_nodes].nil?
-          return nil if @n_leaves >= @params[:max_leaf_nodes]
-        end
-
+        return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
         return nil if n_samples < @params[:min_samples_leaf]
         return put_leaf(node, y) if n_samples == @params[:min_samples_leaf]
-
-        unless @params[:max_depth].nil?
-          return put_leaf(node, y) if depth == @params[:max_depth]
-        end
-
+        return put_leaf(node, y) if !@params[:max_depth].nil? && depth == @params[:max_depth]
         return put_leaf(node, y) if stop_growing?(y)
 
         # calculate optimal parameters.
data/lib/rumale/tree/gradient_tree_regressor.rb CHANGED
@@ -138,7 +138,7 @@ module Rumale
         nil
       end
 
-      def grow_node(depth, x, y, g, h)
+      def grow_node(depth, x, y, g, h) # rubocop:disable Metrics/AbcSize
         # initialize some variables.
         sum_g = g.sum
         sum_h = h.sum
@@ -146,17 +146,10 @@ module Rumale
         node = Node.new(depth: depth, n_samples: n_samples)
 
         # terminate growing.
-        unless @params[:max_leaf_nodes].nil?
-          return nil if @n_leaves >= @params[:max_leaf_nodes]
-        end
-
+        return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
         return nil if n_samples < @params[:min_samples_leaf]
         return put_leaf(node, sum_g, sum_h) if n_samples == @params[:min_samples_leaf]
-
-        unless @params[:max_depth].nil?
-          return put_leaf(node, sum_g, sum_h) if depth == @params[:max_depth]
-        end
-
+        return put_leaf(node, sum_g, sum_h) if !@params[:max_depth].nil? && depth == @params[:max_depth]
         return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
 
         # calculate optimal parameters.
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.20.1'
+  VERSION = '0.22.1'
 end
data/rumale.gemspec CHANGED
@@ -45,4 +45,5 @@ Gem::Specification.new do |spec|
   }
 
   spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
+  spec.add_runtime_dependency 'lbfgsb', '>= 0.3.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.20.1
+  version: 0.22.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-08-23 00:00:00.000000000 Z
+date: 2020-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -24,6 +24,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 0.9.1
+- !ruby/object:Gem::Dependency
+  name: lbfgsb
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.3.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.3.0
 description: |
   Rumale is a machine learning library in Ruby.
   Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -43,7 +57,7 @@ extensions:
 - ext/rumale/extconf.rb
 extra_rdoc_files: []
 files:
-- ".coveralls.yml"
+- ".github/workflows/build.yml"
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
@@ -131,6 +145,7 @@ files:
 - lib/rumale/manifold/mds.rb
 - lib/rumale/manifold/tsne.rb
 - lib/rumale/metric_learning/fisher_discriminant_analysis.rb
+- lib/rumale/metric_learning/mlkr.rb
 - lib/rumale/metric_learning/neighbourhood_component_analysis.rb
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/function.rb
@@ -141,6 +156,7 @@ files:
 - lib/rumale/model_selection/shuffle_split.rb
 - lib/rumale/model_selection/stratified_k_fold.rb
 - lib/rumale/model_selection/stratified_shuffle_split.rb
+- lib/rumale/model_selection/time_series_split.rb
 - lib/rumale/multiclass/one_vs_rest_classifier.rb
 - lib/rumale/naive_bayes/base_naive_bayes.rb
 - lib/rumale/naive_bayes/bernoulli_nb.rb
@@ -208,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.1.2
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
data/.coveralls.yml DELETED
@@ -1 +0,0 @@
-service_name: travis-ci