rumale-linear_model 0.24.0 → 0.25.0
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/rumale/linear_model/base_estimator.rb +46 -0
- data/lib/rumale/linear_model/elastic_net.rb +112 -61
- data/lib/rumale/linear_model/lasso.rb +107 -61
- data/lib/rumale/linear_model/linear_regression.rb +39 -104
- data/lib/rumale/linear_model/logistic_regression.rb +36 -119
- data/lib/rumale/linear_model/nnls.rb +14 -46
- data/lib/rumale/linear_model/ridge.rb +39 -108
- data/lib/rumale/linear_model/sgd_classifier.rb +262 -0
- data/lib/rumale/linear_model/{base_sgd.rb → sgd_estimator.rb} +36 -21
- data/lib/rumale/linear_model/sgd_regressor.rb +138 -0
- data/lib/rumale/linear_model/svc.rb +60 -68
- data/lib/rumale/linear_model/svr.rb +54 -52
- data/lib/rumale/linear_model/version.rb +1 -1
- data/lib/rumale/linear_model.rb +2 -1
- metadata +8 -5
```diff
--- a/data/lib/rumale/linear_model/ridge.rb
+++ b/data/lib/rumale/linear_model/ridge.rb
@@ -4,19 +4,18 @@ require 'lbfgsb'
 
 require 'rumale/base/regressor'
 require 'rumale/validation'
-require 'rumale/linear_model/base_sgd'
+
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization,
-    # singular value decomposition (SVD), or L-BFGS optimization.
+    # with singular value decomposition (SVD) or L-BFGS optimization.
     #
     # @example
     #   require 'rumale/linear_model/ridge'
     #
-    #   estimator =
-    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
+    #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -27,73 +26,40 @@ module Rumale
     #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'svd')
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class Ridge < BaseSGD
-      include ::Rumale::Base::Regressor
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
+    class Ridge < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Regressor
 
       # Create a new Ridge regressor.
       #
-      # @param learning_rate [Float] The initial value of learning rate.
-      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
-      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param momentum [Float] The momentum factor.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       #   If solver is 'svd', this parameter is ignored.
-      # @param batch_size [Integer] The size of the mini batches.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
       #   If solver is 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'svd', or 'lbfgs').
       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
-      #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
       #   'lbfgs' uses the L-BFGS method for optimization.
-      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-      #   If nil is given, the method does not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
       #   If solver is 'svd', this parameter is ignored.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 50, tol: 1e-4,
-                     solver: 'auto',
-                     n_jobs: nil, verbose: false, random_seed: nil)
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, solver: 'auto', verbose: false)
         super()
-        @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params = {
+          reg_param: reg_param,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          tol: tol,
+          verbose: verbose
+        }
         @params[:solver] = if solver == 'auto'
                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                             solver.match?(/^svd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
-        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @penalty_type = L2_PENALTY
-        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
       end
 
       # Fit the model with given training data.
@@ -102,17 +68,15 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Ridge] The learned regressor itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_target_value_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
-
-        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
-          fit_svd(x, y)
-        elsif @params[:solver] == 'lbfgs'
-          fit_lbfgs(x, y)
-        else
-          fit_sgd(x, y)
-        end
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_target_value_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @weight_vec, @bias_term = if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+                                    partial_fit_svd(x, y)
+                                  else
+                                    partial_fit_lbfgs(x, y)
+                                  end
 
         self
       end
@@ -122,25 +86,24 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
       private
 
-      def fit_svd(x, y)
+      def partial_fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
-
         s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
-
-        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+        w = w.transpose.dup unless single_target?(y)
+        split_weight(w)
       end
 
-      def fit_lbfgs(x, y)
-        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+      def partial_fit_lbfgs(base_x, base_y)
+        fnc = proc do |w, x, y, a|
           n_samples, n_features = x.shape
           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
           z = x.dot(w.transpose)
@@ -150,57 +113,25 @@ module Rumale
           [loss, gradient.flatten.dup]
         end
 
-        x = expand_feature(x) if fit_bias?
+        base_x = expand_feature(base_x) if fit_bias?
 
-        n_features = x.shape[1]
-        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        n_features = base_x.shape[1]
+        n_outputs = single_target?(base_y) ? 1 : base_y.shape[1]
+        w_init = Numo::DFloat.zeros(n_outputs * n_features)
 
         res = Lbfgsb.minimize(
-          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          fnc: fnc, jcb: true, x_init: w_init, args: [base_x, base_y, @params[:reg_param]],
           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
           verbose: @params[:verbose] ? 1 : -1
         )
 
-        @weight_vec, @bias_term =
-          if single_target?(y)
-            split_weight(res[:x])
-          else
-            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
-          end
-      end
-
-      def fit_sgd(x, y)
-        if single_target?(y)
-          @weight_vec, @bias_term = partial_fit(x, y)
-        else
-          n_outputs = y.shape[1]
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          if enable_parallel?
-            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-          end
-        end
+        w = single_target?(base_y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
+        split_weight(w)
       end
 
       def single_target?(y)
         y.ndim == 1
       end
-
-      def init_weight(n_features, n_outputs)
-        ::Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
-      end
-
-      def split_weight_mult(w)
-        if fit_bias?
-          [w[0...-1, true].dup, w[-1, true].dup]
-        else
-          [w.dup, Numo::DFloat.zeros(w.shape[1])]
-        end
-      end
     end
   end
 end
```
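Taken together, these hunks delete the SGD code path from Ridge: the class now inherits from the new BaseEstimator, `partial_fit_svd` computes the closed-form ridge solution w = V diag(s / (s^2 + reg_param)) U^T y from the SVD x = U diag(s) V^T, and `partial_fit_lbfgs` minimizes the L2-penalized squared error with L-BFGS. Below is a minimal usage sketch against the 0.25.0 API documented in the docstrings above; the toy data and expected output are illustrative assumptions, not taken from the gem.

```ruby
require 'numo/narray'
require 'rumale/linear_model/ridge'

# Toy regression data (made-up values): y = 2*x1 - x2 + 1.
x = Numo::DFloat[[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [2.0, 1.0]]
y = 2.0 * x[true, 0] - x[true, 1] + 1.0

# The SGD-era options (learning_rate, batch_size, n_jobs, random_seed, ...)
# are gone in 0.25.0; only the keyword arguments of the new initialize remain.
model = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'lbfgs')
model.fit(x, y)
p model.predict(x) # close to y for a small reg_param
```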
```diff
--- /dev/null
+++ b/data/lib/rumale/linear_model/sgd_classifier.rb
@@ -0,0 +1,262 @@
+# frozen_string_literal: true
+
+require 'rumale/base/classifier'
+require 'rumale/probabilistic_output'
+require 'rumale/validation'
+
+require_relative 'sgd_estimator'
+
+module Rumale
+  module LinearModel
+    # SGDClassifier is a class that implements linear classifier with stochastic gradient descent optimization.
+    #
+    # @example
+    #   require 'rumale/linear_model/sgd_classifier'
+    #
+    #   estimator =
+    #     Rumale::LinearModel::SGDClassifier.new(loss: 'hinge', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class SGDClassifier < Rumale::LinearModel::SGDEstimator # rubocop:disable Metrics/ClassLength
+      include Rumale::Base::Classifier
+
+      # Return the class labels.
+      # @return [Numo::Int32] (shape: [n_classes])
+      attr_reader :classes
+
+      # Return the random generator for performing random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new linear classifier with stochastic gradient descent optimization.
+      #
+      # @param loss [String] The loss function to be used ('hinge' and 'log_loss').
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
+      #   If penalty set to 'l2' or 'l1', this parameter is ignored.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(loss: 'hinge', learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
+                     fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        super()
+        @params.merge!(
+          loss: loss,
+          learning_rate: learning_rate,
+          decay: decay,
+          momentum: momentum,
+          penalty: penalty,
+          reg_param: reg_param,
+          l1_ratio: l1_ratio,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          batch_size: batch_size,
+          tol: tol,
+          n_jobs: n_jobs,
+          verbose: verbose,
+          random_seed: random_seed
+        )
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = @params[:penalty]
+        @loss_func = case @params[:loss]
+                     when Rumale::LinearModel::Loss::HingeLoss::NAME
+                       Rumale::LinearModel::Loss::HingeLoss.new
+                     when Rumale::LinearModel::Loss::LogLoss::NAME
+                       Rumale::LinearModel::Loss::LogLoss.new
+                     else
+                       raise ArgumentError, "given loss '#{loss}' is not supported."
+                     end
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [SGDClassifier] The learned classifier itself.
+      def fit(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_label_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+
+        send("fit_#{@loss_func.name}", x, y)
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_#{@loss_func.name}", x)
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_proba_#{@loss_func.name}", x)
+      end
+
+      private
+
+      def fit_hinge(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          @prob_param = Numo::DFloat.zeros(n_classes, 2)
+          models = if enable_parallel?
+                     parallel_map(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   else
+                     Array.new(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   end
+          # store model.
+          models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          @prob_param = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
+        end
+      end
+
+      def fit_log_loss(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
+
+      def predict_proba_hinge(x)
+        if multiclass_problem?
+          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
+          (probs.transpose / probs.sum(axis: 1)).transpose.dup
+        else
+          n_samples = x.shape[0]
+          probs = Numo::DFloat.zeros(n_samples, 2)
+          probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
+          probs[true, 0] = 1.0 - probs[true, 1]
+          probs
+        end
+      end
+
+      def predict_proba_log_loss(x)
+        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
+
+        n_samples = x.shape[0]
+        probs = Numo::DFloat.zeros(n_samples, 2)
+        probs[true, 1] = proba
+        probs[true, 0] = 1.0 - proba
+        probs
+      end
+
+      def predict_hinge(x)
+        n_samples = x.shape[0]
+        predicted = if multiclass_problem?
+                      decision_values = decision_function(x)
+                      if enable_parallel?
+                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      else
+                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      end
+                    else
+                      decision_values = decision_function(x).ge(0.0).to_a
+                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def predict_log_loss(x)
+        n_samples = x.shape[0]
+        decision_values = predict_proba_log_loss(x)
+        predicted = if enable_parallel?
+                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    else
+                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def multiclass_problem?
+        @classes.size > 2
+      end
+    end
+  end
+end
```
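The new SGDClassifier consolidates the SGD-based classification logic into one estimator. In the multiclass case it trains one-vs-rest binary models, building plus/minus-one labels with `Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1`, and for the hinge loss it also fits a sigmoid through Rumale::ProbabilisticOutput so that `predict_proba` is available. A short usage sketch mirroring the class's own @example; the toy data is made up:

```ruby
require 'numo/narray'
require 'rumale/linear_model/sgd_classifier'

# Two small, linearly separable clusters (made-up data).
x = Numo::DFloat[[0.0, 0.0], [0.2, 0.1], [1.0, 1.0], [0.9, 1.2]]
y = Numo::Int32[-1, -1, 1, 1]

estimator = Rumale::LinearModel::SGDClassifier.new(
  loss: 'hinge', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1
)
estimator.fit(x, y)          # dispatches to fit_hinge via @loss_func.name
p estimator.predict(x)       # expected: Numo::Int32[-1, -1, 1, 1]
p estimator.predict_proba(x) # sigmoid-calibrated probabilities, shape [4, 2]
```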
```diff
--- a/data/lib/rumale/linear_model/base_sgd.rb
+++ b/data/lib/rumale/linear_model/sgd_estimator.rb
@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-require 'rumale/base/estimator'
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
@@ -96,6 +96,14 @@ module Rumale
         def dloss(out, y)
           2.fdiv(y.shape[0]) * (out - y)
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'squared_error'
       end
 
       # @!visibility private
@@ -110,6 +118,14 @@ module Rumale
         def dloss(out, y)
           y / (1 + Numo::NMath.exp(-y * out)) - y
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'log_loss'
       end
 
       # @!visibility private
@@ -127,6 +143,14 @@ module Rumale
           d[tids] = -y[tids] if tids.count.positive?
           d
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'hinge'
       end
 
       # @!visibility private
@@ -151,13 +175,21 @@ module Rumale
           d[tids] = -1 if tids.count.positive?
           d
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'epsilon_insensitive'
       end
     end
 
-    # BaseSGD is an abstract class for implementation of linear estimator with mini-batch stochastic gradient descent (SGD) optimization.
+    # SGDEstimator is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
     # This class is used internally.
-    class BaseSGD < ::Rumale::Base::Estimator
-      # Create an initial linear model.
+    class SGDEstimator < Rumale::LinearModel::BaseEstimator
+      # Create an initial linear model with SGD.
       def initialize
         super()
         @params = {
@@ -224,23 +256,6 @@ module Rumale
         split_weight(weight)
       end
 
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight(weight)
-        if fit_bias?
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-
-      def fit_bias?
-        @params[:fit_bias] == true
-      end
-
       def apply_l2_penalty?
         @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
       end
```
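The `name` readers and `NAME` constants added to each loss class support string-keyed dispatch: SGDClassifier calls `send("fit_#{@loss_func.name}", x, y)`, so 'hinge' routes to fit_hinge and 'log_loss' to fit_log_loss, and the same strings serve as the accepted values of the `loss:` argument (presumably SGDRegressor does the same for its losses). A self-contained sketch of the pattern with hypothetical classes, not the gem's code:

```ruby
# Hypothetical loss class: only NAME and #name matter for the dispatch.
class HingeLoss
  NAME = 'hinge'

  def name
    NAME
  end
end

# Hypothetical estimator that routes fit to a loss-specific private method.
class Model
  def initialize(loss)
    @loss_func = loss
  end

  def fit(x, y)
    # 'hinge' resolves to fit_hinge; a LogLoss would resolve to fit_log_loss.
    send("fit_#{@loss_func.name}", x, y)
  end

  private

  def fit_hinge(_x, _y)
    puts 'fitting with the hinge loss'
  end
end

Model.new(HingeLoss.new).fit(nil, nil) # prints "fitting with the hinge loss"
```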