rumale 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: e97522eaf22db6c80513f8d69513b6aef7dc1a9f
-  data.tar.gz: a2c2d51a15465f5bac67bc0b60aef1c70067bba4
+SHA256:
+  metadata.gz: 30476b58c5c5b39567f1cb3c8346a7c354fbf8d30401555fa2e02995021b759d
+  data.tar.gz: 6f664b0c279e0fef2dc47e608cdc2737318274b45017d6d60f0dd516aa2ebb48
 SHA512:
-  metadata.gz: b04158c2f4247c58593dad54e1d8c79030aff2a3826032610b9d7afee73490843caa3c5636c86fc3fc3196ed45affdb2da17c7d926091f77c784f1970464c2a4
-  data.tar.gz: e75814bcbe3aa087f2a45a15790a180016147d9197442d5d329a6484d1baee8b4d2726265d640a3d47ada45189d6dfb3157f2fa5eec7e334824cf96bd8fba493
+  metadata.gz: aa51f865e4995901e5587e3089fae724a57022d96c95d2b852cfde99f85f9aae7035c4edfe6c4a7899c22674778e1bfc0332ef83b6f234a8c9e8aa982e55e833
+  data.tar.gz: 55e209725a0c716b1f450bed025fceefe36dafe278b96648ac60079e3968778840bc4e1e75ff4181abafc4f98eb93cc40d8e2e0e3b5bf078bc94cf8b9a5dc50d
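The 0.16.1 release records SHA256 digests where 0.16.0 recorded SHA1, alongside the unchanged SHA512 entries. A minimal Ruby sketch for checking these digests locally, assuming the package has been fetched with `gem fetch rumale -v 0.16.1` and that a `tar` command is available (a `.gem` file is a plain tar archive containing metadata.gz and data.tar.gz):

require 'digest'

# Unpack the two hashed members from the .gem tar archive.
system('tar -xf rumale-0.16.1.gem metadata.gz data.tar.gz') || abort('unpack failed')

%w[metadata.gz data.tar.gz].each do |member|
  # Compare against the SHA256 lines recorded in checksums.yaml above.
  puts "#{member}: #{Digest::SHA256.file(member).hexdigest}"
end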
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+# 0.16.1
+- Add regressor class for [ElasticNet](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/ElasticNet.html).
+- Add new linear model abstract class.
+  - In version 0.17.0, all LinearModel estimators will be changed to use this new abstract class.
+    The major differences from the existing abstract class are that
+    the optimizer of LinearModel estimators is fixed to mini-batch SGD with a momentum term,
+    the max_iter parameter indicates the number of epochs instead of the maximum number of iterations,
+    the fit_bias parameter is true by default, and elastic-net style regularization can be used.
+
 # 0.16.0
 ## Breaking changes
 - The meaning of the `max_iter` parameter of the multi-layer perceptron estimators
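The behavioral shift described in the 0.16.1 changelog entry can be seen in the new ElasticNet estimator, the first class built on the new abstract class. A minimal sketch, assuming `x` and `y` are a Numo::DFloat sample matrix of shape [n_samples, n_features] and a target vector of shape [n_samples]:

require 'rumale'

# Under the new abstract class, max_iter counts epochs (full passes over
# the data), not mini-batch iterations, and fit_bias defaults to true.
estimator = Rumale::LinearModel::ElasticNet.new(
  reg_param: 0.1,  # overall regularization strength
  l1_ratio: 0.5,   # elastic-net mix: 0.0 is Ridge-like, 1.0 is Lasso-like
  max_iter: 200, batch_size: 50, random_seed: 1
)
estimator.fit(x, y)
results = estimator.predict(x)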
data/lib/rumale.rb CHANGED
@@ -27,12 +27,14 @@ require 'rumale/optimizer/yellow_fin'
 require 'rumale/pipeline/pipeline'
 require 'rumale/kernel_approximation/rbf'
 require 'rumale/linear_model/base_linear_model'
+require 'rumale/linear_model/base_sgd'
 require 'rumale/linear_model/svc'
 require 'rumale/linear_model/svr'
 require 'rumale/linear_model/logistic_regression'
 require 'rumale/linear_model/linear_regression'
 require 'rumale/linear_model/ridge'
 require 'rumale/linear_model/lasso'
+require 'rumale/linear_model/elastic_net'
 require 'rumale/kernel_machine/kernel_svc'
 require 'rumale/kernel_machine/kernel_pca'
 require 'rumale/kernel_machine/kernel_ridge'
data/lib/rumale/linear_model/base_linear_model.rb CHANGED
@@ -33,7 +33,7 @@ module Rumale
   @params[:max_iter] = max_iter
   @params[:batch_size] = batch_size
   @params[:optimizer] = optimizer
-  @params[:optimizer] ||= Optimizer::Nadam.new
+  @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
   @params[:n_jobs] = n_jobs
   @params[:random_seed] = random_seed
   @params[:random_seed] ||= srand
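The change from `Optimizer::Nadam` to `Rumale::Optimizer::Nadam` reads as a namespace fix: base_sgd.rb (added below) defines a nested `Rumale::LinearModel::Optimizer` module, so inside `Rumale::LinearModel` the bare constant `Optimizer` no longer resolves to the top-level `Rumale::Optimizer`. An illustrative sketch of the constant lookup, not rumale code:

module Rumale
  module Optimizer; class Nadam; end; end

  module LinearModel
    module Optimizer; class SGD; end; end  # the nesting added in base_sgd.rb

    Optimizer::SGD            # => Rumale::LinearModel::Optimizer::SGD (lexical lookup)
    # Optimizer::Nadam        # => NameError: the nested module shadows Rumale::Optimizer
    Rumale::Optimizer::Nadam  # fully qualified, hence the change above
  end
end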
data/lib/rumale/linear_model/base_sgd.rb ADDED
@@ -0,0 +1,230 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+
+module Rumale
+  module LinearModel
+    # @!visibility private
+    # This module consists of the classes that implement penalty (regularization) term.
+    module Penalty
+      # @!visibility private
+      # L2Penalty is a class that applies L2 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L2Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          weight - @reg_param * lr * weight
+        end
+      end
+
+      # @!visibility private
+      # L1Penalty is a class that applies L1 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L1Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @q_vec = nil
+          @u = 0
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          @q_vec ||= Numo::DFloat.zeros(weight.shape[0])
+          @u += @reg_param * lr
+          z = weight.dup
+          gt = weight.gt(0)
+          lt = weight.lt(0)
+          weight[gt] = Numo::DFloat.maximum(0.0, weight[gt] - (@u + @q_vec[gt])) if gt.count.positive?
+          weight[lt] = Numo::DFloat.minimum(0.0, weight[lt] + (@u - @q_vec[lt])) if lt.count.positive?
+          @q_vec += weight - z
+          weight
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the class that implements stochastic gradient descent (SGD) optimizer.
+    module Optimizer
+      # @!visibility private
+      # SGD is a class that implements SGD optimizer.
+      # This class is used internally.
+      class SGD
+        # @!visibility private
+        # Create a new SGD optimizer.
+        # @param learning_rate [Float] The initial value of learning rate.
+        # @param momentum [Float] The initial value of momentum.
+        # @param decay [Float] The smoothing parameter.
+        def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
+          @learning_rate = learning_rate
+          @momentum = momentum
+          @decay = decay
+          @update = nil
+          @iter = 0
+        end
+
+        # @!visibility private
+        def current_learning_rate
+          @learning_rate / (1.0 + @decay * @iter)
+        end
+
+        # @!visibility private
+        def call(weight, gradient)
+          @update ||= Numo::DFloat.zeros(weight.shape[0])
+          @update = @momentum * @update - current_learning_rate * gradient
+          @iter += 1
+          weight + @update
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the classes that implement loss function for linear model.
+    module Loss
+      # @!visibility private
+      # MeanSquaredError is a class that calculates mean squared error for linear regression model.
+      class MeanSquaredError
+        # @!visibility private
+        def loss(out, y)
+          ((out - y)**2).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          2.fdiv(y.shape[0]) * (out - y)
+        end
+      end
+    end
+
+    # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+    # This class is used internally.
+    class BaseSGD
+      include Rumale::Base::BaseEstimator
+
+      # Create an initial linear model.
+      def initialize
+        @params = {}
+        @params[:learning_rate] = 0.01
+        @params[:decay] = nil
+        @params[:momentum] = 0.0
+        @params[:bias_scale] = 1.0
+        @params[:fit_bias] = true
+        @params[:reg_param] = 0.0
+        @params[:l1_ratio] = 0.0
+        @params[:max_iter] = 200
+        @params[:batch_size] = 50
+        @params[:tol] = 0.0001
+        @params[:verbose] = false
+        @penalty_type = nil
+        @loss_func = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @n_iter = nil
+        @rng = nil
+      end
+
+      private
+
+      L2_PENALTY = 'l2'
+      L1_PENALTY = 'l1'
+      ELASTICNET_PENALTY = 'elasticnet'
+
+      private_constant :L2_PENALTY, :L1_PENALTY, :ELASTICNET_PENALTY
+
+      def partial_fit(x, y)
+        class_name = self.class.to_s.split('::').last if @params[:verbose]
+        narr = x.class
+        # Expand feature vectors for bias term.
+        x = expand_feature(x) if fit_bias?
+        # Initialize some variables.
+        sub_rng = @rng.dup
+        n_samples, n_features = x.shape
+        weight = Numo::DFloat.zeros(n_features)
+        optimizer = LinearModel::Optimizer::SGD.new(
+          learning_rate: @params[:learning_rate],
+          momentum: @params[:momentum],
+          decay: @params[:decay]
+        )
+        l2_penalty = LinearModel::Penalty::L2Penalty.new(reg_param: l2_reg_param) if apply_l2_penalty?
+        l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
+        # Optimization.
+        @params[:max_iter].times do |t|
+          sample_ids = [*0...n_samples]
+          sample_ids.shuffle!(random: sub_rng)
+          until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
+            # sampling
+            sub_x = x[subset_ids, true]
+            sub_y = y[subset_ids]
+            # calculate gradient
+            dloss = @loss_func.dloss(sub_x.dot(weight), sub_y)
+            dloss = narr.minimum(1e12, narr.maximum(-1e12, dloss))
+            gradient = dloss.dot(sub_x)
+            # update weight
+            lr = optimizer.current_learning_rate
+            weight = optimizer.call(weight, gradient)
+            # l2 regularization
+            weight = l2_penalty.call(weight, lr) if apply_l2_penalty?
+            # l1 regularization
+            weight = l1_penalty.call(weight, lr) if apply_l1_penalty?
+          end
+          loss = @loss_func.loss(x.dot(weight), y)
+          puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
+          break if loss < @params[:tol]
+        end
+        split_weight(weight)
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def split_weight(weight)
+        if fit_bias?
+          [weight[0...-1].dup, weight[-1]]
+        else
+          [weight, 0.0]
+        end
+      end
+
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+
+      def apply_l2_penalty?
+        @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def apply_l1_penalty?
+        @penalty_type == L1_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def l2_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * (1.0 - @params[:l1_ratio])
+        when L2_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+
+      def l1_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * @params[:l1_ratio]
+        when L1_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+    end
+  end
+end
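Two pieces of the class above are easy to trace in isolation: the split of reg_param between the two penalties (l1_reg_param and l2_reg_param) and the momentum update in the internal SGD optimizer. A standalone sketch with illustrative values, not rumale code:

require 'numo/narray'

# Elastic-net split, as in l1_reg_param / l2_reg_param:
reg_param = 0.1
l1_ratio  = 0.5
l1_reg = reg_param * l1_ratio          # 0.05 feeds the cumulative L1 penalty
l2_reg = reg_param * (1.0 - l1_ratio)  # 0.05 feeds the L2 weight decay

# One momentum step, as in LinearModel::Optimizer::SGD#call:
lr = 0.01
momentum = 0.9
update   = Numo::DFloat.zeros(3)
weight   = Numo::DFloat[1.0, -2.0, 0.5]
gradient = Numo::DFloat[0.2, -0.4, 0.1]
update = momentum * update - lr * gradient
weight += update  # earlier updates keep contributing, scaled by momentum

# L2 shrinkage, as in Penalty::L2Penalty#call:
weight -= l2_reg * lr * weight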
data/lib/rumale/linear_model/elastic_net.rb ADDED
@@ -0,0 +1,119 @@
+# frozen_string_literal: true
+
+require 'rumale/linear_model/base_sgd'
+require 'rumale/base/regressor'
+
+module Rumale
+  module LinearModel
+    # ElasticNet is a class that implements Elastic-net Regression
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   estimator =
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, training_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Y. Tsuruoka, J. Tsujii, and S. Ananiadou, "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class ElasticNet < BaseSGD
+      include Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new Elastic-net regressor.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param reg_param [Float] The regularization parameter.
+      # @param l1_ratio [Float] The elastic-net mixing parameter.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 100, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        check_params_numeric(learning_rate: learning_rate, momentum: momentum,
+                             reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
+                             max_iter: max_iter, batch_size: batch_size, tol: tol)
+        check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+        check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_positive(learning_rate: learning_rate, reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
+        super()
+        @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = ELASTICNET_PENALTY
+        @loss_func = LinearModel::Loss::MeanSquaredError.new
+        @weight_vec = nil
+        @bias_term = nil
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [ElasticNet] The learned regressor itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        n_features = x.shape[1]
+
+        if n_outputs > 1
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
+        else
+          @weight_vec, @bias_term = partial_fit(x, y)
+        end
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+    end
+  end
+end
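A runnable end-to-end sketch of the new regressor with synthetic data (variable names are illustrative, not part of the gem):

require 'rumale'
require 'numo/narray'

x = Numo::DFloat.new(100, 5).rand
true_weight = Numo::DFloat[1.0, 0.0, -2.0, 0.0, 0.5]
y = x.dot(true_weight) + 0.01 * Numo::DFloat.new(100).rand_norm

model = Rumale::LinearModel::ElasticNet.new(reg_param: 0.01, l1_ratio: 0.7, random_seed: 1)
model.fit(x, y)
p model.weight_vec  # the L1 part pushes uninformative coefficients toward zero
p model.score(x, y) # R^2 score provided by Rumale::Base::Regressor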
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.16.0'
+  VERSION = '0.16.1'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.16.0
+  version: 0.16.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-01-04 00:00:00.000000000 Z
+date: 2020-01-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -214,6 +214,8 @@ files:
 - lib/rumale/kernel_machine/kernel_ridge.rb
 - lib/rumale/kernel_machine/kernel_svc.rb
 - lib/rumale/linear_model/base_linear_model.rb
+- lib/rumale/linear_model/base_sgd.rb
+- lib/rumale/linear_model/elastic_net.rb
 - lib/rumale/linear_model/lasso.rb
 - lib/rumale/linear_model/linear_regression.rb
 - lib/rumale/linear_model/logistic_regression.rb
@@ -294,8 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.14.4
+rubygems_version: 3.1.2
 signing_key:
 specification_version: 4
 summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning