svmkit 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +22 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/linear_model/lasso.rb +14 -32
- data/lib/svmkit/linear_model/logistic_regression.rb +37 -36
- data/lib/svmkit/linear_model/ridge.rb +10 -32
- data/lib/svmkit/linear_model/svc.rb +40 -39
- data/lib/svmkit/linear_model/svr.rb +34 -31
- data/lib/svmkit/optimizer/nadam.rb +64 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +53 -61
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +30 -66
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: cef050a2ac6b55583414cb3ce9c3678dd6d2d1c8b2be04a249222683e10465e1
         | 
| 4 | 
            +
              data.tar.gz: 7c67ab0e90246f1d9b7e5d0bfb19ed76061d0edf17a05014f521b8ef41e41aed
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 15341450f3bf3ca49901ae55b507d647468261682c7fdb0b058c21a470c2eec261718b6721ca0e2ad7738cfdabd184128a588d68ad6d079e53c9b1e916efa2b1
         | 
| 7 | 
            +
              data.tar.gz: fd562db538be12896c005840e065f867e342691e899b33f0524a4db26da33439bfc174141e022d4de3d805657d09e854a4593b9b05b2d9eb99f6cd41da064a1d
         | 
    
        data/HISTORY.md
    CHANGED
    
    | @@ -1,3 +1,25 @@ | |
| 1 | 
            +
            # 0.4.0
         | 
| 2 | 
            +
            ## Breaking changes
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            SVMKit introduces an optimizer algorithm that calculates learning rates adaptively
         | 
| 5 | 
            +
            on each iteration of stochastic gradient descent (SGD).
         | 
| 6 | 
            +
            While Pegasos SGD runs fast, it sometimes fails to optimize complicated models
         | 
| 7 | 
            +
            like Factorization Machine.
         | 
| 8 | 
            +
            To solve this problem, in version 0.3.3, SVMKit introduced optimization with RMSProp on
         | 
| 9 | 
            +
            FactorizationMachineRegressor, Ridge and Lasso.
         | 
| 10 | 
            +
            This attempt realized stable optimization of those estimators.
         | 
| 11 | 
            +
            Following the success of the attempt, the author decided to use modern optimizer algorithms
         | 
| 12 | 
            +
            with all SGD optimizations in SVMKit.
         | 
| 13 | 
            +
            Through some preliminary experiments, the author implemented Nadam as the default optimizer.
         | 
| 14 | 
            +
            SVMKit plans to add other optimizer algorithms sequentially, so that users can select them.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            - Fix to use Nadam for optimization on SVC, SVR, LogisticRegression, Ridge, Lasso, and Factorization Machine estimators.
         | 
| 17 | 
            +
              - Combine reg_param_weight and reg_param_bias parameters on Factorization Machine estimators into the unified parameter named reg_param_linear.
         | 
| 18 | 
            +
              - Remove init_std parameter on Factorization Machine estimators.
         | 
| 19 | 
            +
              - Remove learning_rate, decay, and momentum parameters on Ridge, Lasso, and FactorizationMachineRegressor.
         | 
| 20 | 
            +
              - Remove normalize parameter on SVC, SVR, and LogisticRegression.
         | 
| 21 | 
            +
             | 
| 22 | 
            +
             | 
| 1 23 | 
             
            # 0.3.3
         | 
| 2 24 | 
             
            - Add class for Ridge regressor.
         | 
| 3 25 | 
             
            - Add class for Lasso regressor.
         | 
    
        data/lib/svmkit.rb
    CHANGED
    
    | @@ -13,6 +13,7 @@ require 'svmkit/base/regressor' | |
| 13 13 | 
             
            require 'svmkit/base/transformer'
         | 
| 14 14 | 
             
            require 'svmkit/base/splitter'
         | 
| 15 15 | 
             
            require 'svmkit/base/evaluator'
         | 
| 16 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 16 17 | 
             
            require 'svmkit/kernel_approximation/rbf'
         | 
| 17 18 | 
             
            require 'svmkit/linear_model/svc'
         | 
| 18 19 | 
             
            require 'svmkit/linear_model/svr'
         | 
| @@ -3,6 +3,7 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/regressor'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 9 | 
             
              module LinearModel
         | 
| @@ -11,15 +12,13 @@ module SVMKit | |
| 11 12 | 
             
                #
         | 
| 12 13 | 
             
                # @example
         | 
| 13 14 | 
             
                #   estimator =
         | 
| 14 | 
            -
                #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter:  | 
| 15 | 
            +
                #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 15 16 | 
             
                #   estimator.fit(training_samples, traininig_values)
         | 
| 16 17 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| 17 18 | 
             
                #
         | 
| 18 19 | 
             
                # *Reference*
         | 
| 19 20 | 
             
                # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
         | 
| 20 21 | 
             
                # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
         | 
| 21 | 
            -
                # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
         | 
| 22 | 
            -
                # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
         | 
| 23 22 | 
             
                class Lasso
         | 
| 24 23 | 
             
                  include Base::BaseEstimator
         | 
| 25 24 | 
             
                  include Base::Regressor
         | 
| @@ -41,30 +40,23 @@ module SVMKit | |
| 41 40 | 
             
                  #
         | 
| 42 41 | 
             
                  # @param reg_param [Float] The regularization parameter.
         | 
| 43 42 | 
             
                  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
         | 
| 44 | 
            -
                  # @param learning_rate [Float] The learning rate for optimization.
         | 
| 45 | 
            -
                  # @param decay [Float] The discounting factor for RMS prop optimization.
         | 
| 46 | 
            -
                  # @param momentum [Float] The momentum for optimization.
         | 
| 47 43 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 48 44 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 45 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 46 | 
            +
                  #   Nadam is selected automatically on current version.
         | 
| 49 47 | 
             
                  # @param random_seed [Integer] The seed value using to initialize the random generator.
         | 
| 50 | 
            -
                  def initialize(reg_param: 1.0, fit_bias: false,  | 
| 51 | 
            -
             | 
| 52 | 
            -
                    check_params_float(reg_param: reg_param,
         | 
| 53 | 
            -
                                       learning_rate: learning_rate, decay: decay, momentum: momentum)
         | 
| 48 | 
            +
                  def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
         | 
| 49 | 
            +
                    check_params_float(reg_param: reg_param)
         | 
| 54 50 | 
             
                    check_params_integer(max_iter: max_iter, batch_size: batch_size)
         | 
| 55 51 | 
             
                    check_params_boolean(fit_bias: fit_bias)
         | 
| 56 52 | 
             
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 57 | 
            -
                    check_params_positive(reg_param: reg_param,
         | 
| 58 | 
            -
                                          learning_rate: learning_rate, decay: decay, momentum: momentum,
         | 
| 59 | 
            -
                                          max_iter: max_iter, batch_size: batch_size)
         | 
| 53 | 
            +
                    check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         | 
| 60 54 | 
             
                    @params = {}
         | 
| 61 55 | 
             
                    @params[:reg_param] = reg_param
         | 
| 62 56 | 
             
                    @params[:fit_bias] = fit_bias
         | 
| 63 | 
            -
                    @params[:learning_rate] = learning_rate
         | 
| 64 | 
            -
                    @params[:decay] = decay
         | 
| 65 | 
            -
                    @params[:momentum] = momentum
         | 
| 66 57 | 
             
                    @params[:max_iter] = max_iter
         | 
| 67 58 | 
             
                    @params[:batch_size] = batch_size
         | 
| 59 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 68 60 | 
             
                    @params[:random_seed] = random_seed
         | 
| 69 61 | 
             
                    @params[:random_seed] ||= srand
         | 
| 70 62 | 
             
                    @weight_vec = nil
         | 
| @@ -138,11 +130,9 @@ module SVMKit | |
| 138 130 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 139 131 | 
             
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 140 132 | 
             
                    left_weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 141 | 
            -
                    left_weight_sqrsum = Numo::DFloat.zeros(n_features)
         | 
| 142 | 
            -
                    left_weight_update = Numo::DFloat.zeros(n_features)
         | 
| 143 133 | 
             
                    right_weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 144 | 
            -
                     | 
| 145 | 
            -
                     | 
| 134 | 
            +
                    left_optimizer = Optimizer::Nadam.new
         | 
| 135 | 
            +
                    right_optimizer = Optimizer::Nadam.new
         | 
| 146 136 | 
             
                    # Start optimization.
         | 
| 147 137 | 
             
                    @params[:max_iter].times do |_t|
         | 
| 148 138 | 
             
                      # Random sampling.
         | 
| @@ -154,12 +144,8 @@ module SVMKit | |
| 154 144 | 
             
                      loss_grad = loss_gradient(data, values, weight_vec)
         | 
| 155 145 | 
             
                      next if loss_grad.ne(0.0).count.zero?
         | 
| 156 146 | 
             
                      # Update weight.
         | 
| 157 | 
            -
                      left_weight_vec,  | 
| 158 | 
            -
             | 
| 159 | 
            -
                                      left_weight_gradient(loss_grad, data))
         | 
| 160 | 
            -
                      right_weight_vec, right_weight_sqrsum, right_weight_update =
         | 
| 161 | 
            -
                        update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
         | 
| 162 | 
            -
                                      right_weight_gradient(loss_grad, data))
         | 
| 147 | 
            +
                      left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
         | 
| 148 | 
            +
                      right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
         | 
| 163 149 | 
             
                      weight_vec = left_weight_vec - right_weight_vec
         | 
| 164 150 | 
             
                    end
         | 
| 165 151 | 
             
                    split_weight_vec_bias(weight_vec)
         | 
| @@ -177,12 +163,8 @@ module SVMKit | |
| 177 163 | 
             
                    ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
         | 
| 178 164 | 
             
                  end
         | 
| 179 165 |  | 
| 180 | 
            -
                  def  | 
| 181 | 
            -
                     | 
| 182 | 
            -
                    new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
         | 
| 183 | 
            -
                    new_weight = weight - (new_update + @params[:momentum] * update)
         | 
| 184 | 
            -
                    new_weight = 0.5 * (new_weight + new_weight.abs)
         | 
| 185 | 
            -
                    [new_weight, new_sqrsum, new_update]
         | 
| 166 | 
            +
                  def round_weight(weight)
         | 
| 167 | 
            +
                    0.5 * (weight + weight.abs)
         | 
| 186 168 | 
             
                  end
         | 
| 187 169 |  | 
| 188 170 | 
             
                  def expand_feature(x)
         | 
| @@ -3,25 +3,26 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/classifier'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 | 
            -
              # This module consists of the classes that implement generalized linear models.
         | 
| 9 9 | 
             
              module LinearModel
         | 
| 10 10 | 
             
                # LogisticRegression is a class that implements Logistic Regression
         | 
| 11 | 
            -
                # with stochastic gradient descent  | 
| 11 | 
            +
                # with mini-batch stochastic gradient descent optimization.
         | 
| 12 12 | 
             
                # For multiclass classification problem, it uses one-vs-the-rest strategy.
         | 
| 13 13 | 
             
                #
         | 
| 14 14 | 
             
                # @example
         | 
| 15 15 | 
             
                #   estimator =
         | 
| 16 | 
            -
                #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter:  | 
| 16 | 
            +
                #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 17 17 | 
             
                #   estimator.fit(training_samples, traininig_labels)
         | 
| 18 18 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| 19 19 | 
             
                #
         | 
| 20 20 | 
             
                # *Reference*
         | 
| 21 | 
            -
                #  | 
| 21 | 
            +
                # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
         | 
| 22 22 | 
             
                class LogisticRegression
         | 
| 23 23 | 
             
                  include Base::BaseEstimator
         | 
| 24 24 | 
             
                  include Base::Classifier
         | 
| 25 | 
            +
                  include Validation
         | 
| 25 26 |  | 
| 26 27 | 
             
                  # Return the weight vector for Logistic Regression.
         | 
| 27 28 | 
             
                  # @return [Numo::DFloat] (shape: [n_classes, n_features])
         | 
| @@ -47,23 +48,23 @@ module SVMKit | |
| 47 48 | 
             
                  #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
         | 
| 48 49 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 49 50 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 50 | 
            -
                  # @param  | 
| 51 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 52 | 
            +
                  #   Nadam is selected automatically on current version.
         | 
| 51 53 | 
             
                  # @param random_seed [Integer] The seed value using to initialize the random generator.
         | 
| 52 54 | 
             
                  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
         | 
| 53 | 
            -
                                 max_iter:  | 
| 54 | 
            -
                     | 
| 55 | 
            -
                     | 
| 56 | 
            -
                     | 
| 57 | 
            -
                     | 
| 58 | 
            -
                     | 
| 59 | 
            -
                                                             batch_size: batch_size)
         | 
| 55 | 
            +
                                 max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
         | 
| 56 | 
            +
                    check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         | 
| 57 | 
            +
                    check_params_integer(max_iter: max_iter, batch_size: batch_size)
         | 
| 58 | 
            +
                    check_params_boolean(fit_bias: fit_bias)
         | 
| 59 | 
            +
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 60 | 
            +
                    check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         | 
| 60 61 | 
             
                    @params = {}
         | 
| 61 62 | 
             
                    @params[:reg_param] = reg_param
         | 
| 62 63 | 
             
                    @params[:fit_bias] = fit_bias
         | 
| 63 64 | 
             
                    @params[:bias_scale] = bias_scale
         | 
| 64 65 | 
             
                    @params[:max_iter] = max_iter
         | 
| 65 66 | 
             
                    @params[:batch_size] = batch_size
         | 
| 66 | 
            -
                    @params[: | 
| 67 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 67 68 | 
             
                    @params[:random_seed] = random_seed
         | 
| 68 69 | 
             
                    @params[:random_seed] ||= srand
         | 
| 69 70 | 
             
                    @weight_vec = nil
         | 
| @@ -78,9 +79,9 @@ module SVMKit | |
| 78 79 | 
             
                  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
         | 
| 79 80 | 
             
                  # @return [LogisticRegression] The learned classifier itself.
         | 
| 80 81 | 
             
                  def fit(x, y)
         | 
| 81 | 
            -
                     | 
| 82 | 
            -
                     | 
| 83 | 
            -
                     | 
| 82 | 
            +
                    check_sample_array(x)
         | 
| 83 | 
            +
                    check_label_array(y)
         | 
| 84 | 
            +
                    check_sample_label_size(x, y)
         | 
| 84 85 |  | 
| 85 86 | 
             
                    @classes = Numo::Int32[*y.to_a.uniq.sort]
         | 
| 86 87 | 
             
                    n_classes = @classes.size
         | 
| @@ -109,8 +110,7 @@ module SVMKit | |
| 109 110 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
         | 
| 110 111 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
         | 
| 111 112 | 
             
                  def decision_function(x)
         | 
| 112 | 
            -
                     | 
| 113 | 
            -
             | 
| 113 | 
            +
                    check_sample_array(x)
         | 
| 114 114 | 
             
                    x.dot(@weight_vec.transpose) + @bias_term
         | 
| 115 115 | 
             
                  end
         | 
| 116 116 |  | 
| @@ -119,7 +119,7 @@ module SVMKit | |
| 119 119 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
         | 
| 120 120 | 
             
                  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
         | 
| 121 121 | 
             
                  def predict(x)
         | 
| 122 | 
            -
                     | 
| 122 | 
            +
                    check_sample_array(x)
         | 
| 123 123 |  | 
| 124 124 | 
             
                    return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
         | 
| 125 125 |  | 
| @@ -133,7 +133,7 @@ module SVMKit | |
| 133 133 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 134 134 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
         | 
| 135 135 | 
             
                  def predict_proba(x)
         | 
| 136 | 
            -
                     | 
| 136 | 
            +
                    check_sample_array(x)
         | 
| 137 137 |  | 
| 138 138 | 
             
                    proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         | 
| 139 139 | 
             
                    return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
         | 
| @@ -168,40 +168,41 @@ module SVMKit | |
| 168 168 |  | 
| 169 169 | 
             
                  private
         | 
| 170 170 |  | 
| 171 | 
            -
                  def binary_fit(x,  | 
| 171 | 
            +
                  def binary_fit(x, y)
         | 
| 172 172 | 
             
                    # Expand feature vectors for bias term.
         | 
| 173 173 | 
             
                    samples = @params[:fit_bias] ? expand_feature(x) : x
         | 
| 174 174 | 
             
                    # Initialize some variables.
         | 
| 175 175 | 
             
                    n_samples, n_features = samples.shape
         | 
| 176 176 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 177 177 | 
             
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 178 | 
            +
                    optimizer = Optimizer::Nadam.new
         | 
| 178 179 | 
             
                    # Start optimization.
         | 
| 179 | 
            -
                    @params[:max_iter].times do | | 
| 180 | 
            +
                    @params[:max_iter].times do |_t|
         | 
| 180 181 | 
             
                      # random sampling
         | 
| 181 182 | 
             
                      subset_ids = rand_ids.shift(@params[:batch_size])
         | 
| 182 183 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 183 | 
            -
                       | 
| 184 | 
            -
                       | 
| 185 | 
            -
                       | 
| 186 | 
            -
                       | 
| 187 | 
            -
                       | 
| 188 | 
            -
                       | 
| 189 | 
            -
                      normalize_weight_vec(weight_vec) if @params[:normalize]
         | 
| 184 | 
            +
                      data = samples[subset_ids, true]
         | 
| 185 | 
            +
                      labels = y[subset_ids]
         | 
| 186 | 
            +
                      # calculate gradient for loss function.
         | 
| 187 | 
            +
                      loss_grad = loss_gradient(data, labels, weight_vec)
         | 
| 188 | 
            +
                      # update weight.
         | 
| 189 | 
            +
                      weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         | 
| 190 190 | 
             
                    end
         | 
| 191 191 | 
             
                    split_weight_vec_bias(weight_vec)
         | 
| 192 192 | 
             
                  end
         | 
| 193 193 |  | 
| 194 | 
            -
                  def  | 
| 195 | 
            -
                     | 
| 194 | 
            +
                  def loss_gradient(x, y, weight)
         | 
| 195 | 
            +
                    z = x.dot(weight)
         | 
| 196 | 
            +
                    grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
         | 
| 197 | 
            +
                    grad
         | 
| 196 198 | 
             
                  end
         | 
| 197 199 |  | 
| 198 | 
            -
                  def  | 
| 199 | 
            -
                     | 
| 200 | 
            +
                  def weight_gradient(loss_grad, x, weight)
         | 
| 201 | 
            +
                    x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
         | 
| 200 202 | 
             
                  end
         | 
| 201 203 |  | 
| 202 | 
            -
                  def  | 
| 203 | 
            -
                     | 
| 204 | 
            -
                    weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
         | 
| 204 | 
            +
                  def expand_feature(x)
         | 
| 205 | 
            +
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
         | 
| 205 206 | 
             
                  end
         | 
| 206 207 |  | 
| 207 208 | 
             
                  def split_weight_vec_bias(weight_vec)
         | 
| @@ -3,22 +3,19 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/regressor'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 9 | 
             
              module LinearModel
         | 
| 9 10 | 
             
                # Ridge is a class that implements Ridge Regression
         | 
| 10 | 
            -
                # with stochastic gradient descent  | 
| 11 | 
            +
                # with mini-batch stochastic gradient descent optimization.
         | 
| 11 12 | 
             
                #
         | 
| 12 13 | 
             
                # @example
         | 
| 13 14 | 
             
                #   estimator =
         | 
| 14 | 
            -
                #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter:  | 
| 15 | 
            +
                #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 15 16 | 
             
                #   estimator.fit(training_samples, traininig_values)
         | 
| 16 17 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| 17 18 | 
             
                #
         | 
| 18 | 
            -
                # *Reference*
         | 
| 19 | 
            -
                # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
         | 
| 20 | 
            -
                # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
         | 
| 21 | 
            -
                # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
         | 
| 22 19 | 
             
                class Ridge
         | 
| 23 20 | 
             
                  include Base::BaseEstimator
         | 
| 24 21 | 
             
                  include Base::Regressor
         | 
| @@ -40,30 +37,21 @@ module SVMKit | |
| 40 37 | 
             
                  #
         | 
| 41 38 | 
             
                  # @param reg_param [Float] The regularization parameter.
         | 
| 42 39 | 
             
                  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
         | 
| 43 | 
            -
                  # @param learning_rate [Float] The learning rate for optimization.
         | 
| 44 | 
            -
                  # @param decay [Float] The discounting factor for RMS prop optimization.
         | 
| 45 | 
            -
                  # @param momentum [Float] The Nesterov momentum for optimization.
         | 
| 46 40 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 47 41 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 48 42 | 
             
                  # @param random_seed [Integer] The seed value using to initialize the random generator.
         | 
| 49 | 
            -
                  def initialize(reg_param: 1.0, fit_bias: false,  | 
| 50 | 
            -
             | 
| 51 | 
            -
                    check_params_float(reg_param: reg_param,
         | 
| 52 | 
            -
                                       learning_rate: learning_rate, decay: decay, momentum: momentum)
         | 
| 43 | 
            +
                  def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
         | 
| 44 | 
            +
                    check_params_float(reg_param: reg_param)
         | 
| 53 45 | 
             
                    check_params_integer(max_iter: max_iter, batch_size: batch_size)
         | 
| 54 46 | 
             
                    check_params_boolean(fit_bias: fit_bias)
         | 
| 55 47 | 
             
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 56 | 
            -
                    check_params_positive(reg_param: reg_param,
         | 
| 57 | 
            -
                                          learning_rate: learning_rate, decay: decay, momentum: momentum,
         | 
| 58 | 
            -
                                          max_iter: max_iter, batch_size: batch_size)
         | 
| 48 | 
            +
                    check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         | 
| 59 49 | 
             
                    @params = {}
         | 
| 60 50 | 
             
                    @params[:reg_param] = reg_param
         | 
| 61 51 | 
             
                    @params[:fit_bias] = fit_bias
         | 
| 62 | 
            -
                    @params[:learning_rate] = learning_rate
         | 
| 63 | 
            -
                    @params[:decay] = decay
         | 
| 64 | 
            -
                    @params[:momentum] = momentum
         | 
| 65 52 | 
             
                    @params[:max_iter] = max_iter
         | 
| 66 53 | 
             
                    @params[:batch_size] = batch_size
         | 
| 54 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 67 55 | 
             
                    @params[:random_seed] = random_seed
         | 
| 68 56 | 
             
                    @params[:random_seed] ||= srand
         | 
| 69 57 | 
             
                    @weight_vec = nil
         | 
| @@ -136,8 +124,7 @@ module SVMKit | |
| 136 124 | 
             
                    n_samples, n_features = samples.shape
         | 
| 137 125 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 138 126 | 
             
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 139 | 
            -
                     | 
| 140 | 
            -
                    weight_update = Numo::DFloat.zeros(n_features)
         | 
| 127 | 
            +
                    optimizer = Optimizer::Nadam.new
         | 
| 141 128 | 
             
                    # Start optimization.
         | 
| 142 129 | 
             
                    @params[:max_iter].times do |_t|
         | 
| 143 130 | 
             
                      # Random sampling.
         | 
| @@ -146,12 +133,10 @@ module SVMKit | |
| 146 133 | 
             
                      data = samples[subset_ids, true]
         | 
| 147 134 | 
             
                      values = y[subset_ids]
         | 
| 148 135 | 
             
                      # Calculate gradients for loss function.
         | 
| 149 | 
            -
                      loss_grad = loss_gradient(data, values, weight_vec | 
| 136 | 
            +
                      loss_grad = loss_gradient(data, values, weight_vec)
         | 
| 150 137 | 
             
                      next if loss_grad.ne(0.0).count.zero?
         | 
| 151 138 | 
             
                      # Update weight.
         | 
| 152 | 
            -
                      weight_vec,  | 
| 153 | 
            -
                        update_weight(weight_vec, weight_sqrsum, weight_update,
         | 
| 154 | 
            -
                                      weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
         | 
| 139 | 
            +
                      weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         | 
| 155 140 | 
             
                    end
         | 
| 156 141 | 
             
                    split_weight_vec_bias(weight_vec)
         | 
| 157 142 | 
             
                  end
         | 
| @@ -164,13 +149,6 @@ module SVMKit | |
| 164 149 | 
             
                    (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
         | 
| 165 150 | 
             
                  end
         | 
| 166 151 |  | 
| 167 | 
            -
                  def update_weight(weight, sqrsum, update, gr)
         | 
| 168 | 
            -
                    new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
         | 
| 169 | 
            -
                    new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
         | 
| 170 | 
            -
                    new_weight = weight - (new_update + @params[:momentum] * update)
         | 
| 171 | 
            -
                    [new_weight, new_sqrsum, new_update]
         | 
| 172 | 
            -
                  end
         | 
| 173 | 
            -
             | 
| 174 152 | 
             
                  def expand_feature(x)
         | 
| 175 153 | 
             
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
         | 
| 176 154 | 
             
                  end
         | 
| @@ -3,26 +3,28 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/classifier'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 | 
             
            require 'svmkit/probabilistic_output'
         | 
| 7 8 |  | 
| 8 9 | 
             
            module SVMKit
         | 
| 9 10 | 
             
              # This module consists of the classes that implement generalized linear models.
         | 
| 10 11 | 
             
              module LinearModel
         | 
| 11 12 | 
             
                # SVC is a class that implements Support Vector Classifier
         | 
| 12 | 
            -
                # with stochastic gradient descent  | 
| 13 | 
            +
                # with mini-batch stochastic gradient descent optimization.
         | 
| 13 14 | 
             
                # For multiclass classification problem, it uses one-vs-the-rest strategy.
         | 
| 14 15 | 
             
                #
         | 
| 15 16 | 
             
                # @example
         | 
| 16 17 | 
             
                #   estimator =
         | 
| 17 | 
            -
                #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter:  | 
| 18 | 
            +
                #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 18 19 | 
             
                #   estimator.fit(training_samples, training_labels)
         | 
| 19 20 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| 20 21 | 
             
                #
         | 
| 21 22 | 
             
                # *Reference*
         | 
| 22 | 
            -
                #  | 
| 23 | 
            +
                # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
         | 
| 23 24 | 
             
                class SVC
         | 
| 24 25 | 
             
                  include Base::BaseEstimator
         | 
| 25 26 | 
             
                  include Base::Classifier
         | 
| 27 | 
            +
                  include Validation
         | 
| 26 28 |  | 
| 27 29 | 
             
                  # Return the weight vector for SVC.
         | 
| 28 30 | 
             
                  # @return [Numo::DFloat] (shape: [n_classes, n_features])
         | 
| @@ -48,16 +50,16 @@ module SVMKit | |
| 48 50 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 49 51 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 50 52 | 
             
                  # @param probability [Boolean] The flag indicating whether to perform probability estimation.
         | 
| 51 | 
            -
                  # @param  | 
| 53 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 54 | 
            +
                  #   Nadam is selected automatically on current version.
         | 
| 52 55 | 
             
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 53 56 | 
             
                  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
         | 
| 54 | 
            -
                                 max_iter:  | 
| 55 | 
            -
                     | 
| 56 | 
            -
                     | 
| 57 | 
            -
                     | 
| 58 | 
            -
                     | 
| 59 | 
            -
                     | 
| 60 | 
            -
                                                             batch_size: batch_size)
         | 
| 57 | 
            +
                                 max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
         | 
| 58 | 
            +
                    check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         | 
| 59 | 
            +
                    check_params_integer(max_iter: max_iter, batch_size: batch_size)
         | 
| 60 | 
            +
                    check_params_boolean(fit_bias: fit_bias, probability: probability)
         | 
| 61 | 
            +
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 62 | 
            +
                    check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         | 
| 61 63 | 
             
                    @params = {}
         | 
| 62 64 | 
             
                    @params[:reg_param] = reg_param
         | 
| 63 65 | 
             
                    @params[:fit_bias] = fit_bias
         | 
| @@ -65,7 +67,7 @@ module SVMKit | |
| 65 67 | 
             
                    @params[:max_iter] = max_iter
         | 
| 66 68 | 
             
                    @params[:batch_size] = batch_size
         | 
| 67 69 | 
             
                    @params[:probability] = probability
         | 
| 68 | 
            -
                    @params[: | 
| 70 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 69 71 | 
             
                    @params[:random_seed] = random_seed
         | 
| 70 72 | 
             
                    @params[:random_seed] ||= srand
         | 
| 71 73 | 
             
                    @weight_vec = nil
         | 
| @@ -81,9 +83,9 @@ module SVMKit | |
| 81 83 | 
             
                  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
         | 
| 82 84 | 
             
                  # @return [SVC] The learned classifier itself.
         | 
| 83 85 | 
             
                  def fit(x, y)
         | 
| 84 | 
            -
                     | 
| 85 | 
            -
                     | 
| 86 | 
            -
                     | 
| 86 | 
            +
                    check_sample_array(x)
         | 
| 87 | 
            +
                    check_label_array(y)
         | 
| 88 | 
            +
                    check_sample_label_size(x, y)
         | 
| 87 89 |  | 
| 88 90 | 
             
                    @classes = Numo::Int32[*y.to_a.uniq.sort]
         | 
| 89 91 | 
             
                    n_classes = @classes.size
         | 
| @@ -123,8 +125,7 @@ module SVMKit | |
| 123 125 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
         | 
| 124 126 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
         | 
| 125 127 | 
             
                  def decision_function(x)
         | 
| 126 | 
            -
                     | 
| 127 | 
            -
             | 
| 128 | 
            +
                    check_sample_array(x)
         | 
| 128 129 | 
             
                    x.dot(@weight_vec.transpose) + @bias_term
         | 
| 129 130 | 
             
                  end
         | 
| 130 131 |  | 
| @@ -133,7 +134,7 @@ module SVMKit | |
| 133 134 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
         | 
| 134 135 | 
             
                  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
         | 
| 135 136 | 
             
                  def predict(x)
         | 
| 136 | 
            -
                     | 
| 137 | 
            +
                    check_sample_array(x)
         | 
| 137 138 |  | 
| 138 139 | 
             
                    return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
         | 
| 139 140 |  | 
| @@ -147,7 +148,7 @@ module SVMKit | |
| 147 148 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 148 149 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
         | 
| 149 150 | 
             
                  def predict_proba(x)
         | 
| 150 | 
            -
                     | 
| 151 | 
            +
                    check_sample_array(x)
         | 
| 151 152 |  | 
| 152 153 | 
             
                    if @classes.size > 2
         | 
| 153 154 | 
             
                      probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
         | 
| @@ -186,43 +187,43 @@ module SVMKit | |
| 186 187 |  | 
| 187 188 | 
             
                  private
         | 
| 188 189 |  | 
| 189 | 
            -
                  def binary_fit(x,  | 
| 190 | 
            +
                  def binary_fit(x, y)
         | 
| 190 191 | 
             
                    # Expand feature vectors for bias term.
         | 
| 191 192 | 
             
                    samples = @params[:fit_bias] ? expand_feature(x) : x
         | 
| 192 193 | 
             
                    # Initialize some variables.
         | 
| 193 194 | 
             
                    n_samples, n_features = samples.shape
         | 
| 194 195 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 195 196 | 
             
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 197 | 
            +
                    optimizer = Optimizer::Nadam.new
         | 
| 196 198 | 
             
                    # Start optimization.
         | 
| 197 | 
            -
                    @params[:max_iter].times do | | 
| 198 | 
            -
                      # random sampling
         | 
| 199 | 
            +
                    @params[:max_iter].times do |_t|
         | 
| 200 | 
            +
                      # random sampling.
         | 
| 199 201 | 
             
                      subset_ids = rand_ids.shift(@params[:batch_size])
         | 
| 200 202 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 201 | 
            -
                       | 
| 202 | 
            -
                       | 
| 203 | 
            -
                       | 
| 204 | 
            -
                       | 
| 205 | 
            -
                      next if  | 
| 206 | 
            -
                      # update  | 
| 207 | 
            -
                       | 
| 208 | 
            -
                      weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
         | 
| 209 | 
            -
                      # scale the weight vector.
         | 
| 210 | 
            -
                      normalize_weight_vec(weight_vec) if @params[:normalize]
         | 
| 203 | 
            +
                      data = samples[subset_ids, true]
         | 
| 204 | 
            +
                      labels = y[subset_ids]
         | 
| 205 | 
            +
                      # calculate gradient for loss function.
         | 
| 206 | 
            +
                      loss_grad = loss_gradient(data, labels, weight_vec)
         | 
| 207 | 
            +
                      next if loss_grad.ne(0.0).count.zero?
         | 
| 208 | 
            +
                      # update weight.
         | 
| 209 | 
            +
                      weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         | 
| 211 210 | 
             
                    end
         | 
| 212 211 | 
             
                    split_weight_vec_bias(weight_vec)
         | 
| 213 212 | 
             
                  end
         | 
| 214 213 |  | 
| 215 | 
            -
                  def  | 
| 216 | 
            -
                     | 
| 214 | 
            +
                  def loss_gradient(x, y, weight)
         | 
| 215 | 
            +
                    target_ids = (x.dot(weight) * y).lt(1.0).where
         | 
| 216 | 
            +
                    grad = Numo::DFloat.zeros(@params[:batch_size])
         | 
| 217 | 
            +
                    grad[target_ids] = -y[target_ids]
         | 
| 218 | 
            +
                    grad
         | 
| 217 219 | 
             
                  end
         | 
| 218 220 |  | 
| 219 | 
            -
                  def  | 
| 220 | 
            -
                     | 
| 221 | 
            +
                  def weight_gradient(loss_grad, x, weight)
         | 
| 222 | 
            +
                    x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
         | 
| 221 223 | 
             
                  end
         | 
| 222 224 |  | 
| 223 | 
            -
                  def  | 
| 224 | 
            -
                     | 
| 225 | 
            -
                    weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
         | 
| 225 | 
            +
                  def expand_feature(x)
         | 
| 226 | 
            +
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
         | 
| 226 227 | 
             
                  end
         | 
| 227 228 |  | 
| 228 229 | 
             
                  def split_weight_vec_bias(weight_vec)
         | 
| @@ -3,15 +3,16 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/regressor'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 9 | 
             
              module LinearModel
         | 
| 9 10 | 
             
                # SVR is a class that implements Support Vector Regressor
         | 
| 10 | 
            -
                # with stochastic gradient descent  | 
| 11 | 
            +
                # with mini-batch stochastic gradient descent optimization.
         | 
| 11 12 | 
             
                #
         | 
| 12 13 | 
             
                # @example
         | 
| 13 14 | 
             
                #   estimator =
         | 
| 14 | 
            -
                #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter:  | 
| 15 | 
            +
                #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 15 16 | 
             
                #   estimator.fit(training_samples, training_target_values)
         | 
| 16 17 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| 17 18 | 
             
                #
         | 
| @@ -20,6 +21,7 @@ module SVMKit | |
| 20 21 | 
             
                class SVR
         | 
| 21 22 | 
             
                  include Base::BaseEstimator
         | 
| 22 23 | 
             
                  include Base::Regressor
         | 
| 24 | 
            +
                  include Validation
         | 
| 23 25 |  | 
| 24 26 | 
             
                  # Return the weight vector for SVR.
         | 
| 25 27 | 
             
                  # @return [Numo::DFloat] (shape: [n_outputs, n_features])
         | 
| @@ -41,16 +43,17 @@ module SVMKit | |
| 41 43 | 
             
                  # @param epsilon [Float] The margin of tolerance.
         | 
| 42 44 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 43 45 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 44 | 
            -
                  # @param  | 
| 46 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 47 | 
            +
                  #   Nadam is selected automatically on current version.
         | 
| 45 48 | 
             
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 46 49 | 
             
                  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
         | 
| 47 | 
            -
                                 max_iter:  | 
| 48 | 
            -
                     | 
| 49 | 
            -
                     | 
| 50 | 
            -
                     | 
| 51 | 
            -
                     | 
| 52 | 
            -
                     | 
| 53 | 
            -
             | 
| 50 | 
            +
                                 max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
         | 
| 51 | 
            +
                    check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
         | 
| 52 | 
            +
                    check_params_integer(max_iter: max_iter, batch_size: batch_size)
         | 
| 53 | 
            +
                    check_params_boolean(fit_bias: fit_bias)
         | 
| 54 | 
            +
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 55 | 
            +
                    check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
         | 
| 56 | 
            +
                                          max_iter: max_iter, batch_size: batch_size)
         | 
| 54 57 | 
             
                    @params = {}
         | 
| 55 58 | 
             
                    @params[:reg_param] = reg_param
         | 
| 56 59 | 
             
                    @params[:fit_bias] = fit_bias
         | 
| @@ -58,7 +61,7 @@ module SVMKit | |
| 58 61 | 
             
                    @params[:epsilon] = epsilon
         | 
| 59 62 | 
             
                    @params[:max_iter] = max_iter
         | 
| 60 63 | 
             
                    @params[:batch_size] = batch_size
         | 
| 61 | 
            -
                    @params[: | 
| 64 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 62 65 | 
             
                    @params[:random_seed] = random_seed
         | 
| 63 66 | 
             
                    @params[:random_seed] ||= srand
         | 
| 64 67 | 
             
                    @weight_vec = nil
         | 
| @@ -72,9 +75,9 @@ module SVMKit | |
| 72 75 | 
             
                  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
         | 
| 73 76 | 
             
                  # @return [SVR] The learned regressor itself.
         | 
| 74 77 | 
             
                  def fit(x, y)
         | 
| 75 | 
            -
                     | 
| 76 | 
            -
                     | 
| 77 | 
            -
                     | 
| 78 | 
            +
                    check_sample_array(x)
         | 
| 79 | 
            +
                    check_tvalue_array(y)
         | 
| 80 | 
            +
                    check_sample_tvalue_size(x, y)
         | 
| 78 81 |  | 
| 79 82 | 
             
                    n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
         | 
| 80 83 | 
             
                    _n_samples, n_features = x.shape
         | 
| @@ -99,7 +102,7 @@ module SVMKit | |
| 99 102 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
         | 
| 100 103 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
         | 
| 101 104 | 
             
                  def predict(x)
         | 
| 102 | 
            -
                     | 
| 105 | 
            +
                    check_sample_array(x)
         | 
| 103 106 | 
             
                    x.dot(@weight_vec.transpose) + @bias_term
         | 
| 104 107 | 
             
                  end
         | 
| 105 108 |  | 
| @@ -131,35 +134,35 @@ module SVMKit | |
| 131 134 | 
             
                    n_samples, n_features = samples.shape
         | 
| 132 135 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 133 136 | 
             
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 137 | 
            +
                    optimizer = Optimizer::Nadam.new
         | 
| 134 138 | 
             
                    # Start optimization.
         | 
| 135 | 
            -
                    @params[:max_iter].times do | | 
| 139 | 
            +
                    @params[:max_iter].times do |_t|
         | 
| 136 140 | 
             
                      # random sampling
         | 
| 137 141 | 
             
                      subset_ids = rand_ids.shift(@params[:batch_size])
         | 
| 138 142 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 143 | 
            +
                      data = samples[subset_ids, true]
         | 
| 144 | 
            +
                      values = y[subset_ids]
         | 
| 139 145 | 
             
                      # update the weight vector.
         | 
| 140 | 
            -
                       | 
| 141 | 
            -
                       | 
| 142 | 
            -
                      coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
         | 
| 143 | 
            -
                      coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
         | 
| 144 | 
            -
                      mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
         | 
| 145 | 
            -
                      weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
         | 
| 146 | 
            -
                      # scale the weight vector.
         | 
| 147 | 
            -
                      normalize_weight_vec(weight_vec) if @params[:normalize]
         | 
| 146 | 
            +
                      loss_grad = loss_gradient(data, values, weight_vec)
         | 
| 147 | 
            +
                      weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         | 
| 148 148 | 
             
                    end
         | 
| 149 149 | 
             
                    split_weight_vec_bias(weight_vec)
         | 
| 150 150 | 
             
                  end
         | 
| 151 151 |  | 
| 152 | 
            -
                  def  | 
| 153 | 
            -
                     | 
| 152 | 
            +
                  def loss_gradient(x, y, weight)
         | 
| 153 | 
            +
                    z = x.dot(weight)
         | 
| 154 | 
            +
                    grad = Numo::DFloat.zeros(@params[:batch_size])
         | 
| 155 | 
            +
                    grad[(z - y).gt(@params[:epsilon]).where] = 1
         | 
| 156 | 
            +
                    grad[(y - z).gt(@params[:epsilon]).where] = -1
         | 
| 157 | 
            +
                    grad
         | 
| 154 158 | 
             
                  end
         | 
| 155 159 |  | 
| 156 | 
            -
                  def  | 
| 157 | 
            -
                     | 
| 160 | 
            +
                  def weight_gradient(loss_grad, x, weight)
         | 
| 161 | 
            +
                    x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
         | 
| 158 162 | 
             
                  end
         | 
| 159 163 |  | 
| 160 | 
            -
                  def  | 
| 161 | 
            -
                     | 
| 162 | 
            -
                    weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
         | 
| 164 | 
            +
                  def expand_feature(x)
         | 
| 165 | 
            +
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
         | 
| 163 166 | 
             
                  end
         | 
| 164 167 |  | 
| 165 168 | 
             
                  def split_weight_vec_bias(weight_vec)
         | 
| @@ -0,0 +1,64 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'svmkit/validation'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module SVMKit
         | 
| 6 | 
            +
              # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
         | 
| 7 | 
            +
              module Optimizer
         | 
| 8 | 
            +
                # Nadam is a class that implements Nadam optimizer.
         | 
| 9 | 
            +
                # This class is used for internal processes.
         | 
| 10 | 
            +
                #
         | 
| 11 | 
            +
                # *Reference*
         | 
| 12 | 
            +
                # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Rep. Stanford University, 2015.
         | 
| 13 | 
            +
                class Nadam
         | 
| 14 | 
            +
                  include Validation
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                  # Create a new optimizer with Nadam
         | 
| 17 | 
            +
                  #
         | 
| 18 | 
            +
                  # @param learning_rate [Float] The initial value of learning rate.
         | 
| 19 | 
            +
                  # @param momentum [Float] The initial value of momentum.
         | 
| 20 | 
            +
                  # @param decay1 [Float] The smoothing parameter for the first moment.
         | 
| 21 | 
            +
                  # @param decay2 [Float] The smoothing parameter for the second moment.
         | 
| 22 | 
            +
                  # @note The schedule decay coefficient is hard-coded to 0.004 in #call and is not a constructor argument.
         | 
| 23 | 
            +
                  def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
         | 
| 24 | 
            +
                    check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
         | 
| 25 | 
            +
                    check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
         | 
| 26 | 
            +
                    @params = {}
         | 
| 27 | 
            +
                    @params[:learning_rate] = learning_rate
         | 
| 28 | 
            +
                    @params[:momentum] = momentum
         | 
| 29 | 
            +
                    @params[:decay1] = decay1
         | 
| 30 | 
            +
                    @params[:decay2] = decay2
         | 
| 31 | 
            +
                    @fst_moment = nil
         | 
| 32 | 
            +
                    @sec_moment = nil
         | 
| 33 | 
            +
                    @decay1_prod = 1.0
         | 
| 34 | 
            +
                    @iter = 0
         | 
| 35 | 
            +
                  end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  # Calculate the updated weight with Nadam adaptive learning rate.
         | 
| 38 | 
            +
                  #
         | 
| 39 | 
            +
                  # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
         | 
| 40 | 
            +
                  # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
         | 
| 41 | 
            +
                  # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
         | 
| 42 | 
            +
                  def call(weight, gradient)
         | 
| 43 | 
            +
                    @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
         | 
| 44 | 
            +
                    @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                    @iter += 1
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                    decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
         | 
| 49 | 
            +
                    decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
         | 
| 50 | 
            +
                    decay1_prod_curr = @decay1_prod * decay1_curr
         | 
| 51 | 
            +
                    decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
         | 
| 52 | 
            +
                    @decay1_prod = decay1_prod_curr
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                    @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
         | 
| 55 | 
            +
                    @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
         | 
| 56 | 
            +
                    nm_gradient = gradient / (1.0 - decay1_prod_curr)
         | 
| 57 | 
            +
                    nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
         | 
| 58 | 
            +
                    nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                    weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
         | 
| 61 | 
            +
                  end
         | 
| 62 | 
            +
                end
         | 
| 63 | 
            +
              end
         | 
| 64 | 
            +
            end
         | 
| @@ -3,6 +3,7 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/classifier'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 9 | 
             
              # This module consists of the classes that implement polynomial models.
         | 
| @@ -14,7 +15,7 @@ module SVMKit | |
| 14 15 | 
             
                # @example
         | 
| 15 16 | 
             
                #   estimator =
         | 
| 16 17 | 
             
                #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
         | 
| 17 | 
            -
                #      n_factors: 10, loss: 'hinge',  | 
| 18 | 
            +
                #      n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
         | 
| 18 19 | 
             
                #      max_iter: 5000, batch_size: 50, random_seed: 1)
         | 
| 19 20 | 
             
                #   estimator.fit(training_samples, training_labels)
         | 
| 20 21 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| @@ -25,6 +26,7 @@ module SVMKit | |
| 25 26 | 
             
                class FactorizationMachineClassifier
         | 
| 26 27 | 
             
                  include Base::BaseEstimator
         | 
| 27 28 | 
             
                  include Base::Classifier
         | 
| 29 | 
            +
                  include Validation
         | 
| 28 30 |  | 
| 29 31 | 
             
                  # Return the factor matrix for Factorization Machine.
         | 
| 30 32 | 
             
                  # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
         | 
| @@ -50,32 +52,30 @@ module SVMKit | |
| 50 52 | 
             
                  #
         | 
| 51 53 | 
             
                  # @param n_factors [Integer] The number of factors (rows of the factor matrix).
         | 
| 52 54 | 
             
                  # @param loss [String] The loss function ('hinge' or 'logistic').
         | 
| 53 | 
            -
                  # @param  | 
| 54 | 
            -
                  # @param reg_param_weight [Float] The regularization parameter for weight vector.
         | 
| 55 | 
            +
                  # @param reg_param_linear [Float] The regularization parameter for linear model.
         | 
| 55 56 | 
             
                  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
         | 
| 56 | 
            -
                  # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
         | 
| 57 57 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 58 58 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 59 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 60 | 
            +
                  #   Nadam is selected automatically in the current version.
         | 
| 59 61 | 
             
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 60 | 
            -
                  def initialize(n_factors: 2, loss: 'hinge',  | 
| 61 | 
            -
                                  | 
| 62 | 
            -
                     | 
| 63 | 
            -
             | 
| 64 | 
            -
                     | 
| 65 | 
            -
                     | 
| 66 | 
            -
                     | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
                                                             max_iter: max_iter, batch_size: batch_size)
         | 
| 62 | 
            +
                  def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
         | 
| 63 | 
            +
                                 max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
         | 
| 64 | 
            +
                    check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
         | 
| 65 | 
            +
                    check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         | 
| 66 | 
            +
                    check_params_string(loss: loss)
         | 
| 67 | 
            +
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 68 | 
            +
                    check_params_positive(n_factors: n_factors,
         | 
| 69 | 
            +
                                          reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
         | 
| 70 | 
            +
                                          max_iter: max_iter, batch_size: batch_size)
         | 
| 70 71 | 
             
                    @params = {}
         | 
| 71 72 | 
             
                    @params[:n_factors] = n_factors
         | 
| 72 73 | 
             
                    @params[:loss] = loss
         | 
| 73 | 
            -
                    @params[: | 
| 74 | 
            -
                    @params[:reg_param_weight] = reg_param_weight
         | 
| 74 | 
            +
                    @params[:reg_param_linear] = reg_param_linear
         | 
| 75 75 | 
             
                    @params[:reg_param_factor] = reg_param_factor
         | 
| 76 | 
            -
                    @params[:init_std] = init_std
         | 
| 77 76 | 
             
                    @params[:max_iter] = max_iter
         | 
| 78 77 | 
             
                    @params[:batch_size] = batch_size
         | 
| 78 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 79 79 | 
             
                    @params[:random_seed] = random_seed
         | 
| 80 80 | 
             
                    @params[:random_seed] ||= srand
         | 
| 81 81 | 
             
                    @factor_mat = nil
         | 
| @@ -91,9 +91,9 @@ module SVMKit | |
| 91 91 | 
             
                  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
         | 
| 92 92 | 
             
                  # @return [FactorizationMachineClassifier] The learned classifier itself.
         | 
| 93 93 | 
             
                  def fit(x, y)
         | 
| 94 | 
            -
                     | 
| 95 | 
            -
                     | 
| 96 | 
            -
                     | 
| 94 | 
            +
                    check_sample_array(x)
         | 
| 95 | 
            +
                    check_label_array(y)
         | 
| 96 | 
            +
                    check_sample_label_size(x, y)
         | 
| 97 97 |  | 
| 98 98 | 
             
                    @classes = Numo::Int32[*y.to_a.uniq.sort]
         | 
| 99 99 | 
             
                    n_classes = @classes.size
         | 
| @@ -124,7 +124,7 @@ module SVMKit | |
| 124 124 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
         | 
| 125 125 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
         | 
| 126 126 | 
             
                  def decision_function(x)
         | 
| 127 | 
            -
                     | 
| 127 | 
            +
                    check_sample_array(x)
         | 
| 128 128 | 
             
                    linear_term = @bias_term + x.dot(@weight_vec.transpose)
         | 
| 129 129 | 
             
                    factor_term = if @classes.size <= 2
         | 
| 130 130 | 
             
                                    0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
         | 
| @@ -139,7 +139,7 @@ module SVMKit | |
| 139 139 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
         | 
| 140 140 | 
             
                  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
         | 
| 141 141 | 
             
                  def predict(x)
         | 
| 142 | 
            -
                     | 
| 142 | 
            +
                    check_sample_array(x)
         | 
| 143 143 | 
             
                    return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
         | 
| 144 144 |  | 
| 145 145 | 
             
                    n_samples, = x.shape
         | 
| @@ -152,7 +152,7 @@ module SVMKit | |
| 152 152 | 
             
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 153 153 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
         | 
| 154 154 | 
             
                  def predict_proba(x)
         | 
| 155 | 
            -
                     | 
| 155 | 
            +
                    check_sample_array(x)
         | 
| 156 156 | 
             
                    proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         | 
| 157 157 | 
             
                    return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
         | 
| 158 158 |  | 
| @@ -188,84 +188,76 @@ module SVMKit | |
| 188 188 |  | 
| 189 189 | 
             
                  private
         | 
| 190 190 |  | 
| 191 | 
            -
                  def binary_fit(x,  | 
| 191 | 
            +
                  def binary_fit(x, y)
         | 
| 192 192 | 
             
                    # Initialize some variables.
         | 
| 193 193 | 
             
                    n_samples, n_features = x.shape
         | 
| 194 194 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 195 | 
            -
                     | 
| 196 | 
            -
                     | 
| 197 | 
            -
                     | 
| 195 | 
            +
                    weight_vec = Numo::DFloat.zeros(n_features + 1)
         | 
| 196 | 
            +
                    factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
         | 
| 197 | 
            +
                    weight_optimizer = Optimizer::Nadam.new
         | 
| 198 | 
            +
                    factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
         | 
| 198 199 | 
             
                    # Start optimization.
         | 
| 199 | 
            -
                    @params[:max_iter].times do | | 
| 200 | 
            +
                    @params[:max_iter].times do |_t|
         | 
| 200 201 | 
             
                      # Random sampling.
         | 
| 201 202 | 
             
                      subset_ids = rand_ids.shift(@params[:batch_size])
         | 
| 202 203 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 203 204 | 
             
                      data = x[subset_ids, true]
         | 
| 204 | 
            -
                       | 
| 205 | 
            +
                      ex_data = expand_feature(data)
         | 
| 206 | 
            +
                      label = y[subset_ids]
         | 
| 205 207 | 
             
                      # Calculate gradients for loss function.
         | 
| 206 | 
            -
                      loss_grad = loss_gradient(data, label, factor_mat, weight_vec | 
| 208 | 
            +
                      loss_grad = loss_gradient(data, ex_data, label, factor_mat, weight_vec)
         | 
| 207 209 | 
             
                      next if loss_grad.ne(0.0).count.zero?
         | 
| 208 210 | 
             
                      # Update each parameter.
         | 
| 209 | 
            -
                       | 
| 210 | 
            -
                      weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
         | 
| 211 | 
            +
                      weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
         | 
| 211 212 | 
             
                      @params[:n_factors].times do |n|
         | 
| 212 | 
            -
                        factor_mat[n, true]  | 
| 213 | 
            -
             | 
| 213 | 
            +
                        factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
         | 
| 214 | 
            +
                                                                        factor_gradient(loss_grad, data, factor_mat[n, true]))
         | 
| 214 215 | 
             
                      end
         | 
| 215 216 | 
             
                    end
         | 
| 216 | 
            -
                    [factor_mat, weight_vec | 
| 217 | 
            +
                    [factor_mat, *split_weight_vec_bias(weight_vec)]
         | 
| 217 218 | 
             
                  end
         | 
| 218 219 |  | 
| 219 | 
            -
                  def bin_decision_function(x, factor, weight | 
| 220 | 
            -
                     | 
| 220 | 
            +
                  def bin_decision_function(x, ex_x, factor, weight)
         | 
| 221 | 
            +
                    ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
         | 
| 221 222 | 
             
                  end
         | 
| 222 223 |  | 
| 223 | 
            -
                  def hinge_loss_gradient(x, y, factor, weight | 
| 224 | 
            -
                    evaluated = y * bin_decision_function(x, factor, weight | 
| 224 | 
            +
                  def hinge_loss_gradient(x, ex_x, y, factor, weight)
         | 
| 225 | 
            +
                    evaluated = y * bin_decision_function(x, ex_x, factor, weight)
         | 
| 225 226 | 
             
                    gradient = Numo::DFloat.zeros(evaluated.size)
         | 
| 226 227 | 
             
                    gradient[evaluated < 1.0] = -y[evaluated < 1.0]
         | 
| 227 228 | 
             
                    gradient
         | 
| 228 229 | 
             
                  end
         | 
| 229 230 |  | 
| 230 | 
            -
                  def logistic_loss_gradient(x, y, factor, weight | 
| 231 | 
            -
                    evaluated = y * bin_decision_function(x, factor, weight | 
| 231 | 
            +
                  def logistic_loss_gradient(x, ex_x, y, factor, weight)
         | 
| 232 | 
            +
                    evaluated = y * bin_decision_function(x, ex_x, factor, weight)
         | 
| 232 233 | 
             
                    sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
         | 
| 233 234 | 
             
                    (sigmoid_func - 1.0) * y
         | 
| 234 235 | 
             
                  end
         | 
| 235 236 |  | 
| 236 | 
            -
                  def loss_gradient(x, y, factor, weight | 
| 237 | 
            +
                  def loss_gradient(x, ex_x, y, factor, weight)
         | 
| 237 238 | 
             
                    if @params[:loss] == 'hinge'
         | 
| 238 | 
            -
                      hinge_loss_gradient(x, y, factor, weight | 
| 239 | 
            +
                      hinge_loss_gradient(x, ex_x, y, factor, weight)
         | 
| 239 240 | 
             
                    else
         | 
| 240 | 
            -
                      logistic_loss_gradient(x, y, factor, weight | 
| 241 | 
            +
                      logistic_loss_gradient(x, ex_x, y, factor, weight)
         | 
| 241 242 | 
             
                    end
         | 
| 242 243 | 
             
                  end
         | 
| 243 244 |  | 
| 244 | 
            -
                  def learning_rate(reg_param, iter)
         | 
| 245 | 
            -
                    1.0 / (reg_param * (iter + 1))
         | 
| 246 | 
            -
                  end
         | 
| 247 | 
            -
             | 
| 248 | 
            -
                  def bias_gradient(loss_grad, bias)
         | 
| 249 | 
            -
                    loss_grad.mean + @params[:reg_param_bias] * bias
         | 
| 250 | 
            -
                  end
         | 
| 251 | 
            -
             | 
| 252 245 | 
             
                  def weight_gradient(loss_grad, data, weight)
         | 
| 253 | 
            -
                    (loss_grad.expand_dims(1) * data).mean(0) + @params[: | 
| 246 | 
            +
                    (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
         | 
| 254 247 | 
             
                  end
         | 
| 255 248 |  | 
| 256 249 | 
             
                  def factor_gradient(loss_grad, data, factor)
         | 
| 257 | 
            -
                     | 
| 258 | 
            -
                    (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
         | 
| 250 | 
            +
                    (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
         | 
| 259 251 | 
             
                  end
         | 
| 260 252 |  | 
| 261 | 
            -
                  def  | 
| 262 | 
            -
                    Numo:: | 
| 253 | 
            +
                  def expand_feature(x)
         | 
| 254 | 
            +
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
         | 
| 263 255 | 
             
                  end
         | 
| 264 256 |  | 
| 265 | 
            -
                  def  | 
| 266 | 
            -
                     | 
| 267 | 
            -
                     | 
| 268 | 
            -
                     | 
| 257 | 
            +
                  def split_weight_vec_bias(weight_vec)
         | 
| 258 | 
            +
                    weights = weight_vec[0...-1]
         | 
| 259 | 
            +
                    bias = weight_vec[-1]
         | 
| 260 | 
            +
                    [weights, bias]
         | 
| 269 261 | 
             
                  end
         | 
| 270 262 | 
             
                end
         | 
| 271 263 | 
             
              end
         | 
| @@ -3,6 +3,7 @@ | |
| 3 3 | 
             
            require 'svmkit/validation'
         | 
| 4 4 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 5 5 | 
             
            require 'svmkit/base/regressor'
         | 
| 6 | 
            +
            require 'svmkit/optimizer/nadam'
         | 
| 6 7 |  | 
| 7 8 | 
             
            module SVMKit
         | 
| 8 9 | 
             
              module PolynomialModel
         | 
| @@ -12,7 +13,7 @@ module SVMKit | |
| 12 13 | 
             
                # @example
         | 
| 13 14 | 
             
                #   estimator =
         | 
| 14 15 | 
             
                #     SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
         | 
| 15 | 
            -
                #      n_factors: 10,  | 
| 16 | 
            +
                #      n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
         | 
| 16 17 | 
             
                #      max_iter: 5000, batch_size: 50, random_seed: 1)
         | 
| 17 18 | 
             
                #   estimator.fit(training_samples, training_values)
         | 
| 18 19 | 
             
                #   results = estimator.predict(testing_samples)
         | 
| @@ -20,8 +21,6 @@ module SVMKit | |
| 20 21 | 
             
                # *Reference*
         | 
| 21 22 | 
             
                # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
         | 
| 22 23 | 
             
                # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
         | 
| 23 | 
            -
                # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th  International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
         | 
| 24 | 
            -
                # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
         | 
| 25 24 | 
             
                class FactorizationMachineRegressor
         | 
| 26 25 | 
             
                  include Base::BaseEstimator
         | 
| 27 26 | 
             
                  include Base::Regressor
         | 
| @@ -46,40 +45,27 @@ module SVMKit | |
| 46 45 | 
             
                  # Create a new regressor with Factorization Machine.
         | 
| 47 46 | 
             
                  #
         | 
| 48 47 | 
             
                  # @param n_factors [Integer] The number of factors (rows of the factor matrix).
         | 
| 49 | 
            -
                  # @param  | 
| 50 | 
            -
                  # @param reg_param_weight [Float] The regularization parameter for weight vector.
         | 
| 48 | 
            +
                  # @param reg_param_linear [Float] The regularization parameter for linear model.
         | 
| 51 49 | 
             
                  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
         | 
| 52 | 
            -
                  # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
         | 
| 53 | 
            -
                  # @param learning_rate [Float] The learning rate for optimization.
         | 
| 54 | 
            -
                  # @param decay [Float] The discounting factor for RMS prop optimization.
         | 
| 55 | 
            -
                  # @param momentum [Float] The Nesterov momentum for optimization.
         | 
| 56 50 | 
             
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 57 51 | 
             
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 52 | 
            +
                  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
         | 
| 53 | 
            +
                  #   Nadam is selected automatically in the current version.
         | 
| 58 54 | 
             
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 59 | 
            -
                  def initialize(n_factors: 2,
         | 
| 60 | 
            -
                                  | 
| 61 | 
            -
             | 
| 62 | 
            -
                                 max_iter: 1000, batch_size: 10, random_seed: nil)
         | 
| 63 | 
            -
                    check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
         | 
| 64 | 
            -
                                       reg_param_factor: reg_param_factor, init_std: init_std,
         | 
| 65 | 
            -
                                       learning_rate: learning_rate, decay: decay, momentum: momentum)
         | 
| 55 | 
            +
                  def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
         | 
| 56 | 
            +
                                 max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
         | 
| 57 | 
            +
                    check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
         | 
| 66 58 | 
             
                    check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         | 
| 67 59 | 
             
                    check_params_type_or_nil(Integer, random_seed: random_seed)
         | 
| 68 | 
            -
                    check_params_positive(n_factors: n_factors,  | 
| 69 | 
            -
                                          reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
         | 
| 70 | 
            -
                                          learning_rate: learning_rate, decay: decay, momentum: momentum,
         | 
| 60 | 
            +
                    check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
         | 
| 71 61 | 
             
                                          max_iter: max_iter, batch_size: batch_size)
         | 
| 72 62 | 
             
                    @params = {}
         | 
| 73 63 | 
             
                    @params[:n_factors] = n_factors
         | 
| 74 | 
            -
                    @params[: | 
| 75 | 
            -
                    @params[:reg_param_weight] = reg_param_weight
         | 
| 64 | 
            +
                    @params[:reg_param_linear] = reg_param_linear
         | 
| 76 65 | 
             
                    @params[:reg_param_factor] = reg_param_factor
         | 
| 77 | 
            -
                    @params[:init_std] = init_std
         | 
| 78 | 
            -
                    @params[:learning_rate] = learning_rate
         | 
| 79 | 
            -
                    @params[:decay] = decay
         | 
| 80 | 
            -
                    @params[:momentum] = momentum
         | 
| 81 66 | 
             
                    @params[:max_iter] = max_iter
         | 
| 82 67 | 
             
                    @params[:batch_size] = batch_size
         | 
| 68 | 
            +
                    @params[:optimizer] = optimizer
         | 
| 83 69 | 
             
                    @params[:random_seed] = random_seed
         | 
| 84 70 | 
             
                    @params[:random_seed] ||= srand
         | 
| 85 71 | 
             
                    @factor_mat = nil
         | 
| @@ -160,74 +146,52 @@ module SVMKit | |
| 160 146 | 
             
                    # Initialize some variables.
         | 
| 161 147 | 
             
                    n_samples, n_features = x.shape
         | 
| 162 148 | 
             
                    rand_ids = [*0...n_samples].shuffle(random: @rng)
         | 
| 163 | 
            -
                     | 
| 164 | 
            -
                     | 
| 165 | 
            -
                     | 
| 166 | 
            -
                     | 
| 167 | 
            -
                    weight_sqrsum = Numo::DFloat.zeros(n_features)
         | 
| 168 | 
            -
                    weight_update = Numo::DFloat.zeros(n_features)
         | 
| 169 | 
            -
                    bias_term = 0.0
         | 
| 170 | 
            -
                    bias_sqrsum = 0.0
         | 
| 171 | 
            -
                    bias_update = 0.0
         | 
| 149 | 
            +
                    weight_vec = Numo::DFloat.zeros(n_features + 1)
         | 
| 150 | 
            +
                    factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
         | 
| 151 | 
            +
                    weight_optimizer = Optimizer::Nadam.new
         | 
| 152 | 
            +
                    factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
         | 
| 172 153 | 
             
                    # Start optimization.
         | 
| 173 154 | 
             
                    @params[:max_iter].times do |_t|
         | 
| 174 155 | 
             
                      # Random sampling.
         | 
| 175 156 | 
             
                      subset_ids = rand_ids.shift(@params[:batch_size])
         | 
| 176 157 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 177 158 | 
             
                      data = x[subset_ids, true]
         | 
| 159 | 
            +
                      ex_data = expand_feature(data)
         | 
| 178 160 | 
             
                      values = y[subset_ids]
         | 
| 179 161 | 
             
                      # Calculate gradients for loss function.
         | 
| 180 | 
            -
                      loss_grad = loss_gradient(data, values,
         | 
| 181 | 
            -
                                                factor_mat - @params[:momentum] * factor_update,
         | 
| 182 | 
            -
                                                weight_vec - @params[:momentum] * weight_update,
         | 
| 183 | 
            -
                                                bias_term - @params[:momentum] * bias_update)
         | 
| 162 | 
            +
                      loss_grad = loss_gradient(data, ex_data, values, factor_mat, weight_vec)
         | 
| 184 163 | 
             
                      next if loss_grad.ne(0.0).count.zero?
         | 
| 185 164 | 
             
                      # Update each parameter.
         | 
| 186 | 
            -
                       | 
| 187 | 
            -
                        update_param(bias_term, bias_sqrsum, bias_update,
         | 
| 188 | 
            -
                                     bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
         | 
| 189 | 
            -
                      weight_vec, weight_sqrsum, weight_update =
         | 
| 190 | 
            -
                        update_param(weight_vec, weight_sqrsum, weight_update,
         | 
| 191 | 
            -
                                     weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
         | 
| 165 | 
            +
                      weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
         | 
| 192 166 | 
             
                      @params[:n_factors].times do |n|
         | 
| 193 | 
            -
                         | 
| 194 | 
            -
             | 
| 195 | 
            -
                                       factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
         | 
| 167 | 
            +
                        factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
         | 
| 168 | 
            +
                                                                        factor_gradient(loss_grad, data, factor_mat[n, true]))
         | 
| 196 169 | 
             
                      end
         | 
| 197 170 | 
             
                    end
         | 
| 198 | 
            -
                    [factor_mat, weight_vec | 
| 171 | 
            +
                    [factor_mat, *split_weight_vec_bias(weight_vec)]
         | 
| 199 172 | 
             
                  end
         | 
| 200 173 |  | 
| 201 | 
            -
                  def loss_gradient(x, y, factor, weight | 
| 202 | 
            -
                    z =  | 
| 174 | 
            +
                  def loss_gradient(x, ex_x, y, factor, weight)
         | 
| 175 | 
            +
                    z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
         | 
| 203 176 | 
             
                    2.0 * (z - y)
         | 
| 204 177 | 
             
                  end
         | 
| 205 178 |  | 
| 206 | 
            -
                  def bias_gradient(loss_grad, bias)
         | 
| 207 | 
            -
                    loss_grad.mean + @params[:reg_param_bias] * bias
         | 
| 208 | 
            -
                  end
         | 
| 209 | 
            -
             | 
| 210 179 | 
             
                  def weight_gradient(loss_grad, data, weight)
         | 
| 211 | 
            -
                    (loss_grad.expand_dims(1) * data).mean(0) + @params[: | 
| 180 | 
            +
                    (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
         | 
| 212 181 | 
             
                  end
         | 
| 213 182 |  | 
| 214 183 | 
             
                  def factor_gradient(loss_grad, data, factor)
         | 
| 215 184 | 
             
                    (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
         | 
| 216 185 | 
             
                  end
         | 
| 217 186 |  | 
| 218 | 
            -
                  def  | 
| 219 | 
            -
                     | 
| 220 | 
            -
                    new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
         | 
| 221 | 
            -
                    new_param = param - (new_update + @params[:momentum] * update)
         | 
| 222 | 
            -
                    [new_param, new_sqrsum, new_update]
         | 
| 223 | 
            -
                  end
         | 
| 224 | 
            -
             | 
| 225 | 
            -
                  def rand_uniform(shape)
         | 
| 226 | 
            -
                    Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
         | 
| 187 | 
            +
                  def expand_feature(x)
         | 
| 188 | 
            +
                    Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
         | 
| 227 189 | 
             
                  end
         | 
| 228 190 |  | 
| 229 | 
            -
                  def  | 
| 230 | 
            -
                     | 
| 191 | 
            +
                  def split_weight_vec_bias(weight_vec)
         | 
| 192 | 
            +
                    weights = weight_vec[0...-1]
         | 
| 193 | 
            +
                    bias = weight_vec[-1]
         | 
| 194 | 
            +
                    [weights, bias]
         | 
| 231 195 | 
             
                  end
         | 
| 232 196 | 
             
                end
         | 
| 233 197 | 
             
              end
         | 
    
        data/lib/svmkit/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: svmkit
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.4.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - yoshoku
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2018- | 
| 11 | 
            +
            date: 2018-06-02 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: numo-narray
         | 
| @@ -139,6 +139,7 @@ files: | |
| 139 139 | 
             
            - lib/svmkit/naive_bayes/naive_bayes.rb
         | 
| 140 140 | 
             
            - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
         | 
| 141 141 | 
             
            - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
         | 
| 142 | 
            +
            - lib/svmkit/optimizer/nadam.rb
         | 
| 142 143 | 
             
            - lib/svmkit/pairwise_metric.rb
         | 
| 143 144 | 
             
            - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
         | 
| 144 145 | 
             
            - lib/svmkit/polynomial_model/factorization_machine_regressor.rb
         |