RubyGems - rumale - Versions diffs - 0.22.0 → 0.22.5 - Mend

rumale 0.22.0 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

checksums.yaml +4 -4
data/.coveralls.yml +1 -0
data/.github/workflows/build.yml +6 -3
data/.github/workflows/coverage.yml +28 -0
data/.gitignore +1 -0
data/.rubocop.yml +1 -0
data/CHANGELOG.md +35 -0
data/Gemfile +6 -4
data/LICENSE.txt +1 -1
data/README.md +56 -19
data/ext/rumale/tree.c +24 -12
data/lib/rumale.rb +8 -0
data/lib/rumale/base/base_estimator.rb +5 -3
data/lib/rumale/dataset.rb +7 -3
data/lib/rumale/decomposition/pca.rb +1 -1
data/lib/rumale/ensemble/stacking_classifier.rb +215 -0
data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
data/lib/rumale/ensemble/voting_classifier.rb +126 -0
data/lib/rumale/ensemble/voting_regressor.rb +82 -0
data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
data/lib/rumale/kernel_approximation/nystroem.rb +29 -9
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
data/lib/rumale/linear_model/elastic_net.rb +1 -1
data/lib/rumale/linear_model/lasso.rb +1 -1
data/lib/rumale/linear_model/linear_regression.rb +63 -34
data/lib/rumale/linear_model/logistic_regression.rb +1 -1
data/lib/rumale/linear_model/nnls.rb +137 -0
data/lib/rumale/linear_model/ridge.rb +70 -33
data/lib/rumale/linear_model/svc.rb +4 -3
data/lib/rumale/linear_model/svr.rb +4 -3
data/lib/rumale/metric_learning/mlkr.rb +161 -0
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +7 -4
data/lib/rumale/pairwise_metric.rb +1 -1
data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
data/lib/rumale/validation.rb +13 -1
data/lib/rumale/version.rb +1 -1
data/rumale.gemspec +1 -1
metadata +14 -4

data/lib/rumale/linear_model/logistic_regression.rb CHANGED Viewed

@@ -181,7 +181,7 @@ module Rumale
         @classes.size > 2
       end
-      def fit_lbfgs(base_x, base_y)
+      def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         if multiclass_problem?
           fnc = proc do |w, x, y, a|
             n_features = x.shape[1]

data/lib/rumale/linear_model/nnls.rb ADDED Viewed

@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+require 'lbfgsb'
+require 'rumale/base/base_estimator'
+require 'rumale/base/regressor'
+module Rumale
+  module LinearModel
+    # NNLS is a class that implements non-negative least squares regression.
+    # NNLS solves least squares problem under non-negative constraints on the coefficient using L-BFGS-B method.
+    #
+    # @example
+    #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class NNLS
+      include Base::BaseEstimator
+      include Base::Regressor
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+      # Returns the number of iterations when converged.
+      # @return [Integer]
+      attr_reader :n_iter
+      # Return the random generator for initializing weight.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new regressor with non-negative least squares method.
+      #
+      # @param reg_param [Float] The regularization parameter for L2 regularization term.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'svd', this parameter is ignored.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+        check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
+        check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+        check_params_numeric_or_nil(random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter)
+        @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
+        @params[:random_seed] ||= srand
+        @n_iter = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [NonneagtiveLeastSquare] The learned regressor itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        x = expand_feature(x) if fit_bias?
+        n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+        w_init[w_init.lt(0)] = 0
+        bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+        bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+        res = Lbfgsb.minimize(
+          fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+        )
+        @n_iter = res[:n_iter]
+        w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+        if fit_bias?
+          @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+          @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+        else
+          @weight_vec = w.dup
+          @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+        end
+        self
+      end
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+      private
+      def nnls_fnc(w, x, y, alpha)
+        n_samples, n_features = x.shape
+        w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+        z = x.dot(w.transpose)
+        d = z - y
+        loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+        gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+        [loss, gradient.flatten.dup]
+      end
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+      def single_target?(y)
+        y.ndim == 1
+      end
+    end
+  end
+end

data/lib/rumale/linear_model/ridge.rb CHANGED Viewed

@@ -1,12 +1,15 @@
 # frozen_string_literal: true
+require 'lbfgsb'
 require 'rumale/linear_model/base_sgd'
 require 'rumale/base/regressor'
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization or singular value decomposition (SVD).
+    # with stochastic gradient descent (SGD) optimization,
+    # singular value decomposition (SVD), or L-BFGS optimization.
     #
     # @example
     #   estimator =
@@ -41,32 +44,33 @@ module Rumale
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param batch_size [Integer] The size of the mini batches.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
-      #   If solver = 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
+      #   If solver is 'svd', this parameter is ignored.
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
       #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
+      #   'lbfgs' uses the L-BFGS method for optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
       #   If nil is given, the method does not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'svd'.
+      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
@@ -83,9 +87,9 @@ module Rumale
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
         @params[:solver] = if solver == 'auto'
-                             load_linalg? ? 'svd' : 'sgd'
+                             enable_linalg?(warning: false) ? 'svd' : 'sgd'
                            else
-                             solver != 'svd' ? 'sgd' : 'svd'
+                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'sgd'
                            end
         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
         @params[:random_seed] ||= srand
@@ -99,15 +103,17 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Ridge] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
         y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)
-        if @params[:solver] == 'svd' && enable_linalg?
+        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
           fit_svd(x, y)
+        elsif @params[:solver] == 'lbfgs'
+          fit_lbfgs(x, y)
         else
           fit_sgd(x, y)
         end
@@ -127,27 +133,51 @@ module Rumale
       private
       def fit_svd(x, y)
-        samples = @params[:fit_bias] ? expand_feature(x) : x
+        x = expand_feature(x) if fit_bias?
-        s, u, vt = Numo::Linalg.svd(samples, driver: 'sdd', job: 'S')
+        s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
-        is_single_target_vals = y.shape[1].nil?
-        if @params[:fit_bias]
-          @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
-        end
+        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
       end
-      def fit_sgd(x, y)
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+      def fit_lbfgs(x, y)
+        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+          n_samples, n_features = x.shape
+          w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+          z = x.dot(w.transpose)
+          d = z - y
+          loss = (d**2).sum.fdiv(n_samples) + a * (w * w).sum
+          gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * a * w
+          [loss, gradient.flatten.dup]
+        end
+        x = expand_feature(x) if fit_bias?
         n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
+        @weight_vec, @bias_term =
+          if single_target?(y)
+            split_weight(res[:x])
+          else
+            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+          end
+      end
-        if n_outputs > 1
+      def fit_sgd(x, y)
+        if single_target?(y)
+          @weight_vec, @bias_term = partial_fit(x, y)
+        else
+          n_outputs = y.shape[1]
+          n_features = x.shape[1]
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
           if enable_parallel?
@@ -156,16 +186,23 @@ module Rumale
           else
             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
           end
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
         end
       end
-      def load_linalg?
-        return false if defined?(Numo::Linalg).nil?
-        return false if Numo::Linalg::VERSION < '0.1.4'
+      def single_target?(y)
+        y.ndim == 1
+      end
+      def init_weight(n_features, n_outputs)
+        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+      end
-        true
+      def split_weight_mult(w)
+        if fit_bias?
+          [w[0...-1, true].dup, w[-1, true].dup]
+        else
+          [w.dup, Numo::DFloat.zeros(w.shape[1])]
+        end
       end
     end
   end

data/lib/rumale/linear_model/svc.rb CHANGED Viewed

@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =

data/lib/rumale/linear_model/svr.rb CHANGED Viewed

@@ -8,9 +8,10 @@ module Rumale
     # SVR is a class that implements Support Vector Regressor
     # with stochastic gradient descent optimization.
     #
-    # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =

data/lib/rumale/metric_learning/mlkr.rb ADDED Viewed

@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/decomposition/pca'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'lbfgsb'
+module Rumale
+  module MetricLearning
+    # MLKR is a class that implements Metric Learning for Kernel Regression.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::MLKR.new
+    #   transformer.fit(training_samples, traininig_target_values)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
+    class MLKR
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Returns the metric components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Return the number of iterations run for optimization
+      # @return [Integer]
+      attr_reader :n_iter
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer with MLKR.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(max_iter: max_iter, tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [MLKR] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        @prototypes = x.dot(@components.transpose)
+        @values = y
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        fit(x, y).transform(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+      private
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+        )
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+      def mlkr_fnc(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # predict values.
+        kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        kernel_mat[kernel_mat.diag_indices] = 0.0
+        norm = kernel_mat.sum(1)
+        norm[norm.eq(0)] = 1
+        y_pred = kernel_mat.dot(y) / norm
+        # calculate loss.
+        y_diff = y_pred - y
+        loss = (y_diff**2).sum
+        # calculate gradient.
+        weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = 8 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
+      end
+    end
+  end
+end