svmkit 0.2.9 → 0.3.0

Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +2 -2
  3. data/HISTORY.md +11 -0
  4. data/lib/svmkit/base/classifier.rb +4 -1
  5. data/lib/svmkit/base/regressor.rb +34 -0
  6. data/lib/svmkit/ensemble/random_forest_classifier.rb +1 -0
  7. data/lib/svmkit/evaluation_measure/accuracy.rb +2 -1
  8. data/lib/svmkit/evaluation_measure/f_score.rb +2 -1
  9. data/lib/svmkit/evaluation_measure/log_loss.rb +4 -2
  10. data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +30 -0
  11. data/lib/svmkit/evaluation_measure/mean_squared_error.rb +30 -0
  12. data/lib/svmkit/evaluation_measure/precision.rb +2 -1
  13. data/lib/svmkit/evaluation_measure/r2_score.rb +44 -0
  14. data/lib/svmkit/evaluation_measure/recall.rb +2 -1
  15. data/lib/svmkit/kernel_approximation/rbf.rb +1 -0
  16. data/lib/svmkit/kernel_machine/kernel_svc.rb +3 -1
  17. data/lib/svmkit/linear_model/logistic_regression.rb +2 -1
  18. data/lib/svmkit/linear_model/svc.rb +3 -1
  19. data/lib/svmkit/linear_model/svr.rb +172 -0
  20. data/lib/svmkit/model_selection/cross_validation.rb +19 -7
  21. data/lib/svmkit/model_selection/k_fold.rb +1 -0
  22. data/lib/svmkit/model_selection/stratified_k_fold.rb +1 -0
  23. data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +1 -0
  24. data/lib/svmkit/naive_bayes/naive_bayes.rb +1 -0
  25. data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +1 -0
  26. data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +94 -0
  27. data/lib/svmkit/pairwise_metric.rb +2 -0
  28. data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +2 -1
  29. data/lib/svmkit/preprocessing/l2_normalizer.rb +1 -0
  30. data/lib/svmkit/preprocessing/label_encoder.rb +1 -0
  31. data/lib/svmkit/preprocessing/min_max_scaler.rb +1 -0
  32. data/lib/svmkit/preprocessing/one_hot_encoder.rb +1 -0
  33. data/lib/svmkit/preprocessing/standard_scaler.rb +1 -0
  34. data/lib/svmkit/probabilistic_output.rb +1 -1
  35. data/lib/svmkit/tree/decision_tree_classifier.rb +1 -1
  36. data/lib/svmkit/validation.rb +12 -0
  37. data/lib/svmkit/version.rb +1 -1
  38. data/lib/svmkit.rb +6 -0
  39. data/svmkit.gemspec +1 -10
  40. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 533508a3afd82d2bae3ddea3a5669f6d389688155d44649fd3eafaaff8207e0f
- data.tar.gz: 43ff09b3bab72b68bc7a6b3740902be64508496337a4cde61057d33b91d0f349
+ metadata.gz: 855d3ac2dcfbfde9eb82a4661f17cebb75b4b7c57ba37ee26a8aa03d0f1ccab8
+ data.tar.gz: 13ec3e84fd6f4fcd973d164b3d6f395c024e42095eae63819bea6ef4179697d9
  SHA512:
- metadata.gz: e1c1bed8269d3c768d75bd8a5e731b5d2da689ef7a235a70c5ea87090aac79889c9fe0a004eca73c3015aae42d068f44b2b1e3a61a03b641607b2909441513b6
- data.tar.gz: 80a18ca4ec7eb2740148829024f0625c835f24b771bb321168d0cc3233d8e152257b5515355d99a968dc25a670f9a69f3e30b42bf190757206a64bbcd2babcd6
+ metadata.gz: be8b4e4528e70ab99c8b9f1ad0d93d717b0359d30ca9b142b4d5cb44b6b3875cc9d97f199021ab48ab3b9cc1e635f37c14dbb15efe81b55de673d83b65cc10ab
+ data.tar.gz: 9c8ae85dc3ca8dd7fe6bd15bd4b3eb46b775b32923691a12f9621032bcbffea7a3405404a9b8924f68a344d771cac56f439e6e2c024f68deacd00d7358c3c4f4
data/.rubocop.yml CHANGED
@@ -1,7 +1,7 @@
  inherit_from: .rubocop_todo.yml

  AllCops:
- TargetRubyVersion: 2.2
+ TargetRubyVersion: 2.1
  DisplayCopNames: true
  DisplayStyleGuide: true

@@ -9,7 +9,7 @@ Documentation:
  Enabled: false

  Metrics/LineLength:
- Max: 140
+ Max: 145
  IgnoredPatterns: ['(\A|\s)#']

  Metrics/ModuleLength:
data/HISTORY.md CHANGED
@@ -1,3 +1,14 @@
+ # 0.3.0
+ - Add class for Support Vector Regression.
+ - Add class for K-Nearest Neighbor Regression.
+ - Add class for evaluating coefficient of determination.
+ - Add class for evaluating mean squared error.
+ - Add class for evaluating mean absolute error.
+ - Fix to use min method instead of sort and first methods.
+ - Fix cross validation class to be able to use for regression problem.
+ - Fix some typos on document.
+ - Rename spec filename for Factorization Machine classifier.
+
  # 0.2.9
  - Add predict_proba method to SVC and KernelSVC.
  - Add class for evaluating logarithmic loss.
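The 0.3.0 entries above introduce a regression workflow. A minimal sketch of how the new pieces fit together, assuming toy data built here with Numo::DFloat (the data, parameter values, and variable names are illustrative only, not taken from the gem):

```ruby
require 'svmkit'

# Hypothetical toy regression data: 200 samples, 4 features, noisy linear target.
x = Numo::DFloat.new(200, 4).rand
y = x.dot(Numo::DFloat[0.5, -1.0, 2.0, 0.0]) + 0.1 * Numo::DFloat.new(200).rand_norm

# New in 0.3.0: linear Support Vector Regression trained with mini-batch SGD.
svr = SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
svr.fit(x, y)

# New in 0.3.0: regression metrics.
mse = SVMKit::EvaluationMeasure::MeanSquaredError.new
puts mse.score(y, svr.predict(x))
puts svr.score(x, y) # coefficient of determination via SVMKit::Base::Regressor#score
```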
data/lib/svmkit/base/classifier.rb CHANGED
@@ -1,5 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+ require 'svmkit/evaluation_measure/accuracy'
+
  module SVMKit
  module Base
  # Module for all classifiers in SVMKit.
@@ -14,7 +17,7 @@ module SVMKit
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
  end

- # Claculate the mean accuracy of the given testing data.
+ # Calculate the mean accuracy of the given testing data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
data/lib/svmkit/base/regressor.rb ADDED
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/evaluation_measure/r2_score'
+
+ module SVMKit
+ module Base
+ # Module for all regressors in SVMKit.
+ module Regressor
+ # An abstract method for fitting a model.
+ def fit
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+ end
+
+ # An abstract method for predicting labels.
+ def predict
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+ end
+
+ # Calculate the coefficient of determination for the given testing data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
+ # @return [Float] Coefficient of determination
+ def score(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ evaluator = SVMKit::EvaluationMeasure::R2Score.new
+ evaluator.score(y, predict(x))
+ end
+ end
+ end
+ end
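Any estimator that mixes in this new module gets the R²-based `score` method for free; it only has to supply `fit` and `predict`. A minimal sketch with a hypothetical mean-predicting baseline (the class name and the data variables are placeholders, not part of the library):

```ruby
require 'svmkit'

# Hypothetical baseline regressor that always predicts the training mean.
class MeanRegressor
  include SVMKit::Base::Regressor

  def fit(_x, y)
    @mean = y.mean
    self
  end

  def predict(x)
    Numo::DFloat.ones(x.shape[0]) * @mean
  end
end

# x_train, y_train, x_test, y_test are assumed to be Numo::DFloat arrays.
baseline = MeanRegressor.new.fit(x_train, y_train)
puts baseline.score(x_test, y_test) # coefficient of determination, around 0.0 for this baseline
```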
data/lib/svmkit/ensemble/random_forest_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/evaluation_measure/accuracy.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'

  module SVMKit
@@ -13,7 +14,7 @@ module SVMKit
  class Accuracy
  include Base::Evaluator

- # Claculate mean accuracy.
+ # Calculate mean accuracy.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/f_score.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average F1-score
+ # Calculate average F1-score
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/log_loss.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
+ require 'svmkit/preprocessing/one_hot_encoder'

  module SVMKit
  module EvaluationMeasure
@@ -12,7 +14,7 @@ module SVMKit
  class LogLoss
  include Base::Evaluator

- # Claculate mean logarithmic loss.
+ # Calculate mean logarithmic loss.
  # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
  # mean logarithmic loss for binary classification.
  #
@@ -28,7 +30,7 @@ module SVMKit
  clipped_p = y_pred.clip(eps, 1 - eps)

  log_loss = if n_classes.nil?
- negative_label = y_true.to_a.uniq.sort.first
+ negative_label = y_true.to_a.uniq.min
  bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
  -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
  else
data/lib/svmkit/evaluation_measure/mean_absolute_error.rb ADDED
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+
+ module SVMKit
+ module EvaluationMeasure
+ # MeanAbsoluteError is a class that calculates the mean absolute error.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
+ # puts evaluator.score(ground_truth, predicted)
+ class MeanAbsoluteError
+ include Base::Evaluator
+
+ # Calculate mean absolute error.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+ # @return [Float] Mean absolute error
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ (y_true - y_pred).abs.mean
+ end
+ end
+ end
+ end
data/lib/svmkit/evaluation_measure/mean_squared_error.rb ADDED
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+
+ module SVMKit
+ module EvaluationMeasure
+ # MeanSquaredError is a class that calculates the mean squared error.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::MeanSquaredError.new
+ # puts evaluator.score(ground_truth, predicted)
+ class MeanSquaredError
+ include Base::Evaluator
+
+ # Calculate mean squared error.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+ # @return [Float] Mean squared error
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ ((y_true - y_pred)**2).mean
+ end
+ end
+ end
+ end
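A quick sanity check of the two new metrics above, assuming a small hand-made pair of target vectors (the numbers are illustrative only):

```ruby
require 'svmkit'

y_true = Numo::DFloat[3.0, -0.5, 2.0, 7.0]
y_pred = Numo::DFloat[2.5,  0.0, 2.0, 8.0]

mae = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
mse = SVMKit::EvaluationMeasure::MeanSquaredError.new
puts mae.score(y_true, y_pred) # => 0.5   (mean of 0.5, 0.5, 0.0, 1.0)
puts mse.score(y_true, y_pred) # => 0.375 (mean of 0.25, 0.25, 0.0, 1.0)
```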
data/lib/svmkit/evaluation_measure/precision.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average precision.
+ # Calculate average precision.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/r2_score.rb ADDED
@@ -0,0 +1,44 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+ require 'svmkit/evaluation_measure/precision_recall'
+
+ module SVMKit
+ module EvaluationMeasure
+ # R2Score is a class that calculates the coefficient of determination for the predicted values.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::R2Score.new
+ # puts evaluator.score(ground_truth, predicted)
+ class R2Score
+ include Base::Evaluator
+
+ # Create a new evaluation measure calculater for coefficient of determination.
+ def initialize; end
+
+ # Calculate the coefficient of determination.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated taget values.
+ # @return [Float] Coefficient of determination
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ n_samples, n_outputs = y_true.shape
+ numerator = ((y_true - y_pred)**2).sum(0)
+ yt_mean = y_true.sum(0) / n_samples
+ denominator = ((y_true - yt_mean)**2).sum(0)
+ if n_outputs.nil?
+ denominator.zero? ? 0.0 : 1.0 - numerator / denominator
+ else
+ scores = 1 - numerator / denominator
+ scores[denominator.eq(0)] = 0.0
+ scores.sum / scores.size
+ end
+ end
+ end
+ end
+ end
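For a single output the score computed above is R^2 = 1 - sum((y_i - yhat_i)^2) / sum((y_i - ymean)^2). A quick check with the same hand-made vectors as before (values are illustrative only):

```ruby
require 'svmkit'

y_true = Numo::DFloat[3.0, -0.5, 2.0, 7.0]
y_pred = Numo::DFloat[2.5,  0.0, 2.0, 8.0]

evaluator = SVMKit::EvaluationMeasure::R2Score.new
puts evaluator.score(y_true, y_pred) # => 0.9486... (1 - 1.5 / 29.1875)
```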
data/lib/svmkit/evaluation_measure/recall.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average recall
+ # Calculate average recall
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/kernel_approximation/rbf.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED
@@ -1,7 +1,9 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/probabilistic_output'

  module SVMKit
  # This module consists of the classes that implement kernel method-based estimator.
@@ -88,7 +90,7 @@ module SVMKit
  end
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec = binary_fit(x, bin_y)
  @prob_param = if @params[:probability]
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

@@ -95,7 +96,7 @@ module SVMKit
  @bias_term[n] = bias
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -1,7 +1,9 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/probabilistic_output'

  module SVMKit
  # This module consists of the classes that implement generalized linear models.
@@ -103,7 +105,7 @@ module SVMKit
  end
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec, @bias_term = binary_fit(x, bin_y)
  @prob_param = if @params[:probability]
data/lib/svmkit/linear_model/svr.rb ADDED
@@ -0,0 +1,172 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/regressor'
+
+ module SVMKit
+ module LinearModel
+ # SVR is a class that implements Support Vector Regressor
+ # with stochastic gradient descent (SGD) optimization.
+ #
+ # @example
+ # estimator =
+ # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
+ # estimator.fit(training_samples, traininig_target_values)
+ # results = estimator.predict(testing_samples)
+ #
+ # *Reference*
+ # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+ class SVR
+ include Base::BaseEstimator
+ include Base::Regressor
+
+ # Return the weight vector for SVC.
+ # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+ attr_reader :weight_vec
+
+ # Return the bias term (a.k.a. intercept) for SVC.
+ # @return [Numo::DFloat] (shape: [n_outputs])
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling.
+ # @return [Random]
+ attr_reader :rng
+
+ # Create a new regressor with Support Vector Machine by the SGD optimization.
+ #
+ # @param reg_param [Float] The regularization parameter.
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] The scale of the bias term.
+ # @param epsilon [Float] The margin of tolerance.
+ # @param max_iter [Integer] The maximum number of iterations.
+ # @param batch_size [Integer] The size of the mini batches.
+ # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
+ max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+ SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
+ max_iter: max_iter, batch_size: batch_size)
+ @params = {}
+ @params[:reg_param] = reg_param
+ @params[:fit_bias] = fit_bias
+ @params[:bias_scale] = bias_scale
+ @params[:epsilon] = epsilon
+ @params[:max_iter] = max_iter
+ @params[:batch_size] = batch_size
+ @params[:normalize] = normalize
+ @params[:random_seed] = random_seed
+ @params[:random_seed] ||= srand
+ @weight_vec = nil
+ @bias_term = nil
+ @rng = Random.new(@params[:random_seed])
+ end
+
+ # Fit the model with given training data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+ # @return [SVR] The learned regressor itself.
+ def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+
+ n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+ _n_samples, n_features = x.shape
+
+ if n_outputs > 1
+ @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+ @bias_term = Numo::DFloat.zeros(n_outputs)
+ n_outputs.times do |n|
+ weight, bias = single_fit(x, y[true, n])
+ @weight_vec[n, true] = weight
+ @bias_term[n] = bias
+ end
+ else
+ @weight_vec, @bias_term = single_fit(x, y)
+ end
+
+ self
+ end
+
+ # Predict values for samples.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+ def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+ x.dot(@weight_vec.transpose) + @bias_term
+ end
+
+ # Dump marshal data.
+ # @return [Hash] The marshal data about SVC.
+ def marshal_dump
+ { params: @params,
+ weight_vec: @weight_vec,
+ bias_term: @bias_term,
+ rng: @rng }
+ end
+
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
+ @params = obj[:params]
+ @weight_vec = obj[:weight_vec]
+ @bias_term = obj[:bias_term]
+ @rng = obj[:rng]
+ nil
+ end
+
+ private
+
+ def single_fit(x, y)
+ # Expand feature vectors for bias term.
+ samples = @params[:fit_bias] ? expand_feature(x) : x
+ # Initialize some variables.
+ n_samples, n_features = samples.shape
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
+ weight_vec = Numo::DFloat.zeros(n_features)
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # random sampling
+ subset_ids = rand_ids.shift(@params[:batch_size])
+ rand_ids.concat(subset_ids)
+ # update the weight vector.
+ z = samples[subset_ids, true].dot(weight_vec.transpose)
+ coef = Numo::DFloat.zeros(@params[:batch_size])
+ coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
+ coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
+ mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
+ # scale the weight vector.
+ normalize_weight_vec(weight_vec) if @params[:normalize]
+ end
+ split_weight_vec_bias(weight_vec)
+ end
+
+ def expand_feature(x)
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+ end
+
+ def learning_rate(iter)
+ 1.0 / (@params[:reg_param] * (iter + 1))
+ end
+
+ def normalize_weight_vec(weight_vec)
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
+ weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+ end
+
+ def split_weight_vec_bias(weight_vec)
+ weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+ bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+ [weights, bias]
+ end
+ end
+ end
+ end
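The `fit` method above loops over outputs when y is two-dimensional, so a single SVR instance can handle multi-target regression. A small sketch of that shape handling, assuming synthetic data built here (targets, parameter values, and variable names are illustrative only):

```ruby
require 'svmkit'

# Hypothetical data with two target columns per sample.
x = Numo::DFloat.new(150, 3).rand
y = Numo::NArray.hstack([x.sum(1).reshape(150, 1), (x[true, 0] - x[true, 2]).reshape(150, 1)])

svr = SVMKit::LinearModel::SVR.new(reg_param: 0.01, fit_bias: true, epsilon: 0.05,
                                   max_iter: 500, batch_size: 20, random_seed: 1)
svr.fit(x, y)
p svr.weight_vec.shape # => [2, 3]  (one weight vector per output)
p svr.predict(x).shape # => [150, 2]
puts svr.score(x, y)   # R^2 averaged over the two outputs
```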
data/lib/svmkit/model_selection/cross_validation.rb CHANGED
@@ -1,6 +1,12 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/classifier'
+ require 'svmkit/base/regressor'
  require 'svmkit/base/splitter'
+ require 'svmkit/base/evaluator'
+ require 'svmkit/evaluation_measure/log_loss'

  module SVMKit
  # This module consists of the classes for model validation techniques.
@@ -51,9 +57,9 @@ module SVMKit
  # Perform the evalution of given classifier with cross-validation method.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
- # The dataset to be used to evaluate the classifier.
- # @param y [Numo::Int32] (shape: [n_samples])
- # The labels to be used to evaluate the classifier.
+ # The dataset to be used to evaluate the estimator.
+ # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
+ # The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
  # @return [Hash] The report summarizing the results of cross-validation.
  # * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
  # * :test_score (Array<Float>) The scores of testing dataset for each split.
@@ -61,8 +67,14 @@
  # the return_train_score is false.
  def perform(x, y)
  SVMKit::Validation.check_sample_array(x)
- SVMKit::Validation.check_label_array(y)
- SVMKit::Validation.check_sample_label_size(x, y)
+ if @estimator.is_a?(SVMKit::Base::Classifier)
+ SVMKit::Validation.check_label_array(y)
+ SVMKit::Validation.check_sample_label_size(x, y)
+ end
+ if @estimator.is_a?(SVMKit::Base::Regressor)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ end
  # Initialize the report of cross validation.
  report = { test_score: [], train_score: nil, fit_time: [] }
  report[:train_score] = [] if @return_train_score
@@ -71,9 +83,9 @@
  # Split dataset into training and testing dataset.
  feature_ids = !kernel_machine? || train_ids
  train_x = x[train_ids, feature_ids]
- train_y = y[train_ids]
+ train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
  test_x = x[test_ids, feature_ids]
- test_y = y[test_ids]
+ test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
  # Fit the estimator.
  start_time = Time.now.to_i
  @estimator.fit(train_x, train_y)
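With the branches above, the same cross-validation driver now accepts regressors. A sketch of running 5-fold cross-validation on an SVR, assuming the existing constructor keywords `estimator:` and `splitter:` and a KFold splitter built with `n_splits:`, `shuffle:`, and `random_seed:` (x and y are Numo::DFloat placeholders, not defined here):

```ruby
require 'svmkit'

svr = SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
kf  = SVMKit::ModelSelection::KFold.new(n_splits: 5, shuffle: true, random_seed: 1)
cv  = SVMKit::ModelSelection::CrossValidation.new(estimator: svr, splitter: kf)

report  = cv.perform(x, y)
mean_r2 = report[:test_score].inject(:+) / report[:test_score].size
puts(format('Mean R^2 over folds: %.3f', mean_r2))
```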
data/lib/svmkit/model_selection/k_fold.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/splitter'

  module SVMKit
data/lib/svmkit/model_selection/stratified_k_fold.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/splitter'

  module SVMKit
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator.rb'
  require 'svmkit/base/classifier.rb'

data/lib/svmkit/naive_bayes/naive_bayes.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb ADDED
@@ -0,0 +1,94 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/regressor'
+
+ module SVMKit
+ module NearestNeighbors
+ # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
+ # The current implementation uses the Euclidean distance for finding the neighbors.
+ #
+ # @example
+ # estimator =
+ # SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
+ # estimator.fit(training_samples, traininig_target_values)
+ # results = estimator.predict(testing_samples)
+ #
+ class KNeighborsRegressor
+ include Base::BaseEstimator
+ include Base::Regressor
+
+ # Return the prototypes for the nearest neighbor regressor.
+ # @return [Numo::DFloat] (shape: [n_samples, n_features])
+ attr_reader :prototypes
+
+ # Return the values of the prototypes
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs])
+ attr_reader :values
+
+ # Create a new regressor with the nearest neighbor rule.
+ #
+ # @param n_neighbors [Integer] The number of neighbors.
+ def initialize(n_neighbors: 5)
+ SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
+ SVMKit::Validation.check_params_positive(n_neighbors: n_neighbors)
+ @params = {}
+ @params[:n_neighbors] = n_neighbors
+ @prototypes = nil
+ @values = nil
+ end
+
+ # Fit the model with given training data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+ # @return [KNeighborsRegressor] The learned regressor itself.
+ def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ @prototypes = x.dup
+ @values = y.dup
+ self
+ end
+
+ # Predict values for samples.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+ def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+ # Initialize some variables.
+ n_samples, = x.shape
+ n_prototypes, n_outputs = @values.shape
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min
+ # Calculate distance matrix.
+ distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
+ # Predict values for the given samples.
+ predicted_values = Array.new(n_samples) do |n|
+ neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
+ n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
+ end
+ Numo::DFloat[*predicted_values]
+ end
+
+ # Dump marshal data.
+ # @return [Hash] The marshal data about KNeighborsRegressor.
+ def marshal_dump
+ { params: @params,
+ prototypes: @prototypes,
+ values: @values }
+ end
+
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
+ @params = obj[:params]
+ @prototypes = obj[:prototypes]
+ @values = obj[:values]
+ nil
+ end
+ end
+ end
+ end
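Note that the constructor defined above takes `n_neighbors` as a keyword argument, so calls look like `new(n_neighbors: 5)`. A tiny worked sketch with one-dimensional hand-made data (values chosen only to make the averaging visible):

```ruby
require 'svmkit'

x_train = Numo::DFloat[[0.0], [1.0], [2.0], [3.0]]
y_train = Numo::DFloat[0.0, 1.0, 2.0, 3.0]

knn = SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 2)
knn.fit(x_train, y_train)

# The two nearest prototypes to 1.4 are 1.0 and 2.0, so the prediction is their mean.
puts knn.predict(Numo::DFloat[[1.4]]) # => 1.5
```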
data/lib/svmkit/pairwise_metric.rb CHANGED
@@ -1,5 +1,7 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+
  module SVMKit
  # Module for calculating pairwise distances, similarities, and kernels.
  module PairwiseMetric
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

@@ -110,7 +111,7 @@ module SVMKit
  @bias_term[n] = bias
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
data/lib/svmkit/preprocessing/l2_normalizer.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/label_encoder.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/one_hot_encoder.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/standard_scaler.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/probabilistic_output.rb CHANGED
@@ -27,7 +27,7 @@ module SVMKit
  def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
  # Initialize some variables.
  n_samples = bin_y.size
- negative_label = bin_y.to_a.uniq.sort.first
+ negative_label = bin_y.to_a.uniq.min
  pos = bin_y.ne(negative_label)
  neg = bin_y.eq(negative_label)
  n_pos_samples = pos.count
data/lib/svmkit/tree/decision_tree_classifier.rb CHANGED
@@ -1,8 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
- require 'ostruct'

  module SVMKit
  # This module consists of the classes that implement tree models.
data/lib/svmkit/validation.rb CHANGED
@@ -19,12 +19,24 @@ module SVMKit
  nil
  end

+ # @!visibility private
+ def check_tvalue_array(y)
+ raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
+ nil
+ end
+
  # @!visibility private
  def check_sample_label_size(x, y)
  raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
  nil
  end

+ # @!visibility private
+ def check_sample_tvalue_size(x, y)
+ raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
+ nil
+ end
+
  # @!visibility private
  def check_params_type(type, params = {})
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
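The two helpers added above mirror the existing label checks but demand Numo::DFloat targets. A short sketch of that behaviour (array contents are placeholders):

```ruby
require 'svmkit'

x = Numo::DFloat.new(10, 2).rand
y = Numo::Int32.new(10).seq

begin
  SVMKit::Validation.check_tvalue_array(y) # Int32 labels are not valid regression targets
rescue TypeError => e
  puts e.message # => "Expect class of target value vector to be Numo::DFloat"
end

SVMKit::Validation.check_tvalue_array(Numo::DFloat.cast(y))          # passes, returns nil
SVMKit::Validation.check_sample_tvalue_size(x, Numo::DFloat.cast(y)) # same number of rows, returns nil
```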
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
  # @!visibility private
- VERSION = '0.2.9'.freeze
+ VERSION = '0.3.0'.freeze
  end
data/lib/svmkit.rb CHANGED
@@ -9,16 +9,19 @@ require 'svmkit/dataset'
  require 'svmkit/probabilistic_output'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/base/regressor'
  require 'svmkit/base/transformer'
  require 'svmkit/base/splitter'
  require 'svmkit/base/evaluator'
  require 'svmkit/kernel_approximation/rbf'
  require 'svmkit/linear_model/svc'
+ require 'svmkit/linear_model/svr'
  require 'svmkit/linear_model/logistic_regression'
  require 'svmkit/kernel_machine/kernel_svc'
  require 'svmkit/polynomial_model/factorization_machine_classifier'
  require 'svmkit/multiclass/one_vs_rest_classifier'
  require 'svmkit/nearest_neighbors/k_neighbors_classifier'
+ require 'svmkit/nearest_neighbors/k_neighbors_regressor'
  require 'svmkit/naive_bayes/naive_bayes'
  require 'svmkit/tree/decision_tree_classifier'
  require 'svmkit/ensemble/random_forest_classifier'
@@ -35,3 +38,6 @@ require 'svmkit/evaluation_measure/precision'
  require 'svmkit/evaluation_measure/recall'
  require 'svmkit/evaluation_measure/f_score'
  require 'svmkit/evaluation_measure/log_loss'
+ require 'svmkit/evaluation_measure/r2_score'
+ require 'svmkit/evaluation_measure/mean_squared_error'
+ require 'svmkit/evaluation_measure/mean_absolute_error'
data/svmkit.gemspec CHANGED
@@ -18,7 +18,7 @@ SVMKit is a machine learninig library in Ruby.
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
- K-nearest neighbor classifier, and cross-validation.
+ K-nearest neighbor algorithm, and cross-validation.
  MSG
  spec.homepage = 'https://github.com/yoshoku/svmkit'
  spec.license = 'BSD-2-Clause'
@@ -38,13 +38,4 @@ MSG
  spec.add_development_dependency 'coveralls', '~> 0.8'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
-
- spec.post_install_message = <<MSG
- *************************************************************************
- Thank you for installing SVMKit!!
-
- Note that the SVMKit has been changed to use Numo::NArray for
- linear algebra library from version 0.2.0.
- *************************************************************************
- MSG
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.2.9
+ version: 0.3.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-05-02 00:00:00.000000000 Z
+ date: 2018-05-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -85,7 +85,7 @@ description: |
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
- K-nearest neighbor classifier, and cross-validation.
+ K-nearest neighbor algorithm, and cross-validation.
  email:
  - yoshoku@outlook.com
  executables: []
@@ -110,6 +110,7 @@ files:
  - lib/svmkit/base/base_estimator.rb
  - lib/svmkit/base/classifier.rb
  - lib/svmkit/base/evaluator.rb
+ - lib/svmkit/base/regressor.rb
  - lib/svmkit/base/splitter.rb
  - lib/svmkit/base/transformer.rb
  - lib/svmkit/dataset.rb
@@ -117,19 +118,24 @@ files:
  - lib/svmkit/evaluation_measure/accuracy.rb
  - lib/svmkit/evaluation_measure/f_score.rb
  - lib/svmkit/evaluation_measure/log_loss.rb
+ - lib/svmkit/evaluation_measure/mean_absolute_error.rb
+ - lib/svmkit/evaluation_measure/mean_squared_error.rb
  - lib/svmkit/evaluation_measure/precision.rb
  - lib/svmkit/evaluation_measure/precision_recall.rb
+ - lib/svmkit/evaluation_measure/r2_score.rb
  - lib/svmkit/evaluation_measure/recall.rb
  - lib/svmkit/kernel_approximation/rbf.rb
  - lib/svmkit/kernel_machine/kernel_svc.rb
  - lib/svmkit/linear_model/logistic_regression.rb
  - lib/svmkit/linear_model/svc.rb
+ - lib/svmkit/linear_model/svr.rb
  - lib/svmkit/model_selection/cross_validation.rb
  - lib/svmkit/model_selection/k_fold.rb
  - lib/svmkit/model_selection/stratified_k_fold.rb
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
  - lib/svmkit/naive_bayes/naive_bayes.rb
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
+ - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
  - lib/svmkit/pairwise_metric.rb
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
  - lib/svmkit/preprocessing/l2_normalizer.rb
@@ -146,13 +152,7 @@ homepage: https://github.com/yoshoku/svmkit
  licenses:
  - BSD-2-Clause
  metadata: {}
- post_install_message: |
- *************************************************************************
- Thank you for installing SVMKit!!
-
- Note that the SVMKit has been changed to use Numo::NArray for
- linear algebra library from version 0.2.0.
- *************************************************************************
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib