RubyGems - svmkit - Versions diffs - 0.2.9 → 0.3.0 - Mend

svmkit 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

checksums.yaml +4 -4
data/.rubocop.yml +2 -2
data/HISTORY.md +11 -0
data/lib/svmkit/base/classifier.rb +4 -1
data/lib/svmkit/base/regressor.rb +34 -0
data/lib/svmkit/ensemble/random_forest_classifier.rb +1 -0
data/lib/svmkit/evaluation_measure/accuracy.rb +2 -1
data/lib/svmkit/evaluation_measure/f_score.rb +2 -1
data/lib/svmkit/evaluation_measure/log_loss.rb +4 -2
data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +30 -0
data/lib/svmkit/evaluation_measure/mean_squared_error.rb +30 -0
data/lib/svmkit/evaluation_measure/precision.rb +2 -1
data/lib/svmkit/evaluation_measure/r2_score.rb +44 -0
data/lib/svmkit/evaluation_measure/recall.rb +2 -1
data/lib/svmkit/kernel_approximation/rbf.rb +1 -0
data/lib/svmkit/kernel_machine/kernel_svc.rb +3 -1
data/lib/svmkit/linear_model/logistic_regression.rb +2 -1
data/lib/svmkit/linear_model/svc.rb +3 -1
data/lib/svmkit/linear_model/svr.rb +172 -0
data/lib/svmkit/model_selection/cross_validation.rb +19 -7
data/lib/svmkit/model_selection/k_fold.rb +1 -0
data/lib/svmkit/model_selection/stratified_k_fold.rb +1 -0
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +1 -0
data/lib/svmkit/naive_bayes/naive_bayes.rb +1 -0
data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +1 -0
data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +94 -0
data/lib/svmkit/pairwise_metric.rb +2 -0
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +2 -1
data/lib/svmkit/preprocessing/l2_normalizer.rb +1 -0
data/lib/svmkit/preprocessing/label_encoder.rb +1 -0
data/lib/svmkit/preprocessing/min_max_scaler.rb +1 -0
data/lib/svmkit/preprocessing/one_hot_encoder.rb +1 -0
data/lib/svmkit/preprocessing/standard_scaler.rb +1 -0
data/lib/svmkit/probabilistic_output.rb +1 -1
data/lib/svmkit/tree/decision_tree_classifier.rb +1 -1
data/lib/svmkit/validation.rb +12 -0
data/lib/svmkit/version.rb +1 -1
data/lib/svmkit.rb +6 -0
data/svmkit.gemspec +1 -10
metadata +10 -10

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 533508a3afd82d2bae3ddea3a5669f6d389688155d44649fd3eafaaff8207e0f
-  data.tar.gz: 43ff09b3bab72b68bc7a6b3740902be64508496337a4cde61057d33b91d0f349
+  metadata.gz: 855d3ac2dcfbfde9eb82a4661f17cebb75b4b7c57ba37ee26a8aa03d0f1ccab8
+  data.tar.gz: 13ec3e84fd6f4fcd973d164b3d6f395c024e42095eae63819bea6ef4179697d9
 SHA512:
-  metadata.gz: e1c1bed8269d3c768d75bd8a5e731b5d2da689ef7a235a70c5ea87090aac79889c9fe0a004eca73c3015aae42d068f44b2b1e3a61a03b641607b2909441513b6
-  data.tar.gz: 80a18ca4ec7eb2740148829024f0625c835f24b771bb321168d0cc3233d8e152257b5515355d99a968dc25a670f9a69f3e30b42bf190757206a64bbcd2babcd6
+  metadata.gz: be8b4e4528e70ab99c8b9f1ad0d93d717b0359d30ca9b142b4d5cb44b6b3875cc9d97f199021ab48ab3b9cc1e635f37c14dbb15efe81b55de673d83b65cc10ab
+  data.tar.gz: 9c8ae85dc3ca8dd7fe6bd15bd4b3eb46b775b32923691a12f9621032bcbffea7a3405404a9b8924f68a344d771cac56f439e6e2c024f68deacd00d7358c3c4f4

data/.rubocop.yml CHANGED Viewed

@@ -1,7 +1,7 @@
 inherit_from: .rubocop_todo.yml
 AllCops:
-  TargetRubyVersion: 2.2
+  TargetRubyVersion: 2.1
   DisplayCopNames: true
   DisplayStyleGuide: true
@@ -9,7 +9,7 @@ Documentation:
   Enabled: false
 Metrics/LineLength:
-  Max: 140
+  Max: 145
   IgnoredPatterns: ['(\A|\s)#']
 Metrics/ModuleLength:

data/HISTORY.md CHANGED Viewed

@@ -1,3 +1,14 @@
+# 0.3.0
+- Add class for Support Vector Regression.
+- Add class for K-Nearest Neighbor Regression.
+- Add class for evaluating coefficient of determination.
+- Add class for evaluating mean squared error.
+- Add class for evaluating mean absolute error.
+- Fix to use min method instead of sort and first methods.
+- Fix cross validation class to be able to use for regression problem.
+- Fix some typos on document.
+- Rename spec filename for Factorization Machine classifier.
 # 0.2.9
 - Add predict_proba method to SVC and KernelSVC.
 - Add class for evaluating logarithmic loss.

data/lib/svmkit/base/classifier.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/evaluation_measure/accuracy'
 module SVMKit
   module Base
     # Module for all classifiers in SVMKit.
@@ -14,7 +17,7 @@ module SVMKit
         raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
       end
-      # Claculate the mean accuracy of the given testing data.
+      # Calculate the mean accuracy of the given testing data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
       # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.

data/lib/svmkit/base/regressor.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/evaluation_measure/r2_score'
+module SVMKit
+  module Base
+    # Module for all regressors in SVMKit.
+    module Regressor
+      # An abstract method for fitting a model.
+      def fit
+        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+      end
+      # An abstract method for predicting labels.
+      def predict
+        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+      end
+      # Calculate the coefficient of determination for the given testing data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
+      # @return [Float] Coefficient of determination
+      def score(x, y)
+        SVMKit::Validation.check_sample_array(x)
+        SVMKit::Validation.check_tvalue_array(y)
+        SVMKit::Validation.check_sample_tvalue_size(x, y)
+        evaluator = SVMKit::EvaluationMeasure::R2Score.new
+        evaluator.score(y, predict(x))
+      end
+    end
+  end
+end

data/lib/svmkit/ensemble/random_forest_classifier.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'

data/lib/svmkit/evaluation_measure/accuracy.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/evaluator'
 module SVMKit
@@ -13,7 +14,7 @@ module SVMKit
     class Accuracy
       include Base::Evaluator
-      # Claculate mean accuracy.
+      # Calculate mean accuracy.
       #
       # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
       # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.

data/lib/svmkit/evaluation_measure/f_score.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/evaluator'
 require 'svmkit/evaluation_measure/precision_recall'
@@ -27,7 +28,7 @@ module SVMKit
         @average = average
       end
-      # Claculate average F1-score
+      # Calculate average F1-score
       #
       # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
       # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.

data/lib/svmkit/evaluation_measure/log_loss.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/evaluator'
+require 'svmkit/preprocessing/one_hot_encoder'
 module SVMKit
   module EvaluationMeasure
@@ -12,7 +14,7 @@ module SVMKit
     class LogLoss
       include Base::Evaluator
-      # Claculate mean logarithmic loss.
+      # Calculate mean logarithmic loss.
       # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
       # mean logarithmic loss for binary classification.
       #
@@ -28,7 +30,7 @@ module SVMKit
         clipped_p = y_pred.clip(eps, 1 - eps)
         log_loss = if n_classes.nil?
-                     negative_label = y_true.to_a.uniq.sort.first
+                     negative_label = y_true.to_a.uniq.min
                      bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
                      -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
                    else

data/lib/svmkit/evaluation_measure/mean_absolute_error.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/evaluator'
+module SVMKit
+  module EvaluationMeasure
+    # MeanAbsoluteError is an evaluator that reports the mean of the absolute
+    # differences between ground truth and estimated target values.
+    #
+    # @example
+    #   evaluator = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
+    #   puts evaluator.score(ground_truth, predicted)
+    class MeanAbsoluteError
+      include Base::Evaluator
+      # Calculate mean absolute error.
+      #
+      # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+      # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+      # @raise [ArgumentError] If y_true and y_pred do not have the same shape.
+      # @return [Float] Mean absolute error
+      def score(y_true, y_pred)
+        [y_true, y_pred].each { |values| SVMKit::Validation.check_tvalue_array(values) }
+        unless y_true.shape == y_pred.shape
+          raise ArgumentError, 'Expect to have the same size both y_true and y_pred.'
+        end
+        residuals = y_true - y_pred
+        residuals.abs.mean
+      end
+    end
+  end
+end

data/lib/svmkit/evaluation_measure/mean_squared_error.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/evaluator'
+module SVMKit
+  module EvaluationMeasure
+    # MeanSquaredError is an evaluator that reports the mean of the squared
+    # differences between ground truth and estimated target values.
+    #
+    # @example
+    #   evaluator = SVMKit::EvaluationMeasure::MeanSquaredError.new
+    #   puts evaluator.score(ground_truth, predicted)
+    class MeanSquaredError
+      include Base::Evaluator
+      # Calculate mean squared error.
+      #
+      # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+      # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+      # @raise [ArgumentError] If y_true and y_pred do not have the same shape.
+      # @return [Float] Mean squared error
+      def score(y_true, y_pred)
+        [y_true, y_pred].each { |values| SVMKit::Validation.check_tvalue_array(values) }
+        unless y_true.shape == y_pred.shape
+          raise ArgumentError, 'Expect to have the same size both y_true and y_pred.'
+        end
+        residuals = y_true - y_pred
+        (residuals**2).mean
+      end
+    end
+  end
+end

data/lib/svmkit/evaluation_measure/precision.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/evaluator'
 require 'svmkit/evaluation_measure/precision_recall'
@@ -27,7 +28,7 @@ module SVMKit
         @average = average
       end
-      # Claculate average precision.
+      # Calculate average precision.
       #
       # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
       # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.

data/lib/svmkit/evaluation_measure/r2_score.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/evaluator'
+require 'svmkit/evaluation_measure/precision_recall'
+module SVMKit
+  module EvaluationMeasure
+    # R2Score is a class that calculates the coefficient of determination for the predicted values.
+    #
+    # @example
+    #   evaluator = SVMKit::EvaluationMeasure::R2Score.new
+    #   puts evaluator.score(ground_truth, predicted)
+    class R2Score
+      include Base::Evaluator
+      # Create a new evaluation measure calculater for coefficient of determination.
+      def initialize; end
+      # Calculate the coefficient of determination.
+      #
+      # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+      # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated taget values.
+      # @return [Float] Coefficient of determination
+      def score(y_true, y_pred)
+        SVMKit::Validation.check_tvalue_array(y_true)
+        SVMKit::Validation.check_tvalue_array(y_pred)
+        raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+        n_samples, n_outputs = y_true.shape
+        numerator = ((y_true - y_pred)**2).sum(0)
+        yt_mean = y_true.sum(0) / n_samples
+        denominator = ((y_true - yt_mean)**2).sum(0)
+        if n_outputs.nil?
+          denominator.zero? ? 0.0 : 1.0 - numerator / denominator
+        else
+          scores = 1 - numerator / denominator
+          scores[denominator.eq(0)] = 0.0
+          scores.sum / scores.size
+        end
+      end
+    end
+  end
+end

data/lib/svmkit/evaluation_measure/recall.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/evaluator'
 require 'svmkit/evaluation_measure/precision_recall'
@@ -27,7 +28,7 @@ module SVMKit
         @average = average
       end
-      # Claculate average recall
+      # Calculate average recall
       #
       # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
       # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.

data/lib/svmkit/kernel_approximation/rbf.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED Viewed

@@ -1,7 +1,9 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/probabilistic_output'
 module SVMKit
   # This module consists of the classes that implement kernel method-based estimator.
@@ -88,7 +90,7 @@ module SVMKit
                                    end
           end
         else
-          negative_label = y.to_a.uniq.sort.first
+          negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @weight_vec = binary_fit(x, bin_y)
           @prob_param = if @params[:probability]

data/lib/svmkit/linear_model/logistic_regression.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
@@ -95,7 +96,7 @@ module SVMKit
             @bias_term[n] = bias
           end
         else
-          negative_label = y.to_a.uniq.sort.first
+          negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @weight_vec, @bias_term = binary_fit(x, bin_y)
         end

data/lib/svmkit/linear_model/svc.rb CHANGED Viewed

@@ -1,7 +1,9 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/probabilistic_output'
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
@@ -103,7 +105,7 @@ module SVMKit
                                    end
           end
         else
-          negative_label = y.to_a.uniq.sort.first
+          negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @weight_vec, @bias_term = binary_fit(x, bin_y)
           @prob_param = if @params[:probability]

data/lib/svmkit/linear_model/svr.rb ADDED Viewed

@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/regressor'
+module SVMKit
+  module LinearModel
+    # SVR is a class that implements Support Vector Regressor
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   estimator =
+    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_target_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    class SVR
+      include Base::BaseEstimator
+      include Base::Regressor
+      # Return the weight vector for SVC.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+      # Return the bias term (a.k.a. intercept) for SVC.
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+      # Return the random generator for performing random sampling.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new regressor with Support Vector Machine by the SGD optimization.
+      #
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param epsilon [Float] The margin of tolerance.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
+                     max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+        SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
+        SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+        SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+        SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
+                                                 max_iter: max_iter, batch_size: batch_size)
+        @params = {}
+        @params[:reg_param] = reg_param
+        @params[:fit_bias] = fit_bias
+        @params[:bias_scale] = bias_scale
+        @params[:epsilon] = epsilon
+        @params[:max_iter] = max_iter
+        @params[:batch_size] = batch_size
+        @params[:normalize] = normalize
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [SVR] The learned regressor itself.
+      def fit(x, y)
+        SVMKit::Validation.check_sample_array(x)
+        SVMKit::Validation.check_tvalue_array(y)
+        SVMKit::Validation.check_sample_tvalue_size(x, y)
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        _n_samples, n_features = x.shape
+        if n_outputs > 1
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          n_outputs.times do |n|
+            weight, bias = single_fit(x, y[true, n])
+            @weight_vec[n, true] = weight
+            @bias_term[n] = bias
+          end
+        else
+          @weight_vec, @bias_term = single_fit(x, y)
+        end
+        self
+      end
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        SVMKit::Validation.check_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data about SVC.
+      def marshal_dump
+        { params: @params,
+          weight_vec: @weight_vec,
+          bias_term: @bias_term,
+          rng: @rng }
+      end
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @weight_vec = obj[:weight_vec]
+        @bias_term = obj[:bias_term]
+        @rng = obj[:rng]
+        nil
+      end
+      private
+      def single_fit(x, y)
+        # Expand feature vectors for bias term.
+        samples = @params[:fit_bias] ? expand_feature(x) : x
+        # Initialize some variables.
+        n_samples, n_features = samples.shape
+        rand_ids = [*0...n_samples].shuffle(random: @rng)
+        weight_vec = Numo::DFloat.zeros(n_features)
+        # Start optimization.
+        @params[:max_iter].times do |t|
+          # random sampling
+          subset_ids = rand_ids.shift(@params[:batch_size])
+          rand_ids.concat(subset_ids)
+          # update the weight vector.
+          z = samples[subset_ids, true].dot(weight_vec.transpose)
+          coef = Numo::DFloat.zeros(@params[:batch_size])
+          coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
+          coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
+          mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+          weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
+          # scale the weight vector.
+          normalize_weight_vec(weight_vec) if @params[:normalize]
+        end
+        split_weight_vec_bias(weight_vec)
+      end
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+      end
+      def learning_rate(iter)
+        1.0 / (@params[:reg_param] * (iter + 1))
+      end
+      def normalize_weight_vec(weight_vec)
+        norm = Math.sqrt(weight_vec.dot(weight_vec))
+        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      end
+      def split_weight_vec_bias(weight_vec)
+        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+        [weights, bias]
+      end
+    end
+  end
+end

data/lib/svmkit/model_selection/cross_validation.rb CHANGED Viewed

@@ -1,6 +1,12 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+require 'svmkit/base/regressor'
 require 'svmkit/base/splitter'
+require 'svmkit/base/evaluator'
+require 'svmkit/evaluation_measure/log_loss'
 module SVMKit
   # This module consists of the classes for model validation techniques.
@@ -51,9 +57,9 @@ module SVMKit
       # Perform the evaluation of given estimator with cross-validation method.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features])
-      #   The dataset to be used to evaluate the classifier.
-      # @param y [Numo::Int32] (shape: [n_samples])
-      #   The labels to be used to evaluate the classifier.
+      #   The dataset to be used to evaluate the estimator.
+      # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
+      #   The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
       # @return [Hash] The report summarizing the results of cross-validation.
       #   * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
       #   * :test_score (Array<Float>) The scores of testing dataset for each split.
@@ -61,8 +67,14 @@ module SVMKit
       #     the return_train_score is false.
       def perform(x, y)
         SVMKit::Validation.check_sample_array(x)
-        SVMKit::Validation.check_label_array(y)
-        SVMKit::Validation.check_sample_label_size(x, y)
+        if @estimator.is_a?(SVMKit::Base::Classifier)
+          SVMKit::Validation.check_label_array(y)
+          SVMKit::Validation.check_sample_label_size(x, y)
+        end
+        if @estimator.is_a?(SVMKit::Base::Regressor)
+          SVMKit::Validation.check_tvalue_array(y)
+          SVMKit::Validation.check_sample_tvalue_size(x, y)
+        end
         # Initialize the report of cross validation.
         report = { test_score: [], train_score: nil, fit_time: [] }
         report[:train_score] = [] if @return_train_score
@@ -71,9 +83,9 @@ module SVMKit
           # Split dataset into training and testing dataset.
           feature_ids = !kernel_machine? || train_ids
           train_x = x[train_ids, feature_ids]
-          train_y = y[train_ids]
+          train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
           test_x = x[test_ids, feature_ids]
-          test_y = y[test_ids]
+          test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
           # Fit the estimator.
           start_time = Time.now.to_i
           @estimator.fit(train_x, train_y)

data/lib/svmkit/model_selection/k_fold.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/splitter'
 module SVMKit

data/lib/svmkit/model_selection/stratified_k_fold.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/splitter'
 module SVMKit

data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator.rb'
 require 'svmkit/base/classifier.rb'

data/lib/svmkit/naive_bayes/naive_bayes.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb ADDED Viewed

@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/regressor'
+module SVMKit
+  module NearestNeighbors
+    # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
+    # The current implementation uses the Euclidean distance for finding the neighbors.
+    #
+    # @example
+    #   estimator =
+    #     SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
+    #   estimator.fit(training_samples, traininig_target_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class KNeighborsRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+      # Return the prototypes for the nearest neighbor regressor.
+      # @return [Numo::DFloat] (shape: [n_samples, n_features])
+      attr_reader :prototypes
+      # Return the values of the prototypes
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs])
+      attr_reader :values
+      # Create a new regressor with the nearest neighbor rule.
+      #
+      # @param n_neighbors [Integer] The number of neighbors.
+      def initialize(n_neighbors: 5)
+        SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
+        SVMKit::Validation.check_params_positive(n_neighbors: n_neighbors)
+        @params = {}
+        @params[:n_neighbors] = n_neighbors
+        @prototypes = nil
+        @values = nil
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [KNeighborsRegressor] The learned regressor itself.
+      def fit(x, y)
+        SVMKit::Validation.check_sample_array(x)
+        SVMKit::Validation.check_tvalue_array(y)
+        SVMKit::Validation.check_sample_tvalue_size(x, y)
+        @prototypes = x.dup
+        @values = y.dup
+        self
+      end
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        SVMKit::Validation.check_sample_array(x)
+        # Initialize some variables.
+        n_samples, = x.shape
+        n_prototypes, n_outputs = @values.shape
+        n_neighbors = [@params[:n_neighbors], n_prototypes].min
+        # Calculate distance matrix.
+        distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
+        # Predict values for the given samples.
+        predicted_values = Array.new(n_samples) do |n|
+          neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
+          n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
+        end
+        Numo::DFloat[*predicted_values]
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data about KNeighborsRegressor.
+      def marshal_dump
+        { params: @params,
+          prototypes: @prototypes,
+          values: @values }
+      end
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @prototypes = obj[:prototypes]
+        @values = obj[:values]
+        nil
+      end
+    end
+  end
+end

data/lib/svmkit/pairwise_metric.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 module SVMKit
   # Module for calculating pairwise distances, similarities, and kernels.
   module PairwiseMetric

data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
@@ -110,7 +111,7 @@ module SVMKit
             @bias_term[n] = bias
           end
         else
-          negative_label = y.to_a.uniq.sort.first
+          negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
           @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
         end

data/lib/svmkit/preprocessing/l2_normalizer.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/label_encoder.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/one_hot_encoder.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/standard_scaler.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/transformer'

data/lib/svmkit/probabilistic_output.rb CHANGED Viewed

@@ -27,7 +27,7 @@ module SVMKit
       def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
         # Initialize some variables.
         n_samples = bin_y.size
-        negative_label = bin_y.to_a.uniq.sort.first
+        negative_label = bin_y.to_a.uniq.min
         pos = bin_y.ne(negative_label)
         neg = bin_y.eq(negative_label)
         n_pos_samples = pos.count

data/lib/svmkit/tree/decision_tree_classifier.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 # frozen_string_literal: true
+require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
-require 'ostruct'
 module SVMKit
   # This module consists of the classes that implement tree models.

data/lib/svmkit/validation.rb CHANGED Viewed

@@ -19,12 +19,24 @@ module SVMKit
       nil
     end
+    # @!visibility private
+    def check_tvalue_array(y)
+      raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
+      nil
+    end
     # @!visibility private
     def check_sample_label_size(x, y)
       raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
       nil
     end
+    # @!visibility private
+    def check_sample_tvalue_size(x, y)
+      raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
+      nil
+    end
     # @!visibility private
     def check_params_type(type, params = {})
       params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }

data/lib/svmkit/version.rb CHANGED Viewed

@@ -3,5 +3,5 @@
 # SVMKit is a machine learning library in Ruby.
 module SVMKit
   # @!visibility private
-  VERSION = '0.2.9'.freeze
+  VERSION = '0.3.0'.freeze
 end

data/lib/svmkit.rb CHANGED Viewed

@@ -9,16 +9,19 @@ require 'svmkit/dataset'
 require 'svmkit/probabilistic_output'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/base/regressor'
 require 'svmkit/base/transformer'
 require 'svmkit/base/splitter'
 require 'svmkit/base/evaluator'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/svc'
+require 'svmkit/linear_model/svr'
 require 'svmkit/linear_model/logistic_regression'
 require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/polynomial_model/factorization_machine_classifier'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/nearest_neighbors/k_neighbors_classifier'
+require 'svmkit/nearest_neighbors/k_neighbors_regressor'
 require 'svmkit/naive_bayes/naive_bayes'
 require 'svmkit/tree/decision_tree_classifier'
 require 'svmkit/ensemble/random_forest_classifier'
@@ -35,3 +38,6 @@ require 'svmkit/evaluation_measure/precision'
 require 'svmkit/evaluation_measure/recall'
 require 'svmkit/evaluation_measure/f_score'
 require 'svmkit/evaluation_measure/log_loss'
+require 'svmkit/evaluation_measure/r2_score'
+require 'svmkit/evaluation_measure/mean_squared_error'
+require 'svmkit/evaluation_measure/mean_absolute_error'

data/svmkit.gemspec CHANGED Viewed

@@ -18,7 +18,7 @@ SVMKit is a machine learninig library in Ruby.
 SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
 SVMKit currently supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
-K-nearest neighbor classifier, and cross-validation.
+K-nearest neighbor algorithm, and cross-validation.
 MSG
   spec.homepage      = 'https://github.com/yoshoku/svmkit'
   spec.license       = 'BSD-2-Clause'
@@ -38,13 +38,4 @@ MSG
   spec.add_development_dependency 'coveralls', '~> 0.8'
   spec.add_development_dependency 'rake', '~> 12.0'
   spec.add_development_dependency 'rspec', '~> 3.0'
-  spec.post_install_message = <<MSG
-*************************************************************************
-Thank you for installing SVMKit!!
-Note that the SVMKit has been changed to use Numo::NArray for
-linear algebra library from version 0.2.0.
-*************************************************************************
-MSG
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.9
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-05-02 00:00:00.000000000 Z
+date: 2018-05-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -85,7 +85,7 @@ description: |
   SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
   SVMKit currently supports Linear / Kernel Support Vector Machine,
   Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
-  K-nearest neighbor classifier, and cross-validation.
+  K-nearest neighbor algorithm, and cross-validation.
 email:
 - yoshoku@outlook.com
 executables: []
@@ -110,6 +110,7 @@ files:
 - lib/svmkit/base/base_estimator.rb
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/evaluator.rb
+- lib/svmkit/base/regressor.rb
 - lib/svmkit/base/splitter.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/dataset.rb
@@ -117,19 +118,24 @@ files:
 - lib/svmkit/evaluation_measure/accuracy.rb
 - lib/svmkit/evaluation_measure/f_score.rb
 - lib/svmkit/evaluation_measure/log_loss.rb
+- lib/svmkit/evaluation_measure/mean_absolute_error.rb
+- lib/svmkit/evaluation_measure/mean_squared_error.rb
 - lib/svmkit/evaluation_measure/precision.rb
 - lib/svmkit/evaluation_measure/precision_recall.rb
+- lib/svmkit/evaluation_measure/r2_score.rb
 - lib/svmkit/evaluation_measure/recall.rb
 - lib/svmkit/kernel_approximation/rbf.rb
 - lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/svc.rb
+- lib/svmkit/linear_model/svr.rb
 - lib/svmkit/model_selection/cross_validation.rb
 - lib/svmkit/model_selection/k_fold.rb
 - lib/svmkit/model_selection/stratified_k_fold.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
 - lib/svmkit/naive_bayes/naive_bayes.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
+- lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb
@@ -146,13 +152,7 @@ homepage: https://github.com/yoshoku/svmkit
 licenses:
 - BSD-2-Clause
 metadata: {}
-post_install_message: |
-  *************************************************************************
-  Thank you for installing SVMKit!!
-  Note that the SVMKit has been changed to use Numo::NArray for
-  linear algebra library from version 0.2.0.
-  *************************************************************************
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib