svmkit 0.1.2 → 0.1.3
This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/lib/svmkit.rb +2 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +4 -3
- data/lib/svmkit/kernel_machine/kernel_svc.rb +128 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +7 -6
- data/lib/svmkit/linear_model/pegasos_svc.rb +7 -6
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +3 -2
- data/lib/svmkit/pairwise_metric.rb +72 -0
- data/lib/svmkit/preprocessing/min_max_scaler.rb +2 -1
- data/lib/svmkit/version.rb +1 -1
- metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
+  data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
+  data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
+# 0.1.3
+- Added class for Kernel Support Vector Machine with the Pegasos algorithm.
+- Added module for calculating pairwise kernel functions and euclidean distances.
+
 # 0.1.2
 - Added support for learning a model with a bias term to the PegasosSVC and LogisticRegression classes.
 - Rewrote the documentation with yard notation.
data/lib/svmkit.rb CHANGED
@@ -5,12 +5,14 @@ end
 
 require 'svmkit/version'
 require 'svmkit/utils'
+require 'svmkit/pairwise_metric'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/pegasos_svc'
 require 'svmkit/linear_model/logistic_regression'
+require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
data/lib/svmkit/kernel_approximation/rbf.rb CHANGED
@@ -40,9 +40,10 @@ module SVMKit
       #
       # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
       #
-      # @param
-      # @
-      # @
+      # @param params [Hash] The parameters for RBF kernel approximation.
+      # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+      # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
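To show how these constructor options are meant to be used, here is a minimal sketch. The sample matrix is invented, and the `fit_transform` call is assumed from the gem's transformer interface rather than shown in this diff.

```ruby
require 'svmkit'

# Hypothetical data: 100 samples with 5 features, values drawn uniformly from [0, 1).
samples = NMatrix.random([100, 5])

# Approximate an RBF kernel feature space with 128 random features.
transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
mapped = transformer.fit_transform(samples) # shape: [100, 128]
```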
data/lib/svmkit/kernel_machine/kernel_svc.rb ADDED
@@ -0,0 +1,128 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement kernel method-based estimators.
+  module KernelMachine
+    # KernelSVC is a class that implements a (nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+    #
+    # @example
+    #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
+    #   estimator =
+    #     SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
+    #   estimator.fit(training_kernel_matrix, training_labels)
+    #   testing_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
+    #   results = estimator.predict(testing_kernel_matrix)
+    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+    class KernelSVC
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # @!visibility private
+      DEFAULT_PARAMS = {
+        reg_param: 1.0,
+        max_iter: 1000,
+        random_seed: nil
+      }.freeze
+
+      # Return the weight vector for Kernel SVC.
+      # @return [NMatrix] (shape: [1, n_training_samples])
+      attr_reader :weight_vec
+
+      # Return the random generator for performing random sampling in the Pegasos algorithm.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+      #
+      # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
+      #
+      # @param params [Hash] The parameters for Kernel SVC.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Integer] :max_iter (1000) The maximum number of iterations.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
+      def initialize(params = {})
+        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+        self.params[:random_seed] ||= srand
+        @weight_vec = nil
+        @rng = Random.new(self.params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
+      #   The kernel matrix of the training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
+      # @return [KernelSVC] The learned classifier itself.
+      def fit(x, y)
+        # Generate binary labels.
+        negative_label = y.uniq.sort.shift
+        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Initialize some variables.
+        n_training_samples = x.shape[0]
+        rand_ids = []
+        weight_vec = NMatrix.zeros([1, n_training_samples])
+        # Start optimization.
+        params[:max_iter].times do |t|
+          # Random sampling.
+          rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+          target_id = rand_ids.shift
+          # Update the weight vector.
+          func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
+          func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
+          weight_vec[target_id] += 1.0 if func < 1.0
+        end
+        # Store the learned model.
+        @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
+      def decision_function(x)
+        @weight_vec.dot(x.transpose)
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
+      def predict(x)
+        decision_function(x).map { |v| v >= 0 ? 1 : -1 }
+      end
+
+      # Calculate the mean accuracy of the given testing data.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples.
+      # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
+      def score(x, y)
+        p = predict(x)
+        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
+        n_hits / y.size.to_f
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about KernelSVC.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        self.params = obj[:params]
+        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end
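Putting the new class together with the new PairwiseMetric module, here is a minimal usage sketch following the @example block above; the toy samples and labels are invented for illustration.

```ruby
require 'svmkit'

# Toy binary problem: two well-separated clusters with two features each.
training_samples = NMatrix.new([4, 2], [0.0, 0.1, 0.1, 0.0, 5.0, 5.1, 5.1, 5.0])
training_labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# KernelSVC consumes a precomputed kernel matrix, not raw samples.
training_kernel_matrix = SVMKit::PairwiseMetric.rbf_kernel(training_samples)
estimator = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
estimator.fit(training_kernel_matrix, training_labels)

# Prediction uses the kernel matrix between testing and training samples.
testing_samples = NMatrix.new([2, 2], [0.05, 0.05, 5.05, 5.05])
testing_kernel_matrix = SVMKit::PairwiseMetric.rbf_kernel(testing_samples, training_samples)
results = estimator.predict(testing_kernel_matrix) # labels in {-1, 1}
```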
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -46,13 +46,14 @@ module SVMKit
       #
       # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
       #
-      # @param
-      # @
-      # @
+      # @param params [Hash] The parameters for Logistic Regression.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+      # @option params [Float] :bias_scale (1.0) The scale of the bias term.
       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
-      # @
-      # @
-      # @
+      # @option params [Integer] :max_iter (100) The maximum number of iterations.
+      # @option params [Integer] :batch_size (50) The size of the mini batches.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
data/lib/svmkit/linear_model/pegasos_svc.rb CHANGED
@@ -44,12 +44,13 @@ module SVMKit
       #
       # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
       #
-      # @param
-      # @
-      # @
-      # @
-      # @
-      # @
+      # @param params [Hash] The parameters for SVC.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+      # @option params [Float] :bias_scale (1.0) The scale of the bias term.
+      # @option params [Integer] :max_iter (100) The maximum number of iterations.
+      # @option params [Integer] :batch_size (50) The size of the mini batches.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
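Both linear models take the same options, and the fit_bias/bias_scale pair works identically in each: when fit_bias is true, every feature vector v is internally extended to [v; bias_scale], so the bias is learned as one extra weight. A minimal sketch with PegasosSVC, using toy data invented for illustration:

```ruby
require 'svmkit'

samples = NMatrix.new([4, 2], [0.0, 0.1, 0.2, 0.1, 1.0, 0.9, 1.1, 1.0])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# With fit_bias: true, each sample is augmented with bias_scale, so the last
# element of the learned weight vector plays the role of the bias term.
estimator = SVMKit::LinearModel::PegasosSVC.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 100, batch_size: 4, random_seed: 1
)
estimator.fit(samples, labels)
predicted = estimator.predict(samples)
```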
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -32,8 +32,9 @@ module SVMKit
       # Create a new multi-label classifier with the one-vs-rest strategy.
       #
       # @overload new(estimator: base_estimator) -> OneVsRestClassifier
-      #
-      #
+      #
+      # @param params [Hash] The parameters for OneVsRestClassifier.
+      # @option params [Classifier] :estimator (nil) The (binary) classifier for constructing a multi-label classifier.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @estimators = nil
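For context, a sketch of wiring a binary estimator into this wrapper; the data and the choice of PegasosSVC as the base estimator are assumptions for illustration.

```ruby
require 'svmkit'

# Three-class toy problem; OneVsRestClassifier fits one binary classifier per class.
samples = NMatrix.new([6, 2], [0.0, 0.1, 0.1, 0.0, 1.0, 1.1, 1.1, 1.0, 2.0, 2.1, 2.1, 2.0])
labels  = NMatrix.new([1, 6], [0, 0, 1, 1, 2, 2])

base_estimator = SVMKit::LinearModel::PegasosSVC.new(max_iter: 100, random_seed: 1)
classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
classifier.fit(samples, labels)
predicted = classifier.predict(samples)
```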
data/lib/svmkit/pairwise_metric.rb ADDED
@@ -0,0 +1,72 @@
+module SVMKit
+  # Module for calculating pairwise distances, similarities, and kernels.
+  module PairwiseMetric
+    class << self
+      # Calculate the pairwise euclidean distances between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def euclidean_distance(x, y = nil)
+        y = x if y.nil?
+        sum_x_vec = (x**2).sum(1)
+        sum_y_vec = (y**2).sum(1)
+        dot_xy_mat = x.dot(y.transpose)
+        distance_matrix = dot_xy_mat * -2.0 +
+                          sum_x_vec.repeat(y.shape[0], 1) +
+                          sum_y_vec.transpose.repeat(x.shape[0], 0)
+        distance_matrix.abs.sqrt
+      end
+
+      # Calculate the rbf kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the rbf kernel; if nil, 1 / n_features is used.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def rbf_kernel(x, y = nil, gamma = nil)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        distance_matrix = euclidean_distance(x, y)
+        ((distance_matrix**2) * -gamma).exp
+      end
+
+      # Calculate the linear kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def linear_kernel(x, y = nil)
+        y = x if y.nil?
+        x.dot(y.transpose)
+      end
+
+      # Calculate the polynomial kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param degree [Integer] The parameter of the polynomial kernel.
+      # @param gamma [Float] The parameter of the polynomial kernel; if nil, 1 / n_features is used.
+      # @param coef [Integer] The parameter of the polynomial kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef)**degree
+      end
+
+      # Calculate the sigmoid kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the sigmoid kernel; if nil, 1 / n_features is used.
+      # @param coef [Integer] The parameter of the sigmoid kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef).tanh
+      end
+    end
+  end
+end
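A short sketch exercising the new module (data invented for illustration). Note that rbf_kernel is computed from euclidean_distance as exp(-gamma * d(x, y)^2), matching the code above.

```ruby
require 'svmkit'

x = NMatrix.new([3, 2], [0.0, 0.0, 1.0, 0.0, 0.0, 2.0])
y = NMatrix.new([2, 2], [0.5, 0.5, 1.5, 1.5])

# Omitting y yields square matrices over one sample set.
dist = SVMKit::PairwiseMetric.euclidean_distance(x)    # shape: [3, 3]
rbf  = SVMKit::PairwiseMetric.rbf_kernel(x, nil, 0.5)  # exp(-0.5 * dist**2)

# Passing y yields rectangular matrices between two sample sets.
lin  = SVMKit::PairwiseMetric.linear_kernel(x, y)      # shape: [3, 2]
poly = SVMKit::PairwiseMetric.polynomial_kernel(x, y)  # (x.dot(y^T) / n_features + 1)**3
```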
data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED
@@ -31,7 +31,8 @@ module SVMKit
       #
       # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
       #
-      # @param
+      # @param params [Hash] The parameters for MinMaxScaler.
+      # @option params [Array<Float>] :feature_range ([0.0, 1.0]) The desired range of samples.
       def initialize(params = {})
         @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @min_vec = nil
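A sketch of the feature_range option (toy data invented; fit_transform is assumed from the gem's transformer interface, not shown in this diff):

```ruby
require 'svmkit'

samples = NMatrix.new([3, 2], [0.0, 10.0, 5.0, 20.0, 10.0, 30.0])

# Rescale each feature column into [-1.0, 1.0] instead of the default [0.0, 1.0].
scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [-1.0, 1.0])
normalized = scaler.fit_transform(samples)
```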
data/lib/svmkit/version.rb CHANGED
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -93,9 +93,11 @@ files:
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/kernel_approximation/rbf.rb
+- lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/pegasos_svc.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
+- lib/svmkit/pairwise_metric.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb
 - lib/svmkit/preprocessing/min_max_scaler.rb
 - lib/svmkit/preprocessing/standard_scaler.rb