RubyGems - svmkit - Versions diffs - 0.1.0 → 0.1.1 - Mend

svmkit 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/lib/svmkit.rb +1 -0
data/lib/svmkit/linear_model/logistic_regression.rb +162 -0
data/lib/svmkit/linear_model/pegasos_svc.rb +1 -1
data/lib/svmkit/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 62fdc5c03a044a7625bf2374159cf84ef32a6869
-  data.tar.gz: 4cd1c86a344cd1410a3a5c0f4bdebd04d80e7e7b
+  metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
+  data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
 SHA512:
-  metadata.gz: 1b704e536e183f881e6f16895ccdc1620dc8b694db7b44772db669e579ed07652df16c8de88794c65b5eeca0eeb805c415f1e44c36446cad3bdd230f3354b320
-  data.tar.gz: e25ca447621cef29ea1807168cbe6e7210308549a298db7c8797d54e127bfb1e7fe7de3c3e9a9d719cee6de1100705e352a3b76a5127282441027fd1b389e2e1
+  metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
+  data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41

data/README.md CHANGED Viewed

@@ -36,7 +36,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
 transformed = transformer.fit_transform(normalized)
 base_classifier =
-  SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
+  SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
 classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
 classifier.fit(transformed, labels)

data/lib/svmkit.rb CHANGED Viewed

@@ -10,6 +10,7 @@ require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/pegasos_svc'
+require 'svmkit/linear_model/logistic_regression'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'

data/lib/svmkit/linear_model/logistic_regression.rb ADDED Viewed

@@ -0,0 +1,162 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+module SVMKit
+  # This module consists of the classes that implement generalized linear models.
+  module LinearModel
+    # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
+    # Note that the Logistic Regression of SVMKit performs as a binary classifier.
+    #
+    #   estimator =
+    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # * *Reference*:
+    #   - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+    #
+    class LogisticRegression
+      include Base::BaseEstimator
+      include Base::Classifier
+      DEFAULT_PARAMS = { # :nodoc:
+        reg_param: 1.0,
+        max_iter: 100,
+        batch_size: 50,
+        random_seed: nil
+      }.freeze
+      # The weight vector for Logistic Regression.
+      attr_reader :weight_vec
+      # The random generator for performing random sampling in the SGD optimization.
+      attr_reader :rng
+      # Create a new classifier with Logisitc Regression by the SGD optimization.
+      #
+      # :call-seq:
+      #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
+      #
+      # * *Arguments* :
+      #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
+      #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
+      #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
+      #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+      def initialize(params = {})
+        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+        self.params[:random_seed] ||= srand
+        @weight_vec = nil
+        @rng = Random.new(self.params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # :call-seq:
+      #   fit(x, y) -> LogisticRegression
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
+      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
+      # * *Returns* :
+      #   - The learned classifier itself.
+      def fit(x, y)
+        # Generate binary labels
+        negative_label = y.uniq.sort.shift
+        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
+        # Initialize some variables.
+        n_samples, n_features = x.shape
+        rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
+        @weight_vec = NMatrix.zeros([1, n_features])
+        # Start optimization.
+        params[:max_iter].times do |t|
+          # random sampling
+          subset_ids = rand_ids.shift(params[:batch_size])
+          rand_ids.concat(subset_ids)
+          # update the weight vector.
+          eta = 1.0 / (params[:reg_param] * (t + 1))
+          mean_vec = NMatrix.zeros([1, n_features])
+          subset_ids.each do |n|
+            z = @weight_vec.dot(x.row(n).transpose)[0]
+            coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
+            mean_vec += x.row(n) * coef
+          end
+          mean_vec *= eta / params[:batch_size]
+          @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+          # scale the weight vector.
+          scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
+          @weight_vec *= [1.0, scaler].min
+        end
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # :call-seq:
+      #   decision_function(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
+      # * *Returns* :
+      #   - Confidence score per sample.
+      def decision_function(x)
+        w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+        w.map { |v| 1.0 / v }
+      end
+      # Predict class labels for samples.
+      #
+      # :call-seq:
+      #   predict(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
+      # * *Returns* :
+      #   - Predicted class label per sample.
+      def predict(x)
+        decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
+      end
+      # Predict probability for samples.
+      #
+      # :call-seq:
+      #   predict_proba(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probailities.
+      # * *Returns* :
+      #   - Predicted probability per sample.
+      def predict_proba(x)
+        decision_function(x)
+      end
+      # Claculate the mean accuracy of the given testing data.
+      #
+      # :call-seq:
+      #   score(x, y) -> Float
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
+      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
+      # * *Returns* :
+      #   - Mean accuracy
+      def score(x, y)
+        p = predict(x)
+        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
+        n_hits / y.size.to_f
+      end
+      # Serializes object through Marshal#dump.
+      def marshal_dump # :nodoc:
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+      end
+      # Deserialize object through Marshal#load.
+      def marshal_load(obj) # :nodoc:
+        self.params = obj[:params]
+        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end

data/lib/svmkit/linear_model/pegasos_svc.rb CHANGED Viewed

@@ -118,7 +118,7 @@ module SVMKit
      # Calculate the mean accuracy of the given testing data.
       #
       # :call-seq:
-      #   predict(x, y) -> Float
+      #   score(x, y) -> Float
       #
       # * *Arguments* :
       #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.

data/lib/svmkit/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SVMKit
-  VERSION = '0.1.0'.freeze
+  VERSION = '0.1.1'.freeze
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-09-30 00:00:00.000000000 Z
+date: 2017-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -93,6 +93,7 @@ files:
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/kernel_approximation/rbf.rb
+- lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/pegasos_svc.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb