svmkit 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: b36f6b299c47d1107d587aafeb7bb66531f1208c
-   data.tar.gz: 1bd382f3339b8fb08454493a45a2338020791b6c
+   metadata.gz: 46878b59860b61bae7b522fb02af984208609f56
+   data.tar.gz: 6e889c6ad8382c654455a242d2f7f27de41de2d5
  SHA512:
-   metadata.gz: 0676f2e9b3ef4ac9786f10ca976721e73d2cd918a9c939900281e36267ab14a3413c3a719d9504415f527e5d6163d5640ea2af186e023c3980327cb7c476afba
-   data.tar.gz: e8dcf72f7d1641903a4625bb23399deabf9a19931a5c00bd2c5077b525a2d0361b194b8c43dee1b7a365b25eafdabb460e0a4ae21f17afd5b401379671379463
+   metadata.gz: cddb239bf0768e6d983ce942ed6a7bdda8b827fa2e73e51c1b4591e8af3c641339377417f844358159c3a2bdff51d2f5678ef07fe21fe86e51136289e69ea38c
+   data.tar.gz: 64c2029c729de580765ad9ee89fd57821a40773721eac291201cb9b9f4c72697f5945c8f5259ed8d6a879f0b35dac841bcdd6d5322014c9cc78b9a42046dc310
data/HISTORY.md CHANGED
@@ -1,3 +1,6 @@
+ # 0.2.7
+ - Fixed to support multiclass classification in LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier.
+
  # 0.2.6
  - Added class for Decision Tree classifier.
  - Added class for Random Forest classifier.
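With this release, the four classifiers listed above binarize labels internally using the one-vs-the-rest strategy, so wrapping them in SVMKit::Multiclass::OneVsRestClassifier is no longer necessary (see the README changes below). A minimal sketch of the new usage, with made-up toy data:

    require 'svmkit'

    # Hypothetical three-class toy dataset.
    samples = Numo::DFloat[[-2.0, -1.5], [-1.8, -2.2], [0.1, 0.2],
                           [-0.2, 0.3], [2.1, 1.8], [1.7, 2.3]]
    labels = Numo::Int32[0, 0, 1, 1, 2, 2]

    # As of 0.2.7, fit accepts multiclass labels directly.
    estimator = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 2, random_seed: 1)
    estimator.fit(samples, labels)
    p estimator.classes.to_a   # => [0, 1, 2]
    p estimator.predict(samples).to_a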
data/README.md CHANGED
@@ -42,9 +42,7 @@ normalized = normalizer.fit_transform(samples)
  transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
  transformed = transformer.fit_transform(normalized)

- base_classifier =
-   SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
- classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
+ classifier = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
  classifier.fit(transformed, labels)

  File.open('trained_normalizer.dat', 'wb') { |f| f.write(Marshal.dump(normalizer)) }
@@ -76,12 +74,10 @@ require 'svmkit'

  samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')

- kernel_svc =
-   SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
- ovr_kernel_svc = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: kernel_svc)
+ kernel_svc = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)

  kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
- cv = SVMKit::ModelSelection::CrossValidation.new(estimator: ovr_kernel_svc, splitter: kf)
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: kernel_svc, splitter: kf)

  kernel_mat = SVMKit::PairwiseMetric::rbf_kernel(samples, nil, 0.005)
  report = cv.perform(kernel_mat, labels)
data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement kernel method-based estimators.
    module KernelMachine
-     # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+     # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
@@ -23,14 +25,18 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for Kernel SVC.
-     # @return [Numo::DFloat] (shape: [n_trainig_sample])
+     # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
      attr_reader :weight_vec

-     # Return the random generator for performing random sampling in the Pegasos algorithm.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param max_iter [Integer] The maximum number of iterations.
@@ -42,6 +48,7 @@ module SVMKit
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -52,25 +59,22 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @return [KernelSVC] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
-       # Initialize some variables.
-       n_training_samples = x.shape[0]
-       rand_ids = []
-       weight_vec = Numo::DFloat.zeros(n_training_samples)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
-         target_id = rand_ids.shift
-         # update the weight vector
-         func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
-         func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
-         weight_vec[target_id] += 1.0 if func < 1.0
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           @weight_vec[n, true] = binary_fit(x, bin_y)
+         end
+       else
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @weight_vec = binary_fit(x, bin_y)
        end
-       # Store the learned model.
-       @weight_vec = weight_vec * Numo::DFloat[*bin_y]
+
        self
      end

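The multiclass branch of fit above trains one binary subproblem per class. The labels are binarized with Numo::Int32.cast(y.eq(label)) * 2 - 1: eq yields a 0/1 mask, and the affine map turns it into -1/+1. A standalone sketch of the trick (label values chosen arbitrarily):

    require 'numo/narray'

    y = Numo::Int32[1, 3, 2, 3, 1]
    # Treat class 3 as the positive class: 0/1 mask -> -1/+1 labels.
    bin_y = Numo::Int32.cast(y.eq(3)) * 2 - 1
    p bin_y.to_a  # => [-1, 1, -1, 1, -1]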
@@ -78,9 +82,9 @@ module SVMKit
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       x.dot(@weight_vec)
+       x.dot(@weight_vec.transpose)
      end

      # Predict class labels for samples.
@@ -89,7 +93,11 @@ module SVMKit
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Calculate the mean accuracy of the given testing data.
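In the multiclass branch of predict, each row of the decision matrix carries one one-vs-the-rest score per class, and the predicted label is the class with the largest score. A sketch with made-up decision values:

    require 'numo/narray'

    classes = Numo::Int32[1, 2, 3]
    decision_values = Numo::DFloat[[0.2, -1.0, 0.5],
                                   [-0.3, 0.8, 0.1]]
    predicted = Numo::Int32.asarray(
      Array.new(decision_values.shape[0]) { |n| classes[decision_values[n, true].max_index] }
    )
    p predicted.to_a  # => [3, 2]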
@@ -105,7 +113,10 @@ module SVMKit
      # Dump marshal data.
      # @return [Hash] The marshal data about KernelSVC.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -113,9 +124,30 @@ module SVMKit
      def marshal_load(obj)
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end
+
+     private
+
+     def binary_fit(x, bin_y)
+       # Initialize some variables.
+       n_training_samples = x.shape[0]
+       rand_ids = []
+       weight_vec = Numo::DFloat.zeros(n_training_samples)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+         target_id = rand_ids.shift
+         # update the weight vector
+         func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
+         func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
+         weight_vec[target_id] += 1.0 if func < 1.0
+       end
+       weight_vec * Numo::DFloat[*bin_y]
+     end
    end
  end
end
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -8,7 +8,7 @@ module SVMKit
    module LinearModel
      # LogisticRegression is a class that implements Logistic Regression
      # with stochastic gradient descent (SGD) optimization.
-     # Note that the class performs as a binary classifier.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -23,14 +23,18 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for Logistic Regression.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term (a.k.a. intercept) for Logistic Regression.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for transformation.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

@@ -42,79 +46,59 @@ module SVMKit
      # If fit_bias is true, the feature vector v becomes [v; bias_scale].
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+     # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
-     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:fit_bias] = fit_bias
        @params[:bias_scale] = bias_scale
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+       @params[:normalize] = normalize
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-     # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
-     #   to be used for fitting the model.
+     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [LogisticRegression] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels.
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
-       # Expand feature vectors for bias term.
-       samples = x
-       if @params[:fit_bias]
-         samples = Numo::NArray.hstack(
-           [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
-         )
-       end
-       # Initialize some variables.
-       n_samples, n_features = samples.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       weight_vec = Numo::DFloat.zeros(n_features)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         # update the weight vector.
-         eta = 1.0 / (@params[:reg_param] * (t + 1))
-         mean_vec = Numo::DFloat.zeros(n_features)
-         subset_ids.each do |n|
-           z = weight_vec.dot(samples[n, true])
-           coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
-           mean_vec += samples[n, true] * coef
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n]))
+           weight, bias = binary_fit(x, bin_y)
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
          end
-         mean_vec *= eta / @params[:batch_size]
-         weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
-         # scale the weight vector.
-         norm = Math.sqrt(weight_vec.dot(weight_vec))
-         scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
-         weight_vec *= [1.0, scaler].min
-       end
-       # Store the learned model.
-       if @params[:fit_bias]
-         @weight_vec = weight_vec[0...n_features - 1]
-         @bias_term = weight_vec[n_features - 1]
        else
-         @weight_vec = weight_vec[0...n_features]
-         @bias_term = 0.0
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label))
+         @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       @weight_vec.dot(x.transpose) + @bias_term
+       x.dot(@weight_vec.transpose) + @bias_term
      end

      # Predict class labels for samples.
@@ -122,7 +106,11 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(sigmoid(decision_function(x)).map { |v| v >= 0.5 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.5)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Predict probability for samples.
@@ -130,17 +118,24 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
+       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
        n_samples, = x.shape
-       proba = Numo::DFloat.zeros(n_samples, 2)
-       proba[true, 1] = sigmoid(decision_function(x))
-       proba[true, 0] = 1.0 - proba[true, 1]
-       proba
+       probs = Numo::DFloat.zeros(n_samples, 2)
+       probs[true, 1] = proba
+       probs[true, 0] = 1.0 - proba
+       probs
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about LogisticRegression.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
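In the multiclass path, predict_proba applies the sigmoid to every one-vs-the-rest score and then rescales each row to sum to one, since the per-class sigmoids are not jointly calibrated. A sketch with arbitrary scores:

    require 'numo/narray'

    scores = Numo::DFloat[[1.2, -0.4, 0.3]]  # one row of OvR decision values
    proba = 1.0 / (Numo::NMath.exp(-scores) + 1.0)
    proba = (proba.transpose / proba.sum(axis: 1)).transpose
    p proba.sum(axis: 1).to_a  # => [1.0], up to floating-point rounding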
@@ -149,14 +144,53 @@ module SVMKit
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end

      private

-     def sigmoid(x)
-       1.0 / (Numo::NMath.exp(-x) + 1.0)
+     def binary_fit(x, bin_y)
+       # Expand feature vectors for bias term.
+       samples = @params[:fit_bias] ? expand_feature(x) : x
+       # Initialize some variables.
+       n_samples, n_features = samples.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       weight_vec = Numo::DFloat.zeros(n_features)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         # update the weight vector.
+         df = samples[subset_ids, true].dot(weight_vec.transpose)
+         coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
+         mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+         weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+         # scale the weight vector.
+         normalize_weight_vec(weight_vec) if @params[:normalize]
+       end
+       split_weight_vec_bias(weight_vec)
+     end
+
+     def expand_feature(x)
+       Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+     end
+
+     def learning_rate(iter)
+       1.0 / (@params[:reg_param] * (iter + 1))
+     end
+
+     def normalize_weight_vec(weight_vec)
+       norm = Math.sqrt(weight_vec.dot(weight_vec))
+       weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+     end
+
+     def split_weight_vec_bias(weight_vec)
+       weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+       bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+       [weights, bias]
      end
    end
  end
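The extracted helpers make the Pegasos-style hyperparameters explicit: the step size decays as 1 / (reg_param * (t + 1)), and the optional projection in normalize_weight_vec caps the weight norm at 1 / sqrt(reg_param). A quick numeric check of the schedule, assuming reg_param = 0.5:

    reg_param = 0.5
    learning_rate = ->(iter) { 1.0 / (reg_param * (iter + 1)) }
    p learning_rate.call(0)       # => 2.0
    p learning_rate.call(9)       # => 0.2
    p 1.0 / Math.sqrt(reg_param)  # maximum allowed ||w||, ~1.414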
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement generalized linear models.
    module LinearModel
-     # SVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
+     # SVC is a class that implements Support Vector Classifier
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -21,36 +23,44 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for SVC.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term (a.k.a. intercept) for SVC.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for performing random sampling in the Pegasos algorithm.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
      # @param bias_scale [Float] The scale of the bias term.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+     # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
-     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:fit_bias] = fit_bias
        @params[:bias_scale] = bias_scale
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+       @params[:normalize] = normalize
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -60,56 +70,34 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [SVC] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
-       # Expand feature vectors for bias term.
-       samples = x
-       if @params[:fit_bias]
-         samples = Numo::NArray.hstack(
-           [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
-         )
-       end
-       # Initialize some variables.
-       n_samples, n_features = samples.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       weight_vec = Numo::DFloat.zeros(n_features)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
-         n_subsamples = target_ids.size
-         next if n_subsamples.zero?
-         # update the weight vector.
-         eta = 1.0 / (@params[:reg_param] * (t + 1))
-         mean_vec = Numo::DFloat.zeros(n_features)
-         target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
-         mean_vec *= eta / n_subsamples
-         weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
-         # scale the weight vector.
-         norm = Math.sqrt(weight_vec.dot(weight_vec))
-         scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
-         weight_vec *= [1.0, scaler].min
-       end
-       # Store the learned model.
-       if @params[:fit_bias]
-         @weight_vec = weight_vec[0...n_features - 1]
-         @bias_term = weight_vec[n_features - 1]
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           weight, bias = binary_fit(x, bin_y)
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
+         end
        else
-         @weight_vec = weight_vec[0...n_features]
-         @bias_term = 0.0
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       @weight_vec.dot(x.transpose) + @bias_term
+       x.dot(@weight_vec.transpose) + @bias_term
      end

      # Predict class labels for samples.
@@ -117,13 +105,21 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about SVC.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -132,9 +128,55 @@ module SVMKit
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end
+
+     private
+
+     def binary_fit(x, bin_y)
+       # Expand feature vectors for bias term.
+       samples = @params[:fit_bias] ? expand_feature(x) : x
+       # Initialize some variables.
+       n_samples, n_features = samples.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       weight_vec = Numo::DFloat.zeros(n_features)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
+         n_subsamples = target_ids.size
+         next if n_subsamples.zero?
+         # update the weight vector.
+         mean_vec = samples[target_ids, true].transpose.dot(bin_y[target_ids]) / n_subsamples
+         weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+         # scale the weight vector.
+         normalize_weight_vec(weight_vec) if @params[:normalize]
+       end
+       split_weight_vec_bias(weight_vec)
+     end
+
+     def expand_feature(x)
+       Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+     end
+
+     def learning_rate(iter)
+       1.0 / (@params[:reg_param] * (iter + 1))
+     end
+
+     def normalize_weight_vec(weight_vec)
+       norm = Math.sqrt(weight_vec.dot(weight_vec))
+       weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+     end
+
+     def split_weight_vec_bias(weight_vec)
+       weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+       bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+       [weights, bias]
+     end
    end
  end
end
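SVC's binary_fit updates only the margin violators in each mini-batch: a sample enters the gradient only when w.x * y < 1, which is exactly where the hinge loss has a nonzero sub-gradient. A sketch of the filtering step with made-up numbers:

    require 'numo/narray'

    weight_vec = Numo::DFloat[0.5, -0.25]
    samples = Numo::DFloat[[1.0, 0.0], [4.0, 0.0], [0.2, 0.0]]
    bin_y = Numo::Int32[1, 1, -1]
    subset_ids = [0, 1, 2]
    # Keep only the samples that violate the margin (w.x * y < 1).
    target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
    p target_ids  # => [0, 2]; sample 1 already satisfies the margin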
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -4,13 +4,16 @@ require 'svmkit/base/base_estimator.rb'
  require 'svmkit/base/classifier.rb'

  module SVMKit
-   # This module consists of the classes that implement multi-label classification strategy.
+   # This module consists of the classes that implement the multi-class classification strategy.
    module Multiclass
-     # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
+     # @note
+     #   All classifiers in SVMKit support multi-class classification since version 0.2.7.
+     #   There is no need to explicitly use this class for multi-class classification.
+     #
+     # OneVsRestClassifier is a class that implements the One-vs-Rest (OvR) strategy for multi-class classification.
      #
      # @example
-     #   base_estimator =
-     #     SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
+     #   base_estimator = SVMKit::LinearModel::LogisticRegression.new
      #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
      #   estimator.fit(training_samples, training_labels)
      #   results = estimator.predict(testing_samples)
@@ -26,9 +29,9 @@ module SVMKit
      # @return [Numo::Int32] (shape: [n_classes])
      attr_reader :classes

-     # Create a new multi-label classifier with the one-vs-rest startegy.
+     # Create a new multi-class classifier with the one-vs-rest strategy.
      #
-     # @param estimator [Classifier] The (binary) classifier for construction a multi-label classifier.
+     # @param estimator [Classifier] The (binary) classifier for constructing a multi-class classifier.
      def initialize(estimator: nil)
        @params = {}
        @params[:estimator] = estimator
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -6,9 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement polynomial models.
    module PolynomialModel
-     # FactorizationMachineClassifier is a class that
-     # implements Fatorization Machine for binary classification
-     # with (mini-batch) stochastic gradient descent optimization.
+     # FactorizationMachineClassifier is a class that implements Factorization Machine
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -26,22 +26,26 @@ module SVMKit
      include Base::Classifier

      # Return the factor matrix for Factorization Machine.
-     # @return [Numo::DFloat] (shape: [n_factors, n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
      attr_reader :factor_mat

      # Return the weight vector for Factorization Machine.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term for Factorization Machine.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for transformation.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Factorization Machine.
      #
      # @param n_factors [Integer] The number of factors.
      # @param loss [String] The loss function ('hinge' or 'logistic').
@@ -67,7 +71,8 @@ module SVMKit
        @params[:random_seed] ||= srand
        @factor_mat = nil
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -77,33 +82,27 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [FactorizationMachineClassifier] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels.
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.map { |l| l != negative_label ? 1.0 : -1.0 }
-       # Initialize some variables.
-       n_samples, n_features = x.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       @factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
-       @weight_vec = Numo::DFloat.zeros(n_features)
-       @bias_term = 0.0
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # Random sampling.
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         data = x[subset_ids, true]
-         label = bin_y[subset_ids]
-         # Calculate gradients for loss function.
-         loss_grad = loss_gradient(data, label)
-         next if loss_grad.ne(0.0).count.zero?
-         # Update each parameter.
-         @bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad)
-         @weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data)
-         @params[:n_factors].times do |n|
-           @factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
-             factor_gradient(loss_grad, data, @factor_mat[n, true])
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           factor, weight, bias = binary_fit(x, bin_y)
+           @factor_mat[n, true, true] = factor
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
          end
+       else
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

@@ -112,8 +111,12 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
      def decision_function(x)
-       linear_term = @bias_term + x.dot(@weight_vec)
-       factor_term = 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+       linear_term = @bias_term + x.dot(@weight_vec.transpose)
+       factor_term = if @classes.size <= 2
+                       0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+                     else
+                       0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+                     end
        linear_term + factor_term
      end

@@ -122,26 +125,37 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Predict probability for samples.
-     # Note that this method works normally only if the 'loss' parameter is set to 'logistic'.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
+       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
        n_samples, = x.shape
-       proba = Numo::DFloat.zeros(n_samples, 2)
-       proba[true, 1] = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-       proba[true, 0] = 1.0 - proba[true, 1]
-       proba
+       probs = Numo::DFloat.zeros(n_samples, 2)
+       probs[true, 1] = proba
+       probs[true, 0] = 1.0 - proba
+       probs
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about FactorizationMachineClassifier
      def marshal_dump
-       { params: @params, factor_mat: @factor_mat, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         factor_mat: @factor_mat,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -151,39 +165,76 @@ module SVMKit
        @factor_mat = obj[:factor_mat]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end

      private

-     def hinge_loss_gradient(x, y)
-       evaluated = y * decision_function(x)
+     def binary_fit(x, bin_y)
+       # Initialize some variables.
+       n_samples, n_features = x.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
+       weight_vec = Numo::DFloat.zeros(n_features)
+       bias_term = 0.0
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # Random sampling.
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         data = x[subset_ids, true]
+         label = bin_y[subset_ids]
+         # Calculate gradients for loss function.
+         loss_grad = loss_gradient(data, label, factor_mat, weight_vec, bias_term)
+         next if loss_grad.ne(0.0).count.zero?
+         # Update each parameter.
+         bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad, bias_term)
+         weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
+         @params[:n_factors].times do |n|
+           factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
+             factor_gradient(loss_grad, data, factor_mat[n, true])
+         end
+       end
+       [factor_mat, weight_vec, bias_term]
+     end
+
+     def bin_decision_function(x, factor, weight, bias)
+       bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
+     end
+
+     def hinge_loss_gradient(x, y, factor, weight, bias)
+       evaluated = y * bin_decision_function(x, factor, weight, bias)
        gradient = Numo::DFloat.zeros(evaluated.size)
        gradient[evaluated < 1.0] = -y[evaluated < 1.0]
        gradient
      end

-     def logistic_loss_gradient(x, y)
-       evaluated = y * decision_function(x)
+     def logistic_loss_gradient(x, y, factor, weight, bias)
+       evaluated = y * bin_decision_function(x, factor, weight, bias)
        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
        (sigmoid_func - 1.0) * y
      end

-     def loss_gradient(x, y)
-       @params[:loss] == 'hinge' ? hinge_loss_gradient(x, y) : logistic_loss_gradient(x, y)
+     def loss_gradient(x, y, factor, weight, bias)
+       if @params[:loss] == 'hinge'
+         hinge_loss_gradient(x, y, factor, weight, bias)
+       else
+         logistic_loss_gradient(x, y, factor, weight, bias)
+       end
      end

      def learning_rate(reg_param, iter)
        1.0 / (reg_param * (iter + 1))
      end

-     def bias_gradient(loss_grad)
-       loss_grad.mean + @params[:reg_param_bias] * @bias_term
+     def bias_gradient(loss_grad, bias)
+       loss_grad.mean + @params[:reg_param_bias] * bias
      end

-     def weight_gradient(loss_grad, data)
-       (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * @weight_vec
+     def weight_gradient(loss_grad, data, weight)
+       (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
      end

      def factor_gradient(loss_grad, data, factor)
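The gradient helpers now take the per-class parameters as arguments because, while one-vs-the-rest fitting is in progress, the subproblem's parameters are not yet stored in @factor_mat, @weight_vec, and @bias_term. For reference, the logistic branch computes (sigmoid(y * f(x)) - 1) * y, the derivative of log(1 + exp(-y * f(x))) with respect to f(x); a standalone numeric check at margin zero:

    require 'numo/narray'

    y = Numo::DFloat[1.0]
    evaluated = Numo::DFloat[0.0]  # pretend y * f(x) == 0
    sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
    grad = (sigmoid_func - 1.0) * y
    p grad.to_a  # => [-0.5]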
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
    # @!visibility private
-   VERSION = '0.2.6'
+   VERSION = '0.2.7'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
-   version: 0.2.6
+   version: 0.2.7
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-03-11 00:00:00.000000000 Z
+ date: 2018-04-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: numo-narray