rumale-svm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module SVM
+     # LinearSVR is a class that provides a linear Support Vector Regressor based on LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_target_values)
+     #   results = estimator.predict(testing_samples)
+     class LinearSVR
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the weight vector for LinearSVR.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LinearSVR.
+       # @return [Float]
+       attr_reader :bias_term
+
+       # Create a new regressor with Support Vector Regressor.
+       #
+       # @param loss [String] The type of loss function ('squared_epsilon_insensitive' or 'epsilon_insensitive').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if loss = 'epsilon_insensitive'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param epsilon [Float] The epsilon parameter in the loss function of epsilon-SVR.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(loss: 'squared_epsilon_insensitive', dual: true, reg_param: 1.0, epsilon: 0.1,
+                      fit_bias: true, bias_scale: 1.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(loss: loss)
+         check_params_float(reg_param: reg_param, epsilon: epsilon, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:loss] = loss == 'epsilon_insensitive' ? 'epsilon_insensitive' : 'squared_epsilon_insensitive'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:epsilon] = epsilon
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+       # @return [LinearSVR] The learned regressor itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples]) Predicted value per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LinearSVR.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       # Append a constant column to the samples for fitting the bias term.
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       # Split the trained weight array into the weight vector and the bias term.
+       def weight_and_bias(base_weight)
+         bias_vec = 0.0
+         weight_mat = base_weight.dup
+         if fit_bias?
+           bias_vec = weight_mat[-1]
+           weight_mat = weight_mat[0...-1].dup
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:p] = @params[:epsilon]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L2R_L1LOSS_SVR_DUAL if @params[:loss] == 'epsilon_insensitive'
+         return Numo::Liblinear::SolverType::L2R_L2LOSS_SVR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_L2LOSS_SVR
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+     end
+   end
+ end
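
For orientation, here is a minimal usage sketch for the LinearSVR class above. It is not part of the package: the data and variable names are illustrative, and it assumes numo-narray is installed and the gem is loaded via require 'rumale/svm'.

require 'numo/narray'
require 'rumale/svm'

# Illustrative training data: 100 samples, 5 features, noisy linear targets.
x = Numo::DFloat.new(100, 5).rand
true_weights = Numo::DFloat[1.0, -2.0, 0.5, 0.0, 3.0]
y = x.dot(true_weights) + Numo::DFloat.new(100).rand * 0.1

estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, epsilon: 0.1, random_seed: 1)
estimator.fit(x, y)
predicted = estimator.predict(x)   # Numo::DFloat, shape: [100]
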
@@ -0,0 +1,190 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # LogisticRegression is a class that provides Logistic Regression based on LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class LogisticRegression
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the weight vector for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes, n_features]; for binary problems, [n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes]; for binary problems, a single Float)
+       attr_reader :bias_term
+
+       # Create a new classifier with Logistic Regression.
+       #
+       # @param penalty [String] The type of norm used in the penalization ('l2' or 'l1').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if penalty = 'l1'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(penalty: 'l2', dual: true, reg_param: 1.0,
+                      fit_bias: true, bias_scale: 1.0,
+                      tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(penalty: penalty)
+         check_params_float(reg_param: reg_param, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:penalty] = penalty == 'l1' ? 'l1' : 'l2'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [LogisticRegression] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.decision_function(xx, liblinear_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Int32.cast(Numo::Liblinear.predict(xx, liblinear_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict_proba(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LogisticRegression.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       # Append a constant column to the samples for fitting the bias term.
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       # Split the trained weight array into the weight matrix/vector and the bias term(s).
+       def weight_and_bias(base_weight)
+         if binary_class?
+           bias_vec = 0.0
+           weight_mat = base_weight.dup
+           if fit_bias?
+             bias_vec = weight_mat[-1]
+             weight_mat = weight_mat[0...-1].dup
+           end
+         else
+           bias_vec = Numo::DFloat.zeros(n_classes)
+           # LIBLINEAR stores the multiclass weights as a flat feature-major array.
+           weight_mat = base_weight.reshape(n_features, n_classes).transpose.dup
+           if fit_bias?
+             bias_vec = weight_mat[true, -1].dup
+             weight_mat = weight_mat[true, 0...-1].dup
+           end
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L1R_LR if @params[:penalty] == 'l1'
+         return Numo::Liblinear::SolverType::L2R_LR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_LR
+       end
+
+       def binary_class?
+         @model[:nr_class] == 2
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+
+       def n_classes
+         @model[:nr_class]
+       end
+
+       def n_features
+         @model[:nr_feature]
+       end
+     end
+   end
+ end
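
A similarly minimal sketch for the LogisticRegression class above (again illustrative rather than part of the package; the label construction is arbitrary):

require 'numo/narray'
require 'rumale/svm'

x = Numo::DFloat.new(100, 2).rand
labels = Numo::Int32.zeros(100)
labels[x[true, 0] > 0.5] = 1   # illustrative binary labels (0/1)

estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0)
estimator.fit(x, labels)
probs = estimator.predict_proba(x)   # Numo::DFloat, shape: [100, 2]
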
@@ -0,0 +1,193 @@
+ # frozen_string_literal: true
+
+ require 'numo/libsvm'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # NuSVC is a class that provides a kernel Nu-Support Vector Classifier based on LIBSVM with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'rbf', gamma: 10.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class NuSVC
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Create a new classifier with Kernel Nu-Support Vector Classifier.
+       #
+       # @param nu [Float] The regularization parameter. The interval of nu is (0, 1].
+       # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', 'sigmoid', or 'precomputed').
+       # @param degree [Integer] The degree parameter in the polynomial kernel function.
+       # @param gamma [Float] The gamma parameter in the rbf/poly/sigmoid kernel function.
+       # @param coef0 [Float] The coefficient in the poly/sigmoid kernel function.
+       # @param shrinking [Boolean] The flag indicating whether to use the shrinking heuristics.
+       # @param probability [Boolean] The flag indicating whether to train the parameters for probability estimation.
+       # @param cache_size [Float] The cache memory size in MB.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(nu: 0.5, kernel: 'rbf', degree: 3, gamma: 1.0, coef0: 0.0,
+                      shrinking: true, probability: true, cache_size: 200.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(kernel: kernel)
+         check_params_float(nu: nu, gamma: gamma, coef0: coef0, cache_size: cache_size, tol: tol)
+         check_params_integer(degree: degree)
+         check_params_boolean(shrinking: shrinking, probability: probability, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:nu] = nu
+         @params[:kernel] = kernel
+         @params[:degree] = degree
+         @params[:gamma] = gamma
+         @params[:coef0] = coef0
+         @params[:shrinking] = shrinking
+         @params[:probability] = probability
+         @params[:cache_size] = cache_size
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       #   If the kernel is 'precomputed', x must be a square kernel matrix (shape: [n_samples, n_samples]).
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [NuSVC] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         @model = Numo::Libsvm.train(xx, y, libsvm_params)
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes * (n_classes - 1) / 2]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.decision_function(xx, libsvm_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Int32.cast(Numo::Libsvm.predict(xx, libsvm_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       # This method works correctly only if the probability parameter is true.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.predict_proba(xx, libsvm_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about NuSVC.
+       def marshal_dump
+         { params: @params,
+           model: @model }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         nil
+       end
+
+       # Return the indices of support vectors.
+       # @return [Numo::Int32] (shape: [n_support_vectors])
+       def support
+         @model[:sv_indices]
+       end
+
+       # Return the support vectors.
+       # @return [Numo::DFloat] (shape: [n_support_vectors, n_features])
+       def support_vectors
+         precomputed_kernel? ? del_index_col(@model[:SV]) : @model[:SV]
+       end
+
+       # Return the number of support vectors for each class.
+       # @return [Numo::Int32] (shape: [n_classes])
+       def n_support
+         @model[:nSV]
+       end
+
+       # Return the coefficients of the support vectors in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes - 1, n_support_vectors])
+       def duel_coef
+         @model[:sv_coef]
+       end
+
+       # Return the intercepts in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def intercept
+         @model[:rho]
+       end
+
+       # Return the probability parameter alpha.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_a
+         @model[:probA]
+       end
+
+       # Return the probability parameter beta.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_b
+         @model[:probB]
+       end
+
+       private
+
+       # LIBSVM's precomputed kernel format expects the first column of each row
+       # to be the one-based sample index.
+       def add_index_col(x)
+         idx = Numo::Int32.new(x.shape[0]).seq + 1
+         Numo::NArray.hstack([idx.expand_dims(1), x])
+       end
+
+       def del_index_col(x)
+         x[true, 1..-1].dup
+       end
+
+       def precomputed_kernel?
+         @params[:kernel] == 'precomputed'
+       end
+
+       def libsvm_params
+         res = @params.merge(svm_type: Numo::Libsvm::SvmType::NU_SVC)
+         res[:kernel_type] = case res.delete(:kernel)
+                             when 'linear'
+                               Numo::Libsvm::KernelType::LINEAR
+                             when 'poly'
+                               Numo::Libsvm::KernelType::POLY
+                             when 'sigmoid'
+                               Numo::Libsvm::KernelType::SIGMOID
+                             when 'precomputed'
+                               Numo::Libsvm::KernelType::PRECOMPUTED
+                             else
+                               Numo::Libsvm::KernelType::RBF
+                             end
+         res[:eps] = res.delete(:tol)
+         res
+       end
+     end
+   end
+ end
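
Finally, a hedged sketch of the NuSVC class above with kernel: 'precomputed', the path that exercises add_index_col. The rbf_kernel helper below is illustrative and not provided by the gem:

require 'numo/narray'
require 'rumale/svm'

# Illustrative RBF Gram-matrix helper (not part of the package).
def rbf_kernel(a, b, gamma)
  sq_dists = (a**2).sum(axis: 1).expand_dims(1) + (b**2).sum(axis: 1) - 2 * a.dot(b.transpose)
  Numo::NMath.exp(-gamma * sq_dists)
end

x = Numo::DFloat.new(60, 3).rand
y = Numo::Int32.zeros(60)
y[30...60] = 1

kernel_mat = rbf_kernel(x, x, 10.0)
estimator = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'precomputed', random_seed: 1)
estimator.fit(kernel_mat, y)

# At prediction time, each row holds kernel values against the training samples.
predicted = estimator.predict(rbf_kernel(x, x, 10.0))
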