RubyGems - rumale-svm - Versions diffs - 0.10.0 → 0.11.0 - Mend

rumale-svm 0.10.0 → 0.11.0

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +3 -0
data/lib/rumale/svm/clustered_svc.rb +171 -0
data/lib/rumale/svm/version.rb +1 -1
data/lib/rumale/svm.rb +1 -0
data/sig/rumale/svm/clustered_svc.rbs +29 -0
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d027826bfde557a8e724b62252182549d71a3bc90ebcfa7b3e5f4a4f915a553e
-  data.tar.gz: ba0baf950d204dcbf993b41c99ca8a79a63b0ffac87fe5204211c7b56759374e
+  metadata.gz: e355b5a28902d6452614e8c667fd5d8ad192320e8b9f32d3410c3ec8bf37cff1
+  data.tar.gz: ec0c545c306df2ab8146f105d55c6565ba119efbd7f2c3a276da1f1e0d127727
 SHA512:
-  metadata.gz: 65f9a78800033bdbc0354d146cf6150d35aa4924c07164a7bfe578704642d4cde0d49a604fcf5dd75a135462282ee2e18fbcf9157773ce8d827c5a671be6eaa6
-  data.tar.gz: c3d8da2ad2790c8cc656194c1dd0a083a5dbc364ef3b14c768cf5edcb83449d4ef0d61f3d00d8b358171c492e09f2053bd73c7e3b56418234d996fd70945c23b
+  metadata.gz: c864deab7055371b9afc3372ee21cccc81164f8eadf75cdc110657a0dcf2f922fc5e5bfbe332759fbbbb952496df2ab417235e3dbed68b956d0380c8558bc7f3
+  data.tar.gz: 31cbab3030f09ab7bf6cef223d374bbffbd4d91bf8be65696e7b3542fecc6efb6626f38dd56e4594034bc4d4b0cc6c0a9b5586d7f680cc797733e490f7c21c02

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,6 @@
+# [[0.11.0](https://github.com/yoshoku/rumale-svm/compare/v0.10.0...v0.11.0)]
+- Add Rumale::SVM::ClusteredSVC that is a classifier with clustered support vector machine.
 # 0.10.0
 - Add Rumale::SVM::RandomRecursiveSVC that is a classifier with random recursive support vector machine.
 - Add type declaration files for RandomRecursiveSVC and LocallyLinearSVC.

data/lib/rumale/svm/clustered_svc.rb ADDED Viewed

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/classifier'
+require 'rumale/pairwise_metric'
+require 'rumale/validation'
+require 'rumale/svm/linear_svc'
+module Rumale
+  module SVM
+    # ClusteredSVC is a class that implements Clustered Support Vector Classifier.
+    #
+    # @example
+    #   require 'rumale/svm'
+    #
+    #   estimator = Rumale::SVM::ClusteredSVC.new(n_clusters: 16, reg_param_global: 1.0, random_seed: 1)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Gu, Q., and Han, J., "Clustered Support Vector Machines," In Proc. AISTATS'13, pp. 307--315, 2013.
+    class ClusteredSVC < Rumale::Base::Estimator
+      include Rumale::Base::Classifier
+      # Return the classifier.
+      # @return [LinearSVC]
+      attr_reader :model
+      # Return the centroids.
+      # @return [Numo::DFloat] (shape: [n_clusters, n_features])
+      attr_accessor :cluster_centers
+      # Create a new classifier with Clustered Support Vector Machine.
+      #
+      # @param n_clusters [Integer] The number of clusters.
+      # @param reg_param_global [Float] The regularization parameter for global reference vector.
+      # @param max_iter_kmeans [Integer] The maximum number of iterations for k-means clustering.
+      # @param tol_kmeans [Float] The tolerance of termination criterion for k-means clustering.
+      # @param penalty [String] The type of norm used in the penalization ('l2' or 'l1').
+      # @param loss [String] The type of loss function ('squared_hinge' or 'hinge').
+      #   This parameter is ignored if penalty = 'l1'.
+      # @param dual [Boolean] The flag indicating whether to solve dual optimization problem.
+      #   When n_samples > n_features, dual = false is preferable.
+      #   This parameter is ignored if loss = 'hinge'.
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      #   This parameter is ignored if fit_bias = false.
+      # @param tol [Float] The tolerance of termination criterion.
+      # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+      # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+      def initialize(n_clusters: 8, reg_param_global: 1.0, max_iter_kmeans: 100, tol_kmeans: 1e-6, # rubocop:disable Metrics/ParameterLists
+                     penalty: 'l2', loss: 'squared_hinge', dual: true, reg_param: 1.0,
+                     fit_bias: true, bias_scale: 1.0, tol: 1e-3, verbose: false, random_seed: nil)
+        super()
+        @params = {
+          n_clusters: n_clusters,
+          reg_param_global: reg_param_global,
+          max_iter_kmeans: max_iter_kmeans,
+          tol_kmeans: tol_kmeans,
+          penalty: penalty == 'l1' ? 'l1' : 'l2',
+          loss: loss == 'hinge' ? 'hinge' : 'squared_hinge',
+          dual: dual,
+          reg_param: reg_param.to_f,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale.to_f,
+          tol: tol.to_f,
+          verbose: verbose,
+          random_seed: random_seed || Random.rand(4_294_967_295)
+        }
+        @rng = Random.new(@params[:random_seed])
+        @cluster_centers = nil
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [ClusteredSVC] The learned classifier itself.
+      def fit(x, y)
+        z = transform(x)
+        @model = LinearSVC.new(**linear_svc_params).fit(z, y)
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        z = transform(x)
+        @model.decision_function(z)
+      end
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        z = transform(x)
+        @model.predict(z)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_dims + n_dims * n_clusters]) The transformed data, where n_dims is n_features + 1 if fit_bias is true and n_features otherwise.
+      def transform(x)
+        clustering(x) if @cluster_centers.nil?
+        cluster_ids = assign_cluster_id(x)
+        x = expand_feature(x) if fit_bias?
+        n_samples, n_features = x.shape
+        z = Numo::DFloat.zeros(n_samples, n_features * (1 + @params[:n_clusters]))
+        z[true, 0...n_features] = 1.fdiv(Math.sqrt(@params[:reg_param_global])) * x
+        @params[:n_clusters].times do |n|
+          assigned_bits = cluster_ids.eq(n)
+          z[assigned_bits.where, n_features * (n + 1)...n_features * (n + 2)] = x[assigned_bits.where, true]
+        end
+        z
+      end
+      private
+      def linear_svc_params
+        @params.reject { |key, _| CLUSTERED_SVC_BINARY_PARAMS.include?(key) }.merge(fit_bias: false)
+      end
+      def clustering(x)
+        n_samples = x.shape[0]
+        sub_rng = @rng.dup
+        rand_id = Array.new(@params[:n_clusters]) { |_v| sub_rng.rand(0...n_samples) }
+        @cluster_centers = x[rand_id, true].dup
+        @params[:max_iter_kmeans].times do |_t|
+          center_ids = assign_cluster_id(x)
+          old_centers = @cluster_centers.dup
+          @params[:n_clusters].times do |n|
+            assigned_bits = center_ids.eq(n)
+            @cluster_centers[n, true] = x[assigned_bits.where, true].mean(axis: 0) if assigned_bits.count.positive?
+          end
+          error = Numo::NMath.sqrt(((old_centers - @cluster_centers)**2).sum(axis: 1)).mean
+          break if error <= @params[:tol_kmeans]
+        end
+      end
+      def assign_cluster_id(x)
+        distance_matrix = ::Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
+        distance_matrix.min_index(axis: 1) - Numo::Int32[*0.step(distance_matrix.size - 1, @cluster_centers.shape[0])]
+      end
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+      def fit_bias?
+        return false if @params[:fit_bias].nil? || @params[:fit_bias] == false
+        true
+      end
+      CLUSTERED_SVC_BINARY_PARAMS = %i[n_clusters reg_param_global max_iter_kmeans tol_kmeans].freeze
+      private_constant :CLUSTERED_SVC_BINARY_PARAMS
+    end
+  end
+end

data/lib/rumale/svm/version.rb CHANGED Viewed

@@ -5,6 +5,6 @@ module Rumale
   # This module consists of Rumale interfaces for support vector machine algorithms using LIBSVM and LIBLINEAR.
   module SVM
     # The version of Rumale::SVM you are using.
-    VERSION = '0.10.0'
+    VERSION = '0.11.0'
   end
 end

data/lib/rumale/svm.rb CHANGED Viewed

@@ -12,3 +12,4 @@ require 'rumale/svm/logistic_regression'
 require 'rumale/svm/linear_one_class_svm'
 require 'rumale/svm/locally_linear_svc'
 require 'rumale/svm/random_recursive_svc'
+require 'rumale/svm/clustered_svc'

data/sig/rumale/svm/clustered_svc.rbs ADDED Viewed

@@ -0,0 +1,29 @@
+# TypeProf 0.21.8
+# Classes
+module Rumale
+  module SVM
+    class ClusteredSVC
+      @params: {n_clusters: Integer, reg_param_global: Float, max_iter_kmeans: Integer, tol_kmeans: Float, penalty: String, loss: String, dual: bool, reg_param: Float, fit_bias: bool, bias_scale: Float, tol: Float, verbose: bool, random_seed: Integer}
+      @rng: Random
+      attr_reader model: Rumale::SVM::LinearSVC
+      attr_accessor cluster_centers: Numo::DFloat
+      def initialize: (?n_clusters: Integer, ?reg_param_global: Float, ?max_iter_kmeans: Integer, ?tol_kmeans: Float, ?penalty: String, ?loss: String, ?dual: bool, ?reg_param: Float, ?fit_bias: bool, ?bias_scale: Float, ?tol: Float, ?verbose: bool, ?random_seed: (nil | Integer)) -> void
+      def fit: (Numo::DFloat x, Numo::Int32 y) -> ClusteredSVC
+      def decision_function: (Numo::DFloat x) -> Numo::DFloat
+      def predict: (Numo::DFloat x) -> Numo::Int32
+      def transform: (Numo::DFloat x) -> Numo::DFloat
+      private
+      def linear_svc_params: -> (Hash[:bias_scale | :dual | :fit_bias | :loss | :max_iter_kmeans | :n_clusters | :penalty | :random_seed | :reg_param | :reg_param_global | :tol | :tol_kmeans | :verbose, Float | Integer | String | bool])
+      def clustering: (Numo::DFloat x) -> void
+      def assign_cluster_id: (Numo::DFloat x) -> Numo::Int32
+      def expand_feature: (Numo::DFloat x) -> Numo::DFloat
+      def fit_bias?: -> bool
+      CLUSTERED_SVC_BINARY_PARAMS: [:n_clusters, :reg_param_global, :max_iter_kmeans, :tol_kmeans]
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale-svm
 version: !ruby/object:Gem::Version
-  version: 0.10.0
+  version: 0.11.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-02 00:00:00.000000000 Z
+date: 2023-12-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-liblinear
@@ -66,6 +66,7 @@ files:
 - LICENSE.txt
 - README.md
 - lib/rumale/svm.rb
+- lib/rumale/svm/clustered_svc.rb
 - lib/rumale/svm/linear_one_class_svm.rb
 - lib/rumale/svm/linear_svc.rb
 - lib/rumale/svm/linear_svr.rb
@@ -79,6 +80,7 @@ files:
 - lib/rumale/svm/svr.rb
 - lib/rumale/svm/version.rb
 - sig/rumale/svm.rbs
+- sig/rumale/svm/clustered_svc.rbs
 - sig/rumale/svm/linear_one_class_svm.rbs
 - sig/rumale/svm/linear_svc.rbs
 - sig/rumale/svm/linear_svr.rbs