RubyGems - rumale-svm - Versions diffs - 0.8.0 → 0.9.0 - Mend

rumale-svm 0.8.0 → 0.9.0

Files changed (8) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +3 -0
data/LICENSE.txt +1 -1
data/README.md +1 -5
data/lib/rumale/svm/locally_linear_svc.rb +261 -0
data/lib/rumale/svm/version.rb +2 -2
data/lib/rumale/svm.rb +1 -0
metadata +7 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5611ab7bee24d673f0803b5679532326389a9a485dd5beafeb9d448ab5538363
-  data.tar.gz: 033024146647d6ba5e4acf08eb876dc35e0fe25f141555891dec77e84cd6572b
+  metadata.gz: 8853a60f150df418c30832c66b04f6531a1f3fa16f96ed3cb2286ca88a553dbe
+  data.tar.gz: aded967a3b1f82ded39b07ef3e1f435959f26c8d8392fb212acee3fcd006366f
 SHA512:
-  metadata.gz: b70772fd9480c0de3f28d477bdf35ccae3260f9a9d1156423cac8654caebbc1891a9182e1ee3a4b3b7add8ce885ac06c2e57e8d3ca46a71e9d7de99d46d1c33d
-  data.tar.gz: a38ccc2f952bb1e3fe5c4906e9225bb2d6c5c938b93aa6d08a8b66a2b20f4e94b0d96fe08c86f489050051eb149cdf68b74826b6a5f7439fbab5ee47de2b67e6
+  metadata.gz: 3f2ff48c445b9b5cfc804a17acb3f0c2563512eca01482c81c18fa8e662f9855e45abaede8ab5d7a8b087de84c131f2863f730d0313374b6af97480509fdd18f
+  data.tar.gz: 53747b28e162327cb11a9a5db9343d6baaad09e636909d8149c8ced58ba930a757b6251252968ddc7a7187c651abc56fb0ed046fde3ec6fcdc6a0d718404f420

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,6 @@
+# 0.9.0
+- Add Rumale::SVM::LocallyLinearSVC, a classifier based on the locally linear support vector machine.
 # 0.8.0
 - Refactor to support the new Rumale API.

data/LICENSE.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2019-2022 Atsushi Tatsuma
+Copyright (c) 2019-2023 Atsushi Tatsuma
 All rights reserved.
 Redistribution and use in source and binary forms, with or without

data/README.md CHANGED Viewed

@@ -5,13 +5,9 @@
 [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale-svm/blob/main/LICENSE.txt)
 [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale-svm/doc/)
-Rumale::SVM provides support vector machine algorithms in
+Rumale::SVM provides support vector machine algorithms using
 [LIBSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) and [LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/)
 with [Rumale](https://github.com/yoshoku/rumale) interface.
-Many machine learning libraries use LIBSVM and LIBLINEAR as background libraries of support vector machine algorithms.
-On the other hand, Rumale implements support vector machine algorithms based on the mini-batch stochastic gradient descent method
-implemented in Ruby.
-Rumale::SVM adds the functions of support vector machine similar to general machine learning libraries to Rumale.
 ## Installation

data/lib/rumale/svm/locally_linear_svc.rb ADDED Viewed

@@ -0,0 +1,261 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/classifier'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'rumale/validation'
+module Rumale
+  module SVM
+    # LocallyLinearSVC is a class that implements Locally Linear Support Vector Classifier with the squared hinge loss.
+    # This classifier requires Numo::Linalg (or Numo::TinyLinalg) and Lbfgsb gems,
+    # but they are not listed in the runtime dependencies of Rumale::SVM.
+    # Therefore, you should install and load Numo::Linalg and Lbfgsb gems explicitly to use this classifier.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #   require 'lbfgsb'
+    #   require 'rumale/svm'
+    #
+    #   estimator = Rumale::SVM::LocallyLinearSVC.new(reg_param: 1.0, n_anchors: 128)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Ladicky, L., and Torr, P. H. S., "Locally Linear Support Vector Machines," Proc. ICML'11, pp. 985--992, 2011.
+    class LocallyLinearSVC < Rumale::Base::Estimator
+      include Rumale::Base::Classifier
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the anchor vectors.
+      # @return [Numo::DFloat] (shape: [n_anchors, n_features])
+      attr_reader :anchors
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_classes, n_anchors, n_features])
+      attr_reader :weight_vec
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_classes, n_anchors])
+      attr_reader :bias_term
+      # Create a new classifier with Locally Linear Support Vector Machine.
+      #
+      # @param reg_param [Float] The regularization parameter for weight vector.
+      # @param reg_param_local [Float] The regularization parameter for local coordinate.
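+      # @param max_iter [Integer] The maximum number of iterations, used both for finding anchors with k-means algorithm
+      #   and for optimizing the weight vector with L-BFGS-B.
+      # @param tol [Float] The tolerance of termination criterion, used both for finding anchors with k-means algorithm
+      #   and for optimizing the weight vector with L-BFGS-B.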
+      # @param n_anchors [Integer] The number of anchors.
+      # @param n_neighbors [Integer] The number of neighbors.
+      # @param fit_bias [Boolean] The flag indicating whether to fit bias term.
+      # @param bias_scale [Float] The scale parameter for bias term.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(reg_param: 1.0, reg_param_local: 1e-4, max_iter: 100, tol: 1e-4,
+                     n_anchors: 128, n_neighbors: 10, fit_bias: true, bias_scale: 1.0, random_seed: nil)
+        raise 'LocallyLinearSVC requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        super()
+        @params = {
+          reg_param: reg_param,
+          reg_param_local: reg_param_local,
+          max_iter: max_iter,
+          n_anchors: n_anchors,
+          tol: tol,
+          n_neighbors: n_neighbors,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          random_seed: random_seed || srand
+        }
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [LocallyLinearSVC] The learned classifier itself.
+      def fit(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_label_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+        raise 'LocallyLinearSVC#fit requires Lbfgsb but that is not loaded' unless defined?(Lbfgsb)
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        find_anchors(x)
+        n_samples, n_features = x.shape
+        @coeff = Numo::DFloat.zeros(n_samples, @params[:n_anchors])
+        n_samples.times do |i|
+          xi = x[i, true]
+          @coeff[i, true] = local_coordinates(xi)
+        end
+        x = expand_feature(x) if fit_bias?
+        if multiclass_problem?
+          n_classes = @classes.size
+          @weight_vec = Numo::DFloat.zeros(n_classes, @params[:n_anchors], n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes, @params[:n_anchors])
+          n_classes.times do |n|
+            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+            w, b = partial_fit(x, bin_y)
+            @weight_vec[n, true, true] = w
+            @bias_term[n, true] = b
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        n_samples = x.shape[0]
+        if multiclass_problem?
+          n_classes = @classes.size
+          df = Numo::DFloat.zeros(n_samples, n_classes)
+          n_samples.times do |i|
+            xi = x[i, true]
+            coeff = local_coordinates(xi)
+            n_classes.times do |j|
+              df[i, j] = coeff.dot(@weight_vec[j, true, true]).dot(xi) + coeff.dot(@bias_term[j, true])
+            end
+          end
+        else
+          df = Numo::DFloat.zeros(n_samples)
+          n_samples.times do |i|
+            xi = x[i, true]
+            coeff = local_coordinates(xi)
+            df[i] = coeff.dot(@weight_vec).dot(xi) + coeff.dot(@bias_term)
+          end
+        end
+        df
+      end
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        n_samples = x.shape[0]
+        if multiclass_problem?
+          df = decision_function(x)
+          predicted = Array.new(n_samples) { |n| @classes[df[n, true].max_index] }
+        else
+          df = decision_function(x).ge(0.0).to_a
+          predicted = Array.new(n_samples) { |n| @classes[df[n]] }
+        end
+        Numo::Int32.asarray(predicted)
+      end
+      private
+      def partial_fit(base_x, bin_y) # rubocop:disable Metrics/AbcSize
+        fnc = proc do |w, x, y, coeff, reg_param|
+          n_anchors = coeff.shape[1]
+          n_samples, n_features = x.shape
+          w = w.reshape(n_anchors, n_features)
+          z = (coeff * x.dot(w.transpose)).sum(axis: 1)
+          t = 1 - y * z
+          indices = t.gt(0)
+          grad = reg_param * w
+          if indices.count.positive?
+            sx = x[indices, true]
+            sy = y[indices]
+            sc = coeff[indices, true]
+            sz = z[indices]
+            grad += 2.fdiv(n_samples) * (sc.transpose * (sz - sy)).dot(sx)
+          end
+          loss = 0.5 * reg_param * w.dot(w.transpose).trace + (x.class.maximum(0, t)**2).sum.fdiv(n_samples)
+          [loss, grad.reshape(n_anchors * n_features)]
+        end
+        n_features = base_x.shape[1]
+        sub_rng = @rng.dup
+        w_init = 2.0 * ::Rumale::Utils.rand_uniform(@params[:n_anchors] * n_features, sub_rng) - 1.0
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @coeff, @params[:reg_param]],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
+        w = res[:x].reshape(@params[:n_anchors], n_features)
+        if fit_bias?
+          [w[true, 0...-1].dup, w[true, -1].dup]
+        else
+          [w, Numo::DFloat.zeros(@params[:n_anchors])]
+        end
+      end
+      def local_coordinates(xi)
+        neighbor_ids = find_neighbors(xi)
+        diff = @anchors[neighbor_ids, true] - xi
+        gram_mat = diff.dot(diff.transpose)
+        gram_mat[gram_mat.diag_indices] += @params[:reg_param_local].fdiv(@params[:n_neighbors]) * gram_mat.trace
+        local_coeff = Numo::Linalg.solve(gram_mat, Numo::DFloat.ones(@params[:n_neighbors]))
+        local_coeff /= local_coeff.sum # + 1e-8
+        coeff = Numo::DFloat.zeros(@params[:n_anchors])
+        coeff[neighbor_ids] = local_coeff
+        coeff
+      end
+      def find_neighbors(xi)
+        diff = @anchors - xi
+        dist = (diff**2).sum(axis: 1)
+        dist.sort_index.to_a[0...@params[:n_neighbors]]
+      end
+      def find_anchors(x)
+        n_samples = x.shape[0]
+        sub_rng = @rng.dup
+        rand_id = Array.new(@params[:n_anchors]) { |_v| sub_rng.rand(0...n_samples) }
+        @anchors = x[rand_id, true].dup
+        @params[:max_iter].times do |_t|
+          center_ids = assign_anchors(x)
+          old_anchors = @anchors.dup
+          @params[:n_anchors].times do |n|
+            assigned_bits = center_ids.eq(n)
+            @anchors[n, true] = x[assigned_bits.where, true].mean(axis: 0) if assigned_bits.count.positive?
+          end
+          error = Numo::NMath.sqrt(((old_anchors - @anchors)**2).sum(axis: 1)).mean
+          break if error <= @params[:tol]
+        end
+      end
+      def assign_anchors(x)
+        distance_matrix = ::Rumale::PairwiseMetric.euclidean_distance(x, @anchors)
+        distance_matrix.min_index(axis: 1) - Numo::Int32[*0.step(distance_matrix.size - 1, @anchors.shape[0])]
+      end
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+      def multiclass_problem?
+        @classes.size > 2
+      end
+    end
+  end
+end

data/lib/rumale/svm/version.rb CHANGED Viewed

@@ -2,9 +2,9 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
-  # This module consists of Rumale interfaces for suppor vector machine algorithms with LIBSVM and LIBLINEAR.
+  # This module consists of Rumale interfaces for support vector machine algorithms using LIBSVM and LIBLINEAR.
   module SVM
     # The version of Rumale::SVM you are using.
-    VERSION = '0.8.0'
+    VERSION = '0.9.0'
   end
 end

data/lib/rumale/svm.rb CHANGED Viewed

@@ -10,3 +10,4 @@ require 'rumale/svm/linear_svc'
 require 'rumale/svm/linear_svr'
 require 'rumale/svm/logistic_regression'
 require 'rumale/svm/linear_one_class_svm'
+require 'rumale/svm/locally_linear_svc'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale-svm
 version: !ruby/object:Gem::Version
-  version: 0.8.0
+  version: 0.9.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-01-01 00:00:00.000000000 Z
+date: 2023-11-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-liblinear
@@ -52,8 +52,8 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.24'
-description: 'Rumale::SVM provides support vector machine algorithms of LIBSVM and
-  LIBLINEAR with Rumale interface.
+description: 'Rumale::SVM provides support vector machine algorithms using LIBSVM
+  and LIBLINEAR with Rumale interface.
   '
 email:
@@ -69,6 +69,7 @@ files:
 - lib/rumale/svm/linear_one_class_svm.rb
 - lib/rumale/svm/linear_svc.rb
 - lib/rumale/svm/linear_svr.rb
+- lib/rumale/svm/locally_linear_svc.rb
 - lib/rumale/svm/logistic_regression.rb
 - lib/rumale/svm/nu_svc.rb
 - lib/rumale/svm/nu_svr.rb
@@ -110,9 +111,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.26
+rubygems_version: 3.4.20
 signing_key:
 specification_version: 4
-summary: Rumale::SVM provides support vector machine algorithms of LIBSVM and LIBLINEAR
+summary: Rumale::SVM provides support vector machine algorithms using LIBSVM and LIBLINEAR
   with Rumale interface.
 test_files: []