rumale-svm 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d027826bfde557a8e724b62252182549d71a3bc90ebcfa7b3e5f4a4f915a553e
4
- data.tar.gz: ba0baf950d204dcbf993b41c99ca8a79a63b0ffac87fe5204211c7b56759374e
3
+ metadata.gz: e355b5a28902d6452614e8c667fd5d8ad192320e8b9f32d3410c3ec8bf37cff1
4
+ data.tar.gz: ec0c545c306df2ab8146f105d55c6565ba119efbd7f2c3a276da1f1e0d127727
5
5
  SHA512:
6
- metadata.gz: 65f9a78800033bdbc0354d146cf6150d35aa4924c07164a7bfe578704642d4cde0d49a604fcf5dd75a135462282ee2e18fbcf9157773ce8d827c5a671be6eaa6
7
- data.tar.gz: c3d8da2ad2790c8cc656194c1dd0a083a5dbc364ef3b14c768cf5edcb83449d4ef0d61f3d00d8b358171c492e09f2053bd73c7e3b56418234d996fd70945c23b
6
+ metadata.gz: c864deab7055371b9afc3372ee21cccc81164f8eadf75cdc110657a0dcf2f922fc5e5bfbe332759fbbbb952496df2ab417235e3dbed68b956d0380c8558bc7f3
7
+ data.tar.gz: 31cbab3030f09ab7bf6cef223d374bbffbd4d91bf8be65696e7b3542fecc6efb6626f38dd56e4594034bc4d4b0cc6c0a9b5586d7f680cc797733e490f7c21c02
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ # [[0.11.0](https://github.com/yoshoku/rumale-svm/compare/v0.10.0...v0.11.0)]
2
+ - Add Rumale::SVM::ClusteredSVC, a classifier based on the clustered support vector machine.
3
+
1
4
  # 0.10.0
2
5
  - Add Rumale::SVM::RandomRecursiveSVC, a classifier based on the random recursive support vector machine.
3
6
  - Add type declaration files for RandomRecursiveSVC and LocallyLinearSVC.
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+ require 'rumale/svm/linear_svc'
8
+
9
+ module Rumale
10
+ module SVM
11
+ # ClusteredSVC is a class that implements Clustered Support Vector Classifier.
12
+ #
13
+ # @example
14
+ # require 'rumale/svm'
15
+ #
16
+ # estimator = Rumale::SVM::ClusteredSVC.new(n_clusters: 16, reg_param_global: 1.0, random_seed: 1)
17
+ # estimator.fit(training_samples, training_labels)
18
+ # results = estimator.predict(testing_samples)
19
+ #
20
+ # *Reference*
21
+ # - Gu, Q., and Han, J., "Clustered Support Vector Machines," In Proc. AISTATS'13, pp. 307--315, 2013.
22
+ class ClusteredSVC < Rumale::Base::Estimator
23
+ include Rumale::Base::Classifier
24
+
25
+ # Return the classifier.
26
+ # @return [LinearSVC]
27
+ attr_reader :model
28
+
29
+ # Return the centroids.
30
+ # @return [Numo::DFloat] (shape: [n_clusters, n_features])
31
+ attr_accessor :cluster_centers
32
+
33
+ # Create a new classifier with Clustered Support Vector Machine.
34
+ #
35
+ # @param n_clusters [Integer] The number of clusters.
36
+ # @param reg_param_global [Float] The regularization parameter for the global reference vector.
37
+ # @param max_iter_kmeans [Integer] The maximum number of iterations for k-means clustering.
38
+ # @param tol_kmeans [Float] The tolerance of termination criterion for k-means clustering.
39
+ # @param penalty [String] The type of norm used in the penalization ('l2' or 'l1').
40
+ # @param loss [String] The type of loss function ('squared_hinge' or 'hinge').
41
+ # This parameter is ignored if penalty = 'l1'.
42
+ # @param dual [Boolean] The flag indicating whether to solve dual optimization problem.
43
+ # When n_samples > n_features, dual = false is preferable.
44
+ # This parameter is ignored if loss = 'hinge'.
45
+ # @param reg_param [Float] The regularization parameter.
46
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
47
+ # @param bias_scale [Float] The scale of the bias term.
48
+ # This parameter is ignored if fit_bias = false.
49
+ # @param tol [Float] The tolerance of termination criterion.
50
+ # @param verbose [Boolean] The flag indicating whether to output learning process messages.
51
+ # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
52
+ def initialize(n_clusters: 8, reg_param_global: 1.0, max_iter_kmeans: 100, tol_kmeans: 1e-6, # rubocop:disable Metrics/ParameterLists
53
+ penalty: 'l2', loss: 'squared_hinge', dual: true, reg_param: 1.0,
54
+ fit_bias: true, bias_scale: 1.0, tol: 1e-3, verbose: false, random_seed: nil)
55
+ super()
56
+ @params = {
57
+ n_clusters: n_clusters,
58
+ reg_param_global: reg_param_global,
59
+ max_iter_kmeans: max_iter_kmeans,
60
+ tol_kmeans: tol_kmeans,
61
+ penalty: penalty == 'l1' ? 'l1' : 'l2',
62
+ loss: loss == 'hinge' ? 'hinge' : 'squared_hinge',
63
+ dual: dual,
64
+ reg_param: reg_param.to_f,
65
+ fit_bias: fit_bias,
66
+ bias_scale: bias_scale.to_f,
67
+ tol: tol.to_f,
68
+ verbose: verbose,
69
+ random_seed: random_seed || Random.rand(4_294_967_295)
70
+ }
71
+ @rng = Random.new(@params[:random_seed])
72
+ @cluster_centers = nil
73
+ end
74
+
75
+ # Fit the model with given training data.
76
+ #
77
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
78
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
79
+ # @return [ClusteredSVC] The learned classifier itself.
80
+ def fit(x, y)
81
+ z = transform(x)
82
+ @model = LinearSVC.new(**linear_svc_params).fit(z, y)
83
+ self
84
+ end
85
+
86
+ # Calculate confidence scores for samples.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
90
+ def decision_function(x)
91
+ z = transform(x)
92
+ @model.decision_function(z)
93
+ end
94
+
95
+ # Predict class labels for samples.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
98
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
99
+ def predict(x)
100
+ z = transform(x)
101
+ @model.predict(z)
102
+ end
103
+
104
+ # Transform the given data with the learned model.
105
+ #
106
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
107
+ # @return [Numo::DFloat] (shape: [n_samples, n_dims * (1 + n_clusters)]) The transformed data, where n_dims is n_features + 1 if fit_bias is true and n_features otherwise.
108
+ def transform(x)
109
+ clustering(x) if @cluster_centers.nil?
110
+
111
+ cluster_ids = assign_cluster_id(x)
112
+
113
+ x = expand_feature(x) if fit_bias?
114
+
115
+ n_samples, n_features = x.shape
116
+ z = Numo::DFloat.zeros(n_samples, n_features * (1 + @params[:n_clusters]))
117
+ z[true, 0...n_features] = 1.fdiv(Math.sqrt(@params[:reg_param_global])) * x
118
+ @params[:n_clusters].times do |n|
119
+ assigned_bits = cluster_ids.eq(n)
120
+ z[assigned_bits.where, n_features * (n + 1)...n_features * (n + 2)] = x[assigned_bits.where, true]
121
+ end
122
+
123
+ z
124
+ end
125
+
126
+ private
127
+
128
+ def linear_svc_params
129
+ @params.reject { |key, _| CLUSTERED_SVC_BINARY_PARAMS.include?(key) }.merge(fit_bias: false)
130
+ end
131
+
132
+ def clustering(x)
133
+ n_samples = x.shape[0]
134
+ sub_rng = @rng.dup
135
+ rand_id = Array.new(@params[:n_clusters]) { |_v| sub_rng.rand(0...n_samples) }
136
+ @cluster_centers = x[rand_id, true].dup
137
+
138
+ @params[:max_iter_kmeans].times do |_t|
139
+ center_ids = assign_cluster_id(x)
140
+ old_centers = @cluster_centers.dup
141
+ @params[:n_clusters].times do |n|
142
+ assigned_bits = center_ids.eq(n)
143
+ @cluster_centers[n, true] = x[assigned_bits.where, true].mean(axis: 0) if assigned_bits.count.positive?
144
+ end
145
+ error = Numo::NMath.sqrt(((old_centers - @cluster_centers)**2).sum(axis: 1)).mean
146
+ break if error <= @params[:tol_kmeans]
147
+ end
148
+ end
149
+
150
+ def assign_cluster_id(x)
151
+ distance_matrix = ::Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
152
+ distance_matrix.min_index(axis: 1) - Numo::Int32[*0.step(distance_matrix.size - 1, @cluster_centers.shape[0])]
153
+ end
154
+
155
+ def expand_feature(x)
156
+ n_samples = x.shape[0]
157
+ Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
158
+ end
159
+
160
+ def fit_bias?
161
+ return false if @params[:fit_bias].nil? || @params[:fit_bias] == false
162
+
163
+ true
164
+ end
165
+
166
+ CLUSTERED_SVC_BINARY_PARAMS = %i[n_clusters reg_param_global max_iter_kmeans tol_kmeans].freeze
167
+
168
+ private_constant :CLUSTERED_SVC_BINARY_PARAMS
169
+ end
170
+ end
171
+ end
@@ -5,6 +5,6 @@ module Rumale
5
5
  # This module consists of Rumale interfaces for support vector machine algorithms using LIBSVM and LIBLINEAR.
6
6
  module SVM
7
7
  # The version of Rumale::SVM you are using.
8
- VERSION = '0.10.0'
8
+ VERSION = '0.11.0'
9
9
  end
10
10
  end
data/lib/rumale/svm.rb CHANGED
@@ -12,3 +12,4 @@ require 'rumale/svm/logistic_regression'
12
12
  require 'rumale/svm/linear_one_class_svm'
13
13
  require 'rumale/svm/locally_linear_svc'
14
14
  require 'rumale/svm/random_recursive_svc'
15
+ require 'rumale/svm/clustered_svc'
@@ -0,0 +1,29 @@
1
+ # TypeProf 0.21.8
2
+
3
+ # Classes
4
+ module Rumale
5
+ module SVM
6
+ class ClusteredSVC
7
+ @params: {n_clusters: Integer, reg_param_global: Float, max_iter_kmeans: Integer, tol_kmeans: Float, penalty: String, loss: String, dual: bool, reg_param: Float, fit_bias: bool, bias_scale: Float, tol: Float, verbose: bool, random_seed: Integer}
8
+ @rng: Random
9
+
10
+ attr_reader model: Rumale::SVM::LinearSVC
11
+ attr_accessor cluster_centers: Numo::DFloat
12
+ def initialize: (?n_clusters: Integer, ?reg_param_global: Float, ?max_iter_kmeans: Integer, ?tol_kmeans: Float, ?penalty: String, ?loss: String, ?dual: bool, ?reg_param: Float, ?fit_bias: bool, ?bias_scale: Float, ?tol: Float, ?verbose: bool, ?random_seed: (nil | Integer)) -> void
13
+ def fit: (Numo::DFloat x, Numo::Int32 y) -> ClusteredSVC
14
+ def decision_function: (Numo::DFloat x) -> Numo::DFloat
15
+ def predict: (Numo::DFloat x) -> Numo::Int32
16
+ def transform: (Numo::DFloat x) -> Numo::DFloat
17
+
18
+ private
19
+
20
+ def linear_svc_params: -> (Hash[:bias_scale | :dual | :fit_bias | :loss | :max_iter_kmeans | :n_clusters | :penalty | :random_seed | :reg_param | :reg_param_global | :tol | :tol_kmeans | :verbose, Float | Integer | String | bool])
21
+ def clustering: (Numo::DFloat x) -> void
22
+ def assign_cluster_id: (Numo::DFloat x) -> Numo::Int32
23
+ def expand_feature: (Numo::DFloat x) -> Numo::DFloat
24
+ def fit_bias?: -> bool
25
+
26
+ CLUSTERED_SVC_BINARY_PARAMS: [:n_clusters, :reg_param_global, :max_iter_kmeans, :tol_kmeans]
27
+ end
28
+ end
29
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale-svm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-02 00:00:00.000000000 Z
11
+ date: 2023-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-liblinear
@@ -66,6 +66,7 @@ files:
66
66
  - LICENSE.txt
67
67
  - README.md
68
68
  - lib/rumale/svm.rb
69
+ - lib/rumale/svm/clustered_svc.rb
69
70
  - lib/rumale/svm/linear_one_class_svm.rb
70
71
  - lib/rumale/svm/linear_svc.rb
71
72
  - lib/rumale/svm/linear_svr.rb
@@ -79,6 +80,7 @@ files:
79
80
  - lib/rumale/svm/svr.rb
80
81
  - lib/rumale/svm/version.rb
81
82
  - sig/rumale/svm.rbs
83
+ - sig/rumale/svm/clustered_svc.rbs
82
84
  - sig/rumale/svm/linear_one_class_svm.rbs
83
85
  - sig/rumale/svm/linear_svc.rbs
84
86
  - sig/rumale/svm/linear_svr.rbs