rumale-svm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module SVM
+     # LinearSVR is a class that provides a linear Support Vector Regressor based on LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_target_values)
+     #   results = estimator.predict(testing_samples)
+     class LinearSVR
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the weight vector for LinearSVR.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LinearSVR.
+       # @return [Float]
+       attr_reader :bias_term
+
+       # Create a new regressor with Support Vector Regressor.
+       #
+       # @param loss [String] The type of loss function ('squared_epsilon_insensitive' or 'epsilon_insensitive').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if loss = 'epsilon_insensitive'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param epsilon [Float] The epsilon parameter in the loss function of epsilon-SVR.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(loss: 'squared_epsilon_insensitive', dual: true, reg_param: 1.0, epsilon: 0.1,
+                      fit_bias: true, bias_scale: 1.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(loss: loss)
+         check_params_float(reg_param: reg_param, epsilon: epsilon, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:loss] = loss == 'epsilon_insensitive' ? 'epsilon_insensitive' : 'squared_epsilon_insensitive'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:epsilon] = epsilon
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+       # @return [LinearSVR] The learned regressor itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples]) Predicted value per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LinearSVR.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       # Append a constant column to the samples for fitting the bias term.
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       # Split the trained weight array into the weight vector and the bias term.
+       def weight_and_bias(base_weight)
+         bias_vec = 0.0
+         weight_mat = base_weight.dup
+         if fit_bias?
+           bias_vec = weight_mat[-1]
+           weight_mat = weight_mat[0...-1].dup
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:p] = @params[:epsilon]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L2R_L1LOSS_SVR_DUAL if @params[:loss] == 'epsilon_insensitive'
+         return Numo::Liblinear::SolverType::L2R_L2LOSS_SVR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_L2LOSS_SVR
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+     end
+   end
+ end
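
For orientation, here is a minimal usage sketch for the LinearSVR class above. It is not part of the package: the data and variable names are illustrative, and it assumes numo-narray is installed and the gem is loaded via require 'rumale/svm'.

require 'numo/narray'
require 'rumale/svm'

# Illustrative training data: 100 samples, 5 features, noisy linear targets.
x = Numo::DFloat.new(100, 5).rand
true_weights = Numo::DFloat[1.0, -2.0, 0.5, 0.0, 3.0]
y = x.dot(true_weights) + Numo::DFloat.new(100).rand * 0.1

estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, epsilon: 0.1, random_seed: 1)
estimator.fit(x, y)
predicted = estimator.predict(x)   # Numo::DFloat, shape: [100]
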
@@ -0,0 +1,190 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # LogisticRegression is a class that provides Logistic Regression based on LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class LogisticRegression
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the weight vector for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes, n_features]; for binary problems, [n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes]; for binary problems, a single Float)
+       attr_reader :bias_term
+
+       # Create a new classifier with Logistic Regression.
+       #
+       # @param penalty [String] The type of norm used in the penalization ('l2' or 'l1').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if penalty = 'l1'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(penalty: 'l2', dual: true, reg_param: 1.0,
+                      fit_bias: true, bias_scale: 1.0,
+                      tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(penalty: penalty)
+         check_params_float(reg_param: reg_param, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:penalty] = penalty == 'l1' ? 'l1' : 'l2'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [LogisticRegression] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.decision_function(xx, liblinear_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Int32.cast(Numo::Liblinear.predict(xx, liblinear_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict_proba(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LogisticRegression.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       # Append a constant column to the samples for fitting the bias term.
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       # Split the trained weight array into the weight matrix/vector and the bias term(s).
+       def weight_and_bias(base_weight)
+         if binary_class?
+           bias_vec = 0.0
+           weight_mat = base_weight.dup
+           if fit_bias?
+             bias_vec = weight_mat[-1]
+             weight_mat = weight_mat[0...-1].dup
+           end
+         else
+           bias_vec = Numo::DFloat.zeros(n_classes)
+           # LIBLINEAR stores the multiclass weights as a flat feature-major array.
+           weight_mat = base_weight.reshape(n_features, n_classes).transpose.dup
+           if fit_bias?
+             bias_vec = weight_mat[true, -1].dup
+             weight_mat = weight_mat[true, 0...-1].dup
+           end
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L1R_LR if @params[:penalty] == 'l1'
+         return Numo::Liblinear::SolverType::L2R_LR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_LR
+       end
+
+       def binary_class?
+         @model[:nr_class] == 2
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+
+       def n_classes
+         @model[:nr_class]
+       end
+
+       def n_features
+         @model[:nr_feature]
+       end
+     end
+   end
+ end
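
A similarly minimal sketch for the LogisticRegression class above (again illustrative rather than part of the package; the label construction is arbitrary):

require 'numo/narray'
require 'rumale/svm'

x = Numo::DFloat.new(100, 2).rand
labels = Numo::Int32.zeros(100)
labels[x[true, 0] > 0.5] = 1   # illustrative binary labels (0/1)

estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0)
estimator.fit(x, labels)
probs = estimator.predict_proba(x)   # Numo::DFloat, shape: [100, 2]
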
@@ -0,0 +1,193 @@
+ # frozen_string_literal: true
+
+ require 'numo/libsvm'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # NuSVC is a class that provides a kernel Nu-Support Vector Classifier based on LIBSVM with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'rbf', gamma: 10.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class NuSVC
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Create a new classifier with Kernel Nu-Support Vector Classifier.
+       #
+       # @param nu [Float] The regularization parameter. The interval of nu is (0, 1].
+       # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', 'sigmoid', or 'precomputed').
+       # @param degree [Integer] The degree parameter in the polynomial kernel function.
+       # @param gamma [Float] The gamma parameter in the rbf/poly/sigmoid kernel function.
+       # @param coef0 [Float] The coefficient in the poly/sigmoid kernel function.
+       # @param shrinking [Boolean] The flag indicating whether to use the shrinking heuristics.
+       # @param probability [Boolean] The flag indicating whether to train the parameters for probability estimation.
+       # @param cache_size [Float] The cache memory size in MB.
+       # @param tol [Float] The tolerance of the termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(nu: 0.5, kernel: 'rbf', degree: 3, gamma: 1.0, coef0: 0.0,
+                      shrinking: true, probability: true, cache_size: 200.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(kernel: kernel)
+         check_params_float(nu: nu, gamma: gamma, coef0: coef0, cache_size: cache_size, tol: tol)
+         check_params_integer(degree: degree)
+         check_params_boolean(shrinking: shrinking, probability: probability, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:nu] = nu
+         @params[:kernel] = kernel
+         @params[:degree] = degree
+         @params[:gamma] = gamma
+         @params[:coef0] = coef0
+         @params[:shrinking] = shrinking
+         @params[:probability] = probability
+         @params[:cache_size] = cache_size
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       #   If the kernel is 'precomputed', x must be a square kernel matrix (shape: [n_samples, n_samples]).
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [NuSVC] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         @model = Numo::Libsvm.train(xx, y, libsvm_params)
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes * (n_classes - 1) / 2]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.decision_function(xx, libsvm_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Int32.cast(Numo::Libsvm.predict(xx, libsvm_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       # This method works correctly only if the probability parameter is true.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.predict_proba(xx, libsvm_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about NuSVC.
+       def marshal_dump
+         { params: @params,
+           model: @model }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         nil
+       end
+
+       # Return the indices of support vectors.
+       # @return [Numo::Int32] (shape: [n_support_vectors])
+       def support
+         @model[:sv_indices]
+       end
+
+       # Return the support vectors.
+       # @return [Numo::DFloat] (shape: [n_support_vectors, n_features])
+       def support_vectors
+         precomputed_kernel? ? del_index_col(@model[:SV]) : @model[:SV]
+       end
+
+       # Return the number of support vectors for each class.
+       # @return [Numo::Int32] (shape: [n_classes])
+       def n_support
+         @model[:nSV]
+       end
+
+       # Return the coefficients of the support vectors in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes - 1, n_support_vectors])
+       def duel_coef
+         @model[:sv_coef]
+       end
+
+       # Return the intercepts in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def intercept
+         @model[:rho]
+       end
+
+       # Return the probability parameter alpha.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_a
+         @model[:probA]
+       end
+
+       # Return the probability parameter beta.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_b
+         @model[:probB]
+       end
+
+       private
+
+       # LIBSVM's precomputed kernel format expects the first column of each row
+       # to be the one-based sample index.
+       def add_index_col(x)
+         idx = Numo::Int32.new(x.shape[0]).seq + 1
+         Numo::NArray.hstack([idx.expand_dims(1), x])
+       end
+
+       def del_index_col(x)
+         x[true, 1..-1].dup
+       end
+
+       def precomputed_kernel?
+         @params[:kernel] == 'precomputed'
+       end
+
+       def libsvm_params
+         res = @params.merge(svm_type: Numo::Libsvm::SvmType::NU_SVC)
+         res[:kernel_type] = case res.delete(:kernel)
+                             when 'linear'
+                               Numo::Libsvm::KernelType::LINEAR
+                             when 'poly'
+                               Numo::Libsvm::KernelType::POLY
+                             when 'sigmoid'
+                               Numo::Libsvm::KernelType::SIGMOID
+                             when 'precomputed'
+                               Numo::Libsvm::KernelType::PRECOMPUTED
+                             else
+                               Numo::Libsvm::KernelType::RBF
+                             end
+         res[:eps] = res.delete(:tol)
+         res
+       end
+     end
+   end
+ end
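
Finally, a hedged sketch of the NuSVC class above with kernel: 'precomputed', the path that exercises add_index_col. The rbf_kernel helper below is illustrative and not provided by the gem:

require 'numo/narray'
require 'rumale/svm'

# Illustrative RBF Gram-matrix helper (not part of the package).
def rbf_kernel(a, b, gamma)
  sq_dists = (a**2).sum(axis: 1).expand_dims(1) + (b**2).sum(axis: 1) - 2 * a.dot(b.transpose)
  Numo::NMath.exp(-gamma * sq_dists)
end

x = Numo::DFloat.new(60, 3).rand
y = Numo::Int32.zeros(60)
y[30...60] = 1

kernel_mat = rbf_kernel(x, x, 10.0)
estimator = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'precomputed', random_seed: 1)
estimator.fit(kernel_mat, y)

# At prediction time, each row holds kernel values against the training samples.
predicted = estimator.predict(rbf_kernel(x, x, 10.0))
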