rumale-svm 0.1.0

@@ -0,0 +1,150 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module SVM
+     # LinearSVR is a class that provides Support Vector Regressor in LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_target_values)
+     #   results = estimator.predict(testing_samples)
+     class LinearSVR
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the weight vector for LinearSVR.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LinearSVR.
+       # @return [Float]
+       attr_reader :bias_term
+
+       # Create a new regressor with Support Vector Regressor.
+       #
+       # @param loss [String] The type of loss function ('squared_epsilon_insensitive' or 'epsilon_insensitive').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if loss = 'epsilon_insensitive'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param epsilon [Float] The epsilon parameter in the loss function of epsilon-SVR.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(loss: 'squared_epsilon_insensitive', dual: true, reg_param: 1.0, epsilon: 0.1,
+                      fit_bias: true, bias_scale: 1.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(loss: loss)
+         check_params_float(reg_param: reg_param, epsilon: epsilon, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:loss] = loss == 'epsilon_insensitive' ? 'epsilon_insensitive' : 'squared_epsilon_insensitive'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:epsilon] = epsilon
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+       # @return [LinearSVR] The learned regressor itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples]) Predicted value per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LinearSVR.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       def expand_feature(x)
+         # Append a constant column so LIBLINEAR learns the bias as an extra weight.
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       def weight_and_bias(base_weight)
+         bias_vec = 0.0
+         weight_mat = base_weight.dup
+         if fit_bias?
+           bias_vec = weight_mat[-1]
+           weight_mat = weight_mat[0...-1].dup
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:p] = @params[:epsilon]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L2R_L1LOSS_SVR_DUAL if @params[:loss] == 'epsilon_insensitive'
+         return Numo::Liblinear::SolverType::L2R_L2LOSS_SVR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_L2LOSS_SVR
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+     end
+   end
+ end
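
A minimal usage sketch for LinearSVR, assuming rumale-svm and numo-narray are installed; the synthetic data and variable names below are illustrative assumptions, not part of the gem's diff:

# Sketch: fit LinearSVR on made-up data, then round-trip the estimator
# through Marshal, which works because marshal_dump/marshal_load are defined.
require 'rumale/svm'

x = Numo::DFloat.new(100, 4).rand                        # 100 samples, 4 features
y = x.sum(axis: 1) + Numo::DFloat.new(100).rand * 0.01   # noisy linear target

estimator = Rumale::SVM::LinearSVR.new(reg_param: 1.0, epsilon: 0.1, random_seed: 1)
estimator.fit(x, y)
predicted = estimator.predict(x)                         # Numo::DFloat, shape: [100]

restored = Marshal.load(Marshal.dump(estimator))
restored.predict(x)                                      # same predictions as above
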
@@ -0,0 +1,190 @@
+ # frozen_string_literal: true
+
+ require 'numo/liblinear'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # LogisticRegression is a class that provides Logistic Regression in LIBLINEAR with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class LogisticRegression
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the weight vector for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for LogisticRegression.
+       # @return [Numo::DFloat] (shape: [n_classes])
+       attr_reader :bias_term
+
+       # Create a new classifier with Logistic Regression.
+       #
+       # @param penalty [String] The type of norm used in the penalization ('l2' or 'l1').
+       # @param dual [Boolean] The flag indicating whether to solve the dual optimization problem.
+       #   When n_samples > n_features, dual = false is preferable.
+       #   This parameter is ignored if penalty = 'l1'.
+       # @param reg_param [Float] The regularization parameter.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   This parameter is ignored if fit_bias = false.
+       # @param tol [Float] The tolerance of termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(penalty: 'l2', dual: true, reg_param: 1.0,
+                      fit_bias: true, bias_scale: 1.0,
+                      tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_string(penalty: penalty)
+         check_params_float(reg_param: reg_param, bias_scale: bias_scale, tol: tol)
+         check_params_boolean(dual: dual, fit_bias: fit_bias, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:penalty] = penalty == 'l1' ? 'l1' : 'l2'
+         @params[:dual] = dual
+         @params[:reg_param] = reg_param
+         @params[:fit_bias] = fit_bias
+         @params[:bias_scale] = bias_scale
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [LogisticRegression] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = fit_bias? ? expand_feature(x) : x
+         @model = Numo::Liblinear.train(xx, y, liblinear_params)
+         @weight_vec, @bias_term = weight_and_bias(@model[:w])
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.decision_function(xx, liblinear_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Int32.cast(Numo::Liblinear.predict(xx, liblinear_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = fit_bias? ? expand_feature(x) : x
+         Numo::Liblinear.predict_proba(xx, liblinear_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about LogisticRegression.
+       def marshal_dump
+         { params: @params,
+           model: @model,
+           weight_vec: @weight_vec,
+           bias_term: @bias_term }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         @weight_vec = obj[:weight_vec]
+         @bias_term = obj[:bias_term]
+         nil
+       end
+
+       private
+
+       def expand_feature(x)
+         # Append a constant column so LIBLINEAR learns the bias as an extra weight.
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * bias_scale])
+       end
+
+       def weight_and_bias(base_weight)
+         if binary_class?
+           bias_vec = 0.0
+           weight_mat = base_weight.dup
+           if fit_bias?
+             bias_vec = weight_mat[-1]
+             weight_mat = weight_mat[0...-1].dup
+           end
+         else
+           bias_vec = Numo::DFloat.zeros(n_classes)
+           weight_mat = base_weight.reshape(n_features, n_classes).transpose.dup
+           if fit_bias?
+             bias_vec = weight_mat[true, -1].dup
+             weight_mat = weight_mat[true, 0...-1].dup
+           end
+         end
+         [weight_mat, bias_vec]
+       end
+
+       def liblinear_params
+         res = {}
+         res[:solver_type] = solver_type
+         res[:eps] = @params[:tol]
+         res[:C] = @params[:reg_param]
+         res[:verbose] = @params[:verbose]
+         res[:random_seed] = @params[:random_seed]
+         res
+       end
+
+       def solver_type
+         return Numo::Liblinear::SolverType::L1R_LR if @params[:penalty] == 'l1'
+         return Numo::Liblinear::SolverType::L2R_LR_DUAL if @params[:dual]
+
+         Numo::Liblinear::SolverType::L2R_LR
+       end
+
+       def binary_class?
+         @model[:nr_class] == 2
+       end
+
+       def fit_bias?
+         @params[:fit_bias]
+       end
+
+       def bias_scale
+         @params[:bias_scale]
+       end
+
+       def n_classes
+         @model[:nr_class]
+       end
+
+       def n_features
+         @model[:nr_feature]
+       end
+     end
+   end
+ end
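
As above, a short hedged sketch of driving this class end to end; the data, labels, and variable names are illustrative assumptions, not from the gem:

# Sketch: binary classification with LogisticRegression on synthetic data.
require 'rumale/svm'

x = Numo::DFloat.new(100, 2).rand
y = Numo::Int32.cast(x.sum(axis: 1) > 1.0)   # labels in {0, 1} from a made-up rule

estimator = Rumale::SVM::LogisticRegression.new(penalty: 'l2', dual: false, reg_param: 1.0, random_seed: 1)
estimator.fit(x, y)
labels = estimator.predict(x)                # Numo::Int32, shape: [100]
probs  = estimator.predict_proba(x)          # Numo::DFloat, shape: [100, 2]
scores = estimator.decision_function(x)
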
@@ -0,0 +1,193 @@
+ # frozen_string_literal: true
+
+ require 'numo/libsvm'
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+
+ module Rumale
+   module SVM
+     # NuSVC is a class that provides Kernel Nu-Support Vector Classifier in LIBSVM with the Rumale interface.
+     #
+     # @example
+     #   estimator = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'rbf', gamma: 10.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     class NuSVC
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Create a new classifier with Kernel Nu-Support Vector Classifier.
+       #
+       # @param nu [Float] The regularization parameter. The interval of nu is (0, 1].
+       # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', 'sigmoid', or 'precomputed').
+       # @param degree [Integer] The degree parameter in the polynomial kernel function.
+       # @param gamma [Float] The gamma parameter in the rbf/poly/sigmoid kernel function.
+       # @param coef0 [Float] The coefficient in the poly/sigmoid kernel function.
+       # @param shrinking [Boolean] The flag indicating whether to use the shrinking heuristics.
+       # @param probability [Boolean] The flag indicating whether to train the parameters for probability estimation.
+       # @param cache_size [Float] The cache memory size in MB.
+       # @param tol [Float] The tolerance of termination criterion.
+       # @param verbose [Boolean] The flag indicating whether to output learning process messages.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator.
+       def initialize(nu: 0.5, kernel: 'rbf', degree: 3, gamma: 1.0, coef0: 0.0,
+                      shrinking: true, probability: true, cache_size: 200.0, tol: 1e-3, verbose: false, random_seed: nil)
+         check_params_float(nu: nu, gamma: gamma, coef0: coef0, cache_size: cache_size, tol: tol)
+         check_params_integer(degree: degree)
+         check_params_boolean(shrinking: shrinking, probability: probability, verbose: verbose)
+         check_params_type_or_nil(Integer, random_seed: random_seed)
+         @params = {}
+         @params[:nu] = nu
+         @params[:kernel] = kernel
+         @params[:degree] = degree
+         @params[:gamma] = gamma
+         @params[:coef0] = coef0
+         @params[:shrinking] = shrinking
+         @params[:probability] = probability
+         @params[:cache_size] = cache_size
+         @params[:tol] = tol
+         @params[:verbose] = verbose
+         @params[:random_seed] = random_seed
+         @model = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       #   If the kernel is 'precomputed', x must be a square kernel (Gram) matrix (shape: [n_samples, n_samples]).
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [NuSVC] The learned classifier itself.
+       def fit(x, y)
+         check_sample_array(x)
+         check_label_array(y)
+         check_sample_label_size(x, y)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         @model = Numo::Libsvm.train(xx, y, libsvm_params)
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.decision_function(xx, libsvm_params, @model)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Int32.cast(Numo::Libsvm.predict(xx, libsvm_params, @model))
+       end
+
+       # Predict class probabilities for samples.
+       # This method works correctly only if the probability parameter is true.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       #   If the kernel is 'precomputed', the shape of x must be [n_samples, n_training_samples].
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         check_sample_array(x)
+         xx = precomputed_kernel? ? add_index_col(x) : x
+         Numo::Libsvm.predict_proba(xx, libsvm_params, @model)
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about NuSVC.
+       def marshal_dump
+         { params: @params,
+           model: @model }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @model = obj[:model]
+         nil
+       end
+
+       # Return the indices of the support vectors.
+       # @return [Numo::Int32] (shape: [n_support_vectors])
+       def support
+         @model[:sv_indices]
+       end
+
+       # Return the support vectors.
+       # @return [Numo::DFloat] (shape: [n_support_vectors, n_features])
+       def support_vectors
+         precomputed_kernel? ? del_index_col(@model[:SV]) : @model[:SV]
+       end
+
+       # Return the number of support vectors for each class.
+       # @return [Numo::Int32] (shape: [n_classes])
+       def n_support
+         @model[:nSV]
+       end
+
+       # Return the coefficients of the support vectors in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes - 1, n_support_vectors])
+       def dual_coef
+         @model[:sv_coef]
+       end
+
+       # Return the intercepts in the decision function.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def intercept
+         @model[:rho]
+       end
+
+       # Return the probability parameter alpha.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_a
+         @model[:probA]
+       end
+
+       # Return the probability parameter beta.
+       # @return [Numo::DFloat] (shape: [n_classes * (n_classes - 1) / 2])
+       def prob_b
+         @model[:probB]
+       end
+
+       private
+
+       def add_index_col(x)
+         # LIBSVM's precomputed kernel format expects a leading 1-based sample-index column.
+         idx = Numo::Int32.new(x.shape[0]).seq + 1
+         Numo::NArray.hstack([idx.expand_dims(1), x])
+       end
+
+       def del_index_col(x)
+         x[true, 1..-1].dup
+       end
+
+       def precomputed_kernel?
+         @params[:kernel] == 'precomputed'
+       end
+
+       def libsvm_params
+         res = @params.merge(svm_type: Numo::Libsvm::SvmType::NU_SVC)
+         res[:kernel_type] = case res.delete(:kernel)
+                             when 'linear'
+                               Numo::Libsvm::KernelType::LINEAR
+                             when 'poly'
+                               Numo::Libsvm::KernelType::POLY
+                             when 'sigmoid'
+                               Numo::Libsvm::KernelType::SIGMOID
+                             when 'precomputed'
+                               Numo::Libsvm::KernelType::PRECOMPUTED
+                             else
+                               Numo::Libsvm::KernelType::RBF
+                             end
+         res[:eps] = res.delete(:tol)
+         res
+       end
+     end
+   end
+ end
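
Finally, a hedged sketch of NuSVC with the default RBF kernel and then with a hand-computed Gram matrix; the data and variable names are illustrative assumptions:

# Sketch: NuSVC with an RBF kernel, then with kernel: 'precomputed'. For the
# precomputed case, pass the training Gram matrix to #fit and test-vs-training
# kernel values to #predict; the class itself prepends the 1-based index
# column that LIBSVM expects (see add_index_col above).
require 'rumale/svm'

x = Numo::DFloat.new(60, 3).rand
y = Numo::Int32.cast(x[true, 0] > 0.5)

svc = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'rbf', gamma: 10.0, random_seed: 1)
svc.fit(x, y)
svc.predict(x)
svc.support_vectors                          # Numo::DFloat, shape: [n_sv, 3]

kernel_mat = x.dot(x.transpose)              # a linear kernel, computed by hand
pre = Rumale::SVM::NuSVC.new(nu: 0.5, kernel: 'precomputed', random_seed: 1)
pre.fit(kernel_mat, y)
pre.predict(kernel_mat)                      # rows: kernel values vs. training samples
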