svmkit 0.2.6 → 0.2.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: b36f6b299c47d1107d587aafeb7bb66531f1208c
- data.tar.gz: 1bd382f3339b8fb08454493a45a2338020791b6c
+ metadata.gz: 46878b59860b61bae7b522fb02af984208609f56
+ data.tar.gz: 6e889c6ad8382c654455a242d2f7f27de41de2d5
  SHA512:
- metadata.gz: 0676f2e9b3ef4ac9786f10ca976721e73d2cd918a9c939900281e36267ab14a3413c3a719d9504415f527e5d6163d5640ea2af186e023c3980327cb7c476afba
- data.tar.gz: e8dcf72f7d1641903a4625bb23399deabf9a19931a5c00bd2c5077b525a2d0361b194b8c43dee1b7a365b25eafdabb460e0a4ae21f17afd5b401379671379463
+ metadata.gz: cddb239bf0768e6d983ce942ed6a7bdda8b827fa2e73e51c1b4591e8af3c641339377417f844358159c3a2bdff51d2f5678ef07fe21fe86e51136289e69ea38c
+ data.tar.gz: 64c2029c729de580765ad9ee89fd57821a40773721eac291201cb9b9f4c72697f5945c8f5259ed8d6a879f0b35dac841bcdd6d5322014c9cc78b9a42046dc310
data/HISTORY.md CHANGED
@@ -1,3 +1,6 @@
+ # 0.2.7
+ - Added multiclass classification support to LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier.
+
  # 0.2.6
  - Added class for Decision Tree classifier.
  - Added class for Random Forest classifier.
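A minimal sketch of what this change enables, with made-up data and illustrative parameters: since 0.2.7 a classifier can be fitted on multiclass labels directly, without the OneVsRestClassifier wrapper.

```ruby
require 'svmkit'

# Toy three-class problem; samples and labels are illustrative only.
samples = Numo::DFloat.new(6, 2).rand
labels  = Numo::Int32[0, 0, 1, 1, 2, 2]

# The one-vs-rest decomposition now happens inside the classifier.
classifier = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 2, random_seed: 1)
classifier.fit(samples, labels)
classifier.predict(samples) # => Numo::Int32, one predicted label per sample
```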
data/README.md CHANGED
@@ -42,9 +42,7 @@ normalized = normalizer.fit_transform(samples)
  transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
  transformed = transformer.fit_transform(normalized)

- base_classifier =
- SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
- classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
+ classifier = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
  classifier.fit(transformed, labels)

  File.open('trained_normalizer.dat', 'wb') { |f| f.write(Marshal.dump(normalizer)) }
@@ -76,12 +74,10 @@ require 'svmkit'

  samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')

- kernel_svc =
- SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
- ovr_kernel_svc = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: kernel_svc)
+ kernel_svc = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)

  kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
- cv = SVMKit::ModelSelection::CrossValidation.new(estimator: ovr_kernel_svc, splitter: kf)
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: kernel_svc, splitter: kf)

  kernel_mat = SVMKit::PairwiseMetric::rbf_kernel(samples, nil, 0.005)
  report = cv.perform(kernel_mat, labels)
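The Marshal-based persistence shown in the README keeps working across this change because each classifier defines marshal_dump/marshal_load (extended below to carry the new classes attribute). A minimal sketch, reusing classifier and transformed from the example above; the file name is illustrative:

```ruby
# Save the trained classifier and restore it in another process.
File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)) }
restored = Marshal.load(File.binread('trained_classifier.dat'))
restored.predict(transformed)
```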
data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
  # This module consists of the classes that implement kernel method-based estimator.
  module KernelMachine
- # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+ # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
+ # with stochastic gradient descent (SGD) optimization.
+ # For multiclass classification problems, it uses the one-vs-rest strategy.
  #
  # @example
  #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
@@ -23,14 +25,18 @@ module SVMKit
  include Base::Classifier

  # Return the weight vector for Kernel SVC.
- # @return [Numo::DFloat] (shape: [n_trainig_sample])
+ # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
  attr_reader :weight_vec

- # Return the random generator for performing random sampling in the Pegasos algorithm.
+ # Return the class labels.
+ # @return [Numo::Int32] (shape: [n_classes])
+ attr_reader :classes
+
+ # Return the random generator for performing random sampling.
  # @return [Random]
  attr_reader :rng

- # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+ # Create a new classifier with Kernel Support Vector Machine by SGD optimization.
  #
  # @param reg_param [Float] The regularization parameter.
  # @param max_iter [Integer] The maximum number of iterations.
@@ -42,6 +48,7 @@ module SVMKit
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @weight_vec = nil
+ @classes = nil
  @rng = Random.new(@params[:random_seed])
  end
 
@@ -52,25 +59,22 @@ module SVMKit
52
59
  # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
53
60
  # @return [KernelSVC] The learned classifier itself.
54
61
  def fit(x, y)
55
- # Generate binary labels
56
- negative_label = y.to_a.uniq.sort.shift
57
- bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
58
- # Initialize some variables.
59
- n_training_samples = x.shape[0]
60
- rand_ids = []
61
- weight_vec = Numo::DFloat.zeros(n_training_samples)
62
- # Start optimization.
63
- @params[:max_iter].times do |t|
64
- # random sampling
65
- rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
66
- target_id = rand_ids.shift
67
- # update the weight vector
68
- func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
69
- func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
70
- weight_vec[target_id] += 1.0 if func < 1.0
62
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
63
+ n_classes = @classes.size
64
+ _n_samples, n_features = x.shape
65
+
66
+ if n_classes > 2
67
+ @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
68
+ n_classes.times do |n|
69
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
70
+ @weight_vec[n, true] = binary_fit(x, bin_y)
71
+ end
72
+ else
73
+ negative_label = y.to_a.uniq.sort.first
74
+ bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
75
+ @weight_vec = binary_fit(x, bin_y)
71
76
  end
72
- # Store the learned model.
73
- @weight_vec = weight_vec * Numo::DFloat[*bin_y]
77
+
74
78
  self
75
79
  end
76
80
 
@@ -78,9 +82,9 @@ module SVMKit
  #
  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
  #   The kernel matrix between testing samples and training samples to compute the scores.
- # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
- x.dot(@weight_vec)
+ x.dot(@weight_vec.transpose)
  end

  # Predict class labels for samples.
@@ -89,7 +93,11 @@ module SVMKit
  #   The kernel matrix between testing samples and training samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
  def predict(x)
- Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+ return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+ n_samples, = x.shape
+ decision_values = decision_function(x)
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Calculate the mean accuracy of the given testing data.
@@ -105,7 +113,10 @@ module SVMKit
  # Dump marshal data.
  # @return [Hash] The marshal data about KernelSVC.
  def marshal_dump
- { params: @params, weight_vec: @weight_vec, rng: @rng }
+ { params: @params,
+ weight_vec: @weight_vec,
+ classes: @classes,
+ rng: @rng }
  end

  # Load marshal data.
@@ -113,9 +124,30 @@ module SVMKit
  def marshal_load(obj)
  @params = obj[:params]
  @weight_vec = obj[:weight_vec]
+ @classes = obj[:classes]
  @rng = obj[:rng]
  nil
  end
+
+ private
+
+ def binary_fit(x, bin_y)
+ # Initialize some variables.
+ n_training_samples = x.shape[0]
+ rand_ids = []
+ weight_vec = Numo::DFloat.zeros(n_training_samples)
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # random sampling
+ rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+ target_id = rand_ids.shift
+ # update the weight vector
+ func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
+ func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
+ weight_vec[target_id] += 1.0 if func < 1.0
+ end
+ weight_vec * Numo::DFloat[*bin_y]
+ end
  end
  end
  end
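The label re-encoding Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1 used in fit above is the core of the one-vs-rest decomposition: the n-th class maps to +1 and every other class to -1. A standalone sketch with made-up labels:

```ruby
require 'numo/narray'

y = Numo::Int32[0, 1, 2, 1, 0]
classes = Numo::Int32[*y.to_a.uniq.sort] # => Numo::Int32[0, 1, 2]

# Binary targets for the detector of class 1: y.eq(...) yields the bit
# mask [0, 1, 0, 1, 0], and * 2 - 1 maps {0, 1} onto {-1, +1}.
bin_y = Numo::Int32.cast(y.eq(classes[1])) * 2 - 1
# => Numo::Int32[-1, 1, -1, 1, -1]
```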
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -8,7 +8,7 @@ module SVMKit
  module LinearModel
  # LogisticRegression is a class that implements Logistic Regression
  # with stochastic gradient descent (SGD) optimization.
- # Note that the class performs as a binary classifier.
+ # For multiclass classification problems, it uses the one-vs-rest strategy.
  #
  # @example
  #   estimator =
@@ -23,14 +23,18 @@ module SVMKit
  include Base::Classifier

  # Return the weight vector for Logistic Regression.
- # @return [Numo::DFloat] (shape: [n_features])
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :weight_vec

  # Return the bias term (a.k.a. intercept) for Logistic Regression.
- # @return [Float]
+ # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :bias_term

- # Return the random generator for transformation.
+ # Return the class labels.
+ # @return [Numo::Int32] (shape: [n_classes])
+ attr_reader :classes
+
+ # Return the random generator for performing random sampling.
  # @return [Random]
  attr_reader :rng

@@ -42,79 +46,59 @@ module SVMKit
  #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
  # @param max_iter [Integer] The maximum number of iterations.
  # @param batch_size [Integer] The size of the mini batches.
+ # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
- def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+ max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
  @params = {}
  @params[:reg_param] = reg_param
  @params[:fit_bias] = fit_bias
  @params[:bias_scale] = bias_scale
  @params[:max_iter] = max_iter
  @params[:batch_size] = batch_size
+ @params[:normalize] = normalize
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @weight_vec = nil
- @bias_term = 0.0
+ @bias_term = nil
+ @classes = nil
  @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
- #   to be used for fitting the model.
+ # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [LogisticRegression] The learned classifier itself.
  def fit(x, y)
- # Generate binary labels.
- negative_label = y.to_a.uniq.sort.shift
- bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
- # Expand feature vectors for bias term.
- samples = x
- if @params[:fit_bias]
- samples = Numo::NArray.hstack(
- [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
- )
- end
- # Initialize some variables.
- n_samples, n_features = samples.shape
- rand_ids = [*0...n_samples].shuffle(random: @rng)
- weight_vec = Numo::DFloat.zeros(n_features)
- # Start optimization.
- @params[:max_iter].times do |t|
- # random sampling
- subset_ids = rand_ids.shift(@params[:batch_size])
- rand_ids.concat(subset_ids)
- # update the weight vector.
- eta = 1.0 / (@params[:reg_param] * (t + 1))
- mean_vec = Numo::DFloat.zeros(n_features)
- subset_ids.each do |n|
- z = weight_vec.dot(samples[n, true])
- coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
- mean_vec += samples[n, true] * coef
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
+ n_classes = @classes.size
+ _n_samples, n_features = x.shape
+
+ if n_classes > 2
+ @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+ @bias_term = Numo::DFloat.zeros(n_classes)
+ n_classes.times do |n|
+ bin_y = Numo::Int32.cast(y.eq(@classes[n]))
+ weight, bias = binary_fit(x, bin_y)
+ @weight_vec[n, true] = weight
+ @bias_term[n] = bias
  end
- mean_vec *= eta / @params[:batch_size]
- weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
- # scale the weight vector.
- norm = Math.sqrt(weight_vec.dot(weight_vec))
- scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
- weight_vec *= [1.0, scaler].min
- end
- # Store the learned model.
- if @params[:fit_bias]
- @weight_vec = weight_vec[0...n_features - 1]
- @bias_term = weight_vec[n_features - 1]
  else
- @weight_vec = weight_vec[0...n_features]
- @bias_term = 0.0
+ negative_label = y.to_a.uniq.sort.first
+ bin_y = Numo::Int32.cast(y.ne(negative_label))
+ @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
+
  self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
- # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
- @weight_vec.dot(x.transpose) + @bias_term
+ x.dot(@weight_vec.transpose) + @bias_term
  end

  # Predict class labels for samples.
@@ -122,7 +106,11 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- Numo::Int32.cast(sigmoid(decision_function(x)).map { |v| v >= 0.5 ? 1 : -1 })
+ return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+ n_samples, = x.shape
+ decision_values = decision_function(x)
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Predict probability for samples.
@@ -130,17 +118,24 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+ return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
  n_samples, = x.shape
- proba = Numo::DFloat.zeros(n_samples, 2)
- proba[true, 1] = sigmoid(decision_function(x))
- proba[true, 0] = 1.0 - proba[true, 1]
- proba
+ probs = Numo::DFloat.zeros(n_samples, 2)
+ probs[true, 1] = proba
+ probs[true, 0] = 1.0 - proba
+ probs
  end

  # Dump marshal data.
  # @return [Hash] The marshal data about LogisticRegression.
  def marshal_dump
- { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+ { params: @params,
+ weight_vec: @weight_vec,
+ bias_term: @bias_term,
+ classes: @classes,
+ rng: @rng }
  end

  # Load marshal data.
@@ -149,14 +144,53 @@ module SVMKit
  @params = obj[:params]
  @weight_vec = obj[:weight_vec]
  @bias_term = obj[:bias_term]
+ @classes = obj[:classes]
  @rng = obj[:rng]
  nil
  end

  private

- def sigmoid(x)
- 1.0 / (Numo::NMath.exp(-x) + 1.0)
+ def binary_fit(x, bin_y)
+ # Expand feature vectors for bias term.
+ samples = @params[:fit_bias] ? expand_feature(x) : x
+ # Initialize some variables.
+ n_samples, n_features = samples.shape
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
+ weight_vec = Numo::DFloat.zeros(n_features)
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # random sampling
+ subset_ids = rand_ids.shift(@params[:batch_size])
+ rand_ids.concat(subset_ids)
+ # update the weight vector.
+ df = samples[subset_ids, true].dot(weight_vec.transpose)
+ coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
+ mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+ # scale the weight vector.
+ weight_vec = normalize_weight_vec(weight_vec) if @params[:normalize]
+ end
+ split_weight_vec_bias(weight_vec)
+ end
+
+ def expand_feature(x)
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+ end
+
+ def learning_rate(iter)
+ 1.0 / (@params[:reg_param] * (iter + 1))
+ end
+
+ def normalize_weight_vec(weight_vec)
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
+ weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+ end
+
+ def split_weight_vec_bias(weight_vec)
+ weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+ bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+ [weights, bias]
  end
  end
  end
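In the multiclass branch of predict_proba above, each one-vs-rest sigmoid score is computed independently, so a row of scores does not sum to 1; dividing each row by its sum turns the scores into a probability-like distribution. A standalone sketch of that normalization with made-up scores:

```ruby
require 'numo/narray'

# Per-class sigmoid scores for two samples over three classes (made-up values).
proba = Numo::DFloat[[0.8, 0.3, 0.1],
                     [0.2, 0.6, 0.7]]

# (proba.transpose / proba.sum(axis: 1)).transpose divides every row by its
# sum, so each sample's class scores add up to 1.0.
normalized = (proba.transpose / proba.sum(axis: 1)).transpose
normalized.sum(axis: 1) # => Numo::DFloat[1.0, 1.0]
```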
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
  # This module consists of the classes that implement generalized linear models.
  module LinearModel
- # SVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
+ # SVC is a class that implements Support Vector Classifier
+ # with stochastic gradient descent (SGD) optimization.
+ # For multiclass classification problems, it uses the one-vs-rest strategy.
  #
  # @example
  #   estimator =
@@ -21,36 +23,44 @@ module SVMKit
  include Base::Classifier

  # Return the weight vector for SVC.
- # @return [Numo::DFloat] (shape: [n_features])
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :weight_vec

  # Return the bias term (a.k.a. intercept) for SVC.
- # @return [Float]
+ # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :bias_term

- # Return the random generator for performing random sampling in the Pegasos algorithm.
+ # Return the class labels.
+ # @return [Numo::Int32] (shape: [n_classes])
+ attr_reader :classes
+
+ # Return the random generator for performing random sampling.
  # @return [Random]
  attr_reader :rng

- # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+ # Create a new classifier with Support Vector Machine by SGD optimization.
  #
  # @param reg_param [Float] The regularization parameter.
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
  # @param bias_scale [Float] The scale of the bias term.
  # @param max_iter [Integer] The maximum number of iterations.
  # @param batch_size [Integer] The size of the mini batches.
+ # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
- def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+ max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
  @params = {}
  @params[:reg_param] = reg_param
  @params[:fit_bias] = fit_bias
  @params[:bias_scale] = bias_scale
  @params[:max_iter] = max_iter
  @params[:batch_size] = batch_size
+ @params[:normalize] = normalize
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @weight_vec = nil
- @bias_term = 0.0
+ @bias_term = nil
+ @classes = nil
  @rng = Random.new(@params[:random_seed])
  end

@@ -60,56 +70,34 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [SVC] The learned classifier itself.
  def fit(x, y)
- # Generate binary labels
- negative_label = y.to_a.uniq.sort.shift
- bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
- # Expand feature vectors for bias term.
- samples = x
- if @params[:fit_bias]
- samples = Numo::NArray.hstack(
- [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
- )
- end
- # Initialize some variables.
- n_samples, n_features = samples.shape
- rand_ids = [*0...n_samples].shuffle(random: @rng)
- weight_vec = Numo::DFloat.zeros(n_features)
- # Start optimization.
- @params[:max_iter].times do |t|
- # random sampling
- subset_ids = rand_ids.shift(@params[:batch_size])
- rand_ids.concat(subset_ids)
- target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
- n_subsamples = target_ids.size
- next if n_subsamples.zero?
- # update the weight vector.
- eta = 1.0 / (@params[:reg_param] * (t + 1))
- mean_vec = Numo::DFloat.zeros(n_features)
- target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
- mean_vec *= eta / n_subsamples
- weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
- # scale the weight vector.
- norm = Math.sqrt(weight_vec.dot(weight_vec))
- scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
- weight_vec *= [1.0, scaler].min
- end
- # Store the learned model.
- if @params[:fit_bias]
- @weight_vec = weight_vec[0...n_features - 1]
- @bias_term = weight_vec[n_features - 1]
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
+ n_classes = @classes.size
+ _n_samples, n_features = x.shape
+
+ if n_classes > 2
+ @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+ @bias_term = Numo::DFloat.zeros(n_classes)
+ n_classes.times do |n|
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+ weight, bias = binary_fit(x, bin_y)
+ @weight_vec[n, true] = weight
+ @bias_term[n] = bias
+ end
  else
- @weight_vec = weight_vec[0...n_features]
- @bias_term = 0.0
+ negative_label = y.to_a.uniq.sort.first
+ bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+ @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
+
  self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
- # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
- @weight_vec.dot(x.transpose) + @bias_term
+ x.dot(@weight_vec.transpose) + @bias_term
  end

  # Predict class labels for samples.
@@ -117,13 +105,21 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+ return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+ n_samples, = x.shape
+ decision_values = decision_function(x)
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Dump marshal data.
  # @return [Hash] The marshal data about SVC.
  def marshal_dump
- { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+ { params: @params,
+ weight_vec: @weight_vec,
+ bias_term: @bias_term,
+ classes: @classes,
+ rng: @rng }
  end

  # Load marshal data.
@@ -132,9 +128,55 @@ module SVMKit
  @params = obj[:params]
  @weight_vec = obj[:weight_vec]
  @bias_term = obj[:bias_term]
+ @classes = obj[:classes]
  @rng = obj[:rng]
  nil
  end
+
+ private
+
+ def binary_fit(x, bin_y)
+ # Expand feature vectors for bias term.
+ samples = @params[:fit_bias] ? expand_feature(x) : x
+ # Initialize some variables.
+ n_samples, n_features = samples.shape
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
+ weight_vec = Numo::DFloat.zeros(n_features)
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # random sampling
+ subset_ids = rand_ids.shift(@params[:batch_size])
+ rand_ids.concat(subset_ids)
+ target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
+ n_subsamples = target_ids.size
+ next if n_subsamples.zero?
+ # update the weight vector.
+ mean_vec = samples[target_ids, true].transpose.dot(bin_y[target_ids]) / n_subsamples
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+ # scale the weight vector.
+ weight_vec = normalize_weight_vec(weight_vec) if @params[:normalize]
+ end
+ split_weight_vec_bias(weight_vec)
+ end
+
+ def expand_feature(x)
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+ end
+
+ def learning_rate(iter)
+ 1.0 / (@params[:reg_param] * (iter + 1))
+ end
+
+ def normalize_weight_vec(weight_vec)
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
+ weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+ end
+
+ def split_weight_vec_bias(weight_vec)
+ weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+ bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+ [weights, bias]
+ end
  end
  end
  end
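The normalize_weight_vec helper above is the projection step of the Pegasos solver: after each update, the weight vector is pulled back onto the L2 ball of radius 1/sqrt(reg_param), i.e. w ← min(1, (1/√λ)/‖w‖)·w. A standalone numeric sketch with made-up weights:

```ruby
require 'numo/narray'

reg_param  = 0.1
weight_vec = Numo::DFloat[3.0, 4.0] # ||w|| = 5.0, made-up values

# Project w onto the ball of radius 1 / sqrt(reg_param) (about 3.162 here).
norm   = Math.sqrt(weight_vec.dot(weight_vec))
scaler = (1.0 / reg_param**0.5) / (norm + 1.0e-12)
projected = weight_vec * [1.0, scaler].min
# ||projected|| is about 3.162: w was shrunk because it exceeded the radius.
```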
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -4,13 +4,16 @@ require 'svmkit/base/base_estimator.rb'
  require 'svmkit/base/classifier.rb'

  module SVMKit
- # This module consists of the classes that implement multi-label classification strategy.
+ # This module consists of the classes that implement multi-class classification strategy.
  module Multiclass
- # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
+ # @note
+ #   All classifiers in SVMKit support multi-class classification since version 0.2.7.
+ #   There is no need to explicitly use this class for multi-class classification.
+ #
+ # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-class classification.
  #
  # @example
- #   base_estimator =
- #     SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
+ #   base_estimator = SVMKit::LinearModel::LogisticRegression.new
  #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
@@ -26,9 +29,9 @@ module SVMKit
  # @return [Numo::Int32] (shape: [n_classes])
  attr_reader :classes

- # Create a new multi-label classifier with the one-vs-rest startegy.
+ # Create a new multi-class classifier with the one-vs-rest strategy.
  #
- # @param estimator [Classifier] The (binary) classifier for construction a multi-label classifier.
+ # @param estimator [Classifier] The (binary) classifier for constructing a multi-class classifier.
  def initialize(estimator: nil)
  @params = {}
  @params[:estimator] = estimator
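Explicit one-vs-rest wrapping remains available even though the @note above makes it unnecessary; a minimal sketch with illustrative parameters, reusing the variable names from the @example:

```ruby
require 'svmkit'

base = SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
ovr  = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base)

# Fits one copy of the base estimator per class, which is the same
# decomposition the classifiers now perform internally since 0.2.7.
ovr.fit(training_samples, training_labels)
results = ovr.predict(testing_samples)
```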
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -6,9 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
  # This module consists of the classes that implement polynomial models.
  module PolynomialModel
- # FactorizationMachineClassifier is a class that
- # implements Fatorization Machine for binary classification
- # with (mini-batch) stochastic gradient descent optimization.
+ # FactorizationMachineClassifier is a class that implements Factorization Machine
+ # with stochastic gradient descent (SGD) optimization.
+ # For multiclass classification problems, it uses the one-vs-rest strategy.
  #
  # @example
  #   estimator =
@@ -26,22 +26,26 @@ module SVMKit
  include Base::Classifier

  # Return the factor matrix for Factorization Machine.
- # @return [Numo::DFloat] (shape: [n_factors, n_features])
+ # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
  attr_reader :factor_mat

  # Return the weight vector for Factorization Machine.
- # @return [Numo::DFloat] (shape: [n_features])
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :weight_vec

  # Return the bias term for Factorization Machine.
- # @return [Float]
+ # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :bias_term

- # Return the random generator for transformation.
+ # Return the class labels.
+ # @return [Numo::Int32] (shape: [n_classes])
+ attr_reader :classes
+
+ # Return the random generator for random sampling.
  # @return [Random]
  attr_reader :rng

- # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+ # Create a new classifier with Factorization Machine.
  #
  # @param n_factors [Integer] The number of factors.
  # @param loss [String] The loss function ('hinge' or 'logistic').
@@ -67,7 +71,8 @@ module SVMKit
  @params[:random_seed] ||= srand
  @factor_mat = nil
  @weight_vec = nil
- @bias_term = 0.0
+ @bias_term = nil
+ @classes = nil
  @rng = Random.new(@params[:random_seed])
  end

@@ -77,33 +82,27 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [FactorizationMachineClassifier] The learned classifier itself.
  def fit(x, y)
- # Generate binary labels.
- negative_label = y.to_a.uniq.sort.shift
- bin_y = y.map { |l| l != negative_label ? 1.0 : -1.0 }
- # Initialize some variables.
- n_samples, n_features = x.shape
- rand_ids = [*0...n_samples].shuffle(random: @rng)
- @factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
- @weight_vec = Numo::DFloat.zeros(n_features)
- @bias_term = 0.0
- # Start optimization.
- @params[:max_iter].times do |t|
- # Random sampling.
- subset_ids = rand_ids.shift(@params[:batch_size])
- rand_ids.concat(subset_ids)
- data = x[subset_ids, true]
- label = bin_y[subset_ids]
- # Calculate gradients for loss function.
- loss_grad = loss_gradient(data, label)
- next if loss_grad.ne(0.0).count.zero?
- # Update each parameter.
- @bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad)
- @weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data)
- @params[:n_factors].times do |n|
- @factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
- factor_gradient(loss_grad, data, @factor_mat[n, true])
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
+ n_classes = @classes.size
+ _n_samples, n_features = x.shape
+
+ if n_classes > 2
+ @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
+ @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+ @bias_term = Numo::DFloat.zeros(n_classes)
+ n_classes.times do |n|
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+ factor, weight, bias = binary_fit(x, bin_y)
+ @factor_mat[n, true, true] = factor
+ @weight_vec[n, true] = weight
+ @bias_term[n] = bias
  end
+ else
+ negative_label = y.to_a.uniq.sort.first
+ bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+ @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
+
  self
  end

@@ -112,8 +111,12 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
- linear_term = @bias_term + x.dot(@weight_vec)
- factor_term = 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum
+ linear_term = @bias_term + x.dot(@weight_vec.transpose)
+ factor_term = if @classes.size <= 2
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+ else
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+ end
  linear_term + factor_term
  end

@@ -122,26 +125,37 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
+ return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+ n_samples, = x.shape
+ decision_values = decision_function(x)
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Predict probability for samples.
- #   Note that this method works normally only if the 'loss' parameter is set to 'logistic'.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+ return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
  n_samples, = x.shape
- proba = Numo::DFloat.zeros(n_samples, 2)
- proba[true, 1] = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
- proba[true, 0] = 1.0 - proba[true, 1]
- proba
+ probs = Numo::DFloat.zeros(n_samples, 2)
+ probs[true, 1] = proba
+ probs[true, 0] = 1.0 - proba
+ probs
  end

  # Dump marshal data.
  # @return [Hash] The marshal data about FactorizationMachineClassifier.
  def marshal_dump
- { params: @params, factor_mat: @factor_mat, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+ { params: @params,
+ factor_mat: @factor_mat,
+ weight_vec: @weight_vec,
+ bias_term: @bias_term,
+ classes: @classes,
+ rng: @rng }
  end

  # Load marshal data.
@@ -151,39 +165,76 @@ module SVMKit
  @factor_mat = obj[:factor_mat]
  @weight_vec = obj[:weight_vec]
  @bias_term = obj[:bias_term]
+ @classes = obj[:classes]
  @rng = obj[:rng]
  nil
  end

  private

- def hinge_loss_gradient(x, y)
- evaluated = y * decision_function(x)
+ def binary_fit(x, bin_y)
+ # Initialize some variables.
+ n_samples, n_features = x.shape
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
+ factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
+ weight_vec = Numo::DFloat.zeros(n_features)
+ bias_term = 0.0
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # Random sampling.
+ subset_ids = rand_ids.shift(@params[:batch_size])
+ rand_ids.concat(subset_ids)
+ data = x[subset_ids, true]
+ label = bin_y[subset_ids]
+ # Calculate gradients for loss function.
+ loss_grad = loss_gradient(data, label, factor_mat, weight_vec, bias_term)
+ next if loss_grad.ne(0.0).count.zero?
+ # Update each parameter.
+ bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad, bias_term)
+ weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
+ @params[:n_factors].times do |n|
+ factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
+ factor_gradient(loss_grad, data, factor_mat[n, true])
+ end
+ end
+ [factor_mat, weight_vec, bias_term]
+ end
+
+ def bin_decision_function(x, factor, weight, bias)
+ bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
+ end
+
+ def hinge_loss_gradient(x, y, factor, weight, bias)
+ evaluated = y * bin_decision_function(x, factor, weight, bias)
  gradient = Numo::DFloat.zeros(evaluated.size)
  gradient[evaluated < 1.0] = -y[evaluated < 1.0]
  gradient
  end

- def logistic_loss_gradient(x, y)
- evaluated = y * decision_function(x)
+ def logistic_loss_gradient(x, y, factor, weight, bias)
+ evaluated = y * bin_decision_function(x, factor, weight, bias)
  sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
  (sigmoid_func - 1.0) * y
  end

- def loss_gradient(x, y)
- @params[:loss] == 'hinge' ? hinge_loss_gradient(x, y) : logistic_loss_gradient(x, y)
+ def loss_gradient(x, y, factor, weight, bias)
+ if @params[:loss] == 'hinge'
+ hinge_loss_gradient(x, y, factor, weight, bias)
+ else
+ logistic_loss_gradient(x, y, factor, weight, bias)
+ end
  end

  def learning_rate(reg_param, iter)
  1.0 / (reg_param * (iter + 1))
  end

- def bias_gradient(loss_grad)
- loss_grad.mean + @params[:reg_param_bias] * @bias_term
+ def bias_gradient(loss_grad, bias)
+ loss_grad.mean + @params[:reg_param_bias] * bias
  end

- def weight_gradient(loss_grad, data)
- (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * @weight_vec
+ def weight_gradient(loss_grad, data, weight)
+ (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
  end

  def factor_gradient(loss_grad, data, factor)
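The pairwise-interaction term in bin_decision_function uses the standard Factorization Machine identity Σ_{i<j} (v_i·v_j) x_i x_j = ½ Σ_f [(Σ_i v_{f,i} x_i)² − Σ_i (v_{f,i} x_i)²], which drops the cost from quadratic to linear in the number of features. A standalone numeric check with a made-up factor matrix:

```ruby
require 'numo/narray'

factor = Numo::DFloat[[0.1, 0.2, 0.3],
                      [0.4, 0.5, 0.6]] # [n_factors, n_features], made-up
x = Numo::DFloat[1.0, 2.0, 3.0]

# Fast form used by the classifier: 0.5 * sum_f ((v_f . x)^2 - (v_f^2) . (x^2)).
fast = 0.5 * ((factor.dot(x))**2 - (factor**2).dot(x**2)).sum

# Naive form: sum over feature pairs i < j of (v_i . v_j) * x_i * x_j.
naive = 0.0
3.times do |i|
  (i + 1...3).each do |j|
    naive += factor[true, i].dot(factor[true, j]) * x[i] * x[j]
  end
end
# fast == naive (up to floating-point error)
```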
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
  # @!visibility private
- VERSION = '0.2.6'
+ VERSION = '0.2.7'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.2.6
+ version: 0.2.7
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-03-11 00:00:00.000000000 Z
+ date: 2018-04-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray