svmkit 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: b36f6b299c47d1107d587aafeb7bb66531f1208c
-   data.tar.gz: 1bd382f3339b8fb08454493a45a2338020791b6c
+   metadata.gz: 46878b59860b61bae7b522fb02af984208609f56
+   data.tar.gz: 6e889c6ad8382c654455a242d2f7f27de41de2d5
  SHA512:
-   metadata.gz: 0676f2e9b3ef4ac9786f10ca976721e73d2cd918a9c939900281e36267ab14a3413c3a719d9504415f527e5d6163d5640ea2af186e023c3980327cb7c476afba
-   data.tar.gz: e8dcf72f7d1641903a4625bb23399deabf9a19931a5c00bd2c5077b525a2d0361b194b8c43dee1b7a365b25eafdabb460e0a4ae21f17afd5b401379671379463
+   metadata.gz: cddb239bf0768e6d983ce942ed6a7bdda8b827fa2e73e51c1b4591e8af3c641339377417f844358159c3a2bdff51d2f5678ef07fe21fe86e51136289e69ea38c
+   data.tar.gz: 64c2029c729de580765ad9ee89fd57821a40773721eac291201cb9b9f4c72697f5945c8f5259ed8d6a879f0b35dac841bcdd6d5322014c9cc78b9a42046dc310
data/HISTORY.md CHANGED
@@ -1,3 +1,6 @@
+ # 0.2.7
+ - Fixed to support multiclass classification in LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier.
+
  # 0.2.6
  - Added class for Decision Tree classifier.
  - Added class for Random Forest classifier.
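With this release, the four classifiers listed above binarize labels internally using the one-vs-the-rest strategy, so wrapping them in SVMKit::Multiclass::OneVsRestClassifier is no longer necessary (see the README changes below). A minimal sketch of the new usage, with made-up toy data:

    require 'svmkit'

    # Hypothetical three-class toy dataset.
    samples = Numo::DFloat[[-2.0, -1.5], [-1.8, -2.2], [0.1, 0.2],
                           [-0.2, 0.3], [2.1, 1.8], [1.7, 2.3]]
    labels = Numo::Int32[0, 0, 1, 1, 2, 2]

    # As of 0.2.7, fit accepts multiclass labels directly.
    estimator = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 2, random_seed: 1)
    estimator.fit(samples, labels)
    p estimator.classes.to_a   # => [0, 1, 2]
    p estimator.predict(samples).to_a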
data/README.md CHANGED
@@ -42,9 +42,7 @@ normalized = normalizer.fit_transform(samples)
  transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
  transformed = transformer.fit_transform(normalized)

- base_classifier =
-   SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
- classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
+ classifier = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
  classifier.fit(transformed, labels)

  File.open('trained_normalizer.dat', 'wb') { |f| f.write(Marshal.dump(normalizer)) }
@@ -76,12 +74,10 @@ require 'svmkit'

  samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')

- kernel_svc =
-   SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
- ovr_kernel_svc = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: kernel_svc)
+ kernel_svc = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)

  kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
- cv = SVMKit::ModelSelection::CrossValidation.new(estimator: ovr_kernel_svc, splitter: kf)
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: kernel_svc, splitter: kf)

  kernel_mat = SVMKit::PairwiseMetric::rbf_kernel(samples, nil, 0.005)
  report = cv.perform(kernel_mat, labels)
data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement kernel method-based estimators.
    module KernelMachine
-     # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+     # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
@@ -23,14 +25,18 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for Kernel SVC.
-     # @return [Numo::DFloat] (shape: [n_trainig_sample])
+     # @return [Numo::DFloat] (shape: [n_classes, n_training_samples])
      attr_reader :weight_vec

-     # Return the random generator for performing random sampling in the Pegasos algorithm.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Kernel Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param max_iter [Integer] The maximum number of iterations.
@@ -42,6 +48,7 @@ module SVMKit
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -52,25 +59,22 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
      # @return [KernelSVC] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
-       # Initialize some variables.
-       n_training_samples = x.shape[0]
-       rand_ids = []
-       weight_vec = Numo::DFloat.zeros(n_training_samples)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
-         target_id = rand_ids.shift
-         # update the weight vector
-         func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
-         func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
-         weight_vec[target_id] += 1.0 if func < 1.0
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           @weight_vec[n, true] = binary_fit(x, bin_y)
+         end
+       else
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @weight_vec = binary_fit(x, bin_y)
        end
-       # Store the learned model.
-       @weight_vec = weight_vec * Numo::DFloat[*bin_y]
+
        self
      end

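The multiclass branch of fit above trains one binary subproblem per class. The labels are binarized with Numo::Int32.cast(y.eq(label)) * 2 - 1: eq yields a 0/1 mask, and the affine map turns it into -1/+1. A standalone sketch of the trick (label values chosen arbitrarily):

    require 'numo/narray'

    y = Numo::Int32[1, 3, 2, 3, 1]
    # Treat class 3 as the positive class: 0/1 mask -> -1/+1 labels.
    bin_y = Numo::Int32.cast(y.eq(3)) * 2 - 1
    p bin_y.to_a  # => [-1, 1, -1, 1, -1]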
@@ -78,9 +82,9 @@ module SVMKit
      #
      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
      #   The kernel matrix between testing samples and training samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       x.dot(@weight_vec)
+       x.dot(@weight_vec.transpose)
      end

      # Predict class labels for samples.
@@ -89,7 +93,11 @@ module SVMKit
      #   The kernel matrix between testing samples and training samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Calculate the mean accuracy of the given testing data.
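In the multiclass branch of predict, each row of the decision matrix carries one one-vs-the-rest score per class, and the predicted label is the class with the largest score. A sketch with made-up decision values:

    require 'numo/narray'

    classes = Numo::Int32[1, 2, 3]
    decision_values = Numo::DFloat[[0.2, -1.0, 0.5],
                                   [-0.3, 0.8, 0.1]]
    predicted = Numo::Int32.asarray(
      Array.new(decision_values.shape[0]) { |n| classes[decision_values[n, true].max_index] }
    )
    p predicted.to_a  # => [3, 2]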
@@ -105,7 +113,10 @@ module SVMKit
      # Dump marshal data.
      # @return [Hash] The marshal data about KernelSVC.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -113,9 +124,30 @@ module SVMKit
      def marshal_load(obj)
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end
+
+     private
+
+     def binary_fit(x, bin_y)
+       # Initialize some variables.
+       n_training_samples = x.shape[0]
+       rand_ids = []
+       weight_vec = Numo::DFloat.zeros(n_training_samples)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+         target_id = rand_ids.shift
+         # update the weight vector
+         func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
+         func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
+         weight_vec[target_id] += 1.0 if func < 1.0
+       end
+       weight_vec * Numo::DFloat[*bin_y]
+     end
    end
  end
end
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -8,7 +8,7 @@ module SVMKit
    module LinearModel
      # LogisticRegression is a class that implements Logistic Regression
      # with stochastic gradient descent (SGD) optimization.
-     # Note that the class performs as a binary classifier.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -23,14 +23,18 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for Logistic Regression.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term (a.k.a. intercept) for Logistic Regression.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for transformation.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

@@ -42,79 +46,59 @@ module SVMKit
      # If fit_bias is true, the feature vector v becomes [v; bias_scale].
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+     # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
-     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:fit_bias] = fit_bias
        @params[:bias_scale] = bias_scale
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+       @params[:normalize] = normalize
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-     # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
-     #   to be used for fitting the model.
+     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [LogisticRegression] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels.
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
-       # Expand feature vectors for bias term.
-       samples = x
-       if @params[:fit_bias]
-         samples = Numo::NArray.hstack(
-           [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
-         )
-       end
-       # Initialize some variables.
-       n_samples, n_features = samples.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       weight_vec = Numo::DFloat.zeros(n_features)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         # update the weight vector.
-         eta = 1.0 / (@params[:reg_param] * (t + 1))
-         mean_vec = Numo::DFloat.zeros(n_features)
-         subset_ids.each do |n|
-           z = weight_vec.dot(samples[n, true])
-           coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
-           mean_vec += samples[n, true] * coef
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n]))
+           weight, bias = binary_fit(x, bin_y)
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
          end
-         mean_vec *= eta / @params[:batch_size]
-         weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
-         # scale the weight vector.
-         norm = Math.sqrt(weight_vec.dot(weight_vec))
-         scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
-         weight_vec *= [1.0, scaler].min
-       end
-       # Store the learned model.
-       if @params[:fit_bias]
-         @weight_vec = weight_vec[0...n_features - 1]
-         @bias_term = weight_vec[n_features - 1]
        else
-         @weight_vec = weight_vec[0...n_features]
-         @bias_term = 0.0
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label))
+         @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       @weight_vec.dot(x.transpose) + @bias_term
+       x.dot(@weight_vec.transpose) + @bias_term
      end

      # Predict class labels for samples.
@@ -122,7 +106,11 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(sigmoid(decision_function(x)).map { |v| v >= 0.5 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.5)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Predict probability for samples.
@@ -130,17 +118,24 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
+       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
        n_samples, = x.shape
-       proba = Numo::DFloat.zeros(n_samples, 2)
-       proba[true, 1] = sigmoid(decision_function(x))
-       proba[true, 0] = 1.0 - proba[true, 1]
-       proba
+       probs = Numo::DFloat.zeros(n_samples, 2)
+       probs[true, 1] = proba
+       probs[true, 0] = 1.0 - proba
+       probs
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about LogisticRegression.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
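In the multiclass path, predict_proba applies the sigmoid to every one-vs-the-rest score and then rescales each row to sum to one, since the per-class sigmoids are not jointly calibrated. A sketch with arbitrary scores:

    require 'numo/narray'

    scores = Numo::DFloat[[1.2, -0.4, 0.3]]  # one row of OvR decision values
    proba = 1.0 / (Numo::NMath.exp(-scores) + 1.0)
    proba = (proba.transpose / proba.sum(axis: 1)).transpose
    p proba.sum(axis: 1).to_a  # => [1.0], up to floating-point rounding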
@@ -149,14 +144,53 @@ module SVMKit
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end

      private

-     def sigmoid(x)
-       1.0 / (Numo::NMath.exp(-x) + 1.0)
+     def binary_fit(x, bin_y)
+       # Expand feature vectors for bias term.
+       samples = @params[:fit_bias] ? expand_feature(x) : x
+       # Initialize some variables.
+       n_samples, n_features = samples.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       weight_vec = Numo::DFloat.zeros(n_features)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         # update the weight vector.
+         df = samples[subset_ids, true].dot(weight_vec.transpose)
+         coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
+         mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+         weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+         # scale the weight vector.
+         normalize_weight_vec(weight_vec) if @params[:normalize]
+       end
+       split_weight_vec_bias(weight_vec)
+     end
+
+     def expand_feature(x)
+       Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+     end
+
+     def learning_rate(iter)
+       1.0 / (@params[:reg_param] * (iter + 1))
+     end
+
+     def normalize_weight_vec(weight_vec)
+       norm = Math.sqrt(weight_vec.dot(weight_vec))
+       weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+     end
+
+     def split_weight_vec_bias(weight_vec)
+       weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+       bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+       [weights, bias]
      end
    end
  end
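The extracted helpers make the Pegasos-style hyperparameters explicit: the step size decays as 1 / (reg_param * (t + 1)), and the optional projection in normalize_weight_vec caps the weight norm at 1 / sqrt(reg_param). A quick numeric check of the schedule, assuming reg_param = 0.5:

    reg_param = 0.5
    learning_rate = ->(iter) { 1.0 / (reg_param * (iter + 1)) }
    p learning_rate.call(0)       # => 2.0
    p learning_rate.call(9)       # => 0.2
    p 1.0 / Math.sqrt(reg_param)  # maximum allowed ||w||, ~1.414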
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -6,7 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement generalized linear models.
    module LinearModel
-     # SVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
+     # SVC is a class that implements Support Vector Classifier
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -21,36 +23,44 @@ module SVMKit
      include Base::Classifier

      # Return the weight vector for SVC.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term (a.k.a. intercept) for SVC.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for performing random sampling in the Pegasos algorithm.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for performing random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Support Vector Machine by the SGD optimization.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
      # @param bias_scale [Float] The scale of the bias term.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+     # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
-     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:fit_bias] = fit_bias
        @params[:bias_scale] = bias_scale
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+       @params[:normalize] = normalize
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -60,56 +70,34 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [SVC] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
-       # Expand feature vectors for bias term.
-       samples = x
-       if @params[:fit_bias]
-         samples = Numo::NArray.hstack(
-           [samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
-         )
-       end
-       # Initialize some variables.
-       n_samples, n_features = samples.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       weight_vec = Numo::DFloat.zeros(n_features)
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # random sampling
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
-         n_subsamples = target_ids.size
-         next if n_subsamples.zero?
-         # update the weight vector.
-         eta = 1.0 / (@params[:reg_param] * (t + 1))
-         mean_vec = Numo::DFloat.zeros(n_features)
-         target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
-         mean_vec *= eta / n_subsamples
-         weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
-         # scale the weight vector.
-         norm = Math.sqrt(weight_vec.dot(weight_vec))
-         scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
-         weight_vec *= [1.0, scaler].min
-       end
-       # Store the learned model.
-       if @params[:fit_bias]
-         @weight_vec = weight_vec[0...n_features - 1]
-         @bias_term = weight_vec[n_features - 1]
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           weight, bias = binary_fit(x, bin_y)
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
+         end
        else
-         @weight_vec = weight_vec[0...n_features]
-         @bias_term = 0.0
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-     # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
+     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
      def decision_function(x)
-       @weight_vec.dot(x.transpose) + @bias_term
+       x.dot(@weight_vec.transpose) + @bias_term
      end

      # Predict class labels for samples.
@@ -117,13 +105,21 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about SVC.
      def marshal_dump
-       { params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -132,9 +128,55 @@ module SVMKit
        @params = obj[:params]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end
+
+     private
+
+     def binary_fit(x, bin_y)
+       # Expand feature vectors for bias term.
+       samples = @params[:fit_bias] ? expand_feature(x) : x
+       # Initialize some variables.
+       n_samples, n_features = samples.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       weight_vec = Numo::DFloat.zeros(n_features)
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # random sampling
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
+         n_subsamples = target_ids.size
+         next if n_subsamples.zero?
+         # update the weight vector.
+         mean_vec = samples[target_ids, true].transpose.dot(bin_y[target_ids]) / n_subsamples
+         weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+         # scale the weight vector.
+         normalize_weight_vec(weight_vec) if @params[:normalize]
+       end
+       split_weight_vec_bias(weight_vec)
+     end
+
+     def expand_feature(x)
+       Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+     end
+
+     def learning_rate(iter)
+       1.0 / (@params[:reg_param] * (iter + 1))
+     end
+
+     def normalize_weight_vec(weight_vec)
+       norm = Math.sqrt(weight_vec.dot(weight_vec))
+       weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+     end
+
+     def split_weight_vec_bias(weight_vec)
+       weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+       bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+       [weights, bias]
+     end
    end
  end
end
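SVC's binary_fit updates only the margin violators in each mini-batch: a sample enters the gradient only when w.x * y < 1, which is exactly where the hinge loss has a nonzero sub-gradient. A sketch of the filtering step with made-up numbers:

    require 'numo/narray'

    weight_vec = Numo::DFloat[0.5, -0.25]
    samples = Numo::DFloat[[1.0, 0.0], [4.0, 0.0], [0.2, 0.0]]
    bin_y = Numo::Int32[1, 1, -1]
    subset_ids = [0, 1, 2]
    # Keep only the samples that violate the margin (w.x * y < 1).
    target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
    p target_ids  # => [0, 2]; sample 1 already satisfies the margin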
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -4,13 +4,16 @@ require 'svmkit/base/base_estimator.rb'
  require 'svmkit/base/classifier.rb'

  module SVMKit
-   # This module consists of the classes that implement multi-label classification strategy.
+   # This module consists of the classes that implement the multi-class classification strategy.
    module Multiclass
-     # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
+     # @note
+     #   All classifiers in SVMKit support multi-class classification since version 0.2.7.
+     #   There is no need to explicitly use this class for multi-class classification.
+     #
+     # OneVsRestClassifier is a class that implements the One-vs-Rest (OvR) strategy for multi-class classification.
      #
      # @example
-     #   base_estimator =
-     #     SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
+     #   base_estimator = SVMKit::LinearModel::LogisticRegression.new
      #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
      #   estimator.fit(training_samples, training_labels)
      #   results = estimator.predict(testing_samples)
@@ -26,9 +29,9 @@ module SVMKit
      # @return [Numo::Int32] (shape: [n_classes])
      attr_reader :classes

-     # Create a new multi-label classifier with the one-vs-rest startegy.
+     # Create a new multi-class classifier with the one-vs-rest strategy.
      #
-     # @param estimator [Classifier] The (binary) classifier for construction a multi-label classifier.
+     # @param estimator [Classifier] The (binary) classifier for constructing a multi-class classifier.
      def initialize(estimator: nil)
        @params = {}
        @params[:estimator] = estimator
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -6,9 +6,9 @@ require 'svmkit/base/classifier'
  module SVMKit
    # This module consists of the classes that implement polynomial models.
    module PolynomialModel
-     # FactorizationMachineClassifier is a class that
-     # implements Fatorization Machine for binary classification
-     # with (mini-batch) stochastic gradient descent optimization.
+     # FactorizationMachineClassifier is a class that implements Factorization Machine
+     # with stochastic gradient descent (SGD) optimization.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
      #
      # @example
      #   estimator =
@@ -26,22 +26,26 @@ module SVMKit
      include Base::Classifier

      # Return the factor matrix for Factorization Machine.
-     # @return [Numo::DFloat] (shape: [n_factors, n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
      attr_reader :factor_mat

      # Return the weight vector for Factorization Machine.
-     # @return [Numo::DFloat] (shape: [n_features])
+     # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :weight_vec

      # Return the bias term for Factorization Machine.
-     # @return [Float]
+     # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :bias_term

-     # Return the random generator for transformation.
+     # Return the class labels.
+     # @return [Numo::Int32] (shape: [n_classes])
+     attr_reader :classes
+
+     # Return the random generator for random sampling.
      # @return [Random]
      attr_reader :rng

-     # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
+     # Create a new classifier with Factorization Machine.
      #
      # @param n_factors [Integer] The number of factors.
      # @param loss [String] The loss function ('hinge' or 'logistic').
@@ -67,7 +71,8 @@ module SVMKit
        @params[:random_seed] ||= srand
        @factor_mat = nil
        @weight_vec = nil
-       @bias_term = 0.0
+       @bias_term = nil
+       @classes = nil
        @rng = Random.new(@params[:random_seed])
      end

@@ -77,33 +82,27 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [FactorizationMachineClassifier] The learned classifier itself.
      def fit(x, y)
-       # Generate binary labels.
-       negative_label = y.to_a.uniq.sort.shift
-       bin_y = y.map { |l| l != negative_label ? 1.0 : -1.0 }
-       # Initialize some variables.
-       n_samples, n_features = x.shape
-       rand_ids = [*0...n_samples].shuffle(random: @rng)
-       @factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
-       @weight_vec = Numo::DFloat.zeros(n_features)
-       @bias_term = 0.0
-       # Start optimization.
-       @params[:max_iter].times do |t|
-         # Random sampling.
-         subset_ids = rand_ids.shift(@params[:batch_size])
-         rand_ids.concat(subset_ids)
-         data = x[subset_ids, true]
-         label = bin_y[subset_ids]
-         # Calculate gradients for loss function.
-         loss_grad = loss_gradient(data, label)
-         next if loss_grad.ne(0.0).count.zero?
-         # Update each parameter.
-         @bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad)
-         @weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data)
-         @params[:n_factors].times do |n|
-           @factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
-             factor_gradient(loss_grad, data, @factor_mat[n, true])
+       @classes = Numo::Int32[*y.to_a.uniq.sort]
+       n_classes = @classes.size
+       _n_samples, n_features = x.shape
+
+       if n_classes > 2
+         @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
+         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+         @bias_term = Numo::DFloat.zeros(n_classes)
+         n_classes.times do |n|
+           bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+           factor, weight, bias = binary_fit(x, bin_y)
+           @factor_mat[n, true, true] = factor
+           @weight_vec[n, true] = weight
+           @bias_term[n] = bias
          end
+       else
+         negative_label = y.to_a.uniq.sort.first
+         bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+         @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
        end
+
        self
      end

@@ -112,8 +111,12 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
      def decision_function(x)
-       linear_term = @bias_term + x.dot(@weight_vec)
-       factor_term = 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+       linear_term = @bias_term + x.dot(@weight_vec.transpose)
+       factor_term = if @classes.size <= 2
+                       0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+                     else
+                       0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+                     end
        linear_term + factor_term
      end

@@ -122,26 +125,37 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
-       Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
+       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
+
+       n_samples, = x.shape
+       decision_values = decision_function(x)
+       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Predict probability for samples.
-     # Note that this method works normally only if the 'loss' parameter is set to 'logistic'.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
+       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
        n_samples, = x.shape
-       proba = Numo::DFloat.zeros(n_samples, 2)
-       proba[true, 1] = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-       proba[true, 0] = 1.0 - proba[true, 1]
-       proba
+       probs = Numo::DFloat.zeros(n_samples, 2)
+       probs[true, 1] = proba
+       probs[true, 0] = 1.0 - proba
+       probs
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about FactorizationMachineClassifier
      def marshal_dump
-       { params: @params, factor_mat: @factor_mat, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
+       { params: @params,
+         factor_mat: @factor_mat,
+         weight_vec: @weight_vec,
+         bias_term: @bias_term,
+         classes: @classes,
+         rng: @rng }
      end

      # Load marshal data.
@@ -151,39 +165,76 @@ module SVMKit
        @factor_mat = obj[:factor_mat]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
+       @classes = obj[:classes]
        @rng = obj[:rng]
        nil
      end

      private

-     def hinge_loss_gradient(x, y)
-       evaluated = y * decision_function(x)
+     def binary_fit(x, bin_y)
+       # Initialize some variables.
+       n_samples, n_features = x.shape
+       rand_ids = [*0...n_samples].shuffle(random: @rng)
+       factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
+       weight_vec = Numo::DFloat.zeros(n_features)
+       bias_term = 0.0
+       # Start optimization.
+       @params[:max_iter].times do |t|
+         # Random sampling.
+         subset_ids = rand_ids.shift(@params[:batch_size])
+         rand_ids.concat(subset_ids)
+         data = x[subset_ids, true]
+         label = bin_y[subset_ids]
+         # Calculate gradients for loss function.
+         loss_grad = loss_gradient(data, label, factor_mat, weight_vec, bias_term)
+         next if loss_grad.ne(0.0).count.zero?
+         # Update each parameter.
+         bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad, bias_term)
+         weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
+         @params[:n_factors].times do |n|
+           factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
+             factor_gradient(loss_grad, data, factor_mat[n, true])
+         end
+       end
+       [factor_mat, weight_vec, bias_term]
+     end
+
+     def bin_decision_function(x, factor, weight, bias)
+       bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
+     end
+
+     def hinge_loss_gradient(x, y, factor, weight, bias)
+       evaluated = y * bin_decision_function(x, factor, weight, bias)
        gradient = Numo::DFloat.zeros(evaluated.size)
        gradient[evaluated < 1.0] = -y[evaluated < 1.0]
        gradient
      end

-     def logistic_loss_gradient(x, y)
-       evaluated = y * decision_function(x)
+     def logistic_loss_gradient(x, y, factor, weight, bias)
+       evaluated = y * bin_decision_function(x, factor, weight, bias)
        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
        (sigmoid_func - 1.0) * y
      end

-     def loss_gradient(x, y)
-       @params[:loss] == 'hinge' ? hinge_loss_gradient(x, y) : logistic_loss_gradient(x, y)
+     def loss_gradient(x, y, factor, weight, bias)
+       if @params[:loss] == 'hinge'
+         hinge_loss_gradient(x, y, factor, weight, bias)
+       else
+         logistic_loss_gradient(x, y, factor, weight, bias)
+       end
      end

      def learning_rate(reg_param, iter)
        1.0 / (reg_param * (iter + 1))
      end

-     def bias_gradient(loss_grad)
-       loss_grad.mean + @params[:reg_param_bias] * @bias_term
+     def bias_gradient(loss_grad, bias)
+       loss_grad.mean + @params[:reg_param_bias] * bias
      end

-     def weight_gradient(loss_grad, data)
-       (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * @weight_vec
+     def weight_gradient(loss_grad, data, weight)
+       (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
      end

      def factor_gradient(loss_grad, data, factor)
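The gradient helpers now take the per-class parameters as arguments because, while one-vs-the-rest fitting is in progress, the subproblem's parameters are not yet stored in @factor_mat, @weight_vec, and @bias_term. For reference, the logistic branch computes (sigmoid(y * f(x)) - 1) * y, the derivative of log(1 + exp(-y * f(x))) with respect to f(x); a standalone numeric check at margin zero:

    require 'numo/narray'

    y = Numo::DFloat[1.0]
    evaluated = Numo::DFloat[0.0]  # pretend y * f(x) == 0
    sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
    grad = (sigmoid_func - 1.0) * y
    p grad.to_a  # => [-0.5]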
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
    # @!visibility private
-   VERSION = '0.2.6'
+   VERSION = '0.2.7'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
-   version: 0.2.6
+   version: 0.2.7
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-03-11 00:00:00.000000000 Z
+ date: 2018-04-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: numo-narray