svmkit 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62fdc5c03a044a7625bf2374159cf84ef32a6869
4
- data.tar.gz: 4cd1c86a344cd1410a3a5c0f4bdebd04d80e7e7b
3
+ metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
4
+ data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
5
5
  SHA512:
6
- metadata.gz: 1b704e536e183f881e6f16895ccdc1620dc8b694db7b44772db669e579ed07652df16c8de88794c65b5eeca0eeb805c415f1e44c36446cad3bdd230f3354b320
7
- data.tar.gz: e25ca447621cef29ea1807168cbe6e7210308549a298db7c8797d54e127bfb1e7fe7de3c3e9a9d719cee6de1100705e352a3b76a5127282441027fd1b389e2e1
6
+ metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
7
+ data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41
data/README.md CHANGED
@@ -36,7 +36,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
36
36
  transformed = transformer.fit_transform(normalized)
37
37
 
38
38
  base_classifier =
39
- SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
39
+ SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
40
40
  classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
41
41
  classifier.fit(transformed, labels)
42
42
 
data/lib/svmkit.rb CHANGED
@@ -10,6 +10,7 @@ require 'svmkit/base/classifier'
10
10
  require 'svmkit/base/transformer'
11
11
  require 'svmkit/kernel_approximation/rbf'
12
12
  require 'svmkit/linear_model/pegasos_svc'
13
+ require 'svmkit/linear_model/logistic_regression'
13
14
  require 'svmkit/multiclass/one_vs_rest_classifier'
14
15
  require 'svmkit/preprocessing/l2_normalizer'
15
16
  require 'svmkit/preprocessing/min_max_scaler'
@@ -0,0 +1,162 @@
1
+ require 'svmkit/base/base_estimator'
2
+ require 'svmkit/base/classifier'
3
+
4
+ module SVMKit
5
+ # This module consists of the classes that implement generalized linear models.
6
+ module LinearModel
7
+ # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
8
+ # Note that the Logistic Regression of SVMKit performs as a binary classifier.
9
+ #
10
+ # estimator =
11
+ # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
12
+ estimator.fit(training_samples, training_labels)
13
+ # results = estimator.predict(testing_samples)
14
+ #
15
+ # * *Reference*:
16
+ # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
17
+ #
18
+ class LogisticRegression
19
+ include Base::BaseEstimator
20
+ include Base::Classifier
21
+
22
+ DEFAULT_PARAMS = { # :nodoc:
23
+ reg_param: 1.0,
24
+ max_iter: 100,
25
+ batch_size: 50,
26
+ random_seed: nil
27
+ }.freeze
28
+
29
+ # The weight vector for Logistic Regression.
30
+ attr_reader :weight_vec
31
+
32
+ # The random generator for performing random sampling in the SGD optimization.
33
+ attr_reader :rng
34
+
35
+ # Create a new classifier with Logistic Regression by the SGD optimization.
36
+ #
37
+ # :call-seq:
38
+ # new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
39
+ #
40
+ # * *Arguments* :
41
+ # - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
42
+ # - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
43
+ # - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
44
+ # - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
45
+ def initialize(params = {})
46
+ self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
47
+ self.params[:random_seed] ||= srand
48
+ @weight_vec = nil
49
+ @rng = Random.new(self.params[:random_seed])
50
+ end
51
+
52
+ # Fit the model with given training data.
53
+ #
54
+ # :call-seq:
55
+ # fit(x, y) -> LogisticRegression
56
+ #
57
+ # * *Arguments* :
58
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
59
+ # - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
60
+ # * *Returns* :
61
+ # - The learned classifier itself.
62
+ def fit(x, y)
63
+ # Generate binary labels
64
+ negative_label = y.uniq.sort.shift
65
+ bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
66
+ # Initialize some variables.
67
+ n_samples, n_features = x.shape
68
+ rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
69
+ @weight_vec = NMatrix.zeros([1, n_features])
70
+ # Start optimization.
71
+ params[:max_iter].times do |t|
72
+ # random sampling
73
+ subset_ids = rand_ids.shift(params[:batch_size])
74
+ rand_ids.concat(subset_ids)
75
+ # update the weight vector.
76
+ eta = 1.0 / (params[:reg_param] * (t + 1))
77
+ mean_vec = NMatrix.zeros([1, n_features])
78
+ subset_ids.each do |n|
79
+ z = @weight_vec.dot(x.row(n).transpose)[0]
80
+ coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
81
+ mean_vec += x.row(n) * coef
82
+ end
83
+ mean_vec *= eta / params[:batch_size]
84
+ @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
85
+ # scale the weight vector.
86
+ scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
87
+ @weight_vec *= [1.0, scaler].min
88
+ end
89
+ self
90
+ end
91
+
92
+ # Calculate confidence scores for samples.
93
+ #
94
+ # :call-seq:
95
+ # decision_function(x) -> NMatrix, shape: [1, n_samples]
96
+ #
97
+ # * *Arguments* :
98
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
99
+ # * *Returns* :
100
+ # - Confidence score per sample.
101
+ def decision_function(x)
102
+ w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
103
+ w.map { |v| 1.0 / v }
104
+ end
105
+
106
+ # Predict class labels for samples.
107
+ #
108
+ # :call-seq:
109
+ # predict(x) -> NMatrix, shape: [1, n_samples]
110
+ #
111
+ # * *Arguments* :
112
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
113
+ # * *Returns* :
114
+ # - Predicted class label per sample.
115
+ def predict(x)
116
+ decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
117
+ end
118
+
119
+ # Predict probability for samples.
120
+ #
121
+ # :call-seq:
122
+ # predict_proba(x) -> NMatrix, shape: [1, n_samples]
123
+ #
124
+ # * *Arguments* :
125
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probabilities.
126
+ # * *Returns* :
127
+ # - Predicted probability per sample.
128
+ def predict_proba(x)
129
+ decision_function(x)
130
+ end
131
+
132
+ # Calculate the mean accuracy of the given testing data.
133
+ #
134
+ # :call-seq:
135
+ # score(x, y) -> Float
136
+ #
137
+ # * *Arguments* :
138
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
139
+ # - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
140
+ # * *Returns* :
141
+ # - Mean accuracy
142
+ def score(x, y)
143
+ p = predict(x)
144
+ n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
145
+ n_hits / y.size.to_f
146
+ end
147
+
148
+ # Serializes object through Marshal#dump.
149
+ def marshal_dump # :nodoc:
150
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
151
+ end
152
+
153
+ # Deserialize object through Marshal#load.
154
+ def marshal_load(obj) # :nodoc:
155
+ self.params = obj[:params]
156
+ @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
157
+ @rng = obj[:rng]
158
+ nil
159
+ end
160
+ end
161
+ end
162
+ end
@@ -118,7 +118,7 @@ module SVMKit
118
118
  # Calculate the mean accuracy of the given testing data.
119
119
  #
120
120
  # :call-seq:
121
- # predict(x, y) -> Float
121
+ # score(x, y) -> Float
122
122
  #
123
123
  # * *Arguments* :
124
124
  # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
@@ -1,3 +1,3 @@
1
1
  module SVMKit
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-30 00:00:00.000000000 Z
11
+ date: 2017-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -93,6 +93,7 @@ files:
93
93
  - lib/svmkit/base/classifier.rb
94
94
  - lib/svmkit/base/transformer.rb
95
95
  - lib/svmkit/kernel_approximation/rbf.rb
96
+ - lib/svmkit/linear_model/logistic_regression.rb
96
97
  - lib/svmkit/linear_model/pegasos_svc.rb
97
98
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
98
99
  - lib/svmkit/preprocessing/l2_normalizer.rb