svmkit 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62fdc5c03a044a7625bf2374159cf84ef32a6869
4
- data.tar.gz: 4cd1c86a344cd1410a3a5c0f4bdebd04d80e7e7b
3
+ metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
4
+ data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
5
5
  SHA512:
6
- metadata.gz: 1b704e536e183f881e6f16895ccdc1620dc8b694db7b44772db669e579ed07652df16c8de88794c65b5eeca0eeb805c415f1e44c36446cad3bdd230f3354b320
7
- data.tar.gz: e25ca447621cef29ea1807168cbe6e7210308549a298db7c8797d54e127bfb1e7fe7de3c3e9a9d719cee6de1100705e352a3b76a5127282441027fd1b389e2e1
6
+ metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
7
+ data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41
data/README.md CHANGED
@@ -36,7 +36,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
36
36
  transformed = transformer.fit_transform(normalized)
37
37
 
38
38
  base_classifier =
39
- SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
39
+ SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
40
40
  classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
41
41
  classifier.fit(transformed, labels)
42
42
 
data/lib/svmkit.rb CHANGED
@@ -10,6 +10,7 @@ require 'svmkit/base/classifier'
10
10
  require 'svmkit/base/transformer'
11
11
  require 'svmkit/kernel_approximation/rbf'
12
12
  require 'svmkit/linear_model/pegasos_svc'
13
+ require 'svmkit/linear_model/logistic_regression'
13
14
  require 'svmkit/multiclass/one_vs_rest_classifier'
14
15
  require 'svmkit/preprocessing/l2_normalizer'
15
16
  require 'svmkit/preprocessing/min_max_scaler'
@@ -0,0 +1,162 @@
1
+ require 'svmkit/base/base_estimator'
2
+ require 'svmkit/base/classifier'
3
+
4
+ module SVMKit
5
+ # This module consists of the classes that implement generalized linear models.
6
+ module LinearModel
7
+ # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
8
+ # Note that the Logistic Regression of SVMKit works as a binary classifier.
9
+ #
10
+ # estimator =
11
+ # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
12
+ # estimator.fit(training_samples, training_labels)
13
+ # results = estimator.predict(testing_samples)
14
+ #
15
+ # * *Reference*:
16
+ # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
17
+ #
18
+ class LogisticRegression
19
+ include Base::BaseEstimator
20
+ include Base::Classifier
21
+
22
+ DEFAULT_PARAMS = { # :nodoc:
23
+ reg_param: 1.0,
24
+ max_iter: 100,
25
+ batch_size: 50,
26
+ random_seed: nil
27
+ }.freeze
28
+
29
+ # The weight vector for Logistic Regression.
30
+ attr_reader :weight_vec
31
+
32
+ # The random generator for performing random sampling in the SGD optimization.
33
+ attr_reader :rng
34
+
35
+ # Create a new classifier with Logistic Regression by the SGD optimization.
36
+ #
37
+ # :call-seq:
38
+ # new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
39
+ #
40
+ # * *Arguments* :
41
+ # - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
42
+ # - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
43
+ # - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
44
+ # - +:random_seed+ (Integer) (defaults to: nil) -- The seed value used to initialize the random generator.
45
+ def initialize(params = {})
46
+ self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
47
+ self.params[:random_seed] ||= srand
48
+ @weight_vec = nil
49
+ @rng = Random.new(self.params[:random_seed])
50
+ end
51
+
52
+ # Fit the model with given training data.
53
+ #
54
+ # :call-seq:
55
+ # fit(x, y) -> LogisticRegression
56
+ #
57
+ # * *Arguments* :
58
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
59
+ # - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
60
+ # * *Returns* :
61
+ # - The learned classifier itself.
62
+ def fit(x, y)
63
+ # Generate binary labels
64
+ negative_label = y.uniq.sort.shift
65
+ bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
66
+ # Initialize some variables.
67
+ n_samples, n_features = x.shape
68
+ rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
69
+ @weight_vec = NMatrix.zeros([1, n_features])
70
+ # Start optimization.
71
+ params[:max_iter].times do |t|
72
+ # random sampling
73
+ subset_ids = rand_ids.shift(params[:batch_size])
74
+ rand_ids.concat(subset_ids)
75
+ # update the weight vector.
76
+ eta = 1.0 / (params[:reg_param] * (t + 1))
77
+ mean_vec = NMatrix.zeros([1, n_features])
78
+ subset_ids.each do |n|
79
+ z = @weight_vec.dot(x.row(n).transpose)[0]
80
+ coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
81
+ mean_vec += x.row(n) * coef
82
+ end
83
+ mean_vec *= eta / params[:batch_size]
84
+ @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
85
+ # scale the weight vector.
86
+ scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
87
+ @weight_vec *= [1.0, scaler].min
88
+ end
89
+ self
90
+ end
91
+
92
+ # Calculate confidence scores for samples.
93
+ #
94
+ # :call-seq:
95
+ # decision_function(x) -> NMatrix, shape: [1, n_samples]
96
+ #
97
+ # * *Arguments* :
98
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
99
+ # * *Returns* :
100
+ # - Confidence score per sample.
101
+ def decision_function(x)
102
+ w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
103
+ w.map { |v| 1.0 / v }
104
+ end
105
+
106
+ # Predict class labels for samples.
107
+ #
108
+ # :call-seq:
109
+ # predict(x) -> NMatrix, shape: [1, n_samples]
110
+ #
111
+ # * *Arguments* :
112
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
113
+ # * *Returns* :
114
+ # - Predicted class label per sample.
115
+ def predict(x)
116
+ decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
117
+ end
118
+
119
+ # Predict probability for samples.
120
+ #
121
+ # :call-seq:
122
+ # predict_proba(x) -> NMatrix, shape: [1, n_samples]
123
+ #
124
+ # * *Arguments* :
125
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probabilities.
126
+ # * *Returns* :
127
+ # - Predicted probability per sample.
128
+ def predict_proba(x)
129
+ decision_function(x)
130
+ end
131
+
132
+ # Calculate the mean accuracy of the given testing data.
133
+ #
134
+ # :call-seq:
135
+ # score(x, y) -> Float
136
+ #
137
+ # * *Arguments* :
138
+ # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
139
+ # - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
140
+ # * *Returns* :
141
+ # - Mean accuracy
142
+ def score(x, y)
143
+ p = predict(x)
144
+ n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
145
+ n_hits / y.size.to_f
146
+ end
147
+
148
+ # Serializes object through Marshal#dump.
149
+ def marshal_dump # :nodoc:
150
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
151
+ end
152
+
153
+ # Deserialize object through Marshal#load.
154
+ def marshal_load(obj) # :nodoc:
155
+ self.params = obj[:params]
156
+ @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
157
+ @rng = obj[:rng]
158
+ nil
159
+ end
160
+ end
161
+ end
162
+ end
@@ -118,7 +118,7 @@ module SVMKit
118
118
  # Calculate the mean accuracy of the given testing data.
119
119
  #
120
120
  # :call-seq:
121
- # predict(x, y) -> Float
121
+ # score(x, y) -> Float
122
122
  #
123
123
  # * *Arguments* :
124
124
  # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
@@ -1,3 +1,3 @@
1
1
  module SVMKit
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-30 00:00:00.000000000 Z
11
+ date: 2017-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -93,6 +93,7 @@ files:
93
93
  - lib/svmkit/base/classifier.rb
94
94
  - lib/svmkit/base/transformer.rb
95
95
  - lib/svmkit/kernel_approximation/rbf.rb
96
+ - lib/svmkit/linear_model/logistic_regression.rb
96
97
  - lib/svmkit/linear_model/pegasos_svc.rb
97
98
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
98
99
  - lib/svmkit/preprocessing/l2_normalizer.rb