svmkit 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +162 -0
- data/lib/svmkit/linear_model/pegasos_svc.rb +1 -1
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
|
4
|
+
data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
|
7
|
+
data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41
|
data/README.md
CHANGED
@@ -36,7 +36,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
|
|
36
36
|
transformed = transformer.fit_transform(normalized)
|
37
37
|
|
38
38
|
base_classifier =
|
39
|
-
SVMKit::LinearModel::PegasosSVC.new(
|
39
|
+
SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
|
40
40
|
classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
|
41
41
|
classifier.fit(transformed, labels)
|
42
42
|
|
data/lib/svmkit.rb
CHANGED
@@ -10,6 +10,7 @@ require 'svmkit/base/classifier'
|
|
10
10
|
require 'svmkit/base/transformer'
|
11
11
|
require 'svmkit/kernel_approximation/rbf'
|
12
12
|
require 'svmkit/linear_model/pegasos_svc'
|
13
|
+
require 'svmkit/linear_model/logistic_regression'
|
13
14
|
require 'svmkit/multiclass/one_vs_rest_classifier'
|
14
15
|
require 'svmkit/preprocessing/l2_normalizer'
|
15
16
|
require 'svmkit/preprocessing/min_max_scaler'
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'svmkit/base/base_estimator'
|
2
|
+
require 'svmkit/base/classifier'
|
3
|
+
|
4
|
+
module SVMKit
  # This module consists of the classes that implement generalized linear models.
  module LinearModel
    # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
    # Note that the Logistic Regression of SVMKit performs as a binary classifier.
    #
    #   estimator =
    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # * *Reference*:
    #   - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
    #
    class LogisticRegression
      include Base::BaseEstimator
      include Base::Classifier

      DEFAULT_PARAMS = { # :nodoc:
        reg_param: 1.0,
        max_iter: 100,
        batch_size: 50,
        random_seed: nil
      }.freeze

      # The weight vector for Logistic Regression.
      attr_reader :weight_vec

      # The random generator for performing random sampling in the SGD optimization.
      attr_reader :rng

      # Create a new classifier with Logistic Regression by the SGD optimization.
      #
      # :call-seq:
      #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
      #
      # * *Arguments* :
      #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
      #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
      #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
      #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value used to initialize the random generator.
      def initialize(params = {})
        # Accept both String and Symbol keys by normalizing every key to a Symbol.
        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
        # NOTE: Kernel#srand without an argument reseeds the global generator and
        # returns the previous seed; that value is recorded so the run can be reproduced.
        self.params[:random_seed] ||= srand
        @weight_vec = nil
        @rng = Random.new(self.params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # The smallest label value found in +y+ is treated as the negative class;
      # every other label is treated as the positive class.
      #
      # :call-seq:
      #   fit(x, y) -> LogisticRegression
      #
      # * *Arguments* :
      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
      # * *Returns* :
      #   - The learned classifier itself.
      def fit(x, y)
        # Generate binary labels in {-1, +1}.
        # The coefficient computed below, y / (1 + exp(y * z)), is the gradient of the
        # logistic loss log(1 + exp(-y * z)) for y in {-1, +1}. Encoding the negative
        # class as 0 would make its coefficient always 0, so negative samples would
        # never contribute to the weight update.
        negative_label = y.uniq.sort.shift
        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
        # Initialize some variables.
        n_samples, n_features = x.shape
        rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
        @weight_vec = NMatrix.zeros([1, n_features])
        # Start optimization.
        params[:max_iter].times do |t|
          # Random sampling: take the next mini batch from the head of the shuffled
          # index list and append it to the tail so the indices are reused cyclically.
          subset_ids = rand_ids.shift(params[:batch_size])
          rand_ids.concat(subset_ids)
          # Update the weight vector with a step size that decays as 1 / (reg_param * (t + 1)).
          eta = 1.0 / (params[:reg_param] * (t + 1))
          mean_vec = NMatrix.zeros([1, n_features])
          subset_ids.each do |n|
            z = @weight_vec.dot(x.row(n).transpose)[0]
            coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
            mean_vec += x.row(n) * coef
          end
          mean_vec *= eta / params[:batch_size]
          @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
          # Scale the weight vector so that its L2 norm does not exceed 1 / sqrt(reg_param).
          scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
          @weight_vec *= [1.0, scaler].min
        end
        self
      end

      # Calculate confidence scores for samples.
      # The score is the logistic sigmoid of the inner product between the weight vector and each sample.
      #
      # :call-seq:
      #   decision_function(x) -> NMatrix, shape: [1, n_samples]
      #
      # * *Arguments* :
      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
      # * *Returns* :
      #   - Confidence score per sample.
      def decision_function(x)
        # w = 1 + exp(-weight_vec . x); the element-wise reciprocal is the sigmoid.
        w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
        w.map { |v| 1.0 / v }
      end

      # Predict class labels for samples.
      # A sample whose confidence score is 0.5 or more is labeled 1; otherwise it is labeled -1.
      #
      # :call-seq:
      #   predict(x) -> NMatrix, shape: [1, n_samples]
      #
      # * *Arguments* :
      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
      # * *Returns* :
      #   - Predicted class label per sample.
      def predict(x)
        decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
      end

      # Predict probability for samples.
      #
      # :call-seq:
      #   predict_proba(x) -> NMatrix, shape: [1, n_samples]
      #
      # * *Arguments* :
      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probabilities.
      # * *Returns* :
      #   - Predicted probability per sample.
      def predict_proba(x)
        decision_function(x)
      end

      # Calculate the mean accuracy of the given testing data.
      # The labels in +y+ are compared directly with the 1 / -1 output of #predict.
      #
      # :call-seq:
      #   score(x, y) -> Float
      #
      # * *Arguments* :
      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
      # * *Returns* :
      #   - Mean accuracy
      def score(x, y)
        p = predict(x)
        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
        n_hits / y.size.to_f
      end

      # Serializes object through Marshal#dump.
      # The weight vector is converted with Utils.dump_nmatrix before being stored.
      def marshal_dump # :nodoc:
        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
      end

      # Deserialize object through Marshal#load.
      # The weight vector is rebuilt with Utils.restore_nmatrix.
      def marshal_load(obj) # :nodoc:
        self.params = obj[:params]
        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
        @rng = obj[:rng]
        nil
      end
    end
  end
end
|
@@ -118,7 +118,7 @@ module SVMKit
|
|
118
118
|
# Calculate the mean accuracy of the given testing data.
|
119
119
|
#
|
120
120
|
# :call-seq:
|
121
|
-
#
|
121
|
+
# score(x, y) -> Float
|
122
122
|
#
|
123
123
|
# * *Arguments* :
|
124
124
|
# - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
|
data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- lib/svmkit/base/classifier.rb
|
94
94
|
- lib/svmkit/base/transformer.rb
|
95
95
|
- lib/svmkit/kernel_approximation/rbf.rb
|
96
|
+
- lib/svmkit/linear_model/logistic_regression.rb
|
96
97
|
- lib/svmkit/linear_model/pegasos_svc.rb
|
97
98
|
- lib/svmkit/multiclass/one_vs_rest_classifier.rb
|
98
99
|
- lib/svmkit/preprocessing/l2_normalizer.rb
|