svmkit 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +162 -0
- data/lib/svmkit/linear_model/pegasos_svc.rb +1 -1
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
+  data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
+  data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41

data/README.md
CHANGED
@@ -36,7 +36,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
 transformed = transformer.fit_transform(normalized)
 
 base_classifier =
-  SVMKit::LinearModel::PegasosSVC.new(
+  SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
 classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
 classifier.fit(transformed, labels)
 
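The changed README line sits inside the library's multiclass example. As a rough end-to-end sketch: the `samples`/`labels` values below are made up for illustration, and `n_components: 1024` is an assumption (the hunk header above is truncated at `102`); the SVMKit calls themselves mirror the ones visible in this diff.

require 'nmatrix'
require 'svmkit'

# Toy stand-ins for the README's data (illustrative values only).
samples = NMatrix.new([4, 2], [0.0, 0.1, 0.2, 0.0, 0.9, 1.0, 1.0, 0.8])
labels  = NMatrix.new([1, 4], [0, 0, 1, 1])

# Normalize, approximate an RBF kernel, then train a one-vs-rest Pegasos SVC.
normalized  = SVMKit::Preprocessing::L2Normalizer.new.fit_transform(samples)
transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024)
transformed = transformer.fit_transform(normalized)

base_classifier = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
classifier.fit(transformed, labels)
predicted = classifier.predict(transformed)
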
data/lib/svmkit.rb
CHANGED
@@ -10,6 +10,7 @@ require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/pegasos_svc'
+require 'svmkit/linear_model/logistic_regression'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'

data/lib/svmkit/linear_model/logistic_regression.rb
ADDED
@@ -0,0 +1,162 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement generalized linear models.
+  module LinearModel
+    # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
+    # Note that the Logistic Regression of SVMKit performs as a binary classifier.
+    #
+    #   estimator =
+    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # * *Reference*:
+    #   - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+    #
+    class LogisticRegression
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      DEFAULT_PARAMS = { # :nodoc:
+        reg_param: 1.0,
+        max_iter: 100,
+        batch_size: 50,
+        random_seed: nil
+      }.freeze
+
+      # The weight vector for Logistic Regression.
+      attr_reader :weight_vec
+
+      # The random generator for performing random sampling in the SGD optimization.
+      attr_reader :rng
+
+      # Create a new classifier with Logistic Regression by the SGD optimization.
+      #
+      # :call-seq:
+      #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
+      #
+      # * *Arguments* :
+      #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
+      #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
+      #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
+      #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value used to initialize the random generator.
+      def initialize(params = {})
+        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+        self.params[:random_seed] ||= srand
+        @weight_vec = nil
+        @rng = Random.new(self.params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # :call-seq:
+      #   fit(x, y) -> LogisticRegression
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
+      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
+      # * *Returns* :
+      #   - The learned classifier itself.
+      def fit(x, y)
+        # Generate binary labels in {-1, 1}; mapping the negative class to 0 would
+        # zero out its gradient contribution in the update below.
+        negative_label = y.uniq.sort.shift
+        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Initialize some variables.
+        n_samples, n_features = x.shape
+        rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
+        @weight_vec = NMatrix.zeros([1, n_features])
+        # Start optimization.
+        params[:max_iter].times do |t|
+          # Random sampling: rotate a mini batch off the shuffled index list.
+          subset_ids = rand_ids.shift(params[:batch_size])
+          rand_ids.concat(subset_ids)
+          # Update the weight vector with the mean gradient of the logistic loss.
+          eta = 1.0 / (params[:reg_param] * (t + 1))
+          mean_vec = NMatrix.zeros([1, n_features])
+          subset_ids.each do |n|
+            z = @weight_vec.dot(x.row(n).transpose)[0]
+            coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
+            mean_vec += x.row(n) * coef
+          end
+          mean_vec *= eta / params[:batch_size]
+          @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+          # Scale the weight vector onto the ball of radius 1/sqrt(reg_param).
+          scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
+          @weight_vec *= [1.0, scaler].min
+        end
+        self
+      end
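The loop in fit is the mini-batch Pegasos update from the reference cited above, applied to the logistic loss. In symbols, for iteration $t = 1, \dots, \text{max\_iter}$, with mini batch $A_t$ of size $k$ (batch_size), regularization parameter $\lambda$ (reg_param), and binary labels $y_n \in \{-1, 1\}$:

$$\eta_t = \frac{1}{\lambda t}, \qquad \mathbf{w} \leftarrow (1 - \eta_t \lambda)\,\mathbf{w} + \frac{\eta_t}{k} \sum_{n \in A_t} \frac{y_n\, \mathbf{x}_n}{1 + \exp(y_n\, \mathbf{w}^\top \mathbf{x}_n)},$$

followed by the projection onto the ball of radius $1/\sqrt{\lambda}$ that Pegasos uses to bound the weight norm:

$$\mathbf{w} \leftarrow \min\!\left(1,\ \frac{1/\sqrt{\lambda}}{\lVert \mathbf{w} \rVert_2}\right) \mathbf{w}.$$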
+
+      # Calculate confidence scores for samples.
+      #
+      # :call-seq:
+      #   decision_function(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
+      # * *Returns* :
+      #   - Confidence score per sample.
+      def decision_function(x)
+        # Sigmoid of the margin: 1 / (1 + exp(-w.x)).
+        w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+        w.map { |v| 1.0 / v }
+      end
+
+      # Predict class labels for samples.
+      #
+      # :call-seq:
+      #   predict(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
+      # * *Returns* :
+      #   - Predicted class label per sample.
+      def predict(x)
+        decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
+      end
+
+      # Predict probability for samples.
+      #
+      # :call-seq:
+      #   predict_proba(x) -> NMatrix, shape: [1, n_samples]
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probabilities.
+      # * *Returns* :
+      #   - Predicted probability per sample.
+      def predict_proba(x)
+        decision_function(x)
+      end
+
+      # Calculate the mean accuracy of the given testing data.
+      #
+      # :call-seq:
+      #   score(x, y) -> Float
+      #
+      # * *Arguments* :
+      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
+      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
+      # * *Returns* :
+      #   - Mean accuracy
+      def score(x, y)
+        p = predict(x)
+        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
+        n_hits / y.size.to_f
+      end
+
+      # Serializes object through Marshal#dump.
+      def marshal_dump # :nodoc:
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+      end
+
+      # Deserialize object through Marshal#load.
+      def marshal_load(obj) # :nodoc:
+        self.params = obj[:params]
+        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end
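A minimal usage sketch for the new class, following the signatures documented above. The toy samples and ±1 labels are made up for illustration; since predict always returns labels in {-1, 1}, using ±1 inputs keeps score meaningful.

require 'nmatrix'
require 'svmkit'

# Two separable clusters; #fit treats the smallest label value as the negative class.
samples = NMatrix.new([4, 2], [0.0, 0.1, 0.1, 0.0, 0.9, 1.0, 1.0, 0.9])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

estimator = SVMKit::LinearModel::LogisticRegression.new(
  reg_param: 1.0, max_iter: 100, batch_size: 2, random_seed: 1
)
estimator.fit(samples, labels)

estimator.predict(samples)       # => class labels in {-1, 1}
estimator.predict_proba(samples) # => sigmoid scores for the positive class
estimator.score(samples, labels) # => mean accuracy as a Float
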
data/lib/svmkit/linear_model/pegasos_svc.rb
CHANGED
@@ -118,7 +118,7 @@ module SVMKit
       # Calculate the mean accuracy of the given testing data.
       #
       # :call-seq:
-      #
+      #   score(x, y) -> Float
       #
       # * *Arguments* :
       #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.

data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-
+date: 2017-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -93,6 +93,7 @@ files:
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/kernel_approximation/rbf.rb
+- lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/pegasos_svc.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb