svmkit 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -5
- data/.rubocop_todo.yml +18 -0
- data/HISTORY.md +5 -0
- data/README.md +4 -4
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +1 -1
- data/lib/svmkit/linear_model/logistic_regression.rb +9 -4
- data/lib/svmkit/naive_bayes/naive_bayes.rb +304 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +28 -4
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +5 -5
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 42b8bbee820defc7646b422fa160ade5dd0ffddd
+  data.tar.gz: 5ba44c2c18a02231646456ab1ff73fee409c50c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 23bdab14e55581e61b9050c88167dd74ebd5c086dfa2c37f57aaebe2110e8d7bac70df659ae34615c788d36cd03fac36663267ed07cf950564da7b1e496e9b59
+  data.tar.gz: c36b3565bd731e1613ee63f83ad83f2021e8c2c255267df00a0872ec8f1d9f2e9ab73edd10d5f2a5bef6ca1af8712aef4220434bc680f1f5228bf9727c94b6e4
data/.rubocop.yml
CHANGED
@@ -1,12 +1,13 @@
-
-
+inherit_from: .rubocop_todo.yml
+
+AllCops:
+  TargetRubyVersion: 2.2
+  DisplayCopNames: true
+  DisplayStyleGuide: true
 
 Documentation:
   Enabled: false
 
-Metrics/AbcSize:
-  Max: 30
-
 Metrics/LineLength:
   Max: 120
   IgnoredPatterns: ['(\A|\s)#']
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,18 @@
+# This configuration was generated by
+# `rubocop --auto-gen-config`
+# on 2018-02-04 11:34:28 +0900 using RuboCop version 0.52.1.
+# The point is for the user to remove these configuration records
+# one by one as the offenses are removed from the code base.
+# Note that changes in the inspected code, or installation of new
+# versions of RuboCop, may require this file to be generated again.
+
+# Offense count: 5
+Metrics/AbcSize:
+  Max: 70
+
+# Offense count: 4
+# Configuration parameters: .
+# SupportedStyles: annotated, template, unannotated
+Style/FormatStringToken:
+  EnforcedStyle: unannotated
+  Enabled: false
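The header comments describe how this file is produced; assuming the rubocop gem is installed, the todo list can be regenerated after code changes with a single command (as the file's own header states):

    rubocop --auto-gen-config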
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
+# 0.2.5
+- Added classes for Naive Bayes classifier.
+- Fixed decision function method on Logistic Regression class.
+- Fixed method visibility on RBF kernel approximation class.
+
 # 0.2.4
 - Added class for Factorization Machine classifier.
 - Added classes for evaluation measures.
data/README.md
CHANGED
@@ -5,10 +5,10 @@
 [](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
 
 SVMKit is a machine learninig library in Ruby.
-SVMKit
-
-
-
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+SVMKit currently supports Linear / Kernel Support Vector Machine,
+Logistic Regression, Factorization Machine, Naive Bayes,
+K-nearest neighbor classifier, and cross-validation.
 
 ## Installation
 
data/lib/svmkit.rb
CHANGED
@@ -16,6 +16,7 @@ require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/polynomial_model/factorization_machine_classifier'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/nearest_neighbors/k_neighbors_classifier'
+require 'svmkit/naive_bayes/naive_bayes'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
 require 'svmkit/preprocessing/standard_scaler'
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -112,8 +112,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-
-        w.map { |v| 1.0 / v }
+        @weight_vec.dot(x.transpose) + @bias_term
       end
 
       # Predict class labels for samples.
@@ -121,7 +120,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        Numo::Int32.cast(decision_function(x).map { |v| v >= 0.5 ? 1 : -1 })
+        Numo::Int32.cast(sigmoid(decision_function(x)).map { |v| v >= 0.5 ? 1 : -1 })
       end
 
       # Predict probability for samples.
@@ -131,7 +130,7 @@ module SVMKit
       def predict_proba(x)
         n_samples, = x.shape
         proba = Numo::DFloat.zeros(n_samples, 2)
-        proba[true, 1] = decision_function(x)
+        proba[true, 1] = sigmoid(decision_function(x))
         proba[true, 0] = 1.0 - proba[true, 1]
         proba
       end
@@ -162,6 +161,12 @@ module SVMKit
        @rng = obj[:rng]
        nil
      end
+
+      private
+
+      def sigmoid(x)
+        1.0 / (Numo::NMath.exp(-x) + 1.0)
+      end
     end
   end
 end
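The change above splits the old combined computation in two: decision_function now returns the raw margin w.x + b, and the new private sigmoid helper maps that margin to a probability inside predict and predict_proba. A minimal sketch of the resulting pipeline, with made-up margin values standing in for decision_function output:

    require 'numo/narray'

    margin = Numo::DFloat[-2.0, 0.0, 3.0]            # what decision_function now returns (made-up values)
    proba  = 1.0 / (Numo::NMath.exp(-margin) + 1.0)  # the sigmoid applied by predict_proba
    labels = Numo::Int32.cast(proba.map { |v| v >= 0.5 ? 1 : -1 }) # the thresholding done by predict
    # proba  => approximately [0.119, 0.5, 0.953]
    # labels => [-1, 1, 1]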
data/lib/svmkit/naive_bayes/naive_bayes.rb
ADDED
@@ -0,0 +1,304 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement naive bayes models.
+  module NaiveBayes
+    # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
+    class BaseNaiveBayes
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        n_samples = x.shape.first
+        decision_values = decision_function(x)
+        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
+      end
+
+      # Predict log-probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
+      def predict_log_proba(x)
+        n_samples, = x.shape
+        log_likelihoods = decision_function(x)
+        log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        Numo::NMath.exp(predict_log_proba(x)).abs
+      end
+
+      # Claculate the mean accuracy of the given testing data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+      # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
+      def score(x, y)
+        evaluator = SVMKit::EvaluationMeasure::Accuracy.new
+        evaluator.score(y, predict(x))
+      end
+    end
+
+    # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::GaussianNB.new
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    class GaussianNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the mean vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :means
+
+      # Return the variance vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :variances
+
+      # Create a new classifier with Gaussian Naive Bayes.
+      def initialize
+        @params = {}
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [GaussianNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
+        @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) - 0.5 * (
+            Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
+            ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about GaussianNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          means: @means,
+          variances: @variances }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @means = obj[:means]
+        @variances = obj[:variances]
+        nil
+      end
+    end
+
+    # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
+    class MultinomialNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Multinomial Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [MultinomialNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        bin_x = x.gt(0)
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about MultinomialNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          feature_probs: @feature_probs }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @feature_probs = obj[:feature_probs]
+        nil
+      end
+    end
+
+    # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
+    class BernoulliNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Bernoulli Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      # @param bin_threshold [Float] The threshold for binarizing of features.
+      def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:bin_threshold] = bin_threshold
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [BernoulliNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
+        @class_priors = n_samples_each_class / n_samples
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_samples_each_class += 2.0 * @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (
+            (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1) +
+            (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about BernoulliNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          feature_probs: @feature_probs }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @feature_probs = obj[:feature_probs]
+        nil
+      end
+    end
+  end
+end
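Since lib/svmkit.rb now requires this file, all three classes are available after require 'svmkit'. A minimal usage sketch following the @example comments above; the toy samples and labels are made up:

    require 'svmkit'

    # Made-up toy data: four samples, three features, labels 0 and 1.
    samples = Numo::DFloat[[1.0, 0.0, 2.0], [2.0, 1.0, 0.0], [0.0, 3.0, 1.0], [1.0, 2.0, 2.0]]
    labels  = Numo::Int32[0, 0, 1, 1]

    estimator = SVMKit::NaiveBayes::GaussianNB.new
    estimator.fit(samples, labels)
    estimator.predict(samples)        # => Numo::Int32, one predicted label per sample
    estimator.predict_proba(samples)  # => Numo::DFloat, one column per class
    estimator.score(samples, labels)  # => mean accuracy as a Float

    # The discrete variants take smoothing options:
    SVMKit::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
    SVMKit::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)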
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb
CHANGED
@@ -7,12 +7,11 @@ module SVMKit
     # FactorizationMachineClassifier is a class that
     # implements Fatorization Machine for binary classification
     # with (mini-batch) stochastic gradient descent optimization.
-    # Note that this implementation uses hinge loss for the loss function.
     #
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
-    #      n_factors: 10, reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
+    #      n_factors: 10, loss: 'hinge', reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
@@ -43,6 +42,7 @@ module SVMKit
       # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
       #
       # @param n_factors [Integer] The maximum number of iterations.
+      # @param loss [String] The loss function ('hinge' or 'logistic').
       # @param reg_param_bias [Float] The regularization parameter for bias term.
       # @param reg_param_weight [Float] The regularization parameter for weight vector.
       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
@@ -50,10 +50,11 @@ module SVMKit
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
+      def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
                      init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
         @params = {}
         @params[:n_factors] = n_factors
+        @params[:loss] = loss
         @params[:reg_param_bias] = reg_param_bias
         @params[:reg_param_weight] = reg_param_weight
         @params[:reg_param_factor] = reg_param_factor
@@ -122,6 +123,19 @@ module SVMKit
         Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
       end
 
+      # Predict probability for samples.
+      # Note that this method works normally only if the 'loss' parameter is set to 'logistic'.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        n_samples, = x.shape
+        proba = Numo::DFloat.zeros(n_samples, 2)
+        proba[true, 1] = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+        proba[true, 0] = 1.0 - proba[true, 1]
+        proba
+      end
+
       # Claculate the mean accuracy of the given testing data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
@@ -152,13 +166,23 @@ module SVMKit
 
       private
 
-      def
+      def hinge_loss_gradient(x, y)
         evaluated = y * decision_function(x)
         gradient = Numo::DFloat.zeros(evaluated.size)
         gradient[evaluated < 1.0] = -y[evaluated < 1.0]
         gradient
       end
 
+      def logistic_loss_gradient(x, y)
+        evaluated = y * decision_function(x)
+        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
+        (sigmoid_func - 1.0) * y
+      end
+
+      def loss_gradient(x, y)
+        @params[:loss] == 'hinge' ? hinge_loss_gradient(x, y) : logistic_loss_gradient(x, y)
+      end
+
       def learning_rate(reg_param, iter)
         1.0 / (reg_param * (iter + 1))
       end
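The new loss parameter selects which gradient is used during fitting, and, as the comment in the diff notes, predict_proba is only meaningful when the model was trained with loss: 'logistic'. A sketch with made-up data and arbitrary hyperparameters:

    require 'svmkit'

    # Made-up toy data; labels must be -1 or 1 for this binary classifier.
    samples = Numo::DFloat[[-2.0, -1.0], [-1.5, -2.0], [1.0, 2.0], [2.0, 1.5]]
    labels  = Numo::Int32[-1, -1, 1, 1]

    estimator = SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
      n_factors: 2, loss: 'logistic',
      reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
      max_iter: 1000, batch_size: 2, random_seed: 1)
    estimator.fit(samples, labels)
    estimator.predict(samples)        # => class labels in {-1, 1}
    estimator.predict_proba(samples)  # column 1 holds P(y = 1), column 0 holds 1 - P(y = 1)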
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -12,14 +12,14 @@ Gem::Specification.new do |spec|
 
   spec.summary = <<MSG
 SVMKit is a machine learninig library in Ruby.
-SVMKit
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
 MSG
   spec.description = <<MSG
 SVMKit is a machine learninig library in Ruby.
-SVMKit
-
-
-
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+SVMKit currently supports Linear / Kernel Support Vector Machine,
+Logistic Regression, Factorization Machine, Naive Bayes,
+K-nearest neighbor classifier, and cross-validation.
 MSG
   spec.homepage = 'https://github.com/yoshoku/svmkit'
   spec.license = 'BSD-2-Clause'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-02-
+date: 2018-02-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -82,10 +82,10 @@ dependencies:
     version: '0.15'
 description: |
   SVMKit is a machine learninig library in Ruby.
-  SVMKit
-
-
-
+  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+  SVMKit currently supports Linear / Kernel Support Vector Machine,
+  Logistic Regression, Factorization Machine, Naive Bayes,
+  K-nearest neighbor classifier, and cross-validation.
 email:
 - yoshoku@outlook.com
 executables: []
@@ -95,6 +95,7 @@ files:
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
+- ".rubocop_todo.yml"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md
 - Gemfile
@@ -124,6 +125,7 @@ files:
 - lib/svmkit/model_selection/k_fold.rb
 - lib/svmkit/model_selection/stratified_k_fold.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
+- lib/svmkit/naive_bayes/naive_bayes.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
@@ -161,6 +163,6 @@ rubyforge_project:
 rubygems_version: 2.4.5.4
 signing_key:
 specification_version: 4
-summary: SVMKit is a machine learninig library in Ruby. SVMKit
-
+summary: SVMKit is a machine learninig library in Ruby. SVMKit provides machine learning
+  algorithms with interfaces similar to Scikit-Learn in Python.
 test_files: []