svmkit 0.2.4 → 0.2.5
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -5
- data/.rubocop_todo.yml +18 -0
- data/HISTORY.md +5 -0
- data/README.md +4 -4
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +1 -1
- data/lib/svmkit/linear_model/logistic_regression.rb +9 -4
- data/lib/svmkit/naive_bayes/naive_bayes.rb +304 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +28 -4
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +5 -5
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 42b8bbee820defc7646b422fa160ade5dd0ffddd
+  data.tar.gz: 5ba44c2c18a02231646456ab1ff73fee409c50c0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 23bdab14e55581e61b9050c88167dd74ebd5c086dfa2c37f57aaebe2110e8d7bac70df659ae34615c788d36cd03fac36663267ed07cf950564da7b1e496e9b59
+  data.tar.gz: c36b3565bd731e1613ee63f83ad83f2021e8c2c255267df00a0872ec8f1d9f2e9ab73edd10d5f2a5bef6ca1af8712aef4220434bc680f1f5228bf9727c94b6e4
data/.rubocop.yml
CHANGED
@@ -1,12 +1,13 @@
-
-
+inherit_from: .rubocop_todo.yml
+
+AllCops:
+  TargetRubyVersion: 2.2
+  DisplayCopNames: true
+  DisplayStyleGuide: true
 
 Documentation:
   Enabled: false
 
-Metrics/AbcSize:
-  Max: 30
-
 Metrics/LineLength:
   Max: 120
   IgnoredPatterns: ['(\A|\s)#']
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,18 @@
+# This configuration was generated by
+# `rubocop --auto-gen-config`
+# on 2018-02-04 11:34:28 +0900 using RuboCop version 0.52.1.
+# The point is for the user to remove these configuration records
+# one by one as the offenses are removed from the code base.
+# Note that changes in the inspected code, or installation of new
+# versions of RuboCop, may require this file to be generated again.
+
+# Offense count: 5
+Metrics/AbcSize:
+  Max: 70
+
+# Offense count: 4
+# Configuration parameters: .
+# SupportedStyles: annotated, template, unannotated
+Style/FormatStringToken:
+  EnforcedStyle: unannotated
+  Enabled: false
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
+# 0.2.5
+- Added classes for Naive Bayes classifier.
+- Fixed decision function method on Logistic Regression class.
+- Fixed method visibility on RBF kernel approximation class.
+
 # 0.2.4
 - Added class for Factorization Machine classifier.
 - Added classes for evaluation measures.
data/README.md
CHANGED
@@ -5,10 +5,10 @@
 [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
 
 SVMKit is a machine learninig library in Ruby.
-SVMKit
-
-
-
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+SVMKit currently supports Linear / Kernel Support Vector Machine,
+Logistic Regression, Factorization Machine, Naive Bayes,
+K-nearest neighbor classifier, and cross-validation.
 
 ## Installation
 
data/lib/svmkit.rb
CHANGED
@@ -16,6 +16,7 @@ require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/polynomial_model/factorization_machine_classifier'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/nearest_neighbors/k_neighbors_classifier'
+require 'svmkit/naive_bayes/naive_bayes'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
 require 'svmkit/preprocessing/standard_scaler'
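With this one-line addition, a plain `require 'svmkit'` also pulls in the naive bayes module. A quick smoke test, assuming the svmkit 0.2.5 gem is installed:

```ruby
require 'svmkit'

# These constructors only exist once svmkit/naive_bayes/naive_bayes is loaded;
# the keyword defaults below match the signatures added in this release.
SVMKit::NaiveBayes::GaussianNB.new
SVMKit::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
SVMKit::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
```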
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -112,8 +112,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-
-        w.map { |v| 1.0 / v }
+        @weight_vec.dot(x.transpose) + @bias_term
       end
 
       # Predict class labels for samples.
@@ -121,7 +120,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        Numo::Int32.cast(decision_function(x).map { |v| v >= 0.5 ? 1 : -1 })
+        Numo::Int32.cast(sigmoid(decision_function(x)).map { |v| v >= 0.5 ? 1 : -1 })
       end
 
       # Predict probability for samples.
@@ -131,7 +130,7 @@ module SVMKit
       def predict_proba(x)
         n_samples, = x.shape
         proba = Numo::DFloat.zeros(n_samples, 2)
-        proba[true, 1] = decision_function(x)
+        proba[true, 1] = sigmoid(decision_function(x))
         proba[true, 0] = 1.0 - proba[true, 1]
         proba
       end
@@ -162,6 +161,12 @@ module SVMKit
         @rng = obj[:rng]
         nil
       end
+
+      private
+
+      def sigmoid(x)
+        1.0 / (Numo::NMath.exp(-x) + 1.0)
+      end
     end
   end
 end
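The fix above changes what `decision_function` means: in 0.2.4 it applied an inverted-exponential transform itself, while after this patch it returns the raw margin and the new private `sigmoid` helper converts that margin to a probability inside `predict` and `predict_proba`. A minimal sketch of the patched behavior with hypothetical weights (assumes only the numo-narray gem):

```ruby
require 'numo/narray'

# Hypothetical fitted parameters, purely for illustration.
weight_vec = Numo::DFloat[2.0, -1.0]
bias_term  = 0.5
x = Numo::DFloat[[1.0, 0.0], [0.0, 2.0]]

# decision_function after the fix: the raw margin w.x + b.
margin = weight_vec.dot(x.transpose) + bias_term                # => [2.5, -1.5]

# predict/predict_proba now squash the margin through the sigmoid.
proba  = 1.0 / (Numo::NMath.exp(-margin) + 1.0)                 # values in (0, 1)
labels = Numo::Int32.cast(proba.map { |v| v >= 0.5 ? 1 : -1 })  # => [1, -1]
```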
data/lib/svmkit/naive_bayes/naive_bayes.rb
ADDED
@@ -0,0 +1,304 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement naive bayes models.
+  module NaiveBayes
+    # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
+    class BaseNaiveBayes
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        n_samples = x.shape.first
+        decision_values = decision_function(x)
+        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
+      end
+
+      # Predict log-probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
+      def predict_log_proba(x)
+        n_samples, = x.shape
+        log_likelihoods = decision_function(x)
+        log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        Numo::NMath.exp(predict_log_proba(x)).abs
+      end
+
+      # Claculate the mean accuracy of the given testing data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+      # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
+      def score(x, y)
+        evaluator = SVMKit::EvaluationMeasure::Accuracy.new
+        evaluator.score(y, predict(x))
+      end
+    end
+
+    # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::GaussianNB.new
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    class GaussianNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the mean vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :means
+
+      # Return the variance vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :variances
+
+      # Create a new classifier with Gaussian Naive Bayes.
+      def initialize
+        @params = {}
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [GaussianNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
+        @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) - 0.5 * (
+            Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
+            ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about GaussianNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          means: @means,
+          variances: @variances }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @means = obj[:means]
+        @variances = obj[:variances]
+        nil
+      end
+    end
+
+    # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
+    class MultinomialNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Multinomial Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [MultinomialNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        bin_x = x.gt(0)
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about MultinomialNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          feature_probs: @feature_probs }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @feature_probs = obj[:feature_probs]
+        nil
+      end
+    end
+
+    # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = SVMKit::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
+    class BernoulliNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Bernoulli Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      # @param bin_threshold [Float] The threshold for binarizing of features.
+      def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:bin_threshold] = bin_threshold
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [BernoulliNB] The learned classifier itself.
+      def fit(x, y)
+        n_samples, = x.shape
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
+        @class_priors = n_samples_each_class / n_samples
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_samples_each_class += 2.0 * @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        n_classes = @classes.size
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (
+            (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1) +
+            (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+
+      # Dump marshal data.
+      #
+      # @return [Hash] The marshal data about BernoulliNB.
+      def marshal_dump
+        { params: params,
+          classes: @classes,
+          class_priors: @class_priors,
+          feature_probs: @feature_probs }
+      end
+
+      # Load marshal data.
+      #
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @classes = obj[:classes]
+        @class_priors = obj[:class_priors]
+        @feature_probs = obj[:feature_probs]
+        nil
+      end
+    end
+  end
+end
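All three classifiers added above follow the same shape: `fit` estimates class priors plus per-class statistics, `decision_function` returns a matrix of per-class log-likelihoods, and the shared base class takes the argmax (`predict`) or normalizes (`predict_proba`, `predict_log_proba`) over it. A hedged usage sketch on made-up data (assumes svmkit 0.2.5 and numo-narray are installed):

```ruby
require 'svmkit'

# Toy two-class data, purely illustrative.
samples = Numo::DFloat[[1.0, 2.0], [1.2, 1.8], [8.0, 9.0], [8.2, 9.1]]
labels  = Numo::Int32[1, 1, -1, -1]

estimator = SVMKit::NaiveBayes::GaussianNB.new
estimator.fit(samples, labels)

estimator.predict(samples)        # => Numo::Int32[1, 1, -1, -1]
estimator.predict_proba(samples)  # columns follow the sorted classes [-1, 1]; rows sum to 1
estimator.score(samples, labels)  # => 1.0 (mean accuracy)
```

For count-like features, MultinomialNB and BernoulliNB swap the Gaussian statistics for smoothed feature frequencies or binarized occurrences, with `smoothing_param` playing the usual Laplace role.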
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb
CHANGED
@@ -7,12 +7,11 @@ module SVMKit
     # FactorizationMachineClassifier is a class that
     # implements Fatorization Machine for binary classification
     # with (mini-batch) stochastic gradient descent optimization.
-    # Note that this implementation uses hinge loss for the loss function.
     #
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
-    #      n_factors: 10, reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
+    #      n_factors: 10, loss: 'hinge', reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
@@ -43,6 +42,7 @@ module SVMKit
       # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
       #
       # @param n_factors [Integer] The maximum number of iterations.
+      # @param loss [String] The loss function ('hinge' or 'logistic').
       # @param reg_param_bias [Float] The regularization parameter for bias term.
       # @param reg_param_weight [Float] The regularization parameter for weight vector.
       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
@@ -50,10 +50,11 @@ module SVMKit
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
+      def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
                      init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
         @params = {}
         @params[:n_factors] = n_factors
+        @params[:loss] = loss
         @params[:reg_param_bias] = reg_param_bias
         @params[:reg_param_weight] = reg_param_weight
         @params[:reg_param_factor] = reg_param_factor
@@ -122,6 +123,19 @@ module SVMKit
         Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
       end
 
+      # Predict probability for samples.
+      # Note that this method works normally only if the 'loss' parameter is set to 'logistic'.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        n_samples, = x.shape
+        proba = Numo::DFloat.zeros(n_samples, 2)
+        proba[true, 1] = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+        proba[true, 0] = 1.0 - proba[true, 1]
+        proba
+      end
+
       # Claculate the mean accuracy of the given testing data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
@@ -152,13 +166,23 @@ module SVMKit
 
       private
 
-      def
+      def hinge_loss_gradient(x, y)
         evaluated = y * decision_function(x)
         gradient = Numo::DFloat.zeros(evaluated.size)
         gradient[evaluated < 1.0] = -y[evaluated < 1.0]
         gradient
       end
 
+      def logistic_loss_gradient(x, y)
+        evaluated = y * decision_function(x)
+        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
+        (sigmoid_func - 1.0) * y
+      end
+
+      def loss_gradient(x, y)
+        @params[:loss] == 'hinge' ? hinge_loss_gradient(x, y) : logistic_loss_gradient(x, y)
+      end
+
       def learning_rate(reg_param, iter)
         1.0 / (reg_param * (iter + 1))
       end
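The new `loss_gradient` dispatch keeps hinge as the default, while `loss: 'logistic'` is what makes the added `predict_proba` meaningful: the sigmoid of a hinge-trained margin is not a calibrated probability. A small sketch of the two gradients on hypothetical margins (assumes numo-narray; labels are in {-1, +1} as elsewhere in the class):

```ruby
require 'numo/narray'

y      = Numo::DFloat[1.0, -1.0, 1.0]  # labels in {-1, +1}
margin = Numo::DFloat[0.3, -2.0, 1.5]  # stand-in for decision_function(x)
evaluated = y * margin

# Hinge gradient: -y wherever the margin is violated (y * f(x) < 1), else 0.
hinge_grad = Numo::DFloat.zeros(evaluated.size)
hinge_grad[evaluated < 1.0] = -y[evaluated < 1.0]   # hinge_grad: [-1.0, 0.0, 0.0]

# Logistic gradient: (sigmoid(y * f(x)) - 1) * y, smooth everywhere.
sigmoid = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
logistic_grad = (sigmoid - 1.0) * y
```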
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -12,14 +12,14 @@ Gem::Specification.new do |spec|
 
   spec.summary = <<MSG
 SVMKit is a machine learninig library in Ruby.
-SVMKit
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
 MSG
   spec.description = <<MSG
 SVMKit is a machine learninig library in Ruby.
-SVMKit
-
-
-
+SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+SVMKit currently supports Linear / Kernel Support Vector Machine,
+Logistic Regression, Factorization Machine, Naive Bayes,
+K-nearest neighbor classifier, and cross-validation.
 MSG
   spec.homepage = 'https://github.com/yoshoku/svmkit'
   spec.license = 'BSD-2-Clause'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.
+  version: 0.2.5
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-02-
+date: 2018-02-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -82,10 +82,10 @@ dependencies:
     version: '0.15'
 description: |
   SVMKit is a machine learninig library in Ruby.
-  SVMKit
-
-
-
+  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
+  SVMKit currently supports Linear / Kernel Support Vector Machine,
+  Logistic Regression, Factorization Machine, Naive Bayes,
+  K-nearest neighbor classifier, and cross-validation.
 email:
 - yoshoku@outlook.com
 executables: []
@@ -95,6 +95,7 @@ files:
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
+- ".rubocop_todo.yml"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md
 - Gemfile
@@ -124,6 +125,7 @@ files:
 - lib/svmkit/model_selection/k_fold.rb
 - lib/svmkit/model_selection/stratified_k_fold.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
+- lib/svmkit/naive_bayes/naive_bayes.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
@@ -161,6 +163,6 @@ rubyforge_project:
 rubygems_version: 2.4.5.4
 signing_key:
 specification_version: 4
-summary: SVMKit is a machine learninig library in Ruby. SVMKit
-
+summary: SVMKit is a machine learninig library in Ruby. SVMKit provides machine learning
+  algorithms with interfaces similar to Scikit-Learn in Python.
 test_files: []