svmkit 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/HISTORY.md +5 -0
- data/README.md +5 -3
- data/lib/svmkit/base/evaluator.rb +12 -0
- data/lib/svmkit/evaluation_measure/accuracy.rb +24 -0
- data/lib/svmkit/evaluation_measure/f_score.rb +44 -0
- data/lib/svmkit/evaluation_measure/precision.rb +44 -0
- data/lib/svmkit/evaluation_measure/precision_recall.rb +89 -0
- data/lib/svmkit/evaluation_measure/recall.rb +44 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +6 -2
- data/lib/svmkit/model_selection/cross_validation.rb +15 -4
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +190 -0
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +6 -0
- data/svmkit.gemspec +7 -4
- metadata +16 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22a165e3fa579d5a5d0dbd9095d208dd09d5fbd0
|
4
|
+
data.tar.gz: c3f7fa2f4fe115dc821ec5050cf7c1f0fdd75a36
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4002a03944871f2fd9cf305fd64b02d4a1899b291167289d2f78112f977c54b33fc60041b2d2c506d755fb938e4d226002b1d6dc41ea74ad28d0e215adc0ee2b
|
7
|
+
data.tar.gz: 59dc53541aee6832ea66b826cc5f92e1329adba108aa2683ddba9105d8ac28fba7fae483ce1d2c092c60d83e9d3b3adfb574bf11401ad74f24435f237d262871
|
data/.rubocop.yml
CHANGED
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.2.4
|
2
|
+
- Added class for Factorization Machine classifier.
|
3
|
+
- Added classes for evaluation measures.
|
4
|
+
- Fixed the method for prediction of class probability in Logistic Regression.
|
5
|
+
|
1
6
|
# 0.2.3
|
2
7
|
- Added class for cross validation.
|
3
8
|
- Added specs for base modules.
|
data/README.md
CHANGED
@@ -4,9 +4,11 @@
|
|
4
4
|
[](https://badge.fury.io/rb/svmkit)
|
5
5
|
[](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
|
6
6
|
|
7
|
-
SVMKit is a
|
8
|
-
SVMKit implements machine learning algorithms with
|
9
|
-
|
7
|
+
SVMKit is a machine learning library in Ruby.
|
8
|
+
SVMKit implements machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
9
|
+
Since the development of SVMKit has just gotten started,
|
10
|
+
the methods implemented in SVMKit are only Linear / Kernel SVC,
|
11
|
+
Logistic Regression, Factorization Machine, K-nearest neighbor classifier, and cross-validation.
|
10
12
|
|
11
13
|
## Installation
|
12
14
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
|
2
|
+
module SVMKit
  module Base
    # Module for all evaluation measures in SVMKit.
    #
    # Classes that include this module are expected to override #score with
    # their own implementation.
    module Evaluator
      # An abstract method for evaluation of model.
      #
      # Any arguments are accepted and ignored, so that calling this
      # placeholder with the arguments an implementation would take reports
      # the missing override (NotImplementedError) rather than an
      # unrelated ArgumentError about the argument count.
      #
      # @raise [NotImplementedError] Always, until the including class overrides this method.
      def score(*_args)
        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'svmkit/base/evaluator'
|
2
|
+
|
3
|
+
module SVMKit
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Accuracy is a class that calculates the accuracy of classifier from the predicted labels.
    #
    # @example
    #   evaluator = SVMKit::EvaluationMeasure::Accuracy.new
    #   puts evaluator.score(ground_truth, predicted)
    class Accuracy
      include Base::Evaluator

      # Calculate mean accuracy, i.e. the fraction of positions at which the
      # predicted label equals the ground truth label.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Mean accuracy
      def score(y_true, y_pred)
        # One entry per sample: 1 for a correct prediction, 0 otherwise.
        hits = y_true.to_a.each_with_index.map { |truth, idx| truth == y_pred[idx] ? 1 : 0 }
        hits.reduce(:+) / y_true.size.to_f
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'svmkit/base/evaluator'
|
2
|
+
require 'svmkit/evaluation_measure/precision_recall'
|
3
|
+
|
4
|
+
module SVMKit
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # FScore is a class that calculates the F1-score of the predicted labels.
    #
    # @example
    #   evaluator = SVMKit::EvaluationMeasure::FScore.new
    #   puts evaluator.score(ground_truth, predicted)
    class FScore
      include Base::Evaluator
      include EvaluationMeasure::PrecisionRecall

      # Return the average type for calculation of F1-score.
      # @return [String] ('binary', 'micro', 'macro')
      attr_reader :average

      # Create a new evaluation measure calculator for F1-score.
      #
      # @param average [String] The average type ('binary', 'micro', 'macro')
      def initialize(average: 'binary')
        @average = average
      end

      # Calculate average F1-score.
      #
      # With 'binary' averaging the score of the last class (labels are
      # taken in ascending order) is returned. An average type other than
      # the three listed ones yields nil.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Average F1-score
      def score(y_true, y_pred)
        case @average
        when 'binary' then f_score_each_class(y_true, y_pred).last
        when 'micro' then micro_average_f_score(y_true, y_pred)
        when 'macro' then macro_average_f_score(y_true, y_pred)
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'svmkit/base/evaluator'
|
2
|
+
require 'svmkit/evaluation_measure/precision_recall'
|
3
|
+
|
4
|
+
module SVMKit
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Precision is a class that calculates the precision of the predicted labels.
    #
    # @example
    #   evaluator = SVMKit::EvaluationMeasure::Precision.new
    #   puts evaluator.score(ground_truth, predicted)
    class Precision
      include Base::Evaluator
      include EvaluationMeasure::PrecisionRecall

      # Return the average type for calculation of precision.
      # @return [String] ('binary', 'micro', 'macro')
      attr_reader :average

      # Create a new evaluation measure calculator for precision score.
      #
      # @param average [String] The average type ('binary', 'micro', 'macro')
      def initialize(average: 'binary')
        @average = average
      end

      # Calculate average precision.
      #
      # With 'binary' averaging the precision of the last class (labels are
      # taken in ascending order) is returned. An average type other than
      # the three listed ones yields nil.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Average precision
      def score(y_true, y_pred)
        case @average
        when 'binary' then precision_each_class(y_true, y_pred).last
        when 'micro' then micro_average_precision(y_true, y_pred)
        when 'macro' then macro_average_precision(y_true, y_pred)
        end
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'svmkit/base/evaluator'
|
2
|
+
|
3
|
+
module SVMKit
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Shared precision / recall / F1 computations used by the evaluation
    # measure classes. Labels are always enumerated from y_true in ascending
    # order, so per-class results follow that order.
    #
    # @!visibility private
    module PrecisionRecall
      module_function

      # Precision of every class found in y_true.
      # A class that is never predicted gets a precision of 0.0.
      #
      # @!visibility private
      def precision_each_class(y_true, y_pred)
        y_true.sort.to_a.uniq.map do |label|
          # Samples that were predicted as this label.
          target_positions = y_pred.eq(label)
          next 0.0 if y_pred[target_positions].empty?
          n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
          n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
          n_true_positives / (n_true_positives + n_false_positives)
        end
      end

      # Recall of every class found in y_true.
      #
      # @!visibility private
      def recall_each_class(y_true, y_pred)
        y_true.sort.to_a.uniq.map do |label|
          # Samples whose ground truth is this label.
          target_positions = y_true.eq(label)
          next 0.0 if y_pred[target_positions].empty?
          n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
          n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
          n_true_positives / (n_true_positives + n_false_negatives)
        end
      end

      # F1-score of every class found in y_true, i.e. the harmonic mean of
      # the per-class precision and recall (0.0 when both are zero).
      #
      # @!visibility private
      def f_score_each_class(y_true, y_pred)
        precision_each_class(y_true, y_pred).zip(recall_each_class(y_true, y_pred)).map do |precision, recall|
          next 0.0 if precision.zero? && recall.zero?
          (2.0 * precision * recall) / (precision + recall)
        end
      end

      # Micro-averaged precision: true positives summed over all classes,
      # divided by the summed number of predictions made for those classes.
      #
      # @!visibility private
      def micro_average_precision(y_true, y_pred)
        evaluated_values = y_true.sort.to_a.uniq.map do |label|
          target_positions = y_pred.eq(label)
          next [0.0, 0.0] if y_pred[target_positions].empty?
          n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
          n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
          [n_true_positives, n_true_positives + n_false_positives]
        end
        # Column-wise totals: [sum of true positives, sum of denominators].
        res = evaluated_values.transpose.map { |v| v.inject(:+) }
        res.first / res.last
      end

      # Micro-averaged recall: true positives summed over all classes,
      # divided by the summed number of ground truth samples of those classes.
      #
      # @!visibility private
      def micro_average_recall(y_true, y_pred)
        evaluated_values = y_true.sort.to_a.uniq.map do |label|
          target_positions = y_true.eq(label)
          # Every entry must be a [numerator, denominator] pair, otherwise
          # the transpose below cannot combine the per-class values.
          next [0.0, 0.0] if y_pred[target_positions].empty?
          n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
          n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
          [n_true_positives, n_true_positives + n_false_negatives]
        end
        res = evaluated_values.transpose.map { |v| v.inject(:+) }
        res.first / res.last
      end

      # Micro-averaged F1-score built from the micro-averaged precision and
      # recall. Returns 0.0 when both are zero, matching the per-class
      # F1-score, instead of dividing zero by zero.
      #
      # @!visibility private
      def micro_average_f_score(y_true, y_pred)
        precision = micro_average_precision(y_true, y_pred)
        recall = micro_average_recall(y_true, y_pred)
        return 0.0 if precision.zero? && recall.zero?
        (2.0 * precision * recall) / (precision + recall)
      end

      # Macro-averaged precision: unweighted mean of the per-class precision.
      #
      # @!visibility private
      def macro_average_precision(y_true, y_pred)
        precision_each_class(y_true, y_pred).inject(:+) / y_true.to_a.uniq.size
      end

      # Macro-averaged recall: unweighted mean of the per-class recall.
      #
      # @!visibility private
      def macro_average_recall(y_true, y_pred)
        recall_each_class(y_true, y_pred).inject(:+) / y_true.to_a.uniq.size
      end

      # Macro-averaged F1-score: unweighted mean of the per-class F1-score.
      #
      # @!visibility private
      def macro_average_f_score(y_true, y_pred)
        f_score_each_class(y_true, y_pred).inject(:+) / y_true.to_a.uniq.size
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'svmkit/base/evaluator'
|
2
|
+
require 'svmkit/evaluation_measure/precision_recall'
|
3
|
+
|
4
|
+
module SVMKit
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Recall is a class that calculates the recall of the predicted labels.
    #
    # @example
    #   evaluator = SVMKit::EvaluationMeasure::Recall.new
    #   puts evaluator.score(ground_truth, predicted)
    class Recall
      include Base::Evaluator
      include EvaluationMeasure::PrecisionRecall

      # Return the average type for calculation of recall.
      # @return [String] ('binary', 'micro', 'macro')
      attr_reader :average

      # Create a new evaluation measure calculator for recall score.
      #
      # @param average [String] The average type ('binary', 'micro', 'macro')
      def initialize(average: 'binary')
        @average = average
      end

      # Calculate average recall.
      #
      # With 'binary' averaging the recall of the last class (labels are
      # taken in ascending order) is returned. An average type other than
      # the three listed ones yields nil.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Average recall
      def score(y_true, y_pred)
        case @average
        when 'binary' then recall_each_class(y_true, y_pred).last
        when 'micro' then micro_average_recall(y_true, y_pred)
        when 'macro' then macro_average_recall(y_true, y_pred)
        end
      end
    end
  end
end
|
@@ -127,9 +127,13 @@ module SVMKit
|
|
127
127
|
# Predict probability for samples.
|
128
128
|
#
|
129
129
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
130
|
-
# @return [Numo::DFloat] (shape: [n_samples]) Predicted probability per sample.
|
130
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
131
131
|
def predict_proba(x)
  # Column 1 holds the value of the decision function for each sample,
  # column 0 holds its complement, so every row sums to one.
  proba = Numo::DFloat.zeros(x.shape.first, 2)
  proba[true, 1] = decision_function(x)
  proba[true, 0] = 1.0 - proba[true, 1]
  proba
end
|
134
138
|
|
135
139
|
# Calculate the mean accuracy of the given testing data.
|
@@ -21,6 +21,10 @@ module SVMKit
|
|
21
21
|
# @return [Splitter]
|
22
22
|
attr_reader :splitter
|
23
23
|
|
24
|
+
# Return the evaluator that calculates score.
|
25
|
+
# @return [Evaluator]
|
26
|
+
attr_reader :evaluator
|
27
|
+
|
24
28
|
# Return the flag indicating whether to calculate the score of training dataset.
|
25
29
|
# @return [Boolean]
|
26
30
|
attr_reader :return_train_score
|
@@ -29,10 +33,12 @@ module SVMKit
|
|
29
33
|
#
|
30
34
|
# @param estimator [Classifier] The classifier of which performance is evaluated.
|
31
35
|
# @param splitter [Splitter] The splitter that divides dataset to training and testing dataset.
|
36
|
+
# @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
|
32
37
|
# @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
|
33
|
-
def initialize(estimator: nil, splitter: nil, return_train_score: false)
|
38
|
+
def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
  # Collaborators used by the cross validation: the model under test, the
  # data splitter, and the optional evaluator.
  @estimator, @splitter, @evaluator = estimator, splitter, evaluator
  @return_train_score = return_train_score
end
|
38
44
|
|
@@ -49,7 +55,7 @@ module SVMKit
|
|
49
55
|
# the return_train_score is false.
|
50
56
|
def perform(x, y)
|
51
57
|
# Initialize the report of cross validation.
|
52
|
-
report = {test_score: [], train_score: nil, fit_time: []}
|
58
|
+
report = { test_score: [], train_score: nil, fit_time: [] }
|
53
59
|
report[:train_score] = [] if @return_train_score
|
54
60
|
# Evaluate the estimator on each split.
|
55
61
|
@splitter.split(x, y).each do |train_ids, test_ids|
|
@@ -64,8 +70,13 @@ module SVMKit
|
|
64
70
|
@estimator.fit(train_x, train_y)
|
65
71
|
# Calculate scores and prepare the report.
|
66
72
|
report[:fit_time].push(Time.now.to_i - start_time)
|
67
|
-
|
68
|
-
|
73
|
+
if @evaluator.nil?
  # No evaluator given: fall back to the estimator's own score method.
  report[:test_score].push(@estimator.score(test_x, test_y))
  report[:train_score].push(@estimator.score(train_x, train_y)) if @return_train_score
else
  # Evaluator given: score the predicted labels against the true labels,
  # for the training split exactly as for the testing split.
  report[:test_score].push(@evaluator.score(test_y, @estimator.predict(test_x)))
  report[:train_score].push(@evaluator.score(train_y, @estimator.predict(train_x))) if @return_train_score
end
|
69
80
|
end
|
70
81
|
report
|
71
82
|
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'svmkit/base/base_estimator'
|
2
|
+
require 'svmkit/base/classifier'
|
3
|
+
|
4
|
+
module SVMKit
  # This module consists of the classes that implement polynomial models.
  module PolynomialModel
    # FactorizationMachineClassifier is a class that
    # implements Factorization Machine for binary classification
    # with (mini-batch) stochastic gradient descent optimization.
    # Note that this implementation uses hinge loss for the loss function.
    #
    # @example
    #   estimator =
    #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
    #      n_factors: 10, reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
    #      max_iter: 5000, batch_size: 50, random_seed: 1)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
    # - S. Rendle, "Factorization Machines," Proceedings of the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
    class FactorizationMachineClassifier
      include Base::BaseEstimator
      include Base::Classifier

      # Return the factor matrix for Factorization Machine.
      # @return [Numo::DFloat] (shape: [n_factors, n_features])
      attr_reader :factor_mat

      # Return the weight vector for Factorization Machine.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :weight_vec

      # Return the bias term for Factorization Machine.
      # @return [Float]
      attr_reader :bias_term

      # Return the random generator used for shuffling the samples and
      # for initializing the factor matrix.
      # @return [Random]
      attr_reader :rng

      # Create a new classifier with Factorization Machine.
      #
      # @param n_factors [Integer] The number of factors, i.e. the number of rows of the factor matrix.
      # @param reg_param_bias [Float] The regularization parameter for bias term.
      # @param reg_param_weight [Float] The regularization parameter for weight vector.
      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_factors: 2, reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
                     init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
        @params = {}
        @params[:n_factors] = n_factors
        @params[:reg_param_bias] = reg_param_bias
        @params[:reg_param_weight] = reg_param_weight
        @params[:reg_param_factor] = reg_param_factor
        @params[:init_std] = init_std
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
        @params[:random_seed] = random_seed
        # Fall back to the value returned by Kernel#srand when no seed is given.
        @params[:random_seed] ||= srand
        @factor_mat = nil
        @weight_vec = nil
        @bias_term = 0.0
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [FactorizationMachineClassifier] The learned classifier itself.
      def fit(x, y)
        # Generate binary labels: the smallest label is the negative class (-1),
        # every other label is the positive class (+1).
        negative_label = y.to_a.uniq.sort.shift
        bin_y = y.map { |l| l != negative_label ? 1.0 : -1.0 }
        # Initialize some variables.
        n_samples, n_features = x.shape
        rand_ids = [*0...n_samples].shuffle(random: @rng)
        @factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
        @weight_vec = Numo::DFloat.zeros(n_features)
        @bias_term = 0.0
        # Start optimization.
        @params[:max_iter].times do |t|
          # Random sampling: take the next mini batch from the front of the
          # shuffled ids and append it to the back so the ids are cycled.
          subset_ids = rand_ids.shift(@params[:batch_size])
          rand_ids.concat(subset_ids)
          data = x[subset_ids, true]
          label = bin_y[subset_ids]
          # Calculate gradients for loss function.
          loss_grad = loss_gradient(data, label)
          # Nothing to update when no sample of the batch violates the margin.
          next if loss_grad.ne(0.0).count.zero?
          # Update each parameter.
          @bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad)
          @weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data)
          @params[:n_factors].times do |n|
            @factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
                                    factor_gradient(loss_grad, data, @factor_mat[n, true])
          end
        end
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
      def decision_function(x)
        linear_term = @bias_term + x.dot(@weight_vec)
        # The interaction matrices have shape [n_factors, n_samples]; reduce
        # over the factor axis only, so each sample keeps its own
        # interaction term instead of sharing one total over the whole batch.
        factor_term = 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
        linear_term + factor_term
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample (1 or -1).
      def predict(x)
        Numo::Int32.cast(decision_function(x).map { |v| v >= 0.0 ? 1 : -1 })
      end

      # Calculate the mean accuracy of the given testing data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
      # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
      # @return [Float] Mean accuracy
      def score(x, y)
        p = predict(x)
        n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
        n_hits / y.size.to_f
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about FactorizationMachineClassifier
      def marshal_dump
        { params: @params, factor_mat: @factor_mat, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @factor_mat = obj[:factor_mat]
        @weight_vec = obj[:weight_vec]
        @bias_term = obj[:bias_term]
        @rng = obj[:rng]
        nil
      end

      private

      # Subgradient of the hinge loss with respect to the decision value:
      # -y for samples with y * f(x) < 1, and 0 for all other samples.
      def loss_gradient(x, y)
        evaluated = y * decision_function(x)
        gradient = Numo::DFloat.zeros(evaluated.size)
        gradient[evaluated < 1.0] = -y[evaluated < 1.0]
        gradient
      end

      # Step size that decays with the iteration count: 1 / (reg_param * (iter + 1)).
      def learning_rate(reg_param, iter)
        1.0 / (reg_param * (iter + 1))
      end

      # Gradient for the bias term: batch mean of the loss gradient plus the regularization term.
      def bias_gradient(loss_grad)
        loss_grad.mean + @params[:reg_param_bias] * @bias_term
      end

      # Gradient for the weight vector: batch mean of the loss gradient
      # multiplied by each sample, plus the regularization term.
      def weight_gradient(loss_grad, data)
        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * @weight_vec
      end

      # Gradient for one row of the factor matrix (one factor), averaged over
      # the batch, plus the regularization term.
      def factor_gradient(loss_grad, data, factor)
        reg_term = @params[:reg_param_factor] * factor
        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
      end

      # Array of the given shape filled with values drawn from @rng.rand.
      def rand_uniform(shape)
        Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
      end

      # Array of the given shape filled with normally distributed values
      # (mean mu, standard deviation sigma), generated from two uniform
      # arrays with the Box-Muller transform.
      def rand_normal(shape, mu, sigma)
        a = rand_uniform(shape)
        b = rand_uniform(shape)
        mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(a)) * Numo::NMath.sin(2.0 * Math::PI * b))
      end
    end
  end
end
|
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -8,10 +8,12 @@ require 'svmkit/base/base_estimator'
|
|
8
8
|
require 'svmkit/base/classifier'
|
9
9
|
require 'svmkit/base/transformer'
|
10
10
|
require 'svmkit/base/splitter'
|
11
|
+
require 'svmkit/base/evaluator'
|
11
12
|
require 'svmkit/kernel_approximation/rbf'
|
12
13
|
require 'svmkit/linear_model/svc'
|
13
14
|
require 'svmkit/linear_model/logistic_regression'
|
14
15
|
require 'svmkit/kernel_machine/kernel_svc'
|
16
|
+
require 'svmkit/polynomial_model/factorization_machine_classifier'
|
15
17
|
require 'svmkit/multiclass/one_vs_rest_classifier'
|
16
18
|
require 'svmkit/nearest_neighbors/k_neighbors_classifier'
|
17
19
|
require 'svmkit/preprocessing/l2_normalizer'
|
@@ -20,3 +22,7 @@ require 'svmkit/preprocessing/standard_scaler'
|
|
20
22
|
require 'svmkit/model_selection/k_fold'
|
21
23
|
require 'svmkit/model_selection/stratified_k_fold'
|
22
24
|
require 'svmkit/model_selection/cross_validation'
|
25
|
+
require 'svmkit/evaluation_measure/accuracy'
|
26
|
+
require 'svmkit/evaluation_measure/precision'
|
27
|
+
require 'svmkit/evaluation_measure/recall'
|
28
|
+
require 'svmkit/evaluation_measure/f_score'
|
data/svmkit.gemspec
CHANGED
@@ -11,12 +11,15 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.email = ['yoshoku@outlook.com']
|
12
12
|
|
13
13
|
spec.summary = <<MSG
|
14
|
-
SVMKit is
|
14
|
+
SVMKit is a machine learninig library in Ruby.
|
15
|
+
SVMKit implements machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
15
16
|
MSG
|
16
17
|
spec.description = <<MSG
|
17
|
-
SVMKit is a
|
18
|
-
SVMKit implements machine learning algorithms with
|
19
|
-
|
18
|
+
SVMKit is a machine learninig library in Ruby.
|
19
|
+
SVMKit implements machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
20
|
+
Since the development of SVMKit has just getting started,
|
21
|
+
the methods implemented in SVMKit are only Linear / Kernel SVC,
|
22
|
+
Logistic Regression, Factorization Machine, K-nearest neighbor method, and cross-validation.
|
20
23
|
MSG
|
21
24
|
spec.homepage = 'https://github.com/yoshoku/svmkit'
|
22
25
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -81,9 +81,11 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.15'
|
83
83
|
description: |
|
84
|
-
SVMKit is a
|
85
|
-
SVMKit implements machine learning algorithms with
|
86
|
-
|
84
|
+
SVMKit is a machine learninig library in Ruby.
|
85
|
+
SVMKit implements machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
86
|
+
Since the development of SVMKit has just getting started,
|
87
|
+
the methods implemented in SVMKit are only Linear / Kernel SVC,
|
88
|
+
Logistic Regression, Factorization Machine, K-nearest neighbor method, and cross-validation.
|
87
89
|
email:
|
88
90
|
- yoshoku@outlook.com
|
89
91
|
executables: []
|
@@ -105,9 +107,15 @@ files:
|
|
105
107
|
- lib/svmkit.rb
|
106
108
|
- lib/svmkit/base/base_estimator.rb
|
107
109
|
- lib/svmkit/base/classifier.rb
|
110
|
+
- lib/svmkit/base/evaluator.rb
|
108
111
|
- lib/svmkit/base/splitter.rb
|
109
112
|
- lib/svmkit/base/transformer.rb
|
110
113
|
- lib/svmkit/dataset.rb
|
114
|
+
- lib/svmkit/evaluation_measure/accuracy.rb
|
115
|
+
- lib/svmkit/evaluation_measure/f_score.rb
|
116
|
+
- lib/svmkit/evaluation_measure/precision.rb
|
117
|
+
- lib/svmkit/evaluation_measure/precision_recall.rb
|
118
|
+
- lib/svmkit/evaluation_measure/recall.rb
|
111
119
|
- lib/svmkit/kernel_approximation/rbf.rb
|
112
120
|
- lib/svmkit/kernel_machine/kernel_svc.rb
|
113
121
|
- lib/svmkit/linear_model/logistic_regression.rb
|
@@ -118,6 +126,7 @@ files:
|
|
118
126
|
- lib/svmkit/multiclass/one_vs_rest_classifier.rb
|
119
127
|
- lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
|
120
128
|
- lib/svmkit/pairwise_metric.rb
|
129
|
+
- lib/svmkit/polynomial_model/factorization_machine_classifier.rb
|
121
130
|
- lib/svmkit/preprocessing/l2_normalizer.rb
|
122
131
|
- lib/svmkit/preprocessing/min_max_scaler.rb
|
123
132
|
- lib/svmkit/preprocessing/standard_scaler.rb
|
@@ -152,5 +161,6 @@ rubyforge_project:
|
|
152
161
|
rubygems_version: 2.4.5.4
|
153
162
|
signing_key:
|
154
163
|
specification_version: 4
|
155
|
-
summary: SVMKit is
|
164
|
+
summary: SVMKit is a machine learninig library in Ruby. SVMKit implements machine
|
165
|
+
learning algorithms with interfaces similar to Scikit-Learn in Python.
|
156
166
|
test_files: []
|