svmkit 0.7.3 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -9
- data/.rspec +1 -0
- data/.travis.yml +4 -12
- data/LICENSE.txt +1 -1
- data/README.md +11 -13
- data/lib/svmkit.rb +3 -66
- data/svmkit.gemspec +12 -7
- metadata +16 -81
- data/.coveralls.yml +0 -1
- data/.rubocop.yml +0 -47
- data/.rubocop_todo.yml +0 -58
- data/HISTORY.md +0 -168
- data/lib/svmkit/base/base_estimator.rb +0 -13
- data/lib/svmkit/base/classifier.rb +0 -34
- data/lib/svmkit/base/cluster_analyzer.rb +0 -29
- data/lib/svmkit/base/evaluator.rb +0 -13
- data/lib/svmkit/base/regressor.rb +0 -34
- data/lib/svmkit/base/splitter.rb +0 -17
- data/lib/svmkit/base/transformer.rb +0 -18
- data/lib/svmkit/clustering/dbscan.rb +0 -127
- data/lib/svmkit/clustering/k_means.rb +0 -140
- data/lib/svmkit/dataset.rb +0 -109
- data/lib/svmkit/decomposition/nmf.rb +0 -147
- data/lib/svmkit/decomposition/pca.rb +0 -150
- data/lib/svmkit/ensemble/ada_boost_classifier.rb +0 -198
- data/lib/svmkit/ensemble/ada_boost_regressor.rb +0 -180
- data/lib/svmkit/ensemble/random_forest_classifier.rb +0 -182
- data/lib/svmkit/ensemble/random_forest_regressor.rb +0 -143
- data/lib/svmkit/evaluation_measure/accuracy.rb +0 -30
- data/lib/svmkit/evaluation_measure/f_score.rb +0 -51
- data/lib/svmkit/evaluation_measure/log_loss.rb +0 -46
- data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +0 -30
- data/lib/svmkit/evaluation_measure/mean_squared_error.rb +0 -30
- data/lib/svmkit/evaluation_measure/normalized_mutual_information.rb +0 -63
- data/lib/svmkit/evaluation_measure/precision.rb +0 -51
- data/lib/svmkit/evaluation_measure/precision_recall.rb +0 -91
- data/lib/svmkit/evaluation_measure/purity.rb +0 -41
- data/lib/svmkit/evaluation_measure/r2_score.rb +0 -44
- data/lib/svmkit/evaluation_measure/recall.rb +0 -51
- data/lib/svmkit/kernel_approximation/rbf.rb +0 -136
- data/lib/svmkit/kernel_machine/kernel_svc.rb +0 -194
- data/lib/svmkit/linear_model/lasso.rb +0 -138
- data/lib/svmkit/linear_model/linear_regression.rb +0 -112
- data/lib/svmkit/linear_model/logistic_regression.rb +0 -161
- data/lib/svmkit/linear_model/ridge.rb +0 -112
- data/lib/svmkit/linear_model/sgd_linear_estimator.rb +0 -89
- data/lib/svmkit/linear_model/svc.rb +0 -184
- data/lib/svmkit/linear_model/svr.rb +0 -123
- data/lib/svmkit/model_selection/cross_validation.rb +0 -121
- data/lib/svmkit/model_selection/grid_search_cv.rb +0 -247
- data/lib/svmkit/model_selection/k_fold.rb +0 -77
- data/lib/svmkit/model_selection/stratified_k_fold.rb +0 -95
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +0 -101
- data/lib/svmkit/naive_bayes/naive_bayes.rb +0 -316
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +0 -112
- data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +0 -94
- data/lib/svmkit/optimizer/nadam.rb +0 -90
- data/lib/svmkit/optimizer/rmsprop.rb +0 -69
- data/lib/svmkit/optimizer/sgd.rb +0 -65
- data/lib/svmkit/optimizer/yellow_fin.rb +0 -144
- data/lib/svmkit/pairwise_metric.rb +0 -91
- data/lib/svmkit/pipeline/pipeline.rb +0 -197
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +0 -262
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +0 -194
- data/lib/svmkit/preprocessing/l2_normalizer.rb +0 -63
- data/lib/svmkit/preprocessing/label_encoder.rb +0 -95
- data/lib/svmkit/preprocessing/min_max_scaler.rb +0 -93
- data/lib/svmkit/preprocessing/one_hot_encoder.rb +0 -99
- data/lib/svmkit/preprocessing/standard_scaler.rb +0 -87
- data/lib/svmkit/probabilistic_output.rb +0 -112
- data/lib/svmkit/tree/decision_tree_classifier.rb +0 -276
- data/lib/svmkit/tree/decision_tree_regressor.rb +0 -251
- data/lib/svmkit/tree/node.rb +0 -70
- data/lib/svmkit/utils.rb +0 -22
- data/lib/svmkit/validation.rb +0 -79
- data/lib/svmkit/values.rb +0 -13
- data/lib/svmkit/version.rb +0 -7
data/lib/svmkit/linear_model/sgd_linear_estimator.rb
@@ -1,89 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/base/base_estimator'
-require 'svmkit/optimizer/nadam'
-
-module SVMKit
-  module LinearModel
-    # SGDLinearEstimator is an abstract class for implementing linear estimators
-    # with mini-batch stochastic gradient descent optimization.
-    # This class is used internally.
-    class SGDLinearEstimator
-      include Base::BaseEstimator
-
-      # Initialize a linear estimator.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        @params = {}
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-
-      private
-
-      def partial_fit(x, y)
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          sub_samples = samples[subset_ids, true]
-          sub_targets = y[subset_ids]
-          # Update weight.
-          loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
-          next if loss_gradient.ne(0.0).count.zero?
-          weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
-        end
-        split_weight(weight)
-      end
-
-      def calc_loss_gradient(_x, _y, _weight)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-
-      def calc_new_weight(optimizer, x, weight, loss_gradient)
-        weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
-        optimizer.call(weight, weight_gradient)
-      end
-
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight(weight)
-        if @params[:fit_bias]
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-    end
-  end
-end
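The removed `partial_fit` cycles through mini-batches by shifting `batch_size` indices off the front of the shuffled id list and pushing them back onto the tail, so samples are revisited in a fixed shuffled order. A minimal standalone sketch of just that rotation (plain Ruby, no SVMKit dependencies; the sizes are arbitrary):

```ruby
# Mini-batch index rotation as in the removed partial_fit: take
# batch_size ids from the front, then recycle them at the back.
rng = Random.new(1)
rand_ids = [*0...7].shuffle(random: rng)

3.times do
  subset_ids = rand_ids.shift(3) # ids for this mini-batch
  rand_ids.concat(subset_ids)    # recycled for later iterations
  p subset_ids
end
```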
data/lib/svmkit/linear_model/svc.rb
@@ -1,184 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/classifier'
-require 'svmkit/probabilistic_output'
-
-module SVMKit
-  # This module consists of the classes that implement generalized linear models.
-  module LinearModel
-    # SVC is a class that implements Support Vector Classifier
-    # with mini-batch stochastic gradient descent optimization.
-    # For multiclass classification problems, it uses the one-vs-the-rest strategy.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_labels)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    class SVC < SGDLinearEstimator
-      include Base::Classifier
-      include Validation
-
-      # Return the weight vector for SVC.
-      # @return [Numo::DFloat] (shape: [n_classes, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept) for SVC.
-      # @return [Numo::DFloat] (shape: [n_classes])
-      attr_reader :bias_term
-
-      # Return the class labels.
-      # @return [Numo::Int32] (shape: [n_classes])
-      attr_reader :classes
-
-      # Return the random generator for performing random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new classifier with Support Vector Machine by SGD optimization.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param probability [Boolean] The flag indicating whether to perform probability estimation.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias, probability: probability)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-        super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
-              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-        @params[:probability] = probability
-        @prob_param = nil
-        @classes = nil
-      end
-
-      # Fit the model with the given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [SVC] The learned classifier itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
-        check_sample_label_size(x, y)
-
-        @classes = Numo::Int32[*y.to_a.uniq.sort]
-        n_classes = @classes.size
-        n_features = x.shape[1]
-
-        if n_classes > 2
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          @prob_param = Numo::DFloat.zeros(n_classes, 2)
-          n_classes.times do |n|
-            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-            @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            @prob_param[n, true] = if @params[:probability]
-              SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose) + @bias_term[n], bin_y)
-            else
-              Numo::DFloat[1, 0]
-            end
-          end
-        else
-          negative_label = y.to_a.uniq.min
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
-          @prob_param = if @params[:probability]
-            SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
-          else
-            Numo::DFloat[1, 0]
-          end
-        end
-
-        self
-      end
-
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
-      def decision_function(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-      def predict(x)
-        check_sample_array(x)
-
-        return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
-
-        n_samples, = x.shape
-        decision_values = decision_function(x)
-        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
-      end
-
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-      def predict_proba(x)
-        check_sample_array(x)
-
-        if @classes.size > 2
-          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-          return (probs.transpose / probs.sum(axis: 1)).transpose
-        end
-
-        n_samples, = x.shape
-        probs = Numo::DFloat.zeros(n_samples, 2)
-        probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
-        probs[true, 0] = 1.0 - probs[true, 1]
-        probs
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about SVC.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          prob_param: @prob_param,
-          classes: @classes,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @prob_param = obj[:prob_param]
-        @classes = obj[:classes]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def calc_loss_gradient(x, y, weight)
-        target_ids = (x.dot(weight) * y).lt(1.0).where
-        grad = Numo::DFloat.zeros(@params[:batch_size])
-        grad[target_ids] = -y[target_ids]
-        grad
-      end
-    end
-  end
-end
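In the one-vs-rest loop of the removed `fit`, `Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1` recodes multiclass labels into the {-1, +1} binary targets the hinge-loss gradient expects. A standalone illustration of just that recoding (assumes only the numo-narray gem; the labels are made up):

```ruby
require 'numo/narray'

y = Numo::Int32[0, 1, 2, 1, 0]
classes = Numo::Int32[*y.to_a.uniq.sort] # => Numo::Int32[0, 1, 2]

classes.to_a.each do |label|
  # eq() yields a 0/1 bit mask; * 2 - 1 maps it to -1/+1.
  bin_y = Numo::Int32.cast(y.eq(label)) * 2 - 1
  p bin_y # for label 0: Numo::Int32[1, -1, -1, -1, 1]
end
```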
data/lib/svmkit/linear_model/svr.rb
@@ -1,123 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/regressor'
-
-module SVMKit
-  module LinearModel
-    # SVR is a class that implements Support Vector Regressor
-    # with mini-batch stochastic gradient descent optimization.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_target_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    class SVR < SGDLinearEstimator
-      include Base::Regressor
-      include Validation
-
-      # Return the weight vector for SVR.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept) for SVR.
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for performing random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new regressor with Support Vector Machine by SGD optimization.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param epsilon [Float] The margin of tolerance.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
-                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
-                              max_iter: max_iter, batch_size: batch_size)
-        super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
-              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-        @params[:epsilon] = epsilon
-      end
-
-      # Fit the model with the given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [SVR] The learned regressor itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-        n_features = x.shape[1]
-
-        if n_outputs > 1
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
-        end
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-      def predict(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about SVR.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def calc_loss_gradient(x, y, weight)
-        z = x.dot(weight)
-        grad = Numo::DFloat.zeros(@params[:batch_size])
-        grad[(z - y).gt(@params[:epsilon]).where] = 1
-        grad[(y - z).gt(@params[:epsilon]).where] = -1
-        grad
-      end
-    end
-  end
-end
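The removed `calc_loss_gradient` is the subgradient of the epsilon-insensitive loss: zero inside the tube |z - y| <= epsilon, and +1 or -1 outside it. A quick standalone check of that logic (assumes only the numo-narray gem; the values are made up):

```ruby
require 'numo/narray'

epsilon = 0.1
z = Numo::DFloat[1.0, 1.05, 0.7] # predictions
y = Numo::DFloat[0.8, 1.0, 0.9]  # targets

grad = Numo::DFloat.zeros(3)
grad[(z - y).gt(epsilon).where] = 1  # prediction overshoots the tube from above
grad[(y - z).gt(epsilon).where] = -1 # prediction overshoots the tube from below
p grad # => Numo::DFloat[1, 0, -1]
```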
data/lib/svmkit/model_selection/cross_validation.rb
@@ -1,121 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/base/base_estimator'
-require 'svmkit/base/classifier'
-require 'svmkit/base/regressor'
-require 'svmkit/base/splitter'
-require 'svmkit/base/evaluator'
-require 'svmkit/evaluation_measure/log_loss'
-
-module SVMKit
-  # This module consists of the classes for model validation techniques.
-  module ModelSelection
-    # CrossValidation is a class that evaluates a given classifier with the cross-validation method.
-    #
-    # @example
-    #   svc = SVMKit::LinearModel::SVC.new
-    #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5)
-    #   cv = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf)
-    #   report = cv.perform(samples, labels)
-    #   mean_test_score = report[:test_score].inject(:+) / kf.n_splits
-    #
-    class CrossValidation
-      # Return the classifier whose performance is evaluated.
-      # @return [Classifier]
-      attr_reader :estimator
-
-      # Return the splitter that divides the dataset.
-      # @return [Splitter]
-      attr_reader :splitter
-
-      # Return the evaluator that calculates the score.
-      # @return [Evaluator]
-      attr_reader :evaluator
-
-      # Return the flag indicating whether to calculate the score of the training dataset.
-      # @return [Boolean]
-      attr_reader :return_train_score
-
-      # Create a new evaluator with the cross-validation method.
-      #
-      # @param estimator [Classifier] The classifier whose performance is evaluated.
-      # @param splitter [Splitter] The splitter that divides the dataset into training and testing datasets.
-      # @param evaluator [Evaluator] The evaluator that calculates the score of estimator results.
-      # @param return_train_score [Boolean] The flag indicating whether to calculate the score of the training dataset.
-      def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
-        SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
-        SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
-        SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
-        SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
-        @estimator = estimator
-        @splitter = splitter
-        @evaluator = evaluator
-        @return_train_score = return_train_score
-      end
-
-      # Perform the evaluation of the given classifier with the cross-validation method.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
-      #   The dataset to be used to evaluate the estimator.
-      # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
-      #   The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
-      # @return [Hash] The report summarizing the results of cross-validation.
-      #   * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
-      #   * :test_score (Array<Float>) The scores of the testing dataset for each split.
-      #   * :train_score (Array<Float>) The scores of the training dataset for each split. This option is nil if
-      #     return_train_score is false.
-      def perform(x, y)
-        SVMKit::Validation.check_sample_array(x)
-        if @estimator.is_a?(SVMKit::Base::Classifier)
-          SVMKit::Validation.check_label_array(y)
-          SVMKit::Validation.check_sample_label_size(x, y)
-        end
-        if @estimator.is_a?(SVMKit::Base::Regressor)
-          SVMKit::Validation.check_tvalue_array(y)
-          SVMKit::Validation.check_sample_tvalue_size(x, y)
-        end
-        # Initialize the report of cross validation.
-        report = { test_score: [], train_score: nil, fit_time: [] }
-        report[:train_score] = [] if @return_train_score
-        # Evaluate the estimator on each split.
-        @splitter.split(x, y).each do |train_ids, test_ids|
-          # Split dataset into training and testing dataset.
-          feature_ids = !kernel_machine? || train_ids
-          train_x = x[train_ids, feature_ids]
-          train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
-          test_x = x[test_ids, feature_ids]
-          test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
-          # Fit the estimator.
-          start_time = Time.now.to_i
-          @estimator.fit(train_x, train_y)
-          # Calculate scores and prepare the report.
-          report[:fit_time].push(Time.now.to_i - start_time)
-          if @evaluator.nil?
-            report[:test_score].push(@estimator.score(test_x, test_y))
-            report[:train_score].push(@estimator.score(train_x, train_y)) if @return_train_score
-          elsif log_loss?
-            report[:test_score].push(@evaluator.score(test_y, @estimator.predict_proba(test_x)))
-            report[:train_score].push(@evaluator.score(train_y, @estimator.predict_proba(train_x))) if @return_train_score
-          else
-            report[:test_score].push(@evaluator.score(test_y, @estimator.predict(test_x)))
-            report[:train_score].push(@evaluator.score(train_y, @estimator.predict(train_x))) if @return_train_score
-          end
-        end
-        report
-      end
-
-      private
-
-      def kernel_machine?
-        class_name = @estimator.class.to_s
-        class_name = @estimator.params[:estimator].class.to_s if class_name.include?('Multiclass')
-        class_name.include?('KernelMachine')
-      end
-
-      def log_loss?
-        @evaluator.is_a?(SVMKit::EvaluationMeasure::LogLoss)
-      end
-    end
-  end
-end
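Putting the removed class to work would have looked roughly like the sketch below. This is written against the deleted 0.7.x API; the `KFold` keyword arguments are an assumption inferred from the also-removed data/lib/svmkit/model_selection/k_fold.rb, and the synthetic data is only for illustration.

```ruby
require 'svmkit'

# Synthetic regression data, purely for illustration.
samples = Numo::DFloat.new(100, 4).rand
values  = samples.dot(Numo::DFloat[1.0, -2.0, 0.5, 3.0])

svr = SVMKit::LinearModel::SVR.new(reg_param: 1.0, random_seed: 1)
kf  = SVMKit::ModelSelection::KFold.new(n_splits: 5, shuffle: true, random_seed: 1)
cv  = SVMKit::ModelSelection::CrossValidation.new(
  estimator: svr, splitter: kf, return_train_score: true
)

report = cv.perform(samples, values)
# report carries :fit_time, :test_score, and (with the flag above)
# :train_score, each an array with one entry per fold.
mean_test_score = report[:test_score].inject(:+) / kf.n_splits
```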