svmkit 0.7.3 → 0.8.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- checksums.yaml +4 -4
- data/.gitignore +0 -9
- data/.rspec +1 -0
- data/.travis.yml +4 -12
- data/LICENSE.txt +1 -1
- data/README.md +11 -13
- data/lib/svmkit.rb +3 -66
- data/svmkit.gemspec +12 -7
- metadata +16 -81
- data/.coveralls.yml +0 -1
- data/.rubocop.yml +0 -47
- data/.rubocop_todo.yml +0 -58
- data/HISTORY.md +0 -168
- data/lib/svmkit/base/base_estimator.rb +0 -13
- data/lib/svmkit/base/classifier.rb +0 -34
- data/lib/svmkit/base/cluster_analyzer.rb +0 -29
- data/lib/svmkit/base/evaluator.rb +0 -13
- data/lib/svmkit/base/regressor.rb +0 -34
- data/lib/svmkit/base/splitter.rb +0 -17
- data/lib/svmkit/base/transformer.rb +0 -18
- data/lib/svmkit/clustering/dbscan.rb +0 -127
- data/lib/svmkit/clustering/k_means.rb +0 -140
- data/lib/svmkit/dataset.rb +0 -109
- data/lib/svmkit/decomposition/nmf.rb +0 -147
- data/lib/svmkit/decomposition/pca.rb +0 -150
- data/lib/svmkit/ensemble/ada_boost_classifier.rb +0 -198
- data/lib/svmkit/ensemble/ada_boost_regressor.rb +0 -180
- data/lib/svmkit/ensemble/random_forest_classifier.rb +0 -182
- data/lib/svmkit/ensemble/random_forest_regressor.rb +0 -143
- data/lib/svmkit/evaluation_measure/accuracy.rb +0 -30
- data/lib/svmkit/evaluation_measure/f_score.rb +0 -51
- data/lib/svmkit/evaluation_measure/log_loss.rb +0 -46
- data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +0 -30
- data/lib/svmkit/evaluation_measure/mean_squared_error.rb +0 -30
- data/lib/svmkit/evaluation_measure/normalized_mutual_information.rb +0 -63
- data/lib/svmkit/evaluation_measure/precision.rb +0 -51
- data/lib/svmkit/evaluation_measure/precision_recall.rb +0 -91
- data/lib/svmkit/evaluation_measure/purity.rb +0 -41
- data/lib/svmkit/evaluation_measure/r2_score.rb +0 -44
- data/lib/svmkit/evaluation_measure/recall.rb +0 -51
- data/lib/svmkit/kernel_approximation/rbf.rb +0 -136
- data/lib/svmkit/kernel_machine/kernel_svc.rb +0 -194
- data/lib/svmkit/linear_model/lasso.rb +0 -138
- data/lib/svmkit/linear_model/linear_regression.rb +0 -112
- data/lib/svmkit/linear_model/logistic_regression.rb +0 -161
- data/lib/svmkit/linear_model/ridge.rb +0 -112
- data/lib/svmkit/linear_model/sgd_linear_estimator.rb +0 -89
- data/lib/svmkit/linear_model/svc.rb +0 -184
- data/lib/svmkit/linear_model/svr.rb +0 -123
- data/lib/svmkit/model_selection/cross_validation.rb +0 -121
- data/lib/svmkit/model_selection/grid_search_cv.rb +0 -247
- data/lib/svmkit/model_selection/k_fold.rb +0 -77
- data/lib/svmkit/model_selection/stratified_k_fold.rb +0 -95
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +0 -101
- data/lib/svmkit/naive_bayes/naive_bayes.rb +0 -316
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +0 -112
- data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +0 -94
- data/lib/svmkit/optimizer/nadam.rb +0 -90
- data/lib/svmkit/optimizer/rmsprop.rb +0 -69
- data/lib/svmkit/optimizer/sgd.rb +0 -65
- data/lib/svmkit/optimizer/yellow_fin.rb +0 -144
- data/lib/svmkit/pairwise_metric.rb +0 -91
- data/lib/svmkit/pipeline/pipeline.rb +0 -197
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +0 -262
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +0 -194
- data/lib/svmkit/preprocessing/l2_normalizer.rb +0 -63
- data/lib/svmkit/preprocessing/label_encoder.rb +0 -95
- data/lib/svmkit/preprocessing/min_max_scaler.rb +0 -93
- data/lib/svmkit/preprocessing/one_hot_encoder.rb +0 -99
- data/lib/svmkit/preprocessing/standard_scaler.rb +0 -87
- data/lib/svmkit/probabilistic_output.rb +0 -112
- data/lib/svmkit/tree/decision_tree_classifier.rb +0 -276
- data/lib/svmkit/tree/decision_tree_regressor.rb +0 -251
- data/lib/svmkit/tree/node.rb +0 -70
- data/lib/svmkit/utils.rb +0 -22
- data/lib/svmkit/validation.rb +0 -79
- data/lib/svmkit/values.rb +0 -13
- data/lib/svmkit/version.rb +0 -7
--- data/lib/svmkit/linear_model/lasso.rb
+++ /dev/null
@@ -1,138 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/regressor'
-
-module SVMKit
-  module LinearModel
-    # Lasso is a class that implements Lasso Regression
-    # with stochastic gradient descent (SGD) optimization.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class Lasso < SGDLinearEstimator
-      include Base::Regressor
-      include Validation
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new Lasso regressor.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-        super
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [Lasso] The learned regressor itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-        n_features = x.shape[1]
-
-        if n_outputs > 1
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
-        end
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-      def predict(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about Lasso.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def partial_fit(x, y)
-        n_features = @params[:fit_bias] ? x.shape[1] + 1 : x.shape[1]
-        @left_weight = Numo::DFloat.zeros(n_features)
-        @right_weight = Numo::DFloat.zeros(n_features)
-        @left_optimizer = @params[:optimizer].dup
-        @right_optimizer = @params[:optimizer].dup
-        super
-      end
-
-      def calc_loss_gradient(x, y, weight)
-        2.0 * (x.dot(weight) - y)
-      end
-
-      def calc_new_weight(_optimizer, x, _weight, loss_gradient)
-        @left_weight = round_weight(@left_optimizer.call(@left_weight, calc_weight_gradient(loss_gradient, x)))
-        @right_weight = round_weight(@right_optimizer.call(@right_weight, calc_weight_gradient(-loss_gradient, x)))
-        @left_weight - @right_weight
-      end
-
-      def calc_weight_gradient(loss_gradient, data)
-        ((@params[:reg_param] + loss_gradient).expand_dims(1) * data).mean(0)
-      end
-
-      def round_weight(weight)
-        0.5 * (weight + weight.abs)
-      end
-    end
-  end
-end
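For orientation, here is a minimal usage sketch of the removed Lasso class as it behaved in svmkit 0.7.x (the toy arrays and parameter values below are illustrative assumptions, not taken from the package). Note how the non-differentiable L1 penalty is handled: the weights are kept as a difference of two non-negative vectors (@left_weight, @right_weight), each clipped to the positive orthant by round_weight after every optimizer step.

require 'svmkit'  # svmkit <= 0.7.x; these classes were removed in 0.8.x

# Toy data chosen so that y = x1 + x2 (illustrative only).
samples = Numo::DFloat[[1.0, 2.0], [2.0, 4.1], [3.0, 5.9], [4.0, 8.2]]
values  = Numo::DFloat[3.0, 6.1, 8.9, 12.2]

estimator = SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 4, random_seed: 1)
estimator.fit(samples, values)
estimator.predict(samples)  # => Numo::DFloat, shape [4]
estimator.weight_vec        # larger reg_param pushes more of these toward 0.0

# Persistence works via the marshal_dump/marshal_load pair defined above.
File.binwrite('lasso.dat', Marshal.dump(estimator))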
--- data/lib/svmkit/linear_model/linear_regression.rb
+++ /dev/null
@@ -1,112 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/regressor'
-
-module SVMKit
-  module LinearModel
-    # LinearRegression is a class that implements ordinary least squares linear regression
-    # with mini-batch stochastic gradient descent optimization.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    class LinearRegression < SGDLinearEstimator
-      include Base::Regressor
-      include Validation
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new ordinary least squares linear regressor.
-      #
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        check_params_float(bias_scale: bias_scale)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(max_iter: max_iter, batch_size: batch_size)
-        super(reg_param: 0.0, fit_bias: fit_bias, bias_scale: bias_scale,
-              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [LinearRegression] The learned regressor itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-        n_features = x.shape[1]
-
-        if n_outputs > 1
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
-        end
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-      def predict(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about LinearRegression.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def calc_loss_gradient(x, y, weight)
-        2.0 * (x.dot(weight) - y)
-      end
-    end
-  end
-end
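LinearRegression is the same SGD machinery with the penalty switched off: its initialize forwards reg_param: 0.0 to SGDLinearEstimator and only overrides the squared-error loss gradient. A minimal sketch under the same assumptions as above (toy data invented for illustration):

require 'svmkit'

# Noiseless line y = 2x, so the learned weight should approach 2.0.
samples = Numo::DFloat[[1.0], [2.0], [3.0], [4.0]]
values  = Numo::DFloat[2.0, 4.0, 6.0, 8.0]

estimator = SVMKit::LinearModel::LinearRegression.new(fit_bias: true, max_iter: 1000, batch_size: 4, random_seed: 1)
estimator.fit(samples, values)
estimator.weight_vec  # => roughly Numo::DFloat[2.0] after convergence
estimator.bias_term   # => near 0.0 for this noiseless toy set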
--- data/lib/svmkit/linear_model/logistic_regression.rb
+++ /dev/null
@@ -1,161 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/classifier'
-
-module SVMKit
-  module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    # with mini-batch stochastic gradient descent optimization.
-    # For multiclass classification problems, it uses the one-vs-the-rest strategy.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_labels)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
-    class LogisticRegression < SGDLinearEstimator
-      include Base::Classifier
-      include Validation
-
-      # Return the weight vector for Logistic Regression.
-      # @return [Numo::DFloat] (shape: [n_classes, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept) for Logistic Regression.
-      # @return [Numo::DFloat] (shape: [n_classes])
-      attr_reader :bias_term
-
-      # Return the class labels.
-      # @return [Numo::Int32] (shape: [n_classes])
-      attr_reader :classes
-
-      # Return the random generator for performing random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new classifier with Logistic Regression by the SGD optimization.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-        super
-        @classes = nil
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [LogisticRegression] The learned classifier itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_label_array(y)
-        check_sample_label_size(x, y)
-
-        @classes = Numo::Int32[*y.to_a.uniq.sort]
-        n_classes = @classes.size
-        n_features = x.shape[1]
-
-        if n_classes > 2
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          n_classes.times do |n|
-            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-            @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-          end
-        else
-          negative_label = y.to_a.uniq.min
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
-        end
-
-        self
-      end
-
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
-      def decision_function(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-      def predict(x)
-        check_sample_array(x)
-
-        return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
-
-        n_samples, = x.shape
-        decision_values = predict_proba(x)
-        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
-      end
-
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-      def predict_proba(x)
-        check_sample_array(x)
-
-        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-        return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
-
-        n_samples, = x.shape
-        probs = Numo::DFloat.zeros(n_samples, 2)
-        probs[true, 1] = proba
-        probs[true, 0] = 1.0 - proba
-        probs
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about LogisticRegression.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          classes: @classes,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @classes = obj[:classes]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def calc_loss_gradient(x, y, weight)
-        y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y
-      end
-    end
-  end
-end
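Two behaviors of the removed LogisticRegression are easy to miss in the code above: for binary problems, predict returns labels in {-1, 1} regardless of how the training labels were encoded, and for multiclass problems predict_proba normalizes the one-vs-rest sigmoid scores so each row sums to 1. A minimal binary sketch (toy data invented for illustration):

require 'svmkit'

# Labels encoded as -1/1 to match what this version's binary predict returns.
samples = Numo::DFloat[[-2.0, -1.5], [-1.0, -1.0], [1.0, 1.5], [2.0, 1.0]]
labels  = Numo::Int32[-1, -1, 1, 1]

estimator = SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 500, batch_size: 4, random_seed: 1)
estimator.fit(samples, labels)
estimator.predict(samples)        # => Numo::Int32[-1, -1, 1, 1] expected on this separable toy set
estimator.predict_proba(samples)  # => shape [4, 2]; columns ordered [negative, positive]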
--- data/lib/svmkit/linear_model/ridge.rb
+++ /dev/null
@@ -1,112 +0,0 @@
-# frozen_string_literal: true
-
-require 'svmkit/validation'
-require 'svmkit/linear_model/sgd_linear_estimator'
-require 'svmkit/base/regressor'
-
-module SVMKit
-  module LinearModel
-    # Ridge is a class that implements Ridge Regression
-    # with mini-batch stochastic gradient descent optimization.
-    #
-    # @example
-    #   estimator =
-    #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-    #   estimator.fit(training_samples, training_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    class Ridge < SGDLinearEstimator
-      include Base::Regressor
-      include Validation
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new Ridge regressor.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-        check_params_integer(max_iter: max_iter, batch_size: batch_size)
-        check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-        super
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [Ridge] The learned regressor itself.
-      def fit(x, y)
-        check_sample_array(x)
-        check_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-        n_features = x.shape[1]
-
-        if n_outputs > 1
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
-        end
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-      def predict(x)
-        check_sample_array(x)
-        x.dot(@weight_vec.transpose) + @bias_term
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about Ridge.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          bias_term: @bias_term,
-          rng: @rng }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @bias_term = obj[:bias_term]
-        @rng = obj[:rng]
-        nil
-      end
-
-      private
-
-      def calc_loss_gradient(x, y, weight)
-        2.0 * (x.dot(weight) - y)
-      end
-    end
-  end
-end
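Ridge exposes the same interface as Lasso but, unlike Lasso, overrides neither partial_fit nor calc_new_weight: the L2 penalty is presumably applied inside the shared SGDLinearEstimator base class (data/lib/svmkit/linear_model/sgd_linear_estimator.rb, also removed in this release but not shown in this section). A minimal sketch with invented data where y = x1 + 2*x2:

require 'svmkit'

samples = Numo::DFloat[[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 1.0]]
values  = Numo::DFloat[1.0, 2.0, 3.0, 4.0]

estimator = SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 4, random_seed: 1)
estimator.fit(samples, values)
estimator.predict(samples)  # => Numo::DFloat, shape [4]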