svmkit 0.7.3 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -9
  3. data/.rspec +1 -0
  4. data/.travis.yml +4 -12
  5. data/LICENSE.txt +1 -1
  6. data/README.md +11 -13
  7. data/lib/svmkit.rb +3 -66
  8. data/svmkit.gemspec +12 -7
  9. metadata +16 -81
  10. data/.coveralls.yml +0 -1
  11. data/.rubocop.yml +0 -47
  12. data/.rubocop_todo.yml +0 -58
  13. data/HISTORY.md +0 -168
  14. data/lib/svmkit/base/base_estimator.rb +0 -13
  15. data/lib/svmkit/base/classifier.rb +0 -34
  16. data/lib/svmkit/base/cluster_analyzer.rb +0 -29
  17. data/lib/svmkit/base/evaluator.rb +0 -13
  18. data/lib/svmkit/base/regressor.rb +0 -34
  19. data/lib/svmkit/base/splitter.rb +0 -17
  20. data/lib/svmkit/base/transformer.rb +0 -18
  21. data/lib/svmkit/clustering/dbscan.rb +0 -127
  22. data/lib/svmkit/clustering/k_means.rb +0 -140
  23. data/lib/svmkit/dataset.rb +0 -109
  24. data/lib/svmkit/decomposition/nmf.rb +0 -147
  25. data/lib/svmkit/decomposition/pca.rb +0 -150
  26. data/lib/svmkit/ensemble/ada_boost_classifier.rb +0 -198
  27. data/lib/svmkit/ensemble/ada_boost_regressor.rb +0 -180
  28. data/lib/svmkit/ensemble/random_forest_classifier.rb +0 -182
  29. data/lib/svmkit/ensemble/random_forest_regressor.rb +0 -143
  30. data/lib/svmkit/evaluation_measure/accuracy.rb +0 -30
  31. data/lib/svmkit/evaluation_measure/f_score.rb +0 -51
  32. data/lib/svmkit/evaluation_measure/log_loss.rb +0 -46
  33. data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +0 -30
  34. data/lib/svmkit/evaluation_measure/mean_squared_error.rb +0 -30
  35. data/lib/svmkit/evaluation_measure/normalized_mutual_information.rb +0 -63
  36. data/lib/svmkit/evaluation_measure/precision.rb +0 -51
  37. data/lib/svmkit/evaluation_measure/precision_recall.rb +0 -91
  38. data/lib/svmkit/evaluation_measure/purity.rb +0 -41
  39. data/lib/svmkit/evaluation_measure/r2_score.rb +0 -44
  40. data/lib/svmkit/evaluation_measure/recall.rb +0 -51
  41. data/lib/svmkit/kernel_approximation/rbf.rb +0 -136
  42. data/lib/svmkit/kernel_machine/kernel_svc.rb +0 -194
  43. data/lib/svmkit/linear_model/lasso.rb +0 -138
  44. data/lib/svmkit/linear_model/linear_regression.rb +0 -112
  45. data/lib/svmkit/linear_model/logistic_regression.rb +0 -161
  46. data/lib/svmkit/linear_model/ridge.rb +0 -112
  47. data/lib/svmkit/linear_model/sgd_linear_estimator.rb +0 -89
  48. data/lib/svmkit/linear_model/svc.rb +0 -184
  49. data/lib/svmkit/linear_model/svr.rb +0 -123
  50. data/lib/svmkit/model_selection/cross_validation.rb +0 -121
  51. data/lib/svmkit/model_selection/grid_search_cv.rb +0 -247
  52. data/lib/svmkit/model_selection/k_fold.rb +0 -77
  53. data/lib/svmkit/model_selection/stratified_k_fold.rb +0 -95
  54. data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +0 -101
  55. data/lib/svmkit/naive_bayes/naive_bayes.rb +0 -316
  56. data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +0 -112
  57. data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +0 -94
  58. data/lib/svmkit/optimizer/nadam.rb +0 -90
  59. data/lib/svmkit/optimizer/rmsprop.rb +0 -69
  60. data/lib/svmkit/optimizer/sgd.rb +0 -65
  61. data/lib/svmkit/optimizer/yellow_fin.rb +0 -144
  62. data/lib/svmkit/pairwise_metric.rb +0 -91
  63. data/lib/svmkit/pipeline/pipeline.rb +0 -197
  64. data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +0 -262
  65. data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +0 -194
  66. data/lib/svmkit/preprocessing/l2_normalizer.rb +0 -63
  67. data/lib/svmkit/preprocessing/label_encoder.rb +0 -95
  68. data/lib/svmkit/preprocessing/min_max_scaler.rb +0 -93
  69. data/lib/svmkit/preprocessing/one_hot_encoder.rb +0 -99
  70. data/lib/svmkit/preprocessing/standard_scaler.rb +0 -87
  71. data/lib/svmkit/probabilistic_output.rb +0 -112
  72. data/lib/svmkit/tree/decision_tree_classifier.rb +0 -276
  73. data/lib/svmkit/tree/decision_tree_regressor.rb +0 -251
  74. data/lib/svmkit/tree/node.rb +0 -70
  75. data/lib/svmkit/utils.rb +0 -22
  76. data/lib/svmkit/validation.rb +0 -79
  77. data/lib/svmkit/values.rb +0 -13
  78. data/lib/svmkit/version.rb +0 -7
data/lib/svmkit/linear_model/sgd_linear_estimator.rb
@@ -1,89 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/base/base_estimator'
- require 'svmkit/optimizer/nadam'
-
- module SVMKit
-   module LinearModel
-     # SGDLinearEstimator is an abstract class for implementing linear estimators
-     # with mini-batch stochastic gradient descent optimization.
-     # This class is used for internal processing.
-     class SGDLinearEstimator
-       include Base::BaseEstimator
-
-       # Initialize a linear estimator.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                      max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-         @params = {}
-         @params[:reg_param] = reg_param
-         @params[:fit_bias] = fit_bias
-         @params[:bias_scale] = bias_scale
-         @params[:max_iter] = max_iter
-         @params[:batch_size] = batch_size
-         @params[:optimizer] = optimizer
-         @params[:optimizer] ||= Optimizer::Nadam.new
-         @params[:random_seed] = random_seed
-         @params[:random_seed] ||= srand
-         @weight_vec = nil
-         @bias_term = nil
-         @rng = Random.new(@params[:random_seed])
-       end
-
-       private
-
-       def partial_fit(x, y)
-         # Expand feature vectors for bias term.
-         samples = @params[:fit_bias] ? expand_feature(x) : x
-         # Initialize some variables.
-         n_samples, n_features = samples.shape
-         rand_ids = [*0...n_samples].shuffle(random: @rng)
-         weight = Numo::DFloat.zeros(n_features)
-         optimizer = @params[:optimizer].dup
-         # Optimization.
-         @params[:max_iter].times do |_t|
-           # Random sampling: cycle through the shuffled ids one mini-batch at a time.
-           subset_ids = rand_ids.shift(@params[:batch_size])
-           rand_ids.concat(subset_ids)
-           sub_samples = samples[subset_ids, true]
-           sub_targets = y[subset_ids]
-           # Update weight.
-           loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
-           next if loss_gradient.ne(0.0).count.zero?
-           weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
-         end
-         split_weight(weight)
-       end
-
-       def calc_loss_gradient(_x, _y, _weight)
-         raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-       end
-
-       def calc_new_weight(optimizer, x, weight, loss_gradient)
-         weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
-         optimizer.call(weight, weight_gradient)
-       end
-
-       def expand_feature(x)
-         n_samples = x.shape[0]
-         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-       end
-
-       def split_weight(weight)
-         if @params[:fit_bias]
-           [weight[0...-1].dup, weight[-1]]
-         else
-           [weight, 0.0]
-         end
-       end
-     end
-   end
- end
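
To make the role of this abstract class concrete, here is a minimal sketch of a subclass written against the 0.7.3 code removed above. The LeastSquaresEstimator name, its public fit method, and the squared-loss gradient are illustrative only and not part of the gem; a concrete estimator merely supplies calc_loss_gradient, and the inherited partial_fit (private, but callable from subclasses) runs the mini-batch SGD loop.

require 'svmkit'

# Hypothetical subclass for illustration; not part of svmkit.
class LeastSquaresEstimator < SVMKit::LinearModel::SGDLinearEstimator
  # Expose the inherited mini-batch SGD loop through a public fit method.
  def fit(x, y)
    @weight_vec, @bias_term = partial_fit(x, y)
    self
  end

  private

  # Per-sample gradient of the squared loss 0.5 * (x.w - y)**2 w.r.t. the margin x.w.
  def calc_loss_gradient(x, y, weight)
    x.dot(weight) - y
  end
end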
data/lib/svmkit/linear_model/svc.rb
@@ -1,184 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/classifier'
- require 'svmkit/probabilistic_output'
-
- module SVMKit
-   # This module consists of the classes that implement generalized linear models.
-   module LinearModel
-     # SVC is a class that implements Support Vector Classifier
-     # with mini-batch stochastic gradient descent optimization.
-     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_labels)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-     class SVC < SGDLinearEstimator
-       include Base::Classifier
-       include Validation
-
-       # Return the weight vector for SVC.
-       # @return [Numo::DFloat] (shape: [n_classes, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept) for SVC.
-       # @return [Numo::DFloat] (shape: [n_classes])
-       attr_reader :bias_term
-
-       # Return the class labels.
-       # @return [Numo::Int32] (shape: [n_classes])
-       attr_reader :classes
-
-       # Return the random generator for performing random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new classifier with Support Vector Machine by the SGD optimization.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param probability [Boolean] The flag indicating whether to perform probability estimation.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                      max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
-         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias, probability: probability)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-         super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
-               max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-         @params[:probability] = probability
-         @prob_param = nil
-         @classes = nil
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-       # @return [SVC] The learned classifier itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_label_array(y)
-         check_sample_label_size(x, y)
-
-         @classes = Numo::Int32[*y.to_a.uniq.sort]
-         n_classes = @classes.size
-         n_features = x.shape[1]
-
-         if n_classes > 2
-           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-           @bias_term = Numo::DFloat.zeros(n_classes)
-           @prob_param = Numo::DFloat.zeros(n_classes, 2)
-           n_classes.times do |n|
-             bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-             @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-             @prob_param[n, true] = if @params[:probability]
-                                      SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose) + @bias_term[n], bin_y)
-                                    else
-                                      Numo::DFloat[1, 0]
-                                    end
-           end
-         else
-           negative_label = y.to_a.uniq.min
-           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-           @weight_vec, @bias_term = partial_fit(x, bin_y)
-           @prob_param = if @params[:probability]
-                           SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
-                         else
-                           Numo::DFloat[1, 0]
-                         end
-         end
-
-         self
-       end
-
-       # Calculate confidence scores for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
-       def decision_function(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Predict class labels for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-       def predict(x)
-         check_sample_array(x)
-
-         return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
-
-         n_samples, = x.shape
-         decision_values = decision_function(x)
-         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
-       end
-
-       # Predict probability for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-       def predict_proba(x)
-         check_sample_array(x)
-
-         if @classes.size > 2
-           probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-           return (probs.transpose / probs.sum(axis: 1)).transpose
-         end
-
-         n_samples, = x.shape
-         probs = Numo::DFloat.zeros(n_samples, 2)
-         probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
-         probs[true, 0] = 1.0 - probs[true, 1]
-         probs
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about SVC.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           prob_param: @prob_param,
-           classes: @classes,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @prob_param = obj[:prob_param]
-         @classes = obj[:classes]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       # Subgradient of the hinge loss: -y for samples violating the margin, 0 otherwise.
-       def calc_loss_gradient(x, y, weight)
-         target_ids = (x.dot(weight) * y).lt(1.0).where
-         grad = Numo::DFloat.zeros(@params[:batch_size])
-         grad[target_ids] = -y[target_ids]
-         grad
-       end
-     end
-   end
- end
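
For reference, a usage sketch of the removed SVC, expanded from the @example in its documentation. It only applies to svmkit 0.7.x (the class is deleted in 0.8.1), and the toy samples, labels, and parameter values are placeholders.

require 'svmkit'

samples = Numo::DFloat[[-2.0, -1.0], [-1.5, -0.5], [1.0, 1.5], [2.0, 1.0]]
labels  = Numo::Int32[-1, -1, 1, 1]

estimator = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000,
                                         batch_size: 2, probability: true, random_seed: 1)
estimator.fit(samples, labels)
estimator.predict(samples)       # => Numo::Int32 of -1/+1 labels (binary case)
estimator.predict_proba(samples) # => Numo::DFloat, shape [n_samples, 2]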
data/lib/svmkit/linear_model/svr.rb
@@ -1,123 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/regressor'
-
- module SVMKit
-   module LinearModel
-     # SVR is a class that implements Support Vector Regressor
-     # with mini-batch stochastic gradient descent optimization.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_target_values)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-     class SVR < SGDLinearEstimator
-       include Base::Regressor
-       include Validation
-
-       # Return the weight vector for SVR.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept) for SVR.
-       # @return [Numo::DFloat] (shape: [n_outputs])
-       attr_reader :bias_term
-
-       # Return the random generator for performing random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new regressor with Support Vector Machine by the SGD optimization.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param epsilon [Float] The margin of tolerance.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
-                      max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
-         check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
-                               max_iter: max_iter, batch_size: batch_size)
-         super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
-               max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-         @params[:epsilon] = epsilon
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-       # @return [SVR] The learned regressor itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_tvalue_array(y)
-         check_sample_tvalue_size(x, y)
-
-         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-         n_features = x.shape[1]
-
-         if n_outputs > 1
-           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-           @bias_term = Numo::DFloat.zeros(n_outputs)
-           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-         else
-           @weight_vec, @bias_term = partial_fit(x, y)
-         end
-
-         self
-       end
-
-       # Predict values for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-       def predict(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about SVR.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       # Subgradient of the epsilon-insensitive loss: 0 inside the epsilon tube, +/-1 outside it.
-       def calc_loss_gradient(x, y, weight)
-         z = x.dot(weight)
-         grad = Numo::DFloat.zeros(@params[:batch_size])
-         grad[(z - y).gt(@params[:epsilon]).where] = 1
-         grad[(y - z).gt(@params[:epsilon]).where] = -1
-         grad
-       end
-     end
-   end
- end
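
A corresponding usage sketch for the removed SVR, again valid only against svmkit 0.7.x; the synthetic regression data and parameter values are placeholders (fit_bias is enabled so the intercept can be learned).

require 'svmkit'

x = Numo::DFloat.new(20, 1).rand
y = 2.0 * x[true, 0] + 0.5  # single-output target, shape [n_samples]

estimator = SVMKit::LinearModel::SVR.new(reg_param: 0.01, epsilon: 0.1, fit_bias: true,
                                         max_iter: 1000, batch_size: 10, random_seed: 1)
estimator.fit(x, y)
estimator.predict(x)  # => Numo::DFloat of predicted values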
data/lib/svmkit/model_selection/cross_validation.rb
@@ -1,121 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/base/base_estimator'
- require 'svmkit/base/classifier'
- require 'svmkit/base/regressor'
- require 'svmkit/base/splitter'
- require 'svmkit/base/evaluator'
- require 'svmkit/evaluation_measure/log_loss'
-
- module SVMKit
-   # This module consists of the classes for model validation techniques.
-   module ModelSelection
-     # CrossValidation is a class that evaluates a given classifier with the cross-validation method.
-     #
-     # @example
-     #   svc = SVMKit::LinearModel::SVC.new
-     #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5)
-     #   cv = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf)
-     #   report = cv.perform(samples, labels)
-     #   mean_test_score = report[:test_score].inject(:+) / kf.n_splits
-     #
-     class CrossValidation
-       # Return the classifier whose performance is evaluated.
-       # @return [Classifier]
-       attr_reader :estimator
-
-       # Return the splitter that divides the dataset.
-       # @return [Splitter]
-       attr_reader :splitter
-
-       # Return the evaluator that calculates the score.
-       # @return [Evaluator]
-       attr_reader :evaluator
-
-       # Return the flag indicating whether to calculate the score of the training dataset.
-       # @return [Boolean]
-       attr_reader :return_train_score
-
-       # Create a new evaluator with the cross-validation method.
-       #
-       # @param estimator [Classifier] The classifier whose performance is evaluated.
-       # @param splitter [Splitter] The splitter that divides the dataset into training and testing datasets.
-       # @param evaluator [Evaluator] The evaluator that calculates the score of the estimator results.
-       # @param return_train_score [Boolean] The flag indicating whether to calculate the score of the training dataset.
-       def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
-         SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
-         SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
-         SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
-         SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
-         @estimator = estimator
-         @splitter = splitter
-         @evaluator = evaluator
-         @return_train_score = return_train_score
-       end
-
-       # Perform the evaluation of the given classifier with the cross-validation method.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features])
-       #   The dataset to be used to evaluate the estimator.
-       # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
-       #   The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
-       # @return [Hash] The report summarizing the results of cross-validation.
-       #   * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
-       #   * :test_score (Array<Float>) The scores of the testing dataset for each split.
-       #   * :train_score (Array<Float>) The scores of the training dataset for each split. This option is nil if
-       #     return_train_score is false.
-       def perform(x, y)
-         SVMKit::Validation.check_sample_array(x)
-         if @estimator.is_a?(SVMKit::Base::Classifier)
-           SVMKit::Validation.check_label_array(y)
-           SVMKit::Validation.check_sample_label_size(x, y)
-         end
-         if @estimator.is_a?(SVMKit::Base::Regressor)
-           SVMKit::Validation.check_tvalue_array(y)
-           SVMKit::Validation.check_sample_tvalue_size(x, y)
-         end
-         # Initialize the report of cross validation.
-         report = { test_score: [], train_score: nil, fit_time: [] }
-         report[:train_score] = [] if @return_train_score
-         # Evaluate the estimator on each split.
-         @splitter.split(x, y).each do |train_ids, test_ids|
-           # Split the dataset into training and testing subsets. feature_ids is true
-           # (all columns) for ordinary estimators; for kernel machines it is train_ids,
-           # so the precomputed kernel matrix is restricted to training columns.
-           feature_ids = !kernel_machine? || train_ids
-           train_x = x[train_ids, feature_ids]
-           train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
-           test_x = x[test_ids, feature_ids]
-           test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
-           # Fit the estimator.
-           start_time = Time.now.to_i
-           @estimator.fit(train_x, train_y)
-           # Calculate scores and prepare the report.
-           report[:fit_time].push(Time.now.to_i - start_time)
-           if @evaluator.nil?
-             report[:test_score].push(@estimator.score(test_x, test_y))
-             report[:train_score].push(@estimator.score(train_x, train_y)) if @return_train_score
-           elsif log_loss?
-             report[:test_score].push(@evaluator.score(test_y, @estimator.predict_proba(test_x)))
-             report[:train_score].push(@evaluator.score(train_y, @estimator.predict_proba(train_x))) if @return_train_score
-           else
-             report[:test_score].push(@evaluator.score(test_y, @estimator.predict(test_x)))
-             report[:train_score].push(@evaluator.score(train_y, @estimator.predict(train_x))) if @return_train_score
-           end
-         end
-         report
-       end
-
-       private
-
-       def kernel_machine?
-         class_name = @estimator.class.to_s
-         class_name = @estimator.params[:estimator].class.to_s if class_name.include?('Multiclass')
-         class_name.include?('KernelMachine')
-       end
-
-       def log_loss?
-         @evaluator.is_a?(SVMKit::EvaluationMeasure::LogLoss)
-       end
-     end
-   end
- end
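
Putting the removed pieces together, an end-to-end sketch of the cross-validation workflow on svmkit 0.7.x, expanded from the @example above. The random data is a placeholder; labels are coded -1/+1 because the binary predict of SVC returns -1/+1.

require 'svmkit'

samples = Numo::DFloat.new(100, 4).rand
labels  = Numo::Int32.new(100).rand(2) * 2 - 1  # two classes coded as -1/+1

svc = SVMKit::LinearModel::SVC.new(random_seed: 1)
kf  = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5)
cv  = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf,
                                                  return_train_score: true)

report = cv.perform(samples, labels)
mean_test_score = report[:test_score].inject(:+) / kf.n_splits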