svmkit 0.7.3 → 0.8.1

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -9
  3. data/.rspec +1 -0
  4. data/.travis.yml +4 -12
  5. data/LICENSE.txt +1 -1
  6. data/README.md +11 -13
  7. data/lib/svmkit.rb +3 -66
  8. data/svmkit.gemspec +12 -7
  9. metadata +16 -81
  10. data/.coveralls.yml +0 -1
  11. data/.rubocop.yml +0 -47
  12. data/.rubocop_todo.yml +0 -58
  13. data/HISTORY.md +0 -168
  14. data/lib/svmkit/base/base_estimator.rb +0 -13
  15. data/lib/svmkit/base/classifier.rb +0 -34
  16. data/lib/svmkit/base/cluster_analyzer.rb +0 -29
  17. data/lib/svmkit/base/evaluator.rb +0 -13
  18. data/lib/svmkit/base/regressor.rb +0 -34
  19. data/lib/svmkit/base/splitter.rb +0 -17
  20. data/lib/svmkit/base/transformer.rb +0 -18
  21. data/lib/svmkit/clustering/dbscan.rb +0 -127
  22. data/lib/svmkit/clustering/k_means.rb +0 -140
  23. data/lib/svmkit/dataset.rb +0 -109
  24. data/lib/svmkit/decomposition/nmf.rb +0 -147
  25. data/lib/svmkit/decomposition/pca.rb +0 -150
  26. data/lib/svmkit/ensemble/ada_boost_classifier.rb +0 -198
  27. data/lib/svmkit/ensemble/ada_boost_regressor.rb +0 -180
  28. data/lib/svmkit/ensemble/random_forest_classifier.rb +0 -182
  29. data/lib/svmkit/ensemble/random_forest_regressor.rb +0 -143
  30. data/lib/svmkit/evaluation_measure/accuracy.rb +0 -30
  31. data/lib/svmkit/evaluation_measure/f_score.rb +0 -51
  32. data/lib/svmkit/evaluation_measure/log_loss.rb +0 -46
  33. data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +0 -30
  34. data/lib/svmkit/evaluation_measure/mean_squared_error.rb +0 -30
  35. data/lib/svmkit/evaluation_measure/normalized_mutual_information.rb +0 -63
  36. data/lib/svmkit/evaluation_measure/precision.rb +0 -51
  37. data/lib/svmkit/evaluation_measure/precision_recall.rb +0 -91
  38. data/lib/svmkit/evaluation_measure/purity.rb +0 -41
  39. data/lib/svmkit/evaluation_measure/r2_score.rb +0 -44
  40. data/lib/svmkit/evaluation_measure/recall.rb +0 -51
  41. data/lib/svmkit/kernel_approximation/rbf.rb +0 -136
  42. data/lib/svmkit/kernel_machine/kernel_svc.rb +0 -194
  43. data/lib/svmkit/linear_model/lasso.rb +0 -138
  44. data/lib/svmkit/linear_model/linear_regression.rb +0 -112
  45. data/lib/svmkit/linear_model/logistic_regression.rb +0 -161
  46. data/lib/svmkit/linear_model/ridge.rb +0 -112
  47. data/lib/svmkit/linear_model/sgd_linear_estimator.rb +0 -89
  48. data/lib/svmkit/linear_model/svc.rb +0 -184
  49. data/lib/svmkit/linear_model/svr.rb +0 -123
  50. data/lib/svmkit/model_selection/cross_validation.rb +0 -121
  51. data/lib/svmkit/model_selection/grid_search_cv.rb +0 -247
  52. data/lib/svmkit/model_selection/k_fold.rb +0 -77
  53. data/lib/svmkit/model_selection/stratified_k_fold.rb +0 -95
  54. data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +0 -101
  55. data/lib/svmkit/naive_bayes/naive_bayes.rb +0 -316
  56. data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +0 -112
  57. data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +0 -94
  58. data/lib/svmkit/optimizer/nadam.rb +0 -90
  59. data/lib/svmkit/optimizer/rmsprop.rb +0 -69
  60. data/lib/svmkit/optimizer/sgd.rb +0 -65
  61. data/lib/svmkit/optimizer/yellow_fin.rb +0 -144
  62. data/lib/svmkit/pairwise_metric.rb +0 -91
  63. data/lib/svmkit/pipeline/pipeline.rb +0 -197
  64. data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +0 -262
  65. data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +0 -194
  66. data/lib/svmkit/preprocessing/l2_normalizer.rb +0 -63
  67. data/lib/svmkit/preprocessing/label_encoder.rb +0 -95
  68. data/lib/svmkit/preprocessing/min_max_scaler.rb +0 -93
  69. data/lib/svmkit/preprocessing/one_hot_encoder.rb +0 -99
  70. data/lib/svmkit/preprocessing/standard_scaler.rb +0 -87
  71. data/lib/svmkit/probabilistic_output.rb +0 -112
  72. data/lib/svmkit/tree/decision_tree_classifier.rb +0 -276
  73. data/lib/svmkit/tree/decision_tree_regressor.rb +0 -251
  74. data/lib/svmkit/tree/node.rb +0 -70
  75. data/lib/svmkit/utils.rb +0 -22
  76. data/lib/svmkit/validation.rb +0 -79
  77. data/lib/svmkit/values.rb +0 -13
  78. data/lib/svmkit/version.rb +0 -7
data/lib/svmkit/linear_model/lasso.rb
@@ -1,138 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/regressor'
-
- module SVMKit
-   module LinearModel
-     # Lasso is a class that implements Lasso Regression
-     # with stochastic gradient descent (SGD) optimization.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_values)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-     # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-     class Lasso < SGDLinearEstimator
-       include Base::Regressor
-       include Validation
-
-       # Return the weight vector.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept).
-       # @return [Numo::DFloat] (shape: [n_outputs])
-       attr_reader :bias_term
-
-       # Return the random generator for random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new Lasso regressor.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-         super
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-       # @return [Lasso] The learned regressor itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_tvalue_array(y)
-         check_sample_tvalue_size(x, y)
-
-         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-         n_features = x.shape[1]
-
-         if n_outputs > 1
-           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-           @bias_term = Numo::DFloat.zeros(n_outputs)
-           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-         else
-           @weight_vec, @bias_term = partial_fit(x, y)
-         end
-
-         self
-       end
-
-       # Predict values for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-       def predict(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about Lasso.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       def partial_fit(x, y)
-         n_features = @params[:fit_bias] ? x.shape[1] + 1 : x.shape[1]
-         @left_weight = Numo::DFloat.zeros(n_features)
-         @right_weight = Numo::DFloat.zeros(n_features)
-         @left_optimizer = @params[:optimizer].dup
-         @right_optimizer = @params[:optimizer].dup
-         super
-       end
-
-       def calc_loss_gradient(x, y, weight)
-         2.0 * (x.dot(weight) - y)
-       end
-
-       def calc_new_weight(_optimizer, x, _weight, loss_gradient)
-         @left_weight = round_weight(@left_optimizer.call(@left_weight, calc_weight_gradient(loss_gradient, x)))
-         @right_weight = round_weight(@right_optimizer.call(@right_weight, calc_weight_gradient(-loss_gradient, x)))
-         @left_weight - @right_weight
-       end
-
-       def calc_weight_gradient(loss_gradient, data)
-         ((@params[:reg_param] + loss_gradient).expand_dims(1) * data).mean(0)
-       end
-
-       def round_weight(weight)
-         0.5 * (weight + weight.abs)
-       end
-     end
-   end
- end
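The left/right weight split above is how the deleted Lasso handles the L1 penalty's non-differentiability at zero: the weight vector is kept as the difference of two nonnegative vectors, each half is moved against the loss gradient plus the constant penalty reg_param, and negatives are clipped to zero (round_weight's 0.5 * (w + w.abs) is just max(w, 0)). A minimal sketch of one such update, using plain Ruby arrays and a fixed learning rate in place of Numo::DFloat and the Nadam optimizer; lasso_step, grad, and lr are illustrative names, and the penalty is added to the already-averaged gradient here, whereas the deleted code adds it per sample before averaging:

def lasso_step(w_plus, w_minus, grad, reg_param, lr)
  # Move each nonnegative half against (loss gradient +/- L1 penalty)...
  new_plus  = w_plus.each_with_index.map  { |w, i| w - lr * (grad[i] + reg_param) }
  new_minus = w_minus.each_with_index.map { |w, i| w - lr * (-grad[i] + reg_param) }
  # ...then clip negatives to zero, as round_weight does.
  new_plus.map!  { |w| [w, 0.0].max }
  new_minus.map! { |w| [w, 0.0].max }
  [new_plus, new_minus] # effective weight: new_plus[i] - new_minus[i]
end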
data/lib/svmkit/linear_model/linear_regression.rb
@@ -1,112 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/regressor'
-
- module SVMKit
-   module LinearModel
-     # LinearRegression is a class that implements ordinary least square linear regression
-     # with mini-batch stochastic gradient descent optimization.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_values)
-     #   results = estimator.predict(testing_samples)
-     #
-     class LinearRegression < SGDLinearEstimator
-       include Base::Regressor
-       include Validation
-
-       # Return the weight vector.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept).
-       # @return [Numo::DFloat] (shape: [n_outputs])
-       attr_reader :bias_term
-
-       # Return the random generator for random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new ordinary least square linear regressor.
-       #
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-         check_params_float(bias_scale: bias_scale)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(max_iter: max_iter, batch_size: batch_size)
-         super(reg_param: 0.0, fit_bias: fit_bias, bias_scale: bias_scale,
-               max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-       # @return [LinearRegression] The learned regressor itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_tvalue_array(y)
-         check_sample_tvalue_size(x, y)
-
-         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-         n_features = x.shape[1]
-
-         if n_outputs > 1
-           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-           @bias_term = Numo::DFloat.zeros(n_outputs)
-           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-         else
-           @weight_vec, @bias_term = partial_fit(x, y)
-         end
-
-         self
-       end
-
-       # Predict values for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-       def predict(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about LinearRegression.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       def calc_loss_gradient(x, y, weight)
-         2.0 * (x.dot(weight) - y)
-       end
-     end
-   end
- end
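For context, the loss behind calc_loss_gradient here is the per-sample squared error; the method returns its derivative with respect to the prediction, and the shared SGDLinearEstimator presumably applies the chain rule by multiplying with the samples and averaging, as Lasso's calc_weight_gradient above does. In LaTeX, with prediction f = x^\top w:

\[
L(w) = (x^\top w - y)^2, \qquad
\frac{\partial L}{\partial f} = 2\,(x^\top w - y), \qquad
\nabla_w L = \frac{\partial L}{\partial f}\,\nabla_w f = 2\,(x^\top w - y)\,x .
\]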
data/lib/svmkit/linear_model/logistic_regression.rb
@@ -1,161 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/classifier'
-
- module SVMKit
-   module LinearModel
-     # LogisticRegression is a class that implements Logistic Regression
-     # with mini-batch stochastic gradient descent optimization.
-     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_labels)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
-     class LogisticRegression < SGDLinearEstimator
-       include Base::Classifier
-       include Validation
-
-       # Return the weight vector for Logistic Regression.
-       # @return [Numo::DFloat] (shape: [n_classes, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept) for Logistic Regression.
-       # @return [Numo::DFloat] (shape: [n_classes])
-       attr_reader :bias_term
-
-       # Return the class labels.
-       # @return [Numo::Int32] (shape: [n_classes])
-       attr_reader :classes
-
-       # Return the random generator for performing random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new classifier with Logistic Regression by SGD optimization.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                      max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
-         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-         super
-         @classes = nil
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-       # @return [LogisticRegression] The learned classifier itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_label_array(y)
-         check_sample_label_size(x, y)
-
-         @classes = Numo::Int32[*y.to_a.uniq.sort]
-         n_classes = @classes.size
-         n_features = x.shape[1]
-
-         if n_classes > 2
-           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-           @bias_term = Numo::DFloat.zeros(n_classes)
-           n_classes.times do |n|
-             bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-             @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-           end
-         else
-           negative_label = y.to_a.uniq.min
-           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-           @weight_vec, @bias_term = partial_fit(x, bin_y)
-         end
-
-         self
-       end
-
-       # Calculate confidence scores for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
-       def decision_function(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Predict class labels for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-       def predict(x)
-         check_sample_array(x)
-
-         return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
-
-         n_samples, = x.shape
-         decision_values = predict_proba(x)
-         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
-       end
-
-       # Predict probabilities for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-       def predict_proba(x)
-         check_sample_array(x)
-
-         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-         return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
-
-         n_samples, = x.shape
-         probs = Numo::DFloat.zeros(n_samples, 2)
-         probs[true, 1] = proba
-         probs[true, 0] = 1.0 - proba
-         probs
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about LogisticRegression.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           classes: @classes,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @classes = obj[:classes]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       def calc_loss_gradient(x, y, weight)
-         y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y
-       end
-     end
-   end
- end
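The expression in calc_loss_gradient is the standard derivative of the logistic loss for labels y ∈ {−1, +1}. With f = x^\top w:

\[
L(w) = \log\!\bigl(1 + e^{-y f}\bigr), \qquad
\frac{\partial L}{\partial f}
  = \frac{-y\, e^{-y f}}{1 + e^{-y f}}
  = \frac{y}{1 + e^{-y f}} - y ,
\]

which is exactly the value y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y returned above.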
data/lib/svmkit/linear_model/ridge.rb
@@ -1,112 +0,0 @@
- # frozen_string_literal: true
-
- require 'svmkit/validation'
- require 'svmkit/linear_model/sgd_linear_estimator'
- require 'svmkit/base/regressor'
-
- module SVMKit
-   module LinearModel
-     # Ridge is a class that implements Ridge Regression
-     # with mini-batch stochastic gradient descent optimization.
-     #
-     # @example
-     #   estimator =
-     #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
-     #   estimator.fit(training_samples, training_values)
-     #   results = estimator.predict(testing_samples)
-     #
-     class Ridge < SGDLinearEstimator
-       include Base::Regressor
-       include Validation
-
-       # Return the weight vector.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term (a.k.a. intercept).
-       # @return [Numo::DFloat] (shape: [n_outputs])
-       attr_reader :bias_term
-
-       # Return the random generator for random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new Ridge regressor.
-       #
-       # @param reg_param [Float] The regularization parameter.
-       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-       # @param bias_scale [Float] The scale of the bias term.
-       # @param max_iter [Integer] The maximum number of iterations.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
-         check_params_integer(max_iter: max_iter, batch_size: batch_size)
-         check_params_boolean(fit_bias: fit_bias)
-         check_params_type_or_nil(Integer, random_seed: random_seed)
-         check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-         super
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-       # @return [Ridge] The learned regressor itself.
-       def fit(x, y)
-         check_sample_array(x)
-         check_tvalue_array(y)
-         check_sample_tvalue_size(x, y)
-
-         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-         n_features = x.shape[1]
-
-         if n_outputs > 1
-           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-           @bias_term = Numo::DFloat.zeros(n_outputs)
-           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-         else
-           @weight_vec, @bias_term = partial_fit(x, y)
-         end
-
-         self
-       end
-
-       # Predict values for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-       def predict(x)
-         check_sample_array(x)
-         x.dot(@weight_vec.transpose) + @bias_term
-       end
-
-       # Dump marshal data.
-       # @return [Hash] The marshal data about Ridge.
-       def marshal_dump
-         { params: @params,
-           weight_vec: @weight_vec,
-           bias_term: @bias_term,
-           rng: @rng }
-       end
-
-       # Load marshal data.
-       # @return [nil]
-       def marshal_load(obj)
-         @params = obj[:params]
-         @weight_vec = obj[:weight_vec]
-         @bias_term = obj[:bias_term]
-         @rng = obj[:rng]
-         nil
-       end
-
-       private
-
-       def calc_loss_gradient(x, y, weight)
-         2.0 * (x.dot(weight) - y)
-       end
-     end
-   end
- end
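All four deleted estimators define marshal_dump and marshal_load, so a trained model could be persisted with Ruby's core Marshal. A hypothetical usage sketch; training_samples, training_values, and testing_samples are placeholders, as in the @example blocks above:

require 'svmkit'

estimator = SVMKit::LinearModel::Ridge.new(reg_param: 0.1, random_seed: 1)
estimator.fit(training_samples, training_values)

File.binwrite('ridge.model', Marshal.dump(estimator)) # serializes params, weights, bias, rng
restored = Marshal.load(File.binread('ridge.model'))  # rebuilt via marshal_load
restored.predict(testing_samples)                     # same weights, same predictions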