rumale 0.23.3 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/classifier'
require 'rumale/preprocessing/label_encoder'

module Rumale
  module Ensemble
    # StackingClassifier is a class that implements classifier with stacking method.
    #
    # @example
    #   estimators = {
    #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
    #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
    #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
    #   }
    #   meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
    #   classifier = Rumale::Ensemble::StackingClassifier.new(
    #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
    #   )
    #   classifier.fit(training_samples, training_labels)
    #   results = classifier.predict(testing_samples)
    #
    # *Reference*
    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
    class StackingClassifier
      include Base::BaseEstimator
      include Base::Classifier

      # Return the base classifiers.
      # @return [Hash<Symbol,Classifier>]
      attr_reader :estimators

      # Return the meta classifier.
      # @return [Classifier]
      attr_reader :meta_estimator

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the method used by each base classifier.
      # @return [Hash<Symbol,Symbol>]
      attr_reader :stack_method

      # Create a new classifier with stacking method.
      #
      # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
      # @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
      #   If nil is given, LogisticRegression is used.
      # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
      # @param stack_method [String] The method name of base classifier for using meta feature extraction.
      #   If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
      #   on each classifier.
      # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
      # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
      def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
        check_params_type(Hash, estimators: estimators)
        check_params_numeric(n_splits: n_splits)
        check_params_string(stack_method: stack_method)
        check_params_boolean(shuffle: shuffle, passthrough: passthrough)
        check_params_numeric_or_nil(random_seed: random_seed)
        @estimators = estimators
        @meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
        @classes = nil
        @stack_method = nil
        @output_size = nil
        @params = {}
        @params[:n_splits] = n_splits
        @params[:shuffle] = shuffle
        @params[:stack_method] = stack_method
        @params[:passthrough] = passthrough
        @params[:random_seed] = random_seed || srand
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [StackingClassifier] The learned classifier itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)

        n_samples, n_features = x.shape

        # Encode labels to 0...n_classes so base estimators see contiguous integer labels.
        @encoder = Rumale::Preprocessing::LabelEncoder.new
        y_encoded = @encoder.fit_transform(y)
        @classes = Numo::NArray[*@encoder.classes]

        # training base classifiers with all training data.
        @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }

        # detecting feature extraction method and its size of output for each base classifier.
        @stack_method = detect_stack_method
        @output_size = detect_output_size(n_features)

        # extracting meta features with base classifiers.
        n_components = @output_size.values.inject(:+)
        z = Numo::DFloat.zeros(n_samples, n_components)

        # NOTE(review): StratifiedKFold is not required by this file; it appears to rely
        # on the top-level rumale loader having required it — confirm before extracting.
        kf = Rumale::ModelSelection::StratifiedKFold.new(
          n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
        )

        # Out-of-fold predictions: each validation slice is predicted by a fold-local
        # deep copy of the estimator, trained only on the training slice.
        kf.split(x, y_encoded).each do |train_ids, valid_ids|
          x_train = x[train_ids, true]
          y_train = y_encoded[train_ids]
          x_valid = x[valid_ids, true]
          f_start = 0
          @estimators.each_key do |name|
            # Deep copy so fitting on the fold does not clobber the fully-trained estimator.
            est_fold = Marshal.load(Marshal.dump(@estimators[name]))
            f_last = f_start + @output_size[name]
            # Scalar index when output is one column, so the assigned shape matches a 1-D result.
            f_position = @output_size[name] == 1 ? f_start : f_start...f_last
            z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
            f_start = f_last
          end
        end

        # concatenating original features.
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]

        # training meta classifier.
        @meta_estimator.fit(z, y_encoded)

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
      def decision_function(x)
        x = check_convert_sample_array(x)
        z = transform(x)
        @meta_estimator.decision_function(z)
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        z = transform(x)
        # Map the meta estimator's encoded predictions back to the original label values.
        Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
      def predict_proba(x)
        x = check_convert_sample_array(x)
        z = transform(x)
        @meta_estimator.predict_proba(z)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
      def transform(x)
        x = check_convert_sample_array(x)
        n_samples = x.shape[0]
        n_components = @output_size.values.inject(:+)
        z = Numo::DFloat.zeros(n_samples, n_components)
        f_start = 0
        @estimators.each_key do |name|
          f_last = f_start + @output_size[name]
          f_position = @output_size[name] == 1 ? f_start : f_start...f_last
          z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
          f_start = f_last
        end
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
        z
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
      def fit_transform(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        fit(x, y).transform(x)
      end

      private

      # Candidate meta-feature extraction methods, in preference order for 'auto'.
      STACK_METHODS = %i[predict_proba decision_function predict].freeze

      private_constant :STACK_METHODS

      # Resolve the meta-feature extraction method for each base classifier.
      def detect_stack_method
        if @params[:stack_method] == 'auto'
          @estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
        else
          @estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
        end
      end

      # Probe each fitted base classifier with a tiny dummy input to learn how many
      # meta-feature columns it produces (1 for 1-D outputs, otherwise the column count).
      def detect_output_size(n_features)
        x_dummy = Numo::DFloat.new(2, n_features).rand
        @estimators.each_key.with_object({}) do |name, obj|
          output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
          obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
        end
      end
    end
  end
end
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/regressor'

module Rumale
  module Ensemble
    # StackingRegressor is a class that implements regressor with stacking method.
    #
    # @example
    #   estimators = {
    #     las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
    #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
    #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
    #   }
    #   meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
    #   regressor = Rumale::Ensemble::StackingRegressor.new(
    #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
    #   )
    #   regressor.fit(training_samples, training_values)
    #   results = regressor.predict(testing_samples)
    #
    # *Reference*
    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
    class StackingRegressor
      include Base::BaseEstimator
      include Base::Regressor

      # Return the base regressors.
      # @return [Hash<Symbol,Regressor>]
      attr_reader :estimators

      # Return the meta regressor.
      # @return [Regressor]
      attr_reader :meta_estimator

      # Create a new regressor with stacking method.
      #
      # @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
      # @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
      #   If nil is given, Ridge is used.
      # @param n_splits [Integer] The number of folds for cross validation with k-fold on meta feature extraction in training phase.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
      # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
      # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
      def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
        check_params_type(Hash, estimators: estimators)
        check_params_numeric(n_splits: n_splits)
        check_params_boolean(shuffle: shuffle, passthrough: passthrough)
        check_params_numeric_or_nil(random_seed: random_seed)
        @estimators = estimators
        @meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
        @output_size = nil
        @params = {}
        @params[:n_splits] = n_splits
        @params[:shuffle] = shuffle
        @params[:passthrough] = passthrough
        @params[:random_seed] = random_seed || srand
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
      # @return [StackingRegressor] The learned regressor itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_tvalue_array(y)
        check_sample_tvalue_size(x, y)

        n_samples, n_features = x.shape
        n_outputs = y.ndim == 1 ? 1 : y.shape[1]

        # training base regressors with all training data.
        @estimators.each_key { |name| @estimators[name].fit(x, y) }

        # detecting size of output for each base regressor.
        @output_size = detect_output_size(n_features)

        # extracting meta features with base regressors.
        n_components = @output_size.values.inject(:+)
        z = Numo::DFloat.zeros(n_samples, n_components)

        kf = Rumale::ModelSelection::KFold.new(
          n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
        )

        # Out-of-fold predictions: each validation slice is predicted by a fold-local
        # deep copy of the estimator, trained only on the training slice.
        kf.split(x, y).each do |train_ids, valid_ids|
          x_train = x[train_ids, true]
          y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
          x_valid = x[valid_ids, true]
          f_start = 0
          @estimators.each_key do |name|
            # Deep copy so fitting on the fold does not clobber the fully-trained estimator.
            est_fold = Marshal.load(Marshal.dump(@estimators[name]))
            f_last = f_start + @output_size[name]
            # Scalar index when output is one column, so the assigned shape matches a 1-D result.
            f_position = @output_size[name] == 1 ? f_start : f_start...f_last
            z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
            f_start = f_last
          end
        end

        # concatenating original features.
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]

        # training meta regressor.
        @meta_estimator.fit(z, y)

        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        z = transform(x)
        @meta_estimator.predict(z)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
      def transform(x)
        x = check_convert_sample_array(x)
        n_samples = x.shape[0]
        n_components = @output_size.values.inject(:+)
        z = Numo::DFloat.zeros(n_samples, n_components)
        f_start = 0
        @estimators.each_key do |name|
          f_last = f_start + @output_size[name]
          f_position = @output_size[name] == 1 ? f_start : f_start...f_last
          z[true, f_position] = @estimators[name].predict(x)
          f_start = f_last
        end
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
        z
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
      def fit_transform(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_tvalue_array(y)
        fit(x, y).transform(x)
      end

      private

      # Probe each fitted base regressor with a tiny dummy input to learn how many
      # meta-feature columns it produces (1 for 1-D outputs, otherwise the column count).
      def detect_output_size(n_features)
        x_dummy = Numo::DFloat.new(2, n_features).rand
        @estimators.each_key.with_object({}) do |name, obj|
          output_dummy = @estimators[name].predict(x_dummy)
          obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
        end
      end
    end
  end
end
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/classifier'
require 'rumale/preprocessing/label_encoder'

module Rumale
  module Ensemble
    # VotingClassifier is a class that implements classifier with voting ensemble method.
    #
    # @example
    #   estimators = {
    #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
    #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
    #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
    #   }
    #   weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
    #
    #   classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
    #   classifier.fit(x_train, y_train)
    #   results = classifier.predict(x_test)
    #
    # *Reference*
    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
    class VotingClassifier
      include Base::BaseEstimator
      include Base::Classifier

      # Return the sub-classifiers that voted.
      # @return [Hash<Symbol,Classifier>]
      attr_reader :estimators

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Create a new ensembled classifier with voting rule.
      #
      # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
      # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
      #   If nil is given, every classifier receives an equal weight of 1.0.
      # @param voting [String] The voting rule for the predicted results of each classifier.
      #   If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
      #   If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
      def initialize(estimators:, weights: nil, voting: 'hard')
        check_params_type(Hash, estimators: estimators)
        check_params_type_or_nil(Hash, weights: weights)
        check_params_string(voting: voting)
        @estimators = estimators
        @classes = nil
        @params = {}
        @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
        @params[:voting] = voting
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [VotingClassifier] The learned classifier itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)

        # Encode labels to 0...n_classes so each sub-classifier sees contiguous integer labels.
        @encoder = Rumale::Preprocessing::LabelEncoder.new
        y_encoded = @encoder.fit_transform(y)
        @classes = Numo::NArray[*@encoder.classes]
        @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
      def decision_function(x)
        x = check_convert_sample_array(x)
        return predict_proba(x) if soft_voting?

        # Hard voting: accumulate each classifier's weight into the column of its
        # predicted (encoded) class label.
        n_samples = x.shape[0]
        n_classes = @classes.size
        z = Numo::DFloat.zeros(n_samples, n_classes)
        @estimators.each do |name, estimator|
          estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
        end
        z
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        n_samples = x.shape[0]
        n_classes = @classes.size
        z = decision_function(x)
        # max_index on a 2-D array yields flat indices; subtracting each row's offset
        # (row * n_classes) recovers the per-row column index, i.e. the encoded label.
        predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
        Numo::Int32.cast(@encoder.inverse_transform(predicted))
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
        x = check_convert_sample_array(x)
        n_samples = x.shape[0]
        n_classes = @classes.size
        z = Numo::DFloat.zeros(n_samples, n_classes)
        sum_weight = @params[:weights].each_value.inject(&:+)
        @estimators.each do |name, estimator|
          z += @params[:weights][name] * estimator.predict_proba(x)
        end
        # In-place division returns z, so the normalized weighted average is the return value.
        z /= sum_weight
      end

      private

      # True when the weighted-probability ('soft') voting rule is selected.
      def soft_voting?
        @params[:voting] == 'soft'
      end
    end
  end
end
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/regressor'

module Rumale
  module Ensemble
    # VotingRegressor is a class that implements regressor with voting ensemble method.
    #
    # @example
    #   estimators = {
    #     rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
    #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
    #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
    #   }
    #   weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
    #
    #   regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
    #   regressor.fit(x_train, y_train)
    #   results = regressor.predict(x_test)
    #
    # *Reference*
    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
    class VotingRegressor
      include Base::BaseEstimator
      include Base::Regressor

      # Return the sub-regressors that voted.
      # @return [Hash<Symbol,Regressor>]
      attr_reader :estimators

      # Create a new ensembled regressor with voting rule.
      #
      # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
      # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
      #   If nil is given, every regressor receives an equal weight of 1.0.
      def initialize(estimators:, weights: nil)
        check_params_type(Hash, estimators: estimators)
        check_params_type_or_nil(Hash, weights: weights)
        @estimators = estimators
        @n_outputs = nil
        @params = {}
        @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
      # @return [VotingRegressor] The learned regressor itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_tvalue_array(y)
        check_sample_tvalue_size(x, y)

        @n_outputs = y.ndim > 1 ? y.shape[1] : 1
        @estimators.each_key { |name| @estimators[name].fit(x, y) }

        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
      def predict(x)
        x = check_convert_sample_array(x)
        # Accumulator shape mirrors the target shape seen at fit time.
        z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
        sum_weight = @params[:weights].each_value.inject(&:+)
        @estimators.each do |name, estimator|
          z += @params[:weights][name] * estimator.predict(x)
        end
        # Weighted average of the sub-regressors' predictions.
        z / sum_weight
      end

      private

      # True when the model was fitted against a single-output target.
      def single_target?
        @n_outputs == 1
      end
    end
  end
end
@@ -1,29 +0,0 @@
1
# frozen_string_literal: true

require 'rumale/base/evaluator'

module Rumale
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Accuracy is a class that calculates the accuracy of classifier from the predicted labels.
    #
    # @example
    #   evaluator = Rumale::EvaluationMeasure::Accuracy.new
    #   puts evaluator.score(ground_truth, predicted)
    class Accuracy
      include Base::Evaluator

      # Calculate mean accuracy.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Mean accuracy. Returns 0.0 for empty label arrays.
      def score(y_true, y_pred)
        y_true = check_convert_label_array(y_true)
        y_pred = check_convert_label_array(y_pred)

        # Guard the empty case explicitly: the previous inject(:+) returned nil
        # for an empty array, which then raised NoMethodError on nil division.
        return 0.0 if y_true.size.zero?

        # Count element-wise label matches and average over the sample count.
        n_correct = y_true.to_a.each_with_index.count { |label, n| label == y_pred[n] }
        n_correct.fdiv(y_true.size)
      end
    end
  end
end
@@ -1,74 +0,0 @@
1
# frozen_string_literal: true

require 'rumale/base/evaluator'

module Rumale
  module EvaluationMeasure
    # AdjustedRandScore is a class that calculates the adjusted rand index.
    #
    # @example
    #   evaluator = Rumale::EvaluationMeasure::AdjustedRandScore.new
    #   puts evaluator.score(ground_truth, predicted)
    #
    # *Reference*
    # - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance", J. Machine Learning Research, Vol. 11, pp.2837--2854, 2010.
    class AdjustedRandScore
      include Base::Evaluator

      # Calculate adjusted rand index.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
      # @return [Float] Adjusted rand index.
      def score(y_true, y_pred)
        y_true = check_convert_label_array(y_true)
        y_pred = check_convert_label_array(y_pred)

        # initialize some variables.
        n_samples = y_pred.size
        n_classes = y_true.to_a.uniq.size
        n_clusters = y_pred.to_a.uniq.size

        # check special cases where ARI is defined as perfect agreement (1.0).
        return 1.0 if special_cases?(n_samples, n_classes, n_clusters)

        # calculate adjusted rand index:
        # ARI = (Index - ExpectedIndex) / (MaxIndex - ExpectedIndex),
        # where Index = sum of C(n_ij, 2) over contingency-table cells,
        # ExpectedIndex = prod_comb, and MaxIndex = mean_comb.
        table = contingency_table(y_true, y_pred)
        sum_comb_a = table.sum(axis: 1).map { |v| comb_two(v) }.sum
        sum_comb_b = table.sum(axis: 0).map { |v| comb_two(v) }.sum
        sum_comb = table.flatten.map { |v| comb_two(v) }.sum
        prod_comb = (sum_comb_a * sum_comb_b).fdiv(comb_two(n_samples))
        mean_comb = (sum_comb_a + sum_comb_b).fdiv(2)
        (sum_comb - prod_comb).fdiv(mean_comb - prod_comb)
      end

      private

      # Build the n_classes x n_clusters contingency table whose (i, j) entry
      # counts the samples with true class i that were assigned to cluster j.
      def contingency_table(y_true, y_pred)
        class_ids = y_true.to_a.uniq
        cluster_ids = y_pred.to_a.uniq
        n_classes = class_ids.size
        n_clusters = cluster_ids.size
        table = Numo::Int32.zeros(n_classes, n_clusters)
        n_classes.times do |i|
          b_true = y_true.eq(class_ids[i])
          n_clusters.times do |j|
            b_pred = y_pred.eq(cluster_ids[j])
            # Intersection of the two boolean masks gives the joint count.
            table[i, j] = (b_true & b_pred).count
          end
        end
        table
      end

      # Degenerate partitions (both empty, both a single cluster, or both
      # fully singleton) for which ARI is conventionally 1.0.
      def special_cases?(n_samples, n_classes, n_clusters)
        ((n_classes.zero? && n_clusters.zero?) ||
         (n_classes == 1 && n_clusters == 1) ||
         (n_classes == n_samples && n_clusters == n_samples))
      end

      # Binomial coefficient C(k, 2); exact since Ruby integer division is
      # safe here (k * (k - 1) is always even).
      def comb_two(k)
        k * (k - 1) / 2
      end
    end
  end
end