rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,215 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/preprocessing/label_encoder'
6
-
7
- module Rumale
8
- module Ensemble
9
- # StackingClassifier is a class that implements classifier with stacking method.
10
- #
11
- # @example
12
- # estimators = {
13
- # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
- # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
- # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
- # }
17
- # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
18
- # classifier = Rumale::Ensemble::StackedClassifier.new(
19
- # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
20
- # )
21
- # classifier.fit(training_samples, training_labels)
22
- # results = classifier.predict(testing_samples)
23
- #
24
- # *Reference*
25
- # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
26
- class StackingClassifier
27
- include Base::BaseEstimator
28
- include Base::Classifier
29
-
30
- # Return the base classifiers.
31
- # @return [Hash<Symbol,Classifier>]
32
- attr_reader :estimators
33
-
34
- # Return the meta classifier.
35
- # @return [Classifier]
36
- attr_reader :meta_estimator
37
-
38
- # Return the class labels.
39
- # @return [Numo::Int32] (size: n_classes)
40
- attr_reader :classes
41
-
42
- # Return the method used by each base classifier.
43
- # @return [Hash<Symbol,Symbol>]
44
- attr_reader :stack_method
45
-
46
- # Create a new classifier with stacking method.
47
- #
48
- # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
49
- # @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
50
- # If nil is given, LogisticRegression is used.
51
- # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
52
- # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
53
- # @param stack_method [String] The method name of base classifier for using meta feature extraction.
54
- # If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
55
- # on each classifier.
56
- # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
57
- # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
58
- def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
59
- check_params_type(Hash, estimators: estimators)
60
- check_params_numeric(n_splits: n_splits)
61
- check_params_string(stack_method: stack_method)
62
- check_params_boolean(shuffle: shuffle, passthrough: passthrough)
63
- check_params_numeric_or_nil(random_seed: random_seed)
64
- @estimators = estimators
65
- @meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
66
- @classes = nil
67
- @stack_method = nil
68
- @output_size = nil
69
- @params = {}
70
- @params[:n_splits] = n_splits
71
- @params[:shuffle] = shuffle
72
- @params[:stack_method] = stack_method
73
- @params[:passthrough] = passthrough
74
- @params[:random_seed] = random_seed || srand
75
- end
76
-
77
- # Fit the model with given training data.
78
- #
79
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
80
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
81
- # @return [StackedClassifier] The learned classifier itself.
82
- def fit(x, y)
83
- x = check_convert_sample_array(x)
84
- y = check_convert_label_array(y)
85
- check_sample_label_size(x, y)
86
-
87
- n_samples, n_features = x.shape
88
-
89
- @encoder = Rumale::Preprocessing::LabelEncoder.new
90
- y_encoded = @encoder.fit_transform(y)
91
- @classes = Numo::NArray[*@encoder.classes]
92
-
93
- # training base classifiers with all training data.
94
- @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
95
-
96
- # detecting feature extraction method and its size of output for each base classifier.
97
- @stack_method = detect_stack_method
98
- @output_size = detect_output_size(n_features)
99
-
100
- # extracting meta features with base classifiers.
101
- n_components = @output_size.values.inject(:+)
102
- z = Numo::DFloat.zeros(n_samples, n_components)
103
-
104
- kf = Rumale::ModelSelection::StratifiedKFold.new(
105
- n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
106
- )
107
-
108
- kf.split(x, y_encoded).each do |train_ids, valid_ids|
109
- x_train = x[train_ids, true]
110
- y_train = y_encoded[train_ids]
111
- x_valid = x[valid_ids, true]
112
- f_start = 0
113
- @estimators.each_key do |name|
114
- est_fold = Marshal.load(Marshal.dump(@estimators[name]))
115
- f_last = f_start + @output_size[name]
116
- f_position = @output_size[name] == 1 ? f_start : f_start...f_last
117
- z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
118
- f_start = f_last
119
- end
120
- end
121
-
122
- # concatenating original features.
123
- z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
124
-
125
- # training meta classifier.
126
- @meta_estimator.fit(z, y_encoded)
127
-
128
- self
129
- end
130
-
131
- # Calculate confidence scores for samples.
132
- #
133
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
134
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
135
- def decision_function(x)
136
- x = check_convert_sample_array(x)
137
- z = transform(x)
138
- @meta_estimator.decision_function(z)
139
- end
140
-
141
- # Predict class labels for samples.
142
- #
143
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
144
- # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
145
- def predict(x)
146
- x = check_convert_sample_array(x)
147
- z = transform(x)
148
- Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
149
- end
150
-
151
- # Predict probability for samples.
152
- #
153
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
154
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
155
- def predict_proba(x)
156
- x = check_convert_sample_array(x)
157
- z = transform(x)
158
- @meta_estimator.predict_proba(z)
159
- end
160
-
161
- # Transform the given data with the learned model.
162
- #
163
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
164
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
165
- def transform(x)
166
- x = check_convert_sample_array(x)
167
- n_samples = x.shape[0]
168
- n_components = @output_size.values.inject(:+)
169
- z = Numo::DFloat.zeros(n_samples, n_components)
170
- f_start = 0
171
- @estimators.each_key do |name|
172
- f_last = f_start + @output_size[name]
173
- f_position = @output_size[name] == 1 ? f_start : f_start...f_last
174
- z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
175
- f_start = f_last
176
- end
177
- z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
178
- z
179
- end
180
-
181
- # Fit the model with training data, and then transform them with the learned model.
182
- #
183
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
184
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
185
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
186
- def fit_transform(x, y)
187
- x = check_convert_sample_array(x)
188
- y = check_convert_label_array(y)
189
- fit(x, y).transform(x)
190
- end
191
-
192
- private
193
-
194
- STACK_METHODS = %i[predict_proba decision_function predict].freeze
195
-
196
- private_constant :STACK_METHODS
197
-
198
- def detect_stack_method
199
- if @params[:stack_method] == 'auto'
200
- @estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
201
- else
202
- @estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
203
- end
204
- end
205
-
206
- def detect_output_size(n_features)
207
- x_dummy = Numo::DFloat.new(2, n_features).rand
208
- @estimators.each_key.with_object({}) do |name, obj|
209
- output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
210
- obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
211
- end
212
- end
213
- end
214
- end
215
- end
@@ -1,163 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/regressor'
5
-
6
- module Rumale
7
- module Ensemble
8
- # StackingRegressor is a class that implements regressor with stacking method.
9
- #
10
- # @example
11
- # estimators = {
12
- # las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
- # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
- # }
16
- # meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
17
- # regressor = Rumale::Ensemble::StackedRegressor.new(
18
- # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
19
- # )
20
- # regressor.fit(training_samples, training_values)
21
- # results = regressor.predict(testing_samples)
22
- #
23
- # *Reference*
24
- # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
- class StackingRegressor
26
- include Base::BaseEstimator
27
- include Base::Regressor
28
-
29
- # Return the base regressors.
30
- # @return [Hash<Symbol,Regressor>]
31
- attr_reader :estimators
32
-
33
- # Return the meta regressor.
34
- # @return [Regressor]
35
- attr_reader :meta_estimator
36
-
37
- # Create a new regressor with stacking method.
38
- #
39
- # @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
40
- # @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
41
- # If nil is given, Ridge is used.
42
- # @param n_splits [Integer] The number of folds for cross validation with k-fold on meta feature extraction in training phase.
43
- # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
44
- # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
45
- # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
46
- def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
47
- check_params_type(Hash, estimators: estimators)
48
- check_params_numeric(n_splits: n_splits)
49
- check_params_boolean(shuffle: shuffle, passthrough: passthrough)
50
- check_params_numeric_or_nil(random_seed: random_seed)
51
- @estimators = estimators
52
- @meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
53
- @output_size = nil
54
- @params = {}
55
- @params[:n_splits] = n_splits
56
- @params[:shuffle] = shuffle
57
- @params[:passthrough] = passthrough
58
- @params[:random_seed] = random_seed || srand
59
- end
60
-
61
- # Fit the model with given training data.
62
- #
63
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
64
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
65
- # @return [StackedRegressor] The learned regressor itself.
66
- def fit(x, y)
67
- x = check_convert_sample_array(x)
68
- y = check_convert_tvalue_array(y)
69
- check_sample_tvalue_size(x, y)
70
-
71
- n_samples, n_features = x.shape
72
- n_outputs = y.ndim == 1 ? 1 : y.shape[1]
73
-
74
- # training base regressors with all training data.
75
- @estimators.each_key { |name| @estimators[name].fit(x, y) }
76
-
77
- # detecting size of output for each base regressor.
78
- @output_size = detect_output_size(n_features)
79
-
80
- # extracting meta features with base regressors.
81
- n_components = @output_size.values.inject(:+)
82
- z = Numo::DFloat.zeros(n_samples, n_components)
83
-
84
- kf = Rumale::ModelSelection::KFold.new(
85
- n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
86
- )
87
-
88
- kf.split(x, y).each do |train_ids, valid_ids|
89
- x_train = x[train_ids, true]
90
- y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
91
- x_valid = x[valid_ids, true]
92
- f_start = 0
93
- @estimators.each_key do |name|
94
- est_fold = Marshal.load(Marshal.dump(@estimators[name]))
95
- f_last = f_start + @output_size[name]
96
- f_position = @output_size[name] == 1 ? f_start : f_start...f_last
97
- z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
98
- f_start = f_last
99
- end
100
- end
101
-
102
- # concatenating original features.
103
- z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
104
-
105
- # training meta regressor.
106
- @meta_estimator.fit(z, y)
107
-
108
- self
109
- end
110
-
111
- # Predict values for samples.
112
- #
113
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
114
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
115
- def predict(x)
116
- x = check_convert_sample_array(x)
117
- z = transform(x)
118
- @meta_estimator.predict(z)
119
- end
120
-
121
- # Transform the given data with the learned model.
122
- #
123
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
124
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
125
- def transform(x)
126
- x = check_convert_sample_array(x)
127
- n_samples = x.shape[0]
128
- n_components = @output_size.values.inject(:+)
129
- z = Numo::DFloat.zeros(n_samples, n_components)
130
- f_start = 0
131
- @estimators.each_key do |name|
132
- f_last = f_start + @output_size[name]
133
- f_position = @output_size[name] == 1 ? f_start : f_start...f_last
134
- z[true, f_position] = @estimators[name].predict(x)
135
- f_start = f_last
136
- end
137
- z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
138
- z
139
- end
140
-
141
- # Fit the model with training data, and then transform them with the learned model.
142
- #
143
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
144
- # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
145
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
146
- def fit_transform(x, y)
147
- x = check_convert_sample_array(x)
148
- y = check_convert_tvalue_array(y)
149
- fit(x, y).transform(x)
150
- end
151
-
152
- private
153
-
154
- def detect_output_size(n_features)
155
- x_dummy = Numo::DFloat.new(2, n_features).rand
156
- @estimators.each_key.with_object({}) do |name, obj|
157
- output_dummy = @estimators[name].predict(x_dummy)
158
- obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
159
- end
160
- end
161
- end
162
- end
163
- end
@@ -1,126 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
- require 'rumale/preprocessing/label_encoder'
6
-
7
- module Rumale
8
- module Ensemble
9
- # VotingClassifier is a class that implements classifier with voting ensemble method.
10
- #
11
- # @example
12
- # estimators = {
13
- # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
14
- # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
15
- # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
16
- # }
17
- # weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
18
- #
19
- # classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
20
- # classifier.fit(x_train, y_train)
21
- # results = classifier.predict(x_test)
22
- #
23
- # *Reference*
24
- # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
25
- class VotingClassifier
26
- include Base::BaseEstimator
27
- include Base::Classifier
28
-
29
- # Return the sub-classifiers that voted.
30
- # @return [Hash<Symbol,Classifier>]
31
- attr_reader :estimators
32
-
33
- # Return the class labels.
34
- # @return [Numo::Int32] (size: n_classes)
35
- attr_reader :classes
36
-
37
- # Create a new ensembled classifier with voting rule.
38
- #
39
- # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
40
- # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
41
- # @param voting [String] The voting rule for the predicted results of each classifier.
42
- # If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
43
- # If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
44
- def initialize(estimators:, weights: nil, voting: 'hard')
45
- check_params_type(Hash, estimators: estimators)
46
- check_params_type_or_nil(Hash, weights: weights)
47
- check_params_string(voting: voting)
48
- @estimators = estimators
49
- @classes = nil
50
- @params = {}
51
- @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
52
- @params[:voting] = voting
53
- end
54
-
55
- # Fit the model with given training data.
56
- #
57
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
58
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
59
- # @return [VotingClassifier] The learned classifier itself.
60
- def fit(x, y)
61
- x = check_convert_sample_array(x)
62
- y = check_convert_label_array(y)
63
- check_sample_label_size(x, y)
64
-
65
- @encoder = Rumale::Preprocessing::LabelEncoder.new
66
- y_encoded = @encoder.fit_transform(y)
67
- @classes = Numo::NArray[*@encoder.classes]
68
- @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
69
-
70
- self
71
- end
72
-
73
- # Calculate confidence scores for samples.
74
- #
75
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
76
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
77
- def decision_function(x)
78
- x = check_convert_sample_array(x)
79
- return predict_proba(x) if soft_voting?
80
-
81
- n_samples = x.shape[0]
82
- n_classes = @classes.size
83
- z = Numo::DFloat.zeros(n_samples, n_classes)
84
- @estimators.each do |name, estimator|
85
- estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
86
- end
87
- z
88
- end
89
-
90
- # Predict class labels for samples.
91
- #
92
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
93
- # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
94
- def predict(x)
95
- x = check_convert_sample_array(x)
96
- n_samples = x.shape[0]
97
- n_classes = @classes.size
98
- z = decision_function(x)
99
- predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
100
- Numo::Int32.cast(@encoder.inverse_transform(predicted))
101
- end
102
-
103
- # Predict probability for samples.
104
- #
105
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
106
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
107
- def predict_proba(x)
108
- x = check_convert_sample_array(x)
109
- n_samples = x.shape[0]
110
- n_classes = @classes.size
111
- z = Numo::DFloat.zeros(n_samples, n_classes)
112
- sum_weight = @params[:weights].each_value.inject(&:+)
113
- @estimators.each do |name, estimator|
114
- z += @params[:weights][name] * estimator.predict_proba(x)
115
- end
116
- z /= sum_weight
117
- end
118
-
119
- private
120
-
121
- def soft_voting?
122
- @params[:voting] == 'soft'
123
- end
124
- end
125
- end
126
- end
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/regressor'
5
-
6
- module Rumale
7
- module Ensemble
8
- # VotingRegressor is a class that implements regressor with voting ensemble method.
9
- #
10
- # @example
11
- # estimators = {
12
- # rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
13
- # mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
14
- # rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
15
- # }
16
- # weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
17
- #
18
- # regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights, voting: 'soft')
19
- # regressor.fit(x_train, y_train)
20
- # results = regressor.predict(x_test)
21
- #
22
- # *Reference*
23
- # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
24
- class VotingRegressor
25
- include Base::BaseEstimator
26
- include Base::Regressor
27
-
28
- # Return the sub-regressors that voted.
29
- # @return [Hash<Symbol,Regressor>]
30
- attr_reader :estimators
31
-
32
- # Create a new ensembled regressor with voting rule.
33
- #
34
- # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
35
- # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
36
- def initialize(estimators:, weights: nil)
37
- check_params_type(Hash, estimators: estimators)
38
- check_params_type_or_nil(Hash, weights: weights)
39
- @estimators = estimators
40
- @n_outputs = nil
41
- @params = {}
42
- @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
43
- end
44
-
45
- # Fit the model with given training data.
46
- #
47
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
48
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
49
- # @return [VotingRegressor] The learned regressor itself.
50
- def fit(x, y)
51
- x = check_convert_sample_array(x)
52
- y = check_convert_tvalue_array(y)
53
- check_sample_tvalue_size(x, y)
54
-
55
- @n_outputs = y.ndim > 1 ? y.shape[1] : 1
56
- @estimators.each_key { |name| @estimators[name].fit(x, y) }
57
-
58
- self
59
- end
60
-
61
- # Predict values for samples.
62
- #
63
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
64
- # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
65
- def predict(x)
66
- x = check_convert_sample_array(x)
67
- z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
68
- sum_weight = @params[:weights].each_value.inject(&:+)
69
- @estimators.each do |name, estimator|
70
- z += @params[:weights][name] * estimator.predict(x)
71
- end
72
- z / sum_weight
73
- end
74
-
75
- private
76
-
77
- def single_target?
78
- @n_outputs == 1
79
- end
80
- end
81
- end
82
- end
@@ -1,29 +0,0 @@
1
# frozen_string_literal: true

require 'rumale/base/evaluator'

module Rumale
  # This module consists of the classes for model evaluation.
  module EvaluationMeasure
    # Accuracy is a class that calculates the accuracy of classifier from the predicted labels.
    #
    # @example
    #   evaluator = Rumale::EvaluationMeasure::Accuracy.new
    #   puts evaluator.score(ground_truth, predicted)
    class Accuracy
      include Base::Evaluator

      # Calculate mean accuracy.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
      # @return [Float] Mean accuracy
      def score(y_true, y_pred)
        y_true = check_convert_label_array(y_true)
        y_pred = check_convert_label_array(y_pred)

        # Vectorized element-wise comparison replaces the O(n) Ruby-level loop;
        # fdiv also avoids the NoMethodError the old inject(:+) raised on empty input.
        y_true.eq(y_pred).count.fdiv(y_true.size)
      end
    end
  end
end
@@ -1,74 +0,0 @@
1
# frozen_string_literal: true

require 'rumale/base/evaluator'

module Rumale
  module EvaluationMeasure
    # AdjustedRandScore is a class that calculates the adjusted rand index.
    #
    # @example
    #   evaluator = Rumale::EvaluationMeasure::AdjustedRandScore.new
    #   puts evaluator.score(ground_truth, predicted)
    #
    # *Reference*
    # - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance", J. Machine Learnig Research, Vol. 11, pp.2837--2854, 2010.
    class AdjustedRandScore
      include Base::Evaluator

      # Calculate adjusted rand index.
      #
      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
      # @return [Float] Adjusted rand index.
      def score(y_true, y_pred)
        y_true = check_convert_label_array(y_true)
        y_pred = check_convert_label_array(y_pred)

        n_samples = y_pred.size
        n_classes = y_true.to_a.uniq.size
        n_clusters = y_pred.to_a.uniq.size

        # Degenerate partitions are defined to have a perfect score.
        return 1.0 if special_cases?(n_samples, n_classes, n_clusters)

        # ARI = (Index - ExpectedIndex) / (MaxIndex - ExpectedIndex),
        # with pair counts taken from the contingency table and its margins.
        table = contingency_table(y_true, y_pred)
        row_pairs = table.sum(axis: 1).map { |c| comb_two(c) }.sum
        col_pairs = table.sum(axis: 0).map { |c| comb_two(c) }.sum
        cell_pairs = table.flatten.map { |c| comb_two(c) }.sum
        expected_index = (row_pairs * col_pairs).fdiv(comb_two(n_samples))
        max_index = (row_pairs + col_pairs).fdiv(2)
        (cell_pairs - expected_index).fdiv(max_index - expected_index)
      end

      private

      # Build the [n_classes, n_clusters] co-occurrence count matrix.
      def contingency_table(y_true, y_pred)
        class_ids = y_true.to_a.uniq
        cluster_ids = y_pred.to_a.uniq
        table = Numo::Int32.zeros(class_ids.size, cluster_ids.size)
        class_ids.each_with_index do |class_id, i|
          class_mask = y_true.eq(class_id)
          cluster_ids.each_with_index do |cluster_id, j|
            table[i, j] = (class_mask & y_pred.eq(cluster_id)).count
          end
        end
        table
      end

      # True for the degenerate settings where ARI is defined to be 1.
      def special_cases?(n_samples, n_classes, n_clusters)
        (n_classes.zero? && n_clusters.zero?) ||
          (n_classes == 1 && n_clusters == 1) ||
          (n_classes == n_samples && n_clusters == n_samples)
      end

      # Number of unordered pairs: choose(k, 2).
      def comb_two(k)
        k * (k - 1) / 2
      end
    end
  end
end