rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,152 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/validation'
4
-
5
- module Rumale
6
- # Module for calculating pairwise distances, similarities, and kernels.
7
- module PairwiseMetric
8
- class << self
9
- # Calculate the pairwise euclidean distances between x and y.
10
- #
11
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
12
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
13
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
14
- def euclidean_distance(x, y = nil)
15
- y = x if y.nil?
16
- x = Rumale::Validation.check_convert_sample_array(x)
17
- y = Rumale::Validation.check_convert_sample_array(y)
18
- Numo::NMath.sqrt(squared_error(x, y).abs)
19
- end
20
-
21
- # Calculate the pairwise manhattan distances between x and y.
22
- #
23
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
24
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
25
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
26
- def manhattan_distance(x, y = nil)
27
- y = x if y.nil?
28
- x = Rumale::Validation.check_convert_sample_array(x)
29
- y = Rumale::Validation.check_convert_sample_array(y)
30
- n_samples_x = x.shape[0]
31
- n_samples_y = y.shape[0]
32
- distance_mat = Numo::DFloat.zeros(n_samples_x, n_samples_y)
33
- n_samples_x.times do |n|
34
- distance_mat[n, true] = (y - x[n, true]).abs.sum(axis: 1)
35
- end
36
- distance_mat
37
- end
38
-
39
- # Calculate the pairwise squared errors between x and y.
40
- #
41
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
42
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
43
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
44
- def squared_error(x, y = nil)
45
- y_not_given = y.nil?
46
- y = x if y_not_given
47
- x = Rumale::Validation.check_convert_sample_array(x)
48
- y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
49
- sum_x_vec = (x**2).sum(1).expand_dims(1)
50
- sum_y_vec = y_not_given ? sum_x_vec.transpose : (y**2).sum(1).expand_dims(1).transpose
51
- err_mat = -2 * x.dot(y.transpose)
52
- err_mat += sum_x_vec
53
- err_mat += sum_y_vec
54
- err_mat.class.maximum(err_mat, 0)
55
- end
56
-
57
- # Calculate the pairwise cosine similarities between x and y.
58
- #
59
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
60
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
61
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
62
- def cosine_similarity(x, y = nil)
63
- y_not_given = y.nil?
64
- x = Rumale::Validation.check_convert_sample_array(x)
65
- y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
66
- x_norm = Numo::NMath.sqrt((x**2).sum(1))
67
- x_norm[x_norm.eq(0)] = 1
68
- x /= x_norm.expand_dims(1)
69
- if y_not_given
70
- x.dot(x.transpose)
71
- else
72
- y_norm = Numo::NMath.sqrt((y**2).sum(1))
73
- y_norm[y_norm.eq(0)] = 1
74
- y /= y_norm.expand_dims(1)
75
- x.dot(y.transpose)
76
- end
77
- end
78
-
79
- # Calculate the pairwise cosine distances between x and y.
80
- #
81
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
82
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
83
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
84
- def cosine_distance(x, y = nil)
85
- dist_mat = 1 - cosine_similarity(x, y)
86
- dist_mat[dist_mat.diag_indices] = 0 if y.nil?
87
- dist_mat.clip(0, 2)
88
- end
89
-
90
- # Calculate the rbf kernel between x and y.
91
- #
92
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
93
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
94
- # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
95
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
96
- def rbf_kernel(x, y = nil, gamma = nil)
97
- y_not_given = y.nil?
98
- y = x if y_not_given
99
- x = Rumale::Validation.check_convert_sample_array(x)
100
- y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
101
- gamma ||= 1.0 / x.shape[1]
102
- Rumale::Validation.check_params_numeric(gamma: gamma)
103
- Numo::NMath.exp(-gamma * squared_error(x, y))
104
- end
105
-
106
- # Calculate the linear kernel between x and y.
107
- #
108
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
109
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
110
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
111
- def linear_kernel(x, y = nil)
112
- y = x if y.nil?
113
- x = Rumale::Validation.check_convert_sample_array(x)
114
- y = Rumale::Validation.check_convert_sample_array(y)
115
- x.dot(y.transpose)
116
- end
117
-
118
- # Calculate the polynomial kernel between x and y.
119
- #
120
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
121
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
122
- # @param degree [Integer] The parameter of polynomial kernel.
123
- # @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
124
- # @param coef [Integer] The parameter of polynomial kernel.
125
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
126
- def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
127
- y = x if y.nil?
128
- gamma ||= 1.0 / x.shape[1]
129
- x = Rumale::Validation.check_convert_sample_array(x)
130
- y = Rumale::Validation.check_convert_sample_array(y)
131
- Rumale::Validation.check_params_numeric(gamma: gamma, degree: degree, coef: coef)
132
- (x.dot(y.transpose) * gamma + coef)**degree
133
- end
134
-
135
- # Calculate the sigmoid kernel between x and y.
136
- #
137
- # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
138
- # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
139
- # @param gamma [Float] The parameter of sigmoid kernel, if nil it is 1 / n_features.
140
- # @param coef [Integer] The parameter of sigmoid kernel.
141
- # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
142
- def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
143
- y = x if y.nil?
144
- gamma ||= 1.0 / x.shape[1]
145
- x = Rumale::Validation.check_convert_sample_array(x)
146
- y = Rumale::Validation.check_convert_sample_array(y)
147
- Rumale::Validation.check_params_numeric(gamma: gamma, coef: coef)
148
- Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
149
- end
150
- end
151
- end
152
- end
@@ -1,69 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/validation'
4
- require 'rumale/base/base_estimator'
5
-
6
- module Rumale
7
- module Pipeline
8
- # FeatureUnion is a class that implements the function concatenating the multi-transformer results.
9
- #
10
- # @example
11
- # fu = Rumale::Pipeline::FeatureUnion.new(
12
- # transformers: {
13
- # 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
14
- # 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
15
- # }
16
- # )
17
- # fu.fit(training_samples, training_labels)
18
- # results = fu.transform(testing_samples)
19
- #
20
- # # > p results.shape[1]
21
- # # > 128
22
- #
23
- class FeatureUnion
24
- include Base::BaseEstimator
25
- include Validation
26
-
27
- # Return the transformers
28
- # @return [Hash]
29
- attr_reader :transformers
30
-
31
- # Create a new feature union.
32
- #
33
- # @param transformers [Hash] List of transformers. The order of transforms follows the insertion order of hash keys.
34
- def initialize(transformers:)
35
- check_params_type(Hash, transformers: transformers)
36
- @params = {}
37
- @transformers = transformers
38
- end
39
-
40
- # Fit the model with given training data.
41
- #
42
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
43
- # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
44
- # @return [FeatureUnion] The learned feature union itself.
45
- def fit(x, y = nil)
46
- @transformers.each { |_k, t| t.fit(x, y) }
47
- self
48
- end
49
-
50
- # Fit the model with training data, and then transform them with the learned model.
51
- #
52
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
53
- # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
54
- # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
55
- def fit_transform(x, y = nil)
56
- fit(x, y).transform(x)
57
- end
58
-
59
- # Transform the given data with the learned model.
60
- #
61
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
62
- # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
63
- def transform(x)
64
- z = @transformers.values.map { |t| t.transform(x) }
65
- Numo::NArray.hstack(z)
66
- end
67
- end
68
- end
69
- end
@@ -1,175 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/validation'
4
- require 'rumale/base/base_estimator'
5
-
6
- module Rumale
7
- # Module implements utilities of pipeline that consists of a chain of transformers and estimators.
8
- module Pipeline
9
- # Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
10
- #
11
- # @example
12
- # rbf = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
13
- # svc = Rumale::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
14
- # pipeline = Rumale::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
15
- # pipeline.fit(training_samples, training_labels)
16
- # results = pipeline.predict(testing_samples)
17
- #
18
- class Pipeline
19
- include Base::BaseEstimator
20
- include Validation
21
-
22
- # Return the steps.
23
- # @return [Hash]
24
- attr_reader :steps
25
-
26
- # Create a new pipeline.
27
- #
28
- # @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
29
- # The last entry is considered an estimator.
30
- def initialize(steps:)
31
- check_params_type(Hash, steps: steps)
32
- validate_steps(steps)
33
- @params = {}
34
- @steps = steps
35
- end
36
-
37
- # Fit the model with given training data.
38
- #
39
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
40
- # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
- # @return [Pipeline] The learned pipeline itself.
42
- def fit(x, y)
43
- trans_x = apply_transforms(x, y, fit: true)
44
- last_estimator&.fit(trans_x, y)
45
- self
46
- end
47
-
48
- # Call the fit_predict method of last estimator after applying all transforms.
49
- #
50
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
51
- # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
52
- # @return [Numo::NArray] The predicted results by last estimator.
53
- def fit_predict(x, y = nil)
54
- trans_x = apply_transforms(x, y, fit: true)
55
- last_estimator.fit_predict(trans_x)
56
- end
57
-
58
- # Call the fit_transform method of last estimator after applying all transforms.
59
- #
60
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
61
- # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
62
- # @return [Numo::NArray] The transformed results by last estimator.
63
- def fit_transform(x, y = nil)
64
- trans_x = apply_transforms(x, y, fit: true)
65
- last_estimator.fit_transform(trans_x, y)
66
- end
67
-
68
- # Call the decision_function method of last estimator after applying all transforms.
69
- #
70
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
71
- # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
72
- def decision_function(x)
73
- trans_x = apply_transforms(x)
74
- last_estimator.decision_function(trans_x)
75
- end
76
-
77
- # Call the predict method of last estimator after applying all transforms.
78
- #
79
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
80
- # @return [Numo::NArray] The predicted results by last estimator.
81
- def predict(x)
82
- trans_x = apply_transforms(x)
83
- last_estimator.predict(trans_x)
84
- end
85
-
86
- # Call the predict_log_proba method of last estimator after applying all transforms.
87
- #
88
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
89
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
90
- def predict_log_proba(x)
91
- trans_x = apply_transforms(x)
92
- last_estimator.predict_log_proba(trans_x)
93
- end
94
-
95
- # Call the predict_proba method of last estimator after applying all transforms.
96
- #
97
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
98
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
99
- def predict_proba(x)
100
- trans_x = apply_transforms(x)
101
- last_estimator.predict_proba(trans_x)
102
- end
103
-
104
- # Call the transform method of last estimator after applying all transforms.
105
- #
106
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
107
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
108
- def transform(x)
109
- trans_x = apply_transforms(x)
110
- last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
111
- end
112
-
113
- # Call the inverse_transform method in reverse order.
114
- #
115
- # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
116
- # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
117
- def inverse_transform(z)
118
- itrans_z = z
119
- @steps.keys.reverse_each do |name|
120
- transformer = @steps[name]
121
- next if transformer.nil?
122
-
123
- itrans_z = transformer.inverse_transform(itrans_z)
124
- end
125
- itrans_z
126
- end
127
-
128
- # Call the score method of last estimator after applying all transforms.
129
- #
130
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
131
- # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
132
- # @return [Float] The score of last estimator
133
- def score(x, y)
134
- trans_x = apply_transforms(x)
135
- last_estimator.score(trans_x, y)
136
- end
137
-
138
- private
139
-
140
- def validate_steps(steps)
141
- steps.keys[0...-1].each do |name|
142
- transformer = steps[name]
143
- next if transformer.nil? || (transformer.class.method_defined?(:fit) && transformer.class.method_defined?(:transform))
144
-
145
- raise TypeError,
146
- 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
147
- "#{name} => #{transformer.class}"
148
- end
149
-
150
- estimator = steps[steps.keys.last]
151
- unless estimator.nil? || estimator.class.method_defined?(:fit) # rubocop:disable Style/GuardClause
152
- raise TypeError,
153
- 'Class of last step in pipeline should be implemented fit method: ' \
154
- "#{steps.keys.last} => #{estimator.class}"
155
- end
156
- end
157
-
158
- def apply_transforms(x, y = nil, fit: false)
159
- trans_x = x
160
- @steps.keys[0...-1].each do |name|
161
- transformer = @steps[name]
162
- next if transformer.nil?
163
-
164
- transformer.fit(trans_x, y) if fit
165
- trans_x = transformer.transform(trans_x)
166
- end
167
- trans_x
168
- end
169
-
170
- def last_estimator
171
- @steps[@steps.keys.last]
172
- end
173
- end
174
- end
175
- end
@@ -1,93 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- module Preprocessing
8
- # Discretizes features with a given number of bins.
9
- # In some cases, discretizing features may accelerate decision tree training.
10
- #
11
- # @example
12
- # discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 4)
13
- # samples = Numo::DFloat.new(5, 2).rand - 0.5
14
- # transformed = discretizer.fit_transform(samples)
15
- # # > pp samples
16
- # # Numo::DFloat#shape=[5,2]
17
- # # [[-0.438246, -0.126933],
18
- # # [ 0.294815, -0.298958],
19
- # # [-0.383959, -0.155968],
20
- # # [ 0.039948, 0.237815],
21
- # # [-0.334911, -0.449117]]
22
- # # > pp transformed
23
- # # Numo::DFloat#shape=[5,2]
24
- # # [[0, 1],
25
- # # [3, 0],
26
- # # [0, 1],
27
- # # [2, 3],
28
- # # [0, 0]]
29
- class BinDiscretizer
30
- include Base::BaseEstimator
31
- include Base::Transformer
32
-
33
- # Return the feature steps to be used discretizing.
34
- # @return [Array<Numo::DFloat>] (shape: [n_features, n_bins])
35
- attr_reader :feature_steps
36
-
37
- # Create a new discretizer for features with given number of bins.
38
- #
39
- # @param n_bins [Integer] The number of bins to be used for discretizing feature values.
40
- def initialize(n_bins: 32)
41
- @params = {}
42
- @params[:n_bins] = n_bins
43
- @feature_steps = nil
44
- end
45
-
46
- # Fit feature ranges to be discretized.
47
- #
48
- # @overload fit(x) -> BinDiscretizer
49
- #
50
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
51
- # @return [BinDiscretizer]
52
- def fit(x, _y = nil)
53
- x = check_convert_sample_array(x)
54
- n_features = x.shape[1]
55
- max_vals = x.max(0)
56
- min_vals = x.min(0)
57
- @feature_steps = Array.new(n_features) do |n|
58
- Numo::DFloat.linspace(min_vals[n], max_vals[n], @params[:n_bins] + 1)[0...@params[:n_bins]]
59
- end
60
- self
61
- end
62
-
63
- # Fit feature ranges to be discretized, then return discretized samples.
64
- #
65
- # @overload fit_transform(x) -> Numo::DFloat
66
- #
67
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
68
- # @return [Numo::DFloat] The discretized samples.
69
- def fit_transform(x, _y = nil)
70
- x = check_convert_sample_array(x)
71
- fit(x).transform(x)
72
- end
73
-
74
- # Perform discretizing the given samples.
75
- #
76
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
77
- # @return [Numo::DFloat] The discretized samples.
78
- def transform(x)
79
- x = check_convert_sample_array(x)
80
- n_samples, n_features = x.shape
81
- transformed = Numo::DFloat.zeros(n_samples, n_features)
82
- n_features.times do |n|
83
- steps = @feature_steps[n]
84
- @params[:n_bins].times do |bin|
85
- mask = x[true, n].ge(steps[bin]).where
86
- transformed[mask, n] = bin
87
- end
88
- end
89
- transformed
90
- end
91
- end
92
- end
93
- end
@@ -1,60 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- module Preprocessing
8
- # Binarize samples according to a threshold
9
- #
10
- # @example
11
- # binarizer = Rumale::Preprocessing::Binarizer.new
12
- # x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
13
- # b = binarizer.transform(x)
14
- # p b
15
- #
16
- # # Numo::DFloat#shape=[3, 2]
17
- # # [[0, 1],
18
- # # [1, 0],
19
- # # [1, 1]]
20
- class Binarizer
21
- include Base::BaseEstimator
22
- include Base::Transformer
23
-
24
- # Create a new transformer for binarization.
25
- # @param threshold [Float] The threshold value for binarization.
26
- def initialize(threshold: 0.0)
27
- check_params_numeric(threshold: threshold)
28
- @params = { threshold: threshold }
29
- end
30
-
31
- # This method does nothing and returns the object itself.
32
- # For compatibility with other transformer, this method exists.
33
- #
34
- # @overload fit() -> Binarizer
35
- #
36
- # @return [Binarizer]
37
- def fit(_x = nil, _y = nil)
38
- self
39
- end
40
-
41
- # Binarize each sample.
42
- #
43
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
44
- # @return [Numo::DFloat] The binarized samples.
45
- def transform(x)
46
- x = check_convert_sample_array(x)
47
- x.class.cast(x.gt(@params[:threshold]))
48
- end
49
-
50
- # The output of this method is the same as that of the transform method.
51
- # For compatibility with other transformer, this method exists.
52
- #
53
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
54
- # @return [Numo::DFloat] The binarized samples.
55
- def fit_transform(x, _y = nil)
56
- fit(x).transform(x)
57
- end
58
- end
59
- end
60
- end
@@ -1,92 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
- require 'rumale/pairwise_metric'
6
-
7
- module Rumale
8
- module Preprocessing
9
- # KernelCalculator is a class that calculates the kernel matrix with training data.
10
- #
11
- # @example
12
- # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
13
- # regressor = Rumale::KernelMachine::KernelRidge.new
14
- # pipeline = Rumale::Pipeline::Pipeline.new(
15
- # steps: { trs: transformer, est: regressor }
16
- # )
17
- # pipeline.fit(x_train, y_train)
18
- # results = pipeline.predict(x_test)
19
- class KernelCalculator
20
- include Base::BaseEstimator
21
- include Base::Transformer
22
-
23
- # Returns the training data for calculating kernel matrix.
24
- # @return [Numo::DFloat] (shape: n_components, n_features)
25
- attr_reader :components
26
-
27
- # Create a new transformer that transforms feature vectors into a kernel matrix.
28
- #
29
- # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
30
- # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
31
- # @param degree [Integer] The degree parameter in polynomial kernel function.
32
- # @param coef [Float] The coefficient in poly/sigmoid kernel function.
33
- def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
34
- check_params_string(kernel: kernel)
35
- check_params_numeric(gamma: gamma, coef: coef, degree: degree)
36
- @params = {}
37
- @params[:kernel] = kernel
38
- @params[:gamma] = gamma
39
- @params[:degree] = degree
40
- @params[:coef] = coef
41
- @components = nil
42
- end
43
-
44
- # Fit the model with given training data.
45
- #
46
- # @overload fit(x) -> KernelCalculator
47
- # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
48
- # @return [KernelCalculator] The learned transformer itself.
49
- def fit(x, _y = nil)
50
- x = check_convert_sample_array(x)
51
- @components = x.dup
52
- self
53
- end
54
-
55
- # Fit the model with training data, and then transform them with the learned model.
56
- #
57
- # @overload fit_transform(x) -> Numo::DFloat
58
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
59
- # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
60
- def fit_transform(x, y = nil)
61
- x = check_convert_sample_array(x)
62
- fit(x, y).transform(x)
63
- end
64
-
65
- # Transform the given data with the learned model.
66
- #
67
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
68
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
69
- def transform(x)
70
- x = check_convert_sample_array(x)
71
- kernel_mat(x, @components)
72
- end
73
-
74
- private
75
-
76
- def kernel_mat(x, y)
77
- case @params[:kernel]
78
- when 'rbf'
79
- Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
80
- when 'poly'
81
- Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
82
- when 'sigmoid'
83
- Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
84
- when 'linear'
85
- Rumale::PairwiseMetric.linear_kernel(x, y)
86
- else
87
- raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
88
- end
89
- end
90
- end
91
- end
92
- end