rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
data/lib/rumale/preprocessing/l1_normalizer.rb
@@ -1,62 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples to unit L1-norm.
-     #
-     # @example
-     #   normalizer = Rumale::Preprocessing::L1Normalizer.new
-     #   new_samples = normalizer.fit_transform(samples)
-     class L1Normalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of L1-norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to L1-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate L1-norms of each sample.
-       #
-       # @overload fit(x) -> L1Normalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [L1Normalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = x.abs.sum(1)
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
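The removed file documents its own call pattern in the @example above. A minimal usage sketch reconstructed from that example and the fit/fit_transform bodies, with hypothetical input data, and assuming that `require 'rumale'` still exposes Rumale::Preprocessing in 0.24.0 (the expanded metadata suggests the classes now come from split sub-gems):

    require 'numo/narray'
    require 'rumale'

    samples = Numo::DFloat[[1.0, -2.0, 3.0], [0.0, 0.0, 0.0]]
    normalizer = Rumale::Preprocessing::L1Normalizer.new
    normalized = normalizer.fit_transform(samples)
    # Each row is divided by its L1-norm (the sum of absolute values);
    # fit replaces a zero norm with 1, so all-zero rows pass through unchanged.

The L2Normalizer and MaxNormalizer files removed below follow the same fit/fit_transform/transform pattern and differ only in how the per-sample norm is computed.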
data/lib/rumale/preprocessing/l2_normalizer.rb
@@ -1,63 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # This module consists of the classes that perform preprocessings.
-   module Preprocessing
-     # Normalize samples to unit L2-norm.
-     #
-     # @example
-     #   normalizer = Rumale::Preprocessing::L2Normalizer.new
-     #   new_samples = normalizer.fit_transform(samples)
-     class L2Normalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of L2-norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to unit L2-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate L2-norms of each sample.
-       #
-       # @overload fit(x) -> L2Normalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [L2Normalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
data/lib/rumale/preprocessing/label_binarizer.rb
@@ -1,89 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode labels to binary labels with one-vs-all scheme.
-     #
-     # @example
-     #   encoder = Rumale::Preprocessing::LabelBinarizer.new
-     #   label = [0, -1, 3, 3, 1, 1]
-     #   p encoder.fit_transform(label)
-     #   # Numo::Int32#shape=[6,4]
-     #   # [[0, 1, 0, 0],
-     #   #  [1, 0, 0, 0],
-     #   #  [0, 0, 0, 1],
-     #   #  [0, 0, 0, 1],
-     #   #  [0, 0, 1, 0],
-     #   #  [0, 0, 1, 0]]
-     class LabelBinarizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the class labels.
-       # @return [Array] (size: [n_classes])
-       attr_reader :classes
-
-       # Create a new encoder for binarizing labels with one-vs-all scheme.
-       #
-       # @param neg_label [Integer] The value represents negative label.
-       # @param pos_label [Integer] The value represents positive label.
-       def initialize(neg_label: 0, pos_label: 1)
-         check_params_numeric(neg_label: neg_label, pos_label: pos_label)
-         @params = {}
-         @params[:neg_label] = neg_label
-         @params[:pos_label] = pos_label
-         @classes = nil
-       end
-
-       # Fit encoder to labels.
-       #
-       # @overload fit(y) -> LabelBinarizer
-       #   @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
-       # @return [LabelBinarizer]
-       def fit(y, _not_used = nil)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         @classes = y.uniq.sort
-         self
-       end
-
-       # Fit encoder to labels, then return binarized labels.
-       #
-       # @overload fit_transform(y) -> Numo::DFloat
-       #   @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
-       # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
-       def fit_transform(y, _not_used = nil)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         fit(y).transform(y)
-       end
-
-       # Encode labels.
-       #
-       # @param y [Array] (shape: [n_samples]) The labels to be encoded.
-       # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
-       def transform(y)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         n_classes = @classes.size
-         n_samples = y.size
-         codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
-         n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
-         codes
-       end
-
-       # Decode binarized labels.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
-       # @return [Array] (shape: [n_samples]) The decoded labels.
-       def inverse_transform(x)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         n_samples = x.shape[0]
-         Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
-       end
-     end
-   end
- end
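A round-trip sketch for the binarizer removed above, reconstructed from its @example and the transform/inverse_transform bodies (the label data is hypothetical):

    require 'rumale'

    encoder = Rumale::Preprocessing::LabelBinarizer.new(neg_label: 0, pos_label: 1)
    labels = [0, -1, 3, 3, 1, 1]
    binarized = encoder.fit_transform(labels)       # Numo::Int32, shape [6, 4]; columns follow the sorted classes [-1, 0, 1, 3]
    decoded = encoder.inverse_transform(binarized)  # => [0, -1, 3, 3, 1, 1]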
data/lib/rumale/preprocessing/label_encoder.rb
@@ -1,79 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode labels to values between 0 and n_classes - 1.
-     #
-     # @example
-     #   encoder = Rumale::Preprocessing::LabelEncoder.new
-     #   labels = Numo::Int32[1, 8, 8, 15, 0]
-     #   encoded_labels = encoder.fit_transform(labels)
-     #   # > pp encoded_labels
-     #   # Numo::Int32#shape=[5]
-     #   # [1, 2, 2, 3, 0]
-     #   decoded_labels = encoder.inverse_transform(encoded_labels)
-     #   # > pp decoded_labels
-     #   # [1, 8, 8, 15, 0]
-     class LabelEncoder
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the class labels.
-       # @return [Array] (size: [n_classes])
-       attr_reader :classes
-
-       # Create a new encoder for encoding labels to values between 0 and n_classes - 1.
-       def initialize
-         @params = {}
-         @classes = nil
-       end
-
-       # Fit label-encoder to labels.
-       #
-       # @overload fit(x) -> LabelEncoder
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
-       # @return [LabelEncoder]
-       def fit(x, _y = nil)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         @classes = x.sort.uniq
-         self
-       end
-
-       # Fit label-encoder to labels, then return encoded labels.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
-       # @return [Numo::Int32] The encoded labels.
-       def fit_transform(x, _y = nil)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         fit(x).transform(x)
-       end
-
-       # Encode labels.
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to be encoded.
-       # @return [Numo::Int32] The encoded labels.
-       def transform(x)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         Numo::Int32[*(x.map { |v| @classes.index(v) })]
-       end
-
-       # Decode encoded labels.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
-       # @return [Array] The decoded labels.
-       def inverse_transform(x)
-         x = check_convert_label_array(x)
-         x.to_a.map { |n| @classes[n] }
-       end
-     end
-   end
- end
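The encode/decode cycle for the label encoder removed above, taken directly from its @example:

    require 'rumale'

    encoder = Rumale::Preprocessing::LabelEncoder.new
    labels = Numo::Int32[1, 8, 8, 15, 0]
    encoded = encoder.fit_transform(labels)        # => Numo::Int32[1, 2, 2, 3, 0]
    decoded = encoder.inverse_transform(encoded)   # => [1, 8, 8, 15, 0]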
data/lib/rumale/preprocessing/max_abs_scaler.rb
@@ -1,61 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples by scaling each feature with its maximum absolute value.
-     #
-     # @example
-     #   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
-     #   new_training_samples = normalizer.fit_transform(training_samples)
-     #   new_testing_samples = normalizer.transform(testing_samples)
-     class MaxAbsScaler
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the maximum absolute value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :max_abs_vec
-
-       # Creates a new normalizer for scaling each feature with its maximum absolute value.
-       def initialize
-         @params = {}
-         @max_abs_vec = nil
-       end
-
-       # Calculate the minimum and maximum value of each feature for scaling.
-       #
-       # @overload fit(x) -> MaxAbsScaler
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
-       # @return [MaxAbsScaler]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @max_abs_vec = x.abs.max(0)
-         self
-       end
-
-       # Calculate the maximum absolute value for each feature, and then normalize samples.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
-       # @return [Numo::DFloat] The scaled samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x).transform(x)
-       end
-
-       # Perform scaling the given samples with maximum absolute value for each feature.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
-       # @return [Numo::DFloat] The scaled samples.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         x / @max_abs_vec
-       end
-     end
-   end
- end
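The scaler removed above is meant to be fitted on training data and reused on test data, as its @example shows. A small sketch with hypothetical matrices:

    require 'rumale'

    x_train = Numo::DFloat[[1.0, -4.0], [2.0, 2.0]]
    x_test  = Numo::DFloat[[0.5, 8.0]]

    scaler = Rumale::Preprocessing::MaxAbsScaler.new
    scaler.fit(x_train)                      # stores the per-feature maximum absolute values [2.0, 4.0]
    scaled_train = scaler.transform(x_train)
    scaled_test  = scaler.transform(x_test)  # scaled with values learned from x_train, so entries may exceed 1.0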
data/lib/rumale/preprocessing/max_normalizer.rb
@@ -1,62 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples with the maximum of the absolute values.
-     #
-     # @example
-     #   normalizer = Rumale::Preprocessing::MaxNormalizer.new
-     #   new_samples = normalizer.fit_transform(samples)
-     class MaxNormalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the maximum norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to max-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate the maximum norms of each sample.
-       #
-       # @overload fit(x) -> MaxNormalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
-       # @return [MaxNormalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = x.abs.max(1)
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate the maximums norm of each sample, and then normalize samples with the norms.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate the maximum norms of each sample, and then normalize samples with the norms.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
data/lib/rumale/preprocessing/min_max_scaler.rb
@@ -1,76 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # This module consists of the classes that perform preprocessings.
-   module Preprocessing
-     # Normalize samples by scaling each feature to a given range.
-     #
-     # @example
-     #   normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
-     #   new_training_samples = normalizer.fit_transform(training_samples)
-     #   new_testing_samples = normalizer.transform(testing_samples)
-     class MinMaxScaler
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the minimum value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :min_vec
-
-       # Return the vector consists of the maximum value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :max_vec
-
-       # Creates a new normalizer for scaling each feature to a given range.
-       #
-       # @param feature_range [Array<Float>] The desired range of samples.
-       def initialize(feature_range: [0.0, 1.0])
-         check_params_type(Array, feature_range: feature_range)
-         @params = {}
-         @params[:feature_range] = feature_range
-         @min_vec = nil
-         @max_vec = nil
-       end
-
-       # Calculate the minimum and maximum value of each feature for scaling.
-       #
-       # @overload fit(x) -> MinMaxScaler
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
-       # @return [MinMaxScaler]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @min_vec = x.min(0)
-         @max_vec = x.max(0)
-         self
-       end
-
-       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
-       # @return [Numo::DFloat] The scaled samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x).transform(x)
-       end
-
-       # Perform scaling the given samples according to feature_range.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
-       # @return [Numo::DFloat] The scaled samples.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         n_samples, = x.shape
-         dif_vec = @max_vec - @min_vec
-         dif_vec[dif_vec.eq(0)] = 1.0
-         nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
-         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
-       end
-     end
-   end
- end
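A sketch of the feature_range behavior implemented in the transform method above, with hypothetical data:

    require 'rumale'

    scaler = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
    x_train = Numo::DFloat[[1.0, 10.0], [3.0, 30.0]]
    scaled_train = scaler.fit_transform(x_train)                 # each feature mapped to [0, 1] using its own min and max
    scaled_test  = scaler.transform(Numo::DFloat[[2.0, 20.0]])   # => [[0.5, 0.5]]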
data/lib/rumale/preprocessing/one_hot_encoder.rb
@@ -1,100 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode categorical integer features to one-hot-vectors.
-     #
-     # @example
-     #   encoder = Rumale::Preprocessing::OneHotEncoder.new
-     #   labels = Numo::Int32[0, 0, 2, 3, 2, 1]
-     #   one_hot_vectors = encoder.fit_transform(labels)
-     #   # > pp one_hot_vectors
-     #   # Numo::DFloat#shape[6, 4]
-     #   # [[1, 0, 0, 0],
-     #   #  [1, 0, 0, 0],
-     #   #  [0, 0, 1, 0],
-     #   #  [0, 0, 0, 1],
-     #   #  [0, 0, 1, 0],
-     #   #  [0, 1, 0, 0]]
-     class OneHotEncoder
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the maximum values for each feature.
-       # @return [Numo::Int32] (shape: [n_features])
-       attr_reader :n_values
-
-       # Return the indices for feature values that actually occur in the training set.
-       # @return [Nimo::Int32]
-       attr_reader :active_features
-
-       # Return the indices to feature ranges.
-       # @return [Numo::Int32] (shape: [n_features + 1])
-       attr_reader :feature_indices
-
-       # Create a new encoder for encoding categorical integer features to one-hot-vectors
-       def initialize
-         @params = {}
-         @n_values = nil
-         @active_features = nil
-         @feature_indices = nil
-       end
-
-       # Fit one-hot-encoder to samples.
-       #
-       # @overload fit(x) -> OneHotEncoder
-       #   @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
-       # @return [OneHotEncoder]
-       def fit(x, _y = nil)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         @n_values = x.max(0) + 1
-         @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
-         @active_features = encode(x, @feature_indices).sum(0).ne(0).where
-         self
-       end
-
-       # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
-       # @return [Numo::DFloat] The one-hot-vectors.
-       def fit_transform(x, _y = nil)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         fit(x).transform(x)
-       end
-
-       # Encode samples into one-hot-vectors.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
-       # @return [Numo::DFloat] The one-hot-vectors.
-       def transform(x)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         codes = encode(x, @feature_indices)
-         codes[true, @active_features].dup
-       end
-
-       private
-
-       def encode(x, indices)
-         n_samples, n_features = x.shape
-         n_features = 1 if n_features.nil?
-         col_indices = (x + indices[0...-1]).flatten.to_a
-         row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
-         codes = Numo::DFloat.zeros(n_samples, indices[-1])
-         row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
-         codes
-       end
-     end
-   end
- end
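Usage of the one-hot encoder removed above, taken from its @example:

    require 'rumale'

    encoder = Rumale::Preprocessing::OneHotEncoder.new
    labels = Numo::Int32[0, 0, 2, 3, 2, 1]
    one_hot = encoder.fit_transform(labels)  # Numo::DFloat, shape [6, 4]
    # Row i has a 1.0 in the column for labels[i]; columns for integer values
    # that never occur in the fitted data are dropped via active_features.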