rumale 0.23.3 → 0.24.0

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
data/lib/rumale/preprocessing/l1_normalizer.rb
@@ -1,62 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples to unit L1-norm.
-     #
-     # @example
-     # normalizer = Rumale::Preprocessing::L1Normalizer.new
-     # new_samples = normalizer.fit_transform(samples)
-     class L1Normalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of L1-norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to L1-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate L1-norms of each sample.
-       #
-       # @overload fit(x) -> L1Normalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [L1Normalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = x.abs.sum(1)
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
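A minimal worked sketch of the L1Normalizer API removed above, assuming rumale 0.23.3 and numo-narray are installed and using made-up input values: each row is divided by its L1 norm, with zero norms replaced by 1 as in the fit method shown.

    require 'rumale'

    samples = Numo::DFloat[[1.0, 3.0], [2.0, 2.0]]
    normalizer = Rumale::Preprocessing::L1Normalizer.new
    normalizer.fit_transform(samples)
    # Both rows have an L1 norm of 4.0, so the result is
    # [[0.25, 0.75], [0.5, 0.5]]
    normalizer.norm_vec  # => Numo::DFloat[4.0, 4.0]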
data/lib/rumale/preprocessing/l2_normalizer.rb
@@ -1,63 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # This module consists of the classes that perform preprocessings.
-   module Preprocessing
-     # Normalize samples to unit L2-norm.
-     #
-     # @example
-     # normalizer = Rumale::Preprocessing::L2Normalizer.new
-     # new_samples = normalizer.fit_transform(samples)
-     class L2Normalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of L2-norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to unit L2-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate L2-norms of each sample.
-       #
-       # @overload fit(x) -> L2Normalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [L2Normalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
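A similar sketch for the L2Normalizer removed above (same assumptions, made-up values): each row is scaled to unit Euclidean length.

    require 'rumale'

    samples = Numo::DFloat[[3.0, 4.0], [0.0, 5.0]]
    normalizer = Rumale::Preprocessing::L2Normalizer.new
    normalizer.fit_transform(samples)
    # Both row L2 norms are 5.0, so the result is
    # [[0.6, 0.8], [0.0, 1.0]]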
data/lib/rumale/preprocessing/label_binarizer.rb
@@ -1,89 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode labels to binary labels with one-vs-all scheme.
-     #
-     # @example
-     # encoder = Rumale::Preprocessing::LabelBinarizer.new
-     # label = [0, -1, 3, 3, 1, 1]
-     # p encoder.fit_transform(label)
-     # # Numo::Int32#shape=[6,4]
-     # # [[0, 1, 0, 0],
-     # # [1, 0, 0, 0],
-     # # [0, 0, 0, 1],
-     # # [0, 0, 0, 1],
-     # # [0, 0, 1, 0],
-     # # [0, 0, 1, 0]]
-     class LabelBinarizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the class labels.
-       # @return [Array] (size: [n_classes])
-       attr_reader :classes
-
-       # Create a new encoder for binarizing labels with one-vs-all scheme.
-       #
-       # @param neg_label [Integer] The value represents negative label.
-       # @param pos_label [Integer] The value represents positive label.
-       def initialize(neg_label: 0, pos_label: 1)
-         check_params_numeric(neg_label: neg_label, pos_label: pos_label)
-         @params = {}
-         @params[:neg_label] = neg_label
-         @params[:pos_label] = pos_label
-         @classes = nil
-       end
-
-       # Fit encoder to labels.
-       #
-       # @overload fit(y) -> LabelBinarizer
-       # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
-       # @return [LabelBinarizer]
-       def fit(y, _not_used = nil)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         @classes = y.uniq.sort
-         self
-       end
-
-       # Fit encoder to labels, then return binarized labels.
-       #
-       # @overload fit_transform(y) -> Numo::DFloat
-       # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
-       # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
-       def fit_transform(y, _not_used = nil)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         fit(y).transform(y)
-       end
-
-       # Encode labels.
-       #
-       # @param y [Array] (shape: [n_samples]) The labels to be encoded.
-       # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
-       def transform(y)
-         y = y.to_a if y.is_a?(Numo::NArray)
-         check_params_type(Array, y: y)
-         n_classes = @classes.size
-         n_samples = y.size
-         codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
-         n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
-         codes
-       end
-
-       # Decode binarized labels.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
-       # @return [Array] (shape: [n_samples]) The decoded labels.
-       def inverse_transform(x)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         n_samples = x.shape[0]
-         Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
-       end
-     end
-   end
- end
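The neg_label/pos_label parameters and inverse_transform are not exercised by the class's own @example, so here is a hedged sketch (assumes rumale 0.23.3; the label values are made up):

    require 'rumale'

    encoder = Rumale::Preprocessing::LabelBinarizer.new(neg_label: -1, pos_label: 1)
    labels = [0, -1, 3, 3, 1, 1]
    codes = encoder.fit_transform(labels)
    # encoder.classes is [-1, 0, 1, 3]; codes uses -1/1 instead of 0/1,
    # e.g. the first row is [-1, 1, -1, -1]
    encoder.inverse_transform(codes)  # => [0, -1, 3, 3, 1, 1]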
data/lib/rumale/preprocessing/label_encoder.rb
@@ -1,79 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode labels to values between 0 and n_classes - 1.
-     #
-     # @example
-     # encoder = Rumale::Preprocessing::LabelEncoder.new
-     # labels = Numo::Int32[1, 8, 8, 15, 0]
-     # encoded_labels = encoder.fit_transform(labels)
-     # # > pp encoded_labels
-     # # Numo::Int32#shape=[5]
-     # # [1, 2, 2, 3, 0]
-     # decoded_labels = encoder.inverse_transform(encoded_labels)
-     # # > pp decoded_labels
-     # # [1, 8, 8, 15, 0]
-     class LabelEncoder
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the class labels.
-       # @return [Array] (size: [n_classes])
-       attr_reader :classes
-
-       # Create a new encoder for encoding labels to values between 0 and n_classes - 1.
-       def initialize
-         @params = {}
-         @classes = nil
-       end
-
-       # Fit label-encoder to labels.
-       #
-       # @overload fit(x) -> LabelEncoder
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
-       # @return [LabelEncoder]
-       def fit(x, _y = nil)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         @classes = x.sort.uniq
-         self
-       end
-
-       # Fit label-encoder to labels, then return encoded labels.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
-       # @return [Numo::Int32] The encoded labels.
-       def fit_transform(x, _y = nil)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         fit(x).transform(x)
-       end
-
-       # Encode labels.
-       #
-       # @param x [Array] (shape: [n_samples]) The labels to be encoded.
-       # @return [Numo::Int32] The encoded labels.
-       def transform(x)
-         x = x.to_a if x.is_a?(Numo::NArray)
-         check_params_type(Array, x: x)
-         Numo::Int32[*(x.map { |v| @classes.index(v) })]
-       end
-
-       # Decode encoded labels.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
-       # @return [Array] The decoded labels.
-       def inverse_transform(x)
-         x = check_convert_label_array(x)
-         x.to_a.map { |n| @classes[n] }
-       end
-     end
-   end
- end
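Because fit above only needs labels that respond to sort and uniq, the removed LabelEncoder also handled non-integer categories; a hedged sketch with made-up string labels (assumes rumale 0.23.3):

    require 'rumale'

    encoder = Rumale::Preprocessing::LabelEncoder.new
    labels = ['ham', 'spam', 'ham', 'eggs']
    encoded = encoder.fit_transform(labels)
    # encoder.classes is ['eggs', 'ham', 'spam'], so encoded is Numo::Int32[1, 2, 1, 0]
    encoder.inverse_transform(encoded)  # => ['ham', 'spam', 'ham', 'eggs']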
data/lib/rumale/preprocessing/max_abs_scaler.rb
@@ -1,61 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples by scaling each feature with its maximum absolute value.
-     #
-     # @example
-     # normalizer = Rumale::Preprocessing::MaxAbsScaler.new
-     # new_training_samples = normalizer.fit_transform(training_samples)
-     # new_testing_samples = normalizer.transform(testing_samples)
-     class MaxAbsScaler
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the maximum absolute value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :max_abs_vec
-
-       # Creates a new normalizer for scaling each feature with its maximum absolute value.
-       def initialize
-         @params = {}
-         @max_abs_vec = nil
-       end
-
-       # Calculate the minimum and maximum value of each feature for scaling.
-       #
-       # @overload fit(x) -> MaxAbsScaler
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
-       # @return [MaxAbsScaler]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @max_abs_vec = x.abs.max(0)
-         self
-       end
-
-       # Calculate the maximum absolute value for each feature, and then normalize samples.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
-       # @return [Numo::DFloat] The scaled samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x).transform(x)
-       end
-
-       # Perform scaling the given samples with maximum absolute value for each feature.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
-       # @return [Numo::DFloat] The scaled samples.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         x / @max_abs_vec
-       end
-     end
-   end
- end
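A worked sketch of the MaxAbsScaler removed above (assumes rumale 0.23.3, made-up values): each column is divided by its largest absolute value, so every feature ends up in [-1, 1].

    require 'rumale'

    samples = Numo::DFloat[[1.0, -10.0], [2.0, 5.0], [-4.0, 2.5]]
    scaler = Rumale::Preprocessing::MaxAbsScaler.new
    scaler.fit_transform(samples)
    # scaler.max_abs_vec is Numo::DFloat[4.0, 10.0], so the result is
    # [[0.25, -1.0], [0.5, 0.5], [-1.0, 0.25]]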
data/lib/rumale/preprocessing/max_normalizer.rb
@@ -1,62 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Normalize samples with the maximum of the absolute values.
-     #
-     # @example
-     # normalizer = Rumale::Preprocessing::MaxNormalizer.new
-     # new_samples = normalizer.fit_transform(samples)
-     class MaxNormalizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the maximum norm for each sample.
-       # @return [Numo::DFloat] (shape: [n_samples])
-       attr_reader :norm_vec # :nodoc:
-
-       # Create a new normalizer for normaliing to max-norm.
-       def initialize
-         @params = {}
-         @norm_vec = nil
-       end
-
-       # Calculate the maximum norms of each sample.
-       #
-       # @overload fit(x) -> MaxNormalizer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
-       # @return [MaxNormalizer]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @norm_vec = x.abs.max(1)
-         @norm_vec[@norm_vec.eq(0)] = 1
-         self
-       end
-
-       # Calculate the maximums norm of each sample, and then normalize samples with the norms.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x)
-         x / @norm_vec.expand_dims(1)
-       end
-
-       # Calculate the maximum norms of each sample, and then normalize samples with the norms.
-       # This method calls the fit_transform method. This method exists for the Pipeline class.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
-       # @return [Numo::DFloat] The normalized samples.
-       def transform(x)
-         fit_transform(x)
-       end
-     end
-   end
- end
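The removed MaxNormalizer is the row-wise counterpart of MaxAbsScaler: each sample is divided by its own largest absolute value, per the fit body above. A brief sketch under the same assumptions:

    require 'rumale'

    samples = Numo::DFloat[[2.0, -8.0], [1.0, 4.0]]
    normalizer = Rumale::Preprocessing::MaxNormalizer.new
    normalizer.fit_transform(samples)
    # Row maxima are 8.0 and 4.0, so the result is [[0.25, -1.0], [0.25, 1.0]]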
data/lib/rumale/preprocessing/min_max_scaler.rb
@@ -1,76 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # This module consists of the classes that perform preprocessings.
-   module Preprocessing
-     # Normalize samples by scaling each feature to a given range.
-     #
-     # @example
-     # normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
-     # new_training_samples = normalizer.fit_transform(training_samples)
-     # new_testing_samples = normalizer.transform(testing_samples)
-     class MinMaxScaler
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consists of the minimum value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :min_vec
-
-       # Return the vector consists of the maximum value for each feature.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :max_vec
-
-       # Creates a new normalizer for scaling each feature to a given range.
-       #
-       # @param feature_range [Array<Float>] The desired range of samples.
-       def initialize(feature_range: [0.0, 1.0])
-         check_params_type(Array, feature_range: feature_range)
-         @params = {}
-         @params[:feature_range] = feature_range
-         @min_vec = nil
-         @max_vec = nil
-       end
-
-       # Calculate the minimum and maximum value of each feature for scaling.
-       #
-       # @overload fit(x) -> MinMaxScaler
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
-       # @return [MinMaxScaler]
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         @min_vec = x.min(0)
-         @max_vec = x.max(0)
-         self
-       end
-
-       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
-       # @return [Numo::DFloat] The scaled samples.
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x).transform(x)
-       end
-
-       # Perform scaling the given samples according to feature_range.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
-       # @return [Numo::DFloat] The scaled samples.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         n_samples, = x.shape
-         dif_vec = @max_vec - @min_vec
-         dif_vec[dif_vec.eq(0)] = 1.0
-         nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
-         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
-       end
-     end
-   end
- end
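A worked sketch of the MinMaxScaler removed above (assumes rumale 0.23.3, made-up values): transform rescales each feature as (x - min) / (max - min) and then stretches the result onto feature_range, as the transform body shows.

    require 'rumale'

    training = Numo::DFloat[[1.0, 100.0], [3.0, 300.0], [5.0, 200.0]]
    scaler = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
    scaler.fit_transform(training)
    # Column minima are [1.0, 100.0] and maxima are [5.0, 300.0], so the result is
    # [[0.0, 0.0], [0.5, 1.0], [1.0, 0.5]]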
data/lib/rumale/preprocessing/one_hot_encoder.rb
@@ -1,100 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module Preprocessing
-     # Encode categorical integer features to one-hot-vectors.
-     #
-     # @example
-     # encoder = Rumale::Preprocessing::OneHotEncoder.new
-     # labels = Numo::Int32[0, 0, 2, 3, 2, 1]
-     # one_hot_vectors = encoder.fit_transform(labels)
-     # # > pp one_hot_vectors
-     # # Numo::DFloat#shape[6, 4]
-     # # [[1, 0, 0, 0],
-     # # [1, 0, 0, 0],
-     # # [0, 0, 1, 0],
-     # # [0, 0, 0, 1],
-     # # [0, 0, 1, 0],
-     # # [0, 1, 0, 0]]
-     class OneHotEncoder
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the maximum values for each feature.
-       # @return [Numo::Int32] (shape: [n_features])
-       attr_reader :n_values
-
-       # Return the indices for feature values that actually occur in the training set.
-       # @return [Nimo::Int32]
-       attr_reader :active_features
-
-       # Return the indices to feature ranges.
-       # @return [Numo::Int32] (shape: [n_features + 1])
-       attr_reader :feature_indices
-
-       # Create a new encoder for encoding categorical integer features to one-hot-vectors
-       def initialize
-         @params = {}
-         @n_values = nil
-         @active_features = nil
-         @feature_indices = nil
-       end
-
-       # Fit one-hot-encoder to samples.
-       #
-       # @overload fit(x) -> OneHotEncoder
-       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
-       # @return [OneHotEncoder]
-       def fit(x, _y = nil)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         @n_values = x.max(0) + 1
-         @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
-         @active_features = encode(x, @feature_indices).sum(0).ne(0).where
-         self
-       end
-
-       # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
-       # @return [Numo::DFloat] The one-hot-vectors.
-       def fit_transform(x, _y = nil)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         fit(x).transform(x)
-       end
-
-       # Encode samples into one-hot-vectors.
-       #
-       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
-       # @return [Numo::DFloat] The one-hot-vectors.
-       def transform(x)
-         x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
-         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
-
-         codes = encode(x, @feature_indices)
-         codes[true, @active_features].dup
-       end
-
-       private
-
-       def encode(x, indices)
-         n_samples, n_features = x.shape
-         n_features = 1 if n_features.nil?
-         col_indices = (x + indices[0...-1]).flatten.to_a
-         row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
-         codes = Numo::DFloat.zeros(n_samples, indices[-1])
-         row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
-         codes
-       end
-     end
-   end
- end
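To make the private encode method above concrete, a sketch with two made-up categorical columns (assumes rumale 0.23.3). The columns take 2 and 3 distinct values, so n_values is [2, 3], feature_indices is [0, 2, 5], and each row is encoded into a length-5 vector; every slot occurs in this toy data, so active_features keeps all five columns.

    require 'rumale'

    samples = Numo::Int32[[0, 1], [1, 2], [0, 0]]
    encoder = Rumale::Preprocessing::OneHotEncoder.new
    encoder.fit_transform(samples)
    # => Numo::DFloat
    # [[1, 0, 0, 1, 0],
    #  [0, 1, 0, 0, 1],
    #  [1, 0, 1, 0, 0]]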