rumale-preprocessing 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+ require 'rumale/validation'
+
+ module Rumale
+   module Preprocessing
+     # Normalize samples by scaling each feature with its maximum absolute value.
+     #
+     # @example
+     #   require 'rumale/preprocessing/max_abs_scaler'
+     #
+     #   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
+     #   new_training_samples = normalizer.fit_transform(training_samples)
+     #   new_testing_samples = normalizer.transform(testing_samples)
+     class MaxAbsScaler < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the vector consisting of the maximum absolute value of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :max_abs_vec
+
+       # Create a new normalizer for scaling each feature with its maximum absolute value.
+       def initialize # rubocop:disable Lint/UselessMethodDefinition
+         super()
+       end
+
+       # Calculate the maximum absolute value of each feature for scaling.
+       #
+       # @overload fit(x) -> MaxAbsScaler
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the maximum absolute value of each feature.
+       # @return [MaxAbsScaler]
+       def fit(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         @max_abs_vec = x.abs.max(0)
+         self
+       end
+
+       # Calculate the maximum absolute value of each feature, and then scale the samples.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the maximum absolute value of each feature.
+       # @return [Numo::DFloat] The scaled samples.
+       def fit_transform(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit(x).transform(x)
+       end
+
+       # Scale the given samples with the maximum absolute value of each feature.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+       # @return [Numo::DFloat] The scaled samples.
+       def transform(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x / @max_abs_vec
+       end
+     end
+   end
+ end
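
As a quick illustration of the scaler above, the following sketch (made-up input values; it assumes the rumale-preprocessing and numo-narray gems are installed) divides each feature column by that column's maximum absolute value:

    require 'numo/narray'
    require 'rumale/preprocessing/max_abs_scaler'

    # Two features: their maximum absolute values are 2.0 and 4.0.
    x = Numo::DFloat[[1.0, -2.0], [2.0, 4.0]]

    scaler = Rumale::Preprocessing::MaxAbsScaler.new
    p scaler.fit_transform(x)
    # Numo::DFloat#shape=[2,2]
    # [[0.5, -0.5],
    #  [1, 1]]
    p scaler.max_abs_vec
    # Numo::DFloat#shape=[2]
    # [2, 4]

Note that, unlike MinMaxScaler and MaxNormalizer below, this scaler has no guard for an all-zero feature column, so such a column would come out as NaN (0.0 / 0.0).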
@@ -0,0 +1,67 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+ require 'rumale/validation'
+
+ module Rumale
+   module Preprocessing
+     # Normalize each sample with the maximum of its absolute values (max norm).
+     #
+     # @example
+     #   require 'rumale/preprocessing/max_normalizer'
+     #
+     #   normalizer = Rumale::Preprocessing::MaxNormalizer.new
+     #   new_samples = normalizer.fit_transform(samples)
+     class MaxNormalizer < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the vector consisting of the maximum norm of each sample.
+       # @return [Numo::DFloat] (shape: [n_samples])
+       attr_reader :norm_vec # :nodoc:
+
+       # Create a new normalizer for normalizing to the max norm.
+       def initialize # rubocop:disable Lint/UselessMethodDefinition
+         super()
+       end
+
+       # Calculate the maximum norm of each sample.
+       #
+       # @overload fit(x) -> MaxNormalizer
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
+       # @return [MaxNormalizer]
+       def fit(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         @norm_vec = x.abs.max(1)
+         @norm_vec[@norm_vec.eq(0)] = 1
+         self
+       end
+
+       # Calculate the maximum norm of each sample, and then normalize the samples with the norms.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
+       # @return [Numo::DFloat] The normalized samples.
+       def fit_transform(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit(x)
+         x / @norm_vec.expand_dims(1)
+       end
+
+       # Calculate the maximum norm of each sample, and then normalize the samples with the norms.
+       # This method calls the fit_transform method. This method exists for the Pipeline class.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
+       # @return [Numo::DFloat] The normalized samples.
+       def transform(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit_transform(x)
+       end
+     end
+   end
+ end
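
For contrast with MaxAbsScaler, the sketch below (made-up data, same gem assumptions as above) scales per sample rather than per feature; a sample whose maximum absolute value is zero is left unchanged because its norm is replaced with 1:

    require 'numo/narray'
    require 'rumale/preprocessing/max_normalizer'

    # Per-sample maximum absolute values: 2.0, 4.0, and 0.0 (replaced with 1).
    x = Numo::DFloat[[1.0, -2.0], [4.0, 3.0], [0.0, 0.0]]

    normalizer = Rumale::Preprocessing::MaxNormalizer.new
    p normalizer.fit_transform(x)
    # Numo::DFloat#shape=[3,2]
    # [[0.5, -1],
    #  [1, 0.75],
    #  [0, 0]]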
@@ -0,0 +1,78 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+ require 'rumale/validation'
+
+ module Rumale
+   # This module consists of the classes that perform preprocessing.
+   module Preprocessing
+     # Normalize samples by scaling each feature to a given range.
+     #
+     # @example
+     #   require 'rumale/preprocessing/min_max_scaler'
+     #
+     #   normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
+     #   new_training_samples = normalizer.fit_transform(training_samples)
+     #   new_testing_samples = normalizer.transform(testing_samples)
+     class MinMaxScaler < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the vector consisting of the minimum value of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :min_vec
+
+       # Return the vector consisting of the maximum value of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :max_vec
+
+       # Create a new normalizer for scaling each feature to a given range.
+       #
+       # @param feature_range [Array<Float>] The desired range of the scaled values.
+       def initialize(feature_range: [0.0, 1.0])
+         super()
+         @params = { feature_range: feature_range }
+       end
+
+       # Calculate the minimum and maximum value of each feature for scaling.
+       #
+       # @overload fit(x) -> MinMaxScaler
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+       # @return [MinMaxScaler]
+       def fit(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         @min_vec = x.min(0)
+         @max_vec = x.max(0)
+         self
+       end
+
+       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+       # @return [Numo::DFloat] The scaled samples.
+       def fit_transform(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit(x).transform(x)
+       end
+
+       # Scale the given samples according to feature_range.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+       # @return [Numo::DFloat] The scaled samples.
+       def transform(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_samples, = x.shape
+         dif_vec = @max_vec - @min_vec
+         dif_vec[dif_vec.eq(0)] = 1.0
+         nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
+         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
+       end
+     end
+   end
+ end
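
A small sketch of the range scaling above (made-up data; gems assumed installed as before), fitting on training data, reusing the learned minimum and maximum on test data, and using a non-default feature_range:

    require 'numo/narray'
    require 'rumale/preprocessing/min_max_scaler'

    train = Numo::DFloat[[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]]
    test  = Numo::DFloat[[5.0, 10.0]]

    scaler = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [-1.0, 1.0])
    p scaler.fit_transform(train)
    # Numo::DFloat#shape=[3,2]
    # [[-1, -1],
    #  [0, 0],
    #  [1, 1]]
    p scaler.transform(test)
    # Numo::DFloat#shape=[1,2]
    # [[0, -1]]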
@@ -0,0 +1,94 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+
+ module Rumale
+   module Preprocessing
+     # Encode categorical integer features to one-hot-vectors.
+     #
+     # @example
+     #   require 'rumale/preprocessing/one_hot_encoder'
+     #
+     #   encoder = Rumale::Preprocessing::OneHotEncoder.new
+     #   labels = Numo::Int32[0, 0, 2, 3, 2, 1]
+     #   one_hot_vectors = encoder.fit_transform(labels)
+     #   # > pp one_hot_vectors
+     #   # Numo::DFloat#shape[6, 4]
+     #   # [[1, 0, 0, 0],
+     #   #  [1, 0, 0, 0],
+     #   #  [0, 0, 1, 0],
+     #   #  [0, 0, 0, 1],
+     #   #  [0, 0, 1, 0],
+     #   #  [0, 1, 0, 0]]
+     class OneHotEncoder < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the number of values of each feature (its maximum value + 1).
+       # @return [Numo::Int32] (shape: [n_features])
+       attr_reader :n_values
+
+       # Return the indices of the feature values that actually occur in the training set.
+       # @return [Numo::Int32]
+       attr_reader :active_features
+
+       # Return the indices to feature ranges.
+       # @return [Numo::Int32] (shape: [n_features + 1])
+       attr_reader :feature_indices
+
+       # Create a new encoder for encoding categorical integer features to one-hot-vectors.
+       def initialize # rubocop:disable Lint/UselessMethodDefinition
+         super()
+       end
+
+       # Fit one-hot-encoder to samples.
+       #
+       # @overload fit(x) -> OneHotEncoder
+       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
+       # @return [OneHotEncoder]
+       def fit(x, _y = nil)
+         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
+         @n_values = x.max(0) + 1
+         @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
+         @active_features = encode(x, @feature_indices).sum(axis: 0).ne(0).where
+         self
+       end
+
+       # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
+       # @return [Numo::DFloat] The one-hot-vectors.
+       def fit_transform(x, _y = nil)
+         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
+         fit(x).transform(x)
+       end
+
+       # Encode samples into one-hot-vectors.
+       #
+       # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
+       # @return [Numo::DFloat] The one-hot-vectors.
+       def transform(x)
+         raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
+         codes = encode(x, @feature_indices)
+         codes[true, @active_features].dup
+       end
+
+       private
+
+       def encode(x, indices)
+         n_samples, n_features = x.shape
+         n_features = 1 if n_features.nil?
+         col_indices = (x + indices[0...-1]).flatten.to_a
+         row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
+         codes = Numo::DFloat.zeros(n_samples, indices[-1])
+         row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
+         codes
+       end
+     end
+   end
+ end
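
To make the private encode step above concrete, here is a sketch on a made-up two-feature matrix (gems assumed installed as before): feature 0 gets columns for the values 0..2 and feature 1 gets columns for the values 0..1, with the column offsets recorded in feature_indices:

    require 'numo/narray'
    require 'rumale/preprocessing/one_hot_encoder'

    # Feature 0 takes values {0, 1, 2}; feature 1 takes values {0, 1}.
    x = Numo::Int32[[0, 1], [1, 0], [2, 1]]

    encoder = Rumale::Preprocessing::OneHotEncoder.new
    p encoder.fit_transform(x)
    # Numo::DFloat#shape=[3,5]
    # [[1, 0, 0, 0, 1],
    #  [0, 1, 0, 1, 0],
    #  [0, 0, 1, 0, 1]]
    p encoder.n_values.to_a        # => [3, 2]
    p encoder.feature_indices.to_a # => [0, 3, 5]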
@@ -0,0 +1,111 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+
+ module Rumale
+   module Preprocessing
+     # Transform categorical features to integer values.
+     #
+     # @example
+     #   require 'rumale/preprocessing/ordinal_encoder'
+     #
+     #   encoder = Rumale::Preprocessing::OrdinalEncoder.new
+     #   training_samples = [['left', 10], ['right', 15], ['right', 20]]
+     #   training_samples = Numo::NArray.asarray(training_samples)
+     #   encoder.fit(training_samples)
+     #   p encoder.categories
+     #   # [["left", "right"], [10, 15, 20]]
+     #   testing_samples = [['left', 20], ['right', 10]]
+     #   testing_samples = Numo::NArray.asarray(testing_samples)
+     #   encoded = encoder.transform(testing_samples)
+     #   p encoded
+     #   # Numo::DFloat#shape=[2,2]
+     #   # [[0, 2],
+     #   #  [1, 0]]
+     #   p encoder.inverse_transform(encoded)
+     #   # Numo::RObject#shape=[2,2]
+     #   # [["left", 20],
+     #   #  ["right", 10]]
+     class OrdinalEncoder < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the array consisting of the categorical values of each feature.
+       # @return [Array] (size: n_features)
+       attr_reader :categories
+
+       # Create a new encoder that transforms categorical features to integer values.
+       #
+       # @param categories [Nil/Array] The category list for each feature.
+       #   If nil is given, the categories extracted from the training data by calling the fit method are used.
+       def initialize(categories: nil)
+         super()
+         @categories = categories
+       end
+
+       # Fit the encoder by extracting the categories of each feature.
+       #
+       # @overload fit(x) -> OrdinalEncoder
+       #
+       # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
+       # @return [OrdinalEncoder]
+       def fit(x, _y = nil)
+         raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+
+         n_features = x.shape[1]
+         @categories = Array.new(n_features) { |n| x[true, n].to_a.uniq.sort }
+         self
+       end
+
+       # Fit the encoder, then return the categorical features encoded as integer values.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
+       # @return [Numo::DFloat] The categorical features encoded as integer values.
+       def fit_transform(x, _y = nil)
+         raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+
+         fit(x).transform(x)
+       end
+
+       # Encode categorical features.
+       #
+       # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
+       # @return [Numo::DFloat] The categorical features encoded as integer values.
+       def transform(x)
+         raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+
+         n_features = x.shape[1]
+         if n_features != @categories.size
+           raise ArgumentError,
+                 'Expect the number of features and the number of categories to be equal'
+         end
+
+         transformed = Array.new(n_features) do |n|
+           x[true, n].to_a.map { |v| @categories[n].index(v) }
+         end
+
+         Numo::DFloat.asarray(transformed.transpose)
+       end
+
+       # Decode values to categorical features.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
+       # @return [Numo::NArray] The decoded features.
+       def inverse_transform(x)
+         n_features = x.shape[1]
+         if n_features != @categories.size
+           raise ArgumentError,
+                 'Expect the number of features and the number of categories to be equal'
+         end
+
+         inv_transformed = Array.new(n_features) do |n|
+           x[true, n].to_a.map { |i| @categories[n][i.to_i] }
+         end
+
+         Numo::NArray.asarray(inv_transformed.transpose)
+       end
+     end
+   end
+ end
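
Besides learning the categories via fit as in the example above, the categories keyword lets you supply them yourself; a brief sketch with a made-up category list and data, under the same gem assumptions:

    require 'numo/narray'
    require 'rumale/preprocessing/ordinal_encoder'

    # Supply the category ordering explicitly instead of extracting it with fit.
    encoder = Rumale::Preprocessing::OrdinalEncoder.new(categories: [%w[low mid high]])

    samples = Numo::NArray.asarray([['high'], ['low'], ['mid']])
    p encoder.transform(samples)
    # Numo::DFloat#shape=[3,1]
    # [[2],
    #  [0],
    #  [1]]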
@@ -0,0 +1,114 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+ require 'rumale/validation'
+
+ module Rumale
+   module Preprocessing
+     # Generate polynomial features from the given samples.
+     #
+     # @example
+     #   require 'rumale/preprocessing/polynomial_features'
+     #
+     #   transformer = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
+     #   x = Numo::DFloat[[0, 1], [2, 3], [4, 5]]
+     #   z = transformer.fit_transform(x)
+     #   p z
+     #
+     #   # Numo::DFloat#shape=[3,6]
+     #   # [[1, 0, 1, 0, 0, 1],
+     #   #  [1, 2, 3, 4, 6, 9],
+     #   #  [1, 4, 5, 16, 20, 25]]
+     #
+     #   # If you want to perform polynomial regression, combine it with LinearRegression as follows:
+     #   require 'rumale/preprocessing/polynomial_features'
+     #   require 'rumale/linear_model/linear_regression'
+     #   require 'rumale/pipeline/pipeline'
+     #
+     #   ply = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
+     #   reg = Rumale::LinearModel::LinearRegression.new(fit_bias: false, random_seed: 1)
+     #   pipeline = Rumale::Pipeline::Pipeline.new(steps: { trs: ply, est: reg })
+     #   pipeline.fit(training_samples, training_values)
+     #   results = pipeline.predict(testing_samples)
+     #
+     class PolynomialFeatures < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the number of polynomial features.
+       # @return [Integer]
+       attr_reader :n_output_features
+
+       # Create a transformer for generating polynomial features.
+       #
+       # @param degree [Integer] The degree of polynomial features.
+       def initialize(degree: 2)
+         raise ArgumentError, 'Expect the value of degree parameter greater than or equal to 1.' if degree < 1
+
+         super()
+         @params = { degree: degree }
+       end
+
+       # Calculate the number of output polynomial features.
+       #
+       # @overload fit(x) -> PolynomialFeatures
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of output polynomial features.
+       # @return [PolynomialFeatures]
+       def fit(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_features = x.shape[1]
+         @n_output_features = 1
+         @params[:degree].times do |t|
+           @n_output_features += Array.new(n_features) { |n| n }.repeated_combination(t + 1).size
+         end
+         self
+       end
+
+       # Calculate the number of polynomial features, and then transform samples to polynomial features.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of polynomial features
+       #   and be transformed.
+       # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
+       def fit_transform(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit(x).transform(x)
+       end
+
+       # Transform the given samples to polynomial features.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
+       # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
+       def transform(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         # initialize transformed features
+         n_samples, n_features = x.shape
+         z = Numo::DFloat.zeros(n_samples, n_output_features)
+         # bias
+         z[true, 0] = 1
+         curr_col = 1
+         # itself
+         z[true, 1..n_features] = x
+         curr_col += n_features
+         # high degree features
+         curr_feat_ids = Array.new(n_features + 1) { |n| n + 1 }
+         (1...@params[:degree]).each do
+           next_feat_ids = []
+           n_features.times do |d|
+             f_range = curr_feat_ids[d]...curr_feat_ids.last
+             next_col = curr_col + f_range.size
+             z[true, curr_col...next_col] = z[true, f_range] * x[true, d..d]
+             next_feat_ids.push(curr_col)
+             curr_col = next_col
+           end
+           next_feat_ids.push(curr_col)
+           curr_feat_ids = next_feat_ids
+         end
+         z
+       end
+     end
+   end
+ end
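
The loop in transform above builds each degree's columns from the previous degree's columns. A short sketch (made-up single sample, gems assumed installed) making the column order explicit for degree 2 with two features, namely [1, x1, x2, x1^2, x1*x2, x2^2]:

    require 'numo/narray'
    require 'rumale/preprocessing/polynomial_features'

    transformer = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)

    # For x1 = 2 and x2 = 3 the columns are [1, x1, x2, x1**2, x1*x2, x2**2].
    p transformer.fit_transform(Numo::DFloat[[2.0, 3.0]])
    # Numo::DFloat#shape=[1,6]
    # [[1, 2, 3, 4, 6, 9]]
    p transformer.n_output_features # => 6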
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/transformer'
+ require 'rumale/validation'
+
+ module Rumale
+   # This module consists of the classes that perform preprocessing.
+   module Preprocessing
+     # Normalize samples by centering and scaling to unit variance.
+     #
+     # @example
+     #   require 'rumale/preprocessing/standard_scaler'
+     #
+     #   normalizer = Rumale::Preprocessing::StandardScaler.new
+     #   new_training_samples = normalizer.fit_transform(training_samples)
+     #   new_testing_samples = normalizer.transform(testing_samples)
+     class StandardScaler < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Transformer
+
+       # Return the vector consisting of the mean value of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :mean_vec
+
+       # Return the vector consisting of the standard deviation of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :std_vec
+
+       # Create a new normalizer for centering and scaling to unit variance.
+       def initialize # rubocop:disable Lint/UselessMethodDefinition
+         super()
+       end
+
+       # Calculate the mean value and standard deviation of each feature for scaling.
+       #
+       # @overload fit(x) -> StandardScaler
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+       #   The samples to calculate the mean values and standard deviations.
+       # @return [StandardScaler]
+       def fit(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         @mean_vec = x.mean(0)
+         @std_vec = x.stddev(0)
+         self
+       end
+
+       # Calculate the mean values and standard deviations, and then normalize samples using them.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+       #   The samples to calculate the mean values and standard deviations.
+       # @return [Numo::DFloat] The scaled samples.
+       def fit_transform(x, _y = nil)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         fit(x).transform(x)
+       end
+
+       # Perform standardization of the given samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+       # @return [Numo::DFloat] The scaled samples.
+       def transform(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_samples, = x.shape
+         (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
+       end
+     end
+   end
+ end
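
A usage sketch of the standardizer above (made-up data, gems assumed installed): fit learns the per-feature mean and standard deviation from the training data, and transform reuses them on new data:

    require 'numo/narray'
    require 'rumale/preprocessing/standard_scaler'

    train = Numo::DFloat[[1.0, 10.0], [3.0, 20.0], [5.0, 30.0]]
    test  = Numo::DFloat[[2.0, 25.0]]

    scaler = Rumale::Preprocessing::StandardScaler.new
    standardized_train = scaler.fit_transform(train)
    standardized_test  = scaler.transform(test)

    p scaler.mean_vec.to_a # => [3.0, 20.0]
    # Each column of standardized_train is centered on 0 and divided by
    # the column's standard deviation learned from the training data.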
@@ -0,0 +1,10 @@
+ # frozen_string_literal: true
+
+ # Rumale is a machine learning library in Ruby.
+ module Rumale
+   # This module consists of the classes that perform preprocessing.
+   module Preprocessing
+     # @!visibility private
+     VERSION = '0.24.0'
+   end
+ end
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ require 'numo/narray'
+
+ require_relative 'preprocessing/bin_discretizer'
+ require_relative 'preprocessing/binarizer'
+ require_relative 'preprocessing/kernel_calculator'
+ require_relative 'preprocessing/l1_normalizer'
+ require_relative 'preprocessing/l2_normalizer'
+ require_relative 'preprocessing/label_binarizer'
+ require_relative 'preprocessing/label_encoder'
+ require_relative 'preprocessing/max_abs_scaler'
+ require_relative 'preprocessing/max_normalizer'
+ require_relative 'preprocessing/min_max_scaler'
+ require_relative 'preprocessing/one_hot_encoder'
+ require_relative 'preprocessing/ordinal_encoder'
+ require_relative 'preprocessing/polynomial_features'
+ require_relative 'preprocessing/standard_scaler'
+ require_relative 'preprocessing/version'