rumale-preprocessing 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,65 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'
require 'rumale/validation'

module Rumale
  module Preprocessing
    # Normalize samples by scaling each feature with its maximum absolute value.
    #
    # @example
    #   require 'rumale/preprocessing/max_abs_scaler'
    #
    #   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
    #   new_training_samples = normalizer.fit_transform(training_samples)
    #   new_testing_samples = normalizer.transform(testing_samples)
    class MaxAbsScaler < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the vector consists of the maximum absolute value for each feature.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :max_abs_vec

      # Creates a new normalizer for scaling each feature with its maximum absolute value.
      def initialize # rubocop:disable Lint/UselessMethodDefinition
        super()
      end

      # Calculate the maximum absolute value of each feature for scaling.
      #
      # @overload fit(x) -> MaxAbsScaler
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
      # @return [MaxAbsScaler]
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        @max_abs_vec = x.abs.max(0)
        self
      end

      # Calculate the maximum absolute value for each feature, and then normalize samples.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
      # @return [Numo::DFloat] The scaled samples.
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit(x).transform(x)
      end

      # Perform scaling the given samples with maximum absolute value for each feature.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
      # @return [Numo::DFloat] The scaled samples.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # Use a local copy so the fitted @max_abs_vec is left untouched, and
        # treat all-zero features as having scale one to avoid division by zero
        # (consistent with the zero-range handling in MinMaxScaler/MaxNormalizer).
        scale_vec = @max_abs_vec.dup
        scale_vec[scale_vec.eq(0)] = 1
        x / scale_vec
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'
require 'rumale/validation'

module Rumale
  module Preprocessing
    # Normalize samples with the maximum of the absolute values.
    #
    # @example
    #   require 'rumale/preprocessing/max_normalizer'
    #
    #   normalizer = Rumale::Preprocessing::MaxNormalizer.new
    #   new_samples = normalizer.fit_transform(samples)
    class MaxNormalizer < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the vector consists of the maximum norm for each sample.
      # @return [Numo::DFloat] (shape: [n_samples])
      attr_reader :norm_vec

      # Create a new normalizer for normalizing to max-norm.
      def initialize # rubocop:disable Lint/UselessMethodDefinition
        super()
      end

      # Calculate the maximum norms of each sample.
      #
      # @overload fit(x) -> MaxNormalizer
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
      # @return [MaxNormalizer]
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        @norm_vec = x.abs.max(1)
        # Treat all-zero samples as norm one so that division keeps them at zero.
        @norm_vec[@norm_vec.eq(0)] = 1
        self
      end

      # Calculate the maximum norm of each sample, and then normalize samples with the norms.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
      # @return [Numo::DFloat] The normalized samples.
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit(x)
        x / @norm_vec.expand_dims(1)
      end

      # Calculate the maximum norms of each sample, and then normalize samples with the norms.
      # This method calls the fit_transform method. This method exists for the Pipeline class.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
      # @return [Numo::DFloat] The normalized samples.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit_transform(x)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'
require 'rumale/validation'

module Rumale
  # This module consists of the classes that perform preprocessings.
  module Preprocessing
    # Normalize samples by scaling each feature to a given range.
    #
    # @example
    #   require 'rumale/preprocessing/min_max_scaler'
    #
    #   normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
    #   new_training_samples = normalizer.fit_transform(training_samples)
    #   new_testing_samples = normalizer.transform(testing_samples)
    class MinMaxScaler < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the vector consists of the minimum value for each feature.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :min_vec

      # Return the vector consists of the maximum value for each feature.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :max_vec

      # Creates a new normalizer for scaling each feature to a given range.
      #
      # @param feature_range [Array<Float>] The desired range of samples.
      def initialize(feature_range: [0.0, 1.0])
        super()
        @params = { feature_range: feature_range }
      end

      # Calculate the minimum and maximum value of each feature for scaling.
      #
      # @overload fit(x) -> MinMaxScaler
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
      # @return [MinMaxScaler]
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        @min_vec = x.min(0)
        @max_vec = x.max(0)
        self
      end

      # Calculate the minimum and maximum values, and then normalize samples to feature_range.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
      # @return [Numo::DFloat] The scaled samples.
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit(x).transform(x)
      end

      # Perform scaling the given samples according to feature_range.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
      # @return [Numo::DFloat] The scaled samples.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples, = x.shape
        range_vec = @max_vec - @min_vec
        # Constant features get a range of one so they map to the lower bound.
        range_vec[range_vec.eq(0)] = 1.0
        scaled = (x - @min_vec.tile(n_samples, 1)) / range_vec.tile(n_samples, 1)
        lower, upper = @params[:feature_range]
        scaled * (upper - lower) + lower
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'

module Rumale
  module Preprocessing
    # Encode categorical integer features to one-hot-vectors.
    #
    # @example
    #   require 'rumale/preprocessing/one_hot_encoder'
    #
    #   encoder = Rumale::Preprocessing::OneHotEncoder.new
    #   labels = Numo::Int32[0, 0, 2, 3, 2, 1]
    #   one_hot_vectors = encoder.fit_transform(labels)
    #   # > pp one_hot_vectors
    #   # Numo::DFloat#shape[6, 4]
    #   # [[1, 0, 0, 0],
    #   #  [1, 0, 0, 0],
    #   #  [0, 0, 1, 0],
    #   #  [0, 0, 0, 1],
    #   #  [0, 0, 1, 0],
    #   #  [0, 1, 0, 0]]
    class OneHotEncoder < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the maximum values for each feature.
      # @return [Numo::Int32] (shape: [n_features])
      attr_reader :n_values

      # Return the indices for feature values that actually occur in the training set.
      # @return [Numo::Int32]
      attr_reader :active_features

      # Return the indices to feature ranges.
      # @return [Numo::Int32] (shape: [n_features + 1])
      attr_reader :feature_indices

      # Create a new encoder for encoding categorical integer features to one-hot-vectors
      def initialize # rubocop:disable Lint/UselessMethodDefinition
        super()
      end

      # Fit one-hot-encoder to samples.
      #
      # @overload fit(x) -> OneHotEncoder
      # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
      # @return [OneHotEncoder]
      def fit(x, _y = nil)
        check_non_negative(x)

        @n_values = x.max(0) + 1
        # Cumulative feature widths: column offsets of each feature's one-hot span.
        @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
        # Keep only columns for values that actually occur in the training data.
        @active_features = encode(x, @feature_indices).sum(axis: 0).ne(0).where
        self
      end

      # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
      # @return [Numo::DFloat] The one-hot-vectors.
      def fit_transform(x, _y = nil)
        check_non_negative(x)

        fit(x).transform(x)
      end

      # Encode samples into one-hot-vectors.
      #
      # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
      # @return [Numo::DFloat] The one-hot-vectors.
      def transform(x)
        check_non_negative(x)

        codes = encode(x, @feature_indices)
        codes[true, @active_features].dup
      end

      private

      # Raise ArgumentError when the given samples contain negative values.
      def check_non_negative(x)
        raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
      end

      # Build the full (inactive columns included) one-hot matrix for x,
      # using the given cumulative feature offsets.
      def encode(x, indices)
        n_samples, n_features = x.shape
        # A 1-D input is treated as a single feature.
        n_features = 1 if n_features.nil?
        col_indices = (x + indices[0...-1]).flatten.to_a
        row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
        codes = Numo::DFloat.zeros(n_samples, indices[-1])
        row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
        codes
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'

module Rumale
  module Preprocessing
    # Transform categorical features to integer values.
    #
    # @example
    #   require 'rumale/preprocessing/ordinal_encoder'
    #
    #   encoder = Rumale::Preprocessing::OrdinalEncoder.new
    #   training_samples = [['left', 10], ['right', 15], ['right', 20]]
    #   training_samples = Numo::NArray.asarray(training_samples)
    #   encoder.fit(training_samples)
    #   p encoder.categories
    #   # [["left", "right"], [10, 15, 20]]
    #   testing_samples = [['left', 20], ['right', 10]]
    #   testing_samples = Numo::NArray.asarray(testing_samples)
    #   encoded = encoder.transform(testing_samples)
    #   p encoded
    #   # Numo::DFloat#shape=[2,2]
    #   # [[0, 2],
    #   #  [1, 0]]
    #   p encoder.inverse_transform(encoded)
    #   # Numo::RObject#shape=[2,2]
    #   # [["left", 20],
    #   #  ["right", 10]]
    class OrdinalEncoder < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the array consists of categorical value each feature.
      # @return [Array] (size: n_features)
      attr_reader :categories

      # Create a new encoder that transform categorical features to integer values.
      #
      # @param categories [Nil/Array] The category list for each feature.
      #   If nil is given, extracted categories from the training data by calling the fit method are used.
      def initialize(categories: nil)
        super()
        @categories = categories
      end

      # Fit encoder by extracting the category for each feature.
      #
      # @overload fit(x) -> OrdinalEncoder
      #
      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
      # @return [OrdinalEncoder]
      def fit(x, _y = nil)
        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

        n_features = x.shape[1]
        # Categories are the sorted unique values observed in each feature column.
        @categories = Array.new(n_features) { |n| x[true, n].to_a.uniq.sort }
        self
      end

      # Fit encoder, then return encoded categorical features to integer values.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
      # @return [Numo::DFloat] The encoded categorical features to integer values.
      def fit_transform(x, _y = nil)
        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

        fit(x).transform(x)
      end

      # Encode categorical features.
      #
      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
      # @return [Numo::DFloat] The encoded categorical features to integer values.
      def transform(x)
        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

        check_n_features(x.shape[1])

        # NOTE(review): values absent from @categories map to nil via Array#index,
        # which makes Numo::DFloat.asarray raise — unseen categories are not supported here.
        transformed = Array.new(@categories.size) do |n|
          x[true, n].to_a.map { |v| @categories[n].index(v) }
        end

        Numo::DFloat.asarray(transformed.transpose)
      end

      # Decode values to categorical features.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
      # @return [Numo::NArray] The decoded features.
      def inverse_transform(x)
        check_n_features(x.shape[1])

        inv_transformed = Array.new(@categories.size) do |n|
          x[true, n].to_a.map { |i| @categories[n][i.to_i] }
        end

        Numo::NArray.asarray(inv_transformed.transpose)
      end

      private

      # Raise ArgumentError unless the number of features matches the number of fitted category lists.
      def check_n_features(n_features)
        return if n_features == @categories.size

        raise ArgumentError,
              'Expect the number of features and the number of categories to be equal'
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'
require 'rumale/validation'

module Rumale
  module Preprocessing
    # Generating polynomial features from the given samples.
    #
    # @example
    #   require 'rumale/preprocessing/polynomial_features'
    #
    #   transformer = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
    #   x = Numo::DFloat[[0, 1], [2, 3], [4, 5]]
    #   z = transformer.fit_transform(x)
    #   p z
    #
    #   # Numo::DFloat#shape=[3,6]
    #   # [[1, 0, 1, 0, 0, 1],
    #   #  [1, 2, 3, 4, 6, 9],
    #   #  [1, 4, 5, 16, 20, 25]]
    #
    #   # If you want to perform polynomial regression, combine it with LinearRegression as follows:
    #   require 'rumale/preprocessing/polynomial_features'
    #   require 'rumale/linear_model/linear_regression'
    #   require 'rumale/pipeline/pipeline'
    #
    #   ply = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
    #   reg = Rumale::LinearModel::LinearRegression.new(fit_bias: false, random_seed: 1)
    #   pipeline = Rumale::Pipeline::Pipeline.new(steps: { trs: ply, est: reg })
    #   pipeline.fit(training_samples, training_values)
    #   results = pipeline.predict(testing_samples)
    #
    class PolynomialFeatures < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the number of polynomial features.
      # @return [Integer]
      attr_reader :n_output_features

      # Create a transformer for generating polynomial features.
      #
      # @param degree [Integer] The degree of polynomial features.
      # @raise [ArgumentError] If degree is less than 1.
      def initialize(degree: 2)
        raise ArgumentError, 'Expect the value of degree parameter greater than or equal to 1.' if degree < 1

        super()
        @params = { degree: degree }
      end

      # Calculate the number of output polynomial features.
      #
      # @overload fit(x) -> PolynomialFeatures
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of output polynomial features.
      # @return [PolynomialFeatures]
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_features = x.shape[1]
        # One bias column plus the number of monomials of each degree
        # (repeated combinations of the feature indices).
        @n_output_features = 1
        @params[:degree].times do |t|
          @n_output_features += Array.new(n_features) { |n| n }.repeated_combination(t + 1).size
        end
        self
      end

      # Calculate the number of polynomial features, and then transform samples to polynomial features.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of polynomial features
      #   and be transformed.
      # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit(x).transform(x)
      end

      # Transform the given samples to polynomial features.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
      # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # initialize transformed features
        n_samples, n_features = x.shape
        z = Numo::DFloat.zeros(n_samples, n_output_features)
        # bias
        z[true, 0] = 1
        curr_col = 1
        # itself
        z[true, 1..n_features] = x
        curr_col += n_features
        # high degree features: each new degree multiplies the previous degree's
        # columns by one original feature, skipping duplicates via curr_feat_ids.
        curr_feat_ids = Array.new(n_features + 1) { |n| n + 1 }
        (1...@params[:degree]).each do
          next_feat_ids = []
          n_features.times do |d|
            f_range = curr_feat_ids[d]...curr_feat_ids.last
            next_col = curr_col + f_range.size
            z[true, curr_col...next_col] = z[true, f_range] * x[true, d..d]
            next_feat_ids.push(curr_col)
            curr_col = next_col
          end
          next_feat_ids.push(curr_col)
          curr_feat_ids = next_feat_ids
        end
        z
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/transformer'
require 'rumale/validation'

module Rumale
  # This module consists of the classes that perform preprocessings.
  module Preprocessing
    # Normalize samples by centering and scaling to unit variance.
    #
    # @example
    #   require 'rumale/preprocessing/standard_scaler'
    #
    #   normalizer = Rumale::Preprocessing::StandardScaler.new
    #   new_training_samples = normalizer.fit_transform(training_samples)
    #   new_testing_samples = normalizer.transform(testing_samples)
    class StandardScaler < ::Rumale::Base::Estimator
      include ::Rumale::Base::Transformer

      # Return the vector consists of the mean value for each feature.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :mean_vec

      # Return the vector consists of the standard deviation for each feature.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :std_vec

      # Create a new normalizer for centering and scaling to unit variance.
      def initialize # rubocop:disable Lint/UselessMethodDefinition
        super()
      end

      # Calculate the mean value and standard deviation of each feature for scaling.
      #
      # @overload fit(x) -> StandardScaler
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The samples to calculate the mean values and standard deviations.
      # @return [StandardScaler]
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        @std_vec = x.stddev(0)
        @mean_vec = x.mean(0)
        self
      end

      # Calculate the mean values and standard deviations, and then normalize samples using them.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The samples to calculate the mean values and standard deviations.
      # @return [Numo::DFloat] The scaled samples.
      def fit_transform(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        fit(x).transform(x)
      end

      # Perform standardization the given samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
      # @return [Numo::DFloat] The scaled samples.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples, = x.shape
        # Center with the fitted means, then divide by the fitted standard deviations.
        centered = x - @mean_vec.tile(n_samples, 1)
        centered / @std_vec.tile(n_samples, 1)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
# frozen_string_literal: true

# Rumale is a machine learning library in Ruby.
module Rumale
  # This module consists of the classes that perform preprocessings.
  module Preprocessing
    # The version number of the rumale-preprocessing gem.
    # @!visibility private
    VERSION = '0.24.0'
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'preprocessing/bin_discretizer'
6
+ require_relative 'preprocessing/binarizer'
7
+ require_relative 'preprocessing/kernel_calculator'
8
+ require_relative 'preprocessing/l1_normalizer'
9
+ require_relative 'preprocessing/l2_normalizer'
10
+ require_relative 'preprocessing/label_binarizer'
11
+ require_relative 'preprocessing/label_encoder'
12
+ require_relative 'preprocessing/max_abs_scaler'
13
+ require_relative 'preprocessing/max_normalizer'
14
+ require_relative 'preprocessing/min_max_scaler'
15
+ require_relative 'preprocessing/one_hot_encoder'
16
+ require_relative 'preprocessing/ordinal_encoder'
17
+ require_relative 'preprocessing/polynomial_features'
18
+ require_relative 'preprocessing/standard_scaler'
19
+ require_relative 'preprocessing/version'