rumale-preprocessing 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5e6ea7b493b3ea7d2eaeb8e4782e0dbdb79a265691b6e26f66786e713f2ad305
4
+ data.tar.gz: 4888e8622e364a8e5f62b595e43e32f72d41ed39ef2abb8b3fe79d0063191dc3
5
+ SHA512:
6
+ metadata.gz: 0b9937675f98ff5f3c7cb8e16b6c56ad0613267b6383eb766ef14f26d1eeeb06539d1621a43b4391a13e54a2732f58d92fcf3212953ba170e886baa19580b7bb
7
+ data.tar.gz: eabf752b7c17d32fc075fb27d146e81efe766a33c4d12650605cebac2cbd7eefca8bb0f15324163e70432db189c20b6ef1e64867c81ed53fd1011d565e539076
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::Preprocessing
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-preprocessing.svg)](https://badge.fury.io/rb/rumale-preprocessing)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-preprocessing/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::Preprocessing provides preprocessing techniques,
9
+ such as L2 normalization, standard scaling, and one-hot encoding,
10
+ with Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-preprocessing'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-preprocessing
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - Preprocessing](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # Discretizes features with a given number of bins.
10
+ # In some cases, discretizing features may accelerate decision tree training.
11
+ #
12
+ # @example
13
+ # require 'rumale/preprocessing/bin_discretizer'
14
+ #
15
+ # discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 4)
16
+ # samples = Numo::DFloat.new(5, 2).rand - 0.5
17
+ # transformed = discretizer.fit_transform(samples)
18
+ # # > pp samples
19
+ # # Numo::DFloat#shape=[5,2]
20
+ # # [[-0.438246, -0.126933],
21
+ # # [ 0.294815, -0.298958],
22
+ # # [-0.383959, -0.155968],
23
+ # # [ 0.039948, 0.237815],
24
+ # # [-0.334911, -0.449117]]
25
+ # # > pp transformed
26
+ # # Numo::DFloat#shape=[5,2]
27
+ # # [[0, 1],
28
+ # # [3, 0],
29
+ # # [0, 1],
30
+ # # [2, 3],
31
+ # # [0, 0]]
32
+ class BinDiscretizer < ::Rumale::Base::Estimator
33
+ include ::Rumale::Base::Transformer
34
+
35
+ # Return the feature steps used for discretizing.
36
+ # @return [Array<Numo::DFloat>] (shape: [n_features, n_bins])
37
+ attr_reader :feature_steps
38
+
39
+ # Create a new discretizer for features with given number of bins.
40
+ #
41
+ # @param n_bins [Integer] The number of bins to be used for discretizing feature values.
42
+ def initialize(n_bins: 32)
43
+ super()
44
+ @params = { n_bins: n_bins }
45
+ end
46
+
47
+ # Fit feature ranges to be discretized.
48
+ #
49
+ # @overload fit(x) -> BinDiscretizer
50
+ #
51
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
52
+ # @return [BinDiscretizer]
53
+ def fit(x, _y = nil)
54
+ x = ::Rumale::Validation.check_convert_sample_array(x)
55
+
56
+ n_features = x.shape[1]
57
+ max_vals = x.max(0)
58
+ min_vals = x.min(0)
59
+ @feature_steps = Array.new(n_features) do |n|
60
+ Numo::DFloat.linspace(min_vals[n], max_vals[n], @params[:n_bins] + 1)[0...@params[:n_bins]]
61
+ end
62
+ self
63
+ end
64
+
65
+ # Fit feature ranges to be discretized, then return discretized samples.
66
+ #
67
+ # @overload fit_transform(x) -> Numo::DFloat
68
+ #
69
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
70
+ # @return [Numo::DFloat] The discretized samples.
71
+ def fit_transform(x, _y = nil)
72
+ x = ::Rumale::Validation.check_convert_sample_array(x)
73
+
74
+ fit(x).transform(x)
75
+ end
76
+
77
+ # Perform discretization of the given samples.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
80
+ # @return [Numo::DFloat] The discretized samples.
81
+ def transform(x)
82
+ x = ::Rumale::Validation.check_convert_sample_array(x)
83
+
84
+ n_samples, n_features = x.shape
85
+ transformed = Numo::DFloat.zeros(n_samples, n_features)
86
+ n_features.times do |n|
87
+ steps = @feature_steps[n]
88
+ @params[:n_bins].times do |bin|
89
+ mask = x[true, n].ge(steps[bin]).where
90
+ transformed[mask, n] = bin
91
+ end
92
+ end
93
+ transformed
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # Binarize samples according to a threshold
10
+ #
11
+ # @example
12
+ # require 'rumale/preprocessing/binarizer'
13
+ #
14
+ # binarizer = Rumale::Preprocessing::Binarizer.new
15
+ # x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
16
+ # b = binarizer.transform(x)
17
+ # p b
18
+ #
19
+ # # Numo::DFloat#shape=[3, 2]
20
+ # # [[0, 1],
21
+ # # [1, 0],
22
+ # # [1, 1]]
23
+ class Binarizer < ::Rumale::Base::Estimator
24
+ include ::Rumale::Base::Transformer
25
+
26
+ # Create a new transformer for binarization.
27
+ # @param threshold [Float] The threshold value for binarization.
28
+ def initialize(threshold: 0.0)
29
+ super()
30
+ @params = { threshold: threshold }
31
+ end
32
+
33
+ # This method does nothing and returns the object itself.
34
+ # This method exists for compatibility with other transformers.
35
+ #
36
+ # @overload fit() -> Binarizer
37
+ #
38
+ # @return [Binarizer]
39
+ def fit(_x = nil, _y = nil)
40
+ self
41
+ end
42
+
43
+ # Binarize each sample.
44
+ #
45
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
46
+ # @return [Numo::DFloat] The binarized samples.
47
+ def transform(x)
48
+ x = ::Rumale::Validation.check_convert_sample_array(x)
49
+
50
+ x.class.cast(x.gt(@params[:threshold]))
51
+ end
52
+
53
+ # The output of this method is the same as that of the transform method.
54
+ # This method exists for compatibility with other transformers.
55
+ #
56
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
57
+ # @return [Numo::DFloat] The binarized samples.
58
+ def fit_transform(x, _y = nil)
59
+ x = ::Rumale::Validation.check_convert_sample_array(x)
60
+
61
+ fit(x).transform(x)
62
+ end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Preprocessing
10
+ # KernelCalculator is a class that calculates the kernel matrix with training data.
11
+ #
12
+ # @example
13
+ # require 'rumale/preprocessing/kernel_calculator'
14
+ # require 'rumale/kernel_machine/kernel_ridge'
15
+ # require 'rumale/pipeline/pipeline'
16
+ #
17
+ # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
18
+ # regressor = Rumale::KernelMachine::KernelRidge.new
19
+ # pipeline = Rumale::Pipeline::Pipeline.new(
20
+ # steps: { trs: transformer, est: regressor }
21
+ # )
22
+ # pipeline.fit(x_train, y_train)
23
+ # results = pipeline.predict(x_test)
24
+ class KernelCalculator < ::Rumale::Base::Estimator
25
+ include ::Rumale::Base::Transformer
26
+
27
+ # Returns the training data for calculating kernel matrix.
28
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
29
+ attr_reader :components
30
+
31
+ # Create a new transformer that transforms feature vectors into a kernel matrix.
32
+ #
33
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
34
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
35
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
36
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
37
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
38
+ super()
39
+ @params = {
40
+ kernel: kernel,
41
+ gamma: gamma,
42
+ degree: degree,
43
+ coef: coef
44
+ }
45
+ end
46
+
47
+ # Fit the model with given training data.
48
+ #
49
+ # @overload fit(x) -> KernelCalculator
50
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
51
+ # @return [KernelCalculator] The learned transformer itself.
52
+ def fit(x, _y = nil)
53
+ x = ::Rumale::Validation.check_convert_sample_array(x)
54
+
55
+ @components = x.dup
56
+ self
57
+ end
58
+
59
+ # Fit the model with training data, and then transform them with the learned model.
60
+ #
61
+ # @overload fit_transform(x) -> Numo::DFloat
62
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
63
+ # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
64
+ def fit_transform(x, y = nil)
65
+ x = ::Rumale::Validation.check_convert_sample_array(x)
66
+
67
+ fit(x, y).transform(x)
68
+ end
69
+
70
+ # Transform the given data with the learned model.
71
+ #
72
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
73
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
74
+ def transform(x)
75
+ x = ::Rumale::Validation.check_convert_sample_array(x)
76
+
77
+ kernel_mat(x, @components)
78
+ end
79
+
80
+ private
81
+
82
+ def kernel_mat(x, y)
83
+ case @params[:kernel]
84
+ when 'rbf'
85
+ ::Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
86
+ when 'poly'
87
+ ::Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
88
+ when 'sigmoid'
89
+ ::Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
90
+ when 'linear'
91
+ ::Rumale::PairwiseMetric.linear_kernel(x, y)
92
+ else
93
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # Normalize samples to unit L1-norm.
10
+ #
11
+ # @example
12
+ # require 'rumale/preprocessing/l1_normalizer'
13
+ #
14
+ # normalizer = Rumale::Preprocessing::L1Normalizer.new
15
+ # new_samples = normalizer.fit_transform(samples)
16
+ class L1Normalizer < ::Rumale::Base::Estimator
17
+ include ::Rumale::Base::Transformer
18
+
19
+ # Return the vector consisting of the L1-norm of each sample.
20
+ # @return [Numo::DFloat] (shape: [n_samples])
21
+ attr_reader :norm_vec # :nodoc:
22
+
23
+ # Create a new normalizer for normalizing to unit L1-norm.
24
+ def initialize # rubocop:disable Lint/UselessMethodDefinition
25
+ super()
26
+ end
27
+
28
+ # Calculate L1-norms of each sample.
29
+ #
30
+ # @overload fit(x) -> L1Normalizer
31
+ #
32
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
33
+ # @return [L1Normalizer]
34
+ def fit(x, _y = nil)
35
+ x = ::Rumale::Validation.check_convert_sample_array(x)
36
+
37
+ @norm_vec = x.abs.sum(axis: 1)
38
+ @norm_vec[@norm_vec.eq(0)] = 1
39
+ self
40
+ end
41
+
42
+ # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
43
+ #
44
+ # @overload fit_transform(x) -> Numo::DFloat
45
+ #
46
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
47
+ # @return [Numo::DFloat] The normalized samples.
48
+ def fit_transform(x, _y = nil)
49
+ x = ::Rumale::Validation.check_convert_sample_array(x)
50
+
51
+ fit(x)
52
+ x / @norm_vec.expand_dims(1)
53
+ end
54
+
55
+ # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
56
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
57
+ #
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
59
+ # @return [Numo::DFloat] The normalized samples.
60
+ def transform(x)
61
+ x = ::Rumale::Validation.check_convert_sample_array(x)
62
+
63
+ fit_transform(x)
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ # This module consists of the classes that perform preprocessings.
9
+ module Preprocessing
10
+ # Normalize samples to unit L2-norm.
11
+ #
12
+ # @example
13
+ # require 'rumale/preprocessing/l2_normalizer'
14
+ #
15
+ # normalizer = Rumale::Preprocessing::L2Normalizer.new
16
+ # new_samples = normalizer.fit_transform(samples)
17
+ class L2Normalizer < ::Rumale::Base::Estimator
18
+ include ::Rumale::Base::Transformer
19
+
20
+ # Return the vector consisting of the L2-norm of each sample.
21
+ # @return [Numo::DFloat] (shape: [n_samples])
22
+ attr_reader :norm_vec # :nodoc:
23
+
24
+ # Create a new normalizer for normalizing to unit L2-norm.
25
+ def initialize # rubocop:disable Lint/UselessMethodDefinition
26
+ super()
27
+ end
28
+
29
+ # Calculate L2-norms of each sample.
30
+ #
31
+ # @overload fit(x) -> L2Normalizer
32
+ #
33
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
34
+ # @return [L2Normalizer]
35
+ def fit(x, _y = nil)
36
+ x = ::Rumale::Validation.check_convert_sample_array(x)
37
+
38
+ @norm_vec = Numo::NMath.sqrt((x**2).sum(axis: 1))
39
+ @norm_vec[@norm_vec.eq(0)] = 1
40
+ self
41
+ end
42
+
43
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
44
+ #
45
+ # @overload fit_transform(x) -> Numo::DFloat
46
+ #
47
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
48
+ # @return [Numo::DFloat] The normalized samples.
49
+ def fit_transform(x, _y = nil)
50
+ x = ::Rumale::Validation.check_convert_sample_array(x)
51
+
52
+ fit(x)
53
+ x / @norm_vec.expand_dims(1)
54
+ end
55
+
56
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
57
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
58
+ #
59
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
60
+ # @return [Numo::DFloat] The normalized samples.
61
+ def transform(x)
62
+ x = ::Rumale::Validation.check_convert_sample_array(x)
63
+
64
+ fit_transform(x)
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to binary labels with one-vs-all scheme.
9
+ #
10
+ # @example
11
+ # require 'rumale/preprocessing/label_binarizer'
12
+ #
13
+ # encoder = Rumale::Preprocessing::LabelBinarizer.new
14
+ # label = [0, -1, 3, 3, 1, 1]
15
+ # p encoder.fit_transform(label)
16
+ # # Numo::Int32#shape=[6,4]
17
+ # # [[0, 1, 0, 0],
18
+ # # [1, 0, 0, 0],
19
+ # # [0, 0, 0, 1],
20
+ # # [0, 0, 0, 1],
21
+ # # [0, 0, 1, 0],
22
+ # # [0, 0, 1, 0]]
23
+ class LabelBinarizer < ::Rumale::Base::Estimator
24
+ include ::Rumale::Base::Transformer
25
+
26
+ # Return the class labels.
27
+ # @return [Array] (size: [n_classes])
28
+ attr_reader :classes
29
+
30
+ # Create a new encoder for binarizing labels with one-vs-all scheme.
31
+ #
32
+ # @param neg_label [Integer] The value that represents the negative label.
33
+ # @param pos_label [Integer] The value that represents the positive label.
34
+ def initialize(neg_label: 0, pos_label: 1)
35
+ super()
36
+ @params = {
37
+ neg_label: neg_label,
38
+ pos_label: pos_label
39
+ }
40
+ end
41
+
42
+ # Fit encoder to labels.
43
+ #
44
+ # @overload fit(y) -> LabelBinarizer
45
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
46
+ # @return [LabelBinarizer]
47
+ def fit(y, _not_used = nil)
48
+ y = y.to_a if y.is_a?(Numo::NArray)
49
+ @classes = y.uniq.sort
50
+ self
51
+ end
52
+
53
+ # Fit encoder to labels, then return binarized labels.
54
+ #
55
+ # @overload fit_transform(y) -> Numo::Int32
56
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
57
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
58
+ def fit_transform(y, _not_used = nil)
59
+ y = y.to_a if y.is_a?(Numo::NArray)
60
+ fit(y).transform(y)
61
+ end
62
+
63
+ # Encode labels.
64
+ #
65
+ # @param y [Array] (shape: [n_samples]) The labels to be encoded.
66
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
67
+ def transform(y)
68
+ y = y.to_a if y.is_a?(Numo::NArray)
69
+ n_classes = @classes.size
70
+ n_samples = y.size
71
+ codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
72
+ n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
73
+ codes
74
+ end
75
+
76
+ # Decode binarized labels.
77
+ #
78
+ # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
79
+ # @return [Array] (shape: [n_samples]) The decoded labels.
80
+ def inverse_transform(x)
81
+ n_samples = x.shape[0]
82
+ Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to values between 0 and n_classes - 1.
9
+ #
10
+ # @example
11
+ # require 'rumale/preprocessing/label_encoder'
12
+ #
13
+ # encoder = Rumale::Preprocessing::LabelEncoder.new
14
+ # labels = Numo::Int32[1, 8, 8, 15, 0]
15
+ # encoded_labels = encoder.fit_transform(labels)
16
+ # # > pp encoded_labels
17
+ # # Numo::Int32#shape=[5]
18
+ # # [1, 2, 2, 3, 0]
19
+ # decoded_labels = encoder.inverse_transform(encoded_labels)
20
+ # # > pp decoded_labels
21
+ # # [1, 8, 8, 15, 0]
22
+ class LabelEncoder < ::Rumale::Base::Estimator
23
+ include ::Rumale::Base::Transformer
24
+
25
+ # Return the class labels.
26
+ # @return [Array] (size: [n_classes])
27
+ attr_reader :classes
28
+
29
+ # Create a new encoder for encoding labels to values between 0 and n_classes - 1.
30
+ def initialize # rubocop:disable Lint/UselessMethodDefinition
31
+ super()
32
+ end
33
+
34
+ # Fit label-encoder to labels.
35
+ #
36
+ # @overload fit(x) -> LabelEncoder
37
+ #
38
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
39
+ # @return [LabelEncoder]
40
+ def fit(x, _y = nil)
41
+ x = x.to_a if x.is_a?(Numo::NArray)
42
+ @classes = x.sort.uniq
43
+ self
44
+ end
45
+
46
+ # Fit label-encoder to labels, then return encoded labels.
47
+ #
48
+ # @overload fit_transform(x) -> Numo::Int32
49
+ #
50
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
51
+ # @return [Numo::Int32] The encoded labels.
52
+ def fit_transform(x, _y = nil)
53
+ x = x.to_a if x.is_a?(Numo::NArray)
54
+ fit(x).transform(x)
55
+ end
56
+
57
+ # Encode labels.
58
+ #
59
+ # @param x [Array] (shape: [n_samples]) The labels to be encoded.
60
+ # @return [Numo::Int32] The encoded labels.
61
+ def transform(x)
62
+ x = x.to_a if x.is_a?(Numo::NArray)
63
+ Numo::Int32[*(x.map { |v| @classes.index(v) })]
64
+ end
65
+
66
+ # Decode encoded labels.
67
+ #
68
+ # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
69
+ # @return [Array] The decoded labels.
70
+ def inverse_transform(x)
71
+ x.to_a.map { |n| @classes[n] }
72
+ end
73
+ end
74
+ end
75
+ end