rumale-preprocessing 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/preprocessing/bin_discretizer.rb +97 -0
- data/lib/rumale/preprocessing/binarizer.rb +65 -0
- data/lib/rumale/preprocessing/kernel_calculator.rb +98 -0
- data/lib/rumale/preprocessing/l1_normalizer.rb +67 -0
- data/lib/rumale/preprocessing/l2_normalizer.rb +68 -0
- data/lib/rumale/preprocessing/label_binarizer.rb +86 -0
- data/lib/rumale/preprocessing/label_encoder.rb +75 -0
- data/lib/rumale/preprocessing/max_abs_scaler.rb +65 -0
- data/lib/rumale/preprocessing/max_normalizer.rb +67 -0
- data/lib/rumale/preprocessing/min_max_scaler.rb +78 -0
- data/lib/rumale/preprocessing/one_hot_encoder.rb +94 -0
- data/lib/rumale/preprocessing/ordinal_encoder.rb +111 -0
- data/lib/rumale/preprocessing/polynomial_features.rb +114 -0
- data/lib/rumale/preprocessing/standard_scaler.rb +74 -0
- data/lib/rumale/preprocessing/version.rb +10 -0
- data/lib/rumale/preprocessing.rb +19 -0
- metadata +97 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5e6ea7b493b3ea7d2eaeb8e4782e0dbdb79a265691b6e26f66786e713f2ad305
|
4
|
+
data.tar.gz: 4888e8622e364a8e5f62b595e43e32f72d41ed39ef2abb8b3fe79d0063191dc3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b9937675f98ff5f3c7cb8e16b6c56ad0613267b6383eb766ef14f26d1eeeb06539d1621a43b4391a13e54a2732f58d92fcf3212953ba170e886baa19580b7bb
|
7
|
+
data.tar.gz: eabf752b7c17d32fc075fb27d146e81efe766a33c4d12650605cebac2cbd7eefca8bb0f15324163e70432db189c20b6ef1e64867c81ed53fd1011d565e539076
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::Preprocessing
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rumale-preprocessing)
|
4
|
+
[BSD 3-Clause License](https://github.com/yoshoku/rumale/blob/main/rumale-preprocessing/LICENSE.txt)
|
5
|
+
[Documentation](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::Preprocessing provides preprocessing techniques,
|
9
|
+
such as L2 normalization, standard scaling, and one-hot encoding,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-preprocessing'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-preprocessing
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - Preprocessing](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Preprocessing
|
9
|
+
# Discretizes features with a given number of bins.
|
10
|
+
# In some cases, discretizing features may accelerate decision tree training.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/preprocessing/bin_discretizer'
|
14
|
+
#
|
15
|
+
# discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 4)
|
16
|
+
# samples = Numo::DFloat.new(5, 2).rand - 0.5
|
17
|
+
# transformed = discretizer.fit_transform(samples)
|
18
|
+
# # > pp samples
|
19
|
+
# # Numo::DFloat#shape=[5,2]
|
20
|
+
# # [[-0.438246, -0.126933],
|
21
|
+
# # [ 0.294815, -0.298958],
|
22
|
+
# # [-0.383959, -0.155968],
|
23
|
+
# # [ 0.039948, 0.237815],
|
24
|
+
# # [-0.334911, -0.449117]]
|
25
|
+
# # > pp transformed
|
26
|
+
# # Numo::DFloat#shape=[5,2]
|
27
|
+
# # [[0, 1],
|
28
|
+
# # [3, 0],
|
29
|
+
# # [0, 1],
|
30
|
+
# # [2, 3],
|
31
|
+
# # [0, 0]]
|
32
|
+
class BinDiscretizer < ::Rumale::Base::Estimator
|
33
|
+
include ::Rumale::Base::Transformer
|
34
|
+
|
35
|
+
# Return the feature steps to be used for discretizing.
|
36
|
+
# @return [Array<Numo::DFloat>] (shape: [n_features, n_bins])
|
37
|
+
attr_reader :feature_steps
|
38
|
+
|
39
|
+
# Create a new discretizer for features with given number of bins.
|
40
|
+
#
|
41
|
+
# @param n_bins [Integer] The number of bins to be used for discretizing feature values.
|
42
|
+
def initialize(n_bins: 32)
|
43
|
+
super()
|
44
|
+
@params = { n_bins: n_bins }
|
45
|
+
end
|
46
|
+
|
47
|
+
# Fit feature ranges to be discretized.
|
48
|
+
#
|
49
|
+
# @overload fit(x) -> BinDiscretizer
|
50
|
+
#
|
51
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
|
52
|
+
# @return [BinDiscretizer]
|
53
|
+
def fit(x, _y = nil)
|
54
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
55
|
+
|
56
|
+
n_features = x.shape[1]
|
57
|
+
max_vals = x.max(0)
|
58
|
+
min_vals = x.min(0)
|
59
|
+
@feature_steps = Array.new(n_features) do |n|
|
60
|
+
Numo::DFloat.linspace(min_vals[n], max_vals[n], @params[:n_bins] + 1)[0...@params[:n_bins]]
|
61
|
+
end
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit feature ranges to be discretized, then return discretized samples.
|
66
|
+
#
|
67
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
68
|
+
#
|
69
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
70
|
+
# @return [Numo::DFloat] The discretized samples.
|
71
|
+
def fit_transform(x, _y = nil)
|
72
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
73
|
+
|
74
|
+
fit(x).transform(x)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Perform discretization of the given samples.
|
78
|
+
#
|
79
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
80
|
+
# @return [Numo::DFloat] The discretized samples.
|
81
|
+
def transform(x)
|
82
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
83
|
+
|
84
|
+
n_samples, n_features = x.shape
|
85
|
+
transformed = Numo::DFloat.zeros(n_samples, n_features)
|
86
|
+
n_features.times do |n|
|
87
|
+
steps = @feature_steps[n]
|
88
|
+
@params[:n_bins].times do |bin|
|
89
|
+
mask = x[true, n].ge(steps[bin]).where
|
90
|
+
transformed[mask, n] = bin
|
91
|
+
end
|
92
|
+
end
|
93
|
+
transformed
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Preprocessing
|
9
|
+
# Binarize samples according to a threshold
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/preprocessing/binarizer'
|
13
|
+
#
|
14
|
+
# binarizer = Rumale::Preprocessing::Binarizer.new
|
15
|
+
# x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
|
16
|
+
# b = binarizer.transform(x)
|
17
|
+
# p b
|
18
|
+
#
|
19
|
+
# # Numo::DFloat#shape=[3, 2]
|
20
|
+
# # [[0, 1],
|
21
|
+
# # [1, 0],
|
22
|
+
# # [1, 1]]
|
23
|
+
class Binarizer < ::Rumale::Base::Estimator
|
24
|
+
include ::Rumale::Base::Transformer
|
25
|
+
|
26
|
+
# Create a new transformer for binarization.
|
27
|
+
# @param threshold [Float] The threshold value for binarization.
|
28
|
+
def initialize(threshold: 0.0)
|
29
|
+
super()
|
30
|
+
@params = { threshold: threshold }
|
31
|
+
end
|
32
|
+
|
33
|
+
# This method does nothing and returns the object itself.
|
34
|
+
# This method exists for compatibility with other transformers.
|
35
|
+
#
|
36
|
+
# @overload fit() -> Binarizer
|
37
|
+
#
|
38
|
+
# @return [Binarizer]
|
39
|
+
def fit(_x = nil, _y = nil)
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
# Binarize each sample.
|
44
|
+
#
|
45
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
|
46
|
+
# @return [Numo::DFloat] The binarized samples.
|
47
|
+
def transform(x)
|
48
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
49
|
+
|
50
|
+
x.class.cast(x.gt(@params[:threshold]))
|
51
|
+
end
|
52
|
+
|
53
|
+
# The output of this method is the same as that of the transform method.
|
54
|
+
# This method exists for compatibility with other transformers.
|
55
|
+
#
|
56
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
|
57
|
+
# @return [Numo::DFloat] The binarized samples.
|
58
|
+
def fit_transform(x, _y = nil)
|
59
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
60
|
+
|
61
|
+
fit(x).transform(x)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Preprocessing
|
10
|
+
# KernelCalculator is a class that calculates the kernel matrix with training data.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/preprocessing/kernel_calculator'
|
14
|
+
# require 'rumale/kernel_machine/kernel_ridge'
|
15
|
+
# require 'rumale/pipeline/pipeline'
|
16
|
+
#
|
17
|
+
# transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
|
18
|
+
# regressor = Rumale::KernelMachine::KernelRidge.new
|
19
|
+
# pipeline = Rumale::Pipeline::Pipeline.new(
|
20
|
+
# steps: { trs: transformer, est: regressor }
|
21
|
+
# )
|
22
|
+
# pipeline.fit(x_train, y_train)
|
23
|
+
# results = pipeline.predict(x_test)
|
24
|
+
class KernelCalculator < ::Rumale::Base::Estimator
|
25
|
+
include ::Rumale::Base::Transformer
|
26
|
+
|
27
|
+
# Returns the training data for calculating kernel matrix.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
29
|
+
attr_reader :components
|
30
|
+
|
31
|
+
# Create a new transformer that transforms feature vectors into a kernel matrix.
|
32
|
+
#
|
33
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
34
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
35
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
36
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
37
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
|
38
|
+
super()
|
39
|
+
@params = {
|
40
|
+
kernel: kernel,
|
41
|
+
gamma: gamma,
|
42
|
+
degree: degree,
|
43
|
+
coef: coef
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
# Fit the model with given training data.
|
48
|
+
#
|
49
|
+
# @overload fit(x) -> KernelCalculator
|
50
|
+
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
51
|
+
# @return [KernelCalculator] The learned transformer itself.
|
52
|
+
def fit(x, _y = nil)
|
53
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
54
|
+
|
55
|
+
@components = x.dup
|
56
|
+
self
|
57
|
+
end
|
58
|
+
|
59
|
+
# Fit the model with training data, and then transform them with the learned model.
|
60
|
+
#
|
61
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
62
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
63
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
|
64
|
+
def fit_transform(x, y = nil)
|
65
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
66
|
+
|
67
|
+
fit(x, y).transform(x)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Transform the given data with the learned model.
|
71
|
+
#
|
72
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
|
73
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
|
74
|
+
def transform(x)
|
75
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
76
|
+
|
77
|
+
kernel_mat(x, @components)
|
78
|
+
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def kernel_mat(x, y)
|
83
|
+
case @params[:kernel]
|
84
|
+
when 'rbf'
|
85
|
+
::Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
86
|
+
when 'poly'
|
87
|
+
::Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
88
|
+
when 'sigmoid'
|
89
|
+
::Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
90
|
+
when 'linear'
|
91
|
+
::Rumale::PairwiseMetric.linear_kernel(x, y)
|
92
|
+
else
|
93
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Preprocessing
|
9
|
+
# Normalize samples to unit L1-norm.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/preprocessing/l1_normalizer'
|
13
|
+
#
|
14
|
+
# normalizer = Rumale::Preprocessing::L1Normalizer.new
|
15
|
+
# new_samples = normalizer.fit_transform(samples)
|
16
|
+
class L1Normalizer < ::Rumale::Base::Estimator
|
17
|
+
include ::Rumale::Base::Transformer
|
18
|
+
|
19
|
+
# Return the vector consisting of the L1-norm of each sample.
|
20
|
+
# @return [Numo::DFloat] (shape: [n_samples])
|
21
|
+
attr_reader :norm_vec # :nodoc:
|
22
|
+
|
23
|
+
# Create a new normalizer for normalizing to unit L1-norm.
|
24
|
+
def initialize # rubocop:disable Lint/UselessMethodDefinition
|
25
|
+
super()
|
26
|
+
end
|
27
|
+
|
28
|
+
# Calculate L1-norms of each sample.
|
29
|
+
#
|
30
|
+
# @overload fit(x) -> L1Normalizer
|
31
|
+
#
|
32
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
33
|
+
# @return [L1Normalizer]
|
34
|
+
def fit(x, _y = nil)
|
35
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
36
|
+
|
37
|
+
@norm_vec = x.abs.sum(axis: 1)
|
38
|
+
@norm_vec[@norm_vec.eq(0)] = 1
|
39
|
+
self
|
40
|
+
end
|
41
|
+
|
42
|
+
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
43
|
+
#
|
44
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
45
|
+
#
|
46
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
47
|
+
# @return [Numo::DFloat] The normalized samples.
|
48
|
+
def fit_transform(x, _y = nil)
|
49
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
50
|
+
|
51
|
+
fit(x)
|
52
|
+
x / @norm_vec.expand_dims(1)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
56
|
+
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
57
|
+
#
|
58
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
59
|
+
# @return [Numo::DFloat] The normalized samples.
|
60
|
+
def transform(x)
|
61
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
62
|
+
|
63
|
+
fit_transform(x)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# This module consists of the classes that perform preprocessings.
|
9
|
+
module Preprocessing
|
10
|
+
# Normalize samples to unit L2-norm.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/preprocessing/l2_normalizer'
|
14
|
+
#
|
15
|
+
# normalizer = Rumale::Preprocessing::L2Normalizer.new
|
16
|
+
# new_samples = normalizer.fit_transform(samples)
|
17
|
+
class L2Normalizer < ::Rumale::Base::Estimator
|
18
|
+
include ::Rumale::Base::Transformer
|
19
|
+
|
20
|
+
# Return the vector consisting of the L2-norm of each sample.
|
21
|
+
# @return [Numo::DFloat] (shape: [n_samples])
|
22
|
+
attr_reader :norm_vec # :nodoc:
|
23
|
+
|
24
|
+
# Create a new normalizer for normalizing to unit L2-norm.
|
25
|
+
def initialize # rubocop:disable Lint/UselessMethodDefinition
|
26
|
+
super()
|
27
|
+
end
|
28
|
+
|
29
|
+
# Calculate L2-norms of each sample.
|
30
|
+
#
|
31
|
+
# @overload fit(x) -> L2Normalizer
|
32
|
+
#
|
33
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
34
|
+
# @return [L2Normalizer]
|
35
|
+
def fit(x, _y = nil)
|
36
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
37
|
+
|
38
|
+
@norm_vec = Numo::NMath.sqrt((x**2).sum(axis: 1))
|
39
|
+
@norm_vec[@norm_vec.eq(0)] = 1
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
44
|
+
#
|
45
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
46
|
+
#
|
47
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
48
|
+
# @return [Numo::DFloat] The normalized samples.
|
49
|
+
def fit_transform(x, _y = nil)
|
50
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
51
|
+
|
52
|
+
fit(x)
|
53
|
+
x / @norm_vec.expand_dims(1)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
57
|
+
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
58
|
+
#
|
59
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
60
|
+
# @return [Numo::DFloat] The normalized samples.
|
61
|
+
def transform(x)
|
62
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
63
|
+
|
64
|
+
fit_transform(x)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Encode labels to binary labels with one-vs-all scheme.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/preprocessing/label_binarizer'
|
12
|
+
#
|
13
|
+
# encoder = Rumale::Preprocessing::LabelBinarizer.new
|
14
|
+
# label = [0, -1, 3, 3, 1, 1]
|
15
|
+
# p encoder.fit_transform(label)
|
16
|
+
# # Numo::Int32#shape=[6,4]
|
17
|
+
# # [[0, 1, 0, 0],
|
18
|
+
# # [1, 0, 0, 0],
|
19
|
+
# # [0, 0, 0, 1],
|
20
|
+
# # [0, 0, 0, 1],
|
21
|
+
# # [0, 0, 1, 0],
|
22
|
+
# # [0, 0, 1, 0]]
|
23
|
+
class LabelBinarizer < ::Rumale::Base::Estimator
|
24
|
+
include ::Rumale::Base::Transformer
|
25
|
+
|
26
|
+
# Return the class labels.
|
27
|
+
# @return [Array] (size: [n_classes])
|
28
|
+
attr_reader :classes
|
29
|
+
|
30
|
+
# Create a new encoder for binarizing labels with one-vs-all scheme.
|
31
|
+
#
|
32
|
+
# @param neg_label [Integer] The value represents negative label.
|
33
|
+
# @param pos_label [Integer] The value represents positive label.
|
34
|
+
def initialize(neg_label: 0, pos_label: 1)
|
35
|
+
super()
|
36
|
+
@params = {
|
37
|
+
neg_label: neg_label,
|
38
|
+
pos_label: pos_label
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
# Fit encoder to labels.
|
43
|
+
#
|
44
|
+
# @overload fit(y) -> LabelBinarizer
|
45
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
46
|
+
# @return [LabelBinarizer]
|
47
|
+
def fit(y, _not_used = nil)
|
48
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
49
|
+
@classes = y.uniq.sort
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
# Fit encoder to labels, then return binarized labels.
|
54
|
+
#
|
55
|
+
# @overload fit_transform(y) -> Numo::Int32
|
56
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
57
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
58
|
+
def fit_transform(y, _not_used = nil)
|
59
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
60
|
+
fit(y).transform(y)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Encode labels.
|
64
|
+
#
|
65
|
+
# @param y [Array] (shape: [n_samples]) The labels to be encoded.
|
66
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
67
|
+
def transform(y)
|
68
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
69
|
+
n_classes = @classes.size
|
70
|
+
n_samples = y.size
|
71
|
+
codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
|
72
|
+
n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
|
73
|
+
codes
|
74
|
+
end
|
75
|
+
|
76
|
+
# Decode binarized labels.
|
77
|
+
#
|
78
|
+
# @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
|
79
|
+
# @return [Array] (shape: [n_samples]) The decoded labels.
|
80
|
+
def inverse_transform(x)
|
81
|
+
n_samples = x.shape[0]
|
82
|
+
Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Encode labels to values between 0 and n_classes - 1.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/preprocessing/label_encoder'
|
12
|
+
#
|
13
|
+
# encoder = Rumale::Preprocessing::LabelEncoder.new
|
14
|
+
# labels = Numo::Int32[1, 8, 8, 15, 0]
|
15
|
+
# encoded_labels = encoder.fit_transform(labels)
|
16
|
+
# # > pp encoded_labels
|
17
|
+
# # Numo::Int32#shape=[5]
|
18
|
+
# # [1, 2, 2, 3, 0]
|
19
|
+
# decoded_labels = encoder.inverse_transform(encoded_labels)
|
20
|
+
# # > pp decoded_labels
|
21
|
+
# # [1, 8, 8, 15, 0]
|
22
|
+
class LabelEncoder < ::Rumale::Base::Estimator
|
23
|
+
include ::Rumale::Base::Transformer
|
24
|
+
|
25
|
+
# Return the class labels.
|
26
|
+
# @return [Array] (size: [n_classes])
|
27
|
+
attr_reader :classes
|
28
|
+
|
29
|
+
# Create a new encoder for encoding labels to values between 0 and n_classes - 1.
|
30
|
+
def initialize # rubocop:disable Lint/UselessMethodDefinition
|
31
|
+
super()
|
32
|
+
end
|
33
|
+
|
34
|
+
# Fit label-encoder to labels.
|
35
|
+
#
|
36
|
+
# @overload fit(x) -> LabelEncoder
|
37
|
+
#
|
38
|
+
# @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
|
39
|
+
# @return [LabelEncoder]
|
40
|
+
def fit(x, _y = nil)
|
41
|
+
x = x.to_a if x.is_a?(Numo::NArray)
|
42
|
+
@classes = x.sort.uniq
|
43
|
+
self
|
44
|
+
end
|
45
|
+
|
46
|
+
# Fit label-encoder to labels, then return encoded labels.
|
47
|
+
#
|
48
|
+
# @overload fit_transform(x) -> Numo::Int32
|
49
|
+
#
|
50
|
+
# @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
|
51
|
+
# @return [Numo::Int32] The encoded labels.
|
52
|
+
def fit_transform(x, _y = nil)
|
53
|
+
x = x.to_a if x.is_a?(Numo::NArray)
|
54
|
+
fit(x).transform(x)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Encode labels.
|
58
|
+
#
|
59
|
+
# @param x [Array] (shape: [n_samples]) The labels to be encoded.
|
60
|
+
# @return [Numo::Int32] The encoded labels.
|
61
|
+
def transform(x)
|
62
|
+
x = x.to_a if x.is_a?(Numo::NArray)
|
63
|
+
Numo::Int32[*(x.map { |v| @classes.index(v) })]
|
64
|
+
end
|
65
|
+
|
66
|
+
# Decode encoded labels.
|
67
|
+
#
|
68
|
+
# @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
|
69
|
+
# @return [Array] The decoded labels.
|
70
|
+
def inverse_transform(x)
|
71
|
+
x.to_a.map { |n| @classes[n] }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|