rumale 0.23.3 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Normalize samples to unit L1-norm.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# normalizer = Rumale::Preprocessing::L1Normalizer.new
|
12
|
-
# new_samples = normalizer.fit_transform(samples)
|
13
|
-
class L1Normalizer
|
14
|
-
include Base::BaseEstimator
|
15
|
-
include Base::Transformer
|
16
|
-
|
17
|
-
# Return the vector consisting of the L1-norm of each sample.
|
18
|
-
# @return [Numo::DFloat] (shape: [n_samples])
|
19
|
-
attr_reader :norm_vec # :nodoc:
|
20
|
-
|
21
|
-
# Create a new normalizer for normalizing to unit L1-norm.
|
22
|
-
def initialize
|
23
|
-
@params = {}
|
24
|
-
@norm_vec = nil
|
25
|
-
end
|
26
|
-
|
27
|
-
# Calculate L1-norms of each sample.
|
28
|
-
#
|
29
|
-
# @overload fit(x) -> L1Normalizer
|
30
|
-
#
|
31
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
32
|
-
# @return [L1Normalizer]
|
33
|
-
def fit(x, _y = nil)
|
34
|
-
x = check_convert_sample_array(x)
|
35
|
-
@norm_vec = x.abs.sum(1)
|
36
|
-
@norm_vec[@norm_vec.eq(0)] = 1
|
37
|
-
self
|
38
|
-
end
|
39
|
-
|
40
|
-
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
41
|
-
#
|
42
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
-
#
|
44
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
45
|
-
# @return [Numo::DFloat] The normalized samples.
|
46
|
-
def fit_transform(x, _y = nil)
|
47
|
-
x = check_convert_sample_array(x)
|
48
|
-
fit(x)
|
49
|
-
x / @norm_vec.expand_dims(1)
|
50
|
-
end
|
51
|
-
|
52
|
-
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
53
|
-
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
54
|
-
#
|
55
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
56
|
-
# @return [Numo::DFloat] The normalized samples.
|
57
|
-
def transform(x)
|
58
|
-
fit_transform(x)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
@@ -1,63 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
# This module consists of the classes that perform preprocessings.
|
8
|
-
module Preprocessing
|
9
|
-
# Normalize samples to unit L2-norm.
|
10
|
-
#
|
11
|
-
# @example
|
12
|
-
# normalizer = Rumale::Preprocessing::L2Normalizer.new
|
13
|
-
# new_samples = normalizer.fit_transform(samples)
|
14
|
-
class L2Normalizer
|
15
|
-
include Base::BaseEstimator
|
16
|
-
include Base::Transformer
|
17
|
-
|
18
|
-
# Return the vector consisting of the L2-norm of each sample.
|
19
|
-
# @return [Numo::DFloat] (shape: [n_samples])
|
20
|
-
attr_reader :norm_vec # :nodoc:
|
21
|
-
|
22
|
-
# Create a new normalizer for normalizing to unit L2-norm.
|
23
|
-
def initialize
|
24
|
-
@params = {}
|
25
|
-
@norm_vec = nil
|
26
|
-
end
|
27
|
-
|
28
|
-
# Calculate L2-norms of each sample.
|
29
|
-
#
|
30
|
-
# @overload fit(x) -> L2Normalizer
|
31
|
-
#
|
32
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
33
|
-
# @return [L2Normalizer]
|
34
|
-
def fit(x, _y = nil)
|
35
|
-
x = check_convert_sample_array(x)
|
36
|
-
@norm_vec = Numo::NMath.sqrt((x**2).sum(1))
|
37
|
-
@norm_vec[@norm_vec.eq(0)] = 1
|
38
|
-
self
|
39
|
-
end
|
40
|
-
|
41
|
-
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
42
|
-
#
|
43
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
44
|
-
#
|
45
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
46
|
-
# @return [Numo::DFloat] The normalized samples.
|
47
|
-
def fit_transform(x, _y = nil)
|
48
|
-
x = check_convert_sample_array(x)
|
49
|
-
fit(x)
|
50
|
-
x / @norm_vec.expand_dims(1)
|
51
|
-
end
|
52
|
-
|
53
|
-
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
54
|
-
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
55
|
-
#
|
56
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
57
|
-
# @return [Numo::DFloat] The normalized samples.
|
58
|
-
def transform(x)
|
59
|
-
fit_transform(x)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Encode labels to binary labels with one-vs-all scheme.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# encoder = Rumale::Preprocessing::LabelBinarizer.new
|
12
|
-
# label = [0, -1, 3, 3, 1, 1]
|
13
|
-
# p encoder.fit_transform(label)
|
14
|
-
# # Numo::Int32#shape=[6,4]
|
15
|
-
# # [[0, 1, 0, 0],
|
16
|
-
# # [1, 0, 0, 0],
|
17
|
-
# # [0, 0, 0, 1],
|
18
|
-
# # [0, 0, 0, 1],
|
19
|
-
# # [0, 0, 1, 0],
|
20
|
-
# # [0, 0, 1, 0]]
|
21
|
-
class LabelBinarizer
|
22
|
-
include Base::BaseEstimator
|
23
|
-
include Base::Transformer
|
24
|
-
|
25
|
-
# Return the class labels.
|
26
|
-
# @return [Array] (size: [n_classes])
|
27
|
-
attr_reader :classes
|
28
|
-
|
29
|
-
# Create a new encoder for binarizing labels with one-vs-all scheme.
|
30
|
-
#
|
31
|
-
# @param neg_label [Integer] The value represents negative label.
|
32
|
-
# @param pos_label [Integer] The value represents positive label.
|
33
|
-
def initialize(neg_label: 0, pos_label: 1)
|
34
|
-
check_params_numeric(neg_label: neg_label, pos_label: pos_label)
|
35
|
-
@params = {}
|
36
|
-
@params[:neg_label] = neg_label
|
37
|
-
@params[:pos_label] = pos_label
|
38
|
-
@classes = nil
|
39
|
-
end
|
40
|
-
|
41
|
-
# Fit encoder to labels.
|
42
|
-
#
|
43
|
-
# @overload fit(y) -> LabelBinarizer
|
44
|
-
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
45
|
-
# @return [LabelBinarizer]
|
46
|
-
def fit(y, _not_used = nil)
|
47
|
-
y = y.to_a if y.is_a?(Numo::NArray)
|
48
|
-
check_params_type(Array, y: y)
|
49
|
-
@classes = y.uniq.sort
|
50
|
-
self
|
51
|
-
end
|
52
|
-
|
53
|
-
# Fit encoder to labels, then return binarized labels.
|
54
|
-
#
|
55
|
-
# @overload fit_transform(y) -> Numo::Int32
|
56
|
-
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
57
|
-
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
58
|
-
def fit_transform(y, _not_used = nil)
|
59
|
-
y = y.to_a if y.is_a?(Numo::NArray)
|
60
|
-
check_params_type(Array, y: y)
|
61
|
-
fit(y).transform(y)
|
62
|
-
end
|
63
|
-
|
64
|
-
# Encode labels.
|
65
|
-
#
|
66
|
-
# @param y [Array] (shape: [n_samples]) The labels to be encoded.
|
67
|
-
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
68
|
-
def transform(y)
|
69
|
-
y = y.to_a if y.is_a?(Numo::NArray)
|
70
|
-
check_params_type(Array, y: y)
|
71
|
-
n_classes = @classes.size
|
72
|
-
n_samples = y.size
|
73
|
-
codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
|
74
|
-
n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
|
75
|
-
codes
|
76
|
-
end
|
77
|
-
|
78
|
-
# Decode binarized labels.
|
79
|
-
#
|
80
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
|
81
|
-
# @return [Array] (shape: [n_samples]) The decoded labels.
|
82
|
-
def inverse_transform(x)
|
83
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
84
|
-
n_samples = x.shape[0]
|
85
|
-
Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
@@ -1,79 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Encode labels to values between 0 and n_classes - 1.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# encoder = Rumale::Preprocessing::LabelEncoder.new
|
12
|
-
# labels = Numo::Int32[1, 8, 8, 15, 0]
|
13
|
-
# encoded_labels = encoder.fit_transform(labels)
|
14
|
-
# # > pp encoded_labels
|
15
|
-
# # Numo::Int32#shape=[5]
|
16
|
-
# # [1, 2, 2, 3, 0]
|
17
|
-
# decoded_labels = encoder.inverse_transform(encoded_labels)
|
18
|
-
# # > pp decoded_labels
|
19
|
-
# # [1, 8, 8, 15, 0]
|
20
|
-
class LabelEncoder
|
21
|
-
include Base::BaseEstimator
|
22
|
-
include Base::Transformer
|
23
|
-
|
24
|
-
# Return the class labels.
|
25
|
-
# @return [Array] (size: [n_classes])
|
26
|
-
attr_reader :classes
|
27
|
-
|
28
|
-
# Create a new encoder for encoding labels to values between 0 and n_classes - 1.
|
29
|
-
def initialize
|
30
|
-
@params = {}
|
31
|
-
@classes = nil
|
32
|
-
end
|
33
|
-
|
34
|
-
# Fit label-encoder to labels.
|
35
|
-
#
|
36
|
-
# @overload fit(x) -> LabelEncoder
|
37
|
-
#
|
38
|
-
# @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
|
39
|
-
# @return [LabelEncoder]
|
40
|
-
def fit(x, _y = nil)
|
41
|
-
x = x.to_a if x.is_a?(Numo::NArray)
|
42
|
-
check_params_type(Array, x: x)
|
43
|
-
@classes = x.sort.uniq
|
44
|
-
self
|
45
|
-
end
|
46
|
-
|
47
|
-
# Fit label-encoder to labels, then return encoded labels.
|
48
|
-
#
|
49
|
-
# @overload fit_transform(x) -> Numo::Int32
|
50
|
-
#
|
51
|
-
# @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
|
52
|
-
# @return [Numo::Int32] The encoded labels.
|
53
|
-
def fit_transform(x, _y = nil)
|
54
|
-
x = x.to_a if x.is_a?(Numo::NArray)
|
55
|
-
check_params_type(Array, x: x)
|
56
|
-
fit(x).transform(x)
|
57
|
-
end
|
58
|
-
|
59
|
-
# Encode labels.
|
60
|
-
#
|
61
|
-
# @param x [Array] (shape: [n_samples]) The labels to be encoded.
|
62
|
-
# @return [Numo::Int32] The encoded labels.
|
63
|
-
def transform(x)
|
64
|
-
x = x.to_a if x.is_a?(Numo::NArray)
|
65
|
-
check_params_type(Array, x: x)
|
66
|
-
Numo::Int32[*(x.map { |v| @classes.index(v) })]
|
67
|
-
end
|
68
|
-
|
69
|
-
# Decode encoded labels.
|
70
|
-
#
|
71
|
-
# @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
|
72
|
-
# @return [Array] The decoded labels.
|
73
|
-
def inverse_transform(x)
|
74
|
-
x = check_convert_label_array(x)
|
75
|
-
x.to_a.map { |n| @classes[n] }
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Normalize samples by scaling each feature with its maximum absolute value.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# normalizer = Rumale::Preprocessing::MaxAbsScaler.new
|
12
|
-
# new_training_samples = normalizer.fit_transform(training_samples)
|
13
|
-
# new_testing_samples = normalizer.transform(testing_samples)
|
14
|
-
class MaxAbsScaler
|
15
|
-
include Base::BaseEstimator
|
16
|
-
include Base::Transformer
|
17
|
-
|
18
|
-
# Return the vector consisting of the maximum absolute value of each feature.
|
19
|
-
# @return [Numo::DFloat] (shape: [n_features])
|
20
|
-
attr_reader :max_abs_vec
|
21
|
-
|
22
|
-
# Creates a new normalizer for scaling each feature with its maximum absolute value.
|
23
|
-
def initialize
|
24
|
-
@params = {}
|
25
|
-
@max_abs_vec = nil
|
26
|
-
end
|
27
|
-
|
28
|
-
# Calculate the maximum absolute value of each feature for scaling.
|
29
|
-
#
|
30
|
-
# @overload fit(x) -> MaxAbsScaler
|
31
|
-
#
|
32
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
33
|
-
# @return [MaxAbsScaler]
|
34
|
-
def fit(x, _y = nil)
|
35
|
-
x = check_convert_sample_array(x)
|
36
|
-
@max_abs_vec = x.abs.max(0)
|
37
|
-
self
|
38
|
-
end
|
39
|
-
|
40
|
-
# Calculate the maximum absolute value for each feature, and then normalize samples.
|
41
|
-
#
|
42
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
-
#
|
44
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
45
|
-
# @return [Numo::DFloat] The scaled samples.
|
46
|
-
def fit_transform(x, _y = nil)
|
47
|
-
x = check_convert_sample_array(x)
|
48
|
-
fit(x).transform(x)
|
49
|
-
end
|
50
|
-
|
51
|
-
# Perform scaling the given samples with maximum absolute value for each feature.
|
52
|
-
#
|
53
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
54
|
-
# @return [Numo::DFloat] The scaled samples.
|
55
|
-
def transform(x)
|
56
|
-
x = check_convert_sample_array(x)
|
57
|
-
x / @max_abs_vec
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Normalize samples with the maximum of the absolute values.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# normalizer = Rumale::Preprocessing::MaxNormalizer.new
|
12
|
-
# new_samples = normalizer.fit_transform(samples)
|
13
|
-
class MaxNormalizer
|
14
|
-
include Base::BaseEstimator
|
15
|
-
include Base::Transformer
|
16
|
-
|
17
|
-
# Return the vector consisting of the maximum norm of each sample.
|
18
|
-
# @return [Numo::DFloat] (shape: [n_samples])
|
19
|
-
attr_reader :norm_vec # :nodoc:
|
20
|
-
|
21
|
-
# Create a new normalizer for normalizing to max-norm.
|
22
|
-
def initialize
|
23
|
-
@params = {}
|
24
|
-
@norm_vec = nil
|
25
|
-
end
|
26
|
-
|
27
|
-
# Calculate the maximum norms of each sample.
|
28
|
-
#
|
29
|
-
# @overload fit(x) -> MaxNormalizer
|
30
|
-
#
|
31
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
|
32
|
-
# @return [MaxNormalizer]
|
33
|
-
def fit(x, _y = nil)
|
34
|
-
x = check_convert_sample_array(x)
|
35
|
-
@norm_vec = x.abs.max(1)
|
36
|
-
@norm_vec[@norm_vec.eq(0)] = 1
|
37
|
-
self
|
38
|
-
end
|
39
|
-
|
40
|
-
# Calculate the maximum norms of each sample, and then normalize samples with the norms.
|
41
|
-
#
|
42
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
-
#
|
44
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
|
45
|
-
# @return [Numo::DFloat] The normalized samples.
|
46
|
-
def fit_transform(x, _y = nil)
|
47
|
-
x = check_convert_sample_array(x)
|
48
|
-
fit(x)
|
49
|
-
x / @norm_vec.expand_dims(1)
|
50
|
-
end
|
51
|
-
|
52
|
-
# Calculate the maximum norms of each sample, and then normalize samples with the norms.
|
53
|
-
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
54
|
-
#
|
55
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
|
56
|
-
# @return [Numo::DFloat] The normalized samples.
|
57
|
-
def transform(x)
|
58
|
-
fit_transform(x)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
# This module consists of the classes that perform preprocessings.
|
8
|
-
module Preprocessing
|
9
|
-
# Normalize samples by scaling each feature to a given range.
|
10
|
-
#
|
11
|
-
# @example
|
12
|
-
# normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
|
13
|
-
# new_training_samples = normalizer.fit_transform(training_samples)
|
14
|
-
# new_testing_samples = normalizer.transform(testing_samples)
|
15
|
-
class MinMaxScaler
|
16
|
-
include Base::BaseEstimator
|
17
|
-
include Base::Transformer
|
18
|
-
|
19
|
-
# Return the vector consisting of the minimum value of each feature.
|
20
|
-
# @return [Numo::DFloat] (shape: [n_features])
|
21
|
-
attr_reader :min_vec
|
22
|
-
|
23
|
-
# Return the vector consisting of the maximum value of each feature.
|
24
|
-
# @return [Numo::DFloat] (shape: [n_features])
|
25
|
-
attr_reader :max_vec
|
26
|
-
|
27
|
-
# Creates a new normalizer for scaling each feature to a given range.
|
28
|
-
#
|
29
|
-
# @param feature_range [Array<Float>] The desired range of samples.
|
30
|
-
def initialize(feature_range: [0.0, 1.0])
|
31
|
-
check_params_type(Array, feature_range: feature_range)
|
32
|
-
@params = {}
|
33
|
-
@params[:feature_range] = feature_range
|
34
|
-
@min_vec = nil
|
35
|
-
@max_vec = nil
|
36
|
-
end
|
37
|
-
|
38
|
-
# Calculate the minimum and maximum value of each feature for scaling.
|
39
|
-
#
|
40
|
-
# @overload fit(x) -> MinMaxScaler
|
41
|
-
#
|
42
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
43
|
-
# @return [MinMaxScaler]
|
44
|
-
def fit(x, _y = nil)
|
45
|
-
x = check_convert_sample_array(x)
|
46
|
-
@min_vec = x.min(0)
|
47
|
-
@max_vec = x.max(0)
|
48
|
-
self
|
49
|
-
end
|
50
|
-
|
51
|
-
# Calculate the minimum and maximum values, and then normalize samples to feature_range.
|
52
|
-
#
|
53
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
54
|
-
#
|
55
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
56
|
-
# @return [Numo::DFloat] The scaled samples.
|
57
|
-
def fit_transform(x, _y = nil)
|
58
|
-
x = check_convert_sample_array(x)
|
59
|
-
fit(x).transform(x)
|
60
|
-
end
|
61
|
-
|
62
|
-
# Perform scaling the given samples according to feature_range.
|
63
|
-
#
|
64
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
65
|
-
# @return [Numo::DFloat] The scaled samples.
|
66
|
-
def transform(x)
|
67
|
-
x = check_convert_sample_array(x)
|
68
|
-
n_samples, = x.shape
|
69
|
-
dif_vec = @max_vec - @min_vec
|
70
|
-
dif_vec[dif_vec.eq(0)] = 1.0
|
71
|
-
nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
|
72
|
-
nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
@@ -1,100 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Encode categorical integer features to one-hot-vectors.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# encoder = Rumale::Preprocessing::OneHotEncoder.new
|
12
|
-
# labels = Numo::Int32[0, 0, 2, 3, 2, 1]
|
13
|
-
# one_hot_vectors = encoder.fit_transform(labels)
|
14
|
-
# # > pp one_hot_vectors
|
15
|
-
# # Numo::DFloat#shape=[6,4]
|
16
|
-
# # [[1, 0, 0, 0],
|
17
|
-
# # [1, 0, 0, 0],
|
18
|
-
# # [0, 0, 1, 0],
|
19
|
-
# # [0, 0, 0, 1],
|
20
|
-
# # [0, 0, 1, 0],
|
21
|
-
# # [0, 1, 0, 0]]
|
22
|
-
class OneHotEncoder
|
23
|
-
include Base::BaseEstimator
|
24
|
-
include Base::Transformer
|
25
|
-
|
26
|
-
# Return the number of values (maximum value + 1) for each feature.
|
27
|
-
# @return [Numo::Int32] (shape: [n_features])
|
28
|
-
attr_reader :n_values
|
29
|
-
|
30
|
-
# Return the indices for feature values that actually occur in the training set.
|
31
|
-
# @return [Numo::Int32]
|
32
|
-
attr_reader :active_features
|
33
|
-
|
34
|
-
# Return the indices to feature ranges.
|
35
|
-
# @return [Numo::Int32] (shape: [n_features + 1])
|
36
|
-
attr_reader :feature_indices
|
37
|
-
|
38
|
-
# Create a new encoder for encoding categorical integer features to one-hot-vectors
|
39
|
-
def initialize
|
40
|
-
@params = {}
|
41
|
-
@n_values = nil
|
42
|
-
@active_features = nil
|
43
|
-
@feature_indices = nil
|
44
|
-
end
|
45
|
-
|
46
|
-
# Fit one-hot-encoder to samples.
|
47
|
-
#
|
48
|
-
# @overload fit(x) -> OneHotEncoder
|
49
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
50
|
-
# @return [OneHotEncoder]
|
51
|
-
def fit(x, _y = nil)
|
52
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
53
|
-
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
54
|
-
|
55
|
-
@n_values = x.max(0) + 1
|
56
|
-
@feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
|
57
|
-
@active_features = encode(x, @feature_indices).sum(0).ne(0).where
|
58
|
-
self
|
59
|
-
end
|
60
|
-
|
61
|
-
# Fit one-hot-encoder to samples, then encode samples into one-hot-vectors
|
62
|
-
#
|
63
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
64
|
-
#
|
65
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
|
66
|
-
# @return [Numo::DFloat] The one-hot-vectors.
|
67
|
-
def fit_transform(x, _y = nil)
|
68
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
69
|
-
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
70
|
-
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
71
|
-
|
72
|
-
fit(x).transform(x)
|
73
|
-
end
|
74
|
-
|
75
|
-
# Encode samples into one-hot-vectors.
|
76
|
-
#
|
77
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
|
78
|
-
# @return [Numo::DFloat] The one-hot-vectors.
|
79
|
-
def transform(x)
|
80
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
81
|
-
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
82
|
-
|
83
|
-
codes = encode(x, @feature_indices)
|
84
|
-
codes[true, @active_features].dup
|
85
|
-
end
|
86
|
-
|
87
|
-
private
|
88
|
-
|
89
|
-
def encode(x, indices)
|
90
|
-
n_samples, n_features = x.shape
|
91
|
-
n_features = 1 if n_features.nil?
|
92
|
-
col_indices = (x + indices[0...-1]).flatten.to_a
|
93
|
-
row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
|
94
|
-
codes = Numo::DFloat.zeros(n_samples, indices[-1])
|
95
|
-
row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
|
96
|
-
codes
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|