rumale 0.23.3 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
@@ -1,152 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/validation'
|
4
|
-
|
5
|
-
module Rumale
|
6
|
-
# Module for calculating pairwise distances, similarities, and kernels.
|
7
|
-
module PairwiseMetric
|
8
|
-
class << self
|
9
|
-
# Calculate the pairwise euclidean distances between x and y.
|
10
|
-
#
|
11
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
12
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
13
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
14
|
-
def euclidean_distance(x, y = nil)
|
15
|
-
y = x if y.nil?
|
16
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
17
|
-
y = Rumale::Validation.check_convert_sample_array(y)
|
18
|
-
Numo::NMath.sqrt(squared_error(x, y).abs)
|
19
|
-
end
|
20
|
-
|
21
|
-
# Calculate the pairwise manhattan distances between x and y.
|
22
|
-
#
|
23
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
24
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
25
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
26
|
-
def manhattan_distance(x, y = nil)
|
27
|
-
y = x if y.nil?
|
28
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
29
|
-
y = Rumale::Validation.check_convert_sample_array(y)
|
30
|
-
n_samples_x = x.shape[0]
|
31
|
-
n_samples_y = y.shape[0]
|
32
|
-
distance_mat = Numo::DFloat.zeros(n_samples_x, n_samples_y)
|
33
|
-
n_samples_x.times do |n|
|
34
|
-
distance_mat[n, true] = (y - x[n, true]).abs.sum(axis: 1)
|
35
|
-
end
|
36
|
-
distance_mat
|
37
|
-
end
|
38
|
-
|
39
|
-
# Calculate the pairwise squared errors between x and y.
|
40
|
-
#
|
41
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
42
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
43
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
44
|
-
def squared_error(x, y = nil)
|
45
|
-
y_not_given = y.nil?
|
46
|
-
y = x if y_not_given
|
47
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
48
|
-
y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
|
49
|
-
sum_x_vec = (x**2).sum(1).expand_dims(1)
|
50
|
-
sum_y_vec = y_not_given ? sum_x_vec.transpose : (y**2).sum(1).expand_dims(1).transpose
|
51
|
-
err_mat = -2 * x.dot(y.transpose)
|
52
|
-
err_mat += sum_x_vec
|
53
|
-
err_mat += sum_y_vec
|
54
|
-
err_mat.class.maximum(err_mat, 0)
|
55
|
-
end
|
56
|
-
|
57
|
-
# Calculate the pairwise cosine similarities between x and y.
|
58
|
-
#
|
59
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
60
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
61
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
62
|
-
def cosine_similarity(x, y = nil)
|
63
|
-
y_not_given = y.nil?
|
64
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
65
|
-
y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
|
66
|
-
x_norm = Numo::NMath.sqrt((x**2).sum(1))
|
67
|
-
x_norm[x_norm.eq(0)] = 1
|
68
|
-
x /= x_norm.expand_dims(1)
|
69
|
-
if y_not_given
|
70
|
-
x.dot(x.transpose)
|
71
|
-
else
|
72
|
-
y_norm = Numo::NMath.sqrt((y**2).sum(1))
|
73
|
-
y_norm[y_norm.eq(0)] = 1
|
74
|
-
y /= y_norm.expand_dims(1)
|
75
|
-
x.dot(y.transpose)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
# Calculate the pairwise cosine distances between x and y.
|
80
|
-
#
|
81
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
82
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
83
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
84
|
-
def cosine_distance(x, y = nil)
|
85
|
-
dist_mat = 1 - cosine_similarity(x, y)
|
86
|
-
dist_mat[dist_mat.diag_indices] = 0 if y.nil?
|
87
|
-
dist_mat.clip(0, 2)
|
88
|
-
end
|
89
|
-
|
90
|
-
# Calculate the rbf kernel between x and y.
|
91
|
-
#
|
92
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
93
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
94
|
-
# @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
|
95
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
96
|
-
def rbf_kernel(x, y = nil, gamma = nil)
|
97
|
-
y_not_given = y.nil?
|
98
|
-
y = x if y_not_given
|
99
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
100
|
-
y = Rumale::Validation.check_convert_sample_array(y) unless y_not_given
|
101
|
-
gamma ||= 1.0 / x.shape[1]
|
102
|
-
Rumale::Validation.check_params_numeric(gamma: gamma)
|
103
|
-
Numo::NMath.exp(-gamma * squared_error(x, y))
|
104
|
-
end
|
105
|
-
|
106
|
-
# Calculate the linear kernel between x and y.
|
107
|
-
#
|
108
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
109
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
110
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
111
|
-
def linear_kernel(x, y = nil)
|
112
|
-
y = x if y.nil?
|
113
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
114
|
-
y = Rumale::Validation.check_convert_sample_array(y)
|
115
|
-
x.dot(y.transpose)
|
116
|
-
end
|
117
|
-
|
118
|
-
# Calculate the polynomial kernel between x and y.
|
119
|
-
#
|
120
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
121
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
122
|
-
# @param degree [Integer] The parameter of polynomial kernel.
|
123
|
-
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
124
|
-
# @param coef [Integer] The parameter of polynomial kernel.
|
125
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
126
|
-
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
127
|
-
y = x if y.nil?
|
128
|
-
gamma ||= 1.0 / x.shape[1]
|
129
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
130
|
-
y = Rumale::Validation.check_convert_sample_array(y)
|
131
|
-
Rumale::Validation.check_params_numeric(gamma: gamma, degree: degree, coef: coef)
|
132
|
-
(x.dot(y.transpose) * gamma + coef)**degree
|
133
|
-
end
|
134
|
-
|
135
|
-
# Calculate the sigmoid kernel between x and y.
|
136
|
-
#
|
137
|
-
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
138
|
-
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
139
|
-
# @param gamma [Float] The parameter of sigmoid kernel, if nil it is 1 / n_features.
|
140
|
-
# @param coef [Integer] The parameter of sigmoid kernel.
|
141
|
-
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
142
|
-
def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
|
143
|
-
y = x if y.nil?
|
144
|
-
gamma ||= 1.0 / x.shape[1]
|
145
|
-
x = Rumale::Validation.check_convert_sample_array(x)
|
146
|
-
y = Rumale::Validation.check_convert_sample_array(y)
|
147
|
-
Rumale::Validation.check_params_numeric(gamma: gamma, coef: coef)
|
148
|
-
Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/validation'
|
4
|
-
require 'rumale/base/base_estimator'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Pipeline
|
8
|
-
# FeatureUnion is a class that implements the function concatenating the multi-transformer results.
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# fu = Rumale::Pipeline::FeatureUnion.new(
|
12
|
-
# transformers: {
|
13
|
-
# 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
|
14
|
-
# 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
|
15
|
-
# }
|
16
|
-
# )
|
17
|
-
# fu.fit(training_samples, training_labels)
|
18
|
-
# results = fu.transform(testing_samples)
|
19
|
-
#
|
20
|
-
# # > p results.shape[1]
|
21
|
-
# # > 128
|
22
|
-
#
|
23
|
-
class FeatureUnion
|
24
|
-
include Base::BaseEstimator
|
25
|
-
include Validation
|
26
|
-
|
27
|
-
# Return the transformers
|
28
|
-
# @return [Hash]
|
29
|
-
attr_reader :transformers
|
30
|
-
|
31
|
-
# Create a new feature union.
|
32
|
-
#
|
33
|
-
# @param transformers [Hash] List of transformers. The order of transforms follows the insertion order of hash keys.
|
34
|
-
def initialize(transformers:)
|
35
|
-
check_params_type(Hash, transformers: transformers)
|
36
|
-
@params = {}
|
37
|
-
@transformers = transformers
|
38
|
-
end
|
39
|
-
|
40
|
-
# Fit the model with given training data.
|
41
|
-
#
|
42
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
43
|
-
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
44
|
-
# @return [FeatureUnion] The learned feature union itself.
|
45
|
-
def fit(x, y = nil)
|
46
|
-
@transformers.each { |_k, t| t.fit(x, y) }
|
47
|
-
self
|
48
|
-
end
|
49
|
-
|
50
|
-
# Fit the model with training data, and then transform them with the learned model.
|
51
|
-
#
|
52
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
53
|
-
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
54
|
-
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
55
|
-
def fit_transform(x, y = nil)
|
56
|
-
fit(x, y).transform(x)
|
57
|
-
end
|
58
|
-
|
59
|
-
# Transform the given data with the learned model.
|
60
|
-
#
|
61
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
|
62
|
-
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
63
|
-
def transform(x)
|
64
|
-
z = @transformers.values.map { |t| t.transform(x) }
|
65
|
-
Numo::NArray.hstack(z)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
@@ -1,175 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/validation'
|
4
|
-
require 'rumale/base/base_estimator'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
# Module implements utilities of pipeline that consists of a chain of transformers and estimators.
|
8
|
-
module Pipeline
|
9
|
-
# Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
|
10
|
-
#
|
11
|
-
# @example
|
12
|
-
# rbf = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
|
13
|
-
# svc = Rumale::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
|
14
|
-
# pipeline = Rumale::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
|
15
|
-
# pipeline.fit(training_samples, training_labels)
|
16
|
-
# results = pipeline.predict(testing_samples)
|
17
|
-
#
|
18
|
-
class Pipeline
|
19
|
-
include Base::BaseEstimator
|
20
|
-
include Validation
|
21
|
-
|
22
|
-
# Return the steps.
|
23
|
-
# @return [Hash]
|
24
|
-
attr_reader :steps
|
25
|
-
|
26
|
-
# Create a new pipeline.
|
27
|
-
#
|
28
|
-
# @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
|
29
|
-
# The last entry is considered an estimator.
|
30
|
-
def initialize(steps:)
|
31
|
-
check_params_type(Hash, steps: steps)
|
32
|
-
validate_steps(steps)
|
33
|
-
@params = {}
|
34
|
-
@steps = steps
|
35
|
-
end
|
36
|
-
|
37
|
-
# Fit the model with given training data.
|
38
|
-
#
|
39
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
40
|
-
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
41
|
-
# @return [Pipeline] The learned pipeline itself.
|
42
|
-
def fit(x, y)
|
43
|
-
trans_x = apply_transforms(x, y, fit: true)
|
44
|
-
last_estimator&.fit(trans_x, y)
|
45
|
-
self
|
46
|
-
end
|
47
|
-
|
48
|
-
# Call the fit_predict method of last estimator after applying all transforms.
|
49
|
-
#
|
50
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
51
|
-
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
52
|
-
# @return [Numo::NArray] The predicted results by last estimator.
|
53
|
-
def fit_predict(x, y = nil)
|
54
|
-
trans_x = apply_transforms(x, y, fit: true)
|
55
|
-
last_estimator.fit_predict(trans_x)
|
56
|
-
end
|
57
|
-
|
58
|
-
# Call the fit_transform method of last estimator after applying all transforms.
|
59
|
-
#
|
60
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
61
|
-
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
62
|
-
# @return [Numo::NArray] The transformed results by last estimator.
|
63
|
-
def fit_transform(x, y = nil)
|
64
|
-
trans_x = apply_transforms(x, y, fit: true)
|
65
|
-
last_estimator.fit_transform(trans_x, y)
|
66
|
-
end
|
67
|
-
|
68
|
-
# Call the decision_function method of last estimator after applying all transforms.
|
69
|
-
#
|
70
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
71
|
-
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
72
|
-
def decision_function(x)
|
73
|
-
trans_x = apply_transforms(x)
|
74
|
-
last_estimator.decision_function(trans_x)
|
75
|
-
end
|
76
|
-
|
77
|
-
# Call the predict method of last estimator after applying all transforms.
|
78
|
-
#
|
79
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
80
|
-
# @return [Numo::NArray] The predicted results by last estimator.
|
81
|
-
def predict(x)
|
82
|
-
trans_x = apply_transforms(x)
|
83
|
-
last_estimator.predict(trans_x)
|
84
|
-
end
|
85
|
-
|
86
|
-
# Call the predict_log_proba method of last estimator after applying all transforms.
|
87
|
-
#
|
88
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
89
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
90
|
-
def predict_log_proba(x)
|
91
|
-
trans_x = apply_transforms(x)
|
92
|
-
last_estimator.predict_log_proba(trans_x)
|
93
|
-
end
|
94
|
-
|
95
|
-
# Call the predict_proba method of last estimator after applying all transforms.
|
96
|
-
#
|
97
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
98
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
99
|
-
def predict_proba(x)
|
100
|
-
trans_x = apply_transforms(x)
|
101
|
-
last_estimator.predict_proba(trans_x)
|
102
|
-
end
|
103
|
-
|
104
|
-
# Call the transform method of last estimator after applying all transforms.
|
105
|
-
#
|
106
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
107
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
|
108
|
-
def transform(x)
|
109
|
-
trans_x = apply_transforms(x)
|
110
|
-
last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
|
111
|
-
end
|
112
|
-
|
113
|
-
# Call the inverse_transform method in reverse order.
|
114
|
-
#
|
115
|
-
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
|
116
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
|
117
|
-
def inverse_transform(z)
|
118
|
-
itrans_z = z
|
119
|
-
@steps.keys.reverse_each do |name|
|
120
|
-
transformer = @steps[name]
|
121
|
-
next if transformer.nil?
|
122
|
-
|
123
|
-
itrans_z = transformer.inverse_transform(itrans_z)
|
124
|
-
end
|
125
|
-
itrans_z
|
126
|
-
end
|
127
|
-
|
128
|
-
# Call the score method of last estimator after applying all transforms.
|
129
|
-
#
|
130
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
131
|
-
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
132
|
-
# @return [Float] The score of last estimator
|
133
|
-
def score(x, y)
|
134
|
-
trans_x = apply_transforms(x)
|
135
|
-
last_estimator.score(trans_x, y)
|
136
|
-
end
|
137
|
-
|
138
|
-
private
|
139
|
-
|
140
|
-
def validate_steps(steps)
|
141
|
-
steps.keys[0...-1].each do |name|
|
142
|
-
transformer = steps[name]
|
143
|
-
next if transformer.nil? || (transformer.class.method_defined?(:fit) && transformer.class.method_defined?(:transform))
|
144
|
-
|
145
|
-
raise TypeError,
|
146
|
-
'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
|
147
|
-
"#{name} => #{transformer.class}"
|
148
|
-
end
|
149
|
-
|
150
|
-
estimator = steps[steps.keys.last]
|
151
|
-
unless estimator.nil? || estimator.class.method_defined?(:fit) # rubocop:disable Style/GuardClause
|
152
|
-
raise TypeError,
|
153
|
-
'Class of last step in pipeline should be implemented fit method: ' \
|
154
|
-
"#{steps.keys.last} => #{estimator.class}"
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
def apply_transforms(x, y = nil, fit: false)
|
159
|
-
trans_x = x
|
160
|
-
@steps.keys[0...-1].each do |name|
|
161
|
-
transformer = @steps[name]
|
162
|
-
next if transformer.nil?
|
163
|
-
|
164
|
-
transformer.fit(trans_x, y) if fit
|
165
|
-
trans_x = transformer.transform(trans_x)
|
166
|
-
end
|
167
|
-
trans_x
|
168
|
-
end
|
169
|
-
|
170
|
-
def last_estimator
|
171
|
-
@steps[@steps.keys.last]
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
@@ -1,93 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Discretizes features with a given number of bins.
|
9
|
-
# In some cases, discretizing features may accelerate decision tree training.
|
10
|
-
#
|
11
|
-
# @example
|
12
|
-
# discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 4)
|
13
|
-
# samples = Numo::DFloat.new(5, 2).rand - 0.5
|
14
|
-
# transformed = discretizer.fit_transform(samples)
|
15
|
-
# # > pp samples
|
16
|
-
# # Numo::DFloat#shape=[5,2]
|
17
|
-
# # [[-0.438246, -0.126933],
|
18
|
-
# # [ 0.294815, -0.298958],
|
19
|
-
# # [-0.383959, -0.155968],
|
20
|
-
# # [ 0.039948, 0.237815],
|
21
|
-
# # [-0.334911, -0.449117]]
|
22
|
-
# # > pp transformed
|
23
|
-
# # Numo::DFloat#shape=[5,2]
|
24
|
-
# # [[0, 1],
|
25
|
-
# # [3, 0],
|
26
|
-
# # [0, 1],
|
27
|
-
# # [2, 3],
|
28
|
-
# # [0, 0]]
|
29
|
-
class BinDiscretizer
|
30
|
-
include Base::BaseEstimator
|
31
|
-
include Base::Transformer
|
32
|
-
|
33
|
-
# Return the feature steps to be used for discretizing.
|
34
|
-
# @return [Array<Numo::DFloat>] (shape: [n_features, n_bins])
|
35
|
-
attr_reader :feature_steps
|
36
|
-
|
37
|
-
# Create a new discretizer for features with given number of bins.
|
38
|
-
#
|
39
|
-
# @param n_bins [Integer] The number of bins to be used for discretizing feature values.
|
40
|
-
def initialize(n_bins: 32)
|
41
|
-
@params = {}
|
42
|
-
@params[:n_bins] = n_bins
|
43
|
-
@feature_steps = nil
|
44
|
-
end
|
45
|
-
|
46
|
-
# Fit feature ranges to be discretized.
|
47
|
-
#
|
48
|
-
# @overload fit(x) -> BinDiscretizer
|
49
|
-
#
|
50
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
|
51
|
-
# @return [BinDiscretizer]
|
52
|
-
def fit(x, _y = nil)
|
53
|
-
x = check_convert_sample_array(x)
|
54
|
-
n_features = x.shape[1]
|
55
|
-
max_vals = x.max(0)
|
56
|
-
min_vals = x.min(0)
|
57
|
-
@feature_steps = Array.new(n_features) do |n|
|
58
|
-
Numo::DFloat.linspace(min_vals[n], max_vals[n], @params[:n_bins] + 1)[0...@params[:n_bins]]
|
59
|
-
end
|
60
|
-
self
|
61
|
-
end
|
62
|
-
|
63
|
-
# Fit feature ranges to be discretized, then return discretized samples.
|
64
|
-
#
|
65
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
66
|
-
#
|
67
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
68
|
-
# @return [Numo::DFloat] The discretized samples.
|
69
|
-
def fit_transform(x, _y = nil)
|
70
|
-
x = check_convert_sample_array(x)
|
71
|
-
fit(x).transform(x)
|
72
|
-
end
|
73
|
-
|
74
|
-
# Perform discretization of the given samples.
|
75
|
-
#
|
76
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
|
77
|
-
# @return [Numo::DFloat] The discretized samples.
|
78
|
-
def transform(x)
|
79
|
-
x = check_convert_sample_array(x)
|
80
|
-
n_samples, n_features = x.shape
|
81
|
-
transformed = Numo::DFloat.zeros(n_samples, n_features)
|
82
|
-
n_features.times do |n|
|
83
|
-
steps = @feature_steps[n]
|
84
|
-
@params[:n_bins].times do |bin|
|
85
|
-
mask = x[true, n].ge(steps[bin]).where
|
86
|
-
transformed[mask, n] = bin
|
87
|
-
end
|
88
|
-
end
|
89
|
-
transformed
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Preprocessing
|
8
|
-
# Binarize samples according to a threshold
|
9
|
-
#
|
10
|
-
# @example
|
11
|
-
# binarizer = Rumale::Preprocessing::Binarizer.new
|
12
|
-
# x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
|
13
|
-
# b = binarizer.transform(x)
|
14
|
-
# p b
|
15
|
-
#
|
16
|
-
# # Numo::DFloat#shape=[3, 2]
|
17
|
-
# # [[0, 1],
|
18
|
-
# # [1, 0],
|
19
|
-
# # [1, 1]]
|
20
|
-
class Binarizer
|
21
|
-
include Base::BaseEstimator
|
22
|
-
include Base::Transformer
|
23
|
-
|
24
|
-
# Create a new transformer for binarization.
|
25
|
-
# @param threshold [Float] The threshold value for binarization.
|
26
|
-
def initialize(threshold: 0.0)
|
27
|
-
check_params_numeric(threshold: threshold)
|
28
|
-
@params = { threshold: threshold }
|
29
|
-
end
|
30
|
-
|
31
|
-
# This method does nothing and returns the object itself.
|
32
|
-
# For compatibility with other transformer, this method exists.
|
33
|
-
#
|
34
|
-
# @overload fit() -> Binarizer
|
35
|
-
#
|
36
|
-
# @return [Binarizer]
|
37
|
-
def fit(_x = nil, _y = nil)
|
38
|
-
self
|
39
|
-
end
|
40
|
-
|
41
|
-
# Binarize each sample.
|
42
|
-
#
|
43
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
|
44
|
-
# @return [Numo::DFloat] The binarized samples.
|
45
|
-
def transform(x)
|
46
|
-
x = check_convert_sample_array(x)
|
47
|
-
x.class.cast(x.gt(@params[:threshold]))
|
48
|
-
end
|
49
|
-
|
50
|
-
# The output of this method is the same as that of the transform method.
|
51
|
-
# For compatibility with other transformer, this method exists.
|
52
|
-
#
|
53
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
|
54
|
-
# @return [Numo::DFloat] The binarized samples.
|
55
|
-
def fit_transform(x, _y = nil)
|
56
|
-
fit(x).transform(x)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/base/transformer'
|
5
|
-
require 'rumale/pairwise_metric'
|
6
|
-
|
7
|
-
module Rumale
|
8
|
-
module Preprocessing
|
9
|
-
# KernelCalculator is a class that calculates the kernel matrix with training data.
|
10
|
-
#
|
11
|
-
# @example
|
12
|
-
# transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
|
13
|
-
# regressor = Rumale::KernelMachine::KernelRidge.new
|
14
|
-
# pipeline = Rumale::Pipeline::Pipeline.new(
|
15
|
-
# steps: { trs: transformer, est: regressor }
|
16
|
-
# )
|
17
|
-
# pipeline.fit(x_train, y_train)
|
18
|
-
# results = pipeline.predict(x_test)
|
19
|
-
class KernelCalculator
|
20
|
-
include Base::BaseEstimator
|
21
|
-
include Base::Transformer
|
22
|
-
|
23
|
-
# Returns the training data for calculating kernel matrix.
|
24
|
-
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
-
attr_reader :components
|
26
|
-
|
27
|
-
# Create a new transformer that transforms feature vectors into a kernel matrix.
|
28
|
-
#
|
29
|
-
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
30
|
-
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
31
|
-
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
32
|
-
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
33
|
-
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
|
34
|
-
check_params_string(kernel: kernel)
|
35
|
-
check_params_numeric(gamma: gamma, coef: coef, degree: degree)
|
36
|
-
@params = {}
|
37
|
-
@params[:kernel] = kernel
|
38
|
-
@params[:gamma] = gamma
|
39
|
-
@params[:degree] = degree
|
40
|
-
@params[:coef] = coef
|
41
|
-
@components = nil
|
42
|
-
end
|
43
|
-
|
44
|
-
# Fit the model with given training data.
|
45
|
-
#
|
46
|
-
# @overload fit(x) -> KernelCalculator
|
47
|
-
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
48
|
-
# @return [KernelCalculator] The learned transformer itself.
|
49
|
-
def fit(x, _y = nil)
|
50
|
-
x = check_convert_sample_array(x)
|
51
|
-
@components = x.dup
|
52
|
-
self
|
53
|
-
end
|
54
|
-
|
55
|
-
# Fit the model with training data, and then transform them with the learned model.
|
56
|
-
#
|
57
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
58
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
59
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
|
60
|
-
def fit_transform(x, y = nil)
|
61
|
-
x = check_convert_sample_array(x)
|
62
|
-
fit(x, y).transform(x)
|
63
|
-
end
|
64
|
-
|
65
|
-
# Transform the given data with the learned model.
|
66
|
-
#
|
67
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
|
68
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
|
69
|
-
def transform(x)
|
70
|
-
x = check_convert_sample_array(x)
|
71
|
-
kernel_mat(x, @components)
|
72
|
-
end
|
73
|
-
|
74
|
-
private
|
75
|
-
|
76
|
-
def kernel_mat(x, y)
|
77
|
-
case @params[:kernel]
|
78
|
-
when 'rbf'
|
79
|
-
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
80
|
-
when 'poly'
|
81
|
-
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
82
|
-
when 'sigmoid'
|
83
|
-
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
84
|
-
when 'linear'
|
85
|
-
Rumale::PairwiseMetric.linear_kernel(x, y)
|
86
|
-
else
|
87
|
-
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|