rumale 0.23.3 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/transformer'
|
|
5
|
-
|
|
6
|
-
module Rumale
|
|
7
|
-
# This module consists of the classes that extract features from raw data.
|
|
8
|
-
module FeatureExtraction
|
|
9
|
-
# Encode array of feature-value hash to vectors.
|
|
10
|
-
# This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# encoder = Rumale::FeatureExtraction::HashVectorizer.new
|
|
14
|
-
# x = encoder.fit_transform([
|
|
15
|
-
# { foo: 1, bar: 2 },
|
|
16
|
-
# { foo: 3, baz: 1 }
|
|
17
|
-
# ])
|
|
18
|
-
# # > pp x
|
|
19
|
-
# # Numo::DFloat#shape=[2,3]
|
|
20
|
-
# # [[2, 0, 1],
|
|
21
|
-
# # [0, 1, 3]]
|
|
22
|
-
#
|
|
23
|
-
# x = encoder.fit_transform([
|
|
24
|
-
# { city: 'Dubai', temperature: 33 },
|
|
25
|
-
# { city: 'London', temperature: 12 },
|
|
26
|
-
# { city: 'San Francisco', temperature: 18 }
|
|
27
|
-
# ])
|
|
28
|
-
# # > pp x
|
|
29
|
-
# # Numo::DFloat#shape=[3,4]
|
|
30
|
-
# # [[1, 0, 0, 33],
|
|
31
|
-
# # [0, 1, 0, 12],
|
|
32
|
-
# # [0, 0, 1, 18]]
|
|
33
|
-
# # > pp encoder.inverse_transform(x)
|
|
34
|
-
# # [{:city=>"Dubai", :temperature=>33.0},
|
|
35
|
-
# # {:city=>"London", :temperature=>12.0},
|
|
36
|
-
# # {:city=>"San Francisco", :temperature=>18.0}]
|
|
37
|
-
class HashVectorizer
|
|
38
|
-
include Base::BaseEstimator
|
|
39
|
-
include Base::Transformer
|
|
40
|
-
|
|
41
|
-
# Return the list of feature names.
|
|
42
|
-
# @return [Array] (size: [n_features])
|
|
43
|
-
attr_reader :feature_names
|
|
44
|
-
|
|
45
|
-
# Return the hash consisting of pairs of feature names and indices.
|
|
46
|
-
# @return [Hash] (size: [n_features])
|
|
47
|
-
attr_reader :vocabulary
|
|
48
|
-
|
|
49
|
-
# Create a new encoder for converting array of hash consisting of feature names and values to vectors.
|
|
50
|
-
#
|
|
51
|
-
# @param separator [String] The separator string used for constructing new feature names for categorical feature.
|
|
52
|
-
# @param sort [Boolean] The flag indicating whether to sort feature names.
|
|
53
|
-
def initialize(separator: '=', sort: true)
|
|
54
|
-
check_params_string(separator: separator)
|
|
55
|
-
check_params_boolean(sort: sort)
|
|
56
|
-
@params = {}
|
|
57
|
-
@params[:separator] = separator
|
|
58
|
-
@params[:sort] = sort
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Fit the encoder with given training data.
|
|
62
|
-
#
|
|
63
|
-
# @overload fit(x) -> HashVectorizer
|
|
64
|
-
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
|
65
|
-
# @return [HashVectorizer]
|
|
66
|
-
def fit(x, _y = nil)
|
|
67
|
-
@feature_names = []
|
|
68
|
-
@vocabulary = {}
|
|
69
|
-
|
|
70
|
-
x.each do |f|
|
|
71
|
-
f.each do |k, v|
|
|
72
|
-
k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
|
|
73
|
-
next if @vocabulary.key?(k)
|
|
74
|
-
|
|
75
|
-
@feature_names.push(k)
|
|
76
|
-
@vocabulary[k] = @vocabulary.size
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
if sort_feature?
|
|
81
|
-
@feature_names.sort!
|
|
82
|
-
@feature_names.each_with_index { |k, i| @vocabulary[k] = i }
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
self
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
# Fit the encoder with given training data, then return encoded data.
|
|
89
|
-
#
|
|
90
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
|
91
|
-
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
|
92
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
|
93
|
-
def fit_transform(x, _y = nil)
|
|
94
|
-
fit(x).transform(x)
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
# Encode given the array of feature-value hash.
|
|
98
|
-
#
|
|
99
|
-
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
|
100
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
|
101
|
-
def transform(x)
|
|
102
|
-
x = [x] unless x.is_a?(Array)
|
|
103
|
-
n_samples = x.size
|
|
104
|
-
n_features = @vocabulary.size
|
|
105
|
-
z = Numo::DFloat.zeros(n_samples, n_features)
|
|
106
|
-
|
|
107
|
-
x.each_with_index do |f, i|
|
|
108
|
-
f.each do |k, v|
|
|
109
|
-
if v.is_a?(String)
|
|
110
|
-
k = "#{k}#{separator}#{v}".to_sym
|
|
111
|
-
v = 1
|
|
112
|
-
end
|
|
113
|
-
z[i, @vocabulary[k]] = v if @vocabulary.key?(k)
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
z
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
# Decode sample matrix to the array of feature-value hash.
|
|
121
|
-
#
|
|
122
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
|
123
|
-
# @return [Array<Hash>] The array of hash consisting of feature names and values.
|
|
124
|
-
def inverse_transform(x)
|
|
125
|
-
n_samples = x.shape[0]
|
|
126
|
-
reconst = []
|
|
127
|
-
|
|
128
|
-
n_samples.times do |i|
|
|
129
|
-
f = {}
|
|
130
|
-
x[i, true].each_with_index do |el, j|
|
|
131
|
-
feature_key_val(@feature_names[j], el).tap { |k, v| f[k.to_sym] = v } unless el.zero?
|
|
132
|
-
end
|
|
133
|
-
reconst.push(f)
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
reconst
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
private
|
|
140
|
-
|
|
141
|
-
def feature_key_val(fname, fval)
|
|
142
|
-
f = fname.to_s.split(separator)
|
|
143
|
-
f.size == 2 ? f : [fname, fval]
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
def separator
|
|
147
|
-
@params[:separator]
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
def sort_feature?
|
|
151
|
-
@params[:sort]
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
end
|
|
155
|
-
end
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/transformer'
|
|
5
|
-
require 'rumale/preprocessing/l1_normalizer'
|
|
6
|
-
require 'rumale/preprocessing/l2_normalizer'
|
|
7
|
-
|
|
8
|
-
module Rumale
|
|
9
|
-
module FeatureExtraction
|
|
10
|
-
# Transform sample matrix with term frequency (tf) to a normalized tf-idf (inverse document frequency) representation.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# encoder = Rumale::FeatureExtraction::HashVectorizer.new
|
|
14
|
-
# x = encoder.fit_transform([
|
|
15
|
-
# { foo: 1, bar: 2 },
|
|
16
|
-
# { foo: 3, baz: 1 }
|
|
17
|
-
# ])
|
|
18
|
-
#
|
|
19
|
-
# # > pp x
|
|
20
|
-
# # Numo::DFloat#shape=[2,3]
|
|
21
|
-
# # [[2, 0, 1],
|
|
22
|
-
# # [0, 1, 3]]
|
|
23
|
-
#
|
|
24
|
-
# transformer = Rumale::FeatureExtraction::TfidfTransformer.new
|
|
25
|
-
# x_tfidf = transformer.fit_transform(x)
|
|
26
|
-
#
|
|
27
|
-
# # > pp x_tfidf
|
|
28
|
-
# # Numo::DFloat#shape=[2,3]
|
|
29
|
-
# # [[0.959056, 0, 0.283217],
|
|
30
|
-
# # [0, 0.491506, 0.870874]]
|
|
31
|
-
#
|
|
32
|
-
# *Reference*
|
|
33
|
-
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
|
34
|
-
class TfidfTransformer
|
|
35
|
-
include Base::BaseEstimator
|
|
36
|
-
include Base::Transformer
|
|
37
|
-
|
|
38
|
-
# Return the vector consisting of inverse document frequency.
|
|
39
|
-
# @return [Numo::DFloat] (shape: [n_features])
|
|
40
|
-
attr_reader :idf
|
|
41
|
-
|
|
42
|
-
# Create a new transformer for converting tf vectors to tf-idf vectors.
|
|
43
|
-
#
|
|
44
|
-
# @param norm [String] The normalization method to be used ('l1', 'l2' and 'none').
|
|
45
|
-
# @param use_idf [Boolean] The flag indicating whether to use inverse document frequency weighting.
|
|
46
|
-
# @param smooth_idf [Boolean] The flag indicating whether to apply idf smoothing by log((n_samples + 1) / (df + 1)) + 1.
|
|
47
|
-
# @param sublinear_tf [Boolean] The flag indicating whether to perform sublinear tf scaling by 1 + log(tf).
|
|
48
|
-
def initialize(norm: 'l2', use_idf: true, smooth_idf: false, sublinear_tf: false)
|
|
49
|
-
check_params_string(norm: norm)
|
|
50
|
-
check_params_boolean(use_idf: use_idf, smooth_idf: smooth_idf, sublinear_tf: sublinear_tf)
|
|
51
|
-
@params = {}
|
|
52
|
-
@params[:norm] = norm
|
|
53
|
-
@params[:use_idf] = use_idf
|
|
54
|
-
@params[:smooth_idf] = smooth_idf
|
|
55
|
-
@params[:sublinear_tf] = sublinear_tf
|
|
56
|
-
@idf = nil
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Calculate the inverse document frequency for weighting.
|
|
60
|
-
#
|
|
61
|
-
# @overload fit(x) -> TfidfTransformer
|
|
62
|
-
#
|
|
63
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the idf values.
|
|
64
|
-
# @return [TfidfTransformer]
|
|
65
|
-
def fit(x, _y = nil)
|
|
66
|
-
return self unless @params[:use_idf]
|
|
67
|
-
|
|
68
|
-
x = check_convert_sample_array(x)
|
|
69
|
-
|
|
70
|
-
n_samples = x.shape[0]
|
|
71
|
-
df = x.class.cast(x.gt(0.0).count(0))
|
|
72
|
-
|
|
73
|
-
if @params[:smooth_idf]
|
|
74
|
-
df += 1
|
|
75
|
-
n_samples += 1
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
@idf = Numo::NMath.log(n_samples / df) + 1
|
|
79
|
-
|
|
80
|
-
self
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Calculate the idf values, and then transform samples to the tf-idf representation.
|
|
84
|
-
#
|
|
85
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
|
86
|
-
#
|
|
87
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate idf and be transformed to tf-idf representation.
|
|
88
|
-
# @return [Numo::DFloat] The transformed samples.
|
|
89
|
-
def fit_transform(x, _y = nil)
|
|
90
|
-
fit(x).transform(x)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Perform transforming the given samples to the tf-idf representation.
|
|
94
|
-
#
|
|
95
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
|
96
|
-
# @return [Numo::DFloat] The transformed samples.
|
|
97
|
-
def transform(x)
|
|
98
|
-
x = check_convert_sample_array(x)
|
|
99
|
-
z = x.dup
|
|
100
|
-
|
|
101
|
-
z[z.ne(0)] = Numo::NMath.log(z[z.ne(0)]) + 1 if @params[:sublinear_tf]
|
|
102
|
-
z *= @idf if @params[:use_idf]
|
|
103
|
-
case @params[:norm]
|
|
104
|
-
when 'l2'
|
|
105
|
-
z = Rumale::Preprocessing::L2Normalizer.new.fit_transform(z)
|
|
106
|
-
when 'l1'
|
|
107
|
-
z = Rumale::Preprocessing::L1Normalizer.new.fit_transform(z)
|
|
108
|
-
end
|
|
109
|
-
z
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
end
|
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/transformer'
|
|
5
|
-
require 'rumale/pairwise_metric'
|
|
6
|
-
|
|
7
|
-
module Rumale
|
|
8
|
-
module KernelApproximation
|
|
9
|
-
# Nystroem is a class that implements feature mapping with Nystroem method.
|
|
10
|
-
#
|
|
11
|
-
# @example
|
|
12
|
-
# require 'numo/linalg/autoloader'
|
|
13
|
-
#
|
|
14
|
-
# transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
|
|
15
|
-
# new_training_samples = transformer.fit_transform(training_samples)
|
|
16
|
-
# new_testing_samples = transformer.transform(testing_samples)
|
|
17
|
-
#
|
|
18
|
-
# *Reference*
|
|
19
|
-
# - Yang, T., Li, Y., Mahdavi, M., Jin, R., and Zhou, Z-H., "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
|
|
20
|
-
class Nystroem
|
|
21
|
-
include Base::BaseEstimator
|
|
22
|
-
include Base::Transformer
|
|
23
|
-
|
|
24
|
-
# Returns the randomly sampled training data for feature mapping.
|
|
25
|
-
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
|
26
|
-
attr_reader :components
|
|
27
|
-
|
|
28
|
-
# Returns the indices of the sampled training data.
|
|
29
|
-
# @return [Numo::Int32] (shape: [n_components])
|
|
30
|
-
attr_reader :component_indices
|
|
31
|
-
|
|
32
|
-
# Returns the normalizing factors.
|
|
33
|
-
# @return [Numo::DFloat] (shape: [n_components, n_components])
|
|
34
|
-
attr_reader :normalizer
|
|
35
|
-
|
|
36
|
-
# Return the random generator for transformation.
|
|
37
|
-
# @return [Random]
|
|
38
|
-
attr_reader :rng
|
|
39
|
-
|
|
40
|
-
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
|
41
|
-
#
|
|
42
|
-
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid')
|
|
43
|
-
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
|
44
|
-
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
|
45
|
-
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
|
46
|
-
# @param n_components [Integer] The number of dimensions of the kernel feature space.
|
|
47
|
-
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
|
48
|
-
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
|
|
49
|
-
check_params_string(kernel: kernel)
|
|
50
|
-
check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
|
|
51
|
-
check_params_numeric_or_nil(random_seed: random_seed)
|
|
52
|
-
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
|
53
|
-
@params[:random_seed] ||= srand
|
|
54
|
-
@rng = Random.new(@params[:random_seed])
|
|
55
|
-
@component_indices = nil
|
|
56
|
-
@components = nil
|
|
57
|
-
@normalizer = nil
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Fit the model with given training data.
|
|
61
|
-
#
|
|
62
|
-
# @overload fit(x) -> Nystroem
|
|
63
|
-
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
64
|
-
# @return [Nystroem] The learned transformer itself.
|
|
65
|
-
def fit(x, _y = nil)
|
|
66
|
-
x = check_convert_sample_array(x)
|
|
67
|
-
raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
|
68
|
-
|
|
69
|
-
# initialize some variables.
|
|
70
|
-
sub_rng = @rng.dup
|
|
71
|
-
n_samples = x.shape[0]
|
|
72
|
-
n_components = [1, [@params[:n_components], n_samples].min].max
|
|
73
|
-
|
|
74
|
-
# random sampling.
|
|
75
|
-
@component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
|
|
76
|
-
@components = x[@component_indices, true].dup
|
|
77
|
-
|
|
78
|
-
# calculate normalizing factor.
|
|
79
|
-
kernel_mat = kernel_mat(@components)
|
|
80
|
-
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
|
81
|
-
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
|
82
|
-
u = eig_vecs.reverse(1)
|
|
83
|
-
@normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
|
|
84
|
-
|
|
85
|
-
self
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
# Fit the model with training data, and then transform them with the learned model.
|
|
89
|
-
#
|
|
90
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
|
91
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
92
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
|
93
|
-
def fit_transform(x, _y = nil)
|
|
94
|
-
x = check_convert_sample_array(x)
|
|
95
|
-
fit(x).transform(x)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
# Transform the given data with the learned model.
|
|
99
|
-
#
|
|
100
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
|
101
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
|
102
|
-
def transform(x)
|
|
103
|
-
x = check_convert_sample_array(x)
|
|
104
|
-
z = kernel_mat(x, @components)
|
|
105
|
-
z.dot(@normalizer)
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
private
|
|
109
|
-
|
|
110
|
-
def kernel_mat(x, y = nil)
|
|
111
|
-
case @params[:kernel]
|
|
112
|
-
when 'rbf'
|
|
113
|
-
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
|
114
|
-
when 'poly'
|
|
115
|
-
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
|
116
|
-
when 'sigmoid'
|
|
117
|
-
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
|
118
|
-
when 'linear'
|
|
119
|
-
Rumale::PairwiseMetric.linear_kernel(x, y)
|
|
120
|
-
else
|
|
121
|
-
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
|
122
|
-
end
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
end
|
|
126
|
-
end
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/utils'
|
|
4
|
-
require 'rumale/base/base_estimator'
|
|
5
|
-
require 'rumale/base/transformer'
|
|
6
|
-
|
|
7
|
-
module Rumale
|
|
8
|
-
# Module for kernel approximation algorithms.
|
|
9
|
-
module KernelApproximation
|
|
10
|
-
# Class for RBF kernel feature mapping.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# transformer = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
|
|
14
|
-
# new_training_samples = transformer.fit_transform(training_samples)
|
|
15
|
-
# new_testing_samples = transformer.transform(testing_samples)
|
|
16
|
-
#
|
|
17
|
-
# *Reference*:
|
|
18
|
-
# - Rahimi, A., and Recht, B., "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
|
|
19
|
-
class RBF
|
|
20
|
-
include Base::BaseEstimator
|
|
21
|
-
include Base::Transformer
|
|
22
|
-
|
|
23
|
-
# Return the random matrix for transformation.
|
|
24
|
-
# @return [Numo::DFloat] (shape: [n_features, n_components])
|
|
25
|
-
attr_reader :random_mat
|
|
26
|
-
|
|
27
|
-
# Return the random vector for transformation.
|
|
28
|
-
# @return [Numo::DFloat] (shape: [n_components])
|
|
29
|
-
attr_reader :random_vec
|
|
30
|
-
|
|
31
|
-
# Return the random generator for transformation.
|
|
32
|
-
# @return [Random]
|
|
33
|
-
attr_reader :rng
|
|
34
|
-
|
|
35
|
-
# Create a new transformer for mapping to RBF kernel feature space.
|
|
36
|
-
#
|
|
37
|
-
# @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
|
|
38
|
-
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
|
39
|
-
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
|
40
|
-
def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
|
|
41
|
-
check_params_numeric(gamma: gamma, n_components: n_components)
|
|
42
|
-
check_params_numeric_or_nil(random_seed: random_seed)
|
|
43
|
-
check_params_positive(gamma: gamma, n_components: n_components)
|
|
44
|
-
@params = {}
|
|
45
|
-
@params[:gamma] = gamma
|
|
46
|
-
@params[:n_components] = n_components
|
|
47
|
-
@params[:random_seed] = random_seed
|
|
48
|
-
@params[:random_seed] ||= srand
|
|
49
|
-
@random_mat = nil
|
|
50
|
-
@random_vec = nil
|
|
51
|
-
@rng = Random.new(@params[:random_seed])
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Fit the model with given training data.
|
|
55
|
-
#
|
|
56
|
-
# @overload fit(x) -> RBF
|
|
57
|
-
#
|
|
58
|
-
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
59
|
-
# This method uses only the number of features of the data.
|
|
60
|
-
# @return [RBF] The learned transformer itself.
|
|
61
|
-
def fit(x, _y = nil)
|
|
62
|
-
x = check_convert_sample_array(x)
|
|
63
|
-
|
|
64
|
-
n_features = x.shape[1]
|
|
65
|
-
sub_rng = @rng.dup
|
|
66
|
-
@params[:n_components] = 2 * n_features if @params[:n_components] <= 0
|
|
67
|
-
@random_mat = Rumale::Utils.rand_normal([n_features, @params[:n_components]], sub_rng) * (2.0 * @params[:gamma])**0.5
|
|
68
|
-
n_half_components = @params[:n_components] / 2
|
|
69
|
-
@random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
|
|
70
|
-
Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
|
|
71
|
-
)
|
|
72
|
-
self
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
# Fit the model with training data, and then transform them with the learned model.
|
|
76
|
-
#
|
|
77
|
-
# @overload fit_transform(x) -> Numo::DFloat
|
|
78
|
-
#
|
|
79
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
|
80
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
|
81
|
-
def fit_transform(x, _y = nil)
|
|
82
|
-
x = check_convert_sample_array(x)
|
|
83
|
-
|
|
84
|
-
fit(x).transform(x)
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
# Transform the given data with the learned model.
|
|
88
|
-
#
|
|
89
|
-
# @overload transform(x) -> Numo::DFloat
|
|
90
|
-
#
|
|
91
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
|
92
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
|
93
|
-
def transform(x)
|
|
94
|
-
x = check_convert_sample_array(x)
|
|
95
|
-
|
|
96
|
-
n_samples, = x.shape
|
|
97
|
-
projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
|
|
98
|
-
Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
end
|
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/transformer'
|
|
5
|
-
|
|
6
|
-
module Rumale
|
|
7
|
-
module KernelMachine
|
|
8
|
-
# KernelFDA is a class that implements Kernel Fisher Discriminant Analysis.
|
|
9
|
-
#
|
|
10
|
-
# @example
|
|
11
|
-
# require 'numo/linalg/autoloader'
|
|
12
|
-
#
|
|
13
|
-
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
|
|
14
|
-
# kfda = Rumale::KernelMachine::KernelFDA.new
|
|
15
|
-
# mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
|
|
16
|
-
#
|
|
17
|
-
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
|
|
18
|
-
# mapped_test_samples = kfda.transform(kernel_mat_test)
|
|
19
|
-
#
|
|
20
|
-
# *Reference*
|
|
21
|
-
# - Baudat, G., and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
|
|
22
|
-
class KernelFDA
|
|
23
|
-
include Base::BaseEstimator
|
|
24
|
-
include Base::Transformer
|
|
25
|
-
|
|
26
|
-
# Returns the eigenvectors for embedding.
|
|
27
|
-
# @return [Numo::DFloat] (shape: [n_training_samples, n_components])
|
|
28
|
-
attr_reader :alphas
|
|
29
|
-
|
|
30
|
-
# Create a new transformer with Kernel FDA.
|
|
31
|
-
#
|
|
32
|
-
# @param n_components [Integer] The number of components.
|
|
33
|
-
# @param reg_param [Float] The regularization parameter.
|
|
34
|
-
def initialize(n_components: nil, reg_param: 1e-8)
|
|
35
|
-
check_params_numeric_or_nil(n_components: n_components)
|
|
36
|
-
check_params_numeric(reg_param: reg_param)
|
|
37
|
-
@params = {}
|
|
38
|
-
@params[:n_components] = n_components
|
|
39
|
-
@params[:reg_param] = reg_param
|
|
40
|
-
@alphas = nil
|
|
41
|
-
@row_mean = nil
|
|
42
|
-
@all_mean = nil
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Fit the model with given training data.
|
|
46
|
-
# To execute this method, Numo::Linalg must be loaded.
|
|
47
|
-
#
|
|
48
|
-
# @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
|
|
49
|
-
# The kernel matrix of the training data to be used for fitting the model.
|
|
50
|
-
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
|
51
|
-
# @return [KernelFDA] The learned transformer itself.
|
|
52
|
-
def fit(x, y)
|
|
53
|
-
x = check_convert_sample_array(x)
|
|
54
|
-
y = check_convert_label_array(y)
|
|
55
|
-
check_sample_label_size(x, y)
|
|
56
|
-
raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
|
|
57
|
-
raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
|
58
|
-
|
|
59
|
-
# initialize some variables.
|
|
60
|
-
n_samples = x.shape[0]
|
|
61
|
-
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
|
62
|
-
n_classes = @classes.size
|
|
63
|
-
n_components = if @params[:n_components].nil?
|
|
64
|
-
[n_samples, n_classes - 1].min
|
|
65
|
-
else
|
|
66
|
-
[n_samples, @params[:n_components]].min
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
# centering
|
|
70
|
-
@row_mean = x.mean(0)
|
|
71
|
-
@all_mean = @row_mean.sum.fdiv(n_samples)
|
|
72
|
-
centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
|
|
73
|
-
|
|
74
|
-
# calculate between and within scatter matrix.
|
|
75
|
-
class_mat = Numo::DFloat.zeros(n_samples, n_samples)
|
|
76
|
-
@classes.each do |label|
|
|
77
|
-
idx_vec = y.eq(label)
|
|
78
|
-
class_mat += Numo::DFloat.cast(idx_vec).outer(idx_vec) / idx_vec.count
|
|
79
|
-
end
|
|
80
|
-
between_mat = centered_kernel_mat.dot(class_mat).dot(centered_kernel_mat.transpose)
|
|
81
|
-
within_mat = centered_kernel_mat.dot(centered_kernel_mat.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)
|
|
82
|
-
|
|
83
|
-
# calculate projection matrix.
|
|
84
|
-
_, eig_vecs = Numo::Linalg.eigh(
|
|
85
|
-
between_mat, within_mat,
|
|
86
|
-
vals_range: (n_samples - n_components)...n_samples
|
|
87
|
-
)
|
|
88
|
-
@alphas = eig_vecs.reverse(1).dup
|
|
89
|
-
self
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
# Fit the model with training data, and then transform them with the learned model.
|
|
93
|
-
# To execute this method, Numo::Linalg must be loaded.
|
|
94
|
-
#
|
|
95
|
-
# @param x [Numo::DFloat] (shape: [n_samples, n_samples])
|
|
96
|
-
# The kernel matrix of the training data to be used for fitting the model and transformed.
|
|
97
|
-
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
|
98
|
-
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
|
99
|
-
def fit_transform(x, y)
|
|
100
|
-
x = check_convert_sample_array(x)
|
|
101
|
-
y = check_convert_label_array(y)
|
|
102
|
-
check_sample_label_size(x, y)
|
|
103
|
-
fit(x, y).transform(x)
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Transform the given data with the learned model.
|
|
107
|
-
#
|
|
108
|
-
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
|
109
|
-
# The kernel matrix between testing samples and training samples to be transformed.
|
|
110
|
-
# @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
|
|
111
|
-
def transform(x)
|
|
112
|
-
x = check_convert_sample_array(x)
|
|
113
|
-
col_mean = x.sum(1) / @row_mean.shape[0]
|
|
114
|
-
centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
|
|
115
|
-
transformed = centered_kernel_mat.dot(@alphas)
|
|
116
|
-
@params[:n_components] == 1 ? transformed[true, 0].dup : transformed
|
|
117
|
-
end
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|