rumale 0.23.3 → 0.24.0
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
data/lib/rumale/feature_extraction/hash_vectorizer.rb
@@ -1,155 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-
-module Rumale
-  # This module consists of the classes that extract features from raw data.
-  module FeatureExtraction
-    # Encode array of feature-value hash to vectors.
-    # This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
-    #
-    # @example
-    #   encoder = Rumale::FeatureExtraction::HashVectorizer.new
-    #   x = encoder.fit_transform([
-    #     { foo: 1, bar: 2 },
-    #     { foo: 3, baz: 1 }
-    #   ])
-    #   # > pp x
-    #   # Numo::DFloat#shape=[2,3]
-    #   # [[2, 0, 1],
-    #   #  [0, 1, 3]]
-    #
-    #   x = encoder.fit_transform([
-    #     { city: 'Dubai', temperature: 33 },
-    #     { city: 'London', temperature: 12 },
-    #     { city: 'San Francisco', temperature: 18 }
-    #   ])
-    #   # > pp x
-    #   # Numo::DFloat#shape=[3,4]
-    #   # [[1, 0, 0, 33],
-    #   #  [0, 1, 0, 12],
-    #   #  [0, 0, 1, 18]]
-    #   # > pp encoder.inverse_transform(x)
-    #   # [{:city=>"Dubai", :temperature=>33.0},
-    #   #  {:city=>"London", :temperature=>12.0},
-    #   #  {:city=>"San Francisco", :temperature=>18.0}]
-    class HashVectorizer
-      include Base::BaseEstimator
-      include Base::Transformer
-
-      # Return the list of feature names.
-      # @return [Array] (size: [n_features])
-      attr_reader :feature_names
-
-      # Return the hash consisting of pairs of feature names and indices.
-      # @return [Hash] (size: [n_features])
-      attr_reader :vocabulary
-
-      # Create a new encoder for converting an array of hashes consisting of feature names and values to vectors.
-      #
-      # @param separator [String] The separator string used for constructing new feature names for categorical features.
-      # @param sort [Boolean] The flag indicating whether to sort feature names.
-      def initialize(separator: '=', sort: true)
-        check_params_string(separator: separator)
-        check_params_boolean(sort: sort)
-        @params = {}
-        @params[:separator] = separator
-        @params[:sort] = sort
-      end
-
-      # Fit the encoder with given training data.
-      #
-      # @overload fit(x) -> HashVectorizer
-      #   @param x [Array<Hash>] (shape: [n_samples]) The array of hashes consisting of feature names and values.
-      #   @return [HashVectorizer]
-      def fit(x, _y = nil)
-        @feature_names = []
-        @vocabulary = {}
-
-        x.each do |f|
-          f.each do |k, v|
-            k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
-            next if @vocabulary.key?(k)
-
-            @feature_names.push(k)
-            @vocabulary[k] = @vocabulary.size
-          end
-        end
-
-        if sort_feature?
-          @feature_names.sort!
-          @feature_names.each_with_index { |k, i| @vocabulary[k] = i }
-        end
-
-        self
-      end
-
-      # Fit the encoder with given training data, then return encoded data.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #   @param x [Array<Hash>] (shape: [n_samples]) The array of hashes consisting of feature names and values.
-      #   @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-      def fit_transform(x, _y = nil)
-        fit(x).transform(x)
-      end
-
-      # Encode the given array of feature-value hashes.
-      #
-      # @param x [Array<Hash>] (shape: [n_samples]) The array of hashes consisting of feature names and values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-      def transform(x)
-        x = [x] unless x.is_a?(Array)
-        n_samples = x.size
-        n_features = @vocabulary.size
-        z = Numo::DFloat.zeros(n_samples, n_features)
-
-        x.each_with_index do |f, i|
-          f.each do |k, v|
-            if v.is_a?(String)
-              k = "#{k}#{separator}#{v}".to_sym
-              v = 1
-            end
-            z[i, @vocabulary[k]] = v if @vocabulary.key?(k)
-          end
-        end
-
-        z
-      end
-
-      # Decode the sample matrix to an array of feature-value hashes.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-      # @return [Array<Hash>] The array of hashes consisting of feature names and values.
-      def inverse_transform(x)
-        n_samples = x.shape[0]
-        reconst = []
-
-        n_samples.times do |i|
-          f = {}
-          x[i, true].each_with_index do |el, j|
-            feature_key_val(@feature_names[j], el).tap { |k, v| f[k.to_sym] = v } unless el.zero?
-          end
-          reconst.push(f)
-        end
-
-        reconst
-      end
-
-      private
-
-      def feature_key_val(fname, fval)
-        f = fname.to_s.split(separator)
-        f.size == 2 ? f : [fname, fval]
-      end
-
-      def separator
-        @params[:separator]
-      end
-
-      def sort_feature?
-        @params[:sort]
-      end
-    end
-  end
-end
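For reference, a minimal usage sketch of the removed HashVectorizer as it behaves in 0.23.3, put together from its @example documentation above; the sample hashes are made-up toy data, not taken from the diff.

require 'rumale' # in 0.23.3 the monolithic gem still ships Rumale::FeatureExtraction

# Toy records mixing a categorical feature (:city) and a numeric one (:temperature).
samples = [
  { city: 'Dubai', temperature: 33 },
  { city: 'London', temperature: 12 },
  { city: 'San Francisco', temperature: 18 }
]

encoder = Rumale::FeatureExtraction::HashVectorizer.new(separator: '=', sort: true)
x = encoder.fit_transform(samples)  # Numo::DFloat of shape [3, 4]

pp encoder.feature_names            # string values become one-hot features like :"city=Dubai"
pp encoder.inverse_transform(x)     # decodes back to an array of feature-value hashes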
data/lib/rumale/feature_extraction/tfidf_transformer.rb
@@ -1,113 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-require 'rumale/preprocessing/l1_normalizer'
-require 'rumale/preprocessing/l2_normalizer'
-
-module Rumale
-  module FeatureExtraction
-    # Transform a sample matrix of term frequencies (tf) to a normalized tf-idf (term frequency-inverse document frequency) representation.
-    #
-    # @example
-    #   encoder = Rumale::FeatureExtraction::HashVectorizer.new
-    #   x = encoder.fit_transform([
-    #     { foo: 1, bar: 2 },
-    #     { foo: 3, baz: 1 }
-    #   ])
-    #
-    #   # > pp x
-    #   # Numo::DFloat#shape=[2,3]
-    #   # [[2, 0, 1],
-    #   #  [0, 1, 3]]
-    #
-    #   transformer = Rumale::FeatureExtraction::TfidfTransformer.new
-    #   x_tfidf = transformer.fit_transform(x)
-    #
-    #   # > pp x_tfidf
-    #   # Numo::DFloat#shape=[2,3]
-    #   # [[0.959056, 0, 0.283217],
-    #   #  [0, 0.491506, 0.870874]]
-    #
-    # *Reference*
-    # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
-    class TfidfTransformer
-      include Base::BaseEstimator
-      include Base::Transformer
-
-      # Return the vector consisting of the inverse document frequencies.
-      # @return [Numo::DFloat] (shape: [n_features])
-      attr_reader :idf
-
-      # Create a new transformer for converting tf vectors to tf-idf vectors.
-      #
-      # @param norm [String] The normalization method to be used ('l1', 'l2' and 'none').
-      # @param use_idf [Boolean] The flag indicating whether to use inverse document frequency weighting.
-      # @param smooth_idf [Boolean] The flag indicating whether to apply idf smoothing by log((n_samples + 1) / (df + 1)) + 1.
-      # @param sublinear_tf [Boolean] The flag indicating whether to perform sublinear tf scaling by 1 + log(tf).
-      def initialize(norm: 'l2', use_idf: true, smooth_idf: false, sublinear_tf: false)
-        check_params_string(norm: norm)
-        check_params_boolean(use_idf: use_idf, smooth_idf: smooth_idf, sublinear_tf: sublinear_tf)
-        @params = {}
-        @params[:norm] = norm
-        @params[:use_idf] = use_idf
-        @params[:smooth_idf] = smooth_idf
-        @params[:sublinear_tf] = sublinear_tf
-        @idf = nil
-      end
-
-      # Calculate the inverse document frequency for weighting.
-      #
-      # @overload fit(x) -> TfidfTransformer
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the idf values.
-      # @return [TfidfTransformer]
-      def fit(x, _y = nil)
-        return self unless @params[:use_idf]
-
-        x = check_convert_sample_array(x)
-
-        n_samples = x.shape[0]
-        df = x.class.cast(x.gt(0.0).count(0))
-
-        if @params[:smooth_idf]
-          df += 1
-          n_samples += 1
-        end
-
-        @idf = Numo::NMath.log(n_samples / df) + 1
-
-        self
-      end
-
-      # Calculate the idf values, and then transform the samples to the tf-idf representation.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the idf values and to be transformed to the tf-idf representation.
-      # @return [Numo::DFloat] The transformed samples.
-      def fit_transform(x, _y = nil)
-        fit(x).transform(x)
-      end
-
-      # Transform the given samples to the tf-idf representation.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
-      # @return [Numo::DFloat] The transformed samples.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        z = x.dup
-
-        z[z.ne(0)] = Numo::NMath.log(z[z.ne(0)]) + 1 if @params[:sublinear_tf]
-        z *= @idf if @params[:use_idf]
-        case @params[:norm]
-        when 'l2'
-          z = Rumale::Preprocessing::L2Normalizer.new.fit_transform(z)
-        when 'l1'
-          z = Rumale::Preprocessing::L1Normalizer.new.fit_transform(z)
-        end
-        z
-      end
-    end
-  end
-end
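A short sketch of how the removed TfidfTransformer combines with HashVectorizer under 0.23.3, following the @example above; with the default settings the idf weight is log(n_samples / df) + 1 and rows are then L2-normalized. The input hashes are toy data.

require 'rumale'

# Term-frequency matrix built with the (also removed) HashVectorizer.
tf = Rumale::FeatureExtraction::HashVectorizer.new.fit_transform([
  { foo: 1, bar: 2 },
  { foo: 3, baz: 1 }
])

transformer = Rumale::FeatureExtraction::TfidfTransformer.new(norm: 'l2', use_idf: true)
x_tfidf = transformer.fit_transform(tf)

pp transformer.idf  # per-feature inverse document frequencies
pp x_tfidf          # Numo::DFloat of shape [2, 3], rows scaled to unit L2 norm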
data/lib/rumale/kernel_approximation/nystroem.rb
@@ -1,126 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-require 'rumale/pairwise_metric'
-
-module Rumale
-  module KernelApproximation
-    # Nystroem is a class that implements feature mapping with the Nystroem method.
-    #
-    # @example
-    #   require 'numo/linalg/autoloader'
-    #
-    #   transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
-    #   new_training_samples = transformer.fit_transform(training_samples)
-    #   new_testing_samples = transformer.transform(testing_samples)
-    #
-    # *Reference*
-    # - Yang, T., Li, Y., Mahdavi, M., Jin, R., and Zhou, Z-H., "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
-    class Nystroem
-      include Base::BaseEstimator
-      include Base::Transformer
-
-      # Returns the randomly sampled training data for feature mapping.
-      # @return [Numo::DFloat] (shape: [n_components, n_features])
-      attr_reader :components
-
-      # Returns the indices of the sampled training data.
-      # @return [Numo::Int32] (shape: [n_components])
-      attr_reader :component_indices
-
-      # Returns the normalizing factors.
-      # @return [Numo::DFloat] (shape: [n_components, n_components])
-      attr_reader :normalizer
-
-      # Return the random generator for transformation.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new transformer for mapping to the kernel feature space with the Nystroem method.
-      #
-      # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
-      # @param gamma [Float] The gamma parameter in the rbf/poly/sigmoid kernel functions.
-      # @param degree [Integer] The degree parameter in the polynomial kernel function.
-      # @param coef [Float] The coefficient in the poly/sigmoid kernel functions.
-      # @param n_components [Integer] The number of dimensions of the kernel feature space.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
-        check_params_string(kernel: kernel)
-        check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @component_indices = nil
-        @components = nil
-        @normalizer = nil
-      end
-
-      # Fit the model with given training data.
-      #
-      # @overload fit(x) -> Nystroem
-      #   @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      #   @return [Nystroem] The learned transformer itself.
-      def fit(x, _y = nil)
-        x = check_convert_sample_array(x)
-        raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
-
-        # initialize some variables.
-        sub_rng = @rng.dup
-        n_samples = x.shape[0]
-        n_components = [1, [@params[:n_components], n_samples].min].max
-
-        # random sampling.
-        @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
-        @components = x[@component_indices, true].dup
-
-        # calculate normalizing factor.
-        kernel_mat = kernel_mat(@components)
-        eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
-        la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
-        u = eig_vecs.reverse(1)
-        @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
-
-        self
-      end
-
-      # Fit the model with training data, and then transform them with the learned model.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def fit_transform(x, _y = nil)
-        x = check_convert_sample_array(x)
-        fit(x).transform(x)
-      end
-
-      # Transform the given data with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        z = kernel_mat(x, @components)
-        z.dot(@normalizer)
-      end
-
-      private
-
-      def kernel_mat(x, y = nil)
-        case @params[:kernel]
-        when 'rbf'
-          Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
-        when 'poly'
-          Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
-        when 'sigmoid'
-          Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
-        when 'linear'
-          Rumale::PairwiseMetric.linear_kernel(x, y)
-        else
-          raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
-        end
-      end
-    end
-  end
-end
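A sketch of the removed Nystroem transformer under 0.23.3. It eigendecomposes the kernel matrix of the sampled components, so Numo::Linalg has to be loaded first; the random matrices below are toy data for illustration, not from the diff.

require 'numo/linalg/autoloader' # Nystroem#fit raises without Numo::Linalg
require 'rumale'

x_train = Numo::DFloat.new(50, 4).rand  # toy samples, just to exercise the API
x_test  = Numo::DFloat.new(10, 4).rand

transformer = Rumale::KernelApproximation::Nystroem.new(
  kernel: 'rbf', gamma: 1, n_components: 16, random_seed: 1
)
z_train = transformer.fit_transform(x_train)  # shape [50, 16]
z_test  = transformer.transform(x_test)       # shape [10, 16]

pp transformer.component_indices  # indices of the 16 training rows used as components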
data/lib/rumale/kernel_approximation/rbf.rb
@@ -1,102 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/utils'
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-
-module Rumale
-  # Module for kernel approximation algorithms.
-  module KernelApproximation
-    # Class for RBF kernel feature mapping.
-    #
-    # @example
-    #   transformer = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
-    #   new_training_samples = transformer.fit_transform(training_samples)
-    #   new_testing_samples = transformer.transform(testing_samples)
-    #
-    # *Reference*:
-    # - Rahimi, A., and Recht, B., "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
-    class RBF
-      include Base::BaseEstimator
-      include Base::Transformer
-
-      # Return the random matrix for transformation.
-      # @return [Numo::DFloat] (shape: [n_features, n_components])
-      attr_reader :random_mat
-
-      # Return the random vector for transformation.
-      # @return [Numo::DFloat] (shape: [n_components])
-      attr_reader :random_vec
-
-      # Return the random generator for transformation.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new transformer for mapping to the RBF kernel feature space.
-      #
-      # @param gamma [Float] The parameter of the RBF kernel: exp(-gamma * x^2).
-      # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
-        check_params_numeric(gamma: gamma, n_components: n_components)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        check_params_positive(gamma: gamma, n_components: n_components)
-        @params = {}
-        @params[:gamma] = gamma
-        @params[:n_components] = n_components
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @random_mat = nil
-        @random_vec = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-
-      # Fit the model with given training data.
-      #
-      # @overload fit(x) -> RBF
-      #
-      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      #   This method uses only the number of features of the data.
-      # @return [RBF] The learned transformer itself.
-      def fit(x, _y = nil)
-        x = check_convert_sample_array(x)
-
-        n_features = x.shape[1]
-        sub_rng = @rng.dup
-        @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
-        @random_mat = Rumale::Utils.rand_normal([n_features, @params[:n_components]], sub_rng) * (2.0 * @params[:gamma])**0.5
-        n_half_components = @params[:n_components] / 2
-        @random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
-          Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
-        )
-        self
-      end
-
-      # Fit the model with training data, and then transform them with the learned model.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def fit_transform(x, _y = nil)
-        x = check_convert_sample_array(x)
-
-        fit(x).transform(x)
-      end
-
-      # Transform the given data with the learned model.
-      #
-      # @overload transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def transform(x)
-        x = check_convert_sample_array(x)
-
-        n_samples, = x.shape
-        projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
-        Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
-      end
-    end
-  end
-end
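A sketch of the removed random Fourier feature mapper under 0.23.3: after fit_transform, dot products of mapped rows approximate the RBF kernel exp(-gamma * ||a - b||^2). The toy data and the pairwise comparison are illustrative, not taken from the diff.

require 'rumale'

x = Numo::DFloat.new(100, 3).rand  # toy samples

transformer = Rumale::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
z = transformer.fit_transform(x)   # shape [100, 1024]

# The inner product of two mapped rows should be close to the exact RBF kernel value.
approx = z[0, true].dot(z[1, true])
exact  = Math.exp(-2.0 * ((x[0, true] - x[1, true])**2).sum)
pp [approx, exact]  # the two values converge as n_components grows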
data/lib/rumale/kernel_machine/kernel_fda.rb
@@ -1,120 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-
-module Rumale
-  module KernelMachine
-    # KernelFDA is a class that implements Kernel Fisher Discriminant Analysis.
-    #
-    # @example
-    #   require 'numo/linalg/autoloader'
-    #
-    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
-    #   kfda = Rumale::KernelMachine::KernelFDA.new
-    #   mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
-    #
-    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
-    #   mapped_test_samples = kfda.transform(kernel_mat_test)
-    #
-    # *Reference*
-    # - Baudat, G., and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
-    class KernelFDA
-      include Base::BaseEstimator
-      include Base::Transformer
-
-      # Returns the eigenvectors for embedding.
-      # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
-      attr_reader :alphas
-
-      # Create a new transformer with Kernel FDA.
-      #
-      # @param n_components [Integer] The number of components.
-      # @param reg_param [Float] The regularization parameter.
-      def initialize(n_components: nil, reg_param: 1e-8)
-        check_params_numeric_or_nil(n_components: n_components)
-        check_params_numeric(reg_param: reg_param)
-        @params = {}
-        @params[:n_components] = n_components
-        @params[:reg_param] = reg_param
-        @alphas = nil
-        @row_mean = nil
-        @all_mean = nil
-      end
-
-      # Fit the model with given training data.
-      # To execute this method, Numo::Linalg must be loaded.
-      #
-      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
-      #   The kernel matrix of the training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [KernelFDA] The learned transformer itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
-        raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
-
-        # initialize some variables.
-        n_samples = x.shape[0]
-        @classes = Numo::Int32[*y.to_a.uniq.sort]
-        n_classes = @classes.size
-        n_components = if @params[:n_components].nil?
-                         [n_samples, n_classes - 1].min
-                       else
-                         [n_samples, @params[:n_components]].min
-                       end
-
-        # centering
-        @row_mean = x.mean(0)
-        @all_mean = @row_mean.sum.fdiv(n_samples)
-        centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
-
-        # calculate between and within scatter matrix.
-        class_mat = Numo::DFloat.zeros(n_samples, n_samples)
-        @classes.each do |label|
-          idx_vec = y.eq(label)
-          class_mat += Numo::DFloat.cast(idx_vec).outer(idx_vec) / idx_vec.count
-        end
-        between_mat = centered_kernel_mat.dot(class_mat).dot(centered_kernel_mat.transpose)
-        within_mat = centered_kernel_mat.dot(centered_kernel_mat.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)
-
-        # calculate projection matrix.
-        _, eig_vecs = Numo::Linalg.eigh(
-          between_mat, within_mat,
-          vals_range: (n_samples - n_components)...n_samples
-        )
-        @alphas = eig_vecs.reverse(1).dup
-        self
-      end
-
-      # Fit the model with training data, and then transform them with the learned model.
-      # To execute this method, Numo::Linalg must be loaded.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
-      #   The kernel matrix of the training data to be used for fitting the model and transformed.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def fit_transform(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-        fit(x, y).transform(x)
-      end
-
-      # Transform the given data with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
-      #   The kernel matrix between testing samples and training samples to be transformed.
-      # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        col_mean = x.sum(1) / @row_mean.shape[0]
-        centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
-        transformed = centered_kernel_mat.dot(@alphas)
-        @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
-      end
-    end
-  end
-end
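A sketch of the removed KernelFDA under 0.23.3, following its @example: fit takes a precomputed square kernel matrix of the training data plus labels, and transform takes the kernel matrix between test and training samples. The toy data and labels below are made up for illustration.

require 'numo/linalg/autoloader' # KernelFDA#fit raises without Numo::Linalg
require 'rumale'

x_train = Numo::DFloat.new(30, 4).rand
y_train = Numo::Int32.cast(Array.new(30) { |i| i % 3 })  # three toy classes
x_test  = Numo::DFloat.new(5, 4).rand

kernel_mat_train = Rumale::PairwiseMetric.rbf_kernel(x_train)
kernel_mat_test  = Rumale::PairwiseMetric.rbf_kernel(x_test, x_train)

kfda = Rumale::KernelMachine::KernelFDA.new  # n_components defaults to n_classes - 1
mapped_train = kfda.fit_transform(kernel_mat_train, y_train)  # shape [30, 2]
mapped_test  = kfda.transform(kernel_mat_test)                # shape [5, 2]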