rumale 0.23.3 → 0.24.0

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,155 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # This module consists of the classes that extract features from raw data.
-   module FeatureExtraction
-     # Encode array of feature-value hash to vectors.
-     # This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
-     #
-     # @example
-     #   encoder = Rumale::FeatureExtraction::HashVectorizer.new
-     #   x = encoder.fit_transform([
-     #     { foo: 1, bar: 2 },
-     #     { foo: 3, baz: 1 }
-     #   ])
-     #   # > pp x
-     #   # Numo::DFloat#shape=[2,3]
-     #   # [[2, 0, 1],
-     #   #  [0, 1, 3]]
-     #
-     #   x = encoder.fit_transform([
-     #     { city: 'Dubai', temperature: 33 },
-     #     { city: 'London', temperature: 12 },
-     #     { city: 'San Francisco', temperature: 18 }
-     #   ])
-     #   # > pp x
-     #   # Numo::DFloat#shape=[3,4]
-     #   # [[1, 0, 0, 33],
-     #   #  [0, 1, 0, 12],
-     #   #  [0, 0, 1, 18]]
-     #   # > pp encoder.inverse_transform(x)
-     #   # [{:city=>"Dubai", :temperature=>33.0},
-     #   #  {:city=>"London", :temperature=>12.0},
-     #   #  {:city=>"San Francisco", :temperature=>18.0}]
-     class HashVectorizer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the list of feature names.
-       # @return [Array] (size: [n_features])
-       attr_reader :feature_names
-
-       # Return the hash consisting of pairs of feature names and indices.
-       # @return [Hash] (size: [n_features])
-       attr_reader :vocabulary
-
-       # Create a new encoder for converting array of hash consisting of feature names and values to vectors.
-       #
-       # @param separator [String] The separator string used for constructing new feature names for categorical feature.
-       # @param sort [Boolean] The flag indicating whether to sort feature names.
-       def initialize(separator: '=', sort: true)
-         check_params_string(separator: separator)
-         check_params_boolean(sort: sort)
-         @params = {}
-         @params[:separator] = separator
-         @params[:sort] = sort
-       end
-
-       # Fit the encoder with given training data.
-       #
-       # @overload fit(x) -> HashVectorizer
-       #   @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
-       #   @return [HashVectorizer]
-       def fit(x, _y = nil)
-         @feature_names = []
-         @vocabulary = {}
-
-         x.each do |f|
-           f.each do |k, v|
-             k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
-             next if @vocabulary.key?(k)
-
-             @feature_names.push(k)
-             @vocabulary[k] = @vocabulary.size
-           end
-         end
-
-         if sort_feature?
-           @feature_names.sort!
-           @feature_names.each_with_index { |k, i| @vocabulary[k] = i }
-         end
-
-         self
-       end
-
-       # Fit the encoder with given training data, then return encoded data.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #   @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
-       #   @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-       def fit_transform(x, _y = nil)
-         fit(x).transform(x)
-       end
-
-       # Encode the given array of feature-value hash.
-       #
-       # @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-       def transform(x)
-         x = [x] unless x.is_a?(Array)
-         n_samples = x.size
-         n_features = @vocabulary.size
-         z = Numo::DFloat.zeros(n_samples, n_features)
-
-         x.each_with_index do |f, i|
-           f.each do |k, v|
-             if v.is_a?(String)
-               k = "#{k}#{separator}#{v}".to_sym
-               v = 1
-             end
-             z[i, @vocabulary[k]] = v if @vocabulary.key?(k)
-           end
-         end
-
-         z
-       end
-
-       # Decode sample matrix to the array of feature-value hash.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
-       # @return [Array<Hash>] The array of hash consisting of feature names and values.
-       def inverse_transform(x)
-         n_samples = x.shape[0]
-         reconst = []
-
-         n_samples.times do |i|
-           f = {}
-           x[i, true].each_with_index do |el, j|
-             feature_key_val(@feature_names[j], el).tap { |k, v| f[k.to_sym] = v } unless el.zero?
-           end
-           reconst.push(f)
-         end
-
-         reconst
-       end
-
-       private
-
-       def feature_key_val(fname, fval)
-         f = fname.to_s.split(separator)
-         f.size == 2 ? f : [fname, fval]
-       end
-
-       def separator
-         @params[:separator]
-       end
-
-       def sort_feature?
-         @params[:sort]
-       end
-     end
-   end
- end
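
For reference, a minimal sketch of how the removed HashVectorizer was used under the pre-0.24.0 API documented above (the sample hashes and values here are illustrative, not from the gem): transform only fills columns for features learned during fit, and keys outside the learned vocabulary are silently dropped.

  require 'rumale'

  encoder = Rumale::FeatureExtraction::HashVectorizer.new(separator: '=', sort: true)
  encoder.fit([{ foo: 1, bar: 2 }, { foo: 3, baz: 1 }])
  pp encoder.feature_names                    # sorted names learned in fit: [:bar, :baz, :foo]
  pp encoder.transform([{ foo: 5, qux: 9 }])
  # => a 1x3 Numo::DFloat row [0, 0, 5]; the unseen :qux key is ignored
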
@@ -1,113 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
- require 'rumale/preprocessing/l1_normalizer'
- require 'rumale/preprocessing/l2_normalizer'
-
- module Rumale
-   module FeatureExtraction
-     # Transform sample matrix with term frequency (tf) to a normalized tf-idf (inverse document frequency) representation.
-     #
-     # @example
-     #   encoder = Rumale::FeatureExtraction::HashVectorizer.new
-     #   x = encoder.fit_transform([
-     #     { foo: 1, bar: 2 },
-     #     { foo: 3, baz: 1 }
-     #   ])
-     #
-     #   # > pp x
-     #   # Numo::DFloat#shape=[2,3]
-     #   # [[2, 0, 1],
-     #   #  [0, 1, 3]]
-     #
-     #   transformer = Rumale::FeatureExtraction::TfidfTransformer.new
-     #   x_tfidf = transformer.fit_transform(x)
-     #
-     #   # > pp x_tfidf
-     #   # Numo::DFloat#shape=[2,3]
-     #   # [[0.959056, 0, 0.283217],
-     #   #  [0, 0.491506, 0.870874]]
-     #
-     # *Reference*
-     # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
-     class TfidfTransformer
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the vector consisting of inverse document frequency.
-       # @return [Numo::DFloat] (shape: [n_features])
-       attr_reader :idf
-
-       # Create a new transformer for converting tf vectors to tf-idf vectors.
-       #
-       # @param norm [String] The normalization method to be used ('l1', 'l2' and 'none').
-       # @param use_idf [Boolean] The flag indicating whether to use inverse document frequency weighting.
-       # @param smooth_idf [Boolean] The flag indicating whether to apply idf smoothing by log((n_samples + 1) / (df + 1)) + 1.
-       # @param sublinear_tf [Boolean] The flag indicating whether to perform sublinear tf scaling by 1 + log(tf).
-       def initialize(norm: 'l2', use_idf: true, smooth_idf: false, sublinear_tf: false)
-         check_params_string(norm: norm)
-         check_params_boolean(use_idf: use_idf, smooth_idf: smooth_idf, sublinear_tf: sublinear_tf)
-         @params = {}
-         @params[:norm] = norm
-         @params[:use_idf] = use_idf
-         @params[:smooth_idf] = smooth_idf
-         @params[:sublinear_tf] = sublinear_tf
-         @idf = nil
-       end
-
-       # Calculate the inverse document frequency for weighting.
-       #
-       # @overload fit(x) -> TfidfTransformer
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the idf values.
-       # @return [TfidfTransformer]
-       def fit(x, _y = nil)
-         return self unless @params[:use_idf]
-
-         x = check_convert_sample_array(x)
-
-         n_samples = x.shape[0]
-         df = x.class.cast(x.gt(0.0).count(0))
-
-         if @params[:smooth_idf]
-           df += 1
-           n_samples += 1
-         end
-
-         @idf = Numo::NMath.log(n_samples / df) + 1
-
-         self
-       end
-
-       # Calculate the idf values, and then transform samples to the tf-idf representation.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate idf and be transformed to tf-idf representation.
-       # @return [Numo::DFloat] The transformed samples.
-       def fit_transform(x, _y = nil)
-         fit(x).transform(x)
-       end
-
-       # Transform the given samples to the tf-idf representation.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
-       # @return [Numo::DFloat] The transformed samples.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         z = x.dup
-
-         z[z.ne(0)] = Numo::NMath.log(z[z.ne(0)]) + 1 if @params[:sublinear_tf]
-         z *= @idf if @params[:use_idf]
-         case @params[:norm]
-         when 'l2'
-           z = Rumale::Preprocessing::L2Normalizer.new.fit_transform(z)
-         when 'l1'
-           z = Rumale::Preprocessing::L1Normalizer.new.fit_transform(z)
-         end
-         z
-       end
-     end
-   end
- end
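
A short sketch of the idf weighting implemented by the removed TfidfTransformer (pre-0.24.0 API; the toy term-frequency matrix is made up): fit stores idf = log(n_samples / df) + 1 per feature, with optional smoothing, and transform scales each column by it before the chosen normalization.

  require 'rumale'

  tf = Numo::DFloat[[2, 0, 1], [0, 1, 3]]   # toy term-frequency matrix
  transformer = Rumale::FeatureExtraction::TfidfTransformer.new(norm: 'l2', use_idf: true, smooth_idf: true)
  pp transformer.fit(tf).idf                # per-feature inverse document frequencies
  pp transformer.transform(tf)              # rows are L2-normalized tf-idf vectors
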
@@ -1,126 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
- require 'rumale/pairwise_metric'
-
- module Rumale
-   module KernelApproximation
-     # Nystroem is a class that implements feature mapping with the Nystroem method.
-     #
-     # @example
-     #   require 'numo/linalg/autoloader'
-     #
-     #   transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
-     #   new_training_samples = transformer.fit_transform(training_samples)
-     #   new_testing_samples = transformer.transform(testing_samples)
-     #
-     # *Reference*
-     # - Yang, T., Li, Y., Mahdavi, M., Jin, R., and Zhou, Z-H., "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
-     class Nystroem
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Returns the randomly sampled training data for feature mapping.
-       # @return [Numo::DFloat] (shape: [n_components, n_features])
-       attr_reader :components
-
-       # Returns the indices of the sampled training data.
-       # @return [Numo::Int32] (shape: [n_components])
-       attr_reader :component_indices
-
-       # Returns the normalizing factors.
-       # @return [Numo::DFloat] (shape: [n_components, n_components])
-       attr_reader :normalizer
-
-       # Return the random generator for transformation.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new transformer for mapping to kernel feature space with Nystrom method.
-       #
-       # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
-       # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
-       # @param degree [Integer] The degree parameter in polynomial kernel function.
-       # @param coef [Float] The coefficient in poly/sigmoid kernel function.
-       # @param n_components [Integer] The number of dimensions of the kernel feature space.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
-         check_params_string(kernel: kernel)
-         check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
-         check_params_numeric_or_nil(random_seed: random_seed)
-         @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
-         @params[:random_seed] ||= srand
-         @rng = Random.new(@params[:random_seed])
-         @component_indices = nil
-         @components = nil
-         @normalizer = nil
-       end
-
-       # Fit the model with given training data.
-       #
-       # @overload fit(x) -> Nystroem
-       #   @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       #   @return [Nystroem] The learned transformer itself.
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-         raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
-
-         # initialize some variables.
-         sub_rng = @rng.dup
-         n_samples = x.shape[0]
-         n_components = [1, [@params[:n_components], n_samples].min].max
-
-         # random sampling.
-         @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
-         @components = x[@component_indices, true].dup
-
-         # calculate normalizing factor.
-         kernel_mat = kernel_mat(@components)
-         eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
-         la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
-         u = eig_vecs.reverse(1)
-         @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
-
-         self
-       end
-
-       # Fit the model with training data, and then transform them with the learned model.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-         fit(x).transform(x)
-       end
-
-       # Transform the given data with the learned model.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-       # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         z = kernel_mat(x, @components)
-         z.dot(@normalizer)
-       end
-
-       private
-
-       def kernel_mat(x, y = nil)
-         case @params[:kernel]
-         when 'rbf'
-           Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
-         when 'poly'
-           Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
-         when 'sigmoid'
-           Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
-         when 'linear'
-           Rumale::PairwiseMetric.linear_kernel(x, y)
-         else
-           raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
-         end
-       end
-     end
-   end
- end
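
A usage sketch of the removed Nystroem approximator (pre-0.24.0 API; the random input data is illustrative): fit samples up to n_components training rows and builds a whitening matrix from the eigendecomposition of their kernel matrix, so transform(x) amounts to K(x, components) multiplied by that normalizer.

  require 'rumale'
  require 'numo/linalg/autoloader'          # Nystroem#fit raises without Numo::Linalg

  x_train = Numo::DFloat.new(100, 4).rand
  mapper = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 0.5, n_components: 32, random_seed: 1)
  z_train = mapper.fit_transform(x_train)   # shape: [100, 32]
  pp mapper.components.shape                # [32, 4], the sampled training rows
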
@@ -1,102 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/utils'
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   # Module for kernel approximation algorithms.
-   module KernelApproximation
-     # Class for RBF kernel feature mapping.
-     #
-     # @example
-     #   transformer = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
-     #   new_training_samples = transformer.fit_transform(training_samples)
-     #   new_testing_samples = transformer.transform(testing_samples)
-     #
-     # *Reference*:
-     # - Rahimi, A., and Recht, B., "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
-     class RBF
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Return the random matrix for transformation.
-       # @return [Numo::DFloat] (shape: [n_features, n_components])
-       attr_reader :random_mat
-
-       # Return the random vector for transformation.
-       # @return [Numo::DFloat] (shape: [n_components])
-       attr_reader :random_vec
-
-       # Return the random generator for transformation.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new transformer for mapping to RBF kernel feature space.
-       #
-       # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
-       # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
-         check_params_numeric(gamma: gamma, n_components: n_components)
-         check_params_numeric_or_nil(random_seed: random_seed)
-         check_params_positive(gamma: gamma, n_components: n_components)
-         @params = {}
-         @params[:gamma] = gamma
-         @params[:n_components] = n_components
-         @params[:random_seed] = random_seed
-         @params[:random_seed] ||= srand
-         @random_mat = nil
-         @random_vec = nil
-         @rng = Random.new(@params[:random_seed])
-       end
-
-       # Fit the model with given training data.
-       #
-       # @overload fit(x) -> RBF
-       #
-       #   @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       #     This method uses only the number of features of the data.
-       #   @return [RBF] The learned transformer itself.
-       def fit(x, _y = nil)
-         x = check_convert_sample_array(x)
-
-         n_features = x.shape[1]
-         sub_rng = @rng.dup
-         @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
-         @random_mat = Rumale::Utils.rand_normal([n_features, @params[:n_components]], sub_rng) * (2.0 * @params[:gamma])**0.5
-         n_half_components = @params[:n_components] / 2
-         @random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
-           Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
-         )
-         self
-       end
-
-       # Fit the model with training data, and then transform them with the learned model.
-       #
-       # @overload fit_transform(x) -> Numo::DFloat
-       #
-       #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
-       def fit_transform(x, _y = nil)
-         x = check_convert_sample_array(x)
-
-         fit(x).transform(x)
-       end
-
-       # Transform the given data with the learned model.
-       #
-       # @overload transform(x) -> Numo::DFloat
-       #
-       #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-       #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-       def transform(x)
-         x = check_convert_sample_array(x)
-
-         n_samples, = x.shape
-         projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
-         Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
-       end
-     end
-   end
- end
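
A similar sketch for the removed random Fourier feature mapper (pre-0.24.0 API; the data is illustrative): fit draws a Gaussian random_mat scaled by sqrt(2 * gamma) plus a fixed phase vector, and the same projection is reused when transforming test samples.

  require 'rumale'

  x_train = Numo::DFloat.new(50, 3).rand
  x_test  = Numo::DFloat.new(10, 3).rand
  rff = Rumale::KernelApproximation::RBF.new(gamma: 2.0, n_components: 64, random_seed: 1)
  z_train = rff.fit_transform(x_train)      # shape: [50, 64]
  z_test  = rff.transform(x_test)           # reuses the random_mat/random_vec learned in fit
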
@@ -1,120 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/base/transformer'
-
- module Rumale
-   module KernelMachine
-     # KernelFDA is a class that implements Kernel Fisher Discriminant Analysis.
-     #
-     # @example
-     #   require 'numo/linalg/autoloader'
-     #
-     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
-     #   kfda = Rumale::KernelMachine::KernelFDA.new
-     #   mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
-     #
-     #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
-     #   mapped_test_samples = kfda.transform(kernel_mat_test)
-     #
-     # *Reference*
-     # - Baudat, G., and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
-     class KernelFDA
-       include Base::BaseEstimator
-       include Base::Transformer
-
-       # Returns the eigenvectors for embedding.
-       # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
-       attr_reader :alphas
-
-       # Create a new transformer with Kernel FDA.
-       #
-       # @param n_components [Integer] The number of components.
-       # @param reg_param [Float] The regularization parameter.
-       def initialize(n_components: nil, reg_param: 1e-8)
-         check_params_numeric_or_nil(n_components: n_components)
-         check_params_numeric(reg_param: reg_param)
-         @params = {}
-         @params[:n_components] = n_components
-         @params[:reg_param] = reg_param
-         @alphas = nil
-         @row_mean = nil
-         @all_mean = nil
-       end
-
-       # Fit the model with given training data.
-       # To execute this method, Numo::Linalg must be loaded.
-       #
-       # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
-       #   The kernel matrix of the training data to be used for fitting the model.
-       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-       # @return [KernelFDA] The learned transformer itself.
-       def fit(x, y)
-         x = check_convert_sample_array(x)
-         y = check_convert_label_array(y)
-         check_sample_label_size(x, y)
-         raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
-         raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
-
-         # initialize some variables.
-         n_samples = x.shape[0]
-         @classes = Numo::Int32[*y.to_a.uniq.sort]
-         n_classes = @classes.size
-         n_components = if @params[:n_components].nil?
-                          [n_samples, n_classes - 1].min
-                        else
-                          [n_samples, @params[:n_components]].min
-                        end
-
-         # centering
-         @row_mean = x.mean(0)
-         @all_mean = @row_mean.sum.fdiv(n_samples)
-         centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
-
-         # calculate between and within scatter matrix.
-         class_mat = Numo::DFloat.zeros(n_samples, n_samples)
-         @classes.each do |label|
-           idx_vec = y.eq(label)
-           class_mat += Numo::DFloat.cast(idx_vec).outer(idx_vec) / idx_vec.count
-         end
-         between_mat = centered_kernel_mat.dot(class_mat).dot(centered_kernel_mat.transpose)
-         within_mat = centered_kernel_mat.dot(centered_kernel_mat.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)
-
-         # calculate projection matrix.
-         _, eig_vecs = Numo::Linalg.eigh(
-           between_mat, within_mat,
-           vals_range: (n_samples - n_components)...n_samples
-         )
-         @alphas = eig_vecs.reverse(1).dup
-         self
-       end
-
-       # Fit the model with training data, and then transform them with the learned model.
-       # To execute this method, Numo::Linalg must be loaded.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
-       #   The kernel matrix of the training data to be used for fitting the model and transformed.
-       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-       # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
-       def fit_transform(x, y)
-         x = check_convert_sample_array(x)
-         y = check_convert_label_array(y)
-         check_sample_label_size(x, y)
-         fit(x, y).transform(x)
-       end
-
-       # Transform the given data with the learned model.
-       #
-       # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
-       #   The kernel matrix between testing samples and training samples to be transformed.
-       # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
-       def transform(x)
-         x = check_convert_sample_array(x)
-         col_mean = x.sum(1) / @row_mean.shape[0]
-         centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
-         transformed = centered_kernel_mat.dot(@alphas)
-         @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
-       end
-     end
-   end
- end
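
Finally, a sketch of the precomputed-kernel protocol the removed KernelFDA expects (pre-0.24.0 API; the data and gamma value are illustrative): fit and fit_transform take a square kernel matrix over the training samples, transform takes an [n_test, n_train] kernel matrix, and with the default n_components: nil the embedding has at most n_classes - 1 dimensions.

  require 'rumale'
  require 'numo/linalg/autoloader'          # KernelFDA#fit raises without Numo::Linalg

  x_train = Numo::DFloat.new(60, 5).rand
  y_train = Numo::Int32[*(0...60).map { |i| i % 3 }]               # three classes: 0, 1, 2
  k_train = Rumale::PairwiseMetric.rbf_kernel(x_train, nil, 0.5)   # square [60, 60] kernel matrix

  kfda = Rumale::KernelMachine::KernelFDA.new
  z_train = kfda.fit_transform(k_train, y_train)                   # at most n_classes - 1 = 2 columns
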