rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,155 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- # This module consists of the classes that extract features from raw data.
8
- module FeatureExtraction
9
- # Encode array of feature-value hash to vectors.
10
- # This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
11
- #
12
- # @example
13
- # encoder = Rumale::FeatureExtraction::HashVectorizer.new
14
- # x = encoder.fit_transform([
15
- # { foo: 1, bar: 2 },
16
- # { foo: 3, baz: 1 }
17
- # ])
18
- # # > pp x
19
- # # Numo::DFloat#shape=[2,3]
20
- # # [[2, 0, 1],
21
- # # [0, 1, 3]]
22
- #
23
- # x = encoder.fit_transform([
24
- # { city: 'Dubai', temperature: 33 },
25
- # { city: 'London', temperature: 12 },
26
- # { city: 'San Francisco', temperature: 18 }
27
- # ])
28
- # # > pp x
29
- # # Numo::DFloat#shape=[3,4]
30
- # # [[1, 0, 0, 33],
31
- # # [0, 1, 0, 12],
32
- # # [0, 0, 1, 18]]
33
- # # > pp encoder.inverse_transform(x)
34
- # # [{:city=>"Dubai", :temperature=>33.0},
35
- # # {:city=>"London", :temperature=>12.0},
36
- # # {:city=>"San Francisco", :temperature=>18.0}]
37
- class HashVectorizer
38
- include Base::BaseEstimator
39
- include Base::Transformer
40
-
41
- # Return the list of feature names.
42
- # @return [Array] (size: [n_features])
43
- attr_reader :feature_names
44
-
45
- # Return the hash consisting of pairs of feature names and indices.
46
- # @return [Hash] (size: [n_features])
47
- attr_reader :vocabulary
48
-
49
- # Create a new encoder for converting array of hash consisting of feature names and values to vectors.
50
- #
51
- # @param separator [String] The separator string used for constructing new feature names for categorical feature.
52
- # @param sort [Boolean] The flag indicating whether to sort feature names.
53
- def initialize(separator: '=', sort: true)
54
- check_params_string(separator: separator)
55
- check_params_boolean(sort: sort)
56
- @params = {}
57
- @params[:separator] = separator
58
- @params[:sort] = sort
59
- end
60
-
61
- # Fit the encoder with given training data.
62
- #
63
- # @overload fit(x) -> HashVectorizer
64
- # @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
65
- # @return [HashVectorizer]
66
- def fit(x, _y = nil)
67
- @feature_names = []
68
- @vocabulary = {}
69
-
70
- x.each do |f|
71
- f.each do |k, v|
72
- k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
73
- next if @vocabulary.key?(k)
74
-
75
- @feature_names.push(k)
76
- @vocabulary[k] = @vocabulary.size
77
- end
78
- end
79
-
80
- if sort_feature?
81
- @feature_names.sort!
82
- @feature_names.each_with_index { |k, i| @vocabulary[k] = i }
83
- end
84
-
85
- self
86
- end
87
-
88
- # Fit the encoder with given training data, then return encoded data.
89
- #
90
- # @overload fit_transform(x) -> Numo::DFloat
91
- # @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
92
- # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
93
- def fit_transform(x, _y = nil)
94
- fit(x).transform(x)
95
- end
96
-
97
- # Encode the given array of feature-value hashes.
98
- #
99
- # @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
100
- # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
101
- def transform(x)
102
- x = [x] unless x.is_a?(Array)
103
- n_samples = x.size
104
- n_features = @vocabulary.size
105
- z = Numo::DFloat.zeros(n_samples, n_features)
106
-
107
- x.each_with_index do |f, i|
108
- f.each do |k, v|
109
- if v.is_a?(String)
110
- k = "#{k}#{separator}#{v}".to_sym
111
- v = 1
112
- end
113
- z[i, @vocabulary[k]] = v if @vocabulary.key?(k)
114
- end
115
- end
116
-
117
- z
118
- end
119
-
120
- # Decode sample matrix to the array of feature-value hash.
121
- #
122
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
123
- # @return [Array<Hash>] The array of hash consisting of feature names and values.
124
- def inverse_transform(x)
125
- n_samples = x.shape[0]
126
- reconst = []
127
-
128
- n_samples.times do |i|
129
- f = {}
130
- x[i, true].each_with_index do |el, j|
131
- feature_key_val(@feature_names[j], el).tap { |k, v| f[k.to_sym] = v } unless el.zero?
132
- end
133
- reconst.push(f)
134
- end
135
-
136
- reconst
137
- end
138
-
139
- private
140
-
141
- def feature_key_val(fname, fval)
142
- f = fname.to_s.split(separator)
143
- f.size == 2 ? f : [fname, fval]
144
- end
145
-
146
- def separator
147
- @params[:separator]
148
- end
149
-
150
- def sort_feature?
151
- @params[:sort]
152
- end
153
- end
154
- end
155
- end
@@ -1,113 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
- require 'rumale/preprocessing/l1_normalizer'
6
- require 'rumale/preprocessing/l2_normalizer'
7
-
8
- module Rumale
9
- module FeatureExtraction
10
- # Transform sample matrix with term frequency (tf) to a normalized tf-idf (inverse document frequency) representation.
11
- #
12
- # @example
13
- # encoder = Rumale::FeatureExtraction::HashVectorizer.new
14
- # x = encoder.fit_transform([
15
- # { foo: 1, bar: 2 },
16
- # { foo: 3, baz: 1 }
17
- # ])
18
- #
19
- # # > pp x
20
- # # Numo::DFloat#shape=[2,3]
21
- # # [[2, 0, 1],
22
- # # [0, 1, 3]]
23
- #
24
- # transformer = Rumale::FeatureExtraction::TfidfTransformer.new
25
- # x_tfidf = transformer.fit_transform(x)
26
- #
27
- # # > pp x_tfidf
28
- # # Numo::DFloat#shape=[2,3]
29
- # # [[0.959056, 0, 0.283217],
30
- # # [0, 0.491506, 0.870874]]
31
- #
32
- # *Reference*
33
- # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
34
- class TfidfTransformer
35
- include Base::BaseEstimator
36
- include Base::Transformer
37
-
38
- # Return the vector consisting of inverse document frequencies.
39
- # @return [Numo::DFloat] (shape: [n_features])
40
- attr_reader :idf
41
-
42
- # Create a new transformer for converting tf vectors to tf-idf vectors.
43
- #
44
- # @param norm [String] The normalization method to be used ('l1', 'l2' and 'none').
45
- # @param use_idf [Boolean] The flag indicating whether to use inverse document frequency weighting.
46
- # @param smooth_idf [Boolean] The flag indicating whether to apply idf smoothing by log((n_samples + 1) / (df + 1)) + 1.
47
- # @param sublinear_tf [Boolean] The flag indicating whether to perform sublinear tf scaling by 1 + log(tf).
48
- def initialize(norm: 'l2', use_idf: true, smooth_idf: false, sublinear_tf: false)
49
- check_params_string(norm: norm)
50
- check_params_boolean(use_idf: use_idf, smooth_idf: smooth_idf, sublinear_tf: sublinear_tf)
51
- @params = {}
52
- @params[:norm] = norm
53
- @params[:use_idf] = use_idf
54
- @params[:smooth_idf] = smooth_idf
55
- @params[:sublinear_tf] = sublinear_tf
56
- @idf = nil
57
- end
58
-
59
- # Calculate the inverse document frequency for weighting.
60
- #
61
- # @overload fit(x) -> TfidfTransformer
62
- #
63
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the idf values.
64
- # @return [TfidfTransformer]
65
- def fit(x, _y = nil)
66
- return self unless @params[:use_idf]
67
-
68
- x = check_convert_sample_array(x)
69
-
70
- n_samples = x.shape[0]
71
- df = x.class.cast(x.gt(0.0).count(0))
72
-
73
- if @params[:smooth_idf]
74
- df += 1
75
- n_samples += 1
76
- end
77
-
78
- @idf = Numo::NMath.log(n_samples / df) + 1
79
-
80
- self
81
- end
82
-
83
- # Calculate the idf values, and then transform samples to the tf-idf representation.
84
- #
85
- # @overload fit_transform(x) -> Numo::DFloat
86
- #
87
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate idf and be transformed to tf-idf representation.
88
- # @return [Numo::DFloat] The transformed samples.
89
- def fit_transform(x, _y = nil)
90
- fit(x).transform(x)
91
- end
92
-
93
- # Perform transforming the given samples to the tf-idf representation.
94
- #
95
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
96
- # @return [Numo::DFloat] The transformed samples.
97
- def transform(x)
98
- x = check_convert_sample_array(x)
99
- z = x.dup
100
-
101
- z[z.ne(0)] = Numo::NMath.log(z[z.ne(0)]) + 1 if @params[:sublinear_tf]
102
- z *= @idf if @params[:use_idf]
103
- case @params[:norm]
104
- when 'l2'
105
- z = Rumale::Preprocessing::L2Normalizer.new.fit_transform(z)
106
- when 'l1'
107
- z = Rumale::Preprocessing::L1Normalizer.new.fit_transform(z)
108
- end
109
- z
110
- end
111
- end
112
- end
113
- end
@@ -1,126 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
- require 'rumale/pairwise_metric'
6
-
7
- module Rumale
8
- module KernelApproximation
9
- # Nystroem is a class that implements feature mapping with Nystroem method.
10
- #
11
- # @example
12
- # require 'numo/linalg/autoloader'
13
- #
14
- # transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
15
- # new_training_samples = transformer.fit_transform(training_samples)
16
- # new_testing_samples = transformer.transform(testing_samples)
17
- #
18
- # *Reference*
19
- # - Yang, T., Li, Y., Mahdavi, M., Jin, R., and Zhou, Z-H., "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
20
- class Nystroem
21
- include Base::BaseEstimator
22
- include Base::Transformer
23
-
24
- # Returns the randomly sampled training data for feature mapping.
25
- # @return [Numo::DFloat] (shape: [n_components, n_features])
26
- attr_reader :components
27
-
28
- # Returns the indices of the sampled training data.
29
- # @return [Numo::Int32] (shape: [n_components])
30
- attr_reader :component_indices
31
-
32
- # Returns the normalizing factors.
33
- # @return [Numo::DFloat] (shape: [n_components, n_components])
34
- attr_reader :normalizer
35
-
36
- # Return the random generator for transformation.
37
- # @return [Random]
38
- attr_reader :rng
39
-
40
- # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
- #
42
- # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
43
- # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
44
- # @param degree [Integer] The degree parameter in polynomial kernel function.
45
- # @param coef [Float] The coefficient in poly/sigmoid kernel function.
46
- # @param n_components [Integer] The number of dimensions of the kernel feature space.
47
- # @param random_seed [Integer] The seed value used to initialize the random generator.
48
- def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
49
- check_params_string(kernel: kernel)
50
- check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
51
- check_params_numeric_or_nil(random_seed: random_seed)
52
- @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
53
- @params[:random_seed] ||= srand
54
- @rng = Random.new(@params[:random_seed])
55
- @component_indices = nil
56
- @components = nil
57
- @normalizer = nil
58
- end
59
-
60
- # Fit the model with given training data.
61
- #
62
- # @overload fit(x) -> Nystroem
63
- # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
64
- # @return [Nystroem] The learned transformer itself.
65
- def fit(x, _y = nil)
66
- x = check_convert_sample_array(x)
67
- raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
68
-
69
- # initialize some variables.
70
- sub_rng = @rng.dup
71
- n_samples = x.shape[0]
72
- n_components = [1, [@params[:n_components], n_samples].min].max
73
-
74
- # random sampling.
75
- @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
76
- @components = x[@component_indices, true].dup
77
-
78
- # calculate normalizing factor.
79
- kernel_mat = kernel_mat(@components)
80
- eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
81
- la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
82
- u = eig_vecs.reverse(1)
83
- @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
84
-
85
- self
86
- end
87
-
88
- # Fit the model with training data, and then transform them with the learned model.
89
- #
90
- # @overload fit_transform(x) -> Numo::DFloat
91
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
92
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
93
- def fit_transform(x, _y = nil)
94
- x = check_convert_sample_array(x)
95
- fit(x).transform(x)
96
- end
97
-
98
- # Transform the given data with the learned model.
99
- #
100
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
101
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
102
- def transform(x)
103
- x = check_convert_sample_array(x)
104
- z = kernel_mat(x, @components)
105
- z.dot(@normalizer)
106
- end
107
-
108
- private
109
-
110
- def kernel_mat(x, y = nil)
111
- case @params[:kernel]
112
- when 'rbf'
113
- Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
114
- when 'poly'
115
- Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
116
- when 'sigmoid'
117
- Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
118
- when 'linear'
119
- Rumale::PairwiseMetric.linear_kernel(x, y)
120
- else
121
- raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
122
- end
123
- end
124
- end
125
- end
126
- end
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/utils'
4
- require 'rumale/base/base_estimator'
5
- require 'rumale/base/transformer'
6
-
7
- module Rumale
8
- # Module for kernel approximation algorithms.
9
- module KernelApproximation
10
- # Class for RBF kernel feature mapping.
11
- #
12
- # @example
13
- # transformer = Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
14
- # new_training_samples = transformer.fit_transform(training_samples)
15
- # new_testing_samples = transformer.transform(testing_samples)
16
- #
17
- # *Reference*:
18
- # - Rahimi, A., and Recht, B., "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
19
- class RBF
20
- include Base::BaseEstimator
21
- include Base::Transformer
22
-
23
- # Return the random matrix for transformation.
24
- # @return [Numo::DFloat] (shape: [n_features, n_components])
25
- attr_reader :random_mat
26
-
27
- # Return the random vector for transformation.
28
- # @return [Numo::DFloat] (shape: [n_components])
29
- attr_reader :random_vec
30
-
31
- # Return the random generator for transformation.
32
- # @return [Random]
33
- attr_reader :rng
34
-
35
- # Create a new transformer for mapping to RBF kernel feature space.
36
- #
37
- # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
38
- # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
39
- # @param random_seed [Integer] The seed value used to initialize the random generator.
40
- def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
41
- check_params_numeric(gamma: gamma, n_components: n_components)
42
- check_params_numeric_or_nil(random_seed: random_seed)
43
- check_params_positive(gamma: gamma, n_components: n_components)
44
- @params = {}
45
- @params[:gamma] = gamma
46
- @params[:n_components] = n_components
47
- @params[:random_seed] = random_seed
48
- @params[:random_seed] ||= srand
49
- @random_mat = nil
50
- @random_vec = nil
51
- @rng = Random.new(@params[:random_seed])
52
- end
53
-
54
- # Fit the model with given training data.
55
- #
56
- # @overload fit(x) -> RBF
57
- #
58
- # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
59
- # This method uses only the number of features of the data.
60
- # @return [RBF] The learned transformer itself.
61
- def fit(x, _y = nil)
62
- x = check_convert_sample_array(x)
63
-
64
- n_features = x.shape[1]
65
- sub_rng = @rng.dup
66
- @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
67
- @random_mat = Rumale::Utils.rand_normal([n_features, @params[:n_components]], sub_rng) * (2.0 * @params[:gamma])**0.5
68
- n_half_components = @params[:n_components] / 2
69
- @random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
70
- Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
71
- )
72
- self
73
- end
74
-
75
- # Fit the model with training data, and then transform them with the learned model.
76
- #
77
- # @overload fit_transform(x) -> Numo::DFloat
78
- #
79
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
80
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
81
- def fit_transform(x, _y = nil)
82
- x = check_convert_sample_array(x)
83
-
84
- fit(x).transform(x)
85
- end
86
-
87
- # Transform the given data with the learned model.
88
- #
89
- # @overload transform(x) -> Numo::DFloat
90
- #
91
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
92
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
93
- def transform(x)
94
- x = check_convert_sample_array(x)
95
-
96
- n_samples, = x.shape
97
- projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
98
- Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
99
- end
100
- end
101
- end
102
- end
@@ -1,120 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/transformer'
5
-
6
- module Rumale
7
- module KernelMachine
8
- # KernelFDA is a class that implements Kernel Fisher Discriminant Analysis.
9
- #
10
- # @example
11
- # require 'numo/linalg/autoloader'
12
- #
13
- # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
14
- # kfda = Rumale::KernelMachine::KernelFDA.new
15
- # mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
16
- #
17
- # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
18
- # mapped_test_samples = kfda.transform(kernel_mat_test)
19
- #
20
- # *Reference*
21
- # - Baudat, G., and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
22
- class KernelFDA
23
- include Base::BaseEstimator
24
- include Base::Transformer
25
-
26
- # Returns the eigenvectors for embedding.
27
- # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
28
- attr_reader :alphas
29
-
30
- # Create a new transformer with Kernel FDA.
31
- #
32
- # @param n_components [Integer] The number of components.
33
- # @param reg_param [Float] The regularization parameter.
34
- def initialize(n_components: nil, reg_param: 1e-8)
35
- check_params_numeric_or_nil(n_components: n_components)
36
- check_params_numeric(reg_param: reg_param)
37
- @params = {}
38
- @params[:n_components] = n_components
39
- @params[:reg_param] = reg_param
40
- @alphas = nil
41
- @row_mean = nil
42
- @all_mean = nil
43
- end
44
-
45
- # Fit the model with given training data.
46
- # To execute this method, Numo::Linalg must be loaded.
47
- #
48
- # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
49
- # The kernel matrix of the training data to be used for fitting the model.
50
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
51
- # @return [KernelFDA] The learned transformer itself.
52
- def fit(x, y)
53
- x = check_convert_sample_array(x)
54
- y = check_convert_label_array(y)
55
- check_sample_label_size(x, y)
56
- raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
57
- raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
58
-
59
- # initialize some variables.
60
- n_samples = x.shape[0]
61
- @classes = Numo::Int32[*y.to_a.uniq.sort]
62
- n_classes = @classes.size
63
- n_components = if @params[:n_components].nil?
64
- [n_samples, n_classes - 1].min
65
- else
66
- [n_samples, @params[:n_components]].min
67
- end
68
-
69
- # centering
70
- @row_mean = x.mean(0)
71
- @all_mean = @row_mean.sum.fdiv(n_samples)
72
- centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
73
-
74
- # calculate between and within scatter matrix.
75
- class_mat = Numo::DFloat.zeros(n_samples, n_samples)
76
- @classes.each do |label|
77
- idx_vec = y.eq(label)
78
- class_mat += Numo::DFloat.cast(idx_vec).outer(idx_vec) / idx_vec.count
79
- end
80
- between_mat = centered_kernel_mat.dot(class_mat).dot(centered_kernel_mat.transpose)
81
- within_mat = centered_kernel_mat.dot(centered_kernel_mat.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)
82
-
83
- # calculate projection matrix.
84
- _, eig_vecs = Numo::Linalg.eigh(
85
- between_mat, within_mat,
86
- vals_range: (n_samples - n_components)...n_samples
87
- )
88
- @alphas = eig_vecs.reverse(1).dup
89
- self
90
- end
91
-
92
- # Fit the model with training data, and then transform them with the learned model.
93
- # To execute this method, Numo::Linalg must be loaded.
94
- #
95
- # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
96
- # The kernel matrix of the training data to be used for fitting the model and transformed.
97
- # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
98
- # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
99
- def fit_transform(x, y)
100
- x = check_convert_sample_array(x)
101
- y = check_convert_label_array(y)
102
- check_sample_label_size(x, y)
103
- fit(x, y).transform(x)
104
- end
105
-
106
- # Transform the given data with the learned model.
107
- #
108
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
109
- # The kernel matrix between testing samples and training samples to be transformed.
110
- # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
111
- def transform(x)
112
- x = check_convert_sample_array(x)
113
- col_mean = x.sum(1) / @row_mean.shape[0]
114
- centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
115
- transformed = centered_kernel_mat.dot(@alphas)
116
- @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
117
- end
118
- end
119
- end
120
- end