rumale 0.18.1 → 0.18.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -1
  3. data/CHANGELOG.md +16 -4
  4. data/lib/rumale.rb +6 -1
  5. data/lib/rumale/clustering/dbscan.rb +0 -17
  6. data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
  7. data/lib/rumale/clustering/hdbscan.rb +0 -15
  8. data/lib/rumale/clustering/k_means.rb +0 -17
  9. data/lib/rumale/clustering/k_medoids.rb +0 -19
  10. data/lib/rumale/clustering/power_iteration.rb +0 -19
  11. data/lib/rumale/clustering/single_linkage.rb +0 -17
  12. data/lib/rumale/clustering/spectral_clustering.rb +0 -17
  13. data/lib/rumale/evaluation_measure/function.rb +34 -0
  14. data/lib/rumale/kernel_approximation/rbf.rb +0 -19
  15. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
  16. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
  18. data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
  19. data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
  20. data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
  21. data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
  22. data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
  23. data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
  24. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
  25. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
  26. data/lib/rumale/neural_network/adam.rb +0 -19
  27. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
  28. data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
  29. data/lib/rumale/preprocessing/label_encoder.rb +0 -15
  30. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
  31. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
  32. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
  33. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
  34. data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
  35. data/lib/rumale/version.rb +1 -1
  36. metadata +8 -3
  37. data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250
@@ -88,21 +88,6 @@ module Rumale
88
88
  end
89
89
  transformed
90
90
  end
91
-
92
- # Dump marshal data.
93
- # @return [Hash] The marshal data about BinDiscretizer
94
- def marshal_dump
95
- { params: @params,
96
- feature_steps: @feature_steps }
97
- end
98
-
99
- # Load marshal data.
100
- # @return [nil]
101
- def marshal_load(obj)
102
- @params = obj[:params]
103
- @feature_steps = obj[:feature_steps]
104
- nil
105
- end
106
91
  end
107
92
  end
108
93
  end
@@ -84,21 +84,6 @@ module Rumale
84
84
  n_samples = x.shape[0]
85
85
  Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
86
86
  end
87
-
88
- # Dump marshal data.
89
- # @return [Hash] The marshal data about LabelBinarizer.
90
- def marshal_dump
91
- { params: @params,
92
- classes: @classes }
93
- end
94
-
95
- # Load marshal data.
96
- # @return [nil]
97
- def marshal_load(obj)
98
- @params = obj[:params]
99
- @classes = obj[:classes]
100
- nil
101
- end
102
87
  end
103
88
  end
104
89
  end
@@ -74,21 +74,6 @@ module Rumale
74
74
  x = check_convert_label_array(x)
75
75
  x.to_a.map { |n| @classes[n] }
76
76
  end
77
-
78
- # Dump marshal data.
79
- # @return [Hash] The marshal data about LabelEncoder
80
- def marshal_dump
81
- { params: @params,
82
- classes: @classes }
83
- end
84
-
85
- # Load marshal data.
86
- # @return [nil]
87
- def marshal_load(obj)
88
- @params = obj[:params]
89
- @classes = obj[:classes]
90
- nil
91
- end
92
77
  end
93
78
  end
94
79
  end
@@ -56,21 +56,6 @@ module Rumale
56
56
  x = check_convert_sample_array(x)
57
57
  x / @max_abs_vec
58
58
  end
59
-
60
- # Dump marshal data.
61
- # @return [Hash] The marshal data about MaxAbsScaler.
62
- def marshal_dump
63
- { params: @params,
64
- max_abs_vec: @max_abs_vec }
65
- end
66
-
67
- # Load marshal data.
68
- # @return [nil]
69
- def marshal_load(obj)
70
- @params = obj[:params]
71
- @max_abs_vec = obj[:max_abs_vec]
72
- nil
73
- end
74
59
  end
75
60
  end
76
61
  end
@@ -71,23 +71,6 @@ module Rumale
71
71
  nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
72
72
  nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
73
73
  end
74
-
75
- # Dump marshal data.
76
- # @return [Hash] The marshal data about MinMaxScaler.
77
- def marshal_dump
78
- { params: @params,
79
- min_vec: @min_vec,
80
- max_vec: @max_vec }
81
- end
82
-
83
- # Load marshal data.
84
- # @return [nil]
85
- def marshal_load(obj)
86
- @params = obj[:params]
87
- @min_vec = obj[:min_vec]
88
- @max_vec = obj[:max_vec]
89
- nil
90
- end
91
74
  end
92
75
  end
93
76
  end
@@ -81,25 +81,6 @@ module Rumale
81
81
  codes[true, @active_features].dup
82
82
  end
83
83
 
84
- # Dump marshal data.
85
- # @return [Hash] The marshal data about OneHotEncoder.
86
- def marshal_dump
87
- { params: @params,
88
- n_values: @n_values,
89
- active_features: @active_features,
90
- feature_indices: @feature_indices }
91
- end
92
-
93
- # Load marshal data.
94
- # @return [nil]
95
- def marshal_load(obj)
96
- @params = obj[:params]
97
- @n_values = obj[:n_values]
98
- @active_features = obj[:active_features]
99
- @feature_indices = obj[:feature_indices]
100
- nil
101
- end
102
-
103
84
  private
104
85
 
105
86
  def encode(x, indices)
@@ -102,19 +102,6 @@ module Rumale
102
102
 
103
103
  Numo::NArray.asarray(inv_transformed.transpose)
104
104
  end
105
-
106
- # Dump marshal data.
107
- # @return [Hash] The marshal data about OrdinalEncoder.
108
- def marshal_dump
109
- { categories: @categories }
110
- end
111
-
112
- # Load marshal data.
113
- # @return [nil]
114
- def marshal_load(obj)
115
- @categories = obj[:categories]
116
- nil
117
- end
118
105
  end
119
106
  end
120
107
  end
@@ -66,21 +66,6 @@ module Rumale
66
66
  n_samples, = x.shape
67
67
  (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
68
68
  end
69
-
70
- # Dump marshal data.
71
- # @return [Hash] The marshal data about StandardScaler.
72
- def marshal_dump
73
- { mean_vec: @mean_vec,
74
- std_vec: @std_vec }
75
- end
76
-
77
- # Load marshal data.
78
- # @return [nil]
79
- def marshal_load(obj)
80
- @mean_vec = obj[:mean_vec]
81
- @std_vec = obj[:std_vec]
82
- nil
83
- end
84
69
  end
85
70
  end
86
71
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.18.1'
6
+ VERSION = '0.18.2'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.1
4
+ version: 0.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-14 00:00:00.000000000 Z
11
+ date: 2020-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -266,7 +266,12 @@ files:
266
266
  - lib/rumale/model_selection/stratified_k_fold.rb
267
267
  - lib/rumale/model_selection/stratified_shuffle_split.rb
268
268
  - lib/rumale/multiclass/one_vs_rest_classifier.rb
269
- - lib/rumale/naive_bayes/naive_bayes.rb
269
+ - lib/rumale/naive_bayes/base_naive_bayes.rb
270
+ - lib/rumale/naive_bayes/bernoulli_nb.rb
271
+ - lib/rumale/naive_bayes/complement_nb.rb
272
+ - lib/rumale/naive_bayes/gaussian_nb.rb
273
+ - lib/rumale/naive_bayes/multinomial_nb.rb
274
+ - lib/rumale/naive_bayes/negation_nb.rb
270
275
  - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
271
276
  - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
272
277
  - lib/rumale/nearest_neighbors/vp_tree.rb
@@ -1,250 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
-
6
- module Rumale
7
- # This module consists of the classes that implement naive bayes models.
8
- module NaiveBayes
9
- # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
10
- class BaseNaiveBayes
11
- include Base::BaseEstimator
12
- include Base::Classifier
13
-
14
- # Predict class labels for samples.
15
- #
16
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
17
- # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
18
- def predict(x)
19
- x = check_convert_sample_array(x)
20
- n_samples = x.shape.first
21
- decision_values = decision_function(x)
22
- Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
23
- end
24
-
25
- # Predict log-probability for samples.
26
- #
27
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
28
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
29
- def predict_log_proba(x)
30
- x = check_convert_sample_array(x)
31
- n_samples, = x.shape
32
- log_likelihoods = decision_function(x)
33
- log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
34
- end
35
-
36
- # Predict probability for samples.
37
- #
38
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
39
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
40
- def predict_proba(x)
41
- x = check_convert_sample_array(x)
42
- Numo::NMath.exp(predict_log_proba(x)).abs
43
- end
44
- end
45
-
46
- # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
47
- #
48
- # @example
49
- # estimator = Rumale::NaiveBayes::GaussianNB.new
50
- # estimator.fit(training_samples, training_labels)
51
- # results = estimator.predict(testing_samples)
52
- class GaussianNB < BaseNaiveBayes
53
- # Return the class labels.
54
- # @return [Numo::Int32] (size: n_classes)
55
- attr_reader :classes
56
-
57
- # Return the prior probabilities of the classes.
58
- # @return [Numo::DFloat] (shape: [n_classes])
59
- attr_reader :class_priors
60
-
61
- # Return the mean vectors of the classes.
62
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
63
- attr_reader :means
64
-
65
- # Return the variance vectors of the classes.
66
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
67
- attr_reader :variances
68
-
69
- # Create a new classifier with Gaussian Naive Bayes.
70
- def initialize
71
- @params = {}
72
- end
73
-
74
- # Fit the model with given training data.
75
- #
76
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
77
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
78
- # to be used for fitting the model.
79
- # @return [GaussianNB] The learned classifier itself.
80
- def fit(x, y)
81
- x = check_convert_sample_array(x)
82
- y = check_convert_label_array(y)
83
- check_sample_label_size(x, y)
84
- n_samples, = x.shape
85
- @classes = Numo::Int32[*y.to_a.uniq.sort]
86
- @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
87
- @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
88
- @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
89
- self
90
- end
91
-
92
- # Calculate confidence scores for samples.
93
- #
94
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
95
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
96
- def decision_function(x)
97
- x = check_convert_sample_array(x)
98
- n_classes = @classes.size
99
- log_likelihoods = Array.new(n_classes) do |l|
100
- Math.log(@class_priors[l]) - 0.5 * (
101
- Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
102
- ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
103
- end
104
- Numo::DFloat[*log_likelihoods].transpose
105
- end
106
- end
107
-
108
- # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
109
- #
110
- # @example
111
- # estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
112
- # estimator.fit(training_samples, training_labels)
113
- # results = estimator.predict(testing_samples)
114
- #
115
- # *Reference*
116
- # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
117
- class MultinomialNB < BaseNaiveBayes
118
- # Return the class labels.
119
- # @return [Numo::Int32] (size: n_classes)
120
- attr_reader :classes
121
-
122
- # Return the prior probabilities of the classes.
123
- # @return [Numo::DFloat] (shape: [n_classes])
124
- attr_reader :class_priors
125
-
126
- # Return the conditional probabilities for features of each class.
127
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
128
- attr_reader :feature_probs
129
-
130
- # Create a new classifier with Multinomial Naive Bayes.
131
- #
132
- # @param smoothing_param [Float] The Laplace smoothing parameter.
133
- def initialize(smoothing_param: 1.0)
134
- check_params_numeric(smoothing_param: smoothing_param)
135
- check_params_positive(smoothing_param: smoothing_param)
136
- @params = {}
137
- @params[:smoothing_param] = smoothing_param
138
- end
139
-
140
- # Fit the model with given training data.
141
- #
142
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
143
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
144
- # to be used for fitting the model.
145
- # @return [MultinomialNB] The learned classifier itself.
146
- def fit(x, y)
147
- x = check_convert_sample_array(x)
148
- y = check_convert_label_array(y)
149
- check_sample_label_size(x, y)
150
- n_samples, = x.shape
151
- @classes = Numo::Int32[*y.to_a.uniq.sort]
152
- @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
153
- count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
154
- count_features += @params[:smoothing_param]
155
- n_classes = @classes.size
156
- @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
157
- self
158
- end
159
-
160
- # Calculate confidence scores for samples.
161
- #
162
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
163
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
164
- def decision_function(x)
165
- x = check_convert_sample_array(x)
166
- n_classes = @classes.size
167
- bin_x = x.gt(0)
168
- log_likelihoods = Array.new(n_classes) do |l|
169
- Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
170
- end
171
- Numo::DFloat[*log_likelihoods].transpose
172
- end
173
- end
174
-
175
- # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
176
- #
177
- # @example
178
- # estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
179
- # estimator.fit(training_samples, training_labels)
180
- # results = estimator.predict(testing_samples)
181
- #
182
- # *Reference*
183
- # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
184
- class BernoulliNB < BaseNaiveBayes
185
- # Return the class labels.
186
- # @return [Numo::Int32] (size: n_classes)
187
- attr_reader :classes
188
-
189
- # Return the prior probabilities of the classes.
190
- # @return [Numo::DFloat] (shape: [n_classes])
191
- attr_reader :class_priors
192
-
193
- # Return the conditional probabilities for features of each class.
194
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
195
- attr_reader :feature_probs
196
-
197
- # Create a new classifier with Bernoulli Naive Bayes.
198
- #
199
- # @param smoothing_param [Float] The Laplace smoothing parameter.
200
- # @param bin_threshold [Float] The threshold for binarizing of features.
201
- def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
202
- check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
203
- check_params_positive(smoothing_param: smoothing_param)
204
- @params = {}
205
- @params[:smoothing_param] = smoothing_param
206
- @params[:bin_threshold] = bin_threshold
207
- end
208
-
209
- # Fit the model with given training data.
210
- #
211
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
212
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
213
- # to be used for fitting the model.
214
- # @return [BernoulliNB] The learned classifier itself.
215
- def fit(x, y)
216
- x = check_convert_sample_array(x)
217
- y = check_convert_label_array(y)
218
- check_sample_label_size(x, y)
219
- n_samples, = x.shape
220
- bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
221
- @classes = Numo::Int32[*y.to_a.uniq.sort]
222
- n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
223
- @class_priors = n_samples_each_class / n_samples
224
- count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
225
- count_features += @params[:smoothing_param]
226
- n_samples_each_class += 2.0 * @params[:smoothing_param]
227
- n_classes = @classes.size
228
- @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
229
- self
230
- end
231
-
232
- # Calculate confidence scores for samples.
233
- #
234
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
235
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
236
- def decision_function(x)
237
- x = check_convert_sample_array(x)
238
- n_classes = @classes.size
239
- bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
240
- not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
241
- log_likelihoods = Array.new(n_classes) do |l|
242
- Math.log(@class_priors[l]) + (
243
- (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
244
- (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
245
- end
246
- Numo::DFloat[*log_likelihoods].transpose
247
- end
248
- end
249
- end
250
- end