rumale 0.18.1 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -1
  3. data/CHANGELOG.md +16 -4
  4. data/lib/rumale.rb +6 -1
  5. data/lib/rumale/clustering/dbscan.rb +0 -17
  6. data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
  7. data/lib/rumale/clustering/hdbscan.rb +0 -15
  8. data/lib/rumale/clustering/k_means.rb +0 -17
  9. data/lib/rumale/clustering/k_medoids.rb +0 -19
  10. data/lib/rumale/clustering/power_iteration.rb +0 -19
  11. data/lib/rumale/clustering/single_linkage.rb +0 -17
  12. data/lib/rumale/clustering/spectral_clustering.rb +0 -17
  13. data/lib/rumale/evaluation_measure/function.rb +34 -0
  14. data/lib/rumale/kernel_approximation/rbf.rb +0 -19
  15. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
  16. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
  18. data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
  19. data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
  20. data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
  21. data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
  22. data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
  23. data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
  24. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
  25. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
  26. data/lib/rumale/neural_network/adam.rb +0 -19
  27. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
  28. data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
  29. data/lib/rumale/preprocessing/label_encoder.rb +0 -15
  30. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
  31. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
  32. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
  33. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
  34. data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
  35. data/lib/rumale/version.rb +1 -1
  36. metadata +8 -3
  37. data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250
@@ -88,21 +88,6 @@ module Rumale
88
88
  end
89
89
  transformed
90
90
  end
91
-
92
- # Dump marshal data.
93
- # @return [Hash] The marshal data about BinDiscretizer
94
- def marshal_dump
95
- { params: @params,
96
- feature_steps: @feature_steps }
97
- end
98
-
99
- # Load marshal data.
100
- # @return [nil]
101
- def marshal_load(obj)
102
- @params = obj[:params]
103
- @feature_steps = obj[:feature_steps]
104
- nil
105
- end
106
91
  end
107
92
  end
108
93
  end
@@ -84,21 +84,6 @@ module Rumale
84
84
  n_samples = x.shape[0]
85
85
  Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
86
86
  end
87
-
88
- # Dump marshal data.
89
- # @return [Hash] The marshal data about LabelBinarizer.
90
- def marshal_dump
91
- { params: @params,
92
- classes: @classes }
93
- end
94
-
95
- # Load marshal data.
96
- # @return [nil]
97
- def marshal_load(obj)
98
- @params = obj[:params]
99
- @classes = obj[:classes]
100
- nil
101
- end
102
87
  end
103
88
  end
104
89
  end
@@ -74,21 +74,6 @@ module Rumale
74
74
  x = check_convert_label_array(x)
75
75
  x.to_a.map { |n| @classes[n] }
76
76
  end
77
-
78
- # Dump marshal data.
79
- # @return [Hash] The marshal data about LabelEncoder
80
- def marshal_dump
81
- { params: @params,
82
- classes: @classes }
83
- end
84
-
85
- # Load marshal data.
86
- # @return [nil]
87
- def marshal_load(obj)
88
- @params = obj[:params]
89
- @classes = obj[:classes]
90
- nil
91
- end
92
77
  end
93
78
  end
94
79
  end
@@ -56,21 +56,6 @@ module Rumale
56
56
  x = check_convert_sample_array(x)
57
57
  x / @max_abs_vec
58
58
  end
59
-
60
- # Dump marshal data.
61
- # @return [Hash] The marshal data about MaxAbsScaler.
62
- def marshal_dump
63
- { params: @params,
64
- max_abs_vec: @max_abs_vec }
65
- end
66
-
67
- # Load marshal data.
68
- # @return [nil]
69
- def marshal_load(obj)
70
- @params = obj[:params]
71
- @max_abs_vec = obj[:max_abs_vec]
72
- nil
73
- end
74
59
  end
75
60
  end
76
61
  end
@@ -71,23 +71,6 @@ module Rumale
71
71
  nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
72
72
  nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
73
73
  end
74
-
75
- # Dump marshal data.
76
- # @return [Hash] The marshal data about MinMaxScaler.
77
- def marshal_dump
78
- { params: @params,
79
- min_vec: @min_vec,
80
- max_vec: @max_vec }
81
- end
82
-
83
- # Load marshal data.
84
- # @return [nil]
85
- def marshal_load(obj)
86
- @params = obj[:params]
87
- @min_vec = obj[:min_vec]
88
- @max_vec = obj[:max_vec]
89
- nil
90
- end
91
74
  end
92
75
  end
93
76
  end
@@ -81,25 +81,6 @@ module Rumale
81
81
  codes[true, @active_features].dup
82
82
  end
83
83
 
84
- # Dump marshal data.
85
- # @return [Hash] The marshal data about OneHotEncoder.
86
- def marshal_dump
87
- { params: @params,
88
- n_values: @n_values,
89
- active_features: @active_features,
90
- feature_indices: @feature_indices }
91
- end
92
-
93
- # Load marshal data.
94
- # @return [nil]
95
- def marshal_load(obj)
96
- @params = obj[:params]
97
- @n_values = obj[:n_values]
98
- @active_features = obj[:active_features]
99
- @feature_indices = obj[:feature_indices]
100
- nil
101
- end
102
-
103
84
  private
104
85
 
105
86
  def encode(x, indices)
@@ -102,19 +102,6 @@ module Rumale
102
102
 
103
103
  Numo::NArray.asarray(inv_transformed.transpose)
104
104
  end
105
-
106
- # Dump marshal data.
107
- # @return [Hash] The marshal data about OrdinalEncoder.
108
- def marshal_dump
109
- { categories: @categories }
110
- end
111
-
112
- # Load marshal data.
113
- # @return [nil]
114
- def marshal_load(obj)
115
- @categories = obj[:categories]
116
- nil
117
- end
118
105
  end
119
106
  end
120
107
  end
@@ -66,21 +66,6 @@ module Rumale
66
66
  n_samples, = x.shape
67
67
  (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
68
68
  end
69
-
70
- # Dump marshal data.
71
- # @return [Hash] The marshal data about StandardScaler.
72
- def marshal_dump
73
- { mean_vec: @mean_vec,
74
- std_vec: @std_vec }
75
- end
76
-
77
- # Load marshal data.
78
- # @return [nil]
79
- def marshal_load(obj)
80
- @mean_vec = obj[:mean_vec]
81
- @std_vec = obj[:std_vec]
82
- nil
83
- end
84
69
  end
85
70
  end
86
71
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.18.1'
6
+ VERSION = '0.18.2'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.1
4
+ version: 0.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-14 00:00:00.000000000 Z
11
+ date: 2020-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -266,7 +266,12 @@ files:
266
266
  - lib/rumale/model_selection/stratified_k_fold.rb
267
267
  - lib/rumale/model_selection/stratified_shuffle_split.rb
268
268
  - lib/rumale/multiclass/one_vs_rest_classifier.rb
269
- - lib/rumale/naive_bayes/naive_bayes.rb
269
+ - lib/rumale/naive_bayes/base_naive_bayes.rb
270
+ - lib/rumale/naive_bayes/bernoulli_nb.rb
271
+ - lib/rumale/naive_bayes/complement_nb.rb
272
+ - lib/rumale/naive_bayes/gaussian_nb.rb
273
+ - lib/rumale/naive_bayes/multinomial_nb.rb
274
+ - lib/rumale/naive_bayes/negation_nb.rb
270
275
  - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
271
276
  - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
272
277
  - lib/rumale/nearest_neighbors/vp_tree.rb
@@ -1,250 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/base/classifier'
5
-
6
- module Rumale
7
- # This module consists of the classes that implement naive bayes models.
8
- module NaiveBayes
9
- # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
10
- class BaseNaiveBayes
11
- include Base::BaseEstimator
12
- include Base::Classifier
13
-
14
- # Predict class labels for samples.
15
- #
16
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
17
- # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
18
- def predict(x)
19
- x = check_convert_sample_array(x)
20
- n_samples = x.shape.first
21
- decision_values = decision_function(x)
22
- Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
23
- end
24
-
25
- # Predict log-probability for samples.
26
- #
27
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
28
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
29
- def predict_log_proba(x)
30
- x = check_convert_sample_array(x)
31
- n_samples, = x.shape
32
- log_likelihoods = decision_function(x)
33
- log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
34
- end
35
-
36
- # Predict probability for samples.
37
- #
38
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
39
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
40
- def predict_proba(x)
41
- x = check_convert_sample_array(x)
42
- Numo::NMath.exp(predict_log_proba(x)).abs
43
- end
44
- end
45
-
46
- # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
47
- #
48
- # @example
49
- # estimator = Rumale::NaiveBayes::GaussianNB.new
50
- # estimator.fit(training_samples, training_labels)
51
- # results = estimator.predict(testing_samples)
52
- class GaussianNB < BaseNaiveBayes
53
- # Return the class labels.
54
- # @return [Numo::Int32] (size: n_classes)
55
- attr_reader :classes
56
-
57
- # Return the prior probabilities of the classes.
58
- # @return [Numo::DFloat] (shape: [n_classes])
59
- attr_reader :class_priors
60
-
61
- # Return the mean vectors of the classes.
62
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
63
- attr_reader :means
64
-
65
- # Return the variance vectors of the classes.
66
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
67
- attr_reader :variances
68
-
69
- # Create a new classifier with Gaussian Naive Bayes.
70
- def initialize
71
- @params = {}
72
- end
73
-
74
- # Fit the model with given training data.
75
- #
76
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
77
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
78
- # to be used for fitting the model.
79
- # @return [GaussianNB] The learned classifier itself.
80
- def fit(x, y)
81
- x = check_convert_sample_array(x)
82
- y = check_convert_label_array(y)
83
- check_sample_label_size(x, y)
84
- n_samples, = x.shape
85
- @classes = Numo::Int32[*y.to_a.uniq.sort]
86
- @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
87
- @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
88
- @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
89
- self
90
- end
91
-
92
- # Calculate confidence scores for samples.
93
- #
94
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
95
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
96
- def decision_function(x)
97
- x = check_convert_sample_array(x)
98
- n_classes = @classes.size
99
- log_likelihoods = Array.new(n_classes) do |l|
100
- Math.log(@class_priors[l]) - 0.5 * (
101
- Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
102
- ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
103
- end
104
- Numo::DFloat[*log_likelihoods].transpose
105
- end
106
- end
107
-
108
- # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
109
- #
110
- # @example
111
- # estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
112
- # estimator.fit(training_samples, training_labels)
113
- # results = estimator.predict(testing_samples)
114
- #
115
- # *Reference*
116
- # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
117
- class MultinomialNB < BaseNaiveBayes
118
- # Return the class labels.
119
- # @return [Numo::Int32] (size: n_classes)
120
- attr_reader :classes
121
-
122
- # Return the prior probabilities of the classes.
123
- # @return [Numo::DFloat] (shape: [n_classes])
124
- attr_reader :class_priors
125
-
126
- # Return the conditional probabilities for features of each class.
127
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
128
- attr_reader :feature_probs
129
-
130
- # Create a new classifier with Multinomial Naive Bayes.
131
- #
132
- # @param smoothing_param [Float] The Laplace smoothing parameter.
133
- def initialize(smoothing_param: 1.0)
134
- check_params_numeric(smoothing_param: smoothing_param)
135
- check_params_positive(smoothing_param: smoothing_param)
136
- @params = {}
137
- @params[:smoothing_param] = smoothing_param
138
- end
139
-
140
- # Fit the model with given training data.
141
- #
142
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
143
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
144
- # to be used for fitting the model.
145
- # @return [MultinomialNB] The learned classifier itself.
146
- def fit(x, y)
147
- x = check_convert_sample_array(x)
148
- y = check_convert_label_array(y)
149
- check_sample_label_size(x, y)
150
- n_samples, = x.shape
151
- @classes = Numo::Int32[*y.to_a.uniq.sort]
152
- @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
153
- count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
154
- count_features += @params[:smoothing_param]
155
- n_classes = @classes.size
156
- @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
157
- self
158
- end
159
-
160
- # Calculate confidence scores for samples.
161
- #
162
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
163
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
164
- def decision_function(x)
165
- x = check_convert_sample_array(x)
166
- n_classes = @classes.size
167
- bin_x = x.gt(0)
168
- log_likelihoods = Array.new(n_classes) do |l|
169
- Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
170
- end
171
- Numo::DFloat[*log_likelihoods].transpose
172
- end
173
- end
174
-
175
- # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
176
- #
177
- # @example
178
- # estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
179
- # estimator.fit(training_samples, training_labels)
180
- # results = estimator.predict(testing_samples)
181
- #
182
- # *Reference*
183
- # - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
184
- class BernoulliNB < BaseNaiveBayes
185
- # Return the class labels.
186
- # @return [Numo::Int32] (size: n_classes)
187
- attr_reader :classes
188
-
189
- # Return the prior probabilities of the classes.
190
- # @return [Numo::DFloat] (shape: [n_classes])
191
- attr_reader :class_priors
192
-
193
- # Return the conditional probabilities for features of each class.
194
- # @return [Numo::DFloat] (shape: [n_classes, n_features])
195
- attr_reader :feature_probs
196
-
197
- # Create a new classifier with Bernoulli Naive Bayes.
198
- #
199
- # @param smoothing_param [Float] The Laplace smoothing parameter.
200
- # @param bin_threshold [Float] The threshold for binarizing of features.
201
- def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
202
- check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
203
- check_params_positive(smoothing_param: smoothing_param)
204
- @params = {}
205
- @params[:smoothing_param] = smoothing_param
206
- @params[:bin_threshold] = bin_threshold
207
- end
208
-
209
- # Fit the model with given training data.
210
- #
211
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
212
- # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
213
- # to be used for fitting the model.
214
- # @return [BernoulliNB] The learned classifier itself.
215
- def fit(x, y)
216
- x = check_convert_sample_array(x)
217
- y = check_convert_label_array(y)
218
- check_sample_label_size(x, y)
219
- n_samples, = x.shape
220
- bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
221
- @classes = Numo::Int32[*y.to_a.uniq.sort]
222
- n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
223
- @class_priors = n_samples_each_class / n_samples
224
- count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
225
- count_features += @params[:smoothing_param]
226
- n_samples_each_class += 2.0 * @params[:smoothing_param]
227
- n_classes = @classes.size
228
- @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
229
- self
230
- end
231
-
232
- # Calculate confidence scores for samples.
233
- #
234
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
235
- # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
236
- def decision_function(x)
237
- x = check_convert_sample_array(x)
238
- n_classes = @classes.size
239
- bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
240
- not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
241
- log_likelihoods = Array.new(n_classes) do |l|
242
- Math.log(@class_priors[l]) + (
243
- (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
244
- (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
245
- end
246
- Numo::DFloat[*log_likelihoods].transpose
247
- end
248
- end
249
- end
250
- end