rumale 0.18.1 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -1
  3. data/CHANGELOG.md +16 -4
  4. data/lib/rumale.rb +6 -1
  5. data/lib/rumale/clustering/dbscan.rb +0 -17
  6. data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
  7. data/lib/rumale/clustering/hdbscan.rb +0 -15
  8. data/lib/rumale/clustering/k_means.rb +0 -17
  9. data/lib/rumale/clustering/k_medoids.rb +0 -19
  10. data/lib/rumale/clustering/power_iteration.rb +0 -19
  11. data/lib/rumale/clustering/single_linkage.rb +0 -17
  12. data/lib/rumale/clustering/spectral_clustering.rb +0 -17
  13. data/lib/rumale/evaluation_measure/function.rb +34 -0
  14. data/lib/rumale/kernel_approximation/rbf.rb +0 -19
  15. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
  16. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
  18. data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
  19. data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
  20. data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
  21. data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
  22. data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
  23. data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
  24. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
  25. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
  26. data/lib/rumale/neural_network/adam.rb +0 -19
  27. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
  28. data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
  29. data/lib/rumale/preprocessing/label_encoder.rb +0 -15
  30. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
  31. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
  32. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
  33. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
  34. data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
  35. data/lib/rumale/version.rb +1 -1
  36. metadata +8 -3
  37. data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
11
+ # estimator.fit(training_samples, training_labels)
12
+ # results = estimator.predict(testing_samples)
13
+ #
14
+ # *Reference*
15
+ # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
16
+ class BernoulliNB < BaseNaiveBayes
17
+ # Return the class labels.
18
+ # @return [Numo::Int32] (size: n_classes)
19
+ attr_reader :classes
20
+
21
+ # Return the prior probabilities of the classes.
22
+ # @return [Numo::DFloat] (shape: [n_classes])
23
+ attr_reader :class_priors
24
+
25
+ # Return the conditional probabilities for features of each class.
26
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
27
+ attr_reader :feature_probs
28
+
29
+ # Create a new classifier with Bernoulli Naive Bayes.
30
+ #
31
+ # @param smoothing_param [Float] The Laplace smoothing parameter.
32
+ # @param bin_threshold [Float] The threshold for binarizing the features.
33
+ def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
34
+ check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
35
+ check_params_positive(smoothing_param: smoothing_param)
36
+ @params = {}
37
+ @params[:smoothing_param] = smoothing_param
38
+ @params[:bin_threshold] = bin_threshold
39
+ end
40
+
41
+ # Fit the model with given training data.
42
+ #
43
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
44
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
45
+ # to be used for fitting the model.
46
+ # @return [BernoulliNB] The learned classifier itself.
47
+ def fit(x, y)
48
+ x = check_convert_sample_array(x)
49
+ y = check_convert_label_array(y)
50
+ check_sample_label_size(x, y)
51
+ n_samples, = x.shape
52
+ bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
53
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
54
+ n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
55
+ @class_priors = n_samples_each_class / n_samples
56
+ count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
57
+ count_features += @params[:smoothing_param]
58
+ n_samples_each_class += 2.0 * @params[:smoothing_param]
59
+ n_classes = @classes.size
60
+ @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
61
+ self
62
+ end
63
+
64
+ # Calculate confidence scores for samples.
65
+ #
66
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
67
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
68
+ def decision_function(x)
69
+ x = check_convert_sample_array(x)
70
+ n_classes = @classes.size
71
+ bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
72
+ not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
73
+ log_likelihoods = Array.new(n_classes) do |l|
74
+ Math.log(@class_priors[l]) + (
75
+ (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
76
+ (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
77
+ end
78
+ Numo::DFloat[*log_likelihoods].transpose
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # ComplementNB is a class that implements Complement Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
11
+ # estimator.fit(training_samples, training_labels)
12
+ # results = estimator.predict(testing_samples)
13
+ #
14
+ # *Reference*
15
+ # - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
16
+ class ComplementNB < BaseNaiveBayes
17
+ # Return the class labels.
18
+ # @return [Numo::Int32] (size: n_classes)
19
+ attr_reader :classes
20
+
21
+ # Return the prior probabilities of the classes.
22
+ # @return [Numo::DFloat] (shape: [n_classes])
23
+ attr_reader :class_priors
24
+
25
+ # Return the conditional probabilities for features of each class.
26
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
27
+ attr_reader :feature_probs
28
+
29
+ # Create a new classifier with Complement Naive Bayes.
30
+ #
31
+ # @param smoothing_param [Float] The smoothing parameter.
32
+ # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
33
+ def initialize(smoothing_param: 1.0, norm: false)
34
+ check_params_numeric(smoothing_param: smoothing_param)
35
+ check_params_positive(smoothing_param: smoothing_param)
36
+ check_params_boolean(norm: norm)
37
+ @params = {}
38
+ @params[:smoothing_param] = smoothing_param
39
+ @params[:norm] = norm
40
+ end
41
+
42
+ # Fit the model with given training data.
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
45
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
46
+ # to be used for fitting the model.
47
+ # @return [ComplementNB] The learned classifier itself.
48
+ def fit(x, y)
49
+ x = check_convert_sample_array(x)
50
+ y = check_convert_label_array(y)
51
+ check_sample_label_size(x, y)
52
+ n_samples, = x.shape
53
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
54
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
55
+ @class_log_probs = Numo::NMath.log(@class_priors)
56
+ compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
57
+ compl_features += @params[:smoothing_param]
58
+ n_classes = @classes.size
59
+ @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
60
+ feature_log_probs = Numo::NMath.log(@feature_probs)
61
+ @weights = if normalize?
62
+ feature_log_probs / feature_log_probs.sum(1).reshape(n_classes, 1)
63
+ else
64
+ -feature_log_probs
65
+ end
66
+ self
67
+ end
68
+
69
+ # Calculate confidence scores for samples.
70
+ #
71
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
72
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
73
+ def decision_function(x)
74
+ x = check_convert_sample_array(x)
75
+ @class_log_probs + x.dot(@weights.transpose)
76
+ end
77
+
78
+ private
79
+
80
+ def normalize?
81
+ @params[:norm] == true
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # estimator = Rumale::NaiveBayes::GaussianNB.new
11
+ # estimator.fit(training_samples, training_labels)
12
+ # results = estimator.predict(testing_samples)
13
+ class GaussianNB < BaseNaiveBayes
14
+ # Return the class labels.
15
+ # @return [Numo::Int32] (size: n_classes)
16
+ attr_reader :classes
17
+
18
+ # Return the prior probabilities of the classes.
19
+ # @return [Numo::DFloat] (shape: [n_classes])
20
+ attr_reader :class_priors
21
+
22
+ # Return the mean vectors of the classes.
23
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
24
+ attr_reader :means
25
+
26
+ # Return the variance vectors of the classes.
27
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
28
+ attr_reader :variances
29
+
30
+ # Create a new classifier with Gaussian Naive Bayes.
31
+ def initialize
32
+ @params = {}
33
+ end
34
+
35
+ # Fit the model with given training data.
36
+ #
37
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
38
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
39
+ # to be used for fitting the model.
40
+ # @return [GaussianNB] The learned classifier itself.
41
+ def fit(x, y)
42
+ x = check_convert_sample_array(x)
43
+ y = check_convert_label_array(y)
44
+ check_sample_label_size(x, y)
45
+ n_samples, = x.shape
46
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
47
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
48
+ @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
49
+ @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
50
+ self
51
+ end
52
+
53
+ # Calculate confidence scores for samples.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
56
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
57
+ def decision_function(x)
58
+ x = check_convert_sample_array(x)
59
+ n_classes = @classes.size
60
+ log_likelihoods = Array.new(n_classes) do |l|
61
+ Math.log(@class_priors[l]) - 0.5 * (
62
+ Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
63
+ ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
64
+ end
65
+ Numo::DFloat[*log_likelihoods].transpose
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
11
+ # estimator.fit(training_samples, training_labels)
12
+ # results = estimator.predict(testing_samples)
13
+ #
14
+ # *Reference*
15
+ # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
16
+ class MultinomialNB < BaseNaiveBayes
17
+ # Return the class labels.
18
+ # @return [Numo::Int32] (size: n_classes)
19
+ attr_reader :classes
20
+
21
+ # Return the prior probabilities of the classes.
22
+ # @return [Numo::DFloat] (shape: [n_classes])
23
+ attr_reader :class_priors
24
+
25
+ # Return the conditional probabilities for features of each class.
26
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
27
+ attr_reader :feature_probs
28
+
29
+ # Create a new classifier with Multinomial Naive Bayes.
30
+ #
31
+ # @param smoothing_param [Float] The Laplace smoothing parameter.
32
+ def initialize(smoothing_param: 1.0)
33
+ check_params_numeric(smoothing_param: smoothing_param)
34
+ check_params_positive(smoothing_param: smoothing_param)
35
+ @params = {}
36
+ @params[:smoothing_param] = smoothing_param
37
+ end
38
+
39
+ # Fit the model with given training data.
40
+ #
41
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
42
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
43
+ # to be used for fitting the model.
44
+ # @return [MultinomialNB] The learned classifier itself.
45
+ def fit(x, y)
46
+ x = check_convert_sample_array(x)
47
+ y = check_convert_label_array(y)
48
+ check_sample_label_size(x, y)
49
+ n_samples, = x.shape
50
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
51
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
52
+ count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
53
+ count_features += @params[:smoothing_param]
54
+ n_classes = @classes.size
55
+ @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
56
+ self
57
+ end
58
+
59
+ # Calculate confidence scores for samples.
60
+ #
61
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
62
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
63
+ def decision_function(x)
64
+ x = check_convert_sample_array(x)
65
+ n_classes = @classes.size
66
+ bin_x = x.gt(0)
67
+ log_likelihoods = Array.new(n_classes) do |l|
68
+ Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
69
+ end
70
+ Numo::DFloat[*log_likelihoods].transpose
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # NegationNB is a class that implements Negation Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
11
+ # estimator.fit(training_samples, training_labels)
12
+ # results = estimator.predict(testing_samples)
13
+ #
14
+ # *Reference*
15
+ # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
16
+ class NegationNB < BaseNaiveBayes
17
+ # Return the class labels.
18
+ # @return [Numo::Int32] (size: n_classes)
19
+ attr_reader :classes
20
+
21
+ # Return the prior probabilities of the classes.
22
+ # @return [Numo::DFloat] (shape: [n_classes])
23
+ attr_reader :class_priors
24
+
25
+ # Return the conditional probabilities for features of each class.
26
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
27
+ attr_reader :feature_probs
28
+
29
+ # Create a new classifier with Negation Naive Bayes.
30
+ #
31
+ # @param smoothing_param [Float] The smoothing parameter.
32
+ def initialize(smoothing_param: 1.0)
33
+ check_params_numeric(smoothing_param: smoothing_param)
34
+ check_params_positive(smoothing_param: smoothing_param)
35
+ @params = {}
36
+ @params[:smoothing_param] = smoothing_param
37
+ end
38
+
39
+ # Fit the model with given training data.
40
+ #
41
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
42
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
43
+ # to be used for fitting the model.
44
+ # @return [NegationNB] The learned classifier itself.
45
+ def fit(x, y)
46
+ x = check_convert_sample_array(x)
47
+ y = check_convert_label_array(y)
48
+ check_sample_label_size(x, y)
49
+ n_samples, = x.shape
50
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
51
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
52
+ @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))
53
+ compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
54
+ compl_features += @params[:smoothing_param]
55
+ n_classes = @classes.size
56
+ @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
57
+ @weights = Numo::NMath.log(@feature_probs)
58
+ self
59
+ end
60
+
61
+ # Calculate confidence scores for samples.
62
+ #
63
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
64
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
65
+ def decision_function(x)
66
+ x = check_convert_sample_array(x)
67
+ @class_log_probs - x.dot(@weights.transpose)
68
+ end
69
+ end
70
+ end
71
+ end
@@ -127,25 +127,6 @@ module Rumale
127
127
  n_samples = x.shape[0]
128
128
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
129
129
  end
130
-
131
- # Dump marshal data.
132
- # @return [Hash] The marshal data about KNeighborsClassifier.
133
- def marshal_dump
134
- { params: @params,
135
- prototypes: @prototypes,
136
- labels: @labels,
137
- classes: @classes }
138
- end
139
-
140
- # Load marshal data.
141
- # @return [nil]
142
- def marshal_load(obj)
143
- @params = obj[:params]
144
- @prototypes = obj[:prototypes]
145
- @labels = obj[:labels]
146
- @classes = obj[:classes]
147
- nil
148
- end
149
130
  end
150
131
  end
151
132
  end
@@ -101,23 +101,6 @@ module Rumale
101
101
  end
102
102
  Numo::DFloat[*predicted_values]
103
103
  end
104
-
105
- # Dump marshal data.
106
- # @return [Hash] The marshal data about KNeighborsRegressor.
107
- def marshal_dump
108
- { params: @params,
109
- prototypes: @prototypes,
110
- values: @values }
111
- end
112
-
113
- # Load marshal data.
114
- # @return [nil]
115
- def marshal_load(obj)
116
- @params = obj[:params]
117
- @prototypes = obj[:prototypes]
118
- @values = obj[:values]
119
- nil
120
- end
121
104
  end
122
105
  end
123
106
  end
@@ -50,25 +50,6 @@ module Rumale
50
50
 
51
51
  weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
52
52
  end
53
-
54
- # Dump marshal data.
55
- # @return [Hash] The marshal data.
56
- # def marshal_dump
57
- # { params: @params,
58
- # fst_moment: @fst_moment,
59
- # sec_moment: @sec_moment,
60
- # iter: @iter }
61
- # end
62
-
63
- # Load marshal data.
64
- # @return [nil]
65
- # def marshal_load(obj)
66
- # @params = obj[:params]
67
- # @fst_moment = obj[:fst_moment]
68
- # @sec_moment = obj[:sec_moment]
69
- # @iter = obj[:iter]
70
- # nil
71
- # end
72
53
  end
73
54
  end
74
55
  end