rumale 0.18.1 → 0.18.2

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -1
  3. data/CHANGELOG.md +16 -4
  4. data/lib/rumale.rb +6 -1
  5. data/lib/rumale/clustering/dbscan.rb +0 -17
  6. data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
  7. data/lib/rumale/clustering/hdbscan.rb +0 -15
  8. data/lib/rumale/clustering/k_means.rb +0 -17
  9. data/lib/rumale/clustering/k_medoids.rb +0 -19
  10. data/lib/rumale/clustering/power_iteration.rb +0 -19
  11. data/lib/rumale/clustering/single_linkage.rb +0 -17
  12. data/lib/rumale/clustering/spectral_clustering.rb +0 -17
  13. data/lib/rumale/evaluation_measure/function.rb +34 -0
  14. data/lib/rumale/kernel_approximation/rbf.rb +0 -19
  15. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
  16. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
  18. data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
  19. data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
  20. data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
  21. data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
  22. data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
  23. data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
  24. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
  25. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
  26. data/lib/rumale/neural_network/adam.rb +0 -19
  27. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
  28. data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
  29. data/lib/rumale/preprocessing/label_encoder.rb +0 -15
  30. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
  31. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
  32. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
  33. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
  34. data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
  35. data/lib/rumale/version.rb +1 -1
  36. metadata +8 -3
  37. data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250
data/lib/rumale/naive_bayes/bernoulli_nb.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+require 'rumale/naive_bayes/base_naive_bayes'
+
+module Rumale
+  module NaiveBayes
+    # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
+    class BernoulliNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Bernoulli Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      # @param bin_threshold [Float] The threshold for binarizing features.
+      def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+        check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:bin_threshold] = bin_threshold
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [BernoulliNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
+        @class_priors = n_samples_each_class / n_samples
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_samples_each_class += 2.0 * @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (
+            (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1) +
+            (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end
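A minimal usage sketch of the new BernoulliNB class on invented toy data (the feature values, labels, and threshold below are illustrative assumptions, not from the gem): features are binarized at bin_threshold, per-class Bernoulli probabilities are estimated with Laplace smoothing, and prediction takes the class with the highest log joint likelihood.

    require 'rumale'

    # Invented toy data: 4 samples x 3 features, two classes.
    x = Numo::DFloat[[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 1]]
    y = Numo::Int32[0, 0, 1, 1]

    estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.5)
    estimator.fit(x, y)

    estimator.feature_probs         # smoothed P(feature = 1 | class), shape [2, 3]
    estimator.decision_function(x)  # per-class log joint likelihoods, shape [4, 2]
    estimator.predict(x)            # label with the highest score per sample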
data/lib/rumale/naive_bayes/complement_nb.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+require 'rumale/naive_bayes/base_naive_bayes'
+
+module Rumale
+  module NaiveBayes
+    # ComplementNB is a class that implements Complement Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML '03, pp. 616--623, 2003.
+    class ComplementNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Complement Naive Bayes.
+      #
+      # @param smoothing_param [Float] The smoothing parameter.
+      # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
+      def initialize(smoothing_param: 1.0, norm: false)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        check_params_boolean(norm: norm)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:norm] = norm
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [ComplementNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
+        @class_log_probs = Numo::NMath.log(@class_priors)
+        compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
+        compl_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
+        feature_log_probs = Numo::NMath.log(@feature_probs)
+        @weights = if normalize?
+                     feature_log_probs / feature_log_probs.sum(1).reshape(n_classes, 1)
+                   else
+                     -feature_log_probs
+                   end
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        @class_log_probs + x.dot(@weights.transpose)
+      end
+
+      private
+
+      def normalize?
+        @params[:norm] == true
+      end
+    end
+  end
+end
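A sketch of ComplementNB on an invented word-count matrix (all numbers below are illustrative assumptions). As the fit code above shows, the weights are built from the complement-class feature counts; with norm: true each class's log-probability vector is additionally divided by its row sum before scoring.

    require 'rumale'

    # Invented term counts: 4 documents x 3 terms.
    x = Numo::DFloat[[2, 0, 1], [3, 1, 0], [0, 4, 2], [1, 3, 3]]
    y = Numo::Int32[0, 0, 1, 1]

    estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0, norm: true)
    estimator.fit(x, y)

    # Scores are class log-priors plus x.dot(weights.transpose); predict takes
    # the argmax over classes for each sample.
    estimator.decision_function(x)
    estimator.predict(x)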
data/lib/rumale/naive_bayes/gaussian_nb.rb
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+require 'rumale/naive_bayes/base_naive_bayes'
+
+module Rumale
+  module NaiveBayes
+    # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::GaussianNB.new
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    class GaussianNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the mean vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :means
+
+      # Return the variance vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :variances
+
+      # Create a new classifier with Gaussian Naive Bayes.
+      def initialize
+        @params = {}
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [GaussianNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
+        @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) - 0.5 * (
+            Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
+            ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end
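A sketch of GaussianNB on invented continuous features (all values below are illustrative assumptions). decision_function sums, per class, the log prior and the Gaussian log density of each feature under the per-class means and variances estimated in fit.

    require 'rumale'

    # Invented continuous features: 6 samples x 2 features, two well-separated classes.
    x = Numo::DFloat[[0.1, 1.2], [0.3, 0.9], [0.2, 1.1],
                     [2.1, 3.2], [1.9, 2.8], [2.3, 3.0]]
    y = Numo::Int32[0, 0, 0, 1, 1, 1]

    estimator = Rumale::NaiveBayes::GaussianNB.new
    estimator.fit(x, y)

    estimator.means      # per-class feature means, shape [2, 2]
    estimator.variances  # per-class feature variances, shape [2, 2]
    estimator.predict(Numo::DFloat[[0.2, 1.0], [2.0, 3.1]])  # expected: Numo::Int32[0, 1]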
data/lib/rumale/naive_bayes/multinomial_nb.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require 'rumale/naive_bayes/base_naive_bayes'
+
+module Rumale
+  module NaiveBayes
+    # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
+    class MultinomialNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Multinomial Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [MultinomialNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        bin_x = x.gt(0)
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end
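A sketch of MultinomialNB on invented counts (all numbers below are illustrative assumptions). Note that, as implemented above, decision_function weights the log feature probabilities by feature presence (x.gt(0)) rather than by the raw counts, so repeated occurrences of a term do not add to the score.

    require 'rumale'

    # Invented term counts: 4 documents x 3 terms.
    x = Numo::DFloat[[3, 0, 1], [2, 1, 0], [0, 2, 4], [1, 0, 5]]
    y = Numo::Int32[0, 0, 1, 1]

    estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
    estimator.fit(x, y)

    estimator.feature_probs         # smoothed per-class term distributions; rows sum to 1
    estimator.decision_function(x)
    estimator.predict(x)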
data/lib/rumale/naive_bayes/negation_nb.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'rumale/naive_bayes/base_naive_bayes'
+
+module Rumale
+  module NaiveBayes
+    # NegationNB is a class that implements Negation Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP '11, pp. 586--592, 2011.
+    class NegationNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+
+      # Create a new classifier with Negation Naive Bayes.
+      #
+      # @param smoothing_param [Float] The smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [NegationNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
+        @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))
+        compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
+        compl_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
+        @weights = Numo::NMath.log(@feature_probs)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        @class_log_probs - x.dot(@weights.transpose)
+      end
+    end
+  end
+end
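A sketch of NegationNB on the same kind of invented counts (values are illustrative assumptions). Like ComplementNB it fits on complement-class counts, but it scores with log(1 / (1 - prior)) minus the dot product with the complement log probabilities, so a smaller complement likelihood yields a higher score.

    require 'rumale'

    x = Numo::DFloat[[2, 0, 1], [3, 1, 0], [0, 4, 2], [1, 3, 3]]  # invented counts
    y = Numo::Int32[0, 0, 1, 1]

    estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
    estimator.fit(x, y)
    estimator.decision_function(x)  # class_log_probs - x.dot(weights.transpose)
    estimator.predict(x)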
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
@@ -127,25 +127,6 @@ module Rumale
         n_samples = x.shape[0]
         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
       end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about KNeighborsClassifier.
-      def marshal_dump
-        { params: @params,
-          prototypes: @prototypes,
-          labels: @labels,
-          classes: @classes }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @prototypes = obj[:prototypes]
-        @labels = obj[:labels]
-        @classes = obj[:classes]
-        nil
-      end
     end
   end
 end
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
@@ -101,23 +101,6 @@ module Rumale
         end
         Numo::DFloat[*predicted_values]
       end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about KNeighborsRegressor.
-      def marshal_dump
-        { params: @params,
-          prototypes: @prototypes,
-          values: @values }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @prototypes = obj[:prototypes]
-        @values = obj[:values]
-        nil
-      end
     end
   end
 end
data/lib/rumale/neural_network/adam.rb
@@ -50,25 +50,6 @@ module Rumale
 
         weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
       end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      # def marshal_dump
-      #   { params: @params,
-      #     fst_moment: @fst_moment,
-      #     sec_moment: @sec_moment,
-      #     iter: @iter }
-      # end
-
-      # Load marshal data.
-      # @return [nil]
-      # def marshal_load(obj)
-      #   @params = obj[:params]
-      #   @fst_moment = obj[:fst_moment]
-      #   @sec_moment = obj[:sec_moment]
-      #   @iter = obj[:iter]
-      #   nil
-      # end
     end
   end
 end
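The three hunks above drop the explicit marshal_dump/marshal_load definitions from KNeighborsClassifier, KNeighborsRegressor, and the neural-network Adam optimizer (where they were already commented out). Presumably serialization now relies on Ruby's default Marshal behaviour, which dumps instance variables, including Numo arrays, directly. A hedged round-trip sketch under that assumption, with invented data:

    require 'rumale'

    # Invented training data: 20 samples x 4 features, binary labels.
    x_train = Numo::DFloat.new(20, 4).rand
    y_train = Numo::Int32.new(20).rand(2)

    estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 3)
    estimator.fit(x_train, y_train)

    # Without the custom hooks, Marshal serializes @params, @prototypes,
    # @labels, and @classes as ordinary instance variables.
    restored = Marshal.load(Marshal.dump(estimator))
    restored.predict(x_train)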