rumale-naive_bayes 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6099259a89800806a4c1cb97f2c6fd76d9a3ee3c06bfafbd26ff585ce029bc8d
4
+ data.tar.gz: bd77f90edab0d920bb9bda3c9c2e21b3fd7fde646c56d7700d131c2eba2005c0
5
+ SHA512:
6
+ metadata.gz: 878ef0a8d13a8df2cb0b27c8f87db50572c7ba7b81ab4ad5840bf6c263a2cc607e3dd12273c630f869df754be9edd69339364f927b33817ec3d17749fc76f74f
7
+ data.tar.gz: fcf815a1f21e9db918f898ee9d917948c1737ab2b66f1dd043c2f3b49d1b340fd40b933c0a1c31ec466a5fd3cbd94bf9dc821bb31f8a1a137bb573327b195799
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::NaiveBayes
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-naive_bayes.svg)](https://badge.fury.io/rb/rumale-naive_bayes)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-naive_bayes/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::NaiveBayes provides naive bayes models,
9
+ such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
10
+ with the Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-naive_bayes'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-naive_bayes
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - NaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/validation'
6
+
7
module Rumale
  module NaiveBayes
    # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
    # This class is used internally.
    #
    # The methods below read @classes and call decision_function(x); neither is defined here,
    # so concrete classifiers are expected to provide both.
    class BaseNaiveBayes < ::Rumale::Base::Estimator
      include ::Rumale::Base::Classifier

      def initialize # rubocop:disable Lint/UselessMethodDefinition
        super()
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples = x.shape.first
        decision_values = decision_function(x)
        # For each sample, pick the label whose decision value is the largest.
        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Predict log-probability for samples.
      #
      # The decision values are normalized per sample with a log-sum-exp. The row maximum is
      # subtracted before exponentiating so that strongly negative log-likelihoods do not
      # underflow to zero (which would turn the normalizer into log(0)).
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
      def predict_log_proba(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples, = x.shape
        log_likelihoods = decision_function(x)
        row_max = log_likelihoods.max(axis: 1).reshape(n_samples, 1)
        shifted = log_likelihoods - row_max
        # log p = ll - logsumexp(ll) = (ll - max) - log(sum(exp(ll - max)))
        shifted - Numo::NMath.log(Numo::NMath.exp(shifted).sum(axis: 1)).reshape(n_samples, 1)
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # exp never yields a negative value, so no sign correction is needed afterwards.
        Numo::NMath.exp(predict_log_proba(x))
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
module Rumale
  module NaiveBayes
    # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
    #
    # @example
    #   require 'rumale/naive_bayes/bernoulli_nb'
    #
    #   estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
    class BernoulliNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the conditional probabilities for features of each class.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :feature_probs

      # Create a new classifier with Bernoulli Naive Bayes.
      #
      # @param smoothing_param [Float] The Laplace smoothing parameter.
      # @param bin_threshold [Float] The threshold for binarizing of features.
      def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
        super()
        @params = {
          smoothing_param: smoothing_param,
          bin_threshold: bin_threshold
        }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [BernoulliNB] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples, = x.shape
        # A feature counts as "present" when its value is strictly above the threshold.
        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
        # The priors use the raw class counts; smoothing is applied only afterwards.
        @class_priors = n_samples_each_class / n_samples
        count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(axis: 0) }]
        count_features += @params[:smoothing_param]
        # Each feature has two outcomes (present / absent), hence twice the smoothing term.
        n_samples_each_class += 2.0 * @params[:smoothing_param]
        n_classes = @classes.size
        @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
        self
      end

      # Calculate confidence scores for samples.
      #
      # The score of a class is the log prior plus the log-likelihood of the present features
      # (log p) plus the log-likelihood of the absent features (log(1 - p)).
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_classes = @classes.size
        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
        not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
        log_likelihoods = Array.new(n_classes) do |l|
          log_probs = Numo::NMath.log(@feature_probs[l, true])
          log_compl_probs = Numo::NMath.log(1.0 - @feature_probs[l, true])
          # Both terms must be added: without the explicit `+` only the last one would be kept.
          Math.log(@class_priors[l]) +
            (bin_x * log_probs).sum(axis: 1) +
            (not_bin_x * log_compl_probs).sum(axis: 1)
        end
        Numo::DFloat[*log_likelihoods].transpose.dup
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
module Rumale
  module NaiveBayes
    # ComplementNB is a class that implements Complement Naive Bayes classifier.
    #
    # @example
    #   require 'rumale/naive_bayes/complement_nb'
    #
    #   estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
    class ComplementNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the conditional probabilities for features of each class.
      # They are estimated from the samples that do not belong to the class.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :feature_probs

      # Create a new classifier with Complement Naive Bayes.
      #
      # @param smoothing_param [Float] The smoothing parameter.
      # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
      def initialize(smoothing_param: 1.0, norm: false)
        super()
        @params = { smoothing_param: smoothing_param, norm: norm }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [ComplementNB] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples = x.shape.first
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        labels = @classes.to_a
        n_classes = @classes.size

        priors = labels.map { |label| y.eq(label).count.fdiv(n_samples) }
        @class_priors = Numo::DFloat[*priors]
        @class_log_probs = Numo::NMath.log(@class_priors)

        # Per class: feature totals over the samples that do NOT carry that label.
        compl_sums = labels.map { |label| x[y.ne(label).where, true].sum(axis: 0) }
        smoothed = Numo::DFloat[*compl_sums] + @params[:smoothing_param]
        @feature_probs = smoothed / smoothed.sum(axis: 1).reshape(n_classes, 1)

        log_probs = Numo::NMath.log(@feature_probs)
        @weights = if normalize?
                     log_probs / log_probs.sum(axis: 1).reshape(n_classes, 1)
                   else
                     -log_probs
                   end
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        projected = x.dot(@weights.transpose)
        @class_log_probs + projected
      end

      private

      # Only the literal value true enables weight normalization.
      def normalize?
        @params[:norm] == true
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
module Rumale
  module NaiveBayes
    # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
    #
    # @example
    #   require 'rumale/naive_bayes/gaussian_nb'
    #
    #   estimator = Rumale::NaiveBayes::GaussianNB.new
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    class GaussianNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the mean vectors of the classes.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :means

      # Return the variance vectors of the classes.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :variances

      # Create a new classifier with Gaussian Naive Bayes.
      def initialize
        super()
        @params = {}
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [GaussianNB] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples = x.shape.first
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        labels = @classes.to_a

        priors = labels.map { |label| y.eq(label).count / n_samples.to_f }
        @class_priors = Numo::DFloat[*priors]

        # Rows of x grouped by label; the per-feature statistics are taken within each group.
        grouped = labels.map { |label| x[y.eq(label).where, true] }
        @means = Numo::DFloat[*grouped.map { |rows| rows.mean(0) }]
        @variances = Numo::DFloat[*grouped.map { |rows| rows.var(0) }]
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        scores = Array.new(@classes.size) do |l|
          variance = @variances[l, true]
          log_norm = Numo::NMath.log(2.0 * Math::PI * variance)
          scaled_sq_dev = (x - @means[l, true])**2 / variance
          # Log prior plus the summed per-feature Gaussian log-densities.
          Math.log(@class_priors[l]) - 0.5 * (log_norm + scaled_sq_dev).sum(axis: 1)
        end
        Numo::DFloat[*scores].transpose.dup
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
module Rumale
  module NaiveBayes
    # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
    #
    # @example
    #   require 'rumale/naive_bayes/multinomial_nb'
    #
    #   estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
    class MultinomialNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the conditional probabilities for features of each class.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :feature_probs

      # Create a new classifier with Multinomial Naive Bayes.
      #
      # @param smoothing_param [Float] The Laplace smoothing parameter.
      def initialize(smoothing_param: 1.0)
        super()
        @params = { smoothing_param: smoothing_param }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [MultinomialNB] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples = x.shape.first
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        labels = @classes.to_a
        n_classes = @classes.size

        priors = labels.map { |label| y.eq(label).count / n_samples.to_f }
        @class_priors = Numo::DFloat[*priors]

        # Per class: feature totals over the samples carrying that label, then smoothed.
        class_sums = labels.map { |label| x[y.eq(label).where, true].sum(axis: 0) }
        smoothed = Numo::DFloat[*class_sums] + @params[:smoothing_param]
        @feature_probs = smoothed / smoothed.sum(axis: 1).reshape(n_classes, 1)
        self
      end

      # Calculate confidence scores for samples.
      #
      # NOTE(review): features are reduced to presence/absence (x > 0) here, so repeated
      # counts do not weigh the score, although fit sums the raw values. Confirm this is intended.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        presence = Numo::DFloat[*x.gt(0)]
        scores = Array.new(@classes.size) do |l|
          log_probs = Numo::NMath.log(@feature_probs[l, true])
          Math.log(@class_priors[l]) + (presence * log_probs).sum(axis: 1)
        end
        Numo::DFloat[*scores].transpose.dup
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
module Rumale
  module NaiveBayes
    # NegationNB is a class that implements Negation Naive Bayes classifier.
    #
    # @example
    #   require 'rumale/naive_bayes/negation_nb'
    #
    #   estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
    class NegationNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the conditional probabilities for features of each class.
      # They are estimated from the samples that do not belong to the class.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :feature_probs

      # Create a new classifier with Negation Naive Bayes.
      #
      # @param smoothing_param [Float] The smoothing parameter.
      def initialize(smoothing_param: 1.0)
        super()
        @params = { smoothing_param: smoothing_param }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [NegationNB] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples = x.shape.first
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        labels = @classes.to_a
        n_classes = @classes.size

        priors = labels.map { |label| y.eq(label).count.fdiv(n_samples) }
        @class_priors = Numo::DFloat[*priors]
        # Log of the inverse probability of NOT belonging to each class.
        @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))

        # Per class: feature totals over the samples that do NOT carry that label.
        compl_sums = labels.map { |label| x[y.ne(label).where, true].sum(axis: 0) }
        smoothed = Numo::DFloat[*compl_sums] + @params[:smoothing_param]
        @feature_probs = smoothed / smoothed.sum(axis: 1).reshape(n_classes, 1)
        @weights = Numo::NMath.log(@feature_probs)
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        projected = x.dot(@weights.transpose)
        @class_log_probs - projected
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
# Rumale is a machine learning library in Ruby.
module Rumale
  # This module consists of the classes that implement naive bayes models.
  module NaiveBayes
    # The version string of the rumale-naive_bayes gem.
    # @!visibility private
    VERSION = '0.24.0'
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'naive_bayes/base_naive_bayes'
6
+ require_relative 'naive_bayes/bernoulli_nb'
7
+ require_relative 'naive_bayes/complement_nb'
8
+ require_relative 'naive_bayes/gaussian_nb'
9
+ require_relative 'naive_bayes/multinomial_nb'
10
+ require_relative 'naive_bayes/negation_nb'
11
+ require_relative 'naive_bayes/version'
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::NaiveBayes provides naive bayes models,
43
+ such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/naive_bayes.rb
54
+ - lib/rumale/naive_bayes/base_naive_bayes.rb
55
+ - lib/rumale/naive_bayes/bernoulli_nb.rb
56
+ - lib/rumale/naive_bayes/complement_nb.rb
57
+ - lib/rumale/naive_bayes/gaussian_nb.rb
58
+ - lib/rumale/naive_bayes/multinomial_nb.rb
59
+ - lib/rumale/naive_bayes/negation_nb.rb
60
+ - lib/rumale/naive_bayes/version.rb
61
+ homepage: https://github.com/yoshoku/rumale
62
+ licenses:
63
+ - BSD-3-Clause
64
+ metadata:
65
+ homepage_uri: https://github.com/yoshoku/rumale
66
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-naive_bayes
67
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
68
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
69
+ rubygems_mfa_required: 'true'
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.3.26
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Rumale::NaiveBayes provides naive bayes models with Rumale interface.
89
+ test_files: []