rumale-naive_bayes 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6099259a89800806a4c1cb97f2c6fd76d9a3ee3c06bfafbd26ff585ce029bc8d
4
+ data.tar.gz: bd77f90edab0d920bb9bda3c9c2e21b3fd7fde646c56d7700d131c2eba2005c0
5
+ SHA512:
6
+ metadata.gz: 878ef0a8d13a8df2cb0b27c8f87db50572c7ba7b81ab4ad5840bf6c263a2cc607e3dd12273c630f869df754be9edd69339364f927b33817ec3d17749fc76f74f
7
+ data.tar.gz: fcf815a1f21e9db918f898ee9d917948c1737ab2b66f1dd043c2f3b49d1b340fd40b933c0a1c31ec466a5fd3cbd94bf9dc821bb31f8a1a137bb573327b195799
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::NaiveBayes
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-naive_bayes.svg)](https://badge.fury.io/rb/rumale-naive_bayes)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-naive_bayes/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::NaiveBayes provides naive bayes models,
9
+ such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
10
+ with Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-naive_bayes'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-naive_bayes
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - NaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module NaiveBayes
9
+ # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
10
+ # This class is used internally.
11
+ class BaseNaiveBayes < ::Rumale::Base::Estimator
12
+ include ::Rumale::Base::Classifier
13
+
14
+ def initialize # rubocop:disable Lint/UselessMethodDefinition
15
+ super()
16
+ end
17
+
18
+ # Predict class labels for samples.
19
+ #
20
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
21
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
22
+ def predict(x)
23
+ x = ::Rumale::Validation.check_convert_sample_array(x)
24
+
25
+ n_samples = x.shape.first
26
+ decision_values = decision_function(x)
27
+ Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
28
+ end
29
+
30
+ # Predict log-probability for samples.
31
+ #
32
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
33
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
34
+ def predict_log_proba(x)
35
+ x = ::Rumale::Validation.check_convert_sample_array(x)
36
+
37
+ n_samples, = x.shape
38
+ log_likelihoods = decision_function(x)
39
+ log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(axis: 1)).reshape(n_samples, 1)
40
+ end
41
+
42
+ # Predict probability for samples.
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
45
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
46
+ def predict_proba(x)
47
+ x = ::Rumale::Validation.check_convert_sample_array(x)
48
+
49
+ Numo::NMath.exp(predict_log_proba(x)).abs
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # require 'rumale/naive_bayes/bernoulli_nb'
11
+ #
12
+ # estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
13
+ # estimator.fit(training_samples, training_labels)
14
+ # results = estimator.predict(testing_samples)
15
+ #
16
+ # *Reference*
17
+ # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
18
+ class BernoulliNB < BaseNaiveBayes
19
+ # Return the class labels.
20
+ # @return [Numo::Int32] (size: n_classes)
21
+ attr_reader :classes
22
+
23
+ # Return the prior probabilities of the classes.
24
+ # @return [Numo::DFloat] (shape: [n_classes])
25
+ attr_reader :class_priors
26
+
27
+ # Return the conditional probabilities for features of each class.
28
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
29
+ attr_reader :feature_probs
30
+
31
+ # Create a new classifier with Bernoulli Naive Bayes.
32
+ #
33
+ # @param smoothing_param [Float] The Laplace smoothing parameter.
34
+ # @param bin_threshold [Float] The threshold for binarizing of features.
35
+ def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
36
+ super()
37
+ @params = {
38
+ smoothing_param: smoothing_param,
39
+ bin_threshold: bin_threshold
40
+ }
41
+ end
42
+
43
+ # Fit the model with given training data.
44
+ #
45
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
46
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
47
+ # to be used for fitting the model.
48
+ # @return [BernoulliNB] The learned classifier itself.
49
+ def fit(x, y)
50
+ x = ::Rumale::Validation.check_convert_sample_array(x)
51
+ y = ::Rumale::Validation.check_convert_label_array(y)
52
+ ::Rumale::Validation.check_sample_size(x, y)
53
+
54
+ n_samples, = x.shape
55
+ bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
56
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
57
+ n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
58
+ @class_priors = n_samples_each_class / n_samples
59
+ count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(axis: 0) }]
60
+ count_features += @params[:smoothing_param]
61
+ n_samples_each_class += 2.0 * @params[:smoothing_param]
62
+ n_classes = @classes.size
63
+ @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
64
+ self
65
+ end
66
+
67
+ # Calculate confidence scores for samples.
68
+ #
69
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
70
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
71
+ def decision_function(x)
72
+ x = ::Rumale::Validation.check_convert_sample_array(x)
73
+
74
+ n_classes = @classes.size
75
+ bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
76
+ not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
77
+ log_likelihoods = Array.new(n_classes) do |l|
78
+ Math.log(@class_priors[l]) + (
79
+ (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(axis: 1)
80
+ (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(axis: 1))
81
+ end
82
+ Numo::DFloat[*log_likelihoods].transpose.dup
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # ComplementNB is a class that implements Complement Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # require 'rumale/naive_bayes/complement_nb'
11
+ #
12
+ # estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
13
+ # estimator.fit(training_samples, training_labels)
14
+ # results = estimator.predict(testing_samples)
15
+ #
16
+ # *Reference*
17
+ # - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
18
+ class ComplementNB < BaseNaiveBayes
19
+ # Return the class labels.
20
+ # @return [Numo::Int32] (size: n_classes)
21
+ attr_reader :classes
22
+
23
+ # Return the prior probabilities of the classes.
24
+ # @return [Numo::DFloat] (shape: [n_classes])
25
+ attr_reader :class_priors
26
+
27
+ # Return the conditional probabilities for features of each class.
28
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
29
+ attr_reader :feature_probs
30
+
31
+ # Create a new classifier with Complement Naive Bayes.
32
+ #
33
+ # @param smoothing_param [Float] The smoothing parameter.
34
+ # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
35
+ def initialize(smoothing_param: 1.0, norm: false)
36
+ super()
37
+ @params = {
38
+ smoothing_param: smoothing_param,
39
+ norm: norm
40
+ }
41
+ end
42
+
43
+ # Fit the model with given training data.
44
+ #
45
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
46
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
47
+ # to be used for fitting the model.
48
+ # @return [ComplementNB] The learned classifier itself.
49
+ def fit(x, y)
50
+ x = ::Rumale::Validation.check_convert_sample_array(x)
51
+ y = ::Rumale::Validation.check_convert_label_array(y)
52
+ ::Rumale::Validation.check_sample_size(x, y)
53
+
54
+ n_samples, = x.shape
55
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
56
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
57
+ @class_log_probs = Numo::NMath.log(@class_priors)
58
+ compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(axis: 0) }]
59
+ compl_features += @params[:smoothing_param]
60
+ n_classes = @classes.size
61
+ @feature_probs = compl_features / compl_features.sum(axis: 1).reshape(n_classes, 1)
62
+ feature_log_probs = Numo::NMath.log(@feature_probs)
63
+ @weights = if normalize?
64
+ feature_log_probs / feature_log_probs.sum(axis: 1).reshape(n_classes, 1)
65
+ else
66
+ -feature_log_probs
67
+ end
68
+ self
69
+ end
70
+
71
+ # Calculate confidence scores for samples.
72
+ #
73
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
74
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
75
+ def decision_function(x)
76
+ x = ::Rumale::Validation.check_convert_sample_array(x)
77
+
78
+ @class_log_probs + x.dot(@weights.transpose)
79
+ end
80
+
81
+ private
82
+
83
+ def normalize?
84
+ @params[:norm] == true
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # require 'rumale/naive_bayes/gaussian_nb'
11
+ #
12
+ # estimator = Rumale::NaiveBayes::GaussianNB.new
13
+ # estimator.fit(training_samples, training_labels)
14
+ # results = estimator.predict(testing_samples)
15
+ class GaussianNB < BaseNaiveBayes
16
+ # Return the class labels.
17
+ # @return [Numo::Int32] (size: n_classes)
18
+ attr_reader :classes
19
+
20
+ # Return the prior probabilities of the classes.
21
+ # @return [Numo::DFloat] (shape: [n_classes])
22
+ attr_reader :class_priors
23
+
24
+ # Return the mean vectors of the classes.
25
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
26
+ attr_reader :means
27
+
28
+ # Return the variance vectors of the classes.
29
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
30
+ attr_reader :variances
31
+
32
+ # Create a new classifier with Gaussian Naive Bayes.
33
+ def initialize
34
+ super()
35
+ @params = {}
36
+ end
37
+
38
+ # Fit the model with given training data.
39
+ #
40
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
41
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
42
+ # to be used for fitting the model.
43
+ # @return [GaussianNB] The learned classifier itself.
44
+ def fit(x, y)
45
+ x = ::Rumale::Validation.check_convert_sample_array(x)
46
+ y = ::Rumale::Validation.check_convert_label_array(y)
47
+ ::Rumale::Validation.check_sample_size(x, y)
48
+
49
+ n_samples, = x.shape
50
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
51
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
52
+ @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
53
+ @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
54
+ self
55
+ end
56
+
57
+ # Calculate confidence scores for samples.
58
+ #
59
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
60
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
61
+ def decision_function(x)
62
+ x = ::Rumale::Validation.check_convert_sample_array(x)
63
+
64
+ n_classes = @classes.size
65
+ log_likelihoods = Array.new(n_classes) do |l|
66
+ Math.log(@class_priors[l]) - 0.5 * (
67
+ Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
68
+ ((x - @means[l, true])**2 / @variances[l, true])).sum(axis: 1)
69
+ end
70
+ Numo::DFloat[*log_likelihoods].transpose.dup
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # require 'rumale/naive_bayes/multinomial_nb'
11
+ #
12
+ # estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
13
+ # estimator.fit(training_samples, training_labels)
14
+ # results = estimator.predict(testing_samples)
15
+ #
16
+ # *Reference*
17
+ # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
18
+ class MultinomialNB < BaseNaiveBayes
19
+ # Return the class labels.
20
+ # @return [Numo::Int32] (size: n_classes)
21
+ attr_reader :classes
22
+
23
+ # Return the prior probabilities of the classes.
24
+ # @return [Numo::DFloat] (shape: [n_classes])
25
+ attr_reader :class_priors
26
+
27
+ # Return the conditional probabilities for features of each class.
28
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
29
+ attr_reader :feature_probs
30
+
31
+ # Create a new classifier with Multinomial Naive Bayes.
32
+ #
33
+ # @param smoothing_param [Float] The Laplace smoothing parameter.
34
+ def initialize(smoothing_param: 1.0)
35
+ super()
36
+ @params = { smoothing_param: smoothing_param }
37
+ end
38
+
39
+ # Fit the model with given training data.
40
+ #
41
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
42
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
43
+ # to be used for fitting the model.
44
+ # @return [MultinomialNB] The learned classifier itself.
45
+ def fit(x, y)
46
+ x = ::Rumale::Validation.check_convert_sample_array(x)
47
+ y = ::Rumale::Validation.check_convert_label_array(y)
48
+ ::Rumale::Validation.check_sample_size(x, y)
49
+
50
+ n_samples, = x.shape
51
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
52
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
53
+ count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(axis: 0) }]
54
+ count_features += @params[:smoothing_param]
55
+ n_classes = @classes.size
56
+ @feature_probs = count_features / count_features.sum(axis: 1).reshape(n_classes, 1)
57
+ self
58
+ end
59
+
60
+ # Calculate confidence scores for samples.
61
+ #
62
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
63
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
64
+ def decision_function(x)
65
+ x = ::Rumale::Validation.check_convert_sample_array(x)
66
+
67
+ n_classes = @classes.size
68
+ bin_x = x.gt(0)
69
+ log_likelihoods = Array.new(n_classes) do |l|
70
+ Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(axis: 1)
71
+ end
72
+ Numo::DFloat[*log_likelihoods].transpose.dup
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/naive_bayes/base_naive_bayes'
4
+
5
+ module Rumale
6
+ module NaiveBayes
7
+ # NegationNB is a class that implements Negation Naive Bayes classifier.
8
+ #
9
+ # @example
10
+ # require 'rumale/naive_bayes/negation_nb'
11
+ #
12
+ # estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
13
+ # estimator.fit(training_samples, training_labels)
14
+ # results = estimator.predict(testing_samples)
15
+ #
16
+ # *Reference*
17
+ # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
18
+ class NegationNB < BaseNaiveBayes
19
+ # Return the class labels.
20
+ # @return [Numo::Int32] (size: n_classes)
21
+ attr_reader :classes
22
+
23
+ # Return the prior probabilities of the classes.
24
+ # @return [Numo::DFloat] (shape: [n_classes])
25
+ attr_reader :class_priors
26
+
27
+ # Return the conditional probabilities for features of each class.
28
+ # @return [Numo::DFloat] (shape: [n_classes, n_features])
29
+ attr_reader :feature_probs
30
+
31
+ # Create a new classifier with Negation Naive Bayes.
32
+ #
33
+ # @param smoothing_param [Float] The smoothing parameter.
34
+ def initialize(smoothing_param: 1.0)
35
+ super()
36
+ @params = { smoothing_param: smoothing_param }
37
+ end
38
+
39
+ # Fit the model with given training data.
40
+ #
41
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
42
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
43
+ # to be used for fitting the model.
44
+ # @return [NegationNB] The learned classifier itself.
45
+ def fit(x, y)
46
+ x = ::Rumale::Validation.check_convert_sample_array(x)
47
+ y = ::Rumale::Validation.check_convert_label_array(y)
48
+ ::Rumale::Validation.check_sample_size(x, y)
49
+
50
+ n_samples, = x.shape
51
+ @classes = Numo::Int32[*y.to_a.uniq.sort]
52
+ @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
53
+ @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))
54
+ compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(axis: 0) }]
55
+ compl_features += @params[:smoothing_param]
56
+ n_classes = @classes.size
57
+ @feature_probs = compl_features / compl_features.sum(axis: 1).reshape(n_classes, 1)
58
+ @weights = Numo::NMath.log(@feature_probs)
59
+ self
60
+ end
61
+
62
+ # Calculate confidence scores for samples.
63
+ #
64
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
65
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
66
+ def decision_function(x)
67
+ x = ::Rumale::Validation.check_convert_sample_array(x)
68
+
69
+ @class_log_probs - x.dot(@weights.transpose)
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of the classes that implement naive bayes models.
6
+ module NaiveBayes
7
+ # @!visibility private
8
+ VERSION = '0.24.0'
9
+ end
10
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'naive_bayes/base_naive_bayes'
6
+ require_relative 'naive_bayes/bernoulli_nb'
7
+ require_relative 'naive_bayes/complement_nb'
8
+ require_relative 'naive_bayes/gaussian_nb'
9
+ require_relative 'naive_bayes/multinomial_nb'
10
+ require_relative 'naive_bayes/negation_nb'
11
+ require_relative 'naive_bayes/version'
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::NaiveBayes provides naive bayes models,
43
+ such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/naive_bayes.rb
54
+ - lib/rumale/naive_bayes/base_naive_bayes.rb
55
+ - lib/rumale/naive_bayes/bernoulli_nb.rb
56
+ - lib/rumale/naive_bayes/complement_nb.rb
57
+ - lib/rumale/naive_bayes/gaussian_nb.rb
58
+ - lib/rumale/naive_bayes/multinomial_nb.rb
59
+ - lib/rumale/naive_bayes/negation_nb.rb
60
+ - lib/rumale/naive_bayes/version.rb
61
+ homepage: https://github.com/yoshoku/rumale
62
+ licenses:
63
+ - BSD-3-Clause
64
+ metadata:
65
+ homepage_uri: https://github.com/yoshoku/rumale
66
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-naive_bayes
67
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
68
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
69
+ rubygems_mfa_required: 'true'
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.3.26
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Rumale::NaiveBayes provides naive bayes models with Rumale interface.
89
+ test_files: []