rumale-naive_bayes 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +53 -0
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +86 -0
- data/lib/rumale/naive_bayes/complement_nb.rb +88 -0
- data/lib/rumale/naive_bayes/gaussian_nb.rb +74 -0
- data/lib/rumale/naive_bayes/multinomial_nb.rb +76 -0
- data/lib/rumale/naive_bayes/negation_nb.rb +73 -0
- data/lib/rumale/naive_bayes/version.rb +10 -0
- data/lib/rumale/naive_bayes.rb +11 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6099259a89800806a4c1cb97f2c6fd76d9a3ee3c06bfafbd26ff585ce029bc8d
|
4
|
+
data.tar.gz: bd77f90edab0d920bb9bda3c9c2e21b3fd7fde646c56d7700d131c2eba2005c0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 878ef0a8d13a8df2cb0b27c8f87db50572c7ba7b81ab4ad5840bf6c263a2cc607e3dd12273c630f869df754be9edd69339364f927b33817ec3d17749fc76f74f
|
7
|
+
data.tar.gz: fcf815a1f21e9db918f898ee9d917948c1737ab2b66f1dd043c2f3b49d1b340fd40b933c0a1c31ec466a5fd3cbd94bf9dc821bb31f8a1a137bb573327b195799
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::NaiveBayes
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rumale-naive_bayes)
|
4
|
+
[BSD 3-Clause License](https://github.com/yoshoku/rumale/blob/main/rumale-naive_bayes/LICENSE.txt)
|
5
|
+
[Documentation](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::NaiveBayes provides naive bayes models,
|
9
|
+
such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-naive_bayes'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-naive_bayes
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - NaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module NaiveBayes
|
9
|
+
# BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
|
10
|
+
# This class is used internally.
|
11
|
+
class BaseNaiveBayes < ::Rumale::Base::Estimator
  include ::Rumale::Base::Classifier

  # Create a new estimator. Only delegates to the parent initializer.
  def initialize # rubocop:disable Lint/UselessMethodDefinition
    super()
  end

  # Predict class labels for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    n_samples = x.shape.first
    decision_values = decision_function(x)
    # For each sample, pick the class whose decision value is the largest.
    Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Predict log-probability for samples.
  #
  # The decision values of each sample are normalized with log-sum-exp so that
  # the exponentials of every row sum to one.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
  def predict_log_proba(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    n_samples, = x.shape
    log_likelihoods = decision_function(x)
    # Shift every row by its maximum before exponentiating. The result is
    # mathematically unchanged, but exp() no longer underflows to zero when all
    # log-likelihoods of a sample are large negative numbers.
    row_max = log_likelihoods.max(axis: 1).reshape(n_samples, 1)
    shifted = log_likelihoods - row_max
    shifted - Numo::NMath.log(Numo::NMath.exp(shifted).sum(axis: 1)).reshape(n_samples, 1)
  end

  # Predict probability for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    Numo::NMath.exp(predict_log_proba(x)).abs
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/naive_bayes/base_naive_bayes'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module NaiveBayes
|
7
|
+
# BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/naive_bayes/bernoulli_nb'
|
11
|
+
#
|
12
|
+
# estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
18
|
+
class BernoulliNB < BaseNaiveBayes
  # Return the class labels.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # Return the prior probabilities of the classes.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # Return the conditional probabilities for features of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Bernoulli Naive Bayes.
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  # @param bin_threshold [Float] The threshold for binarizing of features.
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
    super()
    @params = {
      smoothing_param: smoothing_param,
      bin_threshold: bin_threshold
    }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [BernoulliNB] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples, = x.shape
    # Features strictly greater than the threshold become 1.0, all others 0.0.
    bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    n_classes = @classes.size
    n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
    @class_priors = n_samples_each_class / n_samples
    count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(axis: 0) }]
    # Smoothing: add the parameter to every per-class feature count and twice
    # the parameter (one for each binary outcome) to every class sample count.
    count_features += @params[:smoothing_param]
    n_samples_each_class += 2.0 * @params[:smoothing_param]
    @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    n_classes = @classes.size
    bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
    not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
    log_likelihoods = Array.new(n_classes) do |l|
      log_probs = Numo::NMath.log(@feature_probs[l, true])
      log_neg_probs = Numo::NMath.log(1.0 - @feature_probs[l, true])
      # Both terms must be added explicitly: the contribution of the features
      # that are present and the contribution of the features that are absent.
      # Placing them on consecutive lines without an operator makes Ruby
      # evaluate them as separate statements and keep only the last one.
      present_term = (bin_x * log_probs).sum(axis: 1)
      absent_term = (not_bin_x * log_neg_probs).sum(axis: 1)
      Math.log(@class_priors[l]) + present_term + absent_term
    end
    # Stack the per-class score vectors and flip them to [n_samples, n_classes].
    Numo::DFloat[*log_likelihoods].transpose.dup
  end
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/naive_bayes/base_naive_bayes'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module NaiveBayes
|
7
|
+
# ComplementNB is a class that implements Complement Naive Bayes classifier.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/naive_bayes/complement_nb'
|
11
|
+
#
|
12
|
+
# estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
|
18
|
+
class ComplementNB < BaseNaiveBayes
  # The sorted unique class labels seen during fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The prior probability of each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The conditional feature probabilities of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Complement Naive Bayes.
  #
  # @param smoothing_param [Float] The smoothing parameter.
  # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
  def initialize(smoothing_param: 1.0, norm: false)
    super()
    @params = { smoothing_param: smoothing_param, norm: norm }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [ComplementNB] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*labels.map { |label| y.eq(label).count.fdiv(n_samples) }]
    @class_log_probs = Numo::NMath.log(@class_priors)

    # For each class, sum the features of all samples that do NOT belong to it.
    compl_counts = Numo::DFloat[*labels.map { |label| x[y.ne(label).where, true].sum(axis: 0) }]
    compl_counts += @params[:smoothing_param]
    @feature_probs = compl_counts / compl_counts.sum(axis: 1).reshape(n_classes, 1)

    log_probs = Numo::NMath.log(@feature_probs)
    # Default weights are the negated log-probabilities; when normalization is
    # requested each row is divided by its own sum instead.
    @weights = -log_probs
    @weights = log_probs / log_probs.sum(axis: 1).reshape(n_classes, 1) if normalize?
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    projected = x.dot(@weights.transpose)
    @class_log_probs + projected
  end

  private

  # Whether the norm parameter was set to exactly true.
  def normalize?
    @params[:norm] == true
  end
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/naive_bayes/base_naive_bayes'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module NaiveBayes
|
7
|
+
# GaussianNB is a class that implements Gaussian Naive Bayes classifier.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/naive_bayes/gaussian_nb'
|
11
|
+
#
|
12
|
+
# estimator = Rumale::NaiveBayes::GaussianNB.new
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
class GaussianNB < BaseNaiveBayes
  # The sorted unique class labels seen during fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The prior probability of each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The per-class mean vectors.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :means

  # The per-class variance vectors.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :variances

  # Create a new classifier with Gaussian Naive Bayes.
  def initialize
    super()
    @params = {}
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [GaussianNB] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    labels = @classes.to_a
    # Rows of x grouped by class, in the same order as the class labels.
    samples_per_class = labels.map { |label| x[y.eq(label).where, true] }

    @class_priors = Numo::DFloat[*labels.map { |label| y.eq(label).count / n_samples.to_f }]
    @means = Numo::DFloat[*samples_per_class.map { |samples| samples.mean(0) }]
    @variances = Numo::DFloat[*samples_per_class.map { |samples| samples.var(0) }]
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    scores = Array.new(@classes.size) do |idx|
      mean = @means[idx, true]
      variance = @variances[idx, true]
      # Log of the Gaussian normalization term and the variance-scaled squared deviation.
      log_norm = Numo::NMath.log(2.0 * Math::PI * variance)
      scaled_sq_dev = (x - mean)**2 / variance
      Math.log(@class_priors[idx]) - 0.5 * (log_norm + scaled_sq_dev).sum(axis: 1)
    end
    # Stack the per-class score vectors and flip them to [n_samples, n_classes].
    Numo::DFloat[*scores].transpose.dup
  end
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/naive_bayes/base_naive_bayes'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module NaiveBayes
|
7
|
+
# MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/naive_bayes/multinomial_nb'
|
11
|
+
#
|
12
|
+
# estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
18
|
+
class MultinomialNB < BaseNaiveBayes
  # The sorted unique class labels seen during fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The prior probability of each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The conditional feature probabilities of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Multinomial Naive Bayes.
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  def initialize(smoothing_param: 1.0)
    super()
    @params = {
      smoothing_param: smoothing_param
    }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [MultinomialNB] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*labels.map { |label| y.eq(label).count / n_samples.to_f }]
    # Per-class feature totals, smoothed and then normalized so each row sums to one.
    feature_totals = Numo::DFloat[*labels.map { |label| x[y.eq(label).where, true].sum(axis: 0) }]
    feature_totals += @params[:smoothing_param]
    @feature_probs = feature_totals / feature_totals.sum(axis: 1).reshape(n_classes, 1)
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # Only the presence of a feature (value > 0) enters the score; the
    # magnitude of the feature value is not used here.
    presence = Numo::DFloat[*x.gt(0)]
    scores = Array.new(@classes.size) do |idx|
      log_probs = Numo::NMath.log(@feature_probs[idx, true])
      Math.log(@class_priors[idx]) + (presence * log_probs).sum(axis: 1)
    end
    # Stack the per-class score vectors and flip them to [n_samples, n_classes].
    Numo::DFloat[*scores].transpose.dup
  end
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/naive_bayes/base_naive_bayes'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module NaiveBayes
|
7
|
+
# NegationNB is a class that implements Negation Naive Bayes classifier.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/naive_bayes/negation_nb'
|
11
|
+
#
|
12
|
+
# estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
|
18
|
+
class NegationNB < BaseNaiveBayes
  # The sorted unique class labels seen during fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The prior probability of each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The conditional feature probabilities of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Negation Naive Bayes.
  #
  # @param smoothing_param [Float] The smoothing parameter.
  def initialize(smoothing_param: 1.0)
    super()
    @params = {
      smoothing_param: smoothing_param
    }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [NegationNB] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*labels.map { |label| y.eq(label).count.fdiv(n_samples) }]
    # Log of the reciprocal of the probability of NOT belonging to each class.
    @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))

    # For each class, sum the features of all samples that do NOT belong to it.
    compl_counts = Numo::DFloat[*labels.map { |label| x[y.ne(label).where, true].sum(axis: 0) }]
    compl_counts += @params[:smoothing_param]
    @feature_probs = compl_counts / compl_counts.sum(axis: 1).reshape(n_classes, 1)
    @weights = Numo::NMath.log(@feature_probs)
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    projected = x.dot(@weights.transpose)
    @class_log_probs - projected
  end
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'naive_bayes/base_naive_bayes'
|
6
|
+
require_relative 'naive_bayes/bernoulli_nb'
|
7
|
+
require_relative 'naive_bayes/complement_nb'
|
8
|
+
require_relative 'naive_bayes/gaussian_nb'
|
9
|
+
require_relative 'naive_bayes/multinomial_nb'
|
10
|
+
require_relative 'naive_bayes/negation_nb'
|
11
|
+
require_relative 'naive_bayes/version'
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-naive_bayes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: numo-narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rumale-core
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.24.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.24.0
|
41
|
+
description: |
|
42
|
+
Rumale::NaiveBayes provides naive bayes models,
|
43
|
+
such as Gaussian Naive Bayes, Multinomial Naive Bayes, and Bernoulli Naive Bayes,
|
44
|
+
with Rumale interface.
|
45
|
+
email:
|
46
|
+
- yoshoku@outlook.com
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
extra_rdoc_files: []
|
50
|
+
files:
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- lib/rumale/naive_bayes.rb
|
54
|
+
- lib/rumale/naive_bayes/base_naive_bayes.rb
|
55
|
+
- lib/rumale/naive_bayes/bernoulli_nb.rb
|
56
|
+
- lib/rumale/naive_bayes/complement_nb.rb
|
57
|
+
- lib/rumale/naive_bayes/gaussian_nb.rb
|
58
|
+
- lib/rumale/naive_bayes/multinomial_nb.rb
|
59
|
+
- lib/rumale/naive_bayes/negation_nb.rb
|
60
|
+
- lib/rumale/naive_bayes/version.rb
|
61
|
+
homepage: https://github.com/yoshoku/rumale
|
62
|
+
licenses:
|
63
|
+
- BSD-3-Clause
|
64
|
+
metadata:
|
65
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
66
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-naive_bayes
|
67
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
68
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
69
|
+
rubygems_mfa_required: 'true'
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
requirements: []
|
85
|
+
rubygems_version: 3.3.26
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Rumale::NaiveBayes provides naive bayes models with Rumale interface.
|
89
|
+
test_files: []
|