simple_naive_bayes 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/simple_naive_bayes/version.rb +1 -1
- data/lib/simple_naive_bayes.rb +8 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7d656f37e20a851ca3011fd417a53c66c8c0804
|
4
|
+
data.tar.gz: 9d62b66d9634bdde996456a734addffdf1bcc417
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d8c8f8940bc942bc0811d2b790776a656ecd42f5fc060189642f36123c79c8d14df2e6566601a82313de1245e0b39b542339d812bcacc7c0fbfcc49fc1f1713
|
7
|
+
data.tar.gz: 73c7c940206a57020ca1e34e1d843596345102e8d72663bb42e2d3175ef236b9a6047fce43c1de906ae9618a1a5136ab07ad944c2eedb3e5d81fb6fcbd4b7970
|
data/lib/simple_naive_bayes.rb
CHANGED
@@ -68,6 +68,8 @@ module SimpleNaiveBayes
|
|
68
68
|
def classify(doc)
|
69
69
|
result = classify_with_all_result(doc)
|
70
70
|
|
71
|
+
return nil unless result
|
72
|
+
|
71
73
|
best = result.max_by { |classify_relust| classify_relust[1] }
|
72
74
|
best[0]
|
73
75
|
end
|
@@ -77,13 +79,14 @@ module SimpleNaiveBayes
|
|
77
79
|
# return [ [category1, probability1], [category2, probability2]... ]
|
78
80
|
def classify_with_all_result(doc)
|
79
81
|
result = []
|
82
|
+
return nil unless doc
|
80
83
|
|
81
84
|
@categories_count.keys().each do |category|
|
82
85
|
# log(P(doc|cat))
|
83
86
|
document_category = calc_document_category(doc, category)
|
84
87
|
|
85
88
|
# log(P(cat)) = log(@categories_count[cat]) - log( @all_category_num )
|
86
|
-
category_probability =
|
89
|
+
category_probability = calc_category_probability(category)
|
87
90
|
|
88
91
|
# log(P(cat|doc)) = log(P(doc|cat)) + log(P(cat))
|
89
92
|
category_document_probability = document_category + category_probability
|
@@ -93,6 +96,10 @@ module SimpleNaiveBayes
|
|
93
96
|
result
|
94
97
|
end
|
95
98
|
|
99
|
+
def calc_category_probability(category)
|
100
|
+
Math.log2(@categories_count[category]) - Math.log2(@all_category_num)
|
101
|
+
end
|
102
|
+
|
96
103
|
# log(P(doc|cat)) = log(P(word1|cat)) + log(P(word2|cat)) + ....
|
97
104
|
def calc_document_category(doc, category)
|
98
105
|
probability = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_naive_bayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- y42sora
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|