nb 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/Gemfile.ci +11 -0
- data/README.md +41 -3
- data/lib/nb/naive_bayes.rb +8 -0
- data/lib/nb/version.rb +1 -1
- data/spec/nb/naive_bayes_spec.rb +20 -0
- data/spec/spec_helper.rb +6 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e163cd57a340190efef6d152493f3cbf5f0ee6c7
|
4
|
+
data.tar.gz: 3dc32f2eaf69f8cda5fe9672bdd54c4c51d47a57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb8c29aa398fdda618c7c624146435a5d38386f4698f4a4262c5178826089e50568b5eb213af53f7f1de48d5e73f93e5fad9cbf47868403f4a522350f03c1b73
|
7
|
+
data.tar.gz: 3e3bc9a7f6371ad037fb62a1cf3a5cbeb747aa958bea96c8a14879174a3bfd754a7580612961916f6be926b9cc56a80be28add65572e8ad376ce274caf1a1fa5
|
data/.travis.yml
ADDED
data/Gemfile.ci
ADDED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#
|
1
|
+
# nb
|
2
2
|
|
3
|
-
|
3
|
+
[![Code Climate](https://codeclimate.com/github/forresty/nb/badges/gpa.svg)](https://codeclimate.com/github/forresty/nb)
|
4
|
+
[![Build Status](https://travis-ci.org/forresty/nb.svg?branch=master)](https://travis-ci.org/forresty/nb)
|
5
|
+
|
6
|
+
yet another Naive Bayes library
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -20,7 +23,42 @@ Or install it yourself as:
|
|
20
23
|
|
21
24
|
## Usage
|
22
25
|
|
23
|
-
|
26
|
+
```ruby
|
27
|
+
bayes = NaiveBayes.new :love, :hate
|
28
|
+
|
29
|
+
bayes.train :love, 'I', 'love', 'you'
|
30
|
+
bayes.train :hate, 'I', 'hate', 'you'
|
31
|
+
|
32
|
+
bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
|
33
|
+
bayes.classify(*%w{ I love you }).should == [:love, 0.5]
|
34
|
+
bayes.classify(*%w{ love }).should == [:love, 0.5]
|
35
|
+
```
|
36
|
+
|
37
|
+
### ability to view top tokens
|
38
|
+
|
39
|
+
`bayes.top_tokens_of_category(:spam)`
|
40
|
+
|
41
|
+
```
|
42
|
+
+------------+------+--------------------+
|
43
|
+
| 学生 | 1966 | 0.9995149465854383 |
|
44
|
+
| 多劳多得 | 1953 | 0.999511719439795 |
|
45
|
+
| 党 | 1517 | 0.9993714712416684 |
|
46
|
+
| 结 | 1327 | 0.9992815430836995 |
|
47
|
+
| 工资 | 1213 | 0.9992140742313297 |
|
48
|
+
| 不等 | 1135 | 0.999160108836817 |
|
49
|
+
| 诚聘 | 1107 | 0.9991388832706672 |
|
50
|
+
| 咨询 | 1095 | 0.9991294545902496 |
|
51
|
+
| 加入 | 1071 | 0.9991099639327047 |
|
52
|
+
| 限制 | 1046 | 0.9990887109454397 |
|
53
|
+
| 50 | 1041 | 0.9990843379645474 |
|
54
|
+
| 上网 | 1020 | 0.9990655037161098 |
|
55
|
+
| 流动资金 | 952 | 0.9989988208099915 |
|
56
|
+
| 曰 | 902 | 0.9989433817121107 |
|
57
|
+
| 办公室 | 861 | 0.9988931222482719 |
|
58
|
+
| 职员 | 827 | 0.9988476682254364 |
|
59
|
+
| 绝对 | 823 | 0.9988420740701035 |
|
60
|
+
+------------+------+--------------------+
|
61
|
+
```
|
24
62
|
|
25
63
|
## Contributing
|
26
64
|
|
data/lib/nb/naive_bayes.rb
CHANGED
@@ -33,6 +33,14 @@ class NaiveBayes
|
|
33
33
|
scores.sort_by { |k, v| -v }
|
34
34
|
end
|
35
35
|
|
36
|
+
def top_tokens_of_category(category, count=20)
|
37
|
+
tokens_count[category].map { |k, v| [k, v, probability_of_a_token_in_category(k, category)] }.sort_by { |i| -i.last }.first(count)
|
38
|
+
end
|
39
|
+
|
40
|
+
def probability_of_a_token_in_category(token, category)
|
41
|
+
probability_of_a_token_given_a_category(token, category) / @categories.inject(0.0) { |r, c| r + probability_of_a_token_given_a_category(token, c) }
|
42
|
+
end
|
43
|
+
|
36
44
|
def probability_of_a_token_given_a_category(token, category)
|
37
45
|
return assumed_probability if @tokens_count[category][token] == 0
|
38
46
|
|
data/lib/nb/version.rb
CHANGED
data/spec/nb/naive_bayes_spec.rb
CHANGED
@@ -8,8 +8,10 @@ describe NaiveBayes do
|
|
8
8
|
it { should respond_to :probability_of_a_token_given_a_category }
|
9
9
|
it { should respond_to :probability_of_tokens_given_a_category }
|
10
10
|
it { should respond_to :probability_of_a_category }
|
11
|
+
it { should respond_to :probability_of_a_token_in_category }
|
11
12
|
# it { should respond_to :total_number_of_tokens }
|
12
13
|
it { should respond_to :total_number_of_items }
|
14
|
+
it { should respond_to :top_tokens_of_category }
|
13
15
|
|
14
16
|
let(:bayes) { NaiveBayes.new(:love, :hate) }
|
15
17
|
subject { bayes }
|
@@ -27,6 +29,24 @@ describe NaiveBayes do
|
|
27
29
|
# end
|
28
30
|
# end
|
29
31
|
|
32
|
+
describe '#probability_of_a_token_in_category' do
|
33
|
+
it 'calculates correctly' do
|
34
|
+
bayes.train :love, 'I', 'love', 'you'
|
35
|
+
bayes.train :hate, 'I', 'hate', 'you'
|
36
|
+
|
37
|
+
bayes.probability_of_a_token_in_category('love', :love).should == 2.0/3 # 1 / ( 1 + 0.5 )
|
38
|
+
bayes.probability_of_a_token_in_category('hate', :love).should == 1.0/3 # 0.5 / ( 1 + 0.5 )
|
39
|
+
bayes.probability_of_a_token_in_category('I', :love).should == 0.5
|
40
|
+
|
41
|
+
bayes.train :love, 'hate', 'is', 'love'
|
42
|
+
bayes.train :love, 'hate', 'is', 'love'
|
43
|
+
bayes.train :love, 'hate', 'is', 'love'
|
44
|
+
|
45
|
+
bayes.probability_of_a_token_in_category('love', :love).should == 5.0/6 # 1 / ( 1 + 0.2 )
|
46
|
+
bayes.probability_of_a_token_in_category('hate', :love).should == 3.0/7 # 0.75 / ( 0.75 + 1 )
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
30
50
|
describe '#total_number_of_items' do
|
31
51
|
it 'calculates correctly' do
|
32
52
|
bayes.train :love, 'I', 'love', 'you'
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Forrest Ye
|
@@ -46,7 +46,9 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- Gemfile
|
51
|
+
- Gemfile.ci
|
50
52
|
- Guardfile
|
51
53
|
- LICENSE.txt
|
52
54
|
- README.md
|